o
    + i	                     @  s  d dl mZ d dlZd dlZd dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZmZm Z m!Z! ee"ej#ddZ$G dd dZ%G dd deZ&G dd dZ'dd Z(dddZ)dd Z*dS )    )annotationsN)Enum)coredygraph)scope_guard)Variable)
get_logger)_get_function_names_from_layerget_ast_static_function	to_static)StaticFunction)Layer)PaddleToTensorRTConverter)forbid_op_lower_trtmark_builtin_oprun_pir_passrun_trt_partitionwarmup_shape_inferz&%(asctime)s-%(levelname)s: %(message)s)fmtc                   @  s.   e Zd Z							ddddZdd ZdS )InputNfloat32warmup_datatuple[np.ndarray, ...] | Nonemin_input_shapetuple | Nonemax_input_shapeoptim_input_shapeinput_data_type
str | Noneinput_rangenamereturnNonec                 C  s|   |dur|s
|s
|rt d|dus|durtd nd|||fv r't d|| _|| _|| _|| _|| _|| _|| _	dS )a  
        A class used to configure input data for models. This class serves two purposes:

        1. Random Data Generation: When no input data is supplied, it automatically generates random input data based on the specified minimum, optimal, and maximum shapes. In this mode,you can configure the data type (e.g., 'float32', 'int64', etc.) and the range of values (e.g.,(0.0, 1.0) for floats or (1, 10) for integers).

        2. User-Provided Input: Alternatively, you can supply your own input data via the `warmup_data` argument. In this case, the provided data will be used directly, and the`input_data_type` and `input_range` settings will be ignored.

        Args:
            warmup_data (tuple):
                The tuple of actual input data (for the automatic shape collection mechanism).
            min_input_shape (tuple):
                The shape of the minimum input tensor.
            max_input_shape (tuple):
                The shape of the maximum input tensor.
            optim_input_shape (tuple):
                The shape of the optimal input tensor.
            input_data_type (str, optional):
                The data type for the input tensors, such as 'float32' or 'int64' or 'float32' or 'int32'  (default is float32).
                This option only applies when min_input_shape, optim_input_shape, and max_input_shape are provided; it does not apply to warmup_data.
            input_range (tuple, optional):
                The range of values used to generate input data. For floats, the default range is (0.0, 1.0). For integers, the default range is (1, 10).
                This option only applies when min_input_shape, optim_input_shape, and max_input_shape are provided; it does not apply to warmup_data.
            name:(str,optional):
                The name of the input to the model.
        Returns:
            None

        Examples:
            .. code-block:: python

                >>> # example 1:
                >>> from paddle.tensorrt.export import Input
                >>> input_config = Input(
                >>>     min_input_shape=(1,100),
                >>>     optim_input_shape=(4,100),
                >>>     max_input_shape=(8,100),
                >>> )
                >>> input_config.input_data_type='int64'
                >>> input_config.input_range=(1,10)

                >>> # example 2:
                >>> from paddle.tensorrt.export import Input
                >>> import numpy as np
                >>> input_config = Input(
                >>>     warmup_data=(
                >>>         np.random.rand(1,100).astype(np.float32),
                >>>         np.random.rand(4,100).astype(np.float32),
                >>>         np.random.rand(8,100).astype(np.float32),
                >>>     )
                >>> )
        Nz7warmup data provided; min/max/optim shapes are ignored.zWhen warmup_data is provided,input_data_type and input_range are ignored.These parameters only apply whtn generate random data using min/opt/max shapes.z?When warm_data is None, min/max/optim shapes must be specified.)

ValueError_loggerwarningr   r   r   r   r   r   r    )selfr   r   r   r   r   r   r     r'   b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/tensorrt/export.py__init__6   s*   =
zInput.__init__c                 C  sd  | j dur	td| jdu rd| jv rdnd| _| j\}}||krJtj| j|| jd| _tj| j|| jd| _	tj| j
|| jd| _| j| j	| jfS d| jv r}tjj||| jd| j| _tjj||| jd| j| _	tjj||| j
d| j| _n-tjj||| jd| j| _tjj||| jd| j| _	tjj||| j
d| j| _| j| j	| jfS )	aF  
        Generates random input data based on the user-specified min_input_shape, optim_input_shape, and max_input_shape, as well as the data type and input range.

        Returns:
            tuple(numpy.ndarray, numpy.ndarray, numpy.ndarray): A tuple containing the generated input data for the minimum, optimal, and maximum shapes.

        Examples:
            .. code-block:: python

            >>> from paddle.tensorrt.export import Input
            >>> input_config = Input(
            >>>     min_input_shape=(1,100),
            >>>     optim_input_shape=(4,100),
            >>>     max_input_shape=(8,100),
            >>> )
            >>> input.input_data_type='int64'
            >>> input.input_range=(1,10)
            >>> input_min_data, input_optim_data, input_max_data = input_config.generate_input_data()
        NzHgenerate_input_data() should not be called when warmup_data is provided.float)g        g      ?)   
   )Zdtypeint)size)r   RuntimeErrorr   r   npfullr   Zinput_min_datar   Zinput_optim_datar   Zinput_max_datarandomrandintZastypeuniform)r&   lowhighr'   r'   r(   generate_input_data   sr   






zInput.generate_input_data)NNNNr   NN)r   r   r   r   r   r   r   r   r   r   r   r   r    r   r!   r"   )__name__
__module____qualname__r)   r7   r'   r'   r'   r(   r   5   s    Ur   c                   @  s   e Zd ZdZdZdZdZdS )PrecisionModeFP32FP16BF16INT8N)r8   r9   r:   r<   r=   r>   r?   r'   r'   r'   r(   r;      s    r;   c                   @  s0   e Zd Zdddejddg ddddfdddZdS )TensorRTConfig   Ni   @FTinputslistmin_subgraph_size
int | Nonesave_model_dirr   disable_opsstr | list | Noneprecision_moder;   ops_run_floatoptimization_leveldisable_passesworkspace_sizeuse_cuda_graphbool | Nonerefit_params_pathdisable_logglingr!   r"   c                 C  s   dd |D }t |rt|std|| _|| _|| _|| _|| _|| _|| _	|| _
|	| _|
| _|| _|| _| jr@| j	d tjd|i dS )aj  
        A class for configuring TensorRT optimizations.

        Args:
            inputs (list):
                A list of Input configurations
            min_subgraph_size (int, optional):
                The minimum number of operations in a subgraph for TensorRT to optimize (default is 3).
            save_model_dir (str, optional):
                The directory where the optimized model will be saved (default is not to save).
            disable_ops : (str|list, optional):
                A string representing the names of operations that should not be entering by TensorRT (default is None).
            precision_mode (PrecisionMode, optional):
                Specifies the precision mode for TensorRT optimization. The options are:
                - PrecisionMode.FP32: 32-bit floating point precision (default).
                - PrecisionMode.FP16: 16-bit floating point precision.
                - PrecisionMode.INT8: 8-bit integer precision.
                - PrecisionMode.BFP16: 16-bit Brain Floating Point precision. Only supported in TensorRT versions greater than 9.0.
            ops_run_float (str|list, optional):
                A set of operation names that should be executed using FP32 precision regardless of the `tensorrt_precision_mode` setting.
            optimization_level (int, optional):
                Set TensorRT optimization level (default is 3). Only supported in TensorRT versions greater than 8.6.
            disable_passes : (str|list, optional):
                A list of string representing the names of pass that should not be used for origin program (default is []).
            workspace_size (int, optional):
                Specifies the maximum GPU memory (in bytes) that TensorRT can use for the optimization process (default is 1 << 30).
            use_cuda_graph (bool, optional):
                Specify whether TensorRT enables cuda_graph during the optimization process (default is false).
            refit_params_path(str, optional):
                The path to the weights that need to be refitted.
            disable_loggling (bool, optional):
                Specifies whether to enable GLOG info output during the optimization process (default is true).
        Returns:
            None

        Examples:
            .. code-block:: python
            >>> # example 1:
            >>> from paddle.tensorrt.export import (
            >>>    Input,
            >>>    TensorRTConfig,
            >>>    PrecisionMode,
            >>> )
            >>> input_config = Input(
            >>>     min_input_shape=(1,100),
            >>>     optim_input_shape=(4,100),
            >>>     max_input_shape=(8,100),
            >>> )
            >>> input_config.input_data_type='int64'
            >>> input_config.input_range=(1,10)

            >>> trt_config = TensorRTConfig(inputs=[input_config])
            >>> trt_config.disable_ops = ["pd_op.dropout"]
            >>> trt_config.precision_mode = PrecisionMode.FP16
            >>> trt_config.ops_run_float = "pd_op.conv2d"
            >>> trt_config.workspace_size = 1 << 32

            >>> # example 2:
            >>> from paddle.tensorrt.export import (
            >>>     Input,
            >>>     TensorRTConfig,
            >>>     PrecisionMode,
            >>> )
            >>> input_config = Input(
            >>>     warmup_data=(
            >>>         np.random.rand(1,100).astype(np.float32),
            >>>         np.random.rand(4,100).astype(np.float32),
            >>>         np.random.rand(8,100).astype(np.float32),
            >>>     )
            >>> )
            >>> trt_config = TensorRTConfig(inputs=[input_config])
        c                 S  s   g | ]}|j d uqS )Nr   .0ir'   r'   r(   
<listcomp>@  s    z+TensorRTConfig.__init__.<locals>.<listcomp>z,All Inputs must have input_data if any does.Zconstant_folding_passZFLAGS_trt_min_group_sizeN)anyallr#   rB   rD   rF   rI   rJ   rG   rL   rK   rM   rN   rP   rQ   appendpaddleZ	frameworkZ	set_flags)r&   rB   rD   rF   rG   rI   rJ   rK   rL   rM   rN   rP   rQ   Zhas_input_datar'   r'   r(   r)      s*   XzTensorRTConfig.__init__)rB   rC   rD   rE   rF   r   rG   rH   rI   r;   rJ   rH   rK   rE   rL   rC   rM   rE   rN   rO   rP   r   rQ   rO   r!   r"   )r8   r9   r:   r;   r<   r)   r'   r'   r'   r(   r@      s    r@   c              	   C  s  t | tjjjjstdt|  g }|  j	D ]}|
 dks'|
 dkr2| d }|| qtj " g }|jd jd urdd |jD }tdd	 |D d
ksYJ t|d }t|D ]'}	i }
t|jD ]\}}|j
d urx|j
n|| }|| |	 |
|< ql||
 qcn8dd |jD }tt|d D ]'}i }
t|jD ]\}}|j
d ur|j
n|| }|| | |
|< q||
 qt| |j||jd}t|||d} |jrt| |j t|  t| }t|||d}|  g }| j	D ]}|
 dkr| D ]}| }|| qq|j rRg }|!dd | j	D  t"d}tj#$|}t%| tj#j&|j ||||d W d    n	1 sMw   Y  |W  d    S 1 s_w   Y  d S )NzEprogram type must be paddle.base.libpaddle.pir.Program, but received 
pd_op.data
pd_op.feedr    r   c                 S  s   g | ]}|j qS r'   rR   )rT   inpr'   r'   r(   rV   h  s    z"convert_to_trt.<locals>.<listcomp>c                 S  s   h | ]}t |qS r'   )len)rT   tr'   r'   r(   	<setcomp>j      z!convert_to_trt.<locals>.<setcomp>r+   c                 S  s   g | ]}|  qS r'   )r7   rS   r'   r'   r(   rV   s  ra   )rL   scoperI   )feedsrb   )
trt_configzpd_op.fetchc                 s  s:    | ]}|  d ks|  dkr| D ]}|V  qqdS )r[   r\   N)r    results)rT   opresultr'   r'   r(   	<genexpr>  s    z!convert_to_trt.<locals>.<genexpr>)program)'
isinstancerZ   baseZ	libpaddlepirZProgram	TypeErrortypeZglobal_blockopsr    attrsrY   	pir_utilsIrGuardrB   r   r^   range	enumerater   rL   rI   r   rG   r   r   r   r   Zconvert_program_to_trtZoperandssourcerF   extend	CUDAPlacestaticExecutorr   Zsave_inference_model)ri   rd   rb   Z	feed_namerf   
param_namerc   Zinput_tuplesZnum_samplesZ
sample_idxZ	feed_dictrU   r]   r    jZprogram_with_pir	converterZtrt_output_varoperandru   Zinput_valuesplaceexer'   r'   r(   convert_to_trtY  s   



&r   c               	   K  s  t jj| fd|i|}t ot }t|t jr|j}n|}d}|durt|t	rKt
|D ]}t||d}	t|	trJd|krJtdt| dq/t|ttfs\tdt| dg }t j|D ]+}
t|
t jjrs||
 qdt|
tjjtt jjfr|t jj|
 qd||
 qdd}t }i }t|t	rttt
|}t|}|j s|j!rd}n|g}g }d}|D ]}t|t	rt"t||d}	t|	tr|	j#r|	 }|||j$j%d | f q|	j&|||d	}nZd|kr|rt j'||}t|j(|dd
}|j&||d	}d}n:qt|tr/t"|}	|	j#r'|	 }|||	f q|	j&||d}nt"|}	|r=t j'||}t|	|dd
}|j)}d}t|t	rT|* }nt|trc|	j+rc|	j+* }|r~i }i }|, D ]\}}
|||
j-< |
||
j-< qnt./ / t0|j1 D ]!\}}|j2sq|3|j-4 }||j- 5 4 }|6| qW d   n	1 sw   Y  qt j78 " |j9}|j:}t j;j<j=||dd t>|||}||fW  d   S 1 sw   Y  dS )ai  
    Convert a dynamic graph API to a static graph and apply TensorRT optimizations if relevant parameters are configured.

    Args:
        function (callable): Callable dynamic graph function. If it used as a
            decorator, the decorated function will be parsed as this parameter.
        input_spec (list[InputSpec]|tuple[InputSpec]): list/tuple of InputSpec to
            specific the shape/dtype/name information of each input Tensor.
        config: (TensorRTConfig): The configuration of TensorRTConfig.
        kwargs: Support keys including `property`, set `property` to True if the function
            is python property.

    Returns:
        tuple: A tuple containing two elements. The first element is the TensorRT optimized program., optionally optimized with TensorRT if configured. The second element is the scope containing the parameters.

    
input_specNforwardzIf there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is .zIThe input input_spec should be 'list', but received input_spec's type is FT)	with_hookis_prim_infer)r   Z
full_graph)r   fetch)ri   Z
fetch_listZfetch_var_name)?rZ   Zjitr   r   Z_is_fwd_prim_enabledZ_is_bwd_prim_enabledrj   ZDataParallelZ_layersr   r	   getattrr   r#   rn   rC   tuplerm   utilsflattenrx   Z	InputSpecrY   eagerZTensorr   rl   ValueZfrom_tensorZScopesetsortedZ_forward_pre_hooksZ_forward_post_hooksr
   Zis_property	__class__r8   Z#concrete_program_specify_input_specZpack_sequence_asr   concrete_programZto_static_state_dictZclass_instanceitemsr    r   guardzip
parametersZpersistablevarZ
get_tensorvalueZ_share_data_withrq   rr   main_programZoutputsrk   executorZ_add_pir_fetch_opsr   ) functionr   configkwargsZ
static_netr   Zinner_layerZinner_input_specmember_nameZstatic_funcr   r   rb   Zextra_var_infoZ	functionsZproperty_valsr   Z	attr_funcZimmediate_valZstatic_forwardZstatic_functionZdygraph_state_dictZstate_names_dictZstate_var_dictZstrcutured_nameZtensorr   Zparam_or_buffer_tensorZ
src_tensorr   Zoutput_varsZprogram_with_trtr'   r'   r(   	_convert_  s  







&r   c                 C  s  t j|jt j| krtdtj }td}tj	|}d}t j
| r[| } t j| \}}t j|\}}	|	dkrBd}n|	dkrId}ntd|	 dt j||d	 }
n$| }|d	 }
t j|d rld}nt j|d rwd}ntd
|  dt j|
std|
 d|rtj  tjjj| |d\}}}W d   n1 sw   Y  n+tj  dt jd< tjjj| |d\}}}dt jd< W d   n1 sw   Y  t|||S )a  
    Loading a PaddlePaddle Model and Exporting the TensorRT-Optimized Program.

    Args:
       model_path(str):The directory path where the PaddlePaddle model is located.
       The model path can either include the model directory and prefix (e.g., 'model_dir/inference'),
       or it can be the full path to the model (e.g., 'model_dir/inference.json').
       config(TensorRTConfig):The configuration of TensorRTConfig.

    Returns:
        program:The TensorRT optimized program.

    Examples:
        .. code-block:: python

            >>> # example 1:
            >>> # This example takes the user-specified model input shape, and Paddle internally generates corresponding random data.
            >>> import numpy as np
            >>> import paddle
            >>> import paddle.inference as paddle_infer
            >>> import paddle.nn.functional as F
            >>> from paddle import nn
            >>> from paddle.tensorrt.export import Input, TensorRTConfig

            >>> class LinearNet(nn.Layer):
            >>>     def __init__(self, input_dim):
            >>>         super().__init__()
            >>>         self.linear = nn.Linear(input_dim, input_dim)

            >>>     def forward(self, x):
            >>>         return F.relu(self.linear(x))

            >>> input_dim = 3
            >>> # 1.Instantiate the network.
            >>> layer = LinearNet(input_dim)

            >>> save_path = "/tmp/linear_net"
            >>> # 2.Convert dynamic graph to static graph and save as a JSON file.
            >>> paddle.jit.save(layer, save_path, [paddle.static.InputSpec(shape=[-1, input_dim])])

            >>> # 3.Create TensorRTConfig
            >>> input_config = Input(
            >>>     min_input_shape=[1, input_dim],
            >>>     optim_input_shape=[2, input_dim],
            >>>     max_input_shape=[4, input_dim],
            >>>     name='x',
            >>> )

            >>> trt_config = TensorRTConfig(inputs=[input_config])
            >>> trt_config.save_model_dir = "/tmp/linear_net_trt"

            >>> # 4.Perform TensorRT conversion
            >>> program_with_trt = paddle.tensorrt.convert(save_path, trt_config)

            >>> # 5.Create a Predictor and run TensorRT inference.
            >>> config = paddle_infer.Config(
            >>>     trt_config.save_model_dir + '.json',
            >>>     trt_config.save_model_dir + '.pdiparams',
            >>> )
            >>> config.enable_use_gpu(100, 0)
            >>> predictor = paddle_infer.create_predictor(config)

            >>> input_data = np.random.randn(2, 3).astype(np.float32)
            >>> model_input = paddle.to_tensor(input_data)

            >>> output_converted = predictor.run([model_input])

            >>> # example 2:
            >>> # In this example, the user specifies the actual input.
            >>> import numpy as np
            >>> import paddle
            >>> import paddle.inference as paddle_infer
            >>> import paddle.nn.functional as F
            >>> from paddle import nn
            >>> from paddle.tensorrt.export import Input, TensorRTConfig

            >>> class LinearNet(nn.Layer):
            >>>     def __init__(self, input_dim):
            >>>         super().__init__()
            >>>         self.linear = nn.Linear(input_dim, input_dim)

            >>>     def forward(self, x):
            >>>         return F.relu(self.linear(x))

            >>> input_dim = 3
            >>> # 1.Instantiate the network.
            >>> layer = LinearNet(input_dim)

            >>> save_path = "/tmp/linear_net"
            >>> # 2.Convert dynamic graph to static graph and save as a JSON file.
            >>> paddle.jit.save(layer, save_path, [paddle.static.InputSpec(shape=[-1, input_dim])])

            >>> # 3.Create TensorRTConfig
            >>> input_config = Input(
            >>>     warmup_data=(
            >>>         np.random.rand(1,3).astype(np.float32),
            >>>         np.random.rand(2,3).astype(np.float32),
            >>>         np.random.rand(4,3).astype(np.float32),
            >>>     ),
            >>>     name='x',
            >>> )

            >>> trt_config = TensorRTConfig(inputs=[input_config])
            >>> trt_config.save_model_dir = "/tmp/linear_net_trt"

            >>> # 4.Perform TensorRT conversion
            >>> program_with_trt = paddle.tensorrt.convert(save_path, trt_config)

            >>> # 5.Create a Predictor and run TensorRT inference.
            >>> config = paddle_infer.Config(
            >>>     trt_config.save_model_dir + '.json',
            >>>     trt_config.save_model_dir + '.pdiparams',
            >>> )
            >>> config.enable_use_gpu(100, 0)
            >>> predictor = paddle_infer.create_predictor(config)

            >>> input_data = np.random.randn(2, 3).astype(np.float32)
            >>> model_input = paddle.to_tensor(input_data)

            >>> output_converted = predictor.run([model_input])

    z{The `config.save_model_dir` and `model_path` cannot be the same. Please specify a different directory for saving the model.r   Tz.jsonz.pdmodelFzUnsupported extension z. Only support json/pdmodelz
.pdiparamsz,No valid model file found in the directory 'ze'. Expected either 'json' or 'pdmodel'. Please ensure that the directory contains one of these files.zParameters file 'zJ' not found. Please ensure the weights file exists in the model directory.)r   N1ZFLAGS_enable_pir_in_executor0)ospathabspathrF   r#   rZ   rx   Zglobal_scoperw   ry   isfilesplitsplitextjoinexistsrq   rr   ioZload_inference_modelZ
OldIrGuardenvironr   )Z
model_pathr   rb   r~   r   Zis_jsonZ	model_dirZ
model_fileZmodel_prefixextZparams_pathri   Zfeed_target_namesZfetch_targetsr'   r'   r(   convertw  sh   {






	r   )NNN)+
__future__r   loggingr   enumr   numpyr0   rZ   Zpaddle.baser   r   Zpaddle.base.executorr   Zpaddle.base.frameworkr   Zpaddle.base.log_helperr   Zpaddle.jit.apir	   r
   r   Z'paddle.jit.dy2static.program_translatorr   Z	paddle.nnr   Zpaddle.tensorrt.converterr   Zpaddle.tensorrt.utilr   r   r   r   r   r8   INFOr$   r   r;   r@   r   r   r   r'   r'   r'   r(   <module>   s4    "r
_ @