o
    )iAf                  	   @   s  U d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZmZ d
dlm Z m!Z!m"Z"m#Z# d
dl$m%Z% d
dl&m'Z' d
dl(m)Z) ee*Z+dede fddZ,G dd dZ-ej.G dd dZ/dej0de1e2 de3ej0e1e/ f fddZ4da5G dd dejj6Z7da8e2e9d< ed e2fd!d"Z:G d#d$ d$Z;dS )%    N)Sequence)contextmanager)AnyCallableOptional)enable_python_dispatcher)CompilationConfigCUDAGraphMode
VllmConfig)init_logger)current_platform)is_torch_equal_or_newerresolve_obj_by_qualname   )CompilerInterfaceEagerAdaptorInductorAdaptorInductorStandaloneAdaptor)compilation_counter)InductorPass)PostGradPassManagercompilation_configreturnc                 C   sD   | j rtjrtdrtd t S td t S td t S )Nz	2.8.0.devzUsing InductorStandaloneAdaptorzUsing InductorAdaptorzUsing EagerAdaptor)	Zuse_inductorenvsZVLLM_USE_STANDALONE_COMPILEr   loggerdebugr   r   r   )r    r   e/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/compilation/backends.pymake_compiler    s   


r   c                   @   s   e Zd ZdZdefddZdedefddZ			
ddede	defddZ
dd Z	ddejdee dedee dee f
ddZ			d dejdedededee defddZdS )!CompilerManagera  
    A manager to manage the compilation process, including
    caching the compiled graph, loading the compiled graph,
    and compiling the graph.

    The cache is a dict mapping
    `(runtime_shape, graph_index, backend_name)`
    to `any_data` returned from the compiler.

    When serializing the cache, we save it to a Python file
    for readability. We don't use json here because json doesn't
    support int as key.
    r   c                 C   s"   t  | _d| _|| _t|| _d S )NF)dictcacheis_cache_updatedr   r   compiler)selfr   r   r   r   __init__=   s   zCompilerManager.__init__vllm_configr   c                 C   s   | j |S N)r#   compute_hash)r$   r&   r   r   r   r(   C   s   zCompilerManager.compute_hashF 	cache_dirdisable_cacheprefixc                 C   s~   || _ || _tj|d| _|s4tj| jr4t| j}t	|
 | _W d   n1 s/w   Y  | jj|||d dS )a  
        Initialize the cache directory for the compiler.

        The organization of the cache directory is as follows:
        cache_dir=/path/to/hash_str/rank_i_j/prefix/
        inside cache_dir, there will be:
        - vllm_compile_cache.py
        - computation_graph.py
        - transformed_code.py

        for multiple prefixes, they can share the same
        base cache dir of /path/to/hash_str/rank_i_j/ ,
        to store some common compilation artifacts.
        zvllm_compile_cache.pyN)r*   r+   r,   )r+   r*   ospathjoincache_file_pathexistsopenastliteral_evalreadr!   r#   initialize_cache)r$   r*   r+   r,   fr   r   r   r6   F   s   
z CompilerManager.initialize_cachec                 C   sd   | j s| jsd S tjdd}|| j}t| jd}|| W d    d S 1 s+w   Y  d S )N   )indentw)	r+   r"   pprintPrettyPrinterpformatr!   r2   r0   write)r$   printerdatar7   r   r   r   save_to_filei   s   "zCompilerManager.save_to_fileNgraphexample_inputsgraph_indexruntime_shapec                 C   s~   ||| j jf| jvrd S | j||| j jf }| j |||||}|d u r0td|| j j| |S td|t|| j j| |S )NzEDirectly load the %s-th graph for dynamic shape from %s via handle %sz@Directly load the %s-th graph for shape %s from %s via handle %s)r#   namer!   loadr   r   str)r$   rB   rC   rD   rE   handlecompiled_graphr   r   r   rG   q   s$   
zCompilerManager.loadr   r   
num_graphsc                 C   s  |dkrt   at jd7  _d }| ||||}|d urB||d kr@t   }	|	t }
|d u r7td|
 |S tdt||
 |S t| j	t
rKd }nd| d| }| j	|||||\}}|d usgJ dtjs|d ur|| j||| j	jf< t jd7  _d| _|dkr|d u rtd	 ntd
t| |d u rtd|| j	j| ntd|t|| j	j| ||d krt   }	|	t }
| j|
7  _|d u rtd|
 |S td||
 |S )Nr   r   zQDirectly load the compiled graph(s) for dynamic shape from the cache, took %.3f szLDirectly load the compiled graph(s) for shape %s from the cache, took %.3f sZartifact_shape_Z
_subgraph_zFailed to compile the graphTz/Cache the graph for dynamic shape for later usez)Cache the graph of shape %s for later usez=Store the %s-th graph for dynamic shape from %s via handle %sz8Store the %s-th graph for shape %s from %s via handle %sz0Compiling a graph for dynamic shape takes %.2f sz+Compiling a graph for shape %s takes %.2f s)timecompilation_start_timer   Znum_backend_compilationsrG   r   inforH   
isinstancer#   r   compiler   VLLM_DISABLE_COMPILE_CACHEr!   rF   Znum_cache_entries_updatedr"   r   compilation_time)r$   rB   rC   Zadditional_inductor_configr   rD   rK   rE   rJ   nowelapsedZ	maybe_keyrI   r   r   r   rP      s   

zCompilerManager.compile)Fr)   r'   )r   r   N)__name__
__module____qualname____doc__r   r%   r
   rH   r(   boolr6   rA   fxGraphModulelistr   intr   r   rG   rP   r   r   r   r   r   .   sT    
#
r   c                   @   s0   e Zd ZU eed< eed< eed< ejed< dS )	SplitItemsubmod_namegraph_idis_splitting_graphrB   N)	rU   rV   rW   rH   __annotations__r]   rY   rZ   r[   r   r   r   r   r^      s
   
 r^   rB   opsc                    s  d}i  g }| j jD ]*}|jdv rq
|jdkr0t|j|v r0|d7 }| |< || |d7 }q
| |< q
tjjj	j	| d  fdddd}g }d	d
 |
 D }|D ]$}d|v s\|dkr]qRt||}	t|dd}
|t||
|
|v |	 qR|jdd d ||fS )Nr   )outputplaceholderZcall_functionr   c                    s    |  S r'   r   )nodeZnode_to_subgraph_idr   r   <lambda>   s    zsplit_graph.<locals>.<lambda>T)Zkeep_original_orderc                 S   s   g | ]\}}|qS r   r   ).0rF   moduler   r   r   
<listcomp>  s    zsplit_graph.<locals>.<listcomp>.r)   Zsubmod_c                 S   s   | j S r'   )r`   )xr   r   r   rh     s    )key)rB   nodesoprH   targetappendtorchrZ   ZpassesZsplit_moduleZnamed_modulesgetattrr]   replacer^   sort)rB   rc   Zsubgraph_idZsplit_op_graphsrf   split_gmZoutputsnamesrF   rj   r`   r   rg   r   split_graph   s<   






ry   g        c                       s   e Zd ZdZdejjdee de	ddf fddZ
 fd	d
Zdejjjdeejjjdf deeef def fddZ  ZS )PiecewiseCompileInterpretera  Code adapted from `torch.fx.passes.shape_prop.ShapeProp`.
    It runs the given graph with fake inputs, and compile some
    submodules specified by `compile_submod_names` with the given
    compilation configs.

    NOTE: the order in `compile_submod_names` matters, because
    it will be used to determine the order of the compiled piecewise
    graphs. The first graph will handle logging, and the last graph
    has some special cudagraph output handling.
    rj   compile_submod_namesr&   vllm_backendVllmBackendc                    sJ   t  | ddlm} | | _|| _|j| _|| _|| _|| _	d| _
d S )Nr   detect_fake_modeF)superr%   torch._guardsr   	fake_moder{   r   
graph_poolr&   r|   Zextra_traceback)r$   rj   r{   r&   r   r|   r   	__class__r   r   r%   '  s   
z$PiecewiseCompileInterpreter.__init__c              	      s~    fdd|D } j ) t  t j| W  d    W  d    S 1 s(w   Y  W d    d S 1 s8w   Y  d S )Nc                    s(   g | ]}t |tjr j|n|qS r   )rO   rs   Tensorr   from_tensorri   tr$   r   r   rk   6  s    z3PiecewiseCompileInterpreter.run.<locals>.<listcomp>)r   r   r   run)r$   args	fake_argsr   r   r   r   5  s   

RzPiecewiseCompileInterpreter.runrq   r   .kwargsr   c              
      s  t |tsJ t |||}|| jv r| j|}| |}dd t|D }| jj	j
||| jj| j|t| jd d}ddlm}	 ddlm}
 |
|| j|t| j||| j}| jjtjkr~tt }||| jtj| j|	|j|j |jdd| jj|< n|| jj|< t j d7  _ |S )	Nc                 S   s    g | ]\}}t |tjr|qS r   )rO   rs   ZSymIntri   irm   r   r   r   rk   F  s
    z;PiecewiseCompileInterpreter.call_module.<locals>.<listcomp>)rD   rK   rE   r   )CUDAGraphOptions)PiecewiseBackend)Zdebug_log_enableZ
gc_disableZweak_ref_output)Zrunnabler&   Zruntime_moder   Zcudagraph_options)!rO   rH   r   call_moduler{   indexZ
fetch_attr	enumerater|   compiler_managerrP   r   inductor_compile_configlenZ
cuda_graphr   Zcuda_piecewise_backendr   r&   cudagraph_moder	   NONEr   r   Zget_static_graph_wrapper_clsZ	PIECEWISEr   Zis_first_graphZis_last_graphrj   __dict__r   Z$num_piecewise_capturable_graphs_seen)r$   rq   r   r   rd   r   ZsubmodZsym_shape_indicesZ compiled_graph_for_dynamic_shaper   r   Zpiecewise_backendZstatic_graph_wrapper_classr   r   r   r   =  sV   




z'PiecewiseCompileInterpreter.call_module)rU   rV   rW   rX   rs   rZ   r[   r\   rH   r
   r%   r   rf   ZTargettupleZArgumentr    r   r   __classcell__r   r   r   r   rz     s&    


rz   Zbackbone	model_tagtagc                 c   s@    | t ksJ d|  dt  dt }| a zdV  W |a dS |a w )z%Context manager to set the model tag.z
Model tag z  is the same as the current tag rl   N)r   )r   Zold_tagr   r   r   set_model_tag|  s   
r   c                   @   s   e Zd ZU dZeed< eed< eed< dZe	ed< e
jed< e
jed< ee ed	< eed
< ee ed< ee ed< eej ed< eed< 	ddedefddZdd Zde
jdefddZdS )r}   a  The compilation backend for `torch.compile` with vLLM.
    It is used for compilation level of `CompilationLevel.PIECEWISE`,
    where we customize the compilation.

    The major work of this backend is to split the graph into
    piecewise graphs, and pass them to the piecewise backend.

    This backend also adds the PostGradPassManager to Inductor config,
    which handles the post-grad passes.
    r&   r   r   F_calledrB   rw   piecewise_graphsZreturned_callableZpost_grad_passessym_tensor_indicesinput_buffersr   r)   r,   c                 C   sJ   |pt | _t }|| _t | _g | _g | _|| _	|j
| _
t| j
| _d S r'   )r   r,   r   Zget_global_graph_poolr   r   post_grad_pass_managerr   r   r&   r   r   r   )r$   r&   r,   Zglobal_graph_poolr   r   r   r%     s   

zVllmBackend.__init__c                 C   s   | j }| j| j |j}d}||v r9t|| tr(||  | j ks'J nt|| ts1J | j	||  | j||< d S )NZpost_grad_custom_post_pass)
r   r   	configurer&   r   rO   r   uuidr   add)r$   configZinductor_configZPASS_KEYr   r   r   configure_post_pass  s   

zVllmBackend.configure_post_passr   c              	      s  j }jjsg }t }|| | }|| ttjj}jj	  t
dd| g }|D ]'}	||	 |	dkrBq6t|	}
||
  W d    n1 sXw   Y  q6dd l}|jd| dd }|| j|}|| |jt| dd d d }tjtjd|}|j_jj}tj|d	d
 |j_|jj}|jj}tj|d| d| j}tj|d	d
 |j_tj}|rt
d nt
d| j ||j t! j"d7  _"ddl#m$} t%% | }t
d| j j&|7  _&j'rJ d|_()  t*|jj+\_,_-ddl.m/} |dj( |dj, t! j0t1j-7  _0dd j-D }t2j,|j j3j4   tj|d}tj5|sdj,j6dd }|7dd}t|d}
|
8| W d    n	1 sw   Y  t
d| d	_'jj9t:j;ksjj<sj,S ddl=m>} | fd d D }dd!l?m@ fd"dtA|D _B fd#djBD _Cfd$d%}|S )&Nz9Traced files (to be considered for compilation cache):
%s
z<string>r   F)usedforsecurity
   Ztorch_compile_cacheT)exist_okZrank__z'vLLM's torch.compile cache is disabled.z2Using cache directory: %s for vLLM's torch.compiler   )torch_compile_start_timez&Dynamo bytecode transform time: %.2f sz#VllmBackend can only be called once)lazy_format_graph_codezbefore splitzafter splitc                 S   s   g | ]}|j s|jqS r   )ra   r_   )ri   itemr   r   r   rk   E  s    z(VllmBackend.__call__.<locals>.<listcomp>zcomputation_graph.pyz0from __future__ import annotations
import torch
)Zprint_outputz<lambda>r[   r:   zComputation graph saved to %sr~   c                    s&   g | ]}t |tjr |n|qS r   )rO   rs   r   r   r   )r   r   r   rk   e  s    is_symbolicc                    s>   g | ]\}}t |tjjjrt fd d| D r|qS )c                 3   s    | ]} |V  qd S r'   r   )ri   dr   r   r   	<genexpr>q  s    z2VllmBackend.__call__.<locals>.<listcomp>.<genexpr>)rO   rs   Z_subclassesZfake_tensorZ
FakeTensoranysizer   r   r   r   rk   n  s    c                    s   g | ]} |   qS r   )clone)ri   rm   )rC   r   r   rk   w  s    c                     s\   t | }t jD ]\}}|| }|jd } j| d | }|| |||< q	 j| S )Nr   )r\   r   r   shaper   Zcopy_rw   )r   Z	list_argsr   r   Zruntime_tensorrE   Zstatic_tensorr   r   r   copy_and_call|  s   



z+VllmBackend.__call__.<locals>.copy_and_call)Dr&   r   r*   r   r(   rr   r\   sortedZtraced_filesclearr   r   r/   r2   r5   hashlibmd5encode	hexdigestr   rH   r-   r.   ZVLLM_CACHE_ROOTmakedirsZparallel_configrankZdata_parallel_rankr,   local_cache_dirrQ   rN   r6   r   Znum_graphs_seenZmonitorr   rL   rR   r   rB   r   ry   Zsplitting_opsrw   r   Ztorch._dynamo.utilsr   Znum_piecewise_graphs_seenr   rz   r   r   r1   Zprint_readableru   r>   r   r	   r   Zcudagraph_copy_inputsr   r   Z%torch.fx.experimental.symbolic_shapesr   r   r   r   )r$   rB   rC   r&   ZfactorsZenv_hashZconfig_hashZforward_code_filesZhash_contentfilepathr7   r   Z	code_hashZcompiler_hashZhash_keyr*   r   Zdp_rankr   r+   r   Zdynamo_timer   Zsubmod_names_to_compileZ
graph_pathsrcr   r   r   r   )rC   r   r   r$   r   __call__  s   











	zVllmBackend.__call__N)r)   )rU   rV   rW   rX   r
   rb   r   r   r   rY   rZ   r[   r\   r^   r   r   r]   rs   r   r   rH   r%   r   r   r   r   r   r   r}     s,   
 


$r}   )<r3   dataclassesr-   r;   rL   collections.abcr   
contextlibr   typingr   r   r   rs   Ztorch.fxrZ   Ztorch._dispatch.pythonr   Z	vllm.envsr   Zvllm.configr   r	   r
   Zvllm.loggerr   Zvllm.platformsr   Z
vllm.utilsr   r   Zcompiler_interfacer   r   r   r   counterr   Zinductor_passr   Zpass_managerr   rU   r   r   r   	dataclassr^   r[   r\   rH   r   ry   rM   ZInterpreterrz   r   rb   r   r}   r   r   r   r   <module>   sJ   
 3
0^