o
    )i!                     @   sx   d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 ddlmZ ddlmZ e
eZG d	d
 d
eZdS )    N)Iterable)OptionalUnion)auto_functionalized)init_logger   )is_func)VllmInductorPassc                   @   s  e Zd ZdZdejjfddZdeejj	e
ejj	 f fddZ	ddejjd	ejj	d
eeeejj	ef f deeeejj	ef df  fddZd	ejj	d
eeeejj	ef f fddZd	ejj	deeejj	f fddZ	ddejjd	ejj	deeeejj	ef df  fddZdS )FixFunctionalizationPassa"  
    This pass defunctionalizes certain nodes to avoid redundant tensor copies.
    After this pass, DCE (dead-code elimination) should never be run,
    as de-functionalized nodes may appear as dead code.

    To add new nodes to defunctionalize, add to the if-elif chain in __call__.
    graphc                 C   sD  |    | |d g | _d}|jD ]}t|tsq|j}|jd }|tj	j
jjkrl|d }|jd jd }| | D ]"\}}	|	jD ]}
t|
tj	jjjrY|
| | |
 qD| |	 q=| || | | n|tj	j
jjkrddd}| ||| nr|tj	j
jjkrddd}| ||| n]|tj	j
jjkrdddd	}| ||| nG|tj	j
jjtj	j
jjfv rd
di}| ||| n-|tj	j
jjkrd
di}| j|||dd n|tj	j
jjkrd
di}| j|||dd nq|d
7 }q| |d t| j}| jD ]}|| qt d|| | |d | !  d S )NZbefore_fix_functionalizationr   queryinputZresidual)r      resultscale)r   r      r   )r   r   args)r   r   r   Z$before_fix_functionalization_cleanupz,De-functionalized %s nodes, removed %s nodesZafter_fix_functionalization)"beginZ
dump_graphnodes_to_removenodesr   r   kwargsr   torchopsZ_CZrotary_embeddingdefaultgetitem_usersitemsusersZatenZslice_scatterreplace_all_uses_with_removeinsert_defunctionalizedZfused_add_rms_normdefunctionalizeZ#fused_add_rms_norm_static_fp8_quantZ rms_norm_dynamic_per_token_quantZrms_normZrms_norm_static_fp8_quantZsilu_and_mulZsilu_and_mul_quantlenZ
erase_nodeloggerdebugZend_and_log)selfr   countnoder   Z	at_targetr   Zmm_nodeidxuserZuser_of_getitemmutated_argsZcount_removed r+   r/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/compilation/fix_functionalization.py__call__   s~   













z!FixFunctionalizationPass.__call__node_or_nodesc                 C   s.   t |tjjr| j| dS | j| dS )zM
        Stage a node (or nodes) for removal at the end of the pass.
        N)
isinstancer   fxNoder   appendextend)r%   r.   r+   r+   r,   r   m   s   z FixFunctionalizationPass._removeNr'   r*   r   .c                 C   s*   |  || | j|||d | | dS )z
        De-functionalize a node by replacing it with a call to the original.
        It also replaces the getitem users with the mutated arguments.
        See replace_users_with_mutated_args and insert_defunctionalized.
        r   N)replace_users_with_mutated_argsr    r   )r%   r   r'   r*   r   r+   r+   r,   r!   w   s   z(FixFunctionalizationPass.defunctionalizec                 C   sP   |  | D ]\}}|| }t|tr|j| n|}|| | | qdS )a7  
        Replace all getitem users of the auto-functionalized node with the
        mutated arguments.
        :param node: The auto-functionalized node
        :param mutated_args: The mutated arguments, indexed by getitem index.
        If the value of an arg is a string, `node.kwargs[arg]` is used.
        N)r   r   r/   strr   r   r   )r%   r'   r*   r(   r)   argr+   r+   r,   r4      s   
z8FixFunctionalizationPass.replace_users_with_mutated_argsreturnc                 C   s2   i }|j D ]}t|tjr|jd }|||< q|S )z
        Returns the operator.getitem users of the auto-functionalized node,
        indexed by the index they are getting.
        r   )r   r   operatorgetitemr   )r%   r'   r   r)   r(   r+   r+   r,   r      s   

z&FixFunctionalizationPass.getitem_usersc                    s   t  tsJ d  d| 5  jd }|du r%|j| jd nt fdd|D }|j||d W d   dS W d   dS 1 sJw   Y  dS )	a>  
        Insert a new defunctionalized node into the graph before node.
        If one of the kwargs is 'out', provide args directly,
        as node.kwargs cannot be used.
        See https://github.com/pytorch/pytorch/blob/a00faf440888ffb724bad413f329a49e2b6388e7/torch/_inductor/lowering.py#L351

        :param graph: Graph to insert the defunctionalized node into
        :param node: The auto-functionalized node to defunctionalize
        :param args: If we cannot use kwargs, specify args directly.
        If an arg is a string, `node.kwargs[arg]` is used.
        z%node must be auto-functionalized, is z insteadr   N)r   c                 3   s(    | ]}t |tr j| n|V  qd S N)r/   r5   r   ).0r6   r'   r+   r,   	<genexpr>   s     zCFixFunctionalizationPass.insert_defunctionalized.<locals>.<genexpr>r   )r   r   Zinserting_beforer   Zcall_functionr   tuple)r%   r   r'   r   functionr+   r<   r,   r       s   

"z0FixFunctionalizationPass.insert_defunctionalizedr:   )__name__
__module____qualname____doc__r   r0   ZGraphr-   r   r1   r   r   dictintr5   r   r>   r!   r4   r   r    r+   r+   r+   r,   r
      sH    
Q



"r
   )r8   collections.abcr   typingr   r   r   Z*torch._higher_order_ops.auto_functionalizer   Zvllm.loggerr   Zfx_utilsr   Zvllm_inductor_passr	   r@   r#   r
   r+   r+   r+   r,   <module>   s   