from collections.abc import Iterable
from typing import Union

import torch.fx
from torch import SymInt

from vllm.logger import init_logger

from .fx_utils import is_func
from .vllm_inductor_pass import VllmInductorPass

logger = init_logger(__name__)


class NoOpEliminationPass(VllmInductorPass):
    """
    This is an inductor pass that removes redundant reshape/slice operations.
    It is required for RMSNorm-quant fusion to work properly.
    That's because apply_fp8_linear adds a reshape, which is redundant
    in the 2D case. Additionally, torch's internal no-op elimination pass
    does not handle certain slice variants.

    Cases handled:
      1. A chain of reshapes is equivalent to the last reshape called on the
      base tensor (input of the first reshape).
      2. A reshape that produces the shape of the input is redundant.
      3. A slice that produces the shape of the input is redundant.

    Example graph 1:
    mul_1: "f16[s0, 4096]" = ...
    view_1: "f16[s0, 128, 32]" = torch.reshape(mul_1, [-1, 128, 32])
    view_2: "f16[s0, 4096]" = torch.reshape(view_2, [-1, 4096])
    view_3: "f16[s0, 128, 32]" = torch.reshape(view_3, [-1, 128, 32])

    Can be replaced with:
    mul_1: "f16[s0, 4096]" = ...
    view_3: "f16[s0, 128, 32]" = ...

    Example graph 2:
    getitem_1: "f16[s0, 4096]" = ...
    view_1: "f16[s0, 4096]" = torch.reshape(getitem_1, [-1, 4096])
    at = auto_functionalized(static_scaled_fp8_quant, input = view_1, ...)
    out: "f8e4m3fn[s0, 4096]" = at[1]

    Can be replaced with:
    getitem_1: "f16[s0, 4096]" = ...
    at = auto_functionalized(static_scaled_fp8_quant, input = getitem_1, ...)
    out: "f8e4m3fn[s0, 4096]" = at[1]

    Example graph 3:
    arg0: "s0" = SymInt(s0)
    scaled_mm: "f16[s0, 4096]" = ...
    slice_1: "f16[s0, 4096]" = torch.slice(scaled_mm, -1, 0, arg0)
    at = auto_functionalized(fused_add_rms_norm, input = slice_1, ...)
    out: "f16[s0, 4096]" = torch.slice_scatter(scaled_mm, at[1], 0, 0, arg0)

    Can be replaced with:
    arg0: "s0" = SymInt(s0)
    scaled_mm: "f16[s0, 4096]" = ...
    at = auto_functionalized(fused_add_rms_norm, input = scaled_mm, ...)
    out: "f16[s0, 4096]" = at[1]

    TODO(luka): This is currently tested in test_fusion,
     but separate tests could be good.
    """

    def __call__(self, graph: torch.fx.Graph):
        self.begin()
        self.dump_graph(graph, "before_noop_elimination")
        count = 0
        # Remove no-op reshapes/slices:
        for node in graph.nodes:
            if is_func(node, torch.ops.aten.reshape.default):
                # Case 1: rewrite a reshape-of-reshape to act directly on
                # the base tensor (the input of the first reshape).
                input = node.args[0]
                if is_func(input, torch.ops.aten.reshape.default):
                    # Apply this reshape to the base tensor instead
                    node.update_arg(0, input.args[0])
                    # Erase the first reshape if it has no users left
                    if len(input.users) == 0:
                        graph.erase_node(input)
                    count += 1

                # Case 2: remove the reshape if it produces the input shape
                input, shape = node.args[:2]
                input_shape = input.meta["val"].shape
                if len(shape) != len(input_shape):
                    # Reshape changes the rank, not a no-op
                    continue
                if shape.count(-1) > 1:
                    # Invalid reshape args
                    continue
                if self.all_dims_equivalent(shape, input_shape):
                    node.replace_all_uses_with(input)
                    graph.erase_node(node)
                    count += 1

            elif is_func(node, torch.ops.aten.slice.Tensor):
                # Case 3: remove a slice that spans the whole dimension
                input, dim_index, start, end = node.args[:4]
                input_shape = input.meta["val"].shape
                i_dim = input_shape[dim_index]
                if start == 0 and self.dims_equivalent(end, i_dim):
                    node.replace_all_uses_with(input)
                    graph.erase_node(node)
                    count += 1

            elif is_func(node, torch.ops.aten.slice_scatter.default):
                # A slice_scatter that overwrites all of base with view is
                # equivalent to view itself.
                base, view, dim_index, start, end = node.args[:5]
                base_shape = base.meta["val"].shape
                view_shape = view.meta["val"].shape
                view_dim = view_shape[dim_index]
                if (base_shape == view_shape and start == 0
                        and self.dims_equivalent(end, view_dim)):
                    node.replace_all_uses_with(view)
                    graph.erase_node(node)
                    count += 1

        logger.debug("Removed %s no-op reshapes and slices", count)
        self.dump_graph(graph, "after_noop_elimination")
        self.end_and_log()

    def all_dims_equivalent(self, dims: Iterable[Union[int, torch.fx.Node]],
                            i_dims: Iterable[Union[int, SymInt]]):
        return all(
            self.dims_equivalent(s, i_s) for s, i_s in zip(dims, i_dims))

    def dims_equivalent(self, dim: Union[int, torch.fx.Node],
                        i_dim: Union[int, SymInt]) -> bool:
        """
        This function checks if two dimensions are equivalent.
        :param dim: The dimension arg to reshape/slice
        :param i_dim: The corresponding dimension in the input tensor
        :return: Are the dimensions equivalent?

        There are three cases in which the dimensions are equivalent:
        1. The dimensions are equal (both integers)
        2. The reshape dimension is -1 (i.e. inferred)
        3. The dimensions both correspond to the same SymInt

        While case 2 does not guarantee the dimensions are equal,
        they are equal if all other dimensions are equal.

        In case 3, the reshape dimension is a torch.fx.Node,
        and its value is a SymInt. That value is equal to the
        input dimension.

        """
        # Cases 1 and 2: equal integers, or an inferred (-1) reshape dim
        if dim == i_dim or dim == -1:
            return True
        # Case 3: the reshape dim is an fx.Node whose SymInt value matches
        return isinstance(dim, torch.fx.Node) and dim.meta["val"] == i_dim
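

# Illustrative sketch (an addition, not part of the vLLM module): builds the
# redundant reshape chain from "Example graph 1" with torch.fx so the pattern
# is easy to inspect. The pass itself runs on post-grad aten graphs produced
# by torch.compile, where these calls appear as torch.ops.aten.reshape.default
# nodes; symbolic_trace is used here only to visualize the chain. The function
# name `redundant_reshapes` is hypothetical. Note that running this file
# directly requires the surrounding vllm package for the relative imports.
if __name__ == "__main__":
    import torch

    def redundant_reshapes(x: torch.Tensor) -> torch.Tensor:
        view_1 = x.reshape(-1, 128, 32)
        view_2 = view_1.reshape(-1, 4096)  # undoes view_1
        return view_2.reshape(-1, 128, 32)  # the only reshape needed

    gm = torch.fx.symbolic_trace(redundant_reshapes)
    # Three reshape nodes appear; NoOpEliminationPass would collapse the
    # chain so only the final reshape of the base tensor remains.
    print(gm.graph)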