o
    pi                     @   sX  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
mZmZmZmZmZ d dlmZmZmZ d dlmZmZ d dlmZ d d	lmZmZ d d
lmZ ddlmZ e  e!Z"edd Z#dd Z$dd Z%dd Z&dd Z'	dUddZ(e) e) d dfddZ*dd Z+dejdejfd d!Z,d"d# Z-	dVd$edejd%e.d&e/fd'd(Z0d)d* Z1d+d, Z2d-d. Z3				dWd/d0Z4d$edejdejd%e.d&e/f
d1d2Z5d$edejd%e.d3e6d4e/d&e/fd5d6Z7d$edejd%e.fd7d8Z8d9d: Z9d;d< Z:d=d> Z;d?d@ Z<dAdB Z=dCdD Z>dEdF Z?dGdH Z@dIdJ ZAdKdL ZBdMdN ZCdOdP ZDdQdR ZEdVdSdTZFdS )X    N)pir)ir_backward)	ValueDictValueSet)call_decomp_rulecall_decomp_vjp decomp_ops_contain_unused_outputhas_decomp_rulehas_decomp_vjp)pir_chunk_id_guardpir_op_name_guardpir_op_role_guard)Block	Operation)signature_safe_contextmanager)
DebugPrintauto_recompute)core   )registerc               	   c   sJ    t  } z| st d d V  W | st d d S d S | s$t d w w )NTF)r   Z_is_all_prim_enabledZ_set_prim_all_enabled)Z
prim_state r   b/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/decomposition/decomp.py
prim_guard,   s   
r   c                 C   s:   t | tjr	| fS t | tjrt| S tdt|  dS )NzType z is not supported.)
isinstancer   ValuetypingSequencetuple	TypeErrortype)xsr   r   r   _build_tensor_tuple8   s
   r!   c                 C   s   t | t |ks
J g }t|D ]B\}}t| | tjrM| t v r5|t|  v r5|d d u s4J nt |dkrCt|d tjsEJ ||d  q|| q|S )Nr   r   )	len	enumerater   r   r   namer   keysappend)	orig_outsdecomp_outsopresidxvaluer   r   r   _analyse_decomp_results@   s    r-   c                    s   d}g }   D ]-}| }| r0| }t|tr*| |kr*dd |  D }|| q|d q  |kr?|fS | fdd  D  }t	|S )z
    For standard api of operator, its inputs should keep consistent with organization of its inputs and attrs.

    Args:
    op (Operator): The target operator.
    builtin.combinec                 S      g | ]}|  qS r   source).0itemr   r   r   
<listcomp>c       z1_prepare_python_api_arguments.<locals>.<listcomp>Nc                    s   g | ]}   | qS r   )attrsr2   xr)   r   r   r4   o   s    )
operandsr1   initializedget_defining_opr   r   r$   r&   Zget_attr_namesr   )r)   combine_op_nameinputsr8   inputprev_opZapi_argumentsr   r9   r   _prepare_python_api_argumentsR   s    rA   c              	   C   s   d}g }|   D ]X}| }| r`| }t|trG| |krG|  D ] }| j}d|v rEt	d| j d|   d   dS q%q|j}d|v r`t	d|j d|   d  dS qd S )Nr.   z;Decomp op does not support dynamic shape -1, but got shape z in inputs of op  Tz in op )
r:   r1   r;   r<   r   r   r$   shapewarningswarn)r)   r=   r>   r8   r?   r@   r3   rD   r   r   r   _check_prim_dynamics   s4   
rG   c                 C   sD  t |t |ksJ d|  dt | dt | t||D ]\}}|du s*|du r<| tjvr<td|  d| d| |du rAq|dur|durW|durW||v rW|||| < |j}|j}|j}	|j}
||kstJ d|  d| d	| d
|
vsJ d|  d|	|
ksJ d|  d|	 d|
 |du |du A rJ d dS dS )az  
    Check whether the replaced outputs are consistent with origin outputs.

    Args:
    op_name (str): The name of operator.
    orig_outs (tuple): The outputs of original operator.
    new_outs (tuple): The outputs of replaced operator.
    orig_vars (dict): Origin variables of original block.
    dst_vars (list): Corresponding replaced variables of Origin variables.
    zwhen replace origin op z[ with composite rule, num of origin outs should be equal to new outs, but len(orig_outs) = z and len(new_outs) = Nzop z2 should not contain any None value. original outs=z and its composite rule outs=z\ with composite rule, origin out dtype should be equal to new out dtype, but orig_out dtype=z and new_out dtype=rB   z1 with composite rule, composite out shape has -1.z\ with composite rule, origin out shape should be equal to new out shape, but orig_out shape=z and new_out shape=z"orig_out and new_out should match.)r"   zipr   Zops_contain_none
ValueErrorZdtyperD   )op_namer'   new_outs	orig_varsZdst_varsZorig_outZnew_outZ
orig_dtypeZ	new_dtypeZ
orig_shapeZ	new_shaper   r   r   _check_op_results   s`   




rM   rB   c                 C   s>   t jd |B }t|tsJ t|tsJ t | |||||S )a  
    Search nonbasic ops which have be registered composite rules and replace them with primitive ops.
    The operators in blacklist will be excluded from program when decomposed into primitives, and only the
    operators in whitelist will be decomposed. The priority of blacklist is higher than whitelist, it means
    an operator both in blacklist and whitelist will not be decomposed.

    The finally set that will be decomposed is:
        (block.ops & ops have decomposite rule & whitelist) - blacklist

    Note:
        All variables must be contained inside the given program.

    Args:
        program (Program): The program to be processed.
        src_vars (list[Value]): In program, once some operator is decomposed, its vars will be replaced by new ones. This argument means some vars will be used later and corresponding vars will be returned for later usage.
        blacklist (frozenset): The Operators that will be exclude when decomposed into primitives.
        whitelist (frozenset): Only the operators in whitelist will be decomposed into primitives.
        start_index (int): The start index of decomposed operator in global block, default 0;
        end_index (int): The end index of decomposed operator in global block, default -1 means all ops will be composed. start_index and end_index follow the principle of left closed and right open, that is [start_index, end_index).

    Returns:
        dst_vars (list): A list contains all vars which replace origin ones in src_vars.
    Zforward_blacklist)r   prim_configr   intZsinking_decomp)programZsrc_vars	blacklist	whiteliststart_indexZ	end_indexr   r   r   	decompose   s   rT   c                 C   sV   |   }|  }| | krdS t| D ]}||||s( dS qdS )zAcheck whether the inputs of two builtins.combine ops are the sameFT)r<   num_operandsrangeoperand_sourceis_same)Zinput1Zinput2Zbuiltin_combine_op1Zbuiltin_combine_op2ir   r   r   _check_combine_inputs   s   
rZ   fwd_opbwd_opc                 C   s2  | du s|   d |  krdS | }dd | D }t|t|ks)J dg }t|D ]\}}d|vr>|||  q/dd |  D }|  }dd |  D }	d	d
g}
|D ];}| r|   dkrd}|	D ]}t	||rxd} nqm|s~ dS q[|t
|v s|t
|v s|   |
v s dS q[dS )z3check whether the bwd_op is corresponding to fwd_opN_gradFc                 S   r/   r   r0   r7   r   r   r   r4     r5   z_check_op.<locals>.<listcomp>z1backward op names do not match backward op inputsc                 S   r/   r   r0   r7   r   r   r   r4     r5   c                 S   4   g | ]}|   r|    d kr|  qS r.   r1   r;   r<   r$   r7   r   r   r   r4         
zpd_op.full_int_arrayz
pd_op.fullr.   T)r$   get_input_namesr:   r"   r#   r&   resultsr;   r<   rZ   r   )r[   r\   bwd_op_input_names
bwd_inputsZfwd_op_related_inputs_outputsr+   r$   
fwd_inputsfwd_outputsfwd_vec_inputsZinserted_op_name_listoperandin_fwd	vec_inputr   r   r   	_check_op   sJ   
rl   c           	      C   s\   |   }g d}t|D ]\}}||v r+| | }||v r+|| }| }|  S qd S )N)out_gradZOut_gradZ	loss_grad)rb   r#   ri   r1   r<   )	r\   grad_var_to_varrd   Zout_grad_namer+   Z
input_namerm   outr[   r   r   r   _get_fwd_op0  s   rp   blockrn   returnc                 C   s  t j| j | }| }t|}t|}|p|}|rt	|r,	 W d   dS |dur6t 
| nt 
| t|}	|rLt|}
t||
|}nt||	 }t||| t|||d | t v rtt|D ]}|t|  vr|| ||  qmn| t v r|d |d  n|| | | |durd}| D ]
}| rd} nq|r| | d}|dfW  d   S t|dfW  d   S 1 sw   Y  dS )a  
    Decompose the forward op into a list of primitive ops.
    Args:
        block (Block): the block to which the forward op belongs.
        fwd_op (pir.Operation): the forward op to be decomposed.
        grad_var_to_var (dict): a dict obtained from distributed processing,
            which maps the backward grad variable to its corresponding forward variable.
        prev_op (pir.Operation): the previous op of fwd_op in the block. If prev_op is builtin.combine, insertion point when decomposing fwd_op will be set to prev_op.
    Returns:
        new_outputs (tuple(Value)): the new outputs after decomposing.
        has_decomposed: whether the forward op has been successfully decomposed.
    NNF)r'   rK   r   TF)r   r   program_guardrP   r$   rc   r   Zget_decomp_ruler	   rG   set_insertion_pointrA   r   r-   r!   rM   _upgrade_grad_var_to_varr   r%   rV   r"   replace_all_uses_with	remove_ophas_one_user   )rq   r[   rn   r@   rJ   r'   Z
decom_ruleZhas_sink_decomp_rulelowerZ
input_argsr(   rK   r+   rx   r3   r   r   r   _decomp_fwd_op=  sd   






>$r{   c                    sz   g }|   D ]4}|  r2|   dkr2|    fddtd  D }|| q|| g q|S )Nr.   c                    s   g | ]}  |qS r   )rW   )r2   rY   Zbuiltin_combine_opr   r   r4     s    z#_prepare_inputs.<locals>.<listcomp>r   )r:   r1   r;   r<   r$   rV   rU   r&   )r[   Z
new_inputsr?   Z	new_inputr   r|   r   _prepare_inputs  s   

r}   c                 C   sl  |   }|  }t|t|ksJ ddd | D }| }t|t|ks-J ddd |  D }dd |  D }g }g }	t|D ]I\}
}| rw|  dkrwd}|D ]}t	||rfd	} nq[|sv|
|g |	
||
  qG|t|v s|t|v s|
|g |	
||
  qGg }d
}|D ]}|d |	v r|
||  |d7 }q|
t g q|S )Nz7forward op output names do not match forward op outputsc                 S   r/   r   r0   r7   r   r   r   r4     r5   z)_prepare_grad_outputs.<locals>.<listcomp>z7backward op input names do not match backward op inputsc                 S   r/   r   r0   r7   r   r   r   r4     r5   c                 S   r^   r_   r`   r7   r   r   r   r4     ra   r.   FTr   r]   r   )rc   Zget_output_namesr"   r:   rb   r#   r;   r<   r$   rZ   r&   r   r   
fake_value)r[   r\   rg   Zfwd_output_namesre   Zbwd_input_namesrf   rh   grad_outputsZgrad_output_namesrY   	bwd_inputrj   rk   Znew_grad_outputsindexZfwd_output_namer   r   r   _prepare_grad_outputs  sV   

r   c                 C   sR   g }t |D ] \}}| rdgt| |  }n	dgt| |  }|| q|S )NFT)r#   r;   r"   r&   )rf   Zbwd_outputsZstop_gradientsr+   Z
bwd_outputZstop_gradientr   r   r   _prepare_stop_gradients  s   r   c                 C   s   | d usJ d|d ur&|d ur&t |D ]\}}|| v r%| || || < q|d urK|d urM|  D ]\}}t |D ]\}	}
||
rI||	 | |< q:q2d S d S d S )Nz"grad_var_to_var should not be None)r#   popitemsrX   )rn   
orig_grads	new_gradsr'   rK   r+   
grad_inputgrad_varvarrY   Zorin_varr   r   r   rv     s"   
rv   c                 C   s  t |}dd | D }t||}t|| }| j|}t| j}	t|||||}
t| j}||	 }|dkrO| jd 	 |	 krO| 
| jd  dS | jd 	 dkrd| jd d gg}
g }|
D ]}|d dur~|d  r~||d  qh|t  qht|t| ksJ d	t|| |d
 |}t|	|D ]}| | j| | |d7 }q|| | 
| t|dfS )z
    Decompose the backward op into a list of primitive ops.
    If forward op has composite vjp rules (including custom vjp), call call_vjp() to get a list of primitive operators in backward graph, then replace backward op.
    c                 S   s   g | ]}|gqS r   r   )r2   Z
fwd_outputr   r   r   r4     s    z(_decomp_bwd_with_vjp.<locals>.<listcomp>r   rB   rs   builtin.splitr   NzNresults of original backward op do not match results of decomposed backward opr   r   T)r}   rc   r   r   opsr   r"   r   Zcall_vjpr$   rx   ri   r1   r;   r&   r   r~   rv   rV   move_oprw   r   )rq   r[   r\   rn   fwd_inputs_fwd_outputs_Zgrad_outputs_Zstop_gradients_
bwd_op_idxbefore_num_opsnew_grad_inputsafter_num_opsZnum_appended_opsr*   r   
insert_idxrY   r   r   r   _decomp_bwd_with_vjp  sD   







r   rf   fwd_outputs_after_decomposec                    sD  du rt ddd | D }| }t fdd|D }tfdd|D }tfdd|D }	| j|}
t| j}t||	|}t| j}g }d	}t	|D ]\}}|
 rj|||  |d
7 }qV|t  qVt||d |
}t||D ]}| | j| | |d
7 }q|| | | d}t||fS )aV  
    Decompose the backward op into a list of primitive ops.
    If forward op has no composite vjp rules, and forward op has been decomposed to a list of primitive operators in forward graph previously,
    call grad() for the decomposed forward subgraph to get a list of primitive operators in backward graph, then replace backward op.
    Nz=To decompose backward op, please decompose forward op firstlyc                 S   r/   r   r0   r7   r   r   r   r4   G  r5   z+_decomp_bwd_without_vjp.<locals>.<listcomp>c                 3   s,    | ]}|t  v s|t v s|V  qd S N)r   )r2   r   )rf   r   r   r   	<genexpr>I  s    z*_decomp_bwd_without_vjp.<locals>.<genexpr>c                 3   s    | ]} | V  qd S r   r   )r2   Zgrad_outputrn   r   r   r   Q  s    
c                 3   s     | ]}|  r | V  qd S r   )r;   )r2   r   r   r   r   r   T  s    
r   r   r   T)RuntimeErrorr:   rc   r   r   r   r"   r   Zgradr#   r;   r&   r   r~   rv   rV   r   rw   rx   )rq   r\   rn   rf   r   re   Zgrad_inputsr   r   r   r   r   r   r   r*   Zinput_grads_idxr+   r   r   rY   Zhas_decomposedr   )rf   r   rn   r   _decomp_bwd_without_vjp4  sJ   





r   c           	      C   s   t ||}t||st|  d dS t|st|r dS t| |||\}}|sHdd | D }t| ||\}}|rHt	| ||||\}}||fS )a3  
    Decompose a backward op in pir program.
    Get the corresponding forward op according to grad_var_to_var firstly, then
    (1) try to decompose backward op by calling _decompose_bwd_with_vjp, if forward op has composite vjp rules (including custom vjp),
    _decompose_bwd_with_vjp will call call_vjp() to get a list of primitive operators in backward graph, then replace backward op successfully and return True;
    (2) when _decompose_bwd_with_vjp return False, means there is no composite vjp rules,
    try to decompose forward op firstly by calling _decomp_fwd_op firstly and get corresponding primitive operators in backward graph by calling _decompose_bwd_without_vjp secondly, then replace backward op successfully and return True;
    (3) if the backward op is still not decomposed by the above two steps, returns False.

    Args:
        block (Block): the block to which the backward op belongs.
        bwd_op (pir.Operation): the backward op to be decomposed.
        grad_var_to_var (dict): a dict obtained from distributed processing,
            which maps the backward grad variable to its corresponding forward variable.
    Return:
        new_input_grads (tuple(Value)): new results of backward op after decomposing.
        has_decomposed: whether the backward op has been successfully decomposed.
    zM can not be decomposed due to the mismatch between forward op and backward oprs   c                 S   r/   r   r0   r7   r   r   r   r4     r5   z"_decomp_bwd_op.<locals>.<listcomp>)
rp   rl   loggerdebugr$   rG   r   r:   r{   r   )	rq   r\   rn   r[   r   bwd_has_decomposedrf   Znew_fwd_outputsfwd_has_decomposedr   r   r   _decomp_bwd_op{  sN   

r   c                 C   sR   g }|   }|jD ]}| ds| dr&| |vr&||  q	|S )Nr]   Z_grad_)global_blockr   r$   endswithr&   )pir_programbwd_opsr   r)   r   r   r   _get_all_bwd_ops  s   
r   c                  C   sx   g } t  }t  }| | | | t d t d tjj	dd }tj
ddi dtjjj_| | | S )NTFLAGS_enable_pir_api)r   Z_is_fwd_prim_enabledZ_is_bwd_prim_enabledr&   _set_prim_forward_enabled_set_prim_backward_enabledpaddlebase	frameworkZ	get_flags	set_flags
global_var_use_pir_api_)stateZprev_fwd_prim_stateZprev_bwd_prim_stateZprev_pir_api_flagr   r   r   _set_prim_state  s    




r   c                 C   sX   t | dks
J dt| d  t| d  tjd| d i | d tjjj_	d S )N   zEstate should contain fwd_prim_state, bwd_prim_state and pir_api_stater   r   r      )
r"   r   r   r   r   r   r   r   r   r   )r   r   r   r   _reset_prim_state  s   r   c                 C   s  t  }| D ]\}}||  v r||  v rt| | dkr8t| | dkr8| | d }| | d }|||< qg }g }t| | dkrN|| | d  n3t| | dkrl| | d   dkrl|| | d  ntdt| | D ]}	|| | |	  qut| | dkr|| | d  n6t| | dkr| | d   dkr|| | d  n| | d  }
|
 dr|| | d  t|dksJ dtdt|D ]
}	|d |||	 < qq|S )zbtranslate grad_var_to_var (mapping VarDesc->VarDesc) to pir_grad_var_to_var (mapping Value->Value)r   r   r   zbuiltin.slicerB   _z#translate pir_grad_var_to_var error)	r   r   r%   r"   r&   r<   r$   rV   r   )param_mappingrn   pir_grad_var_to_varr   r   Znew_grad_varZnew_varZnew_grad_varsZnew_varsrY   Zlast_opr   r   r   _translate_gradvartovar_to_pir  sD   
r   c           	      C   s   t jj| 7 t| }g }|  j}|D ]!}| }| |v r6t|  ||\}}|s6||vr6|	| qW d   n1 sAw   Y  t
d|  dS )z2Traverse and decompose all backward OPs in programNz.Following backward ops can not be decomposed: )r   r   r   rt   r   r   r   r$   r   r&   r   r   )	r   r   r   Zundecomposed_bwd_opsr   r)   bwd_op_namer   r   r   r   r   _decomp_bwd_program  s(   


r   c                 C   s   t jj| X |  j}t| }ddg}g }d}|D ]<}| }| |vrM| |vrDt|  |||\}	}
|
sC||vrC|	| n	||vrM|	| | dkrU|nd}qW d   n1 sbw   Y  t
d|  dS )z1Traverse and decompose all forward OPs in programzpd_op.stackzpd_op.squeezeNr.   z-Following forward ops can not be decomposed: )r   r   r   rt   r   r   r   r$   r{   r&   r   r   )r   r   r   r   Zblack_fwd_opsZundecomposed_fwd_opsr@   r)   Zfwd_op_namer   r   r   r   r   _decomp_fwd_program0  s:   


r   c                 C   sF  t | g  tjd }|  }d}tjj|  |  j}|D ]}| }|	dd |v r/qd}t
|r
t s@t|r@d}|st| x t|jc t|jN t| | }d}	t|}
tt|D ]0}||  r||   }| dkrd}	t| | |
|  ||
|  || qjW d   n1 sw   Y  W d   n1 sw   Y  W d   n1 sw   Y  |	st||
|}t| || || || | dkr|}|dur
d}| D ]
}| rd} nq|r|| d}qW d   n	1 sw   Y  tj| dS )	z{
    Decompose all non-primitive ops into primitive ops in a pir program. It may contain forward ops and backward ops.
    Zbackward_blacklistN.rB   FTr   r.   )rT   r   rN   r   r   r   rt   r   r$   splitr
   Z_enable_prim_dynamic_shaperG   r   r   Zop_roler   Zchunk_idru   rc   r   rV   r"   ry   Z	first_useownerrM   rw   rx   r-   Z set_insertion_point_to_block_end)r   rQ   rq   Zpre_combine_opr   r)   r   Zskip_decompr'   Zis_next_splitr(   rY   Znext_oprK   rx   r3   r   r   r   decompose_dist_programS  s   






  



;r   c                 C   s0   t  }t||}t| | t| | t| |S )az  
    Decompose all PHI ops into prim ops in a pir program.
    Args:
        pir_program (Program): the program to be decomposed
        param_mapping (dict): a map of program variables to pir program values
        grad_var_to_var (dict): a dict obtained from distributed processing,
            which maps the backward grad variable to its corresponding forward variable.
    )r   r   r   r   r   )r   r   rn   r   r   r   r   r   decompose_pir_program  s   


r   c                 C   sT   g }|   jD ] }| dkr|| d  | dkr'|| d  q|S )Nz
pd_op.datar   zbuiltin.parameter)r   r   r$   r&   rc   r   rc   r)   r   r   r   "get_inputs_from_data_and_parameter  s   r   c                 C   s8   g }|   jD ]}| dkr||d  q|S )Nzpd_op.fetchr   )r   r   r$   r&   ri   r1   r   r   r   r   get_outputs_from_fetch_op  s   r   c                 C   s@   g }t  }| D ]\}}|||< q	| D ]	}|||  q|S r   )r   r   r&   )outputsr   rc   Zvar2grad_varkvoutputr   r   r   get_grad_var_for_list  s   
r   c                    s0    fdd}g }|D ]}| ||  q
|S )Nc                    0   t   jD ]\}}| |kr|  S qtdNzop not found in programr#   r   r   r   r)   r+   Zop_iterrP   r   r   getIdx  
   z'get_defining_op_indices.<locals>.getIdx)r&   r<   )rP   Zoutput_valuesr   rc   r   r   r   r   get_defining_op_indices  s
   r   c                    s:    fdd}g }   jD ]}||r||| q|S )Nc                    r   r   r   r   r   r   r   r     r   z#get_forward_op_idxs.<locals>.getIdx)r   r   r&   )rP   is_forward_op_funcr   rc   r)   r   r   r   get_forward_op_idxs  s   r   c           	      C   s   t d t d|  t| }t| }d}t|rtt| |}td}|tj	 |d ur@z	tt
| |}W n	   |d Y |dkrK|d | S |d }t| ||g ||\}}|S )NzStart Recompute Pir Program:zBefore Recompute: rB   zauto-recomputezNo Forward Ops Found!zSkip Auto Recompute!r   )r   r   r   r"   maxr   logging	getLoggersetLevelINFOr   infor   )	r   r   r>   r   Zfwd_op_end_idxr   Zbackward_op_start_idxrP   r   r   r   r   auto_recompute_pir_program  s:   


	r   )NNr   )NNNN)Gr   r   rE   r   r   Zpaddle.autogradr   Zpaddle.autograd.backward_utilsr   r   Zpaddle.base.corer   r   r   r	   r
   Zpaddle.base.frameworkr   r   r   Zpaddle.base.libpaddle.pirr   r   Zpaddle.base.wrapped_decoratorr   Zpaddle.decomposition.recomputer   r   Zpaddle.frameworkr    r   r   __name__r   r   r!   r-   rA   rG   rM   	frozensetrT   rZ   rl   rp   dictr   r{   r}   r   r   rv   r   listr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   

!
;
'
2
Q9

;
G
H
.#J

