o
    * i'                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d
dlmZ e	jjje	jjje	jjje	jjje	jjjgZ ee j!Z"G dd deZ#dOddZ$dd Z%dd Z&dd Z'dd Z(G dd dZ)dd Z*dd Z+dd Z,d d! Z-d"d# Z.d$d% Z/d&d' Z0dPd)d*Z1dPd+d,Z2d-d. Z3dOd/d0Z4d1d2 Z5d3d4 Z6d5d6 Z7dQd8d9Z8d:d; Z9	(dPd<d=Z:d>d? Z;dOd@dAZ<dBdC Z=dDdE Z>		FdRdGdHZ?	FdSdIdJZ@dKdL ZAG dMdN dNZBdS )T    NOrderedDict)Enum)reduce)core)	ParameterProgram)OperatorDistAttr)
get_loggeris_backward_opis_optimize_op6naive_set_dist_op_attr_for_program_by_mesh_and_mapping)_current_expected_place_   )OpRolec                   @   s   e Zd ZdZdZdZdS )AutoParallelStreamTypedefaultZauto_parallel_mpZauto_parallel_shardingN)__name__
__module____qualname__ZCALC_STREAMZ	MP_STREAMZSHARDING_STREAM r   r   p/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/distributed/passes/pass_utils.pyr   3   s    r   c                 C   s<   |d u rt  }nt|t sJ | D ]
}||vrd||< q|S )NT)r   
isinstance)list_objZordered_dictobjr   r   r   list_to_ordered_dict9   s   r   c                 C   s\   t  }g }|  jD ]!}|jD ]}||vr|| || q|jD ]}|| q#q
|S N)setglobal_blockopsinput_arg_namesappendaddoutput_arg_names)programvisited_vars
input_varsopin_var_nameout_var_namer   r   r   get_inputs_of_programF   s   



r*   c                 C   s.   t  }|  jD ]}t|j| qt| S r   )r   r   r   r   r#   listkeys)r$   output_varsr'   r   r   r   get_outputs_of_programT   s   r.   c                 C   sZ  t |  j}|dk r||7 }|dkr||k sJ |dk r!||7 }|dkr)||ks-J |||k s3J |  } t|d |d dD ]}|  j|dd qAt|d ddD ]}|  j|dd qU|   t }|  jD ]}|jD ]}|	| qr|j
D ]}|	| q}qmg }	|  jD ]}
|
|vr|	|
 q|	D ]}
|  j|
dd q|   | S )Nr      Fsync)lenr   r   clonerange
_remove_op_sync_with_cppr   r    r"   r#   varsr!   Z_remove_var)r$   Zstart_op_idxZ
end_op_idxop_numidxZ
valid_varsr'   r(   r)   Zvars_to_removevarr   r   r   prune_program[   s<   


r<   c                    s|  |sJ dt |  j  dksJ d fdd|D }|d dkr)dg|}|d  kr4|  tt |d D ]}|| ||d  k sLJ dq<g }tt |d D ]}t| || ||d  }|| qWt |}d	d |D }d
d |D }dd t|D }|d |d< td|D ] }	||	 D ]}
tt|	D ]}|
|| v rd|| |
<  nqqqdd |D }|||fS )ay  
    Split the program by op_indices.

    For examples, a program has 100 ops, and op_indices = [25, 60].
    Then the program is split into 3 parts, containing 25, 35 and 40
    ops respectively.

    The return values are a tuple with 3 elements: the split program
    list, the input var names of each split program, and the output
    var names of each split program.
    zop_indices cannot be emptyr   zprogram cannot be emptyc                    s    g | ]}|d kr
|n|  qS )r   r   ).0r:   r9   r   r   
<listcomp>   s     z!split_program.<locals>.<listcomp>r0   r/   z"op_indices must be strictly sortedc                 S   s   g | ]}t |qS r   )r*   r=   pr   r   r   r?          c                 S   s   g | ]}t t|qS r   )r   r.   r@   r   r   r   r?      s    c                 S      g | ]}t  qS r   r   )r=   _r   r   r   r?          Tc                 S   s   g | ]}t | qS r   )r+   r,   )r=   itemr   r   r   r?      s    )r3   r   r   r!   r5   r<   reversed)r$   Z
op_indicesr:   Zsplit_programsZ	new_splitZ	num_splitr&   r-   Zvalid_output_varsir(   jr   r>   r   split_program~   sD   


rJ   c                   @   s<   e Zd ZdZdd Zedd Zdd Zdd	 Zd
d Z	dS )OpInOutInfozc
    Record unused buffer input_vars of op and other var_names except unused buffer input_vars
    c                 C   s   d| _ t | _t | _d S )NF)	_is_buildr   _no_need_buffer_slots_other_arg_names_setselfr   r   r   __init__   s   zOpInOutInfo.__init__c                 C   s   | j S r   )rL   rO   r   r   r   is_build   s   zOpInOutInfo.is_buildc                 C   sd   i }|j D ]	}||||< qi }|jD ]	}||||< qi }|jD ]	}||||< q#|||fS r   )input_namesinputoutput_namesoutputZ
attr_namesattr)rP   r'   inputsZ
input_nameoutputsoutput_nameattrs	attr_namer   r   r   _get_op_attrs   s   



zOpInOutInfo._get_op_attrsc                 C   s   |  |\}}}t|j|||| _t| jdkrd S |jD ]}|| jvr3||D ]}| j	| q*q|j
D ]}|| jvrL||D ]}| j	| qCq7d| _d S )Nr   T)r]   r   Zinfer_no_need_buffer_slotstyperM   r3   rS   rT   rN   r"   rU   rV   rL   )rP   r'   rX   rY   r[   Z	slot_namein_nameZout_namer   r   r   
build_info   s"   





zOpInOutInfo.build_infoc                 C   s   t | jdkp|| jv S Nr   )r3   rM   rN   )rP   arg_namer   r   r   	is_needed   s   zOpInOutInfo.is_neededN)
r   r   r   __doc__rQ   propertyrR   r]   r`   rc   r   r   r   r   rK      s    
rK   c                 C   s   | | }|d uo|j S r   )_find_var_recursivepersistable)var_nameblockr;   r   r   r   var_can_be_deleted   s   
rj   c                 C   sn   t  }| jD ].}|jD ](}|jdv rqt }|| |j|j D ]}t||r2|	|r2|
| q!qq|S )z^
    Get all vars in the program that are non-persistable and not in op's no_need_buffer.
    )c_sync_comm_streamZconditional_blockdataZnopwhile)r   blocksr   r^   rK   r`   r    r#   rj   rc   r"   )r$   required_varsri   r'   op_inforb   r   r   r   _get_required_vars_of_program   s    




rq   c                 C   s.   t jjdd rt| |||S t| |||S )a#  
    Set `skip_gc_vars` for every job in jobs.

    A whole_program is split up into sub_programs according to the schedule mode,
    thus a sub_program's vars might be used as the op's input of the later sub_program,
    and these vars cannot be gc after executing current sub_program.
    ZFLAGS_enable_pir_api)paddlebase	frameworkZ	get_flags_set_skip_gc_vars_in_pir_set_skip_gc_vars_in_old_ir)num_micro_batches	job_typessub_programsjobsr   r   r   set_skip_gc_vars  s   r{   c              	   C   s   | dksJ dt t||}i }| D ]
\}}t|||< qdd t| D }t|}	tt|	D ]H}
||
 }| }|| }| }||| @ }t	
d| d| d|  |dv rnt|d	ksnJ d
| d| d|| ||  |O  < q3|S )Nr/   "num_micro_batches needs to be >= 1c                 S   rC   r   r   r=   rH   r   r   r   r?   %  rE   z/_set_skip_gc_vars_in_old_ir.<locals>.<listcomp>Skip gc vars for -(): Zbackward
backward_wr   BWhen enabling pipeline parallelism strategy, the skip_gc_vars for % subprogram must be empty, but it is .)dictzipitemsrq   r5   r3   rG   r^   micro_batch_idloggerdebugr{   )rw   rx   ry   rz   type_to_programtype_to_required_varsr^   r$   suffixed_required_varsnum_jobsjob_idjobjob_typero   r   skip_gc_varsr   r   r   rv     s.   
rv   c              	   C   s  | dksJ dt t||}i }t|}| D ]d\}}t }	t }
|  D ]}|	| q*| j	D ]4}|
 D ]}|jrQ|	|j |jrQ|
|j q=| D ]}|jrj|	|j |jrj|
|j qVq7||v rv|	|| 8 }	|	|
8 }	|	||< qdd t| D }t|}tt|D ]H}|| }| }|| }	| }|	|| @ }td| d| d|  |dv rt|d	ksJ d
| d| d|| ||  |	O  < q|S )Nr/   r|   c                 S   rC   r   r}   r~   r   r   r   r?   Y  rE   z,_set_skip_gc_vars_in_pir.<locals>.<listcomp>r   r   r   )Zsend_backwardr   r   r   r   r   )r   r   r   Zget_no_need_buffer_valuesr   r   r   kwargsr"   r   Zoperands_sourceZhas_namenamerg   resultsr5   r3   rG   r^   r   r   r   r{   )rw   rx   ry   rz   r   r   Zno_need_buffer_varsr   r$   ro   Zpersistable_varskeyr'   r;   r   r   r   r   r   r   r   r   r   ru   <  sZ   


ru   c                 C   sp   i }|j |d< |j|d< |j|d< |j|d< |j|d< td| |j|j|j|j	|j
|j|j|j|jd
| d S )N	trainableoptimize_attrregularizerdo_model_average	need_clip)
ri   r^   r   shapedtype	lod_level
error_clipstop_gradientis_databelong_to_optimizerr   )r   r   r   r   r   r   r^   r   r   r   r   r   r   r   r   )	dst_blocksrc_varZcopied_kwargsr   r   r   _create_paramp  s(   





r   c                 C   s6   | j |j|j|j|j|j|j|j|j|j	|j
d
 d S )N)
r^   r   r   r   r   rg   r   r   r   r   )
create_varr^   r   r   r   r   rg   r   r   r   r   )r   r   r   r   r   _create_inter  s   
r   Fc              	   C   s~   |s|  |}n| |}|jtv r,t|dd}|j|j|j||j|j|j	|j
d d S t|tr8t|| d S t|| d S )Nrg   F)r^   r   rg   r   r   r   r   )r;   Z_var_recursiver^   __not_shape_var_type__getattrr   r   r   r   r   r   r   r   r   r   )	src_blockr   Zsrc_varnameforce_creater   Zpersistr   r   r   _create_var  s"   




r   c                 C   s~   |j  }||j  |jD ]}| |s|r#| |r#t| ||| q|jD ]}| |s5|r<| |r<t| ||| q'd S r   )descZ	append_opZ	copy_fromr    Zhas_varrf   r   r#   )r   r   Zsrc_opr   Zdst_op_descZinput_varnameZoutput_varnamer   r   r   _create_program  s&   




r   c                 C   s   | j D ]J}|jD ]D}| dkr0|dd |dd | d }|d|  |d q| d	krL|dd |dd |d
 |d qqdS )a  
    This function is used to replace the function '_insert_sync_for_fthenb_1f1b'.
    The finally target of this function is as follows:
        1. no need to insert the 'c_sync_calc' and 'c_sync_calc' operators
        2. 'send_v2' operator uses 'dist_attr.execution_stream' to set stream of its own.
        3. 'recv_v2' operator uses 'dist_attr.execution_stream' to set stream of its own.
    zpd_op.send_v2dynamic_shapeFuse_calc_streamTring_idZsend_stream_r   zpd_op.recv_v2Zrecv_streamN)rn   r   r   Zset_bool_attrr[   Zset_execution_streamZset_scheduling_priority)r$   ri   r'   r   r   r   r   _pir_overlap_send_recv  s    



r   c              
   C   s  | j D ]B}d}d}tt|jD ]\}}t|r|} nqtt|jD ]\}}|jdv r4|dd |jdkr|dd |d}|d	}|jd }	|	|	}
|j
|| d
d|
gid|
gid|id}|d7 }d}d}t|ttjkr|| }tj}n	|| d }tj}|j
|dd|
gid|
gi||dd}|r||}|r|j|	}t }|jj|_|jj|_||	| ||	| ||| ||| t|ttjkr|dd |d7 }q%|  d}d}t|jD ]\}}|jdkrt|r|} nq|du rqtt|jD ]/\}}||kr n%|jdkr@|dr@|jd }	|	|	}
|j|| dd |d8 }q|  qdS )z
    This implementation refers to lots of Paddle/python/paddle/base/optimizer.py.
    The difference between this function with 'PipelineOptimizer' is that
    'send_v2' op and 'recv_v2' op have been inserted in program by 'reshard'.
    r   N)send_v2recv_v2r   Fr   r   op_roler   Zc_sync_calc_streamXOutindexr^   rX   rY   r[   r/   rk   )r   r   Zpipeline_flag r   r1   )rn   	enumerater+   r   r   r^   Z	_set_attrrW   r    r;   _insert_op_without_syncintr   ZBackwardZOptimizeget_dist_op_for_program	dist_attrZget_input_dist_attrr	   process_meshchunk_idset_input_dist_attrset_output_dist_attrset_op_dist_attr_for_programForwardr7   r   has_attrr#   r6   )r$   dist_contextri   offsetZfirst_optimize_indexr   r'   r   r   rh   r;   Zsync_calc_opZinsert_indexZnew_op_roleZsync_comm_opdist_opZout_dist_attrZop_dist_attrZbackward_recv_indexr   r   r   _insert_sync_for_fthenb_1f1b  s   












r   c                 C   s   |D ]}t | || qd S r   )r   )r   r   r   r'   r   r   r   _add_ops_into_blockB  s   r   c                 C   s
   | j dv S )N)fetchZfetch_v2)r^   )r'   r   r   r   _is_fetch_opG  s   
r   c                 C   s<  |   j}t|}t|dkrd S d}d }d }||k r|| jdkr,|| j}|d7 }q|d }||k rJ|| jdkrJ|d7 }||k rJ|| jdks;||krd|dksVJ dt||D ]}||| _q[n*|| j}|dksy||ksyJ d| dt||D ]}||| _|d }q~||k s|dkr|dkrtdd S d S )Nr   r0   r/   zfirst_left_op_role can't be -1.z%The left and right operators of (idx[z]) have different op_role.z#all the ops don't have the op_role.)r   r   r3   r   r5   
ValueError)main_programall_opsZops_lenZiopZfirst_left_op_roleZfirst_right_op_roleZ	right_idxr:   r   r   r   forward_complete_op_roleK  sF   







r   Tc           
         s    fdd}|| d }|| d }||kr|S || d }||kr%|S  |    dv r] |  d}| }|D ]"}	|	jrM|	jjdkrM|	jjdk  S |	dr\|	jdkr\|	j  S q:dS )	Nc                    sN   | dk s
| t  krdS  |  }r|jd u rdS |jjS |dr%|jS dS )Nr   r0   r   )r3   r   r   r   )op_idxr'   r   	with_distr   r   get_chunk_idt  s   

z$infer_chunk_id.<locals>.get_chunk_idr/   r   )zbuiltin.combinezbuiltin.splitr   r0   r   )r   resultall_used_opsr   r   r   )
r   r   r   r   Zprev_op_chunk_idZnext_op_chunk_idZnext_next_op_chunk_idZ
result_varr   used_opr   r   r   infer_chunk_ids  s$   
r   c                    s   t   fdd  | S )Nc                    sv   |   }|D ]2}|v r dS | |jr#|jjdkr#|jj  S | D ]} |}|dkr7|    S q'qdS )Nr0   )r   r"   r   r   r   )r;   r   r   Z
output_varr   dfsvisitedr   r   r     s   
z&find_var_used_op_chunk_id.<locals>.dfsr}   )r;   r   r   r   find_var_used_op_chunk_id  s   r   c                 C   s  t |  t|  |  j}|  }|  }|  }| j}| j}| j}| }	| }
t }t|tjj	rDtj	tj
 j}tjj }|| d}tt|d ddD ]v}|| jdkr|| jdkrod}n|| jdkryd}n	|| jdkrd}|dkr||   ||   q[|dkr||   t||  D ]K}|| |}| du rd	| d
||   d
| }tj||  tj|| || |	|| }||_|j |_ || |!| q||   q[t||  D ]}|| |}|| |}| du s| du r||  dks0||  dkr9|| |j}nK|| |}|" }d }|D ]}| dkrS|}qH|d ura|# d }n#d	| d
||   d
| }tj||  tj|| || | du r|	|| }||_|j |_ || |!| | du r|
|| }||_|j |_ || |!| q ||   ||   q[|||fS )Noptr/   r0   Zbwdr   Zfwdr   FZvar_rD   z
pd_op.datazbuiltin.parameterzbuiltin.shadow_outputrZ   )$r   r   r   r   r4   _get_devicer   rr   rt   Z	CUDAPlacedistributedZParallelEnvZdev_idrs   Z	libpaddleZPlaceZ	set_placer5   r3   r   erasenum_resultsr   Z	use_emptyr   pirset_insertion_point_after_C_opsZset_persistable_valueZ	add_kwargr^   Z
place_attrrg   replace_all_uses_withr   r[   )r   Zenable_send_recv_overlapZcomplete_opsZfwd_programZbwd_programZopt_programZfwd_opsZbwd_opsZopt_opsZ	opt_blockZ	bwd_blockZplaceZ	cur_placeregionr   r:   Zresult_in_optr   Znew_result_var_in_optZresult_in_bwdZresult_valueZused_opsZshadow_output_op_usedr   Znew_result_var_in_bwdr   r   r   -_split_program_into_forward_backward_optimize  s   







r   c                    sx      fdd}dd }  dkrdgS | r!dgd S d	r)dgS  D ]}||s8dg  S q-dgS )
Nc                     sV   g d}  dsdS dk rdS tdD ]} |   | d|  kr( dS qdS )N)pd_op.full_int_arraypd_op.reshaper   r   zpd_op.matmulr   r   z
pd_op.add_grad_merge_addF      T)r   r5   r   )Zops_patternrH   r   Zcur_opr   r   r   is_reshape_matmul_pattern5  s   

z<_pir_get_backward_op_type.<locals>.is_reshape_matmul_patternc                 S   s"   |   D ]
}|dr dS qdS )Nr   TF)r   r   )valuer'   r   r   r   used_by_grad_merge_addJ  s
   
z9_pir_get_backward_op_type.<locals>.used_by_grad_merge_addr   Z
backward_br   r   r   )r   r   r   )r   r   r   r   rV   r   r   r   _pir_get_backward_op_type0  s   


r   c                 C   s6   |d ur| | }n|}|   }| j}|||fS r   )r4   r   r   )r$   r   r   Zprogram_nameZcloned_programr   r   r   r   _create_program_and_opsb  s   

r   c                 C   s  t  }t| jD ]\}}||}|dg }g }|dkr=| D ]\}}	t ||< || d}
t||
|	 ||
 q n)| D ]$\}}	t	|	dkre|| j
|jd}
|
|j t||
|	 ||
 qA|D ]!}|dd }d }|D ]}
|
|r|
} nqu|rt||| qhq|S )Nr   r   )
parent_idxr   )r   r   rn   popr   r   ri   r   r!   r3   Z_create_blockr   Z_set_forward_block_idxZforward_block_idxrT   rf   r   )r$   Zsplit_methodr   Zibr   Ztype_to_opsZ	fetch_opsZ
dst_blocksr^   r   r   Zfetch_opr_   Zfetch_blockr   r   r   _build_vpp_sub_programsn  sH   


r   c                 C   sB   | j jsd| j _|j j}| j j|vr|| j j ||j _dS dS )z
    Add the extra event dependency of the two operators.
    This function mainly aims for the cross-programs in pipeline parallelism,
    especial for the 'send_v2' 'recv_v2' etc.
    TN)r   Zforce_record_eventZevents_to_waitZevent_to_recordr!   )Zrecorder_opZ	waiter_opZwaiter_wait_listr   r   r   _add_event_dependency  s   r   /c	              	   C   s   |  |d }	||	}
|d u r$| j|d  d|	jdd}|||
 | j|d  d|	jd}|||
 | j|dd|i||d	|||d
d}t||
j|
j||d |S )Nr   @reshape.outFr   r   rg   z@reshape.xshape)r   r   Zreshape2r   )r   ZXShape)r   r   op_namescoper   )r   Zref_mappingctxr   )	r;    get_tensor_dist_attr_for_programr   r    set_tensor_dist_attr_for_programr   r   r   Zdims_mapping)ri   r   xr   r   r   r   outr   var_xZx_dist_attrZx_shapeZ
reshape_opr   r   r   _insert_reshape_op  s:   
r  c                 C   s  | j }|| }|d}|rJ d| d|d}|r%J d| d|d}|d}	|d}
|d	}|d
}|d}| |d }| |
d }| |d }|j}|j}|j}t|t|ksxJ dt| dt| dt|dkr|dd |dd ksJ d| d| d|d |d  gt|dd  }|d |d  g|dd  }||j	}t
| |d ||||||d}t
| |d |
|||||d}| j|d  d|jdd}|||| ||}||j|| ||j|| ||j|| | j|d d||dd|idd||dd}||| t
| |d |jg||||||d 	 | j|d d|
|	dd|idd||dd}|||| | j|dd! d S )"Ntrans_xmatmul_grad(id=J) with tran_x == True is not supported for splitting matmul_grad to matmultrans_yJ) with tran_y == True is not supported for splitting matmul_grad to matmulr   YzOut@GRADzX@GRADzY@GRADr   r   BThe rank of x must be equal to that of out_grad, but got x rank =  and out_grad rank = r   r   PThe first two dimensions of x must be equal to that of out_grad, but got x_dims: and out_grad_dims:r/   )r   r   r   r   Fr      Z	matmul_v2)r   r
  r   T)r  r  r   r   r      )r   r   r  r   r1   )r   rW   rT   rV   r;   r   r3   r+   Zget_op_dist_attr_for_programr   r  r   r   r   r   r   r   r   r   r   r6   )ri   matmul_grad_idr   r   r   matmul_grad_opZtran_xZtran_yr  yout_gradx_grady_gradr   r  Zvar_out_gradZ
var_y_gradx_dimsout_grad_dimsy_grad_dims
new_x_dimsnew_out_grad_dimsr   new_xnew_out_grad
new_y_gradZmatmul_grad_dist_attrZ	matmul_opr   r   r   split_matmul_grad_to_matmul  s   









"
	


r  c                 C   s  | j }|| }|drJ d| d|dr!J d| d|d}|d}|d}|d}|d}|j}	|j}
|j}|j}t|
t|ks]J d	t|
 d
t| dt|
dkrz|
dd |dd kszJ d|
 d| d|
d |
d  gt|
dd  }|d |d  g|dd  }|j}t	j
| t	j||}| }|	|_|d| |	|d _|d d| t	j
| t	j||}| }|	|_|d| |	|d _|d d| t	j
| t	j||dd}| }|	|_|d| t	j
| t	j||}| }|	|_|d| |	|d _|d d| t	j
| t	j||dd}|	| _| d| || || |  d S )Nr  r  r  r  r	  r   r/   r   r  r  r   r  r  r   TF)r   r   Zoperand_sourcer   r   r   r3   r+   r   rr   r   r   r   ZreshapeZget_defining_opZset_int_attrmatmulr   r   )ri   r  r   r  r  r  r  r  r  r   r  r  r  r  r  r   r  Zx_reshape_opr  Zout_grad_reshape_opr  Znew_matmul_opZnew_y_grad_reshapeZy_grad_reshape_opZ
new_x_gradr   r   r    _pir_split_matmul_grad_to_matmule  s   






"
	


r!  c                   @   sd   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )PipelineMemoryEstimatorc                 C   s   i | _ g | _tt| _d S r   )type_to_skip_gc_varsprogram_typeslogging	getLoggerr   r   rO   r   r   r   rQ     s   z PipelineMemoryEstimator.__init__c           
      C   s   || _ i }| D ]\}}t|||< i | j|< q	t }t|D ]4}|| }||@ }	|dv r?t|	dks?J d| d|	 dtt|	dgt|	 }	|	| j|< ||O }q dS )z
        Get the skip_gc_vars for each type of program.

        The order of program_types is the same as the order in the pipeline's micro batch.
        For example, in 1F1B pipeline, the order of program_types is ['forward', 'backward'].
        r   r   r   r   r   r0   N)	r$  r   rq   r#  r   rG   r3   r   r   )
rP   r   r$  r   r^   r$   r   r   ro   r   r   r   r   set_program_skip_gc_vars  s"   

z0PipelineMemoryEstimator.set_program_skip_gc_varsc                 C   s   || j vrtd| ddd |jD }|jdd d | ||}| j | D ]}||vr/q(|| d | j | |< q(i }| j | }| j|d	krZ| j| j|d	  }	| j |	 }| ||||\}
}|
|fS )
Nz9Please set the skip_gc_vars before estimating memory for z	 program.c                 S   s&   g | ]}|j D ]	}|j |gqqS r   )r   r   id)r=   ri   r'   r   r   r   r?     s
    z;PipelineMemoryEstimator.estimate_memory.<locals>.<listcomp>c                 S   s   | d S ra   r   )r  r   r   r   <lambda>      z9PipelineMemoryEstimator.estimate_memory.<locals>.<lambda>)r   sizer/   )r#  r   rn   sort_get_program_var_infor$  r   _estimate_max_memory)rP   r$   program_typer   ordered_opsvar_inforh   r%   r   Zprev_program_type	mem_usage
max_memoryr   r   r   estimate_memory  s8   



z'PipelineMemoryEstimator.estimate_memoryc                 C   s  d}d}t  }|D ]}|| q	|D ]\}	}
|
jdv rqg }|
j|
j D ]v}||vr,q%|| d  d8  < ||vr| ||s|| | jd| d|| d  d|| d  d	| d
||| d   d|
j d|
j d|
j  ||| d 7 }t||}| 	||r| ||s||vr|
| t||}q%t |D ]I}| jd| d|| d  d|| d  d	| d
||| d   d|
j d|
j d|
j  ||| d 8 }||v r||  || d 8  < qq|D ]}||vr||| 8 }q||fS )Nr   Zcreate_py_readerZcreate_double_buffer_readerreadcountr/   zadd z, var size: r+  z,count: z,mem_usage: z -> z
,op type: z, input_arg_names: z, output_arg_names: zremove )r   r"   r^   r    r#   _is_persistabler   r   max_is_last_usedr!   )rP   r0  r1  r   r%   r2  r3  Zhas_used_varsrh   rD   r'   Zlast_use_varsr   r   r   r.    s   






z,PipelineMemoryEstimator._estimate_max_memoryc                 C   s4   | j | }tdd | D }|dk rtd|S )aN  
        For a given type of program, calculate the increase memory usage.

        The increase memory usage is the memory usage of the variables that are setting to skip_gc_vars.
        Persistable variables are not included in the increase memory usage because they are allocated when
        running the startup program.
        c                 S   s   g | ]\}}|qS r   r   )r=   rD   Zmemr   r   r   r?   P  rB   z@PipelineMemoryEstimator._get_increase_memory.<locals>.<listcomp>r   zONo size info for skip_gc_vars, please run estimate_memory to get var size info.)r#  sumr   r   )rP   r/  r   Zincrease_memoryr   r   r   _get_increase_memoryG  s   
z,PipelineMemoryEstimator._get_increase_memoryc           	      C   sv   i }|D ]4\}}|j dv rqt }|| |j|j D ]}||s$q||}|r7| j|||||jv d qq|S )Nr5  )is_input)r^   rK   r`   r    r#   rc   r   _update_var_info)	rP   r0  r   r1  rD   r'   rp   rh   r   r   r   r   r-  W  s(   



z-PipelineMemoryEstimator._get_program_var_infoc                 C   s|   |r| |n||}||vr2||dddd |jr%d|| d< d S | |}||| d< d S || d  d7  < d S )	Nr   r/   F)r+  r7  rg   Trg   r+  r7  )Zget_serial_inputZget_serial_output
setdefaultrg   _get_var_size)rP   rh   r   r1  r=  r;   Zvar_sizer   r   r   r>  t  s   
z(PipelineMemoryEstimator._update_var_infoc                 C   s   dd |j D }| ||jS )Nc                 S   s   g | ]
}|d kr
dn|qS )r0   r/   r   )r=   dimr   r   r   r?     s    z9PipelineMemoryEstimator._get_var_size.<locals>.<listcomp>)r   _calculate_bytesr   )rP   r;   	var_shaper   r   r   r@    s   z%PipelineMemoryEstimator._get_var_sizec                 C   sf   t jdt jdt jdt jdt jdt jdt jdt jdt j	di	}|r't
dd |dnd}||d}|| S )Nr   r  r   r/   c                 S   s   | | S r   r   )r  r  r   r   r   r)    r*  z:PipelineMemoryEstimator._calculate_bytes.<locals>.<lambda>r   )rr   Zfloat64Zint64Zfloat32Zint32Zfloat16Zbfloat16Zint16Zint8Zuint8r   get)rP   rC  r   Zdtype_to_sizeZtotal_countZdtype_factorr   r   r   rB    s   z(PipelineMemoryEstimator._calculate_bytesc                 C   s   ||vrdS || d dkS )NFr7  r   r   rP   rh   r1  r   r   r   r:    s   z%PipelineMemoryEstimator._is_last_usedc                 C   s   ||vrdS || d S )NFrg   r   rE  r   r   r   r8    s   z'PipelineMemoryEstimator._is_persistableN)r   r   r   rQ   r'  r4  r.  r<  r-  r>  r@  rB  r:  r8  r   r   r   r   r"    s    $Br"  r   )F)T)Nr   )r   )Cr%  collectionsr   enumr   	functoolsr   rr   Zpaddle.baser   Zpaddle.base.frameworkr   r   Z6paddle.distributed.auto_parallel.static.dist_attributer	   Z-paddle.distributed.auto_parallel.static.utilsr
   r   r   r   Zpaddle.frameworkr   r   Zauto_parallel.static.utilsr   ZVarDescZVarTypeZREADERZSTEP_SCOPESZDENSE_TENSOR_ARRAYZFEED_MINIBATCHZ
FETCH_LISTr   INFOr   r   r   r*   r.   r<   rJ   rK   rj   rq   r{   rv   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r!  r"  r   r   r   r   <module>   sp   

#26#4


o
(%
 
2(
1
 Z