o
    + i&                     @   s   d dl Zd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 ddddZejdejdejdiZdadd	 Zd
d ZG dd dZG dd dZdS )    N)_current_expected_place_)async_offload_with_offsetcreate_async_load   )gpuZnpuxpu      c                  C   sr   t d u r7t rd} nt rd} nt }z| } W n   d} Y | t v s5J dt  d|  d| a t S )Nr   r   unknownz&tensor fusion helper now only support z, but got device z	 instead.)__current_device_type__paddleZis_compiled_with_cudaZis_compiled_with_xpur   Zget_device_type	alignmentkeys)Zdevice_typeZcurrent_device r   i/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/optimizer/fusion_utils.pyget_current_device_type*   s   r   c                 C   sP   t | jt| j  }|tt   }|dkrdntt  | }|t| j  }|S Nr   )npprodshapealigndtyper   r   )tsize	remainingZaliZalign_r   r   r   	get_align>   s   r   c                   @   sL   e Zd ZdejfddZe dd Ze dd Z	e dd	 Z
dS )
FusionStorageNc                 C   s   t |ts	J dt |tsJ dt |ts|d u sJ d|| _|| _|| _i | _i | _i | _|| _d | _	d | _
d| _|   |   d S )Nzaccumulators must be a dictzmaster_weights must be a dictz*merged_model_params must be a dict or Noner   )
isinstancedictaccumulatorsmaster_weightsmerged_model_paramsaccumulators_metamaster_weights_metamerged_model_params_metar   bufferbuffer_ipc_metaoffsetbuild_buffermapping_tensor)selfr   r    r!   r   r   r   r   __init__K   s&   
zFusionStorage.__init__c                 C   s  d| _ | j D ]@\}}|| jvri | j|< | D ]-\}}|j| jks&J | t| }| j | j | |j|jd| j| |< |  j |7  _ qq| j	 D ]+\}}|j| jksZJ | t| }| j | j | |j|jd| j
|< |  j |7  _ qN| jd ur| j D ]+\}}|j| jksJ | t| }| j | j | |j|jd| j|< |  j |7  _ qtj| j f| jd| _| j   | _d S )Nr   )startendnamer   )r   )r'   r   itemsr"   r   _numelr   r.   r   r    r#   r!   r$   r   Zzerosr%   value
get_tensorZ_share_cudar&   )r*   kv	para_namevar_tmpZsrc_lenr   r   r   r(   d   sF   


zFusionStorage.build_bufferc                 C   s   | j  D ]\}}| D ]\}}| j| j| | |d |d d qq| j D ]\}}| j| j| |d |d d q)| j D ]\}}| j| j| |d |d d qBd S )Nr,   r-   )srcr,   r-   )r"   r/   mapping_tensor_implr   r#   r    r$   r!   )r*   r3   r4   r5   metar   r   r   r)      s&   zFusionStorage.mapping_tensorc                 C   s\   |j }|j}d|_|  t|| j|| | | ||_| j||	| d S )NT)
r   stop_gradientZflatten_r   Zassignr%   _slicer2   	_set_dimsZ_share_buffer_to)r*   r7   r,   r-   Ztensor_shaper:   r   r   r   r8      s   z!FusionStorage.mapping_tensor_impl)__name__
__module____qualname__r   float32r+   imperative_baseno_gradr(   r)   r8   r   r   r   r   r   J   s    

+
r   c                   @   s\   e Zd Zdd Ze dd Zdd Ze dd Zd	d
 Z	dd Z
e dd ZdS )FusionStorageHelperc                 C   sL   t  | _d | _d | _d | _d | _d | _d | _d | _g | _	| 
|||| d S )N)r   async_loaderr"   r#   r$   r&   r%   
cpu_bufferbuffer_lengthtasks
reset_metar*   r"   r#   r$   r&   r   r   r   r+      s   zFusionStorageHelper.__init__c                 C   s   t |ts	J d|| _t |tsJ d|| _t |ts%|d u s%J d|| _t |tr3t|dks7J d|| _t	tj
jj| j| _| j | _| j | _d S )Nz accumulators_meta must be a dictz"master_weights_meta must be a dictz/merged_model_params_meta must be a dict or None   z-buffer_ipc_meta must be a tuple with length 7)r   r   r"   r#   r$   tuplelenr&   r   Z	to_tensorbasecoreZDenseTensorZ_new_shared_cudar%   Z
pin_memoryrE   r0   rF   rI   r   r   r   rH      s2   
zFusionStorageHelper.reset_metac                 C   s   |  d| j d S r   )sync_partial_paramrF   )r*   r   r   r   
sync_param   s   zFusionStorageHelper.sync_paramc                 C   st   t |ts	J dt |tsJ d|dksJ d|| jks#J dt| j| j|||| | jd}| j| d S )Nzstart must be an integerzend must be an integerr   zstart must be non-negativez9end must be less than or equal to the total buffer length)Z
src_tensorZ
dst_tensorZ
src_offsetZ
dst_offsetZoffload_sizerD   )	r   intrF   r   r%   rE   rD   rG   append)r*   r,   r-   taskr   r   r   rO      s   z&FusionStorageHelper.sync_partial_paramc                 C   sZ   t | jdkr	d S | jd}t | jdkr'| jd}|  t | jdks|  d S )Nr   )rL   rG   popZ	cuda_waitZcpu_wait)r*   Z	last_taskrS   r   r   r   wait_all  s   zFusionStorageHelper.wait_allc                 C   s   di i}| j  D ]\}}| D ]\}}| |}|||j< qq	| j D ]\}}| |}||d |< q&| jrRi |d< | j D ]\}}| |}||d |< qB|S )Nr    r!   )r"   r/   restore_tensor_from_metar.   r#   r$   )r*   
state_dictr3   r4   r5   tensor_metar6   r   r   r   rX     s   


zFusionStorageHelper.state_dictc                 C   sF   |d }|d }|d }|d }| j ||}| | ||_|S )Nr   r.   r,   r-   )rE   r;   r2   r<   r.   )r*   rY   r   r.   r,   r-   Ztensorr   r   r   rW     s   z,FusionStorageHelper.restore_tensor_from_metaN)r=   r>   r?   r+   rA   rB   rH   rP   rO   rV   rX   rW   r   r   r   r   rC      s    

	rC   )numpyr   r   Zpaddle.autogradZautogradrA   Zpaddle.frameworkr   Z#paddle.incubate.tensor.manipulationr   r   r   Zfloat16Zbfloat16r@   r   r   r   r   r   rC   r   r   r   r   <module>   s$   k