o
    81 iT                     @   sd   d dl Z d dlZd dlmZ d dlZd dlm  mZ d dl	m
Z
mZ dd Zdede
fdd	ZdS )
    N)OrderedDict)
GPT2Config
GPTJConfigc              	      s  dd t fdd|  D } dd t fdd|  D } | d}t|d	d
}t|j| | }t|ddd||j	d  f| d< t|drT| d | d< n*| d}t|ddd||j	d  f| d< | d}t|d||j	d  f| d< dd t fdd|  D } dd t fdd|  D } t
|jD ]?}| d| d}| d| d}	| d| d}
tj||	|
gdd| d| d< | d| d | d| d qdd  t  fdd|  D } | S ) Nc                 S      t dd| S )Nz^transformer.h.transformer.layers.resubkey r   b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/flash_attn/models/gptj.pykey_mapping_layers      z4remap_state_dict_hf_gptj.<locals>.key_mapping_layersc                 3        | ]\}} ||fV  qd S Nr   .0kv)r   r   r   	<genexpr>       z+remap_state_dict_hf_gptj.<locals>.<genexpr>c                 S   r   )Nz^transformer.wte.z'transformer.embeddings.word_embeddings.r   r
   r   r   r   key_mapping_emb   r   z1remap_state_dict_hf_gptj.<locals>.key_mapping_embc                 3   r   r   r   r   )r   r   r   r      r   z-transformer.embeddings.word_embeddings.weightpad_vocab_size_multiple   r   tie_word_embeddingszlm_head.weightzlm_head.biasc                 S   r   )Nz^transformer.layers.(\d+).ln_1.ztransformer.layers.\1.norm1.r   r
   r   r   r   key_mapping_ln+   r   z0remap_state_dict_hf_gptj.<locals>.key_mapping_lnc                 3   r   r   r   r   )r   r   r   r   .   r   c                 S   s    t dd| } t dd| } | S )Nz$^transformer.layers.(\d+).mlp.fc_in.ztransformer.layers.\1.mlp.fc1.z%^transformer.layers.(\d+).mlp.fc_out.ztransformer.layers.\1.mlp.fc2.r   r
   r   r   r   key_mapping_mlp1   s   z1remap_state_dict_hf_gptj.<locals>.key_mapping_mlpc                 3   r   r   r   r   )r   r   r   r   :   r   r   z.attn.q_proj.weightz.attn.k_proj.weightz.attn.v_proj.weight)dimz.mixer.Wqkv.weightz
.attn.biasz.attn.masked_biasc                 S   r   )Nz(^transformer.layers.(\d+).attn.out_proj.z%transformer.layers.\1.mixer.out_proj.r   r
   r   r   r   key_mapping_attnF   s
   z2remap_state_dict_hf_gptj.<locals>.key_mapping_attnc                 3   r   r   r   r   )r   r   r   r   M   r   )r   itemspopgetattrmathceil
vocab_sizeFpadshaperangen_layertorchcat)Z
state_dictconfigZword_embeddingsr   r%   Zoutput_embeddingsZoutput_embeddings_biaslZWqZWkZWvr   )r   r   r   r   r   r   remap_state_dict_hf_gptj   sD   



	 r/   gptj_configreturnc                 C   s   | j | j }tdi d| jddd| j d| jd| jd| jd| jd	| jd
| jd| j	d| j
d| jd| jd| jddddddd| j| ddddddddddS )Nr%   Zn_positionsr   n_embdr*   n_headn_inneractivation_functionresid_pdrop
embd_pdrop
attn_pdroplayer_norm_epsiloninitializer_rangebos_token_ideos_token_idZprenormTZparallel_blockZparallel_block_tied_normZrotary_emb_fractionZrotary_emb_interleavedr   FZqkv_proj_biasZout_proj_biasZlm_head_biasr   )r2   r3   r   r%   r*   r4   r5   r6   r7   r8   r9   r:   r;   r<   Z
rotary_dim)r0   Zheaddimr   r   r   gptj_config_to_gpt2_configR   s`   	

r=   )r#   r   collectionsr   r+   Ztorch.nn.functionalnnZ
functionalr&   Ztransformersr   r   r/   r=   r   r   r   r   <module>   s   F