o
    81 ib                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dlm	  m
Z d dlmZ d dlmZmZmZ dd Zded	efd
dZdS )    N)Path)OrderedDict)	rearrange)
GPT2Config
AutoConfigPretrainedConfigc           	   	      s  dd t fdd|  D } dd t fdd|  D } | d}t|d	d
}t|jd | | }t|ddd||jd  f| d< t|drV| d | d< n"| d}t|jd | | }t|ddd||jd  f| d< dd t fdd|  D } t	|j
D ]#}| d| d}| d| d}tj||gdd| d| d< qdd t fdd|  D } dd  t  fdd|  D } t	|j
D ]}| d| dd  q| S )Nc                 S      t dd| S )Nz^model.ztransformer.resubkey r   f/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/flash_attn/models/baichuan.pykey_mapping_layers   s   z8remap_state_dict_hf_baichuan.<locals>.key_mapping_layersc                 3        | ]\}} ||fV  qd S Nr   .0kv)r   r   r   	<genexpr>       z/remap_state_dict_hf_baichuan.<locals>.<genexpr>c                 S   r   )Nz^transformer.embed_tokens.z'transformer.embeddings.word_embeddings.r	   r   r   r   r   key_mapping_emb   
   z5remap_state_dict_hf_baichuan.<locals>.key_mapping_embc                 3   r   r   r   r   )r   r   r   r      r   z-transformer.embeddings.word_embeddings.weightpad_vocab_size_multiple   r   tie_word_embeddingszlm_head.weightc                 S   s.   t dd| } t dd| } t dd| } | S )Nz^transformer.norm.ztransformer.ln_f.z*^transformer.layers.(\d+).input_layernorm.ztransformer.layers.\1.norm1.z3^transformer.layers.(\d+).post_attention_layernorm.ztransformer.layers.\1.norm2.r	   r   r   r   r   key_mapping_ln<   s   z4remap_state_dict_hf_baichuan.<locals>.key_mapping_lnc                 3   r   r   r   r   )r   r   r   r   J   r   ztransformer.layers.z.mlp.gate_proj.weightz.mlp.up_proj.weight)dimz.mlp.fc1.weightc                 S   r   )Nz(^transformer.layers.(\d+).mlp.down_proj.ztransformer.layers.\1.mlp.fc2.r	   r   r   r   r   key_mapping_mlpU   r   z5remap_state_dict_hf_baichuan.<locals>.key_mapping_mlpc                 3   r   r   r   r   )r    r   r   r   \   r   c                 S   s    t dd| } t dd| } | S )Nz+^transformer.layers.(\d+).self_attn.W_pack.z!transformer.layers.\1.mixer.Wqkv.z+^transformer.layers.(\d+).self_attn.o_proj.z%transformer.layers.\1.mixer.out_proj.r	   r   r   r   r   key_mapping_attn_   s   z6remap_state_dict_hf_baichuan.<locals>.key_mapping_attnc                 3   r   r   r   r   )r!   r   r   r   l   r   z.self_attn.rotary_emb.inv_freq)r   itemspopgetattrmathceilshapeFpadrangen_layertorchcat)	Z
state_dictconfigZword_embeddingsr   
vocab_sizeZoutput_embeddingslZw1Zw3r   )r!   r   r   r   r    r   remap_state_dict_hf_baichuan   sP   



r1   baichuan_configreturnc                 C   s  | j dk }td#i d| jddd| j d| jd| jd| jd	d
ddddddd| jd| jd| jd| j	d| j
ddd|rGdn$dddd| d| ddd| jdkddd dd!dd"dS ddd| d| ddd| jdkddd dd!dd"dS )$Ni  r/   Zn_positionsr   Zn_embdr+   Zn_headZn_innerZactivation_functionZswigluZresid_pdropg        Z
embd_pdropZ
attn_pdropZlayer_norm_epsiloninitializer_rangebos_token_ideos_token_idpad_token_idZrms_normTZrotary_emb_fractiong      ?Zrotary_emb_interleavedFZ	use_alibiZuse_flash_attnr   Z	norm_headip Zqkv_proj_biasZout_proj_biasZmlp_fc1_biasZmlp_fc2_biasr   )Zhidden_sizer   r/   Znum_hidden_layersZnum_attention_headsZintermediate_sizeZrms_norm_epsr4   r5   r6   r7   )r2   Z
use_rotaryr   r   r   baichuan_config_to_gpt2_configs   s   
	


r8   )r%   jsonr
   pathlibr   collectionsr   r,   Ztorch.nn.functionalnnZ
functionalr(   Zeinopsr   Ztransformersr   r   r   r1   r8   r   r   r   r   <module>   s   b