o
    Ó0 i¿Ú  ã                   @  sL   d dl mZ d dlmZ ddlmZmZmZmZ G dd„ dƒZ	ddd„Z
dS )é    )Úannotations)ÚSequenceé   )Ú
MODEL_ARCHÚMODEL_TENSORÚMODEL_TENSORSÚTENSOR_NAMESc                   @  s”  e Zd ZU ejdejdejdejdejdej	dej
dejdejdejd	i
Zd
ed< i ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ejd“ej d“i ej!d“ej"d“ej#d“ej$d “ej%d!“ej&d"“ej'd#“ej(d$“ej)d%“ej*d&“ej+d'“ej
d(“ej,d)“ej-d*“ej.d+“ej/d,“ej0d-“¥i ej1d.“ej2d/“ej3d0“ej4d1“ej5d2“ej6d3“ej7d4“ej8d5“ej9d6“ej:d7“ej;d8“ej<d9“ej=d:“ej>d;“ej?d<“ej@d=“ejAd>“¥i ejBd?“ejCd@“ejDdA“ejEdB“ejFdC“ejGdD“ejHdE“ejIdF“ejJdG“ejKdH“ejLdI“ejMdJ“ejNdK“ejOdL“ejPdM“ejQdN“ejRdO“¥i ejSdP“ejTdQ“ejUdR“ejVdS“ejWdT“ejXdU“ejYdV“ejZdW“ej[dX“ej\dY“ej]dZ“ej^d[“ej_d\“ej`d]“ejad^“ejbd_“ejcd`“¥i ejdda“ejedb“ejfdc“ejgdd“ejhde“ejidf“ejjdg“ejkdh“ejldi“ejmdj“ejndk“ejodl“ejpdm“ejqdn“ejrdo“ejsdp“ejtdq“¥i ejudr“ejvds“ejwdt“ejxdu“ejydv“ejzdw“ej{dx“ej|dy“ej}dz“ej~d{“ejd|“ej€d}“ejd~“ej‚d“ejƒd€“ej„d“ej…d‚“¥i ej†dƒ“ej‡d„“ejˆd…“ej‰dƒ“ejŠd†“ej‹d‡“ejŒdˆ“ejd‰“ejŽdŠ“ejd‹“ejdŒ“ej‘d“ej’dŽ“ej“d“ej”d“ej•d‘“ej–d’“¥i ej—d““ej˜d”“ej™d•“ejšd–“ej›d—“ejœd˜“ejd™“ejždš“ejŸd›“ej dœ“ej¡d“ej¢dž“ej£dŸ“ej¤d “ej¥d¡“ej¦d¢“ej§d£“¥i ej¨d¤“ej©d¥“ejªd¦“ej«d§“ej¬d¨“ej­d©“ej®dª“ej¯d«“ej°d¬“ej±d­“ej²d®“ej³d¯“ej´d°“ejµd±“ej¶d“ej·d²“ej¸d³“¥ej¹d´ejºdµej»d¶ej¼d·ej½d¸ej¾d¹ej¿dejÀdºejÁd»ejÂd¼ejÃd½ejÄd¾i¥ZÅd
ed¿< eÆjÇejdÀejÈdÁiiZÉdÂedÃ< dÄedÅ< dâdÊdË„ZÊdãdädÒdÓ„ZËdãdådÕdÖ„ZÌdãdædØdÙ„ZÍdçdÚdÛ„ZÎdèdÝdÞ„ZÏdédßdà„ZÐdáS )êÚTensorNameMap)zgpt_neox.embed_inztransformer.wteztransformer.word_embeddingsZword_embeddingszmodel.embed_tokensZtok_embeddingszembeddings.word_embeddingsz(language_model.embedding.word_embeddingsZwteztransformer.embd.wtezmodel.tok_embeddingszmodel.embeddingzbackbone.embeddingzbackbone.embeddingsztransformer.in_out_embedzembedding.word_embeddingsztransformer.token_embeddingsZsharedzrwkv.embeddingszmodel.embeddingszmodel.word_embeddingsz!language_model.model.embed_tokensÚencoder)z embeddings.token_type_embeddings)	Zword_embeddings_layernormzembeddings.LayerNormZemb_lnútransformer.normúrwkv.blocks.0.pre_lnr   zmodel.pre_lnzmodel.layers.0.pre_normzbackbone.norm)ztransformer.wpezembeddings.position_embeddingsZwpe)	Z	embed_outÚlm_headÚoutputZword_embeddings_for_headzlm_head.linearZoutput_layerÚheadzhead.outr   )zgpt_neox.final_layer_normztransformer.ln_fú
model.normZnormztransformer.norm_fZln_fz&language_model.encoder.final_layernormzmodel.final_layernormz
lm_head.lnzmodel.norm_fzbackbone.norm_fztransformer.rms_normzencoder.final_layernormr   r   zrwkv.ln_outzmodel.ln_outzbackbone.final_layer_normr   )z
rope.freqszrotary_pos_emb.inv_freq© )zbackbone.embedz#dict[MODEL_TENSOR, tuple[str, ...]]Úmappings_cfg)z%gpt_neox.layers.{bid}.input_layernormztransformer.h.{bid}.ln_1ztransformer.blocks.{bid}.norm_1z#transformer.h.{bid}.input_layernormzh.{bid}.input_layernormztransformer.h.{bid}.ln_mlpú"model.layers.{bid}.input_layernormzlayers.{bid}.attention_normz3language_model.encoder.layers.{bid}.input_layernormúmodel.layers.{bid}.ln1zh.{bid}.ln_1ztransformer.h.{bid}.lnzmodel.layers.layers.{bid}.normz!model.layers.{bid}.attention_normzmodel.layers.{bid}.normzbackbone.layers.{bid}.normz(transformer.decoder_layer.{bid}.rms_normz.transformer.blocks.{bid}.norm_attn_norm.norm_1z$encoder.layers.{bid}.input_layernormz"transformer.layers.{bid}.attn_normzrwkv.blocks.{bid}.ln1r   r   z(transformer_encoder.{bid}.attention_norm)ztransformer.h.{bid}.ln_attnz encoder.layer.{bid}.layer_norm_1zrwkv.blocks.{bid}.ln2úmodel.layers.{bid}.ln2)z/gpt_neox.layers.{bid}.attention.query_key_valueztransformer.h.{bid}.attn.c_attnz"transformer.blocks.{bid}.attn.Wqkvz1transformer.blocks.{bid}.norm_attn_norm.attn.Wqkvz2transformer.h.{bid}.self_attention.query_key_valuez&h.{bid}.self_attention.query_key_valuezBlanguage_model.encoder.layers.{bid}.self_attention.query_key_valuez,model.layers.{bid}.self_attn.query_key_valuezh.{bid}.attn.c_attnztransformer.h.{bid}.mixer.Wqkvzencoder.layers.{bid}.attn.Wqkvzencoder.layers.{bid}.mixer.Wqkvz%model.layers.{bid}.self_attn.qkv_projz3encoder.layers.{bid}.self_attention.query_key_valuez&transformer.layers.{bid}.attn.qkv_projztransformer_encoder.{bid}.qkv)ú#model.layers.{bid}.self_attn.q_projz+model.layers.{bid}.self_attn.q_proj_no_permzlayers.{bid}.attention.wqz(encoder.layer.{bid}.attention.self.queryz'transformer.layer.{bid}.attention.q_linztransformer.h.{bid}.attn.q_projz*model.layers.layers.{bid}.self_attn.q_projzmodel.layers.{bid}.attention.wqz:transformer.decoder_layer.{bid}.multi_head_attention.queryz)transformer.h.{bid}.attn.attention.q_projr   )ú#model.layers.{bid}.self_attn.k_projz+model.layers.{bid}.self_attn.k_proj_no_permzlayers.{bid}.attention.wkz&encoder.layer.{bid}.attention.self.keyz'transformer.layer.{bid}.attention.k_linztransformer.h.{bid}.attn.k_projztransformer.h.{bid}.attn.kz*model.layers.layers.{bid}.self_attn.k_projzmodel.layers.{bid}.attention.wkz8transformer.decoder_layer.{bid}.multi_head_attention.keyz)transformer.h.{bid}.attn.attention.k_projr   )ú#model.layers.{bid}.self_attn.v_projzlayers.{bid}.attention.wvz(encoder.layer.{bid}.attention.self.valuez'transformer.layer.{bid}.attention.v_linztransformer.h.{bid}.attn.v_projztransformer.h.{bid}.attn.vz*model.layers.layers.{bid}.self_attn.v_projzmodel.layers.{bid}.attention.wvz:transformer.decoder_layer.{bid}.multi_head_attention.valuez)transformer.h.{bid}.attn.attention.v_projr   )z%gpt_neox.layers.{bid}.attention.denseztransformer.h.{bid}.attn.c_projz&transformer.blocks.{bid}.attn.out_projz(transformer.h.{bid}.self_attention.densezh.{bid}.self_attention.denseú#model.layers.{bid}.self_attn.o_projz(model.layers.{bid}.self_attn.linear_attnzlayers.{bid}.attention.woz*encoder.layer.{bid}.attention.output.densez)transformer.layer.{bid}.attention.out_linz!transformer.h.{bid}.attn.out_projz8language_model.encoder.layers.{bid}.self_attention.densez"model.layers.{bid}.self_attn.densezh.{bid}.attn.c_projz"transformer.h.{bid}.mixer.out_projz*model.layers.layers.{bid}.self_attn.o_projzmodel.layers.{bid}.attention.woz"encoder.layers.{bid}.attn.out_projz#encoder.layers.{bid}.mixer.out_projz;transformer.decoder_layer.{bid}.multi_head_attention.linearz5transformer.blocks.{bid}.norm_attn_norm.attn.out_projz)encoder.layers.{bid}.self_attention.densez&transformer.layers.{bid}.attn.out_projz+transformer.h.{bid}.attn.attention.out_projr   ztransformer_encoder.{bid}.wo)z.encoder.layer.{bid}.attention.output.LayerNormz%transformer.layer.{bid}.sa_layer_normzencoder.layers.{bid}.norm1z*transformer.decoder_layer.{bid}.rms_norm_1z.transformer.blocks.{bid}.norm_attn_norm.norm_2)ú+model.layers.{bid}.post_attention_layernormz+model.layers.{bid}.post_self_attn_layernorm)z0model.layers.{bid}.self_attn.rotary_emb.inv_freqz1layers.{bid}.attention.inner_attention.rope.freqsz7model.layers.layers.{bid}.self_attn.rotary_emb.inv_freqz,transformer.h.{bid}.attn.rotary_emb.inv_freq)z.gpt_neox.layers.{bid}.post_attention_layernormztransformer.h.{bid}.ln_2z h.{bid}.post_attention_layernormztransformer.blocks.{bid}.norm_2r   zlayers.{bid}.ffn_normz<language_model.encoder.layers.{bid}.post_attention_layernormr   zh.{bid}.ln_2zmodel.layers.{bid}.ffn_normz*transformer.decoder_layer.{bid}.rms_norm_2z-encoder.layers.{bid}.post_attention_layernormz!transformer.layers.{bid}.ffn_normr   z"transformer_encoder.{bid}.ffn_norm)z,model.layers.{bid}.pre_feedforward_layernorm)z-model.layers.{bid}.post_feedforward_layernormz%model.layers.{bid}.post_mlp_layernorm)zlayers.{bid}.feed_forward.gatez(model.layers.{bid}.block_sparse_moe.gatezmodel.layers.{bid}.mlp.gatez&transformer.decoder_layer.{bid}.routerz)transformer.blocks.{bid}.ffn.router.layerz0model.layers.{bid}.block_sparse_moe.router.layerz&model.layers.{bid}.feed_forward.routerz%encoder.layers.{bid}.mlp.router.layer)z)model.layers.{bid}.mlp.shared_expert_gate)z.model.layers.{bid}.mlp.gate.e_score_correction)z'gpt_neox.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fcz$transformer.blocks.{bid}.ffn.up_projz%transformer.h.{bid}.mlp.dense_h_to_4hzh.{bid}.mlp.dense_h_to_4hzmodel.layers.{bid}.mlp.up_projzlayers.{bid}.feed_forward.w3z&encoder.layer.{bid}.intermediate.densez transformer.layer.{bid}.ffn.lin1ztransformer.h.{bid}.mlp.fc_inz transformer.h.{bid}.mlp.linear_3z5language_model.encoder.layers.{bid}.mlp.dense_h_to_4hz$model.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.w1zh.{bid}.mlp.c_fcztransformer.h.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.fc1z#model.layers.{bid}.mlp.gate_up_projz%model.layers.layers.{bid}.mlp.up_projz"model.layers.{bid}.feed_forward.w3zencoder.layers.{bid}.mlp.fc11zencoder.layers.{bid}.mlp.fc1zmodel.layers.{bid}.mlp.c_fcz&encoder.layer.{bid}.mlp.gated_layers_vz$encoder.layer.{bid}.mlp.gated_layersz&encoder.layer.{bid}.mlp.up_gated_layerz"model.layers.{bid}.residual_mlp.w3z&encoder.layers.{bid}.mlp.dense_h_to_4hztransformer.h.{bid}.mlp.c_fc_1z'model.layers.{bid}.feed_forward.up_projz!transformer_encoder.{bid}.ffn.w12)z$layers.{bid}.feed_forward.experts.w3z,transformer.decoder_layer.{bid}.moe.linear_vz+transformer.blocks.{bid}.ffn.experts.mlp.v1z&model.layers.{bid}.mlp.experts.up_projz.model.layers.{bid}.block_sparse_moe.experts.w3z/model.layers.{bid}.feed_forward.experts.up_projz'encoder.layers.{bid}.mlp.experts.mlp.w1)z,model.layers.{bid}.mlp.shared_expert.up_projz-model.layers.{bid}.mlp.shared_experts.up_projz5model.layers.{bid}.feed_forward.shared_expert.up_proj)z transformer.blocks.{bid}.ffn.act)z model.layers.{bid}.mlp.gate_projzlayers.{bid}.feed_forward.w1ztransformer.h.{bid}.mlp.w2ztransformer.h.{bid}.mlp.c_fc2z'model.layers.layers.{bid}.mlp.gate_projz"model.layers.{bid}.feed_forward.w1zencoder.layers.{bid}.mlp.fc12z&encoder.layer.{bid}.mlp.gated_layers_wz transformer.h.{bid}.mlp.linear_1z"model.layers.{bid}.residual_mlp.w1ztransformer.h.{bid}.mlp.c_fc_0z)model.layers.{bid}.feed_forward.gate_proj)z$layers.{bid}.feed_forward.experts.w1z*transformer.decoder_layer.{bid}.moe.linearz+transformer.blocks.{bid}.ffn.experts.mlp.w1z(model.layers.{bid}.mlp.experts.gate_projz.model.layers.{bid}.block_sparse_moe.experts.w1z1model.layers.{bid}.feed_forward.experts.gate_proj)z.model.layers.{bid}.mlp.shared_expert.gate_projz/model.layers.{bid}.mlp.shared_experts.gate_projz7model.layers.{bid}.feed_forward.shared_expert.gate_proj)z'gpt_neox.layers.{bid}.mlp.dense_4h_to_hztransformer.h.{bid}.mlp.c_projz&transformer.blocks.{bid}.ffn.down_projz%transformer.h.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.dense_4h_to_hz model.layers.{bid}.mlp.down_projzlayers.{bid}.feed_forward.w2z encoder.layer.{bid}.output.densez transformer.layer.{bid}.ffn.lin2ztransformer.h.{bid}.mlp.fc_outz5language_model.encoder.layers.{bid}.mlp.dense_4h_to_hz$model.layers.{bid}.mlp.dense_4h_to_hzh.{bid}.mlp.c_projztransformer.h.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.fc2z'model.layers.layers.{bid}.mlp.down_projz"model.layers.{bid}.feed_forward.w2zencoder.layers.{bid}.mlp.fc2zmodel.layers.{bid}.mlp.c_projzencoder.layer.{bid}.mlp.woz#transformer.layers.{bid}.ffn.proj_2z"model.layers.{bid}.residual_mlp.w2z"encoder.layer.{bid}.mlp.down_layerz&encoder.layers.{bid}.mlp.dense_4h_to_hzmodel.layers.h.{bid}.mlp.c_projz)model.layers.{bid}.feed_forward.down_projz transformer_encoder.{bid}.ffn.w3)z$layers.{bid}.feed_forward.experts.w2z,transformer.decoder_layer.{bid}.moe.linear_1z+transformer.blocks.{bid}.ffn.experts.mlp.w2z(model.layers.{bid}.mlp.experts.down_projz1model.layers.{bid}.block_sparse_moe.output_linearz.model.layers.{bid}.block_sparse_moe.experts.w2z1model.layers.{bid}.feed_forward.experts.down_projz'encoder.layers.{bid}.mlp.experts.mlp.w2)z.model.layers.{bid}.mlp.shared_expert.down_projz/model.layers.{bid}.mlp.shared_experts.down_projz7model.layers.{bid}.feed_forward.shared_expert.down_projz+model.layers.{bid}.shared_mlp.output_linear)z>language_model.encoder.layers.{bid}.self_attention.q_layernormz(model.layers.{bid}.self_attn.q_layernormz#model.layers.{bid}.self_attn.q_normz"transformer.blocks.{bid}.attn.q_lnz/encoder.layer.{bid}.attention.self.layer_norm_qz$transformer.layers.{bid}.attn.q_norm)z>language_model.encoder.layers.{bid}.self_attention.k_layernormz(model.layers.{bid}.self_attn.k_layernormz#model.layers.{bid}.self_attn.k_normz"transformer.blocks.{bid}.attn.k_lnz/encoder.layer.{bid}.attention.self.layer_norm_kz$transformer.layers.{bid}.attn.k_norm)zFlanguage_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq)z$encoder.layer.{bid}.output.LayerNormz)transformer.layer.{bid}.output_layer_normzencoder.layers.{bid}.norm2z*transformer.decoder_layer.{bid}.rms_norm_3z!encoder.layer.{bid}.mlp.layernormz encoder.layer.{bid}.layer_norm_2)zmodel.layers.{bid}.in_projz#backbone.layers.{bid}.mixer.in_proj)zmodel.layers.{bid}.conv1dz"backbone.layers.{bid}.mixer.conv1d)zmodel.layers.{bid}.x_projz"backbone.layers.{bid}.mixer.x_proj)zmodel.layers.{bid}.dt_projz#backbone.layers.{bid}.mixer.dt_proj)zmodel.layers.{bid}.A_logz!backbone.layers.{bid}.mixer.A_log)zmodel.layers.{bid}.Dzbackbone.layers.{bid}.mixer.D)zmodel.layers.{bid}.out_projz$backbone.layers.{bid}.mixer.out_proj)zmodel.layers.{bid}.attention.w0)z'rwkv.blocks.{bid}.attention.time_maa_w1z(model.layers.{bid}.self_attn.time_maa_w1zmodel.layers.{bid}.attention.w1)z'rwkv.blocks.{bid}.attention.time_maa_w2z(model.layers.{bid}.self_attn.time_maa_w2zmodel.layers.{bid}.attention.w2)zmodel.layers.{bid}.attention.a0)zmodel.layers.{bid}.attention.a1)zmodel.layers.{bid}.attention.a2)zmodel.layers.{bid}.attention.v0)zmodel.layers.{bid}.attention.v1)zmodel.layers.{bid}.attention.v2)zmodel.layers.{bid}.attention.g1)zmodel.layers.{bid}.attention.g2)z model.layers.{bid}.attention.k_k)z model.layers.{bid}.attention.k_a)z model.layers.{bid}.attention.r_k)z&rwkv.blocks.{bid}.attention.time_maa_xz'model.layers.{bid}.self_attn.time_maa_x)z&rwkv.blocks.{bid}.attention.time_maa_kz'model.layers.{bid}.self_attn.time_maa_k)z&rwkv.blocks.{bid}.attention.time_maa_vz'model.layers.{bid}.self_attn.time_maa_v)z&rwkv.blocks.{bid}.attention.time_maa_rz'model.layers.{bid}.self_attn.time_maa_r)z&rwkv.blocks.{bid}.attention.time_maa_gz'model.layers.{bid}.self_attn.time_maa_g)z&rwkv.blocks.{bid}.attention.time_maa_wz'model.layers.{bid}.self_attn.time_maa_w)z&rwkv.blocks.{bid}.attention.time_faaaa)z&rwkv.blocks.{bid}.attention.time_decayz'model.layers.{bid}.self_attn.time_decay)z)rwkv.blocks.{bid}.attention.time_decay_w1z*model.layers.{bid}.self_attn.time_decay_w1)z)rwkv.blocks.{bid}.attention.time_decay_w2z*model.layers.{bid}.self_attn.time_decay_w2)zrwkv.blocks.{bid}.attention.keyr   z model.layers.{bid}.attention.keyz#model.layers.{bid}.attention.k_proj)z!rwkv.blocks.{bid}.attention.valuer   z"model.layers.{bid}.attention.valuez#model.layers.{bid}.attention.v_proj)z&rwkv.blocks.{bid}.attention.receptancer   z'model.layers.{bid}.attention.receptancez#model.layers.{bid}.attention.r_proj)z rwkv.blocks.{bid}.attention.gatez!model.layers.{bid}.self_attn.gate)z rwkv.blocks.{bid}.attention.ln_xz!model.layers.{bid}.attention.ln_x)z"rwkv.blocks.{bid}.attention.outputr   z#model.layers.{bid}.attention.outputz#model.layers.{bid}.attention.o_proj)z)rwkv.blocks.{bid}.feed_forward.time_maa_kz#model.layers.{bid}.feed_forward.x_k)z)rwkv.blocks.{bid}.feed_forward.time_maa_r)z"rwkv.blocks.{bid}.feed_forward.keyz#model.layers.{bid}.feed_forward.key)z)rwkv.blocks.{bid}.feed_forward.receptance)z$rwkv.blocks.{bid}.feed_forward.valuez%model.layers.{bid}.feed_forward.value)z%model.layers.{bid}.self_attn.q_a_proj)z%model.layers.{bid}.self_attn.q_b_proj)z/model.layers.{bid}.self_attn.kv_a_proj_with_mqa)z&model.layers.{bid}.self_attn.kv_b_proj)z%model.layers.{bid}.self_attn.k_b_proj)z%model.layers.{bid}.self_attn.v_b_proj)z*model.layers.{bid}.self_attn.q_a_layernorm)z+model.layers.{bid}.self_attn.kv_a_layernorm)z*model.layers.{bid}.self_attn.inner_attn_ln)z$model.layers.{bid}.mlp.ffn_layernorm)z&decoder.block.{bid}.layer.0.layer_norm)z+decoder.block.{bid}.layer.0.SelfAttention.q)z+decoder.block.{bid}.layer.0.SelfAttention.k)z+decoder.block.{bid}.layer.0.SelfAttention.v)z+decoder.block.{bid}.layer.0.SelfAttention.o)zAdecoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&decoder.block.{bid}.layer.1.layer_norm)z-decoder.block.{bid}.layer.1.EncDecAttention.q)z-decoder.block.{bid}.layer.1.EncDecAttention.k)z-decoder.block.{bid}.layer.1.EncDecAttention.v)z-decoder.block.{bid}.layer.1.EncDecAttention.o)zCdecoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias)z&decoder.block.{bid}.layer.2.layer_norm)z/decoder.block.{bid}.layer.2.DenseReluDense.wi_0)z-decoder.block.{bid}.layer.2.DenseReluDense.wiz/decoder.block.{bid}.layer.2.DenseReluDense.wi_1)z-decoder.block.{bid}.layer.2.DenseReluDense.wo)zdecoder.final_layer_norm)z&encoder.block.{bid}.layer.0.layer_norm)z+encoder.block.{bid}.layer.0.SelfAttention.q)z+encoder.block.{bid}.layer.0.SelfAttention.k)z+encoder.block.{bid}.layer.0.SelfAttention.v)z+encoder.block.{bid}.layer.0.SelfAttention.o)zAencoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias)z&encoder.block.{bid}.layer.1.layer_norm)z/encoder.block.{bid}.layer.1.DenseReluDense.wi_0)z-encoder.block.{bid}.layer.1.DenseReluDense.wiz/encoder.block.{bid}.layer.1.DenseReluDense.wi_1)z-encoder.block.{bid}.layer.1.DenseReluDense.wo)zencoder.final_layer_normZ
layer_norm)Z
classifierzclassifier.denseZpre_classifierZdense)zclassifier.out_proj)zbackbone.convnext.{bid}.dwconv)zbackbone.convnext.{bid}.norm)zbackbone.convnext.{bid}.pwconv1)zbackbone.convnext.{bid}.pwconv2)zbackbone.convnext.{bid}.gamma)zbackbone.posnet.{bid}.conv1)zbackbone.posnet.{bid}.conv2)zbackbone.posnet.{bid}.norm)zbackbone.posnet.{bid}.norm1)zbackbone.posnet.{bid}.norm2)zbackbone.posnet.{bid}.q)zbackbone.posnet.{bid}.k)zbackbone.posnet.{bid}.v)zbackbone.posnet.{bid}.proj_out)z"multi_modal_projector.linear_{bid}zvisual.merger.mlp.{bid})z(model.connector.modality_projection.proj)z model.mm_projector.mlp.mlp.{bid}z'vision_model.vision_adapter.mlp.fc{bid}z
mlp1.{bid})z model.mm_projector.peg.peg.{bid})z4vision_tower.vision_model.embeddings.class_embeddingzvision_model.class_embedding)z4vision_tower.vision_model.embeddings.patch_embeddingzvpm.embeddings.patch_embeddingz-model.vision_model.embeddings.patch_embeddingzvision_tower.patch_convz#vision_model.patch_embedding.linearzvisual.patch_embed.proj)z7vision_tower.vision_model.embeddings.position_embeddingz!vpm.embeddings.position_embeddingz0model.vision_model.embeddings.position_embeddingz%vision_model.positional_embedding_vlm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.q_projz)vpm.encoder.layers.{bid}.self_attn.q_projz8model.vision_model.encoder.layers.{bid}.self_attn.q_projz0vision_model.model.layers.{bid}.self_attn.q_projz6vision_tower.transformer.layers.{bid}.attention.q_projzvisual.blocks.{bid}.attn.q)z:vision_tower.vision_model.encoder.layers.{bid}.attn.q_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.k_projz)vpm.encoder.layers.{bid}.self_attn.k_projz8model.vision_model.encoder.layers.{bid}.self_attn.k_projz0vision_model.model.layers.{bid}.self_attn.k_projz6vision_tower.transformer.layers.{bid}.attention.k_projzvisual.blocks.{bid}.attn.k)z:vision_tower.vision_model.encoder.layers.{bid}.attn.k_norm)z?vision_tower.vision_model.encoder.layers.{bid}.self_attn.v_projz)vpm.encoder.layers.{bid}.self_attn.v_projz8model.vision_model.encoder.layers.{bid}.self_attn.v_projz0vision_model.model.layers.{bid}.self_attn.v_projz6vision_tower.transformer.layers.{bid}.attention.v_projzvisual.blocks.{bid}.attn.v)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.vision_model.encoder.layers.{bid}.norm1z$vpm.encoder.layers.{bid}.layer_norm1z3model.vision_model.encoder.layers.{bid}.layer_norm1z4vision_tower.transformer.layers.{bid}.attention_normz/vision_model.model.layers.{bid}.input_layernormzvisual.blocks.{bid}.norm1)zAvision_tower.vision_model.encoder.layers.{bid}.self_attn.out_projz8vision_tower.vision_model.encoder.layers.{bid}.attn.projz+vpm.encoder.layers.{bid}.self_attn.out_projz:model.vision_model.encoder.layers.{bid}.self_attn.out_projz0vision_model.model.layers.{bid}.self_attn.o_projz6vision_tower.transformer.layers.{bid}.attention.o_projzvisual.blocks.{bid}.attn.proj)z:vision_tower.vision_model.encoder.layers.{bid}.layer_norm2z4vision_tower.vision_model.encoder.layers.{bid}.norm2z$vpm.encoder.layers.{bid}.layer_norm2z3model.vision_model.encoder.layers.{bid}.layer_norm2z8vision_model.model.layers.{bid}.post_attention_layernormz.vision_tower.transformer.layers.{bid}.ffn_normzvisual.blocks.{bid}.norm2)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc1z vpm.encoder.layers.{bid}.mlp.fc1z/model.vision_model.encoder.layers.{bid}.mlp.fc1z:vision_tower.transformer.layers.{bid}.feed_forward.up_projz'vision_model.model.layers.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.fc1zvisual.blocks.{bid}.mlp.up_proj)z<vision_tower.transformer.layers.{bid}.feed_forward.gate_projz!visual.blocks.{bid}.mlp.gate_proj)z6vision_tower.vision_model.encoder.layers.{bid}.mlp.fc2z vpm.encoder.layers.{bid}.mlp.fc2z/model.vision_model.encoder.layers.{bid}.mlp.fc2z<vision_tower.transformer.layers.{bid}.feed_forward.down_projz'vision_model.model.layers.{bid}.mlp.fc2zvisual.blocks.{bid}.mlp.fc2z!visual.blocks.{bid}.mlp.down_proj)z2vision_tower.vision_model.encoder.layers.{bid}.ls1)z2vision_tower.vision_model.encoder.layers.{bid}.ls2)z&vision_tower.vision_model.pre_layrnormzvision_tower.ln_prezvision_model.layernorm_pre)z(vision_tower.vision_model.post_layernormz!model.vision_model.post_layernormzvision_model.layernorm_postzvisual.merger.ln_q)z)multi_modal_projector.mm_input_projection)zmulti_modal_projector.norm)z&multi_modal_projector.mm_soft_emb_norm)zresampler.pos_embed_k)zresampler.attn.in_proj_q)zresampler.attn.in_proj_k)zresampler.attn.in_proj_v)zresampler.attn.out_proj)zresampler.kv_proj)zresampler.ln_post)zresampler.ln_kv)zresampler.ln_q)zresampler.proj)zresampler.query)zv.token_embd.img_break)z0multi_modal_projector.patch_merger.merging_layer)zaudio_tower.embed_positions)zaudio_tower.conv{bid})zaudio_tower.layer_normzaudio_tower.ln_post)z)audio_tower.layers.{bid}.self_attn.q_proj)z)audio_tower.layers.{bid}.self_attn.k_proj)z)audio_tower.layers.{bid}.self_attn.v_proj)z-audio_tower.layers.{bid}.self_attn_layer_norm)z+audio_tower.layers.{bid}.self_attn.out_proj)z)audio_tower.layers.{bid}.final_layer_norm)zaudio_tower.layers.{bid}.fc1)zaudio_tower.layers.{bid}.fc2)z(audio.multi_modal_projector.linear_{bid})z"audio.multi_modal_projector.linearzaudio_tower.proj)z"audio.multi_modal_projector.ln_pre)z"audio.multi_modal_projector.ln_midÚblock_mappings_cfg)z%model.layers.{bid}.residual_layernorm)r   z5dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]]Úarch_block_mappings_cfgz#dict[str, tuple[MODEL_TENSOR, str]]ÚmappingÚarchr   Ún_blocksÚintc                 C  sè   i | _ | j ¡ D ]"\}}|t| vrqt| }||f| j |< |D ]	}||f| j |< q q|| jv r9| j | j| ¡ t|ƒD ]4}| j ¡ D ],\}}|t| vrOqDt| j	|d}||f| j |< |D ]}|j	|d}||f| j |< q`qDq=d S )N)Úbid)
r   r   Úitemsr   r   r   r   ÚupdateÚrangeÚformat)Úselfr   r   ZtensorÚkeysZtensor_nameÚkeyr!   r   r   ú_/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/gguf/tensor_mapping.pyÚ__init__Å  s.   ÿ
þúÿzTensorNameMap.__init__r(   ÚstrÚtry_suffixesúSequence[str]Úreturnútuple[MODEL_TENSOR, str] | Nonec                 C  sj   | j  |¡}|d ur|S |D ]$}| |¡r2| j  |d t|ƒ … ¡}|d ur2|d |d | f  S qd S )Nr   r   )r   ÚgetÚendswithÚlen)r&   r(   r,   ÚresultÚsuffixr   r   r)   Úget_type_and_nameÛ  s   
€zTensorNameMap.get_type_and_nameú
str | Nonec                 C  ó"   | j ||d}|d u rd S |d S )N©r,   r   ©r5   ©r&   r(   r,   r3   r   r   r)   Úget_nameæ  ó   zTensorNameMap.get_nameúMODEL_TENSOR | Nonec                 C  r7   )Nr8   r   r9   r:   r   r   r)   Úget_typeì  r<   zTensorNameMap.get_typec                 C  s(   z| j | d W S  ty   t|ƒ‚w )Nr   )r   ÚKeyError©r&   r(   r   r   r)   Ú__getitem__ò  s
   ÿzTensorNameMap.__getitem__Úboolc                 C  s
   || j v S ©N)r   r@   r   r   r)   Ú__contains__ø  ó   
zTensorNameMap.__contains__c                 C  s
   t | jƒS rC   )Úreprr   )r&   r   r   r)   Ú__repr__û  rE   zTensorNameMap.__repr__N)r   r   r   r    )r   )r(   r+   r,   r-   r.   r/   )r(   r+   r,   r-   r.   r6   )r(   r+   r,   r-   r.   r=   )r(   r+   r.   r+   )r(   r+   r.   rB   )r.   r+   )ÑÚ__name__Ú
__module__Ú__qualname__r   Z
TOKEN_EMBDZTOKEN_TYPESZTOKEN_EMBD_NORMZPOS_EMBDZOUTPUTZOUTPUT_NORMZ
ROPE_FREQSZROPE_FACTORS_LONGZROPE_FACTORS_SHORTZCONV1Dr   Ú__annotations__Z	ATTN_NORMZATTN_NORM_2ZATTN_QKVZATTN_QZATTN_KZATTN_VZATTN_OUTZATTN_OUT_NORMZATTN_POST_NORMZATTN_ROT_EMBDZFFN_NORMZFFN_PRE_NORMZFFN_POST_NORMZFFN_GATE_INPZFFN_GATE_INP_SHEXPZFFN_EXP_PROBS_BZFFN_UPZ
FFN_UP_EXPZFFN_UP_SHEXPZFFN_ACTZFFN_GATEZFFN_GATE_EXPZFFN_GATE_SHEXPZFFN_DOWNZFFN_DOWN_EXPZFFN_DOWN_SHEXPZATTN_Q_NORMZATTN_K_NORMZLAYER_OUT_NORMZSSM_INZ
SSM_CONV1DZSSM_XZSSM_DTZSSM_AZSSM_DZSSM_OUTZTIME_MIX_W0ZTIME_MIX_W1ZTIME_MIX_W2ZTIME_MIX_A0ZTIME_MIX_A1ZTIME_MIX_A2ZTIME_MIX_V0ZTIME_MIX_V1ZTIME_MIX_V2ZTIME_MIX_G1ZTIME_MIX_G2ZTIME_MIX_K_KZTIME_MIX_K_AZTIME_MIX_R_KZTIME_MIX_LERP_XZTIME_MIX_LERP_KZTIME_MIX_LERP_VZTIME_MIX_LERP_RZTIME_MIX_LERP_GZTIME_MIX_LERP_WZTIME_MIX_FIRSTZTIME_MIX_DECAYZTIME_MIX_DECAY_W1ZTIME_MIX_DECAY_W2ZTIME_MIX_KEYZTIME_MIX_VALUEZTIME_MIX_RECEPTANCEZTIME_MIX_GATEZTIME_MIX_LNZTIME_MIX_OUTPUTZCHANNEL_MIX_LERP_KZCHANNEL_MIX_LERP_RZCHANNEL_MIX_KEYZCHANNEL_MIX_RECEPTANCEZCHANNEL_MIX_VALUEZATTN_Q_AZATTN_Q_BZATTN_KV_A_MQAZ	ATTN_KV_BZATTN_K_BZATTN_V_BZATTN_Q_A_NORMZATTN_KV_A_NORMZATTN_SUB_NORMZFFN_SUB_NORMZDEC_ATTN_NORMZ
DEC_ATTN_QZ
DEC_ATTN_KZ
DEC_ATTN_VZDEC_ATTN_OUTZDEC_ATTN_REL_BZDEC_CROSS_ATTN_NORMZDEC_CROSS_ATTN_QZDEC_CROSS_ATTN_KZDEC_CROSS_ATTN_VZDEC_CROSS_ATTN_OUTZDEC_CROSS_ATTN_REL_BZDEC_FFN_NORMZDEC_FFN_GATEZ
DEC_FFN_UPZDEC_FFN_DOWNZDEC_OUTPUT_NORMZENC_ATTN_NORMZ
ENC_ATTN_QZ
ENC_ATTN_KZ
ENC_ATTN_VZENC_ATTN_OUTZENC_ATTN_REL_BZENC_FFN_NORMZENC_FFN_GATEZ
ENC_FFN_UPZENC_FFN_DOWNZENC_OUTPUT_NORMZCLSZCLS_OUTZCONVNEXT_DWZCONVNEXT_NORMZCONVNEXT_PW1ZCONVNEXT_PW2ZCONVNEXT_GAMMAZPOSNET_CONV1ZPOSNET_CONV2ZPOSNET_NORMZPOSNET_NORM1ZPOSNET_NORM2ZPOSNET_ATTN_NORMZPOSNET_ATTN_QZPOSNET_ATTN_KZPOSNET_ATTN_VZPOSNET_ATTN_OUTZV_MMPROJZV_MMPROJ_FCZV_MMPROJ_MLPZV_MMPROJ_PEGZV_ENC_EMBD_CLSZV_ENC_EMBD_PATCHZV_ENC_EMBD_POSZV_ENC_ATTN_QZV_ENC_ATTN_Q_NORMZV_ENC_ATTN_KZV_ENC_ATTN_K_NORMZV_ENC_ATTN_VZV_ENC_INPUT_NORMZV_ENC_ATTN_OZV_ENC_POST_ATTN_NORMZV_ENC_FFN_UPZV_ENC_FFN_GATEZV_ENC_FFN_DOWNZV_LAYER_SCALE_1ZV_LAYER_SCALE_2Z
V_PRE_NORMZV_POST_NORMZV_MM_INP_PROJZV_MM_INP_NORMZV_MM_SOFT_EMB_NORMZV_RESMPL_POS_EMBD_KZV_RESMPL_ATTN_QZV_RESMPL_ATTN_KZV_RESMPL_ATTN_VZV_RESMPL_ATTN_OUTZV_RESMPL_KVZV_RESMPL_POST_NORMZV_RESMPL_KV_NORMZV_RESMPL_Q_NORMZV_RESMPL_PROJZV_RESMPL_QUERYZV_TOK_EMBD_IMG_BREAKZV_MM_PATCH_MERGERZA_ENC_EMBD_POSZA_ENC_CONV1DZ
A_PRE_NORMZA_POST_NORMZA_ENC_ATTN_QZA_ENC_ATTN_KZA_ENC_ATTN_VZA_ENC_INPUT_NORMZA_ENC_OUTPUTZA_ENC_OUTPUT_NORMZA_ENC_FFN_UPZA_ENC_FFN_GATEZA_ENC_FFN_DOWNZA_MMPROJZA_MMPROJ_FCZA_MM_NORM_PREZA_MM_NORM_MIDr   r   ZARCTICZFFN_NORM_EXPr   r*   r5   r;   r>   rA   rD   rG   r   r   r   r)   r	      sš  
 žgþâ&Ú:ÆI·Y§h˜  ù  ñ  ë  ã 0 Ð 5 Ë : Æ E » I · N ² p  z †    þ    ù    ê    á  &  Ú  D  ¼  O  ±  V  ª  _  ¡  h  ˜  l  ”  u  ‹  z  †          û   
   ö      ñ      ì      ç      ã   #   Ý   )   ×   -   Ó   1   Ï   5   Ë   9   Ç   =   Ã   A   ¿   E   »   I   ·   M   ³   Q   ¯   U   «   Z   ¦   _   ¡   d   œ   i   —   n   ’   s      w   ‰   |   „        þ        ù        ò        ë        ä    !    ß    &    Ú    -    Ó    2    Î    6    Ê    ;    Å    ?    Á    D    ¼    H    ¸    L    ´    P    °    T    ¬    X    ¨    \    ¤    `         d    œ    h    ˜    l    ”    p        t    Œ    x    ˆ    |    „          ÿ          û     	     ÷          ó          ï          ë          ç          ã     !     ß     %     Û     *     Ö     .     Ò     2     Î     6     Ê     :     Æ     >     Â     B     ¾     F     º     J     ¶     N     ²     R     ®     W     ©     ]     £     b     ž     i     —     n     ’     r     Ž     v     Š     z     †     ~     ‚            ý            ù            õ            ñ            í            é            å            á      #      Ý      '      Ù      .      Ò      3      Í      7      É      =      Ã      A      ¿      F      º      O      ±      V      ª      _      ¡      c            l      ”      p            y      ‡              ü              ò              è       "       Þ       '       Ù       1       Ï       5       Ë       9       Ç       ?       Á       F       º       J       ¶       N       ²       R       ®       V       ª       Z       ¦       ^       ¢       b       ž       f       š       j       –       n       ’       r       Ž       v       Š       z       †       ~       ‚                ý        	        ÷                ó                ï                í                è                ¶        Qüÿ


r	   r   r   r   r    r.   c                 C  s
   t | |ƒS rC   )r	   )r   r   r   r   r)   Úget_tensor_name_mapÿ  rE   rL   N)r   r   r   r    r.   r	   )Ú
__future__r   Útypingr   Ú	constantsr   r   r   r   r	   rL   r   r   r   r)   Ú<module>   s              