o
    0 i,                     @   s   d dl mZ d dlmZ ddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ er0dd	lmZ e r7d d
lZe rId dlmZ edd Zee_e	eZG dd deZd
S )    )defaultdict)TYPE_CHECKING   )prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModelN)	HQQLinearc                 C   s   t jd| j| jdS )Nr   )dtypedevice)torchemptycompute_dtyper   self r   q/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/transformers/quantizers/quantizer_hqq.pyweight%   s   r   c                	       s   e Zd ZdZdZdZdZdgZ fddZddd	e	e
 d
e
de	e
 fddZddde	e
 de	e
 de	e
 fddZddde
defddZddddde
ddfddZdd Z		d&ddZd&dd Zd'd"d#Zedefd$d%Z  ZS )(HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    FTZhqqc                    s   t  stdt j|fi | d | _d| _td d  dh | _|	dds.|	ddr2t
d| jd u rJd|v rA|d | _n	tj| _td |	d	}t|trqd
| v s`d| v rdt
dtt| dk| _d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.FbiasZfrom_tfZ	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpuZdiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r	   )r   ImportErrorsuper__init__r   using_multi_gpur   state_dict_keyshqq_keysget
ValueErrorr   Zfloat32loggerinfo
isinstancedictvalueslenset)r   quantization_configkwargsr   	__class__r   r   r   9   s2   



zHqqHfQuantizer.__init__modelr   missing_keysprefixreturnc                 K   s   | j r
dd |D S |S )Nc                 S   s   g | ]}d |vr|qS )r   r   ).0keyr   r   r   
<listcomp>_       z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>)pre_quantized)r   r/   r0   r1   r,   r   r   r   update_missing_keys[   s   z"HqqHfQuantizer.update_missing_keysexpected_keysloaded_keysc                    s8  | j s|S  fdd t|}| D ]\}}||_qt } || t }|D ]|jjd D ]}	|	v r;| q0q(||8 }td d tj	ddd
 dh }
t }|D ]tfdd	|D rg| qU||8 }|D ])d
 |v r~|d
  n|fdd|
D  d |v r|d  qnt|S )Nc                    s:   |   D ]\}}t|tjjr||j  || qd S N)Znamed_childrenr&   r   nnLinearaddname)r/   Zlayersr?   module)_find_hqq_quantizable_layersr   r   rA   k   s
   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersskip_modulesr   FZlinear_layerquant_configr   r   del_origr   c                 3       | ]}| v V  qd S r;   r   )r3   _module)r4   r   r   	<genexpr>       z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>z.weightc                    s   h | ]} d  | qS ).r   )r3   Z_ref_key)rG   r   r   	<setcomp>   r6   z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>z.bias)r7   r*   Znamed_modulesr?   configr+   r>   r   r   Zfloat16r    anyupdatelist)r   r/   r9   r:   Znew_keysr?   r@   Z_valid_modulesZ_skipped_modulesZ_skip_moduleZ	_ref_keysZ_rm_keysr   )rA   rG   r4   r   update_expected_keysd   sN   

	
z#HqqHfQuantizer.update_expected_keys
param_namec                 K   s   t ||\}}t|tjjS r;   )r   r&   r   r<   r=   )r   r/   rQ   r,   r@   _r   r   r   param_needs_quantization   s   z'HqqHfQuantizer.param_needs_quantizationparam_valueztorch.Tensortarget_deviceztorch.devicec                    s.  t ||\}|ddd }t ||\}}	|jjd }
|jjd }tfdd|D r?j||j|| jdid	d
d d S | jrt	| dsLt
t| _| j| ||i | j|  t fdd| jD rd v spjd u rtd d | j|d	d}|  |jd urt|jtjrtj|j|_| jr| |}t||	| | j|= d S j||id	d
d jjjdkoĈjd u pĈjjjdk}|rdjddd  }d|
v r|
}n||
v r|
| }t|| j|d
d}|jd urt|jtjrtj|j|_| jr| |}t||	| d S d S )NrJ   r	   r   rD   rB   c                 3   s    | ]}| j v V  qd S r;   )r?   )r3   Zskip_module)r@   r   r   rH      s    z8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>)r   r   FT)strictZassign
hqq_paramsc                 3   rF   r;   r   )r3   k)rW   r   r   rH      rI   r   rC   metaZweight_quant_params)rD   r   r   rE   ) r   rsplitrL   r+   rM   Zload_state_dicttor   r7   hasattrr   r'   rW   rN   allr!   r   r   r&   r   ZTensorr<   	Parameterr   _patch_layer_for_multigpusetattrr   r   typejoinr?   split)r   r/   rT   rQ   rU   r,   Ztensor_namemodule_nameparent_modulenoderD   rB   	hqq_layerZmodule_is_readyZ
module_tagZmodule_quant_configr   )rW   r@   r   create_quantized_param   sl   


*



z%HqqHfQuantizer.create_quantized_paramc                    s   dd   fdd_ S )Nc                 S   s4   t || j|   }| jd ur|| j7 }|S r;   )r   matmulr\   r   Z
dequantizetr   )r   xoutr   r   r   forward_with_device   s   

zEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_devicec                    s
    | S r;   r   )rl   rn   rh   r   r   <lambda>   s   
 z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>)forward)r   rh   r   ro   r   r`      s   z(HqqHfQuantizer._patch_layer_for_multigpuc                 K   s   t || jd}d S )N)r+   )r   r+   r   r/   r,   r   r   r   $_process_model_before_weight_loading  s   z3HqqHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)Zis_hqq_quantizedis_serializableZis_hqq_serializablerr   r   r   r   #_process_model_after_weight_loading
  s   
z2HqqHfQuantizer._process_model_after_weight_loadingNc                 C      dS rt   r   )r   Zsafe_serializationr   r   r   ru     s   zHqqHfQuantizer.is_serializablec                 C   rw   rt   r   r   r   r   r   is_trainable  s   zHqqHfQuantizer.is_trainable)r/   r   r;   )__name__
__module____qualname____doc__Zuse_keep_in_fp32_modulesZ requires_parameters_quantizationZrequires_calibrationZrequired_packagesr   rO   strr8   rP   boolrS   ri   r`   rs   rv   ru   propertyrx   __classcell__r   r   r-   r   r   .   sV    "
	
;
R


	
r   )collectionsr   typingr   Zintegrationsr   utilsr   r   r   baser
   Zquantizers_utilsr   Zmodeling_utilsr   r   Zhqq.core.quantizer   r   r   Z
get_loggerry   r$   r   r   r   r   r   <module>   s"   

