o
    0 i=                     @   sp   d dl mZmZmZ ddlmZ erddlmZ ddlm	Z	m
Z
mZ e
 r)d dlZeeZG dd	 d	eZdS )
    )TYPE_CHECKINGOptionalUnion   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_torch_availableloggingNc                       s   e Zd ZdZdZdZdgZ fddZdd ZdddZ		d d	d
de
ee  fddZdeeeeef f deeeeef f fddZd!ddZd ddZedefddZedefddZ  ZS )"BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    FTZ
acceleratec                    s   t  j|fi | || _d S N)super__init__quantization_config)selfr   kwargs	__class__ t/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/transformers/quantizers/quantizer_bitnet.pyr   -   s   
zBitNetHfQuantizer.__init__c                 O   s   t  std|dds|ddrtdtj s#td d S |d}|d u r3td d S |d urLt	|t
rNd	| v sHd
| v rPtdd S d S d S )NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)Zfrom_tfFZ	from_flaxztLoading ternary weights from tf/flax is currently not supported, please make sure the weights are in PyTorch format.zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpuZdiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r	   ImportErrorget
ValueErrortorchcudaZis_availableloggerZwarning_once
isinstancedictvalues)r   argsr   r   r   r   r   validate_environment1   s.   

"z&BitNetHfQuantizer.validate_environmentmodelr   c                 K   s   |S r   r   )r   r$   r   r   r   r   #_process_model_after_weight_loadingN      z5BitNetHfQuantizer._process_model_after_weight_loadingNkeep_in_fp32_modulesc                 K   s:   ddl m} | || jj|| _||| j| j| jd}d S )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   pre_quantized)Zintegrationsr(   Zget_modules_to_not_convertr   r)   r*   )r   r$   r'   r   r(   r   r   r   $_process_model_before_weight_loadingQ   s   

z6BitNetHfQuantizer._process_model_before_weight_loading
max_memoryreturnc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   ).0keyvalr   r   r   
<dictcomp>e   s    z7BitNetHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   r,   r   r   r   adjust_max_memoryd   s   z#BitNetHfQuantizer.adjust_max_memorytarget_dtypetorch.dtypec                 C   s
   t j}|S r   )r   Zint8)r   r4   r   r   r   adjust_target_dtypeh   s   z%BitNetHfQuantizer.adjust_target_dtypec                 C   s   dS )NTr   )r   Zsafe_serializationr   r   r   is_serializablel   r&   z!BitNetHfQuantizer.is_serializablec                 C      | j jdko| j jdkS )Nautobitlinearonliner   Zlinear_classZquantization_moder   r   r   r   is_trainableo   s   
zBitNetHfQuantizer.is_trainablec                 C   r8   )zUFlag indicating whether the quantized model can carry out quantization aware trainingr9   r:   r;   r<   r   r   r   is_qat_trainablev   s   
z"BitNetHfQuantizer.is_qat_trainable)r$   r   r   )r4   r5   r-   r5   )__name__
__module____qualname____doc__Z requires_parameters_quantizationZrequires_calibrationZrequired_packagesr   r#   r%   r   liststrr+   r    r   intr3   r6   r7   propertyboolr=   r>   __classcell__r   r   r   r   r       s*    


2

r   )typingr   r   r   baser   Zmodeling_utilsr   utilsr	   r
   r   r   Z
get_loggerr?   r   r   r   r   r   r   <module>   s   
