o
    )i                      @   s
  d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z
d dlZd dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZm Z  d d
l!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ eeAZBd aCdd ZDdejEdeFfddZGG dd deZHG dd deZIG d d! d!eeH ZJG d"d# d#eIZKeIfdejEd$eLd%eLd&eLd'ed(ejMd)eNeI deIfd*d+ZOdS ),    N)Sequence)AnyCallableOptionalUnion)nn)AdapterLRUCacheAdapterModelAdapterModelManager)add_adapterdeactivate_adapterget_adapterlist_adaptersremove_adapterset_adapter_mapping)
LoRAConfig)init_logger)BaseLayerWithLoRALoRAMapping)LoRALayerWeightsPackedLoRALayerWeights)
PEFTHelper)get_punica_wrapper)
from_layerfrom_layer_logits_processorget_supported_lora_modulesis_regex_target_modulesparse_fine_tuned_lora_namereplace_submodule)FusedMoE)TensorizerConfig)SupportsLoRAsupports_multimodal)is_pooling_model)MultiModelKeys)PPMissingLayerWeightsMapper)get_packed_modules_mapping)is_pin_memory_availablec                   C   s   t d7 a t S N   )_GLOBAL_LORA_ID r,   r,   \/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/lora/models.pyget_lora_id+   s   r.   modelreturnc                 C   s(   t dd |  D rtd dS dS )z@Checks if the model contains FusedMoE layers and warns the user.c                 s   s    | ]}t |tV  qd S N)
isinstancer   ).0moduler,   r,   r-   	<genexpr>3   s    zis_moe_model.<locals>.<genexpr>zFor MoE models, vLLM currently does not support fused MoE LoRA inference. Please ensure that the loaded LoRA model does not contain expert weights.TF)anymodulesloggerZwarning_once)r/   r,   r,   r-   is_moe_model1   s   r9   c                   @   s  e Zd ZdZdededeeef ddfddZdedd fd	d
Z	e
defddZdedee fddZdedefddZe							d%dedeeejf dededeej deeeejf  dee deeeef  deee  dee dd fddZedddddddddd ed!ee dedee dedeej dee deeeef  deee  dee d"ee dd fd#d$ZdS )&	LoRAModelzA LoRA fine-tuned model.lora_model_idranklorasr0   Nc                 C   s.   || _ |dksJ d| j  || _|| _dS )z
        Args:
            lora_model_id: The integer id for the lora model.
            rank: lora rank.
            loras: module name -> weights for lora-replaced layers.

        r   z.a valid lora id should be greater than 0, got N)idr<   r=   )selfr;   r<   r=   r,   r,   r-   __init__?   s   

zLoRAModel.__init__c                 C   s   | j || j| j dS )z[Return a copy of the object with different ids.

        Will share the underlying tensors.)r<   r=   )	__class__r<   r=   copy)r?   r;   r,   r,   r-   cloneT   s
   zLoRAModel.clonec                 C   s"   | j rtdd | j  D S dS )Nc                 s   s    | ]}|j V  qd S r1   )extra_vocab_size)r3   lorar,   r,   r-   r5   `   s    z-LoRAModel.extra_vocab_size.<locals>.<genexpr>r   )r=   maxvaluesr?   r,   r,   r-   rD   ^   s   
zLoRAModel.extra_vocab_sizemodule_namec                 C   s   | j |dS )z#Get LoRA for a given module by nameN)r=   getr?   rI   r,   r,   r-   get_lorac   s   zLoRAModel.get_lora	lora_namec                 C   s
   || j v S r1   )r=   )r?   rM   r,   r,   r-   check_lora_nameg      
zLoRAModel.check_lora_namecudatensorspeft_helperdevicedtype
embeddingstarget_embedding_paddingembedding_modulesembedding_padding_modulesweights_mapperc                    s  t |dkot }i }| D ]\}}t||
\ }} |vrQd}|rH|dus)J t fdd|D d}|rH|||  j||d}|rH| }t ||| < |rt|j||d	 |  _
|j||d	 }|rn| }||  _
q|r|j||d	 |  _|r|  j |  _q|j||d	 |  _|	dusJ t fdd|	D r|dur|  j}||jd ksJ ||jd  }tjj|d|f|  _|r|  j |  _q| D ]}|  q| ||j|S )	z0Create a LoRAModel from a dictionary of tensors.cpuNc                 3   s    | ]	}| v r|V  qd S r1   r,   )r3   krI   r,   r-   r5      s    z.LoRAModel.from_lora_tensors.<locals>.<genexpr>)rS   rT   c                 3   s    | ]}| v V  qd S r1   r,   )r3   namer\   r,   r-   r5          r*   r   )strr(   itemsr   nextto
pin_memoryr   Zfrom_configtbiaslora_alora_br6   shapetorchr   Z
functionalpadrG   optimizer)clsr;   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rc   r=   Ztensor_nameZtensorZ	is_lora_aZis_biasZlora_embeddings_tensorZembeddings_modulere   rg   ZadditionrE   r,   r\   r-   from_lora_tensorsk   s   



zLoRAModel.from_lora_tensors)r;   rS   rT   rV   rW   rX   rY   tensorizer_config_dictlora_direxpected_lora_modulesro   c                   s  t jd}t jd}t jd}t jd}i }g dtf fdd}|rXdd	lm} tdi |}t j|jd
}| }||fd|j	i|j
}|| nt j|rg tj|dd}|| | D ]	}||||< qpW d   n1 sw   Y  nNt j|rg |j}t|ts|g}|D ]}|dd }| vr| qrt|j std d  d dtj||dd}nt dd}t j|rtj|}nt j|rtj||dd}| j|du rt n|||||||||	d
S )a8  Create a LoRAModel from a local checkpoint.

        Args:
            lora_dir: The local path that has lora data.
            expected_lora_modules: Name of modules that are expected to be
                replaced by lora.
            peft_helper: Loaded lora configuration information.
            lora_model_id: LoRA model id. If not given, automatically set by
                a global counter.
            device: Device where the lora model is loaded.
            dtype: dtype of the lora model weights.

        Returns:
            Loaded LoRA Model.
        zadapter_model.safetensorszadapter_model.binznew_embeddings.safetensorsznew_embeddings.binr7   c                    sb   |   D ]}t|\}}}|dd }| vr| qr/td d  d dd S )N.While loading , expected target modules in  but received 6. Please verify that the loaded LoRA module is correct)keysr   splitappend
ValueError)r7   Zlora_modulerI   _	part_namerq   rp   Zunexpected_modulesrY   r,   r-   check_unexpected_modules   s"   

zALoRAModel.from_local_checkpoint.<locals>.check_unexpected_modulesr   )TensorDeserializerzadapter_model.tensorsrT   pt)Z	frameworkNrr   rs   rt   ru   rv   rw   T)Zmap_locationZweights_onlyz doesn't contain tensors)
r;   rQ   rR   rS   rT   rU   rV   rW   rX   rY   r,   )ospathjoindictZ
tensorizerr   r    Ztensorizer_dirZ_construct_tensorizer_argsrT   Zdeserialization_kwargsisfilesafetensorsZ	safe_openrx   Z
get_tensortarget_modulesr2   listry   rz   r   r{   ri   loadZ	load_filern   r.   )rm   rp   rq   rR   r;   rS   rT   rV   rW   rX   rY   ro   Zlora_tensor_pathZlora_bin_file_pathZnew_embeddings_tensor_pathZnew_embeddings_bin_file_pathrQ   r   r   Ztensorizer_configZtensorizer_argsfr4   r   r}   rU   r,   r~   r-   from_local_checkpoint   s   



zLoRAModel.from_local_checkpoint)rP   NNNNNN)__name__
__module____qualname____doc__intr   r_   r   r@   rC   propertyrD   r   rL   boolrN   classmethodri   ZTensorr   rT   r   r&   rn   r   r,   r,   r,   r-   r:   <   s    


	

F	

r:   c                       s  e Zd ZdZdedededededejf fdd	Z	e
d
efddZe
d
efddZe
d
efddZded
efddZdefddZdefddZded
efddZded
dfddZdd  Zd!d" Zd#ed$d%fd&d'Z	dFded(ed)eeeef  d
efd*d+Zd#efd,d-Zd#ed
efd.d/Zd0ed
dfd1d2Zd3ed
dfd4d5Z d3ed#ed
ee! fd6d7Z"d8ed
efd9d:Z#d;ed
efd<d=Z$ded
dfd>d?Z%d8ed
efd@dAZ&d
eee'f fdBdCZ(d8ed
ee' fdDdEZ)  Z*S )GLoRAModelManagerz7A manager that manages multiple LoRA-fine-tuned models.r/   max_num_seqsmax_num_batched_tokens
vocab_sizelora_configrS   c                    s  || _ || _|| _| j| jksJ t|d d | _dg| j | _|| _	t
|| j| j| j jd| _t | t| j| _| jsEJ dd| jjj d t| j| _t| jo^t| jd| _t| j| _t| j| _i | _i | _d| _|   | | j_d| _ dS )	a  Create a LoRAModelManager and adapter for a given model.

        Args:
            model: the model to be adapted.
            max_num_seqs: the maximum number of sequences model can run in a
                single batch.
            max_num_batched_tokens: the maximum number of tokens model can run
                in a single batch.
            vocab_size: the vocab size of the model.
            lora_config: the LoRA configuration.
           N)Zmax_batchesrS   	max_lorasz"No supported LoRA modules found in rr   get_mm_mappingZLoRA)!r   rS   r   capacity
lora_slotsmathceilr   lora_index_to_idr   r   r   punica_wrappersuperr@   r   r/   supported_lora_modulesrA   r   r'   packed_modules_mappingr"   hasattrsupports_mmr#   r9   packed_modulesr7   _last_mapping_create_lora_moduleslora_managerZadapter_typer?   r/   r   r   r   r   rS   rA   r,   r-   r@   :  s:   


zLoRAModelManager.__init__r0   c                 C      | j jS r1   )r   Zmax_cpu_lorasrH   r,   r,   r-   r   r     zLoRAModelManager.capacityc                 C   r   r1   )r   r   rH   r,   r,   r-   r   v  r   zLoRAModelManager.lora_slotsc                 C   s   | j S r1   )r   rH   r,   r,   r-   adapter_slotsz  s   zLoRAModelManager.adapter_slotslora_idc           
      C   s  || j v rdS tdd t| jD d}|du rtd|\}}d| j |< | j| }td|j| |j| j|< | j	
 D ]H\}}| ||}|r|  |j}	t|	sct|	trrtdd |	D rr| jjsrd|_td| d	|||j|j|j|j q=|| q=d
S )z;Move LoRA into a GPU buffer to be used in the forward pass.Fc                 s   s$    | ]\}}|d u r||fV  qd S r1   r,   )r3   ir   r,   r,   r-   r5     s   
 z4LoRAModelManager.activate_adapter.<locals>.<genexpr>NzNo free lora slotsz+Activating LoRA. int id: %d, slot index: %dc                 s   s    | ]}|d uV  qd S r1   r,   )r3   br,   r,   r-   r5     r^   z Adapter bias cannot be used for z without --enable-lora-bias.T)_active_adaptersra   	enumerater   r{   _registered_adaptersr8   debugr>   r7   r`   _get_lora_layer_weightsrk   re   ri   Z	is_tensorr2   r   r6   r   bias_enabledZset_lorarf   rg   Zembeddings_tensorZ
reset_lora)
r?   r   Zfirst_free_slotindexr|   
lora_modelrI   r4   Zmodule_lorare   r,   r,   r-   activate_adapter~  sN   




z!LoRAModelManager.activate_adapterc                 C   s2   z| j |}d | j |< W d S  ty   Y d S w r1   )r   r   r{   )r?   r   r   r,   r,   r-   _deactivate_adapter  s   z$LoRAModelManager._deactivate_adapterrE   c                 C   s   |  | || j|j< d S r1   )_create_merged_loras_inplacer   r>   )r?   rE   r,   r,   r-   _add_adapter  s   
zLoRAModelManager._add_adapterc                 C   s   t d)%Pin a LoRAModel in the manager cache.zVPinning is not supported in LoRAModelManager. Use LRUCacheLoRAModelManager for pinning)NotImplementedErrorr?   r   r,   r,   r-   pin_adapter  s   zLoRAModelManager.pin_adaptermappingNc                 C   s&   | j || j| jd | j| jj d S r)   )r   Zupdate_metadatar   r   r   r   lora_extra_vocab_sizer?   r   r,   r,   r-   _set_adapter_mapping  s   z%LoRAModelManager._set_adapter_mappingc                 C   s&   | j   dg| j | _| j  dS )z'Remove all LoRAModels from the manager.N)r   clearr   r   r   rH   r,   r,   r-   remove_all_adapters  s   
z$LoRAModelManager.remove_all_adaptersc           
   
   C   s$  dt dt fdd}| jjddD ]\}}t|trq| |s q| |r,td| q|	dd	 }| j
|g }t| j|t|| j| j|| jj}d
|v rud}||}|r^| d| }| j|}	t| j|t|	|| j| j| jj}| jr~t|ts~q| || | | || j qd S )NrI   r0   c                 S   s   |  dd S )Nrr   r   )
rpartitionr\   r,   r,   r-   _parent_module  s   z=LoRAModelManager._create_lora_modules.<locals>._parent_moduleF)Zremove_duplicatezlRegarding multimodal models, vLLM currently only supports adding LoRA to language model, %s will be ignored.rr   rs   Zlm_headZlogits_processor)r_   r/   named_modulesr2   r%   _match_target_modules_filter_unsupported_mm_moduler8   warningry   r   rJ   r   r   r   r   configZget_submoduler   r   r   register_module_register_packed_modulesZset_mappingr   )
r?   r   rI   r4   partsZpacked_moduled_lstZ
new_moduleZlogits_processor_module_nameparent_moduleZlogits_processor_moduler,   r,   r-   r     s^   




z%LoRAModelManager._create_lora_modulesrI   r4   r   c                 C   s   t |tsJ || j|< d S r1   )r2   r   r7   )r?   rI   r4   r,   r,   r-   r     s   z LoRAModelManager.register_moduler<   rW   c                 C   s  t ||i }| j D ]\}}| jj}| |r"t|tr"| |r#q|	d}|| j
vr|dus3J |d |v rt|jdrG|jj| jj n|jjjd }	t|jdrX|jjn|jjjd }
t|jdri|jjn|jjjd }tj||	|
||jd jd||d	}ntj||jd jd |jd jd
 ||jd jd|d}|  nE|	d}| j|d  }g }t|D ]-\}}tj|d | |j| jd |j| jd
 ||j| jd|d}|  || qt|}||j|< q|S )z-Create zero-initialized LoRAModel for warmup.rr   Nrs   org_vocab_sizer*   embedding_dimr   rZ   )embeddings_tensor_dimr   )r   )r:   r/   r   r   r   r   r2   r   r   ry   r   r   Z
base_layerr   r   weightrh   r   r   Zcreate_dummy_lora_weightsZlora_a_stackedrT   Zlora_b_stackedrk   r   r   rz   r   packr=   )r?   r   r<   rW   r/   rI   r4   r   r   Z	input_dimZ
output_dimr   rE   replacementsZsublorasr   rl   r,   r,   r-   create_dummy_lora  s   








	


	
z"LoRAModelManager.create_dummy_lorac                    s   t  fdd| jD S )Nc                 3   s,    | ]}t d j|d p| kV  qdS )z.*\.{target_module}$)target_moduleN)rematchformat)r3   r   r\   r,   r-   r5   M  s    

z9LoRAModelManager._match_target_modules.<locals>.<genexpr>)r6   r   rK   r,   r\   r-   r   L  s   z&LoRAModelManager._match_target_modulesc                    s6   | j r| j }|j|j }t fdd|D S dS )z
        Regarding multimodal models, vLLM currently only supports adding LoRA to
        language model. LoRA for other modules, such as the vision tower, will
        be filtered out.
        c                    s   g | ]}  |qS r,   )
startswith)r3   prefixr\   r,   r-   
<listcomp>]  s    zBLoRAModelManager._filter_unsupported_mm_module.<locals>.<listcomp>F)r   r/   r   Z	connectorZtower_modelr6   )r?   rI   Zmodule_mappingZ
prefix_lstr,   r\   r-   r   S  s   
z.LoRAModelManager._filter_unsupported_mm_modulemodule_full_namec                    s^   | d}|d }| j|g }t|dkrd S d|d d   fdd|D | j|< d S )Nrr   rs   r*   c                    s    g | ]} r d  | n|qS )rr   r,   )r3   rl   r   r,   r-   r   i  s    z=LoRAModelManager._register_packed_modules.<locals>.<listcomp>)ry   r   rJ   lenr   r   )r?   r   r   rI   r   r,   r   r-   r   `  s   

z)LoRAModelManager._register_packed_modulesr   c                 C   s   | j  D ]b\}}g }t }d}|D ]}| ||}|| |r(d}|| q|s,qtt|D ]}	||	 r9q2d ||	< q2| jrS|	|sS|
dd}
|	|rS|
}t||j|< |D ]	}|j|d  q]qd S )NFTmodel. )r   r`   setr   rz   addranger   r#   rN   replacer   r   r=   pop)r?   r   rI   Znew_module_namesZreplacement_lorasZreplaced_moduleZhas_replacementrl   rE   r   Zreplaced_module_namer4   r,   r,   r-   r   m  s<   





z-LoRAModelManager._create_merged_loras_inplacec                 C   sB   |}| j r||s|dd}||r|}td ||S )Nr   r   z\For the pool model, successfully loaded the LoRA weights after removing the prefix 'model.'.)r#   rN   r   r8   Z	info_oncerL   )r?   r   rI   Zorg_module_namer,   r,   r-   r     s   


z(LoRAModelManager._get_lora_layer_weights
adapter_idc                 C      t || j| jS r1   )r   r   r   r?   r   r,   r,   r-   r        z#LoRAModelManager.deactivate_adapteradapterc                 C   s&   t d|j|j t|| j| j| jS )N%Adding lora. Model id: %d, int id: %d)r8   r   r>   r   r   r   r   )r?   r   r,   r,   r-   r     s   zLoRAModelManager.add_adapterc                 C   s   t || j| j| _d S r1   )r   r   r   r   r,   r,   r-   r     s   
z$LoRAModelManager.set_adapter_mappingc                 C   r   r1   )r   r   r   r   r,   r,   r-   r     r   zLoRAModelManager.remove_adapterc                 C   s
   t | jS r1   )r   r   rH   r,   r,   r-   r     rO   zLoRAModelManager.list_adaptersc                 C   s   t || jS r1   )r   r   r   r,   r,   r-   r     s   zLoRAModelManager.get_adapterr1   )+r   r   r   r   r!   r   r   ri   rS   r@   r   r   r   r   r   r   r   r:   r   r   r   r   r   r   r_   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r,   r,   r   r-   r   7  sx    8
'
>
D
r   c                       s0   e Zd Zdedeegef f fddZ  ZS )LoRALRUCacher   deactivate_lora_fnc                    s   t  || d S r1   )r   r@   )r?   r   r   r   r,   r-   r@     s   zLoRALRUCache.__init__)r   r   r   r   r   r   r@   r   r,   r,   r   r-   r     s    r   c                       s   e Zd ZdZdejdededededej	f fdd	Z
d
eeef fddZded
efddZded
ef fddZd
efddZded
efddZdefddZdefddZ  ZS )LRUCacheLoRAModelManagerz;A model manager that manages multiple LoRAs with LRU cache.r/   r   r   r   r   rS   c                    s:   t  |||||| t| j| j| _t| j| j| _d S r1   )	r   r@   r   r   r   r   r   r   r   r   r   r,   r-   r@     s   
z!LRUCacheLoRAModelManager.__init__r0   c                 C   s   t | jjS )zList all registered LoRAModels.)r   r   cacherH   r,   r,   r-   r     s   z&LRUCacheLoRAModelManager.list_adaptersrE   c                 C   sF   t d|j|j |j| jvr| | d}|S | j|j d}|S )zAdd a LoRAModel to the manager.r   TF)r8   r   r>   r   r   touch)r?   rE   Z	was_addedr,   r,   r-   r     s   
z$LRUCacheLoRAModelManager.add_adapterr   c                    s@   || j vrt| j | jkr| j   t |}| j | |S r1   )r   r   r   remove_oldestr   r   r   )r?   r   resultr   r,   r-   r     s   
z)LRUCacheLoRAModelManager.activate_adapterc                 C   s    t | jdkr| j  dS dS )Nr   TF)r   r   r   rH   r,   r,   r-   remove_oldest_adapter  s   
z.LRUCacheLoRAModelManager.remove_oldest_adapterc                 C   s   |  | | | dS )r   T)_pin_lora_in_cpu_cache_pin_lora_in_gpu_cacher   r,   r,   r-   r     s   

z$LRUCacheLoRAModelManager.pin_adapterc              
   C   s>   z	| j | W d S  ty } z	td| d|d }~ww )NzPinning failed. LoRA z is not registered.)r   pinr{   )r?   r   errr,   r,   r-   r     s   z/LRUCacheLoRAModelManager._pin_lora_in_cpu_cachec                 C   s$   || j vr
| | | j | d S r1   )r   r   r   r   r,   r,   r-   r     s   

z/LRUCacheLoRAModelManager._pin_lora_in_gpu_cache)r   r   r   r   r   Moduler   r   ri   rS   r@   r   r:   r   r   r   r   r   r   r   r   r   r,   r,   r   r-   r     s,    
r   r   r   r   r   rS   lora_manager_clsc           	   	   K   s>   t | tstdt|  d|d| |||||d|}|S )z(Create a LoRA adapter for a given model.zModel z is not supported for LoRA.)r/   r   r   r   r   rS   Nr,   )r2   r!   r{   type)	r/   r   r   r   r   rS   r  kwargsr   r,   r,   r-   create_lora_manager  s   

r  )Pr   r   collections.abcr   typingr   r   r   r   regexr   Zsafetensors.torchr   ri   r   Zvllm.adapter_commons.modelsr   r	   r
   Zvllm.adapter_commons.utilsr   r   r   r   r   r   Zvllm.configr   Zvllm.loggerr   Zvllm.lora.layersr   r   Zvllm.lora.lorar   r   Zvllm.lora.peft_helperr   Zvllm.lora.punica_wrapperr   Zvllm.lora.utilsr   r   r   r   r   r   Z$vllm.model_executor.layers.fused_moer   Z+vllm.model_executor.model_loader.tensorizerr    Zvllm.model_executor.modelsr!   r"   Z%vllm.model_executor.models.interfacesr#   Z)vllm.model_executor.models.module_mappingr$   Z vllm.model_executor.models.utilsr%   r&   Zvllm.model_executor.utilsr'   Z
vllm.utilsr(   r   r8   r+   r.   r  r   r9   r:   r   r   r   r   rS   r  r  r,   r,   r,   r-   <module>   sl      |  ~L