o
    )i1                     @   sx  d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d
dlmZmZmZ d
dlmZmZmZmZ er^d dl m!Z! ee"Z#ede$ej% dZ&ededZ'ededdZ(G dd de
e( Z)G dd de
e' Z*G dd de
e' Z+eddG dd dee' Z,ed
dde-de.fd d!Z/G d"d# d#Z0dS )$    )Mapping)	dataclass)	lru_cache)TYPE_CHECKINGGenericOptionalProtocolTypeVarN)InputProcessingContext)init_logger)AnyTokenizercached_tokenizer_from_config)ClassRegistry   )BaseMultiModalProcessorBaseProcessingInfoProcessingCache)BaseDummyInputsBuilderDummyDecoderDataDummyEncoderDataMultiModalProfiler)ModelConfigN)bound_I_I_coT)r   	covariantc                   @   s"   e Zd ZdZdedefddZdS )ProcessingInfoFactory
    Constructs a
    [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor]
    instance from the context.
    ctxreturnc                 C      d S N )selfr   r#   r#   d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/multimodal/registry.py__call__&   s   zProcessingInfoFactory.__call__N)__name__
__module____qualname____doc__r
   r   r&   r#   r#   r#   r%   r      s    r   c                   @   s&   e Zd ZdZdedee fddZdS )DummyInputsBuilderFactoryz
    Constructs a
    [`BaseDummyInputsBuilder`][vllm.multimodal.profiling.BaseDummyInputsBuilder]
    instance from the context.
    infor    c                 C   r!   r"   r#   )r$   r,   r#   r#   r%   r&   4   s   z"DummyInputsBuilderFactory.__call__N)r'   r(   r)   r*   r   r   r&   r#   r#   r#   r%   r+   -   s    r+   c                
   @   s<   e Zd ZdZdddedee dee dee fdd	Z	dS )
MultiModalProcessorFactoryr   Ncacher,   dummy_inputsr/   r    c                C   r!   r"   r#   )r$   r,   r0   r/   r#   r#   r%   r&   ?   s   z#MultiModalProcessorFactory.__call__)
r'   r(   r)   r*   r   r   r   r   r   r&   r#   r#   r#   r%   r-   8   s    r-   )frozenc                   @   sN   e Zd ZU ee ed< ee ed< ee ed< dddede	e
 fdd	ZdS )
_ProcessorFactoriesr,   	processorr0   Nr.   r   r/   c                C   s$   |  |}| |}| j|||dS )Nr.   r,   r0   r3   )r$   r   r/   r,   Zdummy_inputs_builderr#   r#   r%   build_processorO   s   

z#_ProcessorFactories.build_processor)r'   r(   r)   r   r   __annotations__r-   r+   r
   r   r   r5   r#   r#   r#   r%   r2   I   s   
 r2   )maxsizemodel_idcapacity_gbc                 C   s   |dkrt |S d S )Nr   )r   )r8   r9   r#   r#   r%   _get_processor_cache\   s   r:   c                
   @   s  e Zd ZdZd1ddZd2dd	Zdddefd
dZdddefddZdddefddZ	ddde
eef fddZddde
eef fddZddde
eef fddZdddefddZddde
eef fddZdee dee dee fddZd2dd Z	d3ddd!ee defd"d#Zdd$ddd!ee defd%d&Zddd'ddd!ee d(ee dee fd)d*Z	d3ddd+ed,ee
eef  de fd-d.Z!	d3ddd+ed,ee
eef  de"fd/d0Z#dS )4MultiModalRegistryzL
    A registry that dispatches data processing according to the model.
    r    Nc                 C   s   t tjtf  | _d S r"   )r   nnModuler2   _processor_factories)r$   r#   r#   r%   __init__f   s   zMultiModalRegistry.__init__model_configr   c                 C   s   |j }|j}t||S r"   )modelmm_processor_cache_gbr:   )r$   r@   r8   r9   r#   r#   r%   r:   j   s   
z'MultiModalRegistry._get_processor_cachec                 C   s   |  | }r|  dS )z'Reset the multi-modal processing cache.T)r:   reset)r$   r@   Zprocessor_cacher#   r#   r%   reset_processor_cacheo   s   z(MultiModalRegistry.reset_processor_cachec                 C   s    |  |sdS | }|jdkS )zWhether the multi-modal input cache should be enabled.
        NOTE: This is put under MultiModalRegistry on purpose to respect 
        text-only mode for multimodal models.
        Fr   )supports_multimodal_inputsget_multimodal_configrB   )r$   r@   	mm_configr#   r#   r%   enable_mm_input_cachev   s   

z(MultiModalRegistry.enable_mm_input_cachec                    sP   |j sdS | j|dd}| }|  t fdd|D r&td dS dS )z
        Checks if the model supports multimodal inputs.
        Returns True if the model is multimodal with any non-zero supported 
        modalities, otherwise returns False, effectively running in 
        text-only mode.
        FN	tokenizerc                 3   s    | ]
}  |d kV  qdS )r   N)Zget_limit_per_prompt).0modalityrG   r#   r%   	<genexpr>   s
    
z@MultiModalRegistry.supports_multimodal_inputs.<locals>.<genexpr>zcAll limits of multimodal modalities supported by the model are set to 0, running in text-only mode.T)is_multimodal_model_create_processing_infoZget_supported_mm_limitsrF   allloggerZ	info_once)r$   r@   r,   Zsupported_modalitiesr#   rM   r%   rE      s   z-MultiModalRegistry.supports_multimodal_inputsc                 C   sJ   |j si S | j|dd}t|}|j}| |}||dd | D S )z
        Get the maximum number of tokens per data item from each modality based
        on underlying model configuration.
        Fdisable_cachec                 S   s   i | ]\}}|d kr|dqS )r   r   r#   )rK   rL   limitr#   r#   r%   
<dictcomp>   s    zJMultiModalRegistry.get_max_tokens_per_item_by_modality.<locals>.<dictcomp>)rO   create_processorr   Zmax_model_lenget_mm_limits_per_promptZget_mm_max_contiguous_tokensitems)r$   r@   r3   profilerseq_len	mm_limitsr#   r#   r%   #get_max_tokens_per_item_by_modality   s   
z6MultiModalRegistry.get_max_tokens_per_item_by_modalityc                    &   |  |  fdd| | D S )aW  
        Get the maximum number of tokens per data item from each modality based
        on underlying model configuration, excluding modalities that user
        explicitly disabled via `limit_mm_per_prompt`.

        Note:
            This is currently directly used only in V1 for profiling the memory
            usage of a model.
        c                    s"   i | ]\}} | d kr||qS )r   r#   rK   keyZmax_tokens_per_mm_itemr\   r#   r%   rV      s
    zRMultiModalRegistry.get_max_tokens_per_item_by_nonzero_modality.<locals>.<dictcomp>rX   r]   rY   r$   r@   r#   ra   r%   +get_max_tokens_per_item_by_nonzero_modality   s   

z>MultiModalRegistry.get_max_tokens_per_item_by_nonzero_modalityc                    r^   )zx
        Get the maximum number of tokens from each modality
        for profiling the memory usage of a model.
        c                    s   i | ]\}}| | | qS r#   r#   r_   ra   r#   r%   rV      s    zAMultiModalRegistry.get_max_tokens_by_modality.<locals>.<dictcomp>rb   rc   r#   ra   r%   get_max_tokens_by_modality   s   

z-MultiModalRegistry.get_max_tokens_by_modalityc                 C   s   t | | S )zq
        Get the maximum number of multi-modal tokens
        for profiling the memory usage of a model.
        )sumre   valuesrc   r#   r#   r%   get_max_multimodal_tokens   s   z,MultiModalRegistry.get_max_multimodal_tokensc                 C   s(   |j si S | j|dd}t|}| S )z
        Get the maximum number of multi-modal input instances for each modality
        that are allowed per prompt for a model class.
        FrS   )rO   rW   r   Zget_mm_limits)r$   r@   r3   rZ   r#   r#   r%   rX      s
   z+MultiModalRegistry.get_mm_limits_per_promptr3   r,   r0   c                   s    dt dt f fdd}|S )a/  
        Register a multi-modal processor to a model class. The processor
        is constructed lazily, hence a factory method should be passed.

        When the model receives multi-modal data, the provided function is
        invoked to transform the data into a dictionary of model inputs.
        	model_clsr    c                    s6   j j| ddrtd|  t dj | < | S )NT)strictzfModel class %s already has a multi-modal processor registered to %s. It is overwritten by the new one.r4   )r>   containsrR   warningr2   )ri   r0   r,   r3   r$   r#   r%   wrapper  s   z6MultiModalRegistry.register_processor.<locals>.wrapper)r   )r$   r3   r,   r0   rn   r#   rm   r%   register_processor   s   z%MultiModalRegistry.register_processorc                 C   s   ddl m} ||\}}|S )Nr   )get_model_architecture)Z vllm.model_executor.model_loaderrp   )r$   r@   rp   ri   _r#   r#   r%   _get_model_cls  s   z!MultiModalRegistry._get_model_clsrJ   c                 C   s    |d u r|j st|}t||S r"   )Zskip_tokenizer_initr   r
   )r$   r@   rJ   r#   r#   r%   _create_processing_ctx  s   
z)MultiModalRegistry._create_processing_ctxrI   c                C   s*   |  |}| j| }| ||}||S r"   )rr   r>   rs   r,   )r$   r@   rJ   ri   	factoriesr   r#   r#   r%   rP   "  s   


z*MultiModalRegistry._create_processing_info)rJ   rT   rT   c                C   sf   |j st|j d|du r|j }| |}| j| }| ||}|r'dn| |}|j||dS )zT
        Create a multi-modal processor for a specific model and tokenizer.
        z is not a multimodal modelNr.   )	rO   
ValueErrorrA   Zenable_mm_processor_cacherr   r>   rs   r:   r5   )r$   r@   rJ   rT   ri   rt   r   r/   r#   r#   r%   rW   -  s   


z#MultiModalRegistry.create_processorr[   	mm_countsc                 C   sR   | j |dd}t|}|||}|j}t||k r'td| dt| d|S )
        Create dummy data for profiling the memory usage of a model.

        The model is identified by ``model_config``.
        FrS   zExpected at least z' dummy tokens for profiling, but found z tokens instead.)rW   r   get_decoder_dummy_dataprompt_token_idslenAssertionErrorr$   r@   r[   rv   r3   rZ   Z
dummy_dataZ	token_idsr#   r#   r%   rx   F  s   z)MultiModalRegistry.get_decoder_dummy_datac                 C   sJ   | j |dd}t|}|||}|j}t||k r#td|t| |S )rw   FrS   zUExpected at least %d dummy encoder tokens for profiling, but found %d tokens instead.)rW   r   get_encoder_dummy_datary   rz   rR   Zwarning_oncer|   r#   r#   r%   r}   ^  s   z)MultiModalRegistry.get_encoder_dummy_data)r    N)r@   r   r"   )$r'   r(   r)   r*   r?   r:   boolrD   rH   rE   r   strintr]   rd   re   rh   rX   r-   r   r   r+   ro   rr   r   r   r
   rs   r   rP   r   rW   r   rx   r   r}   r#   r#   r#   r%   r;   a   s    











 




r;   )1collections.abcr   dataclassesr   	functoolsr   typingr   r   r   r   r	   Ztorch.nnr<   Zvllm.inputsr
   Zvllm.loggerr   Z!vllm.transformers_utils.tokenizerr   r   Z
vllm.utilsr   
processingr   r   r   Z	profilingr   r   r   r   Zvllm.configr   r'   rR   typer=   r   r   r   r   r+   r-   r2   r   r   r:   r;   r#   r#   r#   r%   <module>   s2   