o
    )id                     @   s  U d dl Z d dlZd dlmZmZ d dlmZmZmZ d dl	m
Z
mZ d dlmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlZd dlm  mZ  d dl!m"Z"m#Z#m$Z$ d d	l!m%Z& d d
l!m'Z'm(Z( d dl!m)Z* d dl!m+Z+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;m<Z<m=Z= d dl>m?Z?m@Z@mAZA d dlBmCZC d dlDmEZE d dlFmGZG d dlHmIZImJZJ d dlKmLZL d dlMmNZN d dlOmPZP d dlQmRZRmSZS d dlTmUZU eEeVZWdddd ZXG d!d" d"eAd#d$ZYG d%d& d&eAd#d$ZZG d'd( d(eAd#d$Z[G d)d* d*eAd#d$Z\G d+d, d,eAd#d$Z]G d-d. d.e7Z^G d/d0 d0eAd#d$Z_G d1d2 d2eAd#d$Z`G d3d4 d4eAd#d$ZaG d5d6 d6eAd#d$ZbG d7d8 d8eAd#d$Zcee&eZe$e]e'e_e`e[eaebedecf Z%e@eed9< G d:d; d;eAd#d$Zfee*efe3f Z)G d<d= d=eAd#d$Zged> Zhed? Zid@ejjjkdAeddBelfdCdDZmd@ejjjkdAeddEeddBelfdFdGZn	dd@ejjjkdAeddEeed dBelfdHdIZodJejjjkdAedfdKdLZpdJejjjkfdMdNZqdJejjjkfdOdPZrdQeddBeejjjs fdRdSZtedTdUdQeddVeidBeifdWdXZudQeed dYedBeed fdZd[Zvd\ee;e<f dQeed d]eewexedef   d^eCdBeed f
d_d`ZydQeed d]eewexedef   d\eRd^eCdBeif
dadbZzedQeed dcehddehfdedfZ{dQeed d]eewexedef   dcehd\eRd^eCdBeifdgdhZ|edi Z}edjZ~G dkdl dleee~ ZG dmdn dnee ZG dodp dpee
e  ZG dqdr dreZG dsdt dteZG dudv dveZdQeeeedf  fdwdxZd#dydQeeeedf  dzeldBeed fd{d|ZeeZd#dydQeeeedf  dzeldBeed fd}d~Zdexedewf dewed dBedfddZdexedewf dewed deldBedfddZeee(Zeee[Zeee$Zeee'Zeee_ZeeecZe9e#jZe9eZjZe9e]jZe9e0jZeedexededf e.e^f Ze@eed< dd dd dd dd dd dd dd dd dd dd dd dZexedee%gef f eed< de%dBeedef fddZdZdeddee% dedeldeldBeweg fddZde%dedeldeldBee f
ddZeee"Zeee,Zde)dedeideldBeweg f
ddZdeweg dBdfddZdewe) d^eCd\eRdeidBeeweg eeJ f f
ddZdewe) d^eCd\eRdeidBeeweg e
eeJ  f f
ddZd#dd\ee;e<f deweg dQeed d]eewexedef   d^eCdeldYedBedfddZd\eSdewe) dQeed d]eewexedef   dYedBewe fddZdBedfddZdS )    N)ABCabstractmethod)Counterdefaultdictdeque)	AwaitableIterable)cached_property	lru_cachepartial)Path)AnyCallableGenericLiteralOptionalTypeVarUnioncast)#ChatCompletionAssistantMessageParam#ChatCompletionContentPartImageParam(ChatCompletionContentPartInputAudioParam)ChatCompletionContentPartParam)%ChatCompletionContentPartRefusalParam"ChatCompletionContentPartTextParam)ChatCompletionMessageParam)"ChatCompletionMessageToolCallParamChatCompletionToolMessageParam)
InputAudio)ResponseInputImageParam)Message)Image)	BaseModel
ConfigDictTypeAdapter)PreTrainedTokenizerPreTrainedTokenizerFastProcessorMixin)Required	TypeAlias	TypedDict)ModelConfig)init_logger)SupportsMultiModal)MULTIMODAL_REGISTRYMultiModalDataDict)MediaConnector)get_chat_template_fallback_path)cached_get_processor)AnyTokenizerMistralTokenizerrandom_uuidz<##IMAGE##>z<##AUDIO##>z<##VIDEO##>)imageaudiovideoc                   @      e Zd ZU ee ed< dS )AudioURLurlN__name__
__module____qualname__r(   str__annotations__ rC   rC   g/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/chat_utils.pyr;   ?      
 r;   F)totalc                   @   *   e Zd ZU ee ed< eed  ed< dS )#ChatCompletionContentPartAudioParam	audio_urltypeN)r>   r?   r@   r(   r;   rB   r   rC   rC   rC   rD   rH   F      
 rH   c                   @   s<   e Zd ZU eeeeeef f  ed< 	 eed  ed< dS ))ChatCompletionContentPartImageEmbedsParamimage_embedsrJ   N)	r>   r?   r@   r(   r   rA   dictrB   r   rC   rC   rC   rD   rL   M   s
   
 rL   c                   @   r:   )VideoURLr<   Nr=   rC   rC   rC   rD   rO   X   rE   rO   c                   @   rG   )#ChatCompletionContentPartVideoParam	video_urlrJ   N)r>   r?   r@   r(   rO   rB   r   rC   rC   rC   rD   rP   _   rK   rP   c                   @   s&   e Zd ZU dZejed< eddZdS )PILImagez#
    A PIL.Image.Image object.
    	image_pilT)Zarbitrary_types_allowedN)r>   r?   r@   __doc__r!   rB   r#   model_configrC   rC   rC   rD   rR   f   s   
 
rR   c                   @      e Zd ZU dZee ed< dS )(CustomChatCompletionContentPILImageParamzA simpler version of the param that only accepts a PIL image.

    Example:
    {
        "image_pil": ImageAsset('cherry_blossom').pil_image
    }
    rS   N)r>   r?   r@   rT   r(   rR   rB   rC   rC   rC   rD   rW   n      
 rW   c                   @   rV   )+CustomChatCompletionContentSimpleImageParamzA simpler version of the param that only accepts a plain image_url.
    This is supported by OpenAI API, although it is not documented.

    Example:
    {
        "image_url": "https://example.com/image.jpg"
    }
    	image_urlNr>   r?   r@   rT   r(   rA   rB   rC   rC   rC   rD   rY   y   s   
 rY   c                   @   rV   )+CustomChatCompletionContentSimpleAudioParamzA simpler version of the param that only accepts a plain audio_url.

    Example:
    {
        "audio_url": "https://example.com/audio.mp3"
    }
    rI   Nr[   rC   rC   rC   rD   r\      rX   r\   c                   @   rV   )+CustomChatCompletionContentSimpleVideoParamzA simpler version of the param that only accepts a plain audio_url.

    Example:
    {
        "video_url": "https://example.com/video.mp4"
    }
    rQ   Nr[   rC   rC   rC   rD   r]      rX   r]   c                   @   s:   e Zd ZU dZee ed< 	 eed< 	 eed  ed< dS )!CustomThinkCompletionContentParamzA Think Completion Content Param that accepts a plain text and a boolean.

    Example:
    {
        "thinking": "I am thinking about the answer",
        "closed": True,
        "type": "thinking"
    }
    thinkingclosedrJ   N)	r>   r?   r@   rT   r(   rA   rB   boolr   rC   rC   rC   rD   r^      s   
 
r^   r   c                   @   s^   e Zd ZU dZee ed< 	 eeee	 f ed< 	 eed< 	 e
e ed< 	 e
ee  ed< dS ) CustomChatCompletionMessageParamz0Enables custom roles in the Chat Completion API.rolecontentnametool_call_id
tool_callsN)r>   r?   r@   rT   r(   rA   rB   r   listr   r   r   r   rC   rC   rC   rD   rb      s   
 rb   c                   @   sj   e Zd ZU ee ed< 	 eee ee	eef  f ed< 	 ee ed< 	 ee ed< 	 ee
e  ed< dS )ConversationMessagerc   rd   rf   re   rg   N)r>   r?   r@   r(   rA   rB   r   r   rh   rN   r   r   rC   rC   rC   rD   ri      s   
  ri   )autostringopenai)rk   rl   nodevarnamereturnc                 C   s&   t | tjjr| jdko| j|kS dS )NloadF)
isinstancejinja2nodesNamectxre   )rm   rn   rC   rC   rD   _is_var_access   s   rv   keyc                 C   s^   t | tjjrt| j|ot | jtjjo| jj|kS t | tjj	r-t| j|o,| j
|kS dS )NF)rq   rr   rs   Getitemrv   rm   argZConstvalueZGetattrattrrm   rn   rw   rC   rC   rD   _is_attr_access   s   
r}   c                 C   s   t | tjjr| jd uot| j||S t | tjjr!t| j||S t | tjjr7t | jtjj	r7t| j||S |r?t
| ||S t| |S N)rq   rr   rs   Filterrm   _is_var_or_elems_accessZTestrx   ry   Slicer}   rv   r|   rC   rC   rD   r     s   
r   rootc                 c   s    | |fV  t |g}|rD| }| tjjD ]'}|j}|j}t||r?t	|tjj
s.J ||jfV  |j|kr?||j q|sd S d S r~   )r   popleftfind_allrr   rs   Assigntargetrm   r   rq   rt   re   append)r   rn   Zrelated_varnamesZrelated_varnameZ
assign_astlhsrhsrC   rC   rD   _iter_nodes_assign_var_or_elems  s   



r   c                 c   sp    dd t | dD }| tjjD ]#}|j}|j}|D ]}t||r4t|tjj	s,J ||j
fV   nqqd S )Nc                 S      g | ]\}}|qS rC   rC   .0_rn   rC   rC   rD   
<listcomp>2  s    z4_iter_nodes_assign_messages_item.<locals>.<listcomp>messages)r   r   rr   rs   Foriterr   r   rq   rt   re   )r   Zmessages_varnamesloop_ast	loop_iterloop_targetrn   rC   rC   rD    _iter_nodes_assign_messages_item1  s   
r   c                 c   sp    dd t | D }| tjjD ]$}|j}|j}|D ]}t||dr4t|tjj	s,J ||j
fV   nqqd S )Nc                 S   r   rC   rC   r   rC   rC   rD   r   D  s    z3_iter_nodes_assign_content_item.<locals>.<listcomp>rd   )r   r   rr   rs   r   r   r   r   rq   rt   re   )r   Zmessage_varnamesr   r   r   rn   rC   rC   rD   _iter_nodes_assign_content_itemC  s   r   chat_templatec                 C   s8   zt | }|j| W S  ty   td Y d S w )Nz#Error when compiling Jinja template)hf_chat_utilsZ_compile_jinja_templateenvironmentparse	Exceptionlogger	exception)r   Zjinja_compiledrC   rC   rD   _try_extract_astT  s   

r       )maxsizedefaultc                C   sZ   t | }|d u r
|S z	tt| W dS  ty   Y dS  ty,   td | Y S w )Nrk   z(Error when parsing AST of Jinja templaterl   )r   nextr   StopIterationr   r   r   )r   r   Z	jinja_astrC   rC   rD   _detect_content_format]  s   
r   kwargsc                 K   s:   | d ur	t d d|v rt d d|v rt d d S )Nz;'chat_template' cannot be overridden for mistral tokenizer.Zadd_generation_promptzV'add_generation_prompt' is not supported for mistral tokenizer, so it will be ignored.Zcontinue_final_messagezW'continue_final_message' is not supported for mistral tokenizer, so it will be ignored.)r   Zwarning_once)r   r   rC   rC   rD   resolve_mistral_chat_templater  s   r   	tokenizertoolsrU   c                C   s   |d ur|S |d u r>z!t | jtttf|jd}t|tr*t|dr*|jd ur*|jW S W n t	y=   t
jd| jdd Y nw z| j||dW S  t	yX   t
jd| jdd Y nw t|jj|jd}|d urst
d	| j t|}|S t
d
| j |S )N)Zprocessor_clstrust_remote_coder   z1Failed to load AutoProcessor chat template for %sT)exc_info)r   z1Failed to load AutoTokenizer chat template for %s)
model_typeZtokenizer_name_or_pathzKLoading chat template fallback for %s as there isn't one defined on HF Hub.z)There is no chat template fallback for %s)r2   Zname_or_pathr%   r&   r'   r   rq   hasattrr   r   r   debugZget_chat_templater1   Z	hf_configr   r   infoload_chat_template)r   r   r   rU   	processorpathrC   rC   rD   resolve_hf_chat_template  sR   


r   c                C   s^   t |ttfrt|| ||d}nd }t |tr|nt| dd}|d u r'd}|S t|dd}|S )Nr   r   rU   T
is_literalrk   )r   )rq   r%   r&   r   rA   r   r   )r   r   r   rU   hf_chat_templateZ
jinja_textdetected_formatrC   rC   rD   %_resolve_chat_template_content_format  s    

r   given_formatr   c                 C   s6   t d| |dkr||krt d|| d S d S d S )NztDetected the chat template content format to be '%s'. You can set `--chat-template-content-format` to override this.rj   a  You specified `--chat-template-content-format %s` which is different from the detected format '%s'. If our automatic detection is incorrect, please consider opening a GitHub issue so that we can improve it: https://github.com/vllm-project/vllm/issues/new/choose)r   r   warning)r   r   r   rC   rC   rD   !_log_chat_template_content_format  s   r   c                C   s.   |dkr|S t | |||d}t| ||d |S )Nrj   )rU   )r   r   )r   r   )r   r   r   r   rU   r   rC   rC   rD   $resolve_chat_template_content_format  s   r   )r7   r8   r9   rM   _Tc                       s   e Zd ZdZdedef fddZedefddZe	de
e fd	d
Zedd Zedd Ze	dd Zdededee fddZedddZ  ZS )BaseMultiModalItemTrackerz
    Tracks multi-modal items in a given request and ensures that the number
    of multi-modal items in a given request does not exceed the configured
    maximum per prompt.
    rU   r   c                    s0   t    || _|| _tttt f t| _d S r~   )	super__init___model_configZ
_tokenizerr   rA   rh   r   _items_by_modality)selfrU   r   	__class__rC   rD   r     s   
z"BaseMultiModalItemTracker.__init__ro   c                 C   s   | j S r~   )r   r   rC   rC   rD   rU     s   z&BaseMultiModalItemTracker.model_configc                 C   s$   ddl m} || j}ttt |S )Nr   )get_model_cls)Z vllm.model_executor.model_loaderr   rU   r   rJ   r-   )r   r   	model_clsrC   rC   rD   r     s   
z#BaseMultiModalItemTracker.model_clsc                 C   s   | j jS r~   )r   allowed_local_media_pathr   rC   rC   rD   r   !  s   z2BaseMultiModalItemTracker.allowed_local_media_pathc                 C   s   t S r~   )r.   r   rC   rC   rD   mm_registry%     z%BaseMultiModalItemTracker.mm_registryc                 C   s   | j | jS r~   )r   Zcreate_processorrU   r   rC   rC   rD   mm_processor)  s   z&BaseMultiModalItemTracker.mm_processormodalityitemc                 C   sJ   | dd}t| j| d }| j|| | j| | | j||S )zy
        Add a multi-modal item to the current prompt and returns the
        placeholder string to use, if any.
        Z_embeds    )replacelenr   r   Zvalidate_num_itemsr   r   Zget_placeholder_str)r   r   r   Zinput_modality	num_itemsrC   rC   rD   add-  s
   zBaseMultiModalItemTracker.addBaseMultiModalContentParserc                 C      t r~   NotImplementedErrorr   rC   rC   rD   create_parser;  r   z'BaseMultiModalItemTracker.create_parserro   r   )r>   r?   r@   rT   r+   r3   r   propertyrU   r	   rJ   r-   r   r   r   r   ModalityStrr   r   rA   r   r   r   __classcell__rC   rC   r   rD   r     s     


r   c                   @   (   e Zd Zdee fddZdddZdS )	MultiModalItemTrackerro   c                 C   s   | j sd S i }t| j }d|v rd|v rtdd|v r0|d }t|dkr*td|d |d< d|v r:|d |d< d|v rD|d |d< d|v rN|d |d< |S )	Nr7   rM   4Mixing raw image and embedding inputs is not allowedr   2Only one message can have {'type': 'image_embeds'}r   r8   r9   )r   rN   
ValueErrorr   r   Z	mm_inputsZitems_by_modalityZimage_embeds_lstrC   rC   rD   all_mm_dataB  s,   
z!MultiModalItemTracker.all_mm_datar   c                 C      t | S r~   )MultiModalContentParserr   rC   rC   rD   r   Y     z#MultiModalItemTracker.create_parserNr   r>   r?   r@   r   r/   r   r   rC   rC   rC   rD   r   @  s    r   c                   @   r   )	AsyncMultiModalItemTrackerro   c                    s   | j sd S i }dd | j  D I d H }d|v r!d|v r!tdd|v r9|d }t|dkr3td|d |d< d|v rC|d |d< d	|v rM|d	 |d	< d
|v rW|d
 |d
< |S )Nc                    s$   i | ]\}}|t j| I d H qS r~   )asynciogather)r   r   itemsrC   rC   rD   
<dictcomp>c  s
    z:AsyncMultiModalItemTracker.all_mm_data.<locals>.<dictcomp>r7   rM   r   r   r   r   r8   r9   )r   r   r   r   r   rC   rC   rD   r   _  s2   z&AsyncMultiModalItemTracker.all_mm_datar   c                 C   r   r~   )AsyncMultiModalContentParserr   rC   rC   rD   r   z  r   z(AsyncMultiModalItemTracker.create_parserNr   r   rC   rC   rC   rD   r   ]  s    r   c                       s   e Zd Zd fddZdedee fddZdeee	f fd	d
Z
ededdfddZedeeeeef f ddfddZedejddfddZededdfddZededdfddZededdfddZ  ZS )r   ro   Nc                    s   t    tt| _d S r~   )r   r   r   rh   _placeholder_storager   r   rC   rD   r     s   
z$BaseMultiModalContentParser.__init__r   placeholderc                 C   s$   t | }|r| j| | d S d S r~   )MODALITY_PLACEHOLDERS_MAPr   r   )r   r   r   Zmod_placeholderrC   rC   rD   _add_placeholder  s   z,BaseMultiModalContentParser._add_placeholderc                 C   s
   t | jS r~   )rN   r   r   rC   rC   rD   mm_placeholder_storage  s   
z2BaseMultiModalContentParser.mm_placeholder_storagerZ   c                 C   r   r~   r   )r   rZ   rC   rC   rD   parse_image  r   z'BaseMultiModalContentParser.parse_imagerM   c                 C   r   r~   r   )r   rM   rC   rC   rD   parse_image_embeds  s   z.BaseMultiModalContentParser.parse_image_embedsrS   c                 C   r   r~   r   )r   rS   rC   rC   rD   parse_image_pil  r   z+BaseMultiModalContentParser.parse_image_pilrI   c                 C   r   r~   r   )r   rI   rC   rC   rD   parse_audio  r   z'BaseMultiModalContentParser.parse_audioinput_audioc                 C   r   r~   r   )r   r   rC   rC   rD   parse_input_audio  r   z-BaseMultiModalContentParser.parse_input_audiorQ   c                 C   r   r~   r   )r   rQ   rC   rC   rD   parse_video  r   z'BaseMultiModalContentParser.parse_video)ro   N)r>   r?   r@   r   r   r   rA   r   rN   rh   r   r   r   r   r   r!   r   r   r   r   r   r   rC   rC   r   rD   r   ~  s,    
r   c                          e Zd Zdeddf fddZdeddfddZd	eeeeef f ddfd
dZ	de
j
ddfddZdeddfddZdeddfddZdeddfddZ  ZS )r   trackerro   Nc                    *   t    || _t| jjj|jd| _d S N)media_io_kwargsr   r   r   _trackerr0   r   r   r   
_connectorr   r   r   rC   rD   r     s   
z MultiModalContentParser.__init__rZ   c                 C   *   | j |}| jd|}| d| d S Nr7   )r  Zfetch_imager  r   r   )r   rZ   r7   r   rC   rC   rD   r        z#MultiModalContentParser.parse_imagerM   c                    sb   t |tr fdd| D } jd|}t |tr) j|} jd|} d| d S )Nc                       i | ]\}}| j |qS rC   r  fetch_image_embeddingr   kvr   rC   rD   r         z>MultiModalContentParser.parse_image_embeds.<locals>.<dictcomp>rM   r7   )	rq   rN   r   r  r   rA   r  r
  r   )r   rM   embedsr   	embeddingrC   r   rD   r     s   


z*MultiModalContentParser.parse_image_embedsrS   c                 C   s   | j d|}| d| d S r  )r  r   r   )r   rS   r   rC   rC   rD   r     s   z'MultiModalContentParser.parse_image_pilrI   c                 C   r  Nr8   )r  Zfetch_audior  r   r   )r   rI   r8   r   rC   rC   rD   r     r  z#MultiModalContentParser.parse_audior   c                 C   2   | dd}| dd}d| d| }| |S Ndatar   formatzdata:audio/z;base64,getr   r   r   Z
audio_dataZaudio_formatrI   rC   rC   rD   r        
z)MultiModalContentParser.parse_input_audiorQ   c                 C   ,   | j j|d}| jd|}| d| d S N)rQ   r9   )r  Zfetch_videor  r   r   r   rQ   r9   r   rC   rC   rD   r        z#MultiModalContentParser.parse_video)r>   r?   r@   r   r   rA   r   r   rN   r   r!   r   r   r   r   r   r   rC   rC   r   rD   r     s    

r   c                       r   )r   r   ro   Nc                    r   r   r  r  r   rC   rD   r     s   
z%AsyncMultiModalContentParser.__init__rZ   c                 C   r  r  )r  Zfetch_image_asyncr  r   r   )r   rZ   Z
image_coror   rC   rC   rD   r     r  z(AsyncMultiModalContentParser.parse_imagerM   c                    sp   t  }t|tr fdd| D }|| t|tr) j|}||  j	
d|} d| d S )Nc                    r  rC   r	  r  r   rC   rD   r     r  zCAsyncMultiModalContentParser.parse_image_embeds.<locals>.<dictcomp>rM   r7   )r   Futurerq   rN   r   
set_resultrA   r  r
  r  r   r   )r   rM   futurer  r  r   rC   r   rD   r     s   




z/AsyncMultiModalContentParser.parse_image_embedsrS   c                 C   s0   t  }|| | jd|}| d| d S r  )r   r  r  r  r   r   )r   rS   r   r   rC   rC   rD   r     s   
z,AsyncMultiModalContentParser.parse_image_pilrI   c                 C   r  r  )r  Zfetch_audio_asyncr  r   r   )r   rI   Z
audio_coror   rC   rC   rD   r     r  z(AsyncMultiModalContentParser.parse_audior   c                 C   r  r  r  r  rC   rC   rD   r     r  z.AsyncMultiModalContentParser.parse_input_audiorQ   c                 C   r  r  )r  Zfetch_video_asyncr  r   r   r  rC   rC   rD   r     r  z(AsyncMultiModalContentParser.parse_video)r>   r?   r@   r   r   rA   r   r   rN   r   r!   r   r   r   r   r   r   rC   rC   r   rD   r     s    	
r   c                    s    du rdS t  tr  stdt  tr7d}t fdd|D s3t  s5td  ddS dS tt  d)	z5Raises if the provided chat template appears invalid.Nz-the supplied chat template path doesn't exist{}
c                 3       | ]}| v V  qd S r~   rC   r   cr   rC   rD   	<genexpr>1  s    z)validate_chat_template.<locals>.<genexpr>z#The supplied chat template string (z') appears path-like, but doesn't exist!z" is not a valid chat template type)	rq   r   existsFileNotFoundErrorrA   anyr   	TypeErrorrJ   )r   JINJA_CHARSrC   r%  rD   validate_chat_template&  s(   


r,  r   r   c             
      s    d u rd S |rt  trtd S zt }| W  d    W S 1 s(w   Y  W d S  tyg } z,t  tr= d}t fdd|D sWd  d| }t||t ddW  Y d }~S d }~ww )	Nz<chat_template is expected to be read directly from its valuer!  c                 3   r"  r~   rC   r#  r%  rC   rD   r&  S  s    z&_load_chat_template.<locals>.<genexpr>zThe supplied chat template (z>) looks like a file path, but it failed to be opened. Reason: Tr   )	rq   r   r*  openreadOSErrorr)  r   _load_chat_template)r   r   fer+  msgrC   r%  rD   r0  <  s,   

(

r0  c                C   s   t | |dS )Nr   )_cached_load_chat_template)r   r   rC   rC   rD   r   a  s   r   placeholder_storagetextsc                 C   s6   t |D ]\}}|| v r| | d||< qd|S )Nr   
)	enumeratepopjoin)r5  r6  idxelemrC   rC   rD   _get_interleaved_text_prompti  s
   
r=  interleave_stringsc                 C   s   t dd |  D }|rt| |}nd|}g }|D ]1}||  ||8  < || dk rCtd| td| td| d|	|g||   qd||g S )	z;Combine multimodal prompts for a multimodal language model.c                 S   s   g | ]	}|D ]}|qqS rC   rC   )r   r<  r  rC   rC   rD   r     s    z4_get_full_multimodal_text_prompt.<locals>.<listcomp>r7  r   zPlaceholder count is negative! Ensure that the 'interleave_strings' flag is disabled (current value: %s) when manually placing image placeholders.zInput prompt: %szFound more 'zA' placeholders in input prompt than actual multimodal data items.)
r   valuesr=  r:  countr   errorr   r   extend)r5  r6  r>  Zplaceholder_countstext_promptZmissing_placeholdersr   rC   rC   rD    _get_full_multimodal_text_promptt  s,   

rD  _ContentPartc                 C      t | dd S Ntext_TextParserr  partrC   rC   rD   <lambda>      rM  c                 C   rF  )Nr_   )_ThinkParserr  rK  rC   rC   rD   rM    rN  c                 C   rF  rG  rI  rK  rC   rC   rD   rM    rN  c                 C   rF  )NrZ   )_ResponsesInputImageParserr  rK  rC   rC   rD   rM    rN  c                 C      t | di dd S )NrZ   r<   )_ImageParserr  rK  rC   rC   rD   rM        c                 C   rF  )NrM   )_ImageEmbedsParserr  rK  rC   rC   rD   rM    rN  c                 C   rF  )NrS   )_PILImageParserr  rK  rC   rC   rD   rM    rN  c                 C   rQ  )NrI   r<   )_AudioParserr  rK  rC   rC   rD   rM    rS  c                 C   rF  )Nr   )_InputAudioParserr  rK  rC   rC   rD   rM    rN  c                 C   rF  )Nrefusal)_RefusalParserr  rK  rC   rC   rD   rM    rN  c                 C   rQ  )NrQ   r<   )_VideoParserr  rK  rC   rC   rD   rM    rS  )rH  r_   
input_textinput_imagerZ   rM   rS   rI   r   rX  rQ   MM_PARSER_MAPrL  c                 C   s,  t | tsJ | dd}t |tr1|tv r1t| | }|dkr-| dddkr-td ||fS |du r| ddurItt| }d|ddfS | ddur]tt	| }d|ddfS | d	durqttttf | }d	|fS | d
durtt
| }d
|d
dfS tdt |tstd|dfS )a  
    Parses a given multi-modal content part based on its type.

    Args:
        part: A dict containing the content part, with a potential 'type' field.

    Returns:
        A tuple (part_type, content) where:
        - part_type: Type of the part (e.g., 'text', 'image_url').
        - content: Parsed content (e.g., text, image URL).

    Raises:
        ValueError: If the 'type' field is missing and no direct URL is found.
    rJ   NrZ   detailrj   zB'image_url.detail' is currently not supported and will be ignored.r   rI   r   rQ   z(Missing 'type' field in multimodal part.z(Invalid 'type' field in multimodal part.zunknown part_type content)rq   rN   r  rA   r]  r   r   r   rY   r\   r]   r   )rL  	part_typerd   Zimage_paramsZaudio_paramsZinput_audio_paramsZvideo_paramsrC   rC   rD   #_parse_chat_message_content_mm_part  s@   

r`  )rH  rX  rZ   rM   rS   rI   r   rQ   rc   parts
mm_tracker
wrap_dictsc                C   s   t t  }| }|D ]}t||||d}|r|| q|r&t| |dgS tt t |}	| }
|
r:t	|
|	|}nd
|	}t| |dgS )Nrc  r>  )rc   rd   r7  )rh   rE  r    _parse_chat_message_content_partr   ri   r   rA   r   rD  r:  )rc   ra  rb  rc  r>  rd   	mm_parserrL  Z	parse_resr6  r   rC  rC   rC   rD   !_parse_chat_message_content_parts  s2   


rg  rf  c          
      C   sl  t | tr| S t| \}}|tv r|du rtd| | dS |dv r0tt|}|r.d|dS |S d}|dkrDttj|}|| d}nd|dv rUtt|}|	| d}nS|d	krntt
ttttf f |}|| d}n:|d
krtt|}|| d}n)|dkrtt|}	||	 d}n|dkrtt|}|| d}ntd| |rd|iS |rt| S dS )a|  Parses a single part of a conversation. If wrap_dicts is True,
    structured dictionary pieces for texts and images will be
    wrapped in dictionaries, i.e., {"type": "text", "text", ...} and
    {"type": "image"}, respectively. Otherwise multimodal data will be
    handled by mm_parser, and texts will be returned as strings to be joined
    with multimodal placeholders.
    NzKSkipping multimodal part '%s' (type: '%s') with empty / unparsable content.)rH  r[  rX  r_   rH  rJ   rH  rS   r7   )rZ   r\  rM   rI   r8   r   rQ   r9   zUnknown part type: rJ   )rq   rA   r`  #VALID_MESSAGE_CONTENT_MM_PART_TYPESr   r   r   r!   r   r   r   rN   r   r   r   r   r   r   r   )
rL  rf  rc  r>  r_  rd   Zstr_contentr   Zimage_contentZdict_contentrC   rC   rD   re  7  sZ   













re  messagecontent_formatc           	      C   s   | d }|  d}|d u rg }nt|trtd|dg}t||||dk|d}|D ]@}|dkrFt| }d|v rE|d d urEt|d |d< n|d	krXt| }d
|v rX|d
 |d
< d| v rit| d tri| d |d< q)|S )Nrc   rd   rH  rh  rl   rd  	assistantrg   Ztoolrf   re   )r  rq   rA   r   rg  _AssistantParserrh   _ToolParser)	rj  rb  rk  r>  rc   rd   resultZ
result_msgZ
parsed_msgrC   rC   rD   _parse_chat_message_content~  s:   


rp  r   c                 C   sX   | D ]'}|d dkr)d|v r)t |d tr)|d D ]}t|d d |d d< qqd S )Nrc   rl  rg   function	arguments)rq   rh   jsonloads)r   rj  r   rC   rC   rD   _postprocess_messages  s   
ru  c                 C   \   g }t ||}| D ]}t||||dko|jd uo|jjd}|| q	t| || fS Nrk   )r>  )r   rp  multimodal_configinterleave_mm_stringsrB  ru  r   r   rU   r   rk  conversationrb  r3  Zsub_messagesrC   rC   rD   parse_chat_messages     
r|  c                 C   rv  rw  )r   rp  rx  ry  rB  ru  r   rz  rC   rC   rD   parse_chat_messages_futures  r}  r~  )tokenizer{  r  c          	   
   K   sn   t | |||d}|d u rtdz| jd||||d|W S  ty6 } ztd tt||d }~ww )Nr   zAs of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.)r{  r   r   r  z@An error occurred in `transformers` while applying chat templaterC   )r   r   apply_chat_templater   r   r   rA   )	r   r{  r   r   rU   r  r   r   r2  rC   rC   rD   apply_hf_chat_template  s4   

r  c              
   K   s   ddl m} tdd|i| z| jd||d|W S  t|fy0 } ztt||d }~w tyG } zt	d tt||d }~ww )Nr   )MistralCommonExceptionr   )r   r   zBAn error occurred in `mistral_common` while applying chat templaterC   )
Zmistral_common.exceptionsr  r   r  AssertionErrorr   rA   r   r   r   )r   r   r   r   r   r  r2  rC   rC   rD   apply_mistral_chat_template  s.   	r  c                   C   s   dt   S )Nzchatcmpl-tool-r5   rC   rC   rC   rD   random_tool_call_idD  s   r  r~   )r   rs  abcr   r   collectionsr   r   r   collections.abcr   r   	functoolsr	   r
   r   pathlibr   typingr   r   r   r   r   r   r   r   Zjinja2.nodesrr   Z&transformers.utils.chat_template_utilsutilsZchat_template_utilsr   Zopenai.types.chatr   r   r   r   Z$OpenAIChatCompletionContentPartParamr   r   r   Z OpenAIChatCompletionMessageParamr   r   Z@openai.types.chat.chat_completion_content_part_input_audio_paramr   Zopenai.types.responsesr   Zopenai_harmonyr    ZOpenAIHarmonyMessageZPILr!   Zpydanticr"   r#   r$   Ztransformersr%   r&   r'   Ztyping_extensionsr(   r)   r*   Zvllm.configr+   Zvllm.loggerr,   Zvllm.model_executor.modelsr-   Zvllm.multimodalr.   r/   Zvllm.multimodal.utilsr0   Z&vllm.transformers_utils.chat_templatesr1   Z!vllm.transformers_utils.processorr2   Z!vllm.transformers_utils.tokenizerr3   r4   Z
vllm.utilsr6   r>   r   r   r;   rH   rL   rO   rP   rR   rW   rY   r\   r]   r^   rA   rB   rb   ri   ZChatTemplateContentFormatOptionZ_ChatTemplateContentFormatrs   Nodera   rv   r}   r   r   r   r   Templater   r   r   rh   rN   r   r   r   r   r   r   r   objectr   r   r   r   r   r,  r0  r4  r   r=  rD  rJ  rT  rW  rY  rU  rO  Zvalidate_pythonrR  rV  rZ  rP  rE  r]  tupler`  ri  rg  re  rm  rn  rp  ru  r|  r~  r  intr  r  rC   rC   rC   rD   <module>   sj  
(
	


2

8!09?
"



3







 

9
&

C

,

#
	
+
(