o
    )i^"                     @   sN  d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z. e)e/Z0de+ded de	e1e2 e3f fddZ4G dd de#Z5dS )    N)AsyncGenerator)FinalLiteralOptionalUnioncast)Request)assert_never)ModelConfig)EngineClient)ChatTemplateContentFormatOption)RequestLogger)ErrorResponsePoolingChatRequestPoolingRequestPoolingResponsePoolingResponseData	UsageInfo)OpenAIServing)OpenAIServingModels)_validate_truncation_size)init_logger)PoolingOutputPoolingRequestOutput)merge_async_iteratorsoutputencoding_formatfloatbase64returnc                 C   sX   |dkr	| j  S |dkr&| j jtjd}tj|dd }t	|
dS t| d S )Nr   r   )Zdtypefloat32zutf-8)datatolisttotorchr!   nparraytobytesr   	b64encodedecoder	   )r   r   Z
pt_float32Zpooling_bytes r+   s/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_pooling.py	_get_data"   s   
r-   c                       s   e Zd Zdedededee dee de	ddf fd	d
Z
	ddedee deeef fddZdee dedededed defddZ  ZS )OpenAIServingPoolingengine_clientmodel_configmodelsrequest_loggerchat_templatechat_template_content_formatr    Nc                   s$   t  j||||d || _|| _d S )N)r/   r0   r1   r2   )super__init__r3   r4   )selfr/   r0   r1   r2   r3   r4   	__class__r+   r,   r6   4   s   

zOpenAIServingPooling.__init__requestraw_requestc                    s  |  |I dH }|dur|S |j}|jdur| dS | |j}d| | }tt }|j	}zOt
| j|}| |}	| jjrFd}
n	| j|	I dH }
t|trp| j||
|j|jp_| j| jdd||jd	I dH \}}}n| j||
|j||jdI dH \}}W n" tttjfy } ztd | t |W  Y d}~S d}~ww g }zf|! }z	|"d| j W n ty } z| t |W  Y d}~W S d}~ww t#|D ]7\}}| d	| }| j$||| ||	d
 |du rdn| %|j&I dH }| jj'||||	||j(d}|)| qW n ty' } z| t |W  Y d}~S d}~ww t*| }t+|}dg| }z0|2 z3 dH W \}}|||< q86 t,dd |D sSJ t-t.t/ |}| 0|||||}W |S  t1j2yu   | d Y S  ty } z| t |W  Y d}~S d}~ww )z
        See https://platform.openai.com/docs/api-reference/embeddings/create
        for the API specification. This API mimics the OpenAI Embedding API.
        Nz%dimensions is currently not supportedzpool-F)r3   r4   Zadd_generation_promptZcontinue_final_messagetruncate_prompt_tokensadd_special_tokens)r<   r=   z$Error in preprocessing prompt inputsencode-)paramslora_request)rA   trace_headerspriorityc                 s   s    | ]}|d uV  qd S Nr+   ).0	final_resr+   r+   r,   	<genexpr>   s    z6OpenAIServingPooling.create_pooling.<locals>.<genexpr>zClient disconnected)3Z_check_modelr   
dimensionsZcreate_error_responseZ_get_model_namemodelZ_base_request_idinttimer<   r   Zmax_model_lenZ_maybe_get_adaptersr0   Zskip_tokenizer_initr/   Zget_tokenizer
isinstancer   Z_preprocess_chatmessagesr3   r4   r=   Z_preprocess_completioninput
ValueError	TypeErrorjinja2ZTemplateErrorlogger	exceptionstrZto_pooling_paramsverify	enumerateZ_log_inputsZ_get_trace_headersheadersr>   rC   appendr   lenallr   listr   "request_output_to_pooling_responseasyncioCancelledError)r7   r:   r;   Zerror_check_retr   
model_name
request_idcreated_timer<   rA   Z	tokenizer_Zrequest_promptsZengine_promptse
generatorsZpooling_paramsiZengine_promptZrequest_id_itemrB   	generatorZresult_generatorZnum_promptsfinal_res_batchresZfinal_res_batch_checkedresponser+   r+   r,   create_poolingF   s   	







	
z#OpenAIServingPooling.create_poolingrg   r`   ra   r_   r   r   c                 C   sh   g }d}t |D ]\}}	t|t|	j|d}
|	j}||
 |t|7 }qt||d}t|||||dS )Nr   )indexr"   )Zprompt_tokensZtotal_tokens)idcreatedrI   r"   usage)	rV   r   r-   Zoutputsprompt_token_idsrX   rY   r   r   )r7   rg   r`   ra   r_   r   itemsZnum_prompt_tokensidxrF   itemro   rn   r+   r+   r,   r\      s*   

z7OpenAIServingPooling.request_output_to_pooling_responserD   )__name__
__module____qualname__r   r
   r   r   r   rT   r   r6   r   r   r   r   r   rj   r[   r   rJ   r   r\   __classcell__r+   r+   r8   r,   r.   2   sL    	

 r.   )6r]   r   rK   collections.abcr   typingr   r   r   r   r   rQ   numpyr&   r%   Zfastapir   Ztyping_extensionsr	   Zvllm.configr
   Zvllm.engine.protocolr   Zvllm.entrypoints.chat_utilsr   Zvllm.entrypoints.loggerr   Z vllm.entrypoints.openai.protocolr   r   r   r   r   r   Z&vllm.entrypoints.openai.serving_enginer   Z&vllm.entrypoints.openai.serving_modelsr   Zvllm.entrypoints.utilsr   Zvllm.loggerr   Zvllm.outputsr   r   Z
vllm.utilsr   rs   rR   r[   r   rT   r-   r.   r+   r+   r+   r,   <module>   s<    
