o
    )iN                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZ eeZeG dd dZ eG dd dZ!G dd dZ"ede!dZ#G dd de	e# Z$G dd dZ%eG dd dZ&G dd de$e& Z'eG dd dZ(G dd de$e( Z)eG d d! d!Z*G d"d# d#e$e* Z+dS )$    N)MutableSequence)Sequence)	dataclass)AnyGenericOptionalUnion)TypeVar)init_logger)LoRARequest)MultiModalPlaceholderDict)RequestOutputKind)PromptLogprobsRequestMetricsSampleLogprobsSequenceGroupSequenceGroupBaseSequenceStatusc                   @   s   e Zd ZU dZeed< eed< ee ed< ee	 ed< ee
 ed< dZee ed< dZeeedf ed	< dZee ed
< defddZdefddZdS )CompletionOutputa!  The output data of one completion output of a request.

    Args:
        index: The index of the output in the request.
        text: The generated output text.
        token_ids: The token IDs of the generated output text.
        cumulative_logprob: The cumulative log probability of the generated
            output text.
        logprobs: The log probabilities of the top probability words at each
            position if the logprobs are requested.
        finish_reason: The reason why the sequence is finished.
        stop_reason: The stop string or token id that caused the completion
            to stop, None if the completion finished for some other reason
            including encountering the EOS token.
        lora_request: The LoRA request that was used to generate the output.
    indextext	token_idscumulative_logproblogprobsNfinish_reasonstop_reasonlora_requestreturnc                 C   s
   | j d uS N)r   self r!   X/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/outputs.pyfinished3   s   
zCompletionOutput.finishedc                 C   s>   d| j  d| jd| j d| j d| j d| j d| j dS )	NzCompletionOutput(index=z, text=z, token_ids=z, cumulative_logprob=z, logprobs=z, finish_reason=z, stop_reason=)r   r   r   r   r   r   r   r   r!   r!   r"   __repr__6   s   
zCompletionOutput.__repr__)__name__
__module____qualname____doc__int__annotations__strGenericSequencer   floatr   r   r   r   r   r   boolr#   r&   r!   r!   r!   r"   r      s   
 r   c                   @   s<   e Zd ZU dZejed< defddZde	de
fddZd	S )
PoolingOutputznThe output data of one pooling output of a request.

    Args:
        data: The extracted hidden states.
    datar   c                 C      d| j  dS )NzPoolingOutput(data=r$   )r2   r   r!   r!   r"   r&   I      zPoolingOutput.__repr__otherc                 C   s    t || jot| j|jk S r   )
isinstance	__class__r0   r2   all)r    r5   r!   r!   r"   __eq__L   s   zPoolingOutput.__eq__N)r'   r(   r)   r*   torchZTensorr,   r-   r&   objectr0   r9   r!   r!   r!   r"   r1   @   s
   
 
r1   c                    @   s   e Zd ZdZ					d ddddedee deee  dee dee	 d	e
d
ee dee dee deee  dee dee deeeef  deddfddZdd de
ddfddZedede
deeef ded  fddZdefddZdS )!RequestOutputak  The output data of a completion request to the LLM.

    Args:
        request_id: The unique ID of the request.
        prompt: The prompt string of the request.
                For encoder/decoder models, this is the
                decoder input prompt.
        prompt_token_ids: The token IDs of the prompt.
                          For encoder/decoder models, this is the
                          decoder input prompt token ids.
        prompt_logprobs: The log probabilities to return per prompt token.
        outputs: The output sequences of the request.
        finished: Whether the whole request is finished.
        metrics: Metrics associated with the request.
        lora_request: The LoRA request that was used to generate the output.
        encoder_prompt: The encoder prompt string of the request.
                        None if decoder-only.
        encoder_prompt_token_ids: The token IDs of the encoder prompt.
                                  None if decoder-only.
        num_cached_tokens: The number of tokens with prefix cache hit.
        kv_transfer_params: The params for remote K/V transfer.
    N)multi_modal_placeholderskv_transfer_params
request_idpromptprompt_token_idsprompt_logprobsoutputsr#   metricsr   encoder_promptencoder_prompt_token_idsnum_cached_tokensr=   r>   kwargsr   c                K   sj   |r
t dt| || _|| _|| _|pi | _|| _|| _|| _	|| _
|| _|	| _|
| _|| _|| _d S )Nz+RequestOutput: Ignoring extra arguments: %s)loggerZwarning_oncer-   r?   r@   rA   r=   rB   rC   r#   rD   r   rE   rF   rG   r>   )r    r?   r@   rA   rB   rC   r#   rD   r   rE   rF   rG   r=   r>   rH   r!   r!   r"   __init__i   s"   

zRequestOutput.__init__next_output	aggregatec                 C   s   |  j |j O  _ |j| _|jD ]Z}t| jD ]L\}}|j|jkrb|r[| j|j7  _t|jts6t	|j|_|j
|j |jrN|jdusGJ |j
|j |j|_|j|_|j|_n|| j|<  nq| j| qdS )z,Merge subsequent RequestOutput into this oneN)r#   r>   rC   	enumerater   r   r6   r   r   listextendr   r   r   r   append)r    rK   rL   Znext_completioni
completionr!   r!   r"   add   s6   


zRequestOutput.add	seq_group	use_cacheseq_id_to_seq_groupc                  C   s2  |  }|j|v r?||j }||}|r|| |d u r d S t|jdkr8t|j D ]	}||v r7||= q.| 	|||S |j
}|d u rJtd|jtjkrT|sTd S |rf|jd u rftdd g d g dd|_| }	|jd u}
|j}|jtjk}g }d}d }t|	D ]\}}|||}||}t|trdnt|}|j }|
r|jnd }|r|r|dkr|| d  }nd }|r| |krd}|r|jj}|t|kr|t |dg d d d d d || }|j!|ksJ ||_"t|tr|j#$  |j#| n||_#|
r|% nd |_&||_t'(|j)|_*|j+|_+n"t |	!||t|tr)|gn||
r1|% nd |t'(|j)|j+}|| q|rU|j,}|j-}|j.}|j/}|j0}n
d }d }d }d }d }|rft11 nd }|2| |j||||||j3|j4||||j5d	}|r|j}|j6d
i | |S | d
i |}|S )Nr   z8Sampling parameters are missing for a CompletionRequest. F)r?   r@   rA   rB   rC   r#   T   r%   )r?   r@   rA   rB   rC   r#   rD   r   rE   rF   rG   r=   r!   )7is_finishedr?   Zmaybe_assemble_groupZ
finish_seqlenZto_be_finishedrN   Zseq_id_to_indexkeysfrom_seq_groupsampling_params
ValueErrorZoutput_kindr   Z
FINAL_ONLYZcached_request_outputr<   Zget_seqsr   Zoutput_text_buffer_lengthZDELTArM   Zget_output_text_to_returnZget_output_token_ids_to_returnr6   r+   r2   Zget_num_cached_tokensoutput_logprobsZget_output_lenrC   rP   r   r   r   r   clearZget_cumulative_logprobr   r   Zget_finished_reasonstatusr   r   r@   rA   rE   rF   rB   timeZset_finished_timerD   r   r=   rJ   ) clsrT   rU   rV   r#   groupZassembled_seq_groupZsub_request_idr]   Z
top_n_seqsZinclude_logprobsZtext_buffer_lengthdeltarC   Zinclude_promptrG   rQ   seqZoutput_textZoutput_token_idsZnum_output_tokensr_   Zcached_outputsoutputr@   rA   rE   rF   rB   finished_timeZinit_kwargsrequest_outputr!   r!   r"   r\      s  












zRequestOutput.from_seq_groupc                 C   sf   d| j  d| jd| j d| jd| j d| j d| j d| j d	| j d
| j	 d| j
 d| j dS )NzRequestOutput(request_id=z	, prompt=, prompt_token_ids=z, encoder_prompt=z, encoder_prompt_token_ids=z, prompt_logprobs=
, outputs=, finished=z
, metrics=z, lora_request=z, num_cached_tokens=z, multi_modal_placeholders=r$   )r?   r@   rA   rE   rF   rB   rC   r#   rD   r   rG   r=   r   r!   r!   r"   r&   Q  s.   
	
zRequestOutput.__repr__)NNNNN)r'   r(   r)   r*   r-   r   rN   r+   r   r   r0   r   r   r   dictr   rJ   rS   classmethodr   r   r\   r&   r!   r!   r!   r"   r<   Q   sl    
	


%
 $r<   _O)defaultc                   @   sL   e Zd ZdZdededee defddZ	e
ded	d fd
dZdd ZdS )PoolingRequestOutputaq  
    The output data of a pooling request to the LLM.

    Args:
        request_id (str): A unique identifier for the pooling request.
        outputs (PoolingOutput): The pooling results for the given input.
        prompt_token_ids (list[int]): A list of token IDs used in the prompt.
        finished (bool): A flag indicating whether the pooling is completed.
    r?   rC   rA   r#   c                 C   s   || _ || _|| _|| _d S r   )r?   rA   r#   rC   )r    r?   rC   rA   r#   r!   r!   r"   rJ   n  s   
zPoolingRequestOutput.__init__rT   r   c                 C   sH   | j }|d us	J |jtjdd}t|}| j}|  }t| j|||S )Ncpu)ZdtypeZdevice)	pooled_datator:   Zfloat32r1   rA   rY   rq   r?   )rT   rs   r2   rg   rA   r#   r!   r!   r"   r\   u  s   z#PoolingRequestOutput.from_seq_groupc              
   C   s0   t | j d| jd| jd| j d| j d
S )Nz(request_id=rk   rj   rl   r$   )typer'   r?   rC   rA   r#   r   r!   r!   r"   r&     s   zPoolingRequestOutput.__repr__N)r'   r(   r)   r*   r-   ro   rN   r+   r0   rJ   staticmethodr   r\   r&   r!   r!   r!   r"   rq   c  s    


rq   c                   @   s2   e Zd Ze	ddedeeef defddZ	dS )	RequestOutputFactoryFrT   rV   rU   c                 C   s"   | j d ur
t| S t| ||S r   )rs   rq   r\   r<   )rT   rV   rU   r!   r!   r"   create  s
   

zRequestOutputFactory.createN)F)
r'   r(   r)   rv   r   rm   r-   r   r0   rx   r!   r!   r!   r"   rw     s    
rw   c                   @   P   e Zd ZU dZee ed< edefddZ	e
defddZdefd	d
ZdS )EmbeddingOutputzThe output data of one embedding output of a request.

    Args:
        embedding: The embedding vector, which is a list of floats.
        Its length depends on the hidden dimension of the model.
    	embeddingpooling_outputc                 C   $   | j }|jdkrtdt| S )NrX   z,pooled_data should be a 1-D embedding vector)r2   ndimr^   rz   tolistr|   rs   r!   r!   r"   	from_base  s   
zEmbeddingOutput.from_baser   c                 C   
   t | jS r   )rZ   r{   r   r!   r!   r"   hidden_size     
zEmbeddingOutput.hidden_sizec                 C   r3   )NzEmbeddingOutput(hidden_size=r$   )r   r   r!   r!   r"   r&     r4   zEmbeddingOutput.__repr__N)r'   r(   r)   r*   rN   r/   r,   rv   r1   r   propertyr+   r   r-   r&   r!   r!   r!   r"   rz     s   
 rz   c                   @      e Zd ZedefddZdS )EmbeddingRequestOutputri   c                 C      t | jt| j| j| jdS N)r?   rC   rA   r#   )r   r?   rz   r   rC   rA   r#   ri   r!   r!   r"   r        
z EmbeddingRequestOutput.from_baseNr'   r(   r)   rv   rq   r   r!   r!   r!   r"   r         r   c                   @   ry   )ClassificationOutputzThe output data of one classification output of a request.

    Args:
        probs: The probability vector, which is a list of floats.
        Its length depends on the number of classes.
    probsr|   c                 C   r}   )NrX   z.pooled_data should be a 1-D probability vector)r2   r~   r^   r   r   r   r!   r!   r"   r     s   
zClassificationOutput.from_baser   c                 C   r   r   )rZ   r   r   r!   r!   r"   num_classes  r   z ClassificationOutput.num_classesc                 C   r3   )Nz!ClassificationOutput(num_classes=r$   )r   r   r!   r!   r"   r&     r4   zClassificationOutput.__repr__N)r'   r(   r)   r*   rN   r/   r,   rv   r1   r   r   r+   r   r-   r&   r!   r!   r!   r"   r     s   
 r   c                   @   r   )ClassificationRequestOutputri   c                 C   r   r   )r   r?   r   r   rC   rA   r#   r   r!   r!   r"   r     r   z%ClassificationRequestOutput.from_baseNr   r!   r!   r!   r"   r     r   r   c                   @   s:   e Zd ZU dZeed< edefddZde	fddZ
d	S )
ScoringOutputzThe output data of one scoring output of a request.

    Args:
        score: The similarity score, which is a scalar value.
    scorer|   c                 C   s(   | j  }|jdkrtdt| S )Nr   z$pooled_data should be a scalar score)r2   Zsqueezer~   r^   r   itemr   r!   r!   r"   r     s   

zScoringOutput.from_baser   c                 C   r3   )NzScoringOutput(score=r$   )r   r   r!   r!   r"   r&     r4   zScoringOutput.__repr__N)r'   r(   r)   r*   r/   r,   rv   r1   r   r-   r&   r!   r!   r!   r"   r     s   
 
r   c                   @   r   )ScoringRequestOutputri   c                 C   r   r   )r   r?   r   r   rC   rA   r#   r   r!   r!   r"   r     r   zScoringRequestOutput.from_baseNr   r!   r!   r!   r"   r     r   r   ),rb   collections.abcr   r   r.   dataclassesr   typingr   r   r   r   r:   Ztyping_extensionsr	   Zvllm.loggerr
   Zvllm.lora.requestr   Zvllm.multimodal.inputsr   Zvllm.sampling_paramsr   Zvllm.sequencer   r   r   r   r   r   r'   rI   r   r1   r<   ro   rq   rw   rz   r   r   r   r   r   r!   r!   r!   r"   <module>   s@    (  &