o
    )ih#                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZG dd dejZdS )z>A layer that samples the next tokens from the model's outputs.    N)LogprobsMode)is_pin_memory_available)LogprobsTensorsSamplerOutput)SamplingMetadata)apply_bad_words)batched_count_greater_than)apply_all_penalties)TopKTopPSamplergh㈵>c                       s
  e Zd Zddef fddZdejdedefdd	Z	dejd
ejdejfddZ
dejdejfddZdejdedejfddZdejdejfddZdejdedejdefddZdejdedejfddZdejdedejfddZdejdedejfddZ  ZS )Samplerraw_logprobslogprobs_modec                    s$   t    t | _t | _|| _d S N)super__init__r
   topk_topp_samplerr   Z
pin_memoryr   )selfr   	__class__ b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/sample/sampler.pyr      s   

zSampler.__init__logitssampling_metadatareturnc           	      C   s   |j }|d ur| jdkr| |}n	| jdkr| }|tj}| ||}| ||}|j	j
D ]}||}q1| ||}|d urW| jdkrN| |}n	| jdkrW| }| ||}| }|d u rgd n| j|||d}|tj}t|d|d}|S )Nr   Z
raw_logitsZprocessed_logprobsZprocessed_logits)	token_ids)Zsampled_token_idslogprobs_tensors)Zmax_num_logprobsr   compute_logprobsclonetotorchfloat32apply_allowed_token_idsr   logitsprocsZnon_argmax_invariantapplyapply_penaltiessamplelonggather_logprobsint32r   	unsqueeze)	r   r   r   num_logprobsr   	processorsampledr   Zsampler_outputr   r   r   forward   s8   



zSampler.forwardtempc                 C   s   | |jddS )N   dim)Zdiv_r*   )r   r   r/   r   r   r   apply_temperature]   s   zSampler.apply_temperaturec                 C   s   |j dddS )Nr   r1   )Zargmaxviewr   r   r   r   r   greedy_samplee   s   zSampler.greedy_samplec                 C   s   |j r|jrJ |jrd}n
| |}|j r|S |jdusJ | ||j}|jjD ]}||}q*| ||j	|j
|j}|du rC|S tj|jtk |||d}|S )zSample logits based on sampling metadata.

        The various logits processing functions called in this method
        may update the logits tensor in-place.
        N)out)Z
all_greedyZ
all_randomr6   Ztemperaturer3   r#   Zargmax_invariantr$   r   
generatorsZtop_kZtop_pr    where_SAMPLING_EPS)r   r   r   Zgreedy_sampledr,   Zrandom_sampledr-   r   r   r   r&   h   s6   
zSampler.samplec                 C   s   |j dtjdS )Nr   )r2   dtype)Zlog_softmaxr    r!   r5   r   r   r   r      s   zSampler.compute_logprobslogprobsr+   r   c           	      C   s   |j tjksJ tj||dd\}}|d}|d|}t||}tj||fdd}tj||fdd}|tj	}t
|||S )a  
        Gather logprobs for topk and sampled/prompt token.

        Args:
          logprobs: (num tokens) x (vocab) tensor
          num_logprobs: minimum number of logprobs to
                        retain per token
          token_ids: prompt tokens (if prompt logprobs)
                     or sampled tokens (if sampled
                     logprobs); 1D token ID tensor
                     with (num tokens) elements
                     Must be int64.

        Returns:
          Top-k int indices tensor, (num tokens) x (num_logprobs + 1)
          Top-k float logprobs tensor, (num tokens) x (num_logprobs + 1)
          Sampled token rank tensor, (num tokens)
        r   r1   r0   )r;   r    Zint64Ztopkr*   gatherr   catr   r)   r   )	r   r<   r+   r   Ztopk_logprobsZtopk_indicesZtoken_logprobsZtoken_ranksindicesr   r   r   r(      s   


zSampler.gather_logprobsc                 C   s4   |j s|jd us
J t||j|j|j|j|j}|S r   )Zno_penaltiesZprompt_token_idsr	   Zpresence_penaltiesZfrequency_penaltiesZrepetition_penaltiesoutput_token_idsr   r   r   r   r   r   r%      s   zSampler.apply_penaltiesc                 C   s    |j d ur||j td |S )Nz-inf)Zallowed_token_ids_maskZmasked_fill_floatrA   r   r   r   r"      s
   
zSampler.apply_allowed_token_idsc                 C   s   |j rt||j |j |S r   )Zbad_words_token_idsr   r@   rA   r   r   r   r      s   zSampler.apply_bad_words)r   )__name__
__module____qualname__r   r   r    ZTensorr   r   r.   r3   r6   r&   r   intr   r(   r%   r"   r   __classcell__r   r   r   r   r      sn    
A

1
.


r   )__doc__r    Ztorch.nnnnZvllm.configr   Z
vllm.utilsr   Zvllm.v1.outputsr   r   Zvllm.v1.sample.metadatar   Zvllm.v1.sample.ops.bad_wordsr   Zvllm.v1.sample.ops.logprobsr   Zvllm.v1.sample.ops.penaltiesr	   Z$vllm.v1.sample.ops.topk_topp_samplerr
   r:   Moduler   r   r   r   r   <module>   s   