o
    )i$                     @   sZ   d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	m
Z
 dZG dd dejZdS )	z4Sampler layer implementing TPU supported operations.    N)LogprobsTensorsSamplerOutput)TopKTopPSampler)TPUSupportedSamplingMetadatagh㈵>c                       s   e Zd Z fddZdejdedefddZdejdejdejfd	d
Z	dejdejfddZ
dejdedejfddZdejdejfddZdejdedejdefddZdejdejdejfddZ  ZS )Samplerc                    s   t    t | _d S N)super__init__r   topk_topp_sampler)self	__class__ f/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/sample/tpu/sampler.pyr	      s   
zSampler.__init__logitssampling_metadatareturnc                 C   s.   | tj}| ||}t|dd d}|S )N)Zsampled_token_idsZlogprobs_tensors)totorchfloat32sampler   	unsqueeze)r   r   r   sampledZsampler_outputr   r   r   forward   s   zSampler.forwardtempc                 C   s   | |jddS )N   dim)Zdiv_r   )r   r   r   r   r   r   apply_temperature)   s   zSampler.apply_temperaturec                 C   s   |j dddS )Nr   r   )Zargmaxviewr   r   r   r   r   greedy_sample0   s   zSampler.greedy_samplec                 C   sl   |  |}|jd usJ | ||j}|jd ur| ||j}| ||j|j|j}t	
|jtk ||}|S r   )r"   Ztemperaturer   min_papply_min_pr
   
generatorsZtop_kZtop_pr   where_SAMPLING_EPS)r   r   r   Zgreedy_sampledZrandom_sampledr   r   r   r   r   3   s   

zSampler.samplec                 C   s   |j dtjdS )Nr   )r   Zdtype)Zlog_softmaxr   r   r!   r   r   r   compute_logprobsO   s   zSampler.compute_logprobslogprobsnum_logprobs	token_idsc           	      C   st   t j||dd\}}|d}|d|}||kd}t j||fdd}t j||fdd}|t j}t|||S )a  
        Gather logprobs for topk and sampled/prompt token.

        Args:
          logits: (num tokens) x (vocab) tensor
          num_logprobs: minimum number of logprobs to
                        retain per token
          token_ids: prompt tokens (if prompt logprobs)
                     or sampled tokens (if sampled
                     logprobs); 1D token ID tensor
                     with (num tokens) elements

        Returns:
          Top-k int indices tensor, (num tokens) x (num_logprobs + 1)
          Top-k float logprobs tensor, (num tokens) x (num_logprobs + 1)
          Sampled token rank tensor, (num tokens)
        r   r   r   )	r   Ztopkr   gathersumcatr   Zint32r   )	r   r)   r*   r+   Ztopk_logprobsZtopk_indicesZtoken_logprobsZtoken_ranksindicesr   r   r   gather_logprobsR   s   

zSampler.gather_logprobsr#   c                 C   sP   t jjj|dd}t j|ddd}|d| }||k}|| td  |S )zI
        Filters logits using adaptive probability thresholding.
        r   r   T)r   Zkeepdimr   inf)r   nnZ
functionalZsoftmaxZamaxr   Zmasked_fill_float)r   r   r#   Zprobability_valuesZmax_probabilitiesZadjusted_min_pZvalid_token_maskr   r   r   r$   ~   s   	zSampler.apply_min_p)__name__
__module____qualname__r	   r   ZTensorr   r   r   r   r"   r   r(   intr   r0   r$   __classcell__r   r   r   r   r      sR    



,r   )__doc__r   Ztorch.nnr2   Zvllm.v1.outputsr   r   Z$vllm.v1.sample.ops.topk_topp_samplerr   Zvllm.v1.sample.tpu.metadatar   r'   Moduler   r   r   r   r   <module>   s   