o
    W+ i!f                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z! G dd dZ"G dd de"Z#G dd de"Z$defddZ%dS )    N)contextmanager)AnyDict	GeneratorListOptionalUnion)version)nn)PreTrainedModel)GreedySearchDecoderOnlyOutput) GreedySearchEncoderDecoderOutputLogitsProcessorListSampleDecoderOnlyOutputSampleEncoderDecoderOutputStoppingCriteriaListvalidate_stopping_criteria)Input)
Frameworks)device_placementc                   @   s   e Zd ZdefddZdS )StreamingOutputMixinreturnc                 O   s   t )z
        Support the input of Model and Pipeline.
        The output is a `Generator` type,
        which conforms to the output standard of modelscope.
        )NotImplementedError)selfargskwargs r   m/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/utils/streaming_output.pystream_generate   s   z$StreamingOutputMixin.stream_generateN)__name__
__module____qualname__r   r   r   r   r   r   r      s    r   c                
   @   s   e Zd Zdeeee f defddZdedee	e
f dee	e
f fddZdee	e
f d	ee	e
f d
ee	e
f defddZdeee	e
f  ded	ee	e
f d
ee	e
f def
ddZdS )PipelineStreamingOutputMixininputr   c                    s   t jts
J djsjrjd rjs  |dd}jdi |\ }}t |t	r] fdd|D }|du rSg }|D ]}	|
|	|| qD|S ||||}|S | }
|
||}|S )a  
        Similar to the `Pipeline.__call__` method.
        it supports the input that the pipeline can accept,
        and also supports batch input.

        self.model must be a subclass of StreamingOutputMixin
        and implement the stream method.
        z,pipeline.model must be StreamingOutputMixin!r   
batch_sizeNc                    s   g | ]} | qS r   )_preprocess_with_check).0ipreprocess_paramsr   r   r   
<listcomp><   s    
z@PipelineStreamingOutputMixin.stream_generate.<locals>.<listcomp>r   )
isinstancemodelr   Zhas_multiple_modelsmodelsZ_model_prepareZprepare_modelpopZ_sanitize_parameterslistappend_stream_single_stream_batchr%   )r   r#   r   r   r$   forward_paramspostprocess_paramsmodel_input_listoutputZelemodel_inputr   r(   r   r   '   s@   

z,PipelineStreamingOutputMixin.stream_generater)   c                 C   s   |  | | j|fi |S N)Z_check_input
preprocess)r   r#   r)   r   r   r   r%   Q   s   
z3PipelineStreamingOutputMixin._preprocess_with_checkr7   r3   r4   c              	   c   s    t | j| jV | jtjkr6t  | jr| |}| jj	|fi |}W d    n1 s0w   Y  n
| jj	|fi |}|D ]}| j
|fi |}| | |V  qBW d    d S 1 saw   Y  d S r8   )r   	frameworkdevice_namer   torchno_grad_auto_collate_collate_fnr,   r   postprocess_check_output)r   r7   r3   r4   streamoutr   r   r   r1   W   s,   


"z+PipelineStreamingOutputMixin._stream_singler5   r$   c              
   #   sN   g }g }t | j| j tdt||D ]Z}t|| t|}|| }	||	 | ||| }
| jtj	krct	
  | jrF| |
}
|| jj|
fi | W d    n1 s]w   Y  q|| jj|
fi | qd gt| }d}|t|k rd}tt||D ]x\}\}}	zdt|}
t|	D ]Y i }|
 D ]8\}}|d urt|ttfrt|d t	jrt| fdd|D ||< q|  ||< q|  d  ||< q| j|fi |}| | ||   }|||< qW q ty   |d7 }Y qw |V  |t|k sW d    |S W d    |S 1 s w   Y  |S )Nr   c                 3   s     | ]}|  d   V  qdS )   Nr   )r&   eZ	batch_idxr   r   	<genexpr>   s
    
z=PipelineStreamingOutputMixin._stream_batch.<locals>.<genexpr>rD   )r   r:   r;   rangelenminr0   Z_batchr   r<   r=   r>   r?   r,   r   	enumeratezipnextitemsr+   tupler/   ZTensortyper@   rA   StopIteration)r   r5   r$   r3   r4   Zstream_listZreal_batch_sizesr'   endZreal_batch_sizeZbatched_outZoutput_listZstop_streamsrB   rC   kelementZoutput_indexr   rF   r   r2   k   s   





3
33z*PipelineStreamingOutputMixin._stream_batchN)r   r    r!   r   r   r   r   r   r   strr   r%   r1   intr2   r   r   r   r   r"   %   s6    
*







r"   c                   @   sF  e Zd ZdefddZededefddZe										dd	e	j
d
ee dee dee dee deeeee f  dee dee dee dee dedefddZe											dd	e	j
d
ee dee dee dee dee deeeee f  dee dee dee dee dedefddZdS )#PretrainedModelStreamingOutputMixinr   c                 O   sd   t | tr| n| j}t |tsJ d| | |j|i |W  d    S 1 s+w   Y  d S )Nz-self or self.model must be `PretrainedModel`!)r+   r   r,   _replace_generategenerate)r   r   r   r,   r   r   r   r      s   $z3PretrainedModelStreamingOutputMixin.stream_generater,   c                 c   s    t tjt dkrd}d}nt tjt dkr!d}d}nd}d}t||}t||}t||t| j| t||t| j	| d V  t||| t||| d S )Nz4.43.0stream_greedy_searchZ_samplez4.39.0Z_greedy_searchZgreedy_searchsample)
r	   parsetransformers__version__getattrsetattrtypes
MethodTyperZ   stream_sample)r   r,   Zgreedy_search_nameZsample_nameZorigin_greedy_searchZorigin_sampler   r   r   rX      s0   

z5PretrainedModelStreamingOutputMixin._replace_generateNF	input_idslogits_processorstopping_criteria
max_lengthpad_token_ideos_token_idoutput_attentionsoutput_hidden_statesoutput_scoresreturn_dict_in_generatesynced_gpusc              	   k   s   |d ur|nt  }|d ur|nt }|d ur"tdt t||}|d ur(|n| jj}|d ur2|n| jj}t	|t
r>|g}|d urKt||jnd }|	d urS|	n| jj}	|d ur]|n| jj}|d urg|n| jj}|
d urq|
n| jj}
|
r{|	r{dnd }|
r|rdnd }|
r|rdnd }|
r|rdnd }|
r| jjr|r|d dnd }|r|d dnd }tj|jd tj|jd}d}	 |rt|rd
nd|j}tj|tjjd | d
krd S | j|fi |}| di |d	||d}|r|rq|jd d dd d f }|||}|
rG|	r||f7 }|r5|| jjr%|j fn|j!f7 }| jjr5||j"f7 }|rG|| jjrB|j#fn|j$f7 }tj%|dd}|d urf|d u r\t&d|| |d|   }tj'||d d d f gdd}|
r| jjrt(|||||||dV  nt)||||dV  n|V  | j*||| jjd}|d ur|+|,|jd d-|.dj/dd}|0 dkrd	}|||rd	}|r|sd S q)Nz`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.r   encoder_outputs
attentionshidden_statesr   ZdtypedeviceFT              ?opZreturn_dictrj   rk   dimGIf `eos_token_id` is defined, make sure that `pad_token_id` is defined.rD   	sequencesscoresencoder_attentionsencoder_hidden_statesdecoder_attentionscross_attentionsdecoder_hidden_statesr~   r   rp   rq   is_encoder_decoder)1r   r   warningswarnUserWarningr   generation_configrh   ri   r+   rV   r<   tensortors   rl   rj   rk   rm   configr   getonesshapelongdist
all_reduceReduceOpSUMitemprepare_inputs_for_generationlogitsr   rp   r   r   rq   Zargmax
ValueErrorcatr   r   #_update_model_kwargs_for_generationmultilene	unsqueezeprodmax)r   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   model_kwargseos_token_id_tensorr   r   r   r   r   r   unfinished_sequencesthis_peer_finishedthis_peer_finished_flagmodel_inputsoutputsnext_token_logitsZnext_tokens_scoresnext_tokensr   r   r   rZ      s2  














z8PretrainedModelStreamingOutputMixin.stream_greedy_searchlogits_warperc              	   k   s   |d ur|nt  }|d ur|nt }|d ur"tdt t||}|d ur(|nt  }|d ur1|n| jj}|d ur;|n| jj}t	|t
rG|g}|d urTt||jnd }|
d ur\|
n| jj}
|d urf|n| jj}|	d urp|	n| jj}	|d urz|n| jj}|r|
rdnd }|r|rdnd }|r|rdnd }|r|	rdnd }|r| jjr|r|d dnd }|	r|d dnd }tj|jd tj|jd}d}	 |rt|rd
nd|j}tj|tjjd | d
krd S | ||}| j|fi |}| di |d	||	d}|r|rq|j d d dd d f }|||}|||}|r]|
r.||f7 }|rK|| jjr;|j!fn|j"f7 }| jjrK||j#f7 }|	r]|| jjrX|j$fn|j%f7 }t&j'j(|dd}tj)|dd*d}|d ur|d u r}t+d|| |d|   }tj,||d d d f gdd}|r| jjrt-|||||||dV  nt.||||dV  n|V  | j/||| jjd}|d ur|0|1|jd d2|3dj4dd}|5 dkrd	}|||rd	}|r|sd S q)Nz`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.r   ro   rp   rq   r   rr   FTrt   ru   rv   rx   ry   rz   rD   )Znum_samplesr|   r}   r   r   )6r   r   r   r   r   r   r   rh   ri   r+   rV   r<   r   r   rs   rl   rj   rk   rm   r   r   r   r   r   r   r   r   r   r   r   Z_get_initial_cache_positionr   r   r   rp   r   r   rq   r
   Z
functionalZsoftmaxZmultinomialZsqueezer   r   r   r   r   r   r   r   r   r   r   )r   rd   re   rf   r   rg   rh   ri   rj   rk   rl   rm   rn   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Znext_token_scoresZprobsr   r   r   r   rc   t  s>  















z1PretrainedModelStreamingOutputMixin.stream_sample)
NNNNNNNNNF)NNNNNNNNNNF)r   r    r!   r   r   r   r   rX   staticmethodr<   Z
LongTensorr   r   r   rV   r   r   boolrZ   rc   r   r   r   r   rW      s    	
 .	
rW   r,   c                 C   s6   t | }|tf}t |j|i | j}|j| j |S r8   )rP   rW   r   r   __dict__update)r,   Zpretrained_classZparent_classesZ	new_modelr   r   r   add_stream_generate+  s   r   )&ra   r   
contextlibr   typingr   r   r   r   r   r   r<   Ztorch.distributeddistributedr   r]   	packagingr	   r
   r   Ztransformers.generationr   r   r   r   r   r   r   Zmodelscope.pipelines.baser   Zmodelscope.utils.constantr   Zmodelscope.utils.devicer   r   r"   rW   r   r   r   r   r   <module>   s.         