o
    rqi                     @   s  U d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlm  mZ d dlmZ d dlmZ d dlmZ d dlmZmZ dd	lmZ dd
lmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$ ddlm%Z%m&Z&m'Z'm(Z(m)Z) e Z*dZ+i Z,ee-ee-ef f e.d< 			d&de-dede	e- de	e/ ded ddfddZ0dee/ dee/ee/ f dee/ fddZ1	d'de)d e-de
e-e%ee- f fd!d"Z2G d#d$ d$eZ3G d%d dZ4dS )(    N)deepcopy)AnyDictListLiteralOptionalTupleUnion)
get_logger)Module)pad_sequence)PreTrainedTokenizerBaseStoppingCriteria   )loss_scale_map)get_tools_prompt)
load_batch
load_imagerescale_image	fetch_one	to_devicedecode_base64)HistoryPrompt	StopWordsContextMessageszYou are a helpful assistant.TEMPLATE_MAPPINGdeletetemplate_type	tokenizerdefault_system
max_lengthtruncation_strategyr   Ztruncation_leftreturnTemplatec                 K   s0   t |  }t|d }|j||||fi | |S )Ntemplate)r   r   init_template)r   r    r!   r"   r#   kwargsZtemplate_infor'    r*   n/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/preprocessors/templates/base.pyget_template   s   r,   
token_listsub_token_listc                 C   st   t |tr|g}g }d}z#	 | |d |d }t|dks*|| ||t|  kr/|| q ty9   Y |S w )z,Find the index of a token in the token_list.Tr   r   )
isinstanceintindexlenappend
ValueError)r-   r.   residxr*   r*   r+   _findall'   s   
$
r8   <img>(.+?)</img>messagesreplace_tokenc                 C   s   g }g }t | D ]4\}}| }|d d u s|d dv r"|| q|t||d 7 }t|||d |d< || q| |fS )Ncontentrole)toolsystemZ	assistant)	enumeratecopyr4   refindallsub)r:   r;   patternimages_pathZnew_messagesimr*   r*   r+   replace_img_tag7   s   rI   c                   @   s@   e Zd ZdZdededdfddZdejd	ejde	fd
dZ
dS )StopWordsCriteriazAdding extra stop words in template to prevent unstoppable generation
        Like suffixes and chat seps in the template.
    r    
stop_wordsr%   Nc                 K   s   || _ || _|| _d| _d S )Nr/   )r    rK   tokenizer_kwargs	start_idx)selfr    rK   rL   r*   r*   r+   __init__K   s   
zStopWordsCriteria.__init__	input_idsscoresc                 K   s   | j dkrt|d d | _ | j}| j}|j|d| j d f dd  fi | j}|D ]'}t|tr:||v r9 dS q+t|dkrR|d  t| d  |krR dS q+dS )Nr/   r   r   iTF)	rM   r3   r    rK   decoderL   r0   strtolist)rN   rP   rQ   r)   r    rK   textZ	stop_wordr*   r*   r+   __call__Q   s   
*
*zStopWordsCriteria.__call__)__name__
__module____qualname____doc__r   r   rO   torchTensorboolrV   r*   r*   r*   r+   rJ   G   s    rJ   c                   @   s  e Zd ZdZg dZg dZdZdgZdZdZ	dZ
							
	ddededee dedee dee dededee ded ded ddfddZededefddZededefddZed ed!ee dee fd"d#Z			$	%	&dd edee d'ee d(ed) d*ed+eddfd,d-Zd.ed/edeeef fd0d1Zd2eeef ddfd3d4Zd2eeef ddfd5d6Zdd7d8Zd9d: Zd;d< Z dd2eeef d=ed>ede!eeef eeef f fd?d@Z"d2eeef de!eeef eeef f fdAdBZ#					ddCe$e% dDe$e% dEe$e& dFee dGee dHee dIee dJeddfdKdLZ'dCe$e% dEe$e& de!e$e% e$e& f fdMdNZ(edCe$e% dEe$e& de!e$e% e$e& f fdOdPZ)dQdR Z*dSedT dUed2eeef de$e% fdVdWZ+dUed2eeef de$e% fdXdYZ,dUed2eeef de$e% fdZd[Z-e.d\e$eeef  d]e$e d^ed_ ddfd`daZ/dCe$e% dEe$e& de!e$e% e$e& f fdbdcZ0	ddCe$e% dEee$e&  de!e$e e$e e$e& eeef f fdddeZ1edfe$e dge$e ddfdhdiZ2	ddje$eeef  d(edede!eeef eeef f fdkdlZ3dmedeeef fdndoZ4dpeeef dqeeef ddfdrdsZ5e	t	
ddue$e6j7 dve&dedw de6j7fdxdyZ8ddze$eeef  d{ee deeef fd|d}Z9e.d~e6j7dede$e fddZ:ed~e$e dede$e fddZ;ededefddZ<e.ddHedededefddZ=	ddddddd~e$e dedpeeeef  dedee$e  dee$e  fddZ>dHed2e?defddZ@dS )r&   a  A template class for all supported models.

    Args:
        prefix: Prefix tokens before the first turn's prompt
        prompt: A list of elements whose types are str and list of integers. The input query part of every turn.
        chat_sep: The chat separators between every turn.
        suffix: The end tokens after the chat finished.
        default_system: A default system instruction.
        system_prefix: The prefix if the `system` is not empty.
        auto_add_bos: By default, the bos_token is not added. The auto_add_bos option will determine
            whether to add it based on `tokenizer.encode('')`.
        tools_prompt: The tools prompt name
        tool_prompt: The tool prompt, usually useful when there is a tool role
        padding_side: The padding side
        infer_media_type: The media type supported by the multi-modals
        Examples:
            <start_of_output>system
You are a helpful assistant!<end_of_output>
<bos><start_of_output>Who are you?<end_of_output>
<start_of_output>assistant:I am a robot<end_of_output>
<start_of_output>Who are you?<end_of_output>
<start_of_output>assistant:I am a robot<end_of_output> # noqa
                                     ----------system------------                                       ---query----                                            --response- -----chatsep-----                 ---query---                                             --response- ----suffix-----
            ----------------------------system_prefix---------------------------- ---------------------------- prompt -------------------------------------                                  ---------------------------- prompt -------------------------------------

    )<image><video><audio><bbox><ref-object>)imagesvideosaudiosobjects	norm_1000r^   TFNreact_enright
interleaveprefixpromptchat_sepsuffixr!   system_prefixauto_add_bostools_prompttool_promptpadding_side)leftri   infer_media_type)rj   Zdialogueroundr%   c                 C   s   |||||fD ]}|d u st |tsJ q|dkrd }| |r/|d u s(J d|}| |}|| _|| _| jd u rKtdd |D sK|d u sKJ d|| _|| _| jd u| _	|| _
|| _d| _|| _d| _|| _|	d uro|	n| j| _|
| _|| _d S )N z'The prefix already contains {{SYSTEM}}.c                 S      g | ]}d |v qS 
{{SYSTEM}}r*   .0contextr*   r*   r+   
<listcomp>       z%Template.__init__.<locals>.<listcomp>z'The template does not support `system`.TF)r0   list_has_system_replace_systemrk   ro   anyrl   rm   support_multi_roundrn   r!   use_default_systemrp   _is_initrq   rr   rs   ru   )rN   rk   rl   rm   rn   r!   ro   rp   rq   rr   rs   ru   xr*   r*   r+   rO      s2   


zTemplate.__init__c                 C   s   dd | D S )zReplace system with the c                 S   s    g | ]}d |v r| d dqS )rz   rw   )replacer|   pr*   r*   r+   r~      s     z,Template._replace_system.<locals>.<listcomp>r*   rk   r*   r*   r+   r      s   zTemplate._replace_systemc                 C   s   t dd | D S )Nc                 S   rx   ry   r*   r   r*   r*   r+   r~      r   z(Template._has_system.<locals>.<listcomp>)r   r   r*   r*   r+   r      s   zTemplate._has_systemr    valuec                 C   sb   |du rdS g }|D ]$}t |tr)g }|D ]}t |tr!t| |}|| q|}|| q
|S )zRTurn `eos_token_id` to token id

        e.g. [['eos_token_id']] -> [[2]]
        N)r0   r   rS   getattrr4   )r    r   Z	res_valuevZres_vZsub_vr*   r*   r+   token_attr_to_id   s   


zTemplate.token_attr_to_idr   defaultr/   r"   r#   r$   
loss_scaler   c           
      K   s   | j du s	J dd| _ || _t|dd| _|dkrd| _n|dur5| jdus2J dt| dd || _|| _|| _t|t	rHt
|d| _n|| _|| _d	D ]}t| |}	| ||	}	t| ||	 qPdS )
a  Init template by a tokenizer
        Args:
            tokenizer: The tokenizer to tokenize the sentence
            default_system: The default system to use if the dataset does not provide one
            max_length: Max length of the sequence
            truncation_strategy: The truncation strategy
            loss_scale: The loss scale function to use
            rescale_image: Rescale image to reduce memory usage, default `-1` means no limitation
        Fz"The template has been initialized.Tis_multimodalNrw   7The template does not support `system`, template_type: r   )rk   rl   rm   rn   ro   )r   r    r   r   r!   ro   r"   r#   r0   rS   r   getr   r   r   setattr)
rN   r    r!   r"   r#   r   r   r)   keyr   r*   r*   r+   r(      s,   

zTemplate.init_templatemodeldatac                 C      i S )a  This method will be called after data_collator and before the forward
        Args:
            data: The `_data` field from the example batch, this field should be packed manually
        Returns:
            Any extra fields need to be passed into the model.forward
        r*   )rN   r   r   r*   r*   r+   post_encode   s   zTemplate.post_encodeexamplec                 C   s   dS )zCheck example validNr*   )rN   r   r*   r*   r+   check_example      zTemplate.check_examplec              	      s  |d }dD ]\}}| |rdd |D  t }|d dks"J  fddt|d D }| jdkrtt|d ||| D ]A\}}}	tt||d d	 }
|	rt|
d
kscJ d|
 d|d  |
dkrs||d d	  |d d	< q@|
dksJ d|d  q@dd || D ||< qtt|ddd |D }
dd || D ||< t|| }||
 }|dksJ d| d|
 || |d d d	  |d d d	< qdS )a:  Add default tags to example, this is for the multi-modal datasets
            1. For the round infer_media_type, this method will check the tag equals with the chat round
            2. Else, this method will try to add tags to the head of the messages
        Args:
            example: The input example
        r:   ))rd   r_   )rc   r^   )re   r`   c                 S      g | ]
}|d  dkr|qS r=   r?   r*   r|   messager*   r*   r+   r~         z-Template.add_default_tags.<locals>.<listcomp>   r   c                    s   g | ]
} ||d   qS )r   r*   r|   rG   Z	_messagesr*   r+   r~   	  r   rv   r<   r   zMThe model includes at most one media per round. However, this round contains z media_tags. query: zMissing media. query: c                 S      g | ]}|r|qS r*   r*   r|   rH   r*   r*   r+   r~     r   
c                 S   s   g | ]}|d  d qS )r   r<   r*   )r|   hr*   r*   r+   r~         c                 S   r   r*   r*   r   r*   r*   r+   r~     r   zNumber of media: z, number of media_tags: N)r   r3   rangeru   ziprB   rC   join)rN   r   r:   	media_keyZ	media_tagZn_roundhistoryrG   r   rH   Znum_media_tagsZ	num_mediaZnum_new_tagsr*   r   r+   add_default_tags   s>   

"
 $zTemplate.add_default_tagsc                 C   s   | j dv r't|dd\|d< }|dr|rtd|dp$g | |d< | j dv rVtddgdd	gd
dgD ]\}}}t|d||\|d< }||pRg | ||< q8dS dS )zzReplace the <img></img> with the images key and <image> tag

        Args:
            example: The input example
        >   TNr:   r^   rc   z3Do not mix use the <img></img> tag and <image> tag.re   rd   r`   r_   z<audio>(.+?)</audio>z<video>(.+?)</video>N)r   rI   r   r5   r   )rN   r   rF   ktagrE   Zmedias_pathr*   r*   r+   replace_media_tags  s"   


zTemplate.replace_media_tagsc                    sX  dddd}|  D ]}||r!t|| ttfs!|| g||< q
 |  | |drct|d trct	|d |d< g }|d D ]}t|trY|d |d ddd	}|
| qG||d< |dpig }|r|dst jryt|t}|dr j|d | jd
  jr jdkr fdd|D } jst|dd }||d< dS dS )ap  Preprocess multi-modal media resources in one example
            1. Wrap all values in media keys to list
            2. Replace <img></img> tags
            3. Add or check missing tags to examples
            4. Parse the string field in the `objects` field to jsons
            5. Load images if needed
        Args:
            example: The input example
        re   rc   rd   )audioimagevideorf   r   r   N)captionbbox	bbox_typer   to_typerealc                    s   g | ]}t | j qS r*   )r   )r|   imgrN   r*   r+   r~   i  r   z.Template._preprocess_media.<locals>.<listcomp>)rc   )valuesr   r0   tupler   r   r   rS   jsonloadsr4   load_mediasr   r   normalize_bboxgrounding_typer   )rN   r   Zmultimodal_keysr   rf   objectrc   r*   r   r+   _preprocess_media6  sD   




zTemplate._preprocess_mediac                 C   s*  |  }t| dd }|dpg }| jstd|d }dd |D }dd |D }|r2|d d	 nd
}|s=| jr<| j}n| jd usIJ d| |rct|t	rUt
|}|d u r[d
}|t|| j7 }|rs|smdd dg}||d d	< t|dkr| jsJ d| || |d< | | | | |S )Nr   toolsz[Template is not initialized, please use the `get_template` function to obtain the template.r:   c                 S      g | ]
}|d  dkr|qS r   r*   r   r*   r*   r+   r~   z  r   z'Template.preprocess.<locals>.<listcomp>c                 S   r   r   r*   r   r*   r*   r+   r~   {  r   r   r<   rw   r   r?   )r=   r<   r   z?The template does not support multi-round chat, template_type: )rA   r   r   r   r5   r   r!   ro   r0   rS   r   r   r   rq   r3   r   r   r   )rN   r   r   r   r:   Zsystem_roundr?   r*   r*   r+   
preprocessn  sH   



zTemplate.preprocess	streamingis_trainingc           	      K   sz   |  |}| j|fi |}|d }|s7d|v r7|d}|dus#J |d}t||j}|| || |s;|S |S )a  The entrance method of Template!

        Args:
            example: The input example
            streaming: If is streaming mode
            is_training: Use template in training
            **kwargs:
                model: The model instance, use only in `is_training=False`
        Returns:
            if not streaming mode, returns tuple of (example, tokenizer_kwargs), else return example only
        r   _datar   N)r   _encoder   popr   Zdeviceupdater   )	rN   r   r   r   r)   r6   inputsr   r   r*   r*   r+   encode  s   


zTemplate.encodec                    s^    d }t  fddtjD }| j|| j| j| d\}}|ddu r+|dd ||fS )z return: inputs, tokenizer_kwargsr:   c                    s   g | ]}  |qS r*   r   )r|   r   r   r*   r+   r~         z$Template._encode.<locals>.<listcomp>)rp   is_multi_modalr   labelsNr   )r   r&   special_keys_concat_and_tokenizer#   rp   r   r   )rN   r   r)   r:   r   r   rL   r*   r   r+   r     s   
zTemplate._encodecontext_listres_context_listloss_scale_listr?   queryresponseround0compute_lossc	                 C   s   d}	|durt |d }	t |}|D ]a}
t|
t rbd|
kr@|dus#J |r.| ||\}}n|gdg}}|| || qg d}||||	g}t||D ]\}}|dura||
v ra|
||}
qOt|
dkriq||
 |d qdS )z+Concat context list and replace placeholderNr   {{RESPONSE}}        )rz   z	{{QUERY}}z
{{ROUND0}}z
{{ROUND1}}r   )rS   r0   r   extendr   r   r3   r4   )rN   r   r   r   r?   r   r   r   r   Zround1r}   Zcontent_partZweight_partZold_str_listZnew_str_listZold_strZnew_strr*   r*   r+   _concat_context_list  s2   



zTemplate._concat_context_listc                 K   s  | dd}|r| ||\}}| j||fi |\}}g }g }g }d}tt||D ]C\}	\}
}t|
tr@||kr@||
 q+t|dkrW|d	| || |
  t|
trb||
 n
||
 || |}q+t|dkr|d	| || ||fS )z4Merge anything in the context to simplify the inputsr   Fr   r   rw   )r   split_special_tokenspre_tokenizer@   r   r0   rS   r4   r3   r   clear)rN   r   r   r)   r   r6   res_loss_scaletempZtemp_loss_scalerG   r}   r   r*   r*   r+   _simplify_context_list  s0   




zTemplate._simplify_context_listc           	      C   s   ddl m} g }g }t| |D ]B\}}g }tt|trG||tjD ]}||d |d g q"dd |D }|| ||gt	|  q|
| |
| q||fS )z`Split special tokens, for example `<image>`, `<video>`, this will help the replace_tag operationr   )split_str_parts_byr   r<   c                 S   r   r*   r*   )r|   cr*   r*   r+   r~     r   z1Template.split_special_tokens.<locals>.<listcomp>)utilsr   r   r0   r   rS   r&   special_tokensr   r3   r4   )	r   r   r   r6   Zloss_scale_resr}   r   Zcontextsdr*   r*   r+   r     s   

zTemplate.split_special_tokensc                 K   s   | j |fddd|d S )NF)Zreturn_attention_maskZadd_special_tokensrP   )r    )rN   r}   rL   r*   r*   r+   	_tokenize  s   zTemplate._tokenize
media_typer   r   r   r2   c                 C   s.   |dkr| j S |dkrdgS |dkrdgS dS )a  Override this function to do your own replace operation.

        This method is used to replace standard tags like `<image>` to some tokens that the model needs.

        Args:
            media_type: The modal.
            index: The index of the medias, for example 0 represents the first elements in `images`
            example: The input example

        Returns:
            The content or input_ids after replacement.
        r   r   r_   r   r`   N)image_placeholder)rN   r   r2   r   r*   r*   r+   replace_tag  s   zTemplate.replace_tagc                 C   s&   | d}|r|| }|d gS dgS )aY  Replace objects referenced by the bbox to contents or input_ids. This is useful in the grounding task.
        Override this function to do your own replace operation.

        Args:
            index: The index in the `objects` key
            example: The input example

        Returns:
            The contents or input_ids replaced
        rf   r   rb   r   )rN   r2   r   rf   object_r*   r*   r+   replace_object1  s
   

zTemplate.replace_objectc                 C   s   | d}|r_|| }t|d d tr?d}|d D ]}|d|d  d|d  d|d	  d|d
  d	7 }q|dd }|gS d|d d  d|d d  d|d d	  d|d d
  d	gS dgS )aW  Replace bbox pointing to the objects to contents or input_ids. This is useful in the grounding task.
        Override this function to do your own replace operation.

        Args:
            index: The index in the `objects` key
            example: The input example

        Returns:
            The contents or input_ids replaced
        rf   r   r   rw   z[(,r   z),(r      z)],Nr/   z)]ra   )r   r0   r   )rN   r2   r   rf   r   Zall_objectsZ
sub_objectr*   r*   r+   replace_boxC  s   
4@zTemplate.replace_boxrf   rc   r   )r   rg   norm_1c                    s  |r|sdS |D ]}|d }|d }|d }|| }|dkrg dkr#q|j |j}	}
t|d trPg }|D ]}| fddt||	|
|	|
gD  q5||d< n fd	dt||	|
|	|
gD |d<  |d< q|d
kr d
krpq dkr~dd |D |d< n dkr|j |j}	}
dd t||	|
|	|
gD |d<  |d< q|dkrԈ dkrq d
krdd |D |d< n dkr|j |j}	}
dd t||	|
|	|
gD |d<  |d< qdS )a]  Normalize bbox to needed.
        to_type support real/norm_1000/norm_1, which literally means the coordinates in real, or normalized by 1000,
            or normalized by 1.

        Args:
            objects: The objects containing the bbox
            images: The images list
            to_type: The coordinate type needed by the model.
        Nr   r   r   r   r   c                    0   g | ]\}} d krt || d n|| qS rg     r1   r|   coorddimr   r*   r+   r~   w      z+Template.normalize_bbox.<locals>.<listcomp>c                    r   r   r   r   r   r*   r+   r~   }  r  rg   r   c                 S   s   g | ]}|d  qS g     8@r*   r|   r   r*   r*   r+   r~     r   c                 S   s    g | ]\}}t |d  | qS r  r   r   r*   r*   r+   r~     s    c                 S   s   g | ]}t |d  qS )r   r   r  r*   r*   r+   r~     r   c                 S   s   g | ]
\}}t || qS r*   r   r   r*   r*   r+   r~     r   )widthheightr0   r   r4   r   )clsrf   rc   r   r   r   r   r7   r   r  r  Zbboxes_boxr*   r   r+   r   \  sX   






 zTemplate.normalize_bboxc                 K   s  | d}g }g }dD ]	}d|| d< qt||D ]l\}}	dD ]#}|d| dkrC| ||| d |}
|| d  d7  <  n6q |dkr]| | d	d|}
| d	dd |d	< n|d
krv| | dd|}
| ddd |d< n|g}
||
7 }||	gt|
 7 }q||fS )aA  This method happens before tokenization, replace standard tags to the contents or input_ids needed by
        the model.

        Args:
            context_list: The content list
            loss_scale_list: The loss scale list
        Returns:
            The context_list and loss_scale_list after replacement.
        r   r   r   _index<>r   rb   Zobject_indexra   Z	box_index)r   r   r   r   r   r3   )rN   r   r   r)   r   r6   r   r   r}   r   Zc_listr*   r*   r+   r     s,   
zTemplate.pre_tokenizec                 C   s   g }g }g }i }|du rdgt | }tt||D ]D\}\}}	t|tr:| |}
| ||
 | j|fi |
}n|}||7 }|| dkrK||7 }n	|dgt | 7 }||	gt |  q||||fS )z+return: input_ids, labels, tokenizer_kwargsNr   )	r3   r@   r   r0   rS   _get_tokenizer_kwargs_concat_tokenizer_kwargsr   r   )rN   r   r   rP   r   r   rL   rG   r}   Zloss_weightcurr_tokenizer_kwargsr-   r*   r*   r+   _encode_context_list  s$   


zTemplate._encode_context_listr   suffix_tokens_idc                 C   s   t |}d}tdt | D ]4}| |d  dkr| | dkr|}|dkrA| |d  dkrA| | dkrA|| }||krA|| ||| < qd S )Nr   r   r  )r3   r   )r   r  Z
suffix_lenstartrG   lengthr*   r*   r+   use_dynamic_eos  s   $zTemplate.use_dynamic_eosr:   c                     s  dd  D }dd  D  t |dkr|d d }nd}t  dks%J t  dkrK d d	kr>d d g}d d g}n5 d dg} d dg}n(t  d
 dksUJ  fddtt  d
 D } fddtt  d
 D }g }g }	|r| jj}
t|
tr|
| jdv r||
g |	d | j	 }|du rdd |D }|du st
dd |D r| j}n| j}| j|||	|d tt||D ]v\}\\}}\}}|dkr| j	 n|	 }g }d}|t |d k rdd |D }|d ||d  d r| j}n|dur|d | j}d}|s|r?| j|||	||||| jp)|d ||7 }|	|r7dgndgt | 7 }	qi }| jrt |t|d d du }t |}tddgt|| |td|| gD ]9\}}| j|| |	| fi |\}}| ||\}}}}|||| d< || d< | jr||| d< qk|d |d   }|d! |d"  }|d d du rt |d" dksJ d|d"< n'| j||	fi |\}}	| ||	\}}}}|dur| || | jd  |d d du r d}| jdurH|d#kr&t || jkr&td$t | d%| j d& i i fS || j d }|dur;|| j d }|durH|| j d }||d'< ||d(< | jrX||d)< ||fS )*z2
        return: inputs, tokenizer_kwargs
        c                 S   r   r   r*   r   r*   r*   r+   r~     r   z1Template._concat_and_tokenize.<locals>.<listcomp>c                 S   r   r   r*   r   r*   r*   r+   r~     r   r   r<   Nr   r=   r   r   c                    (   g | ]} | d   |d  d  gqS )r<   r   r*   r   r:   r*   r+   r~        ( c                    r  )r=   r   r*   r   r  r*   r+   r~     r  rw   r   c                 S      g | ]}d |vr|qS ry   r*   r{   r*   r*   r+   r~   
  r   c                 S   rx   ry   r*   r{   r*   r*   r+   r~     r   )r?   r>   Fc                 S   r  ry   r*   r{   r*   r*   r+   r~     r   r   T)r   r   r?   r   r   g      ?r/   Zanswerrl   Z
_input_idsZ_labelsZ_loss_scaleZprompt_input_idsZanswer_input_idsZprompt_labelsZanswer_labelsr   zCurrent length of row(z ) is larger than the max_length(z), deleted.rP   r   r   )r3   r   r    bos_token_idr0   r1   r   r4   rl   rA   r   rk   ro   r   r@   r   rr   rm   rn   compute_per_round_lossoutput_prompt_answerr]   slicer   r  r   r  r"   loggerwarn) rN   r:   r#   rp   r)   r?   r   Zhistory_rolesr   r   r  rl   rk   rG   qrZqrrrr   Zextra_context_listZ	is_suffixr   Z
answer_lenZ	total_lenr   Z_sliceZ_res_context_listZ_loss_scale_listrP   r   r   rL   r*   r  r+   r     s   

"



	





zTemplate._concat_and_tokenizer}   c                 C   r   )zreturn: curr_tokenizer_kwargsr*   )rN   r}   r*   r*   r+   r  [  r   zTemplate._get_tokenizer_kwargsrL   r  c                 C   s   t |dksJ d S )Nr   )r3   )rN   rL   r  r*   r*   r+   r  _  s   z!Template._concat_tokenizer_kwargsr   	sequencespadding_value)ri   rt   c           
      C   s   |dk}|rt | d|dS tdd | D }g }| D ]'}||d }dg| d d  |dg }t|t|d	|}	||	 qt	|S )
zPad sequence by some side

        Args:
            sequences: The input sequences in tensor.
            padding_value: The padding value
            padding_side: The padding side

        Returns:
            A tensor after padding
        ri   T)Zbatch_firstr#  c                 S   s   g | ]}| d qS )r   )size)r|   sr*   r*   r+   r~   t  r   z)Template.pad_sequence.<locals>.<listcomp>r   r   r   constant)
r   maxr$  r  Fpadr   r4   r[   stack)
r"  r#  rs   padding_rightmax_lenZpadded_sequencesseqZ
pad_lengthZ	pad_tupleZ
padded_seqr*   r*   r+   r   b  s   
zTemplate.pad_sequencebatch
padding_toc                    sD  | j }|jdus
J | jdk}i }d|d v r2dd |D |d< fddttD |d< n d	|d v rRd
d |D   |d	<  fddtt D |d< dD ]|d v rgfdd|D |< qT|durd	|v srJ ||d	 d jd  }|dkrtg d|jddddgD ] \}|v rt| d |rd|fn|dfd|| d< qtg d|jdddddgD ]\}|v r| 	| || j|< qd|d v rdd |D |d< dd |D }t|dkrt
||d< dd |D }	t|	dkrt
|	|d< dd |D }
t|
dkr t
|
|d< |S )z
        Args:
            batch(`List[Dict[str, Any]]`): The input data in batch
            padding_to(`int`, optional): Whether padding the batch to a fixed length, if none, the batch
                will be padded to the `longest`
        Nri   inputs_embedsr   c                 S      g | ]}|d  qS r0  r*   r|   br*   r*   r+   r~     r   z*Template.data_collator.<locals>.<listcomp>c                    s&   g | ]}t j | jd  t jdqS )r   Zdtype)r[   onesshapeint64r   r2  r*   r+   r~     s    attention_maskrP   c                 S   s   g | ]	}t |d  qS rP   r[   Ztensorr3  r*   r*   r+   r~         c                    s$   g | ]}t jt | t jd qS )r5  )r[   r6  r3   r8  r   r:  r*   r+   r~     s   $ )r   r   position_idsc                    s   g | ]	}t |  qS r*   r;  r3  )r   r*   r+   r~     r<  r/   )rP   r9  r   r   r=  r  r   r&  )rP   r0  r9  r   r   r=  r   c                 S   r1  )r   r*   r3  r*   r*   r+   r~     r   c                 S   "   g | ]}| d dur|d  qS )pixel_valuesNr   r3  r*   r*   r+   r~        " r?  c                 S   r>  )image_sizesNr   r3  r*   r*   r+   r~     r@  rA  c                 S   r>  )pixel_values_videosNr   r3  r*   r*   r+   r~     r@  rB  )r    Zpad_token_idrs   r   r3   r7  r   r(  r)  r   r[   concat)rN   r.  r/  r    r+  r6   Zpadding_lenr   r?  rA  rB  r*   )rP   r0  r   r+   data_collator  s`   


 zTemplate.data_collatorgenerate_idsinput_token_lenc                 C   sF   t |tjr
| }t|dkrt |d ttfr|d }| ||S )Nr   r   )r0   r[   r\   rT   r3   r   r   _get_generate_ids)r  rE  rF  r*   r*   r+   get_generate_ids  s
   zTemplate.get_generate_idsc                 C   s   | |d  S Nr*   )rE  rF  r*   r*   r+   rG    s   zTemplate._get_generate_idscpc                 C   s   d|   kr
dks[n d|   krdks[n d|   kr dks[n d|   kr+dks[n d	|   kr6d
ks[n d|   krAdks[n d|   krLdks[n d|   krXdkr]dS  dS dS dS )z6Checks whether CP is the codepoint of a CJK character.i N  i  i 4  iM  i   iߦ i  i? i@ i i  i i   i  i  i TFr*   )rJ  r*   r*   r+   _is_chinese_char  s   BB(zTemplate._is_chinese_char	print_idxis_finishedc                 C   sX   |rt |S |dst |dkr | t|d r t |}|S t|dd |}|S )Nr   r   r/    r   )r3   endswithrK  ordr'  rfind)r  r   rL  rM  r*   r*   r+   _get_safe_print_idx  s   (zTemplate._get_safe_print_idx)rL   return_deltarL  first_num_spacerS  rT  c                C   s  |d u ri }| j }t|dr| }t| jd tr;|r/|r;|t| jd  d  | jd kr;|d t| jd   }|rJ|rP|dd  | j jgkrP|d d }|j|fi |}|d ur|}	|d }t|t|	d }
|sz|dkrz|
}||	d< |
|k rd||
  | }n|
|kr||
| d  }t| jd t
r|r|r|t| jd  d  | jd krtt|t| jd  d}|d urt||d }|d | }|d ur|d }|s| ||d |d< |d |d  }|r||d  }|S |r|rJ |S )NrT   r/   r   rN  )r    hasattrrT   r0   rn   r   r3   Zeos_token_idrR   lstriprS   r'  rR  )rN   rE  rM  rL   rS  rL  rT  r    r   Zres_fnsZcur_num_spacer7   Zold_print_idxr*   r*   r+   generate_ids_to_response  sX   
"
"z!Template.generate_ids_to_responsec                 C   s   |S rI  r*   )rN   r   r   r*   r*   r+   post_process_generate_response  s   z'Template.post_process_generate_response)NNFrh   Nri   rj   )NNr   r   r/   )r%   N)FF)NNNNTrI  )F)r   ri   )T)ArW   rX   rY   rZ   r   r   r   r   r   r  r  r   r   rS   r]   r   rO   staticmethodr   r   r   r   r1   r(   r   r   r   r   r   r   r   r   r   r   r   r   r   r   floatr   r   r   r   r   r   r   classmethodr   r   r  r  r   r  r  r[   r\   r   rD  rH  rG  rK  rR  rW  dictrX  r*   r*   r*   r+   r&   c   st   	

' 
)	
"8<+2		
$
!


"":
(
"
 
v&09 


	
7)NNr   )r9   )5r   rB   rA   r   typingr   r   r   r   r   r   r	   r[   Ztorch.nn.functionalnnZ
functionalr(  Z
modelscoper
   Ztorch.nnr   Ztorch.nn.utils.rnnr   Ztransformersr   r   r   r   rq   r   r   r   r   r   r   r   r   r   r   r   r   r   r  ZDEFAULT_SYSTEMr   rS   __annotations__r1   r,   r8   rI   rJ   r&   r*   r*   r*   r+   <module>   sT   
$ 
*
