o
    )i>                     @   s   d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZ eeZedG d	d
 d
eZdS )    )Sequence)OptionalUnionN)PreTrainedTokenizerBase)ChatCompletionRequestDeltaMessage)init_logger)ReasoningParserReasoningParserManagerZgranitec                       s$  e Zd ZdZdef fddZdededee	e e	e f fdd	Z
d
edededee dee dee deedf fddZdedefddZdedefddZdededefddZdedededefddZdedededededefdd Zdedee	e e	e e	e f fd!d"Z  ZS )#GraniteReasoningParserz
    Reasoning parser for IBM Granite.

    IBM granite models currently use "Here is my thought process:"
    and "Here is my response:" to separate its thinking / response outputs.
    	tokenizerc                    sr   t  | d| _d| _t| j d| j dtj| _ddg| _ddg| _	d	| _
d
| _tdd | jD | _d S )Nz&(?:Here's|Here is) my thought process:z(?:Here's|Here is) my response:z(.*?)z(.*)zHere's my thought process:zHere is my thought process:zHere's my response:zHere is my response::ZHerec                 s   s    | ]}t |V  qd S N)len.0think_start r   s/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/reasoning/granite_reasoning_parser.py	<genexpr>4   s    
z2GraniteReasoningParser.__init__.<locals>.<genexpr>)super__init__Zthink_start_exprZresponse_start_exprrecompileDOTALLreasoning_regexvalid_think_startsvalid_response_startsseq_boundary_endseq_boundary_startmaxZlongest_think_start)selfr   	__class__r   r   r      s    zGraniteReasoningParser.__init__model_outputrequestreturnc                 C   s8   | j |}|sd|fS |d \}}|s|dfS ||fS )a  Extract the reasoning content & content sections, respectively.
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        Args:
            model_output (str): Output of the model to be parsed.
            request (ChatCompletionRequest): Request being processed.

        Returns:
            tuple[Optional[str], Optional[str]]: Tuple pair containing the
            reasoning content and non-reasoning content.
        Nr   )r   findall)r!   r$   r%   re_matchreasoning_contentresponse_contentr   r   r   extract_reasoning_content7   s   z0GraniteReasoningParser.extract_reasoning_contentprevious_textcurrent_text
delta_textprevious_token_idscurrent_token_idsdelta_token_idsNc                 C   sh   |  |\}}}	|s| ||}
n|	s| |||}
n|dus!J | |||	||}
|
js2|
js2dS |
S )a  Extract the reasoning content / content emitted by granite models;
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        NOTE: Granite models do not use a special token to start their reasoning
        and response sections; instead they have token sequences, e.g.,

                Here is my thought process: Foo Here is my response: Bar

        This increases the complexity of correctly handling streams, since we
        need to watch for specific sequences and correctly parse them without
        dropping content that is potentially overlapping & spanning multiple
        delta messages.

        Args:
            previous_text (str): Previous text outside of this delta message.
            current_text (str): Previous text + delta text.
            delta_text (str): Text to consider and parse content from.
            previous_token_ids (Sequence[int]): Token IDs of previous_text.
            current_token_ids (Sequence[int]): Token IDs of current_text.
            delta_token_ids (Sequence[int]): Token IDs of delta_text.

        Returns:
            Union[DeltaMessage, None]
                DeltaMessage with either reasoning content or content, or None.
        N)_get_content_sections+_get_delta_message_with_no_reasoning_bounds*_get_delta_message_with_no_response_bounds#_get_delta_message_with_both_boundscontentr)   )r!   r,   r-   r.   r/   r0   r1   r)   Zresp_seq_lenr6   Zdelta_messager   r   r   #extract_reasoning_content_streamingN   s&   #
z:GraniteReasoningParser.extract_reasoning_content_streamingtextc                       t  fdd| jD S )zCheck if a text matches one of the possible start reasoning seqs.

        Args:
            text (str): Text to check for leading substr.
        
        Returns:
            bool: True if any of the possible reasoning start seqs match.
        c                 3       | ]}|  V  qd S r   
startswithr   r8   r   r   r      
    
zDGraniteReasoningParser._is_reasoning_start_substr.<locals>.<genexpr>)anyr   r!   r8   r   r=   r   _is_reasoning_start_substr      	z1GraniteReasoningParser._is_reasoning_start_substrc                    r9   )zCheck if a text matches one of the possible start response seqs.

        Args:
            text (str): Text to check for leading substr.
        
        Returns:
            bool: True if any of the possible response start seqs match.
        c                 3   r:   r   r;   r   response_startr=   r   r   r      r>   zCGraniteReasoningParser._is_response_start_substr.<locals>.<genexpr>)r?   r   r@   r   r=   r   _is_response_start_substr   rB   z0GraniteReasoningParser._is_response_start_substrc                 C   s\   t |t | }| |}| |d| }|r |s td|dS |r(tdddS td|dS )a^  Parse the delta message when the current text has not yet completed
        its start of reasoning sequence.

        Args:
            current_text (str): The full previous + delta text.
            delta_text (str): Text to consider and parse content from.

        Returns:
            DeltaMessage: Message containing the parsed content.
        Nr)   r6   )r   rA   r   )r!   r-   r.   Zprev_longest_lengthZ	is_substrZ
was_substrr   r   r   r3      s   

zBGraniteReasoningParser._get_delta_message_with_no_reasoning_boundsr)   c                    s0  t  fdd| jD }|du s|rtdddS |dt|  }| || j}|| j}|dkr<| ||d nd}|dkrK|  |d nd}	|dkrZ| ||d nd}
|	rdtdddS |sx|
rrt|d| ddS t|ddS |
r||d |d|  }t|ddS t||d | ddS )a=  Parse the delta message when the current text has both reasoning
        content with no (response) content. NOTE that we may have overlapping
        tokens with the start of reasoning / start of response sequences on
        either side of the delta text.

        Args:
            current_text (str): The full previous + delta text.
            reasoning_content (str): reasoning content from current_text.
            delta_text (str): Text to consider and parse content from.

        Returns:
            DeltaMessage: Message containing the parsed content.
        c                 3   s    | ]}  |V  qd S r   )endswithrC   r-   r   r   r      r>   zTGraniteReasoningParser._get_delta_message_with_no_response_bounds.<locals>.<genexpr>NrF   r   F)r?   r   r   r   rfindr   rE   )r!   r-   r)   r.   Zends_with_start_response_seqr,   Zprev_idxZ	delta_idxZprev_was_substrZdelta_continues_substrZdelta_new_substrr   rH   r   r4      s`   



zAGraniteReasoningParser._get_delta_message_with_no_response_boundsr*   response_seq_lenc                 C   s   |t | d }t |t ||  }|dk rd}n$t || t | d }	t |t | }
|	|
 }|dk r8d}||| }t||dS )aB  Parse the delta message when the current text has both reasoning
        content and normal (response) content.

        Args:
            delta_text (str): Text to consider and parse content from.
            reasoning_content (str): reasoning content from current_text.
            response_content (str): response content from current_text.
            current_text (str): The full previous + delta text.
            response_seq_len(str): Len of the complete response sequence used.

        Returns:
            DeltaMessage: Message containing the parsed content.
        Nr      rF   )r   r   )r!   r.   r)   r*   r-   rJ   Zdelta_contentZreasoning_end_idxZdelta_reasoning_contentZstart_reasoning_content_idxZdelta_offsetZstart_offsetr   r   r   r5     s8   z:GraniteReasoningParser._get_delta_message_with_both_boundsc                    s   d}d}d} fddt |D }|D ]]}||| }|du r8 jD ]}||dd kr6|d }|d } nq"q|sp jD ]2}	|t|	 d d |	dd kro|t|	 }
|||
 }||d d }|t|	|f    S q=q|r~|s~||d ddfS dS )	a  Parse the text to extract the reasoning content / content
        if we have them.

        Args:
            current_text (str): The full previous + delta text.

        Returns:
            tuple[Optional[str], Optional[int], Optional[str]]: Tuple of len 3
            containing the reasoning content, the length of the response seq
            (if there is one) and the non-reasoning content.
        r   NFc                    s   g | ]\}}| j kr|qS r   )r   )r   idxcharr!   r   r   
<listcomp>K  s
    
z@GraniteReasoningParser._get_content_sections.<locals>.<listcomp>rK   )NNN)	enumerater   r   r   )r!   r-   Zcurrent_chunk_startZstart_reasoning_contentZparsed_contentZdelimiter_idxsZcurrent_chunk_endZcurrent_chunkr   rD   Zend_reasoning_contentr)   r*   r   rN   r   r2   :  sP   




z,GraniteReasoningParser._get_content_sections)__name__
__module____qualname____doc__r   r   strr   tupler   r+   r   intr   r   r7   boolrA   rE   r3   r4   r5   r2   __classcell__r   r   r"   r   r      sz    


;
#
G
-r   )collections.abcr   typingr   r   regexr   Ztransformersr   Z vllm.entrypoints.openai.protocolr   r   Zvllm.loggerr   Zvllm.reasoningr	   r
   rR   loggerZregister_moduler   r   r   r   r   <module>   s   