o
    )iX                     @   s   d dl mZmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZ er(d dlmZ ddlmZmZ edZG d	d
 d
ZG dd dee	e ZdS )    )ABCabstractmethod)Sequence)Path)TYPE_CHECKINGGeneric
NamedTupleTypeVar)SequenceGroupMetadata   )MultiModalKwargsPlaceholderRange_Tc                   @   s   e Zd ZU dZG dd deZee ed< 	 e	ed< 	 ee ed< 	 e	ed< 	 dd	 Z
ed
ddedeeeed f f fddZdedee dee dee fddZdddZdddZdS )MultiModalPlaceholderMapzq
    Relates multi-modal embeddings to their corresponding placeholders.

    Note: This is only used in V0.
    c                   @   s&   e Zd ZU ee ed< ee ed< dS )z!MultiModalPlaceholderMap.IndexMapsrcdestN)__name__
__module____qualname__listint__annotations__ r   r   `/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/multimodal/base.pyIndexMap   s   
 r   
src_rangessrc_lendest_rangesdest_lenc                 C   s   g | _ d| _g | _d| _d S )Nr   )r   r   r   r   selfr   r   r   __init__2   s   
z!MultiModalPlaceholderMap.__init__	seq_groupr
   	positionsreturnc           	      C   sp   |j }|j}|r
|st i fS tttf  }| D ]\}}t }|r/||dgt| | |||< q||fS )a  
        Returns the multi-modal items that intersect with the portion of a
        prompt (``seq_group``) represented by ``positions``, as well as a
        ``MultiModalPlaceholderMap`` that relates the multi-modal embedding
        vectors to their corresponding placeholders.

        Examples:

        ```
        Prompt:    |AAAA BBBB What's in these images?|
        Positions: |.................................|

            images      = [A, B]
            src_ranges  = [(0, 4), (4, 8)]
            dest_ranges = [(0, 4), (5, 9)]

        Prompt:    |AAAA BBBB What's in these images?|
        Positions: |  .....                          |

            images      = [A, B]
            src_ranges  = [(2, 4), (4, 6)]
            dest_ranges = [(0, 2), (3, 5)]

        Prompt:    |AAAA BBBB What's in these images?|
        Positions: |     .........                   |

            images      = [B]
            src_ranges  = [(0, 4)]
            dest_ranges = [(0, 4)]

        Prompt:    |AAAA BBBB What's in these images?|
        Positions: |          .......................|

            images      = []
            src_ranges  = []
            dest_ranges = []
        ```
        N)	Zmulti_modal_datamulti_modal_placeholdersr   dictstrr   itemsappend_items_from_seq_grouplen)	clsr"   r#   Zseq_mm_dataZseq_mm_placeholdersZplaceholder_mapsZmodalityZplaceholdersZplaceholder_mapr   r   r   from_seq_group8   s   *

z'MultiModalPlaceholderMap.from_seq_groupmulti_modal_itemsr%   c                 C   s   g }t |t |krtdt||D ]Z\}}t|j|j|j }tt|j|jt|j	|j	}|s3qt|j|j |j	|j }	t|j|j | j
 |j	|j | j
 }
|| | j|	 | j|
 |  j
t |7  _
q|  jt |7  _|S )z
        Adds the multi-modal items that intersect ```positions`` to this
        placeholder map and returns the intersecting items.
        z=Multi-modal placeholders and items must have the same length.)r*   
ValueErrorziprangeoffsetlengthmaxstartminstopr   appendr   r   r   )r    r#   r-   r%   Zintersecting_itemsZplaceholder_dictZmm_itemplaceholderintersectionZtoken_embedding_rangeZmultimodal_embedding_ranger   r   r   r)   y   s@   




z4MultiModalPlaceholderMap.append_items_from_seq_groupotherc                    s\    j  fdd|j D    j|j7  _ j fdd|jD    j|j7  _dS )z
        Adds the placeholders from another ``MultiModalPlaceholderMap`` to this
        instance based on the source and destination tensors being
        concatenated.
        c                 3   *    | ]}t  j|j  j|j V  qd S N)r0   r   r4   r6   .0rr   r   r   	<genexpr>   
    
z2MultiModalPlaceholderMap.extend.<locals>.<genexpr>c                 3   r;   r<   )r0   r   r4   r6   r=   r   r   r   r@      rA   N)r   extendr   r   r   )r    r:   r   r   r   rB      s   zMultiModalPlaceholderMap.extendc                 C   s\   dd | j D }dd | jD }t|t|kr'tdt| dt| d| j||dS )z
        Finalizes the placeholder map into lists of indices that can be used to
        index the source and destination tensors.
        c                 S      g | ]	}|D ]}|qqS r   r   r>   r?   ir   r   r   
<listcomp>       z6MultiModalPlaceholderMap.index_map.<locals>.<listcomp>c                 S   rC   r   r   rD   r   r   r   rF      rG   zThe number of source (z) and destination indices (z) must be the same.)r   r   )r   r   r*   r.   r   )r    Zsrc_indicesZdest_indicesr   r   r   	index_map   s   z"MultiModalPlaceholderMap.index_mapN)r:   r   )r$   r   )r   r   r   __doc__r   r   r   r0   r   r   r!   classmethodtupler   r&   r'   r,   r   r   r   r)   rB   rH   r   r   r   r   r      s>   
 @

1r   c                   @   sR   e Zd ZededefddZedededefddZede	defd	d
Z
dS )MediaIOdatar$   c                 C      t r<   NotImplementedError)r    rM   r   r   r   
load_bytes      zMediaIO.load_bytes
media_typec                 C   rN   )zm
        List of media types:
        https://www.iana.org/assignments/media-types/media-types.xhtml
        rO   )r    rS   rM   r   r   r   load_base64   s   zMediaIO.load_base64filepathc                 C   rN   r<   rO   )r    rU   r   r   r   	load_file   rR   zMediaIO.load_fileN)r   r   r   r   bytesr   rQ   r'   rT   r   rV   r   r   r   r   rL      s    rL   N)abcr   r   collections.abcr   pathlibr   typingr   r   r   r	   Zvllm.sequencer
   Zinputsr   r   r   r   rL   r   r   r   r   <module>   s    ;