o
    W+ iJ                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ eeeeef ee f  Zeeeee ee f  Z e Z!eeee f Z"eeeeeee f f  Z#dZ$dZ%d	ed
ee fddZ&de'deeee( f ddfddZ)dkdededefddZ*deede+f defddZ,dkdeedf dedefddZ-ddddddee# dee d eee  dedeeef f
d!d"Z.d#ed$ej/defd%d&Z0d'ed(ed)eege1f defd*d+Z2dld,eeeeeef d-edefd.d/Z3d0d1 Z4d2d3 Z5dmd8d9Z6dnd;dd<eddfd=d<Z7ed>Z8d?eee8f dee
e8f fd@dAZ9dBdC Z:e:dDede
f ddfdEdFZ;e;fdGeeedee
f  dHeege8f dee8 fdIdJZ<dodLdMZ=dpdNdOZ>e:dqdPe
fdQdRZ?dSedTee dUedVefdWdXZ@e:dPe
dejAfdYdZZBe:dPe
dejAfd[d\ZCe:dPe
fd]d^ZDe:d_e
d`efdadbZEdcefdddeZFeGdfkre@dgg dhdidj dS dS )r    N)Mapping)deepcopy)BytesIO)AnyCallableListTypeVarUnionTupleSetDictTypeOptionalSequence)version)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?text
delimitersc           
         sD  t | tsJ d|  dd |D }dd |D }g }d}t| dkrt| D ]W\}  fddt|D }d}|D ]<}	| ||||	   ||	 krw|rT||d	 d
< n
|r^|d|d d}|d||	 i | |||	  d } d} nq;|s| 7 }q( || krd} t| dks$t|r||d	 d
< |S |d|d |S )zSplit the text field into parts.

    Args:
        text: A text to be split.
        delimiters: The delimiters.

    Returns:
        The split text in list of dicts.
    ztext: c                 S   s   g | ]}|d  qS )r    .0dr   r   t/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/preprocessors/templates/utils.py
<listcomp>(       z&split_str_parts_by.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )lenr   r   r   r   r   )   r    r   c                    s   g | ]
\}}| kr|qS r   r   )r   idxZ
start_charcharr   r   r   0       Fcontentkeyr!   r#   NT)
isinstancestrr   	enumerateappend)
r   r   Zall_start_charsZ
all_length	text_listZ
last_wordsZchar_idxZmatch_indexZis_delimiterindexr   r   r   split_str_parts_by   s@   


r*   r(   regex_delimitersreturnc              
      s  dd l   fdd| D }tt| d ddD ]k}| | }|ddkr|d }d}g }|D ]6\}}	t ||}
|
D ]'}| |krT|d|||  d	 ||	d |	dd	 |
 }q>q0|t|k rz|dd||d  d	 |r|| ||d < qd S )
Nr   c                    s   g | ]\}}  ||fqS r   )compile)r   patternscalerer   r   r   M   s    z(split_parts_by_regex.<locals>.<listcomp>   r    r#   r   r!   r"   )r1   itemsranger   getlistfinditerstartr'   groupendinsert)r(   r+   Zcompiled_patternsiitemZres_textZlast_idxsegmentsr.   r/   matchesmatchr   r0   r   split_parts_by_regexK   s,   
rA   tmpprompttmp_dirc           
      C   st   d}t || }d}d}|D ]!}|d}|d}t||}	|| ||d  |	 7 }|d }q|| |d  7 }|S )Nz0<(?:img|audio|video)>(.+?)</(?:img|audio|video)>r   r   r2   )r1   r7   spanr9   _from_base64)
rC   rD   r.   Z
match_iterZnew_contentr   mrE   
img_base64img_pathr   r   r   _decode_promptd   s   



rJ   rI   zPIL.Image.Imagec                 C   s   t | trtj| s| S t | tr,t| d}| }W d    n1 s&w   Y  nt | ts@t }| j	|dd |
 }n| }t|d}|S )NrbZpngformatutf-8)r$   r%   ospathisfileopenreadbytesr   savegetvaluebase64	b64encodedecode)rI   fZ_bytesZbytes_iorH   r   r   r   
_to_base64s   s   



r[   rH   c                 C   s   ddl m} t| tst| } tj| s| dr| S t	
| d }tj|| d}|tt| }tj|sE|| |S )Nr   ImagehttprN   z.png)PILr]   r$   r%   r[   rO   rP   rQ   
startswithhashlibsha256encode	hexdigestjoinrR   r   rW   	b64decodeexistsrU   )rH   rD   r]   Zsha256_hashrI   imager   r   r   rF      s   

rF   )messagesrC   imagesrD   ri   rj   c           
      C   s   t j|dd i }| d ur*g }| D ]}t|}t|d ||d< || q||d< |d ur7t||}||d< |d urPg }|D ]}	t|	|}	||	 q?||d< |S )NT)exist_okr!   ri   rC   rj   )rO   makedirsr   rJ   r'   rF   )
ri   rC   rj   rD   resZres_messagesrG   Zm_newZ
res_imagesrh   r   r   r   decode_base64   s&   

rn   inputsdevicec                 C   s   t t| ddr| j|dS t| tr'i }|  D ]\}}t||||< q|S t| trBt| tsBg }| D ]
}|	t|| q5|S | }|S )zMove inputs to a devicetoN)rp   )
callablegetattrrq   r$   r   r3   	to_devicer   r%   r'   )ro   rp   rm   kvbr   r   r   rt      s   
rt   lohicondc                 C   s:   | |k r| | d d? }||r|} n|d }| |k s| S )Nr2   r   )rx   ry   rz   midr   r   r   upper_bound   s   r|   elementtypec                 C   sb   t | tttfr"| D ]}t|}|r|d u st ||r|  S q
d S t | tr/tt|  S | S N)r$   tuplesetr6   	fetch_onedictvalues)r}   r~   Zeleoutr   r   r   r      s   
r   c              	   C   s`   dd l m} ddlm} tt}}||dd |j| | f|j	d|
 |j||dg}|S )Nr   InterpolationModec                 S   s   | j dkr
| dS | S )NRGB)modeconvert)imgr   r   r   <lambda>       z"_build_transform.<locals>.<lambda>)interpolation)meanZstd)torchvision.transforms
transformsZ!torchvision.transforms.functionalr   IMAGENET_MEANIMAGENET_STDZComposeLambdaResizeBICUBICZToTensorZ	Normalize)
input_sizeTr   ZMEANZSTD	transformr   r   r   _build_transform   s   
r   c                 C   s|   t d}d}|| }|D ]/}|d |d  }	t| |	 }
|
|k r%|
}|}q|
|kr;|d| | |d  |d  kr;|}q|S )Ninf)r2   r2   r   r2         ?)floatabs)aspect_ratiotarget_ratioswidthheight
image_sizeZbest_ratio_diffZ
best_ratioarearatiotarget_aspect_ratioZ
ratio_diffr   r   r   _find_closest_aspect_ratio   s    r   r2        Fc                    s0  | j \}}|| }t fddt d D }t|dd d}t|||||}	||	d  }
||	d  }|	d |	d  }| |
|f}g }t|D ].}||
|  | ||
|  | ||
|  d | ||
|  d | f}||}|| qKt||ksJ |rt|dkr| ||f}|| |S )Nc                 3   sX    | ]'}t d |d  D ]}t d |d  D ]}||  kr|| kr||fV  qqqdS )r2   N)r4   )r   nr<   jmax_nummin_numr   r   	<genexpr>   s   * 
z&_dynamic_preprocess.<locals>.<genexpr>r2   c                 S   s   | d | d  S )Nr   r2   r   )xr   r   r   r          z%_dynamic_preprocess.<locals>.<lambda>)r#   r   )	sizer   r4   sortedr   resizecropr'   r   )rh   r   r   r   use_thumbnailZ
orig_widthZorig_heightr   r   r   Ztarget_widthZtarget_heightblocksZresized_imgZprocessed_imagesr<   boxZ	split_imgZthumbnail_imgr   r   r   _dynamic_preprocess   s6   
"

r   r    r   rescale_imagec                 C   sj   dd l m} | j}| j}|dks|| |kr| S || }t|| d}|| }|t|t|f| S )Nr   r   )r   r   r   r   mathpowr   int)r   r   r   r   r   r   Zheight_scaledZwidth_scaledr   r   r   r     s   _TrP   c           	   
   C   s  | }t | tr|  } | dr2i }ttdd}|dkr"||d< tj| fi |j	}t
|}|S tj| rVt| d}t
| }W d    |S 1 sOw   Y  |S dd l}zt| }t
|}W |S  t|jfy } zt| dk rtd|  d	td
| d }~ww |S )Nr^   TIMEOUTZ60r   timeoutrK      zinvalid image: ""zinvalid image: )r$   r%   stripr`   r   rO   getenvrequestsr5   r!   r   rP   rg   rR   rS   binasciirW   rf   
ValueErrorErrorr   )	rP   rm   Zrequest_kwargsr   r!   rZ   r   dataerrorr   r   r   	load_file)  s<   




r   c                    s    fdd}|S )Nc                    s"   t | }  | g|R i |}|S r   )r   )rP   argskwargsrm   funcr   r   new_funcF  s   z%load_file_decorator.<locals>.new_funcr   )r   r   r   r   r   load_file_decoratorD  s   r   rh   c                 C   s8   ddl m} t| tr|| } | jdkr| d} | S )Nr   r\   r   )r_   r]   r$   r   rR   r   r   )rh   r]   r   r   r   
load_imageN  s   



r   	path_list	load_funcc                 C   sF   g }t | ttfsJ d|  | D ]}|d u rq||| q|S )Nzpath_list: )r$   r6   r   r'   )r   r   rm   rP   r   r   r   
load_batchX  s   r       c           	         sv   | r| d | d }}nd\}}t |t|| tt|| |}t| |  t fddt|D }|S )Nr   r2   )i`yi c                    s*   g | ]}t  d   t |  qS    )r   npround)r   r   Zseg_sizeZ	start_idxr   r   r   l  s   * z_get_index.<locals>.<listcomp>)maxr   minr   r   arrayr4   )	boundfps	max_frame	first_idxnum_segmentsr8   r:   Zend_idxframe_indicesr   r   r   
_get_indexc  s   r   c                    s:   t |d t| |d|d} fdd|D }t|}|S )N)r   T)r   r   r   c                    s   g | ]} |qS r   r   )r   rh   r   r   r   r   s  r   z#transform_image.<locals>.<listcomp>)r   r   torchstack)rh   r   r   rj   Zpixel_valuesr   r   r   transform_imagep  s
   

r   video_ioc                 C   s   ddl m}m} ddlm} || |ddd}t|d }t| }g }	t|||d|d}
|
D ]}|		|
||  d q0|	S )Nr   VideoReadercpur\   r2   )ctxZnum_threads)r   r   r   )decordr   r   r_   r]   r   r   get_avg_fpsr   r'   	fromarrayasnumpyr   )r   r   r   r   r   r]   vrr   r   rj   r   frame_indexr   r   r   load_video_internvlx  s    r   img_dirbbox	bbox_typeoutput_filec           
      C   sx   ddl m}m} ddlm} || }||ddg}|||gd |d d }||}	|	j|ddd	 |	| d S )
Nr   )r]   	ImageDraw)Template)r   r   rh   realr   redr   )outliner   )
r_   r]   r   swift.llm.template.templater   rR   Znormalize_bboxZDrawZ	rectanglerU   )
r   r   r   r   r]   r   r   rh   objectsZdrawr   r   r   	draw_plot  s   

r  c                 C   s   ddl m}m}m} |d d}d}d}|| |dd}t|}t||  }	|d ur8t|t||  n|}
t	j
|	|
d |td}||}|d	ddd
}|S )Nr   )r   r   bridger   <      r   r2   )Zdtype   r   )r   r   r   r  Z
set_bridger   r   r   r   r   linspace	get_batchZpermute)r   r   r   r  Zclip_end_secZclip_start_secZ
num_framesZ	decord_vrdurationZstart_frameZ	end_frameZframe_id_listZ
video_datar   r   r   load_video_cogvlm2  s    

r
  c           
      C   s   dd l }|| }|jjd j}td||d t}g }|	d |d }|d }t
|jddD ]\}}	||kr= n||krJ||v rJ||	 q3tdd |D S )Nr      r    )videoc                 S   s   g | ]}|j d dqS )Zrgb24rL   )Z
to_ndarray)r   r   r   r   r   r     r   z$load_video_llava.<locals>.<listcomp>)avrR   streamsr  framesr   Zarangeastyper   seekr&   rY   r'   r   )
r   r  	containerZtotal_framesindicesr  start_indexZ	end_indexr<   framer   r   r   load_video_llava  s   


r  c           	         s   ddl m  ddlm}m} dd }|| |dd}t| d }dd	 tdt||D }t||kr:|||}|	|
 } fd
d	|D }|S )Nr   r\   r   c                    s4   t  | fddt|D } fdd|D S )Nc                    s    g | ]}t |   d   qS r   )r   r   r<   )gapr   r   r     s     zJload_video_minicpmv_mplug_owl3.<locals>.uniform_sample.<locals>.<listcomp>c                    s   g | ]} | qS r   r   r  )_lr   r   r     r   )r   r4   )r  Z_nZidxsr   )r  r  r   uniform_sample  s   z6load_video_minicpmv_mplug_owl3.<locals>.uniform_sampler  r2   c                 S   s   g | ]}|qS r   r   r  r   r   r   r     r   z2load_video_minicpmv_mplug_owl3.<locals>.<listcomp>c                    s   g | ]
}  |d qS )Zuint8)r   r  )r   rv   r\   r   r   r     r   )r_   r]   r   r   r   r   r   r4   r   r  r   )	r   Zmax_num_framesr   r   r  r   Z
sample_fpsZ	frame_idxr  r   r\   r   load_video_minicpmv_mplug_owl3  s   
r  audio_iosampling_ratec                 C   s   dd l }|j| |dd S )Nr   )sr)librosaload)r  r  r  r   r   r   load_audio_qwen  s   r!  
video_pathc           !      C   sn  ddl m} dd l}ddlm}m} ddlm}m}m}m	}m
}	m}
m}m}m}m}m} ddlm} t|jtdkrAt| } |j| ddd	\}}}|d
td }|dtd }|dt|}|rf|rfJ d|d urp|||}n0|}|d|d  | }|||}|dt|}|dt|	}||k r|||}||kr|||}||kr||dkstd| d|d d| dtd|dd |  }|jdd  \}}|| }|dt|
}|dt|}|dtd }|d u r|}t t!||| | |d }|dtd }|dtd } |r| r||| |d\}} n||||||d\}} |j"j#||| g|j$dd % }|S )!Nr   )get_env_args)ior   )round_by_factorFPSFRAME_FACTORFPS_MIN_FRAMESFPS_MAX_FRAMESVIDEO_MIN_PIXELSVIDEO_MAX_PIXELSVIDEO_TOTAL_PIXELSsmart_resizeceil_by_factorfloor_by_factorr   z0.19secZTCHW)Zpts_unitZoutput_formatnframesr   size_factorz%Only accept either `fps` or `nframes`Z	video_fps
min_frames
max_framesznframes should in interval [z, z], but got .r2   r   
min_pixelstotal_pixels
max_pixelsg?resized_heightresized_width)factor)r;  r6  r8  T)r   Z	antialias)&r   r#  torchvisionr$  r   Zqwen_vl_utils.vision_processr%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r   r   r   parse__version__r   Z
read_videor   r   r   r   r  r   longshaper   r   Z
functionalr   r   r   )!r"  r#  r<  r$  r   r%  r&  r'  r(  r)  r*  r+  r,  r-  r.  r/  r   r  _infor1  r   r2  r3  r4  r   r   r   r6  r7  r8  r9  r:  r   r   r   load_video_qwen2  s|   4


" 
rC  __main__zman.jpg)ib  i  iD  i  Z	norm_1000zman_bbox.jpg)rB   r   )r2   r   r   F)r    )r   r   )r   r   )Nr   )HrW   ra   r   rO   r1   collections.abcr   copyr   r$  r   typingr   r   r   r   r	   r
   r   r   r   r   r   numpyr   r   r   	packagingr   r%   ZHistoryr   PromptZ	StopWordsContextZMessagesr   r   r*   r6   r   rA   rJ   rT   r[   rF   rn   rp   rt   boolr|   r   r   r   r   r   r   r   r   r   r   r   r   r   r  Zndarrayr
  r  r  r!  rC  __name__r   r   r   r   <module>   s    4". 


$&
%"





I