o
    )i                     @   s2  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
Zd dlmZ d dlmZ d dlmZ d dlmZ dd	lmZ zd dlZW n eyQ   ed
ZY nw ededefddZddededejfddZ	ddededeej fddZdedeeef fddZ ed Z!eddG dd dZ"dS )    )	dataclass)	lru_cache)AnyClassVarLiteralOptionalN)hf_hub_download)Image)PlaceholderModule   )get_cache_dirlibrosafilenamereturnc                 C   sD   t  d }|jddd ||  }t|}| s td| d|d}|S )z^
    Download and open an image from huggingface
    repo: raushan-testing-hf/videos-test
    zvideo-example-dataT)parentsexist_okzraushan-testing-hf/videos-testZdataset)Zrepo_idr   Z	repo_type	cache_dir)r   mkdirstrexistsr   )r   Zvideo_directory
video_pathZvideo_path_str r   ]/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/assets/video.pydownload_video_asset   s   
r   path
num_framesc           
      C   s   t | }| std|  t|t j}g }|dkr |n|}tjd|d |td}t	|D ] }|
 }|s; n||v rQ| \}}	|rQ|t |	t j q1t|}t||k rmtd|  d| dt| d|S )	NCould not open video file r   r   )Zdtypez-Could not read enough frames from video file z (expected z frames, got ))cv2VideoCaptureisOpened
ValueErrorintgetCAP_PROP_FRAME_COUNTnpZlinspacerangeZgrabretrieveappendZcvtColorZCOLOR_BGR2RGBstacklen)
r   r   captotal_framesframesZframe_indicesidxokretframer   r   r   video_to_ndarrays-   s0   



r3   c                 C   s   t | |}dd |D S )Nc                 S   s   g | ]}t |qS r   )r	   Z	fromarray).0r2   r   r   r   
<listcomp>L   s    z,video_to_pil_images_list.<locals>.<listcomp>)r3   )r   r   r.   r   r   r   video_to_pil_images_listI   s   
r6   c                 C   sb   t | }| std|  t|t j}|t j}|dkr&|| nd}|||dd}|S )Nr   r   Zopencv)Ztotal_num_framesfpsdurationZvideo_backend)r   r    r!   r"   r#   r$   r%   ZCAP_PROP_FPS)r   r,   r-   r7   r8   metadatar   r   r   video_get_metadataO   s   
r:   baby_readingT)frozenc                   @   s   e Zd ZU eed< dZeed< ddiZee	ee
f  ed< ede
fdd	Zedeej fd
dZedejfddZede	e
ef fddZddee dejfddZdS )
VideoAssetnamer   r   r;   zsample_demo_1.mp4_NAME_TO_FILEr   c                 C   s   | j | j S N)r?   r>   )selfr   r   r   r   m   s   zVideoAsset.filenamec                 C      t | j}t|| j}|S r@   )r   r   r6   r   rA   r   r1   r   r   r   
pil_imagesq      
zVideoAsset.pil_imagesc                 C   rB   r@   )r   r   r3   r   rC   r   r   r   np_ndarraysw   rE   zVideoAsset.np_ndarraysc                 C   s   t | j}t|}|S r@   )r   r   r:   rC   r   r   r   r9   }   s   
zVideoAsset.metadataNsampling_ratec                 C   s   t | j}tj||dd S )z
        Read audio data from the video asset, used in Qwen2.5-Omni examples.
        
        See also: examples/offline_inference/qwen2_5_omni/only_thinker.py
        )srr   )r   r   r   load)rA   rG   r   r   r   r   	get_audio   s   
zVideoAsset.get_audior@   )__name__
__module____qualname__VideoAssetName__annotations__r   r#   r?   r   dictr   propertyr   listr	   rD   nptNDArrayrF   r   r9   r   floatrJ   r   r   r   r   r=   d   s   
 r=   )r   )#dataclassesr   	functoolsr   typingr   r   r   r   r   numpyr&   Znumpy.typingrS   Zhuggingface_hubr   ZPILr	   Z
vllm.utilsr
   baser   r   ImportErrorr   r   r#   rT   r3   rR   r6   rP   r:   rN   r=   r   r   r   r   <module>   s8   
