o
    )i+                  	   @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZmZmZ d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ e
rfd d	lmZ d d
l m!Z!m"Z" ee#Z$edZ%G dd dee% eZ&	 d(de'de(de)de(fddZ*G dd dZ+		d)de+deed  ded ddfddZ,de-e fddZ.d ej/d!ej/d"e)dej/fd#d$Z0ej1fd%eddfd&d'Z2dS )*    N)Sequence)
connection)BaseProcess)TYPE_CHECKINGAnyCallableGenericOptionalTypeVarUnionoverload)init_logger)UsageContextis_usage_stats_enabledusage_message)get_open_portget_open_zmq_ipc_pathget_tcp_urikill_process_tree)DPCoordinatorCoreEngineActorManagerCoreEngineProcManagerTc                	   @   sF  e Zd Zdee ddfddZdd Zdd	 Zd
d Zdd Z	dd Z
dd Z		d,dededee defddZededefddZededee fddZdeeef deeee f fddZededefddZededefd dZdeeef deeee f fd!dZd"d# Zd$d% Zd&d' Zd(d) Zd*d+ ZdS )-ConstantListxreturnNc                 C   s
   || _ d S N_x)selfr    r!   Y/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/utils.py__init__!      
zConstantList.__init__c                 C      t d)Nz Cannot append to a constant list	TypeErrorr    itemr!   r!   r"   append$      zConstantList.appendc                 C   r%   )NzCannot extend a constant listr&   r(   r!   r!   r"   extend'   r+   zConstantList.extendc                 C   r%   )Nz"Cannot insert into a constant listr&   r(   r!   r!   r"   insert*   r+   zConstantList.insertc                 C   r%   )NzCannot pop from a constant listr&   r(   r!   r!   r"   pop-   r+   zConstantList.popc                 C   r%   )Nz"Cannot remove from a constant listr&   r(   r!   r!   r"   remove0   r+   zConstantList.removec                 C   r%   )NzCannot clear a constant listr&   r    r!   r!   r"   clear3   r+   zConstantList.clearr   r)   startstopc                 C   s$   | j |||d ur|S t| j S r   )r   indexlen)r    r)   r2   r3   r!   r!   r"   r4   6   s
   

zConstantList.indexc                 C      d S r   r!   r(   r!   r!   r"   __getitem__=      zConstantList.__getitem__sc                C   r6   r   r!   )r    r9   r!   r!   r"   r7   A   r8   c                 C   s
   | j | S r   r   r(   r!   r!   r"   r7   E   r$   valuec                 C   r6   r   r!   r    r)   r:   r!   r!   r"   __setitem__H   r8   zConstantList.__setitem__c                C   r6   r   r!   )r    r9   r:   r!   r!   r"   r<   L   r8   c                 C   r%   )Nz"Cannot set item in a constant listr&   r;   r!   r!   r"   r<   P   r+   c                 C   r%   )Nz'Cannot delete item from a constant listr&   r(   r!   r!   r"   __delitem__S   r+   zConstantList.__delitem__c                 C   
   t | jS r   )iterr   r0   r!   r!   r"   __iter__V   r$   zConstantList.__iter__c                 C   s
   || j v S r   r   r(   r!   r!   r"   __contains__Y   r$   zConstantList.__contains__c                 C   r>   r   )r5   r   r0   r!   r!   r"   __len__\   r$   zConstantList.__len__c                 C   s   d| j  dS )NzConstantList()r   r0   r!   r!   r"   __repr___   s   zConstantList.__repr__)r   N)__name__
__module____qualname__listr   r#   r*   r,   r-   r.   r/   r1   intr	   r4   r   r7   slicer   r<   r=   r@   rA   rB   rD   r!   r!   r!   r"   r      sD    
&&r   
local_onlyhostportr   c                 C   s   | rt  S t||pt S )a  Assign a new ZMQ socket address.

    If local_only is True, participants are colocated and so a unique IPC
    address will be returned.

    Otherwise, the provided host and port will be used to construct a TCP
    address (port == 0 means assign an available port).)r   r   r   )rK   rL   rM   r!   r!   r"   get_engine_client_zmq_addrc   s   
rN   c                   @   sV   e Zd ZdZ	ddedededejde	de
e d	e
e d
ee fddZdddZdS )APIServerProcessManagerzManages a group of API server processes.
    
    Handles creation, monitoring, and termination of API server worker
    processes. Also monitors extra processes to check if they are healthy.
    Ntarget_server_fnlisten_addresssockargsnum_serversinput_addressesoutput_addressesstats_update_addressc	                 C   s   || _ || _|| _td}	g | _tt|||D ]-\}
}}||||
d}|dur-||d< |	j|d|
 ||||fd}| j	| |
  qtdt| j t| t| j| _dS )a@  Initialize and start API server worker processes.
        
        Args:
            target_server_fn: Function to call for each API server process
            listen_address: Address to listen for client connections
            sock: Socket for client connections
            args: Command line arguments
            num_servers: Number of API server processes to start
            input_addresses: Input addresses for each API server
            output_addresses: Output addresses for each API server
            stats_update_address: Optional stats update address 
        Zspawn)Zinput_addressZoutput_addressZclient_countZclient_indexNrW   Z
ApiServer_)targetnamerS   zStarted %d API server processes)rQ   rR   rS   multiprocessingZget_context	processesziprangeProcessr*   r2   loggerinfor5   weakreffinalizeshutdown
_finalizer)r    rP   rQ   rR   rS   rT   rU   rV   rW   Zspawn_contextiZin_addrZout_addrZclient_configprocr!   r!   r"   r#   y   s2   


z APIServerProcessManager.__init__r   c                 C   s   |    d S r   )rd   r0   r!   r!   r"   close   s   zAPIServerProcessManager.closer   )r   N)rE   rF   rG   __doc__r   strr   argparse	NamespacerI   rH   r	   r#   rg   r!   r!   r!   r"   rO   r   s*    	
7rO   api_server_managerengine_manager)r   r   coordinatorr   c              
   C   s  ddl m}m} zzqtd dd | jD }|r |j||jj< g }t||r3|jD ]}|||j< q*n	t||r<|	 }|s@|ryt
j|dd}|D ]}	||	}|jdkretd|j d	|j d
|j qI|ruddl}
|
j|dd\}}|s@|s@W n" ty   td Y n ty } z	tdt|  d}~ww W td |   |r|  |r|  dS dS td |   |r|  |r|  w w )a  Wait for all processes to complete or detect if any fail.
    
    Raises an exception if any process exits with a non-zero status.

    Args:
        api_server_manager: The manager for API servers.
        engine_manager: The manager for engine processes.
            If CoreEngineProcManager, it manages local engines;
            if CoreEngineActorManager, it manages all engines.
        coordinator: The coordinator for data parallel.
    r   r   z'Waiting for API servers to complete ...c                 S   s   i | ]}|j |qS r!   )sentinel).0rf   r!   r!   r"   
<dictcomp>   s    z2wait_for_completion_or_failure.<locals>.<dictcomp>   )timeoutzProcess z (PID: z) died with exit code Nz8Received KeyboardInterrupt, shutting down API servers...z0Exception occurred while running API servers: %sz#Terminating remaining processes ...)vllm.v1.engine.utilsr   r   r_   r`   r[   rf   ro   
isinstanceZget_run_refsr   waitr.   exitcodeRuntimeErrorrY   pidrayKeyboardInterrupt	Exception	exceptionri   rg   )rl   rm   rn   r   r   Zsentinel_to_procZactor_run_refsrf   Zready_sentinelsro   rz   _er!   r!   r"   wait_for_completion_or_failure   sr   








r   procsc                 C   s   | D ]
}|  r|  qt d }| D ]}|t  }|dkr# n
|  r,|| q| D ]}|  r@|j }d ur@t| q/d S )Nrr   r   )is_alive	terminatetime	monotonicjoinry   r   )r   rf   deadline	remainingry   r!   r!   r"   rc      s"   
rc   from_tensor	to_tensorlengthc                 C   s   |d| j | d| ddS )z
    Copy the first length elements of a tensor into another tensor in a
    non-blocking manner.

    Used to copy pinned CPU tensor data to pre-allocated GPU tensors.

    Returns the sliced target tensor.
    NT)Znon_blocking)Zcopy_)r   r   r   r!   r!   r"   
copy_slice  s   
r   usage_contextc                 C   sx   t  sdS ddlm} tj|| j|t| jj| jj	| j
j| j
j| jjt| j
jt| j| j
j| jj| jjd
d dS )z#Report usage statistics if enabled.Nr   )get_architecture_class_name)
dtypetensor_parallel_size
block_sizegpu_memory_utilizationquantizationZkv_cache_dtypeZenable_loraenable_prefix_cachingenforce_eagerdisable_custom_all_reduce)Z	extra_kvs)r   Z vllm.model_executor.model_loaderr   r   Zreport_usageZmodel_configri   r   Zparallel_configr   Zcache_configr   r   r   Zcache_dtypeboolZlora_configr   r   r   )Zvllm_configr   r   r!   r!   r"   report_usage_stats!  s$   


r   )r   )NN)3rj   rZ   r   ra   collections.abcr   r   Zmultiprocessing.processr   typingr   r   r   r   r	   r
   r   r   ZtorchZvllm.loggerr   Zvllm.usage.usage_libr   r   r   Z
vllm.utilsr   r   r   r   Zvllm.v1.engine.coordinatorr   rt   r   r   rE   r_   r   r   r   ri   rI   rN   rO   r   rH   rc   ZTensorr   ZENGINE_CONTEXTr   r!   r!   r!   r"   <module>   sd   (F
E

L
