import hashlib
from dataclasses import field
from typing import TYPE_CHECKING, Any, Literal, Optional, Union

import torch
from pydantic import model_validator
from pydantic.dataclasses import dataclass
from torch.distributed import ProcessGroup, ReduceOp
from typing_extensions import Self

import vllm.envs as envs
from vllm.config.utils import config
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils import cuda_device_count_stateless, get_open_port

if TYPE_CHECKING:
    from ray.runtime_env import RuntimeEnv
    from ray.util.placement_group import PlacementGroup

    from vllm.executor.executor_base import ExecutorBase
else:
    RuntimeEnv = Any
    PlacementGroup = Any
    ExecutorBase = Any

logger = init_logger(__name__)

DistributedExecutorBackend = Literal["ray", "mp", "uni", "external_launcher"]


@config
@dataclass
class ParallelConfig:
    """Configuration for the distributed execution."""

    pipeline_parallel_size: int = 1
    """Number of pipeline parallel groups."""
    tensor_parallel_size: int = 1
    """Number of tensor parallel groups."""
    data_parallel_size: int = 1
    """Number of data parallel groups."""
    data_parallel_size_local: int = 1
    """Number of data parallel groups on this node."""
    data_parallel_rank: int = 0
    """Rank of this process in the data parallel group."""
    data_parallel_rank_local: Optional[int] = None
    """Local rank of this process in the data parallel group."""
    data_parallel_master_ip: str = "127.0.0.1"
    """IP of the data parallel master."""
    data_parallel_rpc_port: int = 29550
    """Port for data parallel messaging."""
    data_parallel_master_port: int = 29500
    """Port of the data parallel master."""
    data_parallel_backend: str = "mp"
    """Backend to use for data parallel, either "mp" or "ray"."""
    data_parallel_external_lb: bool = False
    """Whether to use an external load balancer for data parallelism."""
    data_parallel_hybrid_lb: bool = False
    """Whether to use hybrid load balancing for data parallelism."""
    enable_expert_parallel: bool = False
    """Use expert parallelism instead of tensor parallelism for MoE layers."""
    enable_eplb: bool = False
    """Enable expert parallelism load balancing for MoE layers."""
    num_redundant_experts: int = 0
    """Number of redundant experts to use for expert parallelism."""
    eplb_window_size: int = 1000
    """Window size for expert load recording."""
    eplb_step_interval: int = 3000
    """Interval for rearranging experts in expert parallelism."""
    eplb_log_balancedness: bool = False
    """Log the balancedness of each step of expert parallelism."""
    max_parallel_loading_workers: Optional[int] = None
    """Maximum number of parallel loading workers when loading the model
    sequentially in multiple batches."""
    disable_custom_all_reduce: bool = False
    """Disable the custom all-reduce kernel and fall back to NCCL."""
    ray_workers_use_nsight: bool = False
    """Whether to profile Ray workers with nsight."""
    ray_runtime_env: Optional[RuntimeEnv] = None
    """Ray runtime environment to pass to distributed workers."""
    placement_group: Optional[PlacementGroup] = None
    """Ray placement group for the distributed model workers."""
    distributed_executor_backend: Optional[Union[DistributedExecutorBackend,
                                                 type[ExecutorBase]]] = None
    """Backend to use for the distributed model workers: "ray", "mp",
    "uni", "external_launcher", or a custom ExecutorBase subclass."""
    worker_cls: str = "auto"
    """The full name of the worker class to use."""
    sd_worker_cls: str = "auto"
    """The full name of the worker class to use for speculative decoding."""
    worker_extension_cls: str = ""
    """The full name of the worker extension class to use."""
    world_size: int = field(init=False)
    """world_size is TPxPP; it is computed in `__post_init__`."""
    rank: int = 0
    """Global rank in the distributed setup."""
    enable_multimodal_encoder_data_parallel: bool = False
    """Use data parallelism instead of tensor parallelism for the
    multimodal encoder."""

    @property
    def world_size_across_dp(self) -> int:
        """world_size_across_dp is TPxPPxDP, it is the size of the world
        including data parallelism."""
        return self.world_size * self.data_parallel_size

    def get_next_dp_init_port(self) -> int:
        """
        We might need to initialize process groups in multiple
        processes that is related to data parallelism,
        e.g. both in the worker and in the engine, which
        can live in different processes. To avoid port conflicts, we
        increment the port number each time we need to initialize a
        new process group related to data parallelism.
        """
        answer = self.data_parallel_master_port
        self.data_parallel_master_port += 1
        return answer
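    # For example, if data_parallel_master_port currently holds 29500,
    # consecutive calls hand out 29500, 29501, ... so that every stateless
    # process group created for data parallelism binds a fresh port
    # (illustrative values; the actual port is chosen in __post_init__):
    #
    #     cfg.data_parallel_master_port = 29500
    #     cfg.get_next_dp_init_port()  # -> 29500
    #     cfg.get_next_dp_init_port()  # -> 29501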
    def stateless_init_dp_group(self) -> ProcessGroup:
        from torch.distributed import DistNetworkError

        from vllm.distributed.utils import (
            stateless_init_torch_distributed_process_group)

        max_retries = 5
        last_exc: Optional[Exception] = None
        for _ in range(max_retries):
            try:
                # Use gloo since the engine process might not have CUDA
                # devices.
                return stateless_init_torch_distributed_process_group(
                    self.data_parallel_master_ip,
                    self.get_next_dp_init_port(),
                    self.data_parallel_rank,
                    self.data_parallel_size,
                    backend="gloo")
            except DistNetworkError as e:
                # Only retry when the root cause is a port collision;
                # anything else is a real failure.
                if "EADDRINUSE" in str(e):
                    logger.warning(
                        "Address already in use. Retrying with a new port.")
                    last_exc = e
                    continue
                raise e

        assert last_exc is not None
        raise last_exc
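    # Gloo is used deliberately above: the engine process may own no CUDA
    # device, and this group only carries small CPU-side control tensors.
    # A minimal sketch of how a DP rank would create the group (assumes the
    # ranks agree on the master IP/port out of band):
    #
    #     cfg = ParallelConfig(data_parallel_size=2, data_parallel_rank=0)
    #     dp_group = cfg.stateless_init_dp_group()  # blocks until rank 1 joins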
    @staticmethod
    def has_unfinished_dp(dp_group: ProcessGroup,
                          has_unfinished: bool) -> bool:
        tensor = torch.tensor([has_unfinished],
                              dtype=torch.int32,
                              device="cpu")
        torch.distributed.all_reduce(tensor, op=ReduceOp.MAX, group=dp_group)
        aggregated_has_unfinished = bool(tensor.item())
        return aggregated_has_unfinished

    @staticmethod
    def sync_kv_cache_memory_size(dp_group: ProcessGroup,
                                  kv_cache_memory: int) -> int:
        if kv_cache_memory == -1:
            kv_cache_memory = torch.iinfo(torch.int64).max
        tensor = torch.tensor([kv_cache_memory],
                              dtype=torch.int64,
                              device="cpu")
        # We cannot use broadcast for a stateless dp group since it
        # depends on the global rank; reduce with MIN instead.
        torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group)
        return tensor.item()
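    # Both helpers reduce CPU tensors over the DP group. In
    # has_unfinished_dp, MAX over {0, 1} acts as a logical OR: if any rank
    # still has unfinished requests, every rank sees True and keeps
    # stepping in lockstep. In sync_kv_cache_memory_size, MIN picks the
    # smallest KV-cache budget across ranks, with -1 ("unset") mapped to
    # int64 max so that unset ranks never win the reduction.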
    def compute_hash(self):
        """
        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        """
        factors: list[Any] = []
        factors.append(self.pipeline_parallel_size)
        factors.append(self.tensor_parallel_size)
        factors.append(self.enable_expert_parallel)
        factors.append(self.data_parallel_size)
        factors.append(envs.VLLM_ALL2ALL_BACKEND)
        return hashlib.sha256(str(factors).encode()).hexdigest()

    def __post_init__(self) -> None:
        self.world_size = self.pipeline_parallel_size * \
            self.tensor_parallel_size

        if self.data_parallel_size_local > self.data_parallel_size:
            raise ValueError(
                f"data_parallel_size_local ({self.data_parallel_size_local}) "
                f"must be <= data_parallel_size ({self.data_parallel_size})")

        if self.data_parallel_size > 1 or self.data_parallel_size_local == 0:
            # Data parallel was specified in the engine args.
            self.data_parallel_master_port = get_open_port()

            if not (0 <= self.data_parallel_rank < self.data_parallel_size):
                raise ValueError(
                    f"data_parallel_rank ({self.data_parallel_rank})"
                    f" must be in the range [0, {self.data_parallel_size})")
        else:
            # Otherwise fall back to env vars (e.g. for the offline
            # SPMD case).
            self.data_parallel_size = envs.VLLM_DP_SIZE
            self.data_parallel_rank = envs.VLLM_DP_RANK
            self.data_parallel_rank_local = envs.VLLM_DP_RANK_LOCAL
            self.data_parallel_master_ip = envs.VLLM_DP_MASTER_IP
            self.data_parallel_master_port = envs.VLLM_DP_MASTER_PORT

            if self.data_parallel_external_lb:
                raise ValueError("data_parallel_external_lb can only "
                                 "be set when data_parallel_size > 1")

        if self.distributed_executor_backend == "external_launcher":
            import os
            os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
            logger.info("Disabling V1 multiprocessing for external launcher.")

        if self.enable_eplb:
            if not current_platform.is_cuda():
                raise ValueError(
                    "Expert parallelism load balancing is only supported on "
                    "CUDA devices now.")
            if self.num_redundant_experts < 0:
                raise ValueError(
                    "num_redundant_experts must be non-negative, but got "
                    f"{self.num_redundant_experts}.")
            if not self.enable_expert_parallel:
                raise ValueError(
                    "enable_expert_parallel must be True to use EPLB.")
            if self.tensor_parallel_size * self.data_parallel_size <= 1:
                raise ValueError(
                    "EPLB requires tensor_parallel_size or "
                    "data_parallel_size to be greater than 1, but got "
                    f"TP={self.tensor_parallel_size},"
                    f"DP={self.data_parallel_size}.")
        elif self.num_redundant_experts != 0:
            raise ValueError(
                "num_redundant_experts should be used with EPLB.")

        if self.distributed_executor_backend is None and self.world_size > 1:
            # We use multiprocessing by default if world_size fits on the
            # current node and we aren't in a ray placement group.
            from vllm.executor import ray_utils
            backend: DistributedExecutorBackend = "mp"
            ray_found = ray_utils.ray_is_available()
            if current_platform.is_neuron():
                # Neuron uses a single process to control multiple devices.
                backend = "uni"
            elif current_platform.is_tpu() and envs.VLLM_XLA_USE_SPMD:
                backend = "uni"
            elif (current_platform.is_cuda()
                  and cuda_device_count_stateless() < self.world_size):
                if not ray_found:
                    raise ValueError(
                        f"Unable to load Ray: {ray_utils.ray_import_err}. "
                        "Ray is required for multi-node inference, "
                        "please install Ray with `pip install ray`.")
                backend = "ray"
            elif self.data_parallel_backend == "ray":
                logger.info("Using ray distributed inference because "
                            "data_parallel_backend is ray")
                backend = "ray"
            elif ray_found:
                if self.placement_group:
                    backend = "ray"
                else:
                    from ray import is_initialized as ray_is_initialized
                    if ray_is_initialized():
                        from ray.util import get_current_placement_group
                        if get_current_placement_group():
                            backend = "ray"
            self.distributed_executor_backend = backend
            logger.debug("Defaulting to use %s for distributed inference",
                         backend)

        if (self.distributed_executor_backend is None
                and self.world_size == 1):
            self.distributed_executor_backend = "uni"
    @property
    def use_ray(self) -> bool:
        return self.distributed_executor_backend == "ray" or (
            isinstance(self.distributed_executor_backend, type)
            and self.distributed_executor_backend.uses_ray)

    @model_validator(mode="after")
    def _verify_args(self) -> Self:
        # Lazy import to avoid a circular import.
        from vllm.executor.executor_base import ExecutorBase
        from vllm.platforms import current_platform
        if self.distributed_executor_backend not in (
                "ray", "mp", "uni", "external_launcher", None) and not (
                    isinstance(self.distributed_executor_backend, type)
                    and issubclass(self.distributed_executor_backend,
                                   ExecutorBase)):
            raise ValueError(
                "Unrecognized distributed executor backend "
                f"{self.distributed_executor_backend}. Supported "
                "values are 'ray', 'mp' 'uni', 'external_launcher' or "
                "custom ExecutorBase subclass.")
        if self.use_ray:
            from vllm.executor import ray_utils
            ray_utils.assert_ray_available()

        if not current_platform.use_custom_allreduce():
            self.disable_custom_all_reduce = True
            logger.debug(
                "Disabled the custom all-reduce kernel because it is not "
                "supported on current platform.")

        if self.ray_workers_use_nsight and not self.use_ray:
            raise ValueError("Unable to use nsight profiling unless "
                             "workers run with Ray.")

        return self
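# A minimal usage sketch (illustrative, assuming default VLLM_DP_* env
# values): __post_init__ derives world_size from TP x PP and picks a
# distributed executor backend automatically.
#
#     from vllm.config.parallel import ParallelConfig
#
#     cfg = ParallelConfig(tensor_parallel_size=2, pipeline_parallel_size=2)
#     assert cfg.world_size == 4            # TP x PP
#     assert cfg.world_size_across_dp == 4  # x DP (DP defaults to 1)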