o
    0 i*<                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 G dd deZ
zd dlmZ d dlmZmZmZ dZW n eyE   d	ZY nw dd
dZ	ddd	dddeeeeee	eeegef f f   fddZeeZe G dd dZdS )    N)Enum)AnyCallableOptionalUnionc                   @   s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
RequestStatusz5Status of a generation request through its lifecycle.pendingZ
prefillingZprefilling_splitZsplit_pending_remainderZdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITZSPLIT_PENDING_REMAINDERDECODINGFINISHEDZFAILED r   r   f/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/transformers/utils/metrics.pyr      s    r   )metrics)Status
StatusCode
get_tracerTFc                    s   t sdd S  fdd}|S )a  
    Decorator that attaches a tracer to a class.

    This decorator should be applied to classes that need OpenTelemetry tracing.
    It adds a tracer attribute to the class instance that can be used by the traced decorator.

    Args:
        tracer_name_template: Optional template string for the tracer name.
            If provided, it should contain {module} which will be replaced with the class's full module path
            and {class_name} for the class name.
            If None, a default naming scheme will be used where:
              - If the module already starts with "transformers.", it will use that directly
              - Otherwise, it will prepend "transformers." to the module name

    Returns:
        Class decorator function
    c                 S   s   | S Nr   )clsr   r   r   <lambda>0   s    zattach_tracer.<locals>.<lambda>c                    s*    j t fdd}| _  S )Nc                    sr   | g|R i |  j } j}d u r+|dr"| d| }nd| d| }nj||d}t|| _d S )Ntransformers..)module
class_name)r   r   
startswithformatr   tracer)selfargskwargsmodule_namer    Ztracer_name)r   original_inittracer_name_templater   r   init_with_tracer5   s   
z:attach_tracer.<locals>.decorator.<locals>.init_with_tracer)__init__	functoolswraps)r   r*   r)   )r   r(   r   	decorator2   s
   z attach_tracer.<locals>.decorator)_has_opentelemetry)r)   r/   r   r.   r   attach_tracer   s   r1   )	span_name
standaloneadditional_attributesr4   c                   s$    fdd}| du r|S || S )a  
    Decorator to trace function calls with OpenTelemetry.

    Can be used as @traced or @traced(span_name="custom_name")

    Args:
        func: The function to trace
        span_name: Optional custom name for the span (defaults to function name)
        standalone: If True, creates a parentless span
        additional_attributes: Optional list of additional attributes to set on the span.
          Each item is a tuple of (instance_attribute_name, span_attribute_key, value_or_transform_function)
          where:
            - instance_attribute_name: Name of the attribute to get from the class instance
            - span_attribute_key: Key to use when setting the attribute on the span
            - value_or_transform_function: Either a raw value to use directly, or a function to transform
              the attribute value before setting it on the span

    Returns:
        Decorated function with tracing
    c                    s(   t s S t  fdd}|S )Nc                     s,  | rt drjd ur| d nd }|d u}|r!t |dr!|j}ntdj dj }p1j}r7|jn|j}||}|dj |dj |d| | rt	| D ]*\}}	t
|	ttttfsj|	d u rv|d	| t|	 qY|d	| tt|	 qY|r| D ]*\}
}t
|ttttfs|d u r|d
|
 t| q|d
|
 tt| q r|rވ D ]"}|\}}}t ||rt||}t|r||}n|}||| qz| i |}|W W  d    S  ty
 } z|ttj ||  d }~ww 1 sw   Y  d S )N__self__r   r#   r   r   zfunction.namezfunction.modulezfunction.is_methodzargs.zkwargs.)hasattrr5   r#   r   r   r   Z
start_spanZstart_as_current_spanZset_attribute	enumerate
isinstancestrintfloatbooltypeitemsgetattrcallable	ExceptionZ
set_statusr   r   ERRORZrecord_exception)r%   r&   instanceZ	is_methodr#   nameZspan_fnspaniargkeyvalueZattr_configZinstance_attribute_nameZspan_attribute_keyZvalue_or_transform_functionZattribute_valueZtransformed_valueresulte)r4   funcr2   r3   r   r   wrapperl   sT   $





 
z*traced.<locals>.decorator.<locals>.wrapper)r0   r,   r-   )rL   rM   r4   r2   r3   )rL   r   r/   h   s
   0ztraced.<locals>.decoratorNr   )rL   r2   r3   r4   r/   r   rN   r   tracedL   s   7rO   c                   @   s   e Zd ZdZdefddZdd Zedede	d	d
fddZ
eded	d
fddZedddZededed	d
fddZedede	d	d
fddZd
S )ContinuousBatchProcessorMetricsz0Metrics collection for ContinuousBatchProcessor.max_batch_tokensc                 C   s   || _ |   dS )zInitialize metrics for continuous batch processor.

        Args:
            max_batch_tokens: Maximum number of tokens in a batch
        N)rQ   _setup_metrics)r$   rQ   r   r   r   r+      s   z(ContinuousBatchProcessorMetrics.__init__c                 C   s  t s	td dS td| _g d}| jjddd|d| _| jjd	d
dd| _	| jjdddd| _
g d}| jjddd|d| _| jjdddd| _| jjdddd| _| jjdddd| _g d}| jjddd|d| _| jjddd d| _| jjd!d"d d| _dS )#zIInitialize OpenTelemetry metrics and tracing if the library is available.zIOpenTelemetry is not installed. Metrics and tracing will not be recorded.Nz2transformers.generation.continuous_batch_processor)
      2   K   d         i,    i        '  Zttft_millisecondsz#Time to first token in millisecondsms)rD   descriptionunitZ#explicit_bucket_boundaries_advisoryZactive_requests_countz3Number of active requests currently being processedrequests)rD   r`   ra   Zwaiting_requests_countz*Number of requests waiting to be processed)rU   rW      rZ   r[   r\   r]   r^   i N  i0u  i`  Zrequest_latency_millisecondsz9End-to-end latency for completed requests in millisecondsZdecode_prefill_ratioz3Ratio of decode tokens to prefill tokens in a batchratioZprefill_tokens_processedz"Number of prefill tokens processedtokensZdecode_tokens_processedz!Number of decode tokens processed)   rS         (   rU   <   F   P   Z   _   b   rW   Zbatch_fill_percentagez5Percentage of max_batch_tokens utilized in each batchpercentZkv_cache_free_memory_bytesz/Free memory of the PagedAttentionCache in bytesbytesZkv_cache_memory_bytesz0Memory usage of the PagedAttentionCache in bytes)r0   loggerinfor   Z	get_meterZmeterZcreate_histogramttft_histogramZcreate_gaugeactive_requests_gaugewaiting_requests_gaugerequest_latency_histogramdecode_prefill_ratio_gaugeZcreate_counterprefill_tokens_counterdecode_tokens_counterbatch_fill_percentage_histogramkv_cache_free_memory_gaugekv_cache_memory_gauge)r$   Zttft_bucketsZlatency_bucketsZbatch_fill_bucketsr   r   r   rR      sx   
z.ContinuousBatchProcessorMetrics._setup_metricscreated_time
request_idreturnNc              
   C   |   t sdS t | d }z| j| td| d|dd W dS  ty= } ztd|  W Y d}~dS d}~ww )zRecord Time to First Token (TTFT).

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        N     @@zRecorded TTFT for request : .2fr_   zFailed to record TTFT metric: )r0   timert   recordrr   debugrA   warning)r$   r~   r   Zttft_msrK   r   r   r   record_ttft_metric  s    z2ContinuousBatchProcessorMetrics.record_ttft_metricrequests_in_batchc           	      C   s&  t r|sdS d}d}|D ]}|jtjkr|d7 }q|jtjtjfv r)|t|j7 }q|| }zI|dkr9| j	| |dkrC| j
	| |dkrQ|| }| j| || j d }| j| td| d| d|dd	| d
| j d W dS  ty } ztd|  W Y d}~dS d}~ww )zRecord metrics about the batch composition including decode/prefill ratio and batch fill percentage.

        Args:
            requests_in_batch: List of request states in the current batch
        Nr      g      Y@zBatch metrics: z decode tokens, z prefill tokens, batch fill: r   z% (/)z Failed to record batch metrics: )r0   statusr   r   r   r   lenZ
prompt_idsry   addrz   rx   setrQ   r{   r   rr   r   rA   r   )	r$   r   Zdecode_tokensZprefill_tokensstateZtotal_batch_tokensrd   Zfill_percentagerK   r   r   r   record_batch_metrics  sD   

z4ContinuousBatchProcessorMetrics.record_batch_metricsc           
      C   s   t sdS zT|j|j }||jj }dt|j |j | }| }|j	| }|| }|| }| j
| | j| td|d dd| d|j	 d||j	 d	 d
d	 W dS  tys }	 ztd|	  W Y d}	~	dS d}	~	ww )a&  Record memory usage of the PagedAttentionCache without GPU synchronization.

        This calculates the theoretical memory usage based on cache configuration
        and the number of blocks currently in use.

        Args:
            cache: The PagedAttentionCache object to measure
        N   zKV Cache memory: i   r   zMB, Used blocks: r   z (rW   z.1fz%)z*Failed to record KV cache memory metrics: )r0   Zhead_dimZnum_key_value_headsZdtypeitemsizer   Z	key_cache
block_sizeZget_num_free_blocksZ
num_blocksr}   r   r|   rr   r   rA   r   )
r$   cacheZ	page_sizeZpage_mem_in_bytesZblock_mem_in_bytesZfree_blocksZused_blocksZused_memory_bytesZfree_memory_bytesrK   r   r   r   record_kv_cache_memory_metricsB  s2   


z>ContinuousBatchProcessorMetrics.record_kv_cache_memory_metricsactive_requestswaiting_requestsc              
   C   sv   t sdS z| j| | j| td| d| d W dS  ty: } ztd|  W Y d}~dS d}~ww )zRecord metrics about active and waiting requests.

        Args:
            active_requests: Number of active requests
            waiting_requests: Number of waiting requests
        NzQueue metrics: z active requests, z waiting requestsz Failed to record queue metrics: )r0   ru   r   rv   rr   r   rA   r   )r$   r   r   rK   r   r   r   record_queue_metricsj  s   z4ContinuousBatchProcessorMetrics.record_queue_metricsc              
   C   r   )zRecord metrics about a completed request.

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        Nr   z Recorded request completion for r   r   r_   z,Failed to record request completion metric: )r0   r   rw   r   rr   r   rA   r   )r$   r~   r   Z
latency_msrK   r   r   r   record_request_completion|  s    z9ContinuousBatchProcessorMetrics.record_request_completion)r   N)r   r   r   r   r:   r+   rR   rO   r;   r9   r   listr   r   r   r   r   r   r   r   rP      s    
Q('rP   r   )r,   loggingr   enumr   typingr   r   r   r   r   Zopentelemetryr   Zopentelemetry.tracer   r   r   r0   ImportErrorr1   r   tupler9   rO   	getLoggerr   rr   rP   r   r   r   r   <module>   s2    
0&

X