o
    ưi1C                     @   s6  d Z ddlmZmZmZmZmZmZmZ ddl	m
Z
 ddlmZ ddlZddlmZ ddlmZmZ ddlmZ dd	lmZ erudd
lmZ ddlmZ ddlmZ ddlmZ  ddl!m"Z# ddl$m%Z& eeef Ze#Z"e&Z%e Z'eZeZneZeZ"eZ%eZ'ee(ef Zee(ef ZG dd deZ)G dd deZ*dS )a  
Batch Rate Limiter Hook

This hook implements rate limiting for batch API requests by:
1. Reading batch input files to count requests and estimate tokens at submission
2. Validating actual usage from output files when batches complete
3. Integrating with the existing parallel request limiter infrastructure

## Integration & Calling
This hook is automatically registered and called by the proxy system.
See BATCH_RATE_LIMITER_INTEGRATION.md for complete integration details.

Quick summary:
- Add to PROXY_HOOKS in litellm/proxy/hooks/__init__.py
- Gets auto-instantiated on proxy startup via _add_proxy_hooks()
- async_pre_call_hook() fires on POST /v1/batches (batch submission)
- async_log_success_event() fires on GET /v1/batches/{id} (batch completion)
    )TYPE_CHECKINGAnyDictListLiteralOptionalUnion)HTTPException)	BaseModelN)verbose_proxy_logger)_get_batch_job_input_file_usage_get_file_content_as_dictionary)CustomLogger)UserAPIKeyAuth)Span)RateLimitDescriptor)RateLimitStatus)$_PROXY_MaxParallelRequestsHandler_v3)InternalUsageCache)Routerc                   @   s"   e Zd ZU dZeed< eed< dS )BatchFileUsagezT
    Internal model for batch file usage tracking, used for batch rate limiting
    total_tokensrequest_countN)__name__
__module____qualname____doc__int__annotations__ r   r   ]/home/app/Keep/.python/lib/python3.10/site-packages/litellm/proxy/hooks/batch_rate_limiter.pyr   ?   s   
 r   c                   @   s   e Zd ZdZdedefddZddded	 d
ede	ddf
ddZ
deded
eddfddZ		d de	ded dee defddZde	dedefddZdededede	deee	edf f
ddZdS )!_PROXY_BatchRateLimiterz
    Rate limiter for batch API requests.
    
    Handles rate limiting at two points:
    1. Batch submission - reads input file and reserves capacity
    2. Batch completion - reads output file and adjusts for actual usage
    internal_usage_cacheparallel_request_limiterc                 C   s   || _ || _dS )a  
        Initialize the batch rate limiter.
        
        Note: These dependencies are automatically injected by ProxyLogging._add_proxy_hooks()
        when this hook is registered in PROXY_HOOKS. See BATCH_RATE_LIMITER_INTEGRATION.md.

        Args:
            internal_usage_cache: Cache for storing rate limit data (auto-injected)
            parallel_request_limiter: Existing rate limiter to integrate with (needs custom injection)
        N)r"   r#   )selfr"   r#   r   r   r    __init__O   s   
z _PROXY_BatchRateLimiter.__init__statusr   descriptorsr   batch_usage
limit_typereturnNc                    s  ddl m } t fddt|D d}|r|| ndddd}|  }| jj}	||	 }
||
d}t	d d	 } d
 }|dkrcd|
dd d|
dd d|j d| d| d| }nd|
dd d|
dd d|j d| d| d| }td|t|	||dd)z,Raise HTTPException for rate limit exceeded.r   )datetimec                 3   s,    | ]\}}| d   dkr|V  qdS )keyZdescriptor_keyN)get).0idr&   r   r    	<genexpr>m   s   
 zB_PROXY_BatchRateLimiter._raise_rate_limit_error.<locals>.<genexpr> N)r,   value
rate_limitz%Y-%m-%d %H:%M:%S UTClimit_remainingcurrent_limitrequestszBatch rate limit exceeded for r,   unknown: r4   z. Batch contains z requests but only z requests remaining out of z RPM limit. Limit resets at: z tokens but only z tokens remaining out of z TPM limit. Limit resets at: i  )zretry-afterrate_limit_typeZreset_at)status_codedetailheaders)r+   next	enumeratenow	timestampr#   window_sizefromtimestampstrftimemaxr-   r   r   r	   str)r$   r&   r'   r(   r)   r+   Zdescriptor_index
descriptorrA   rC   Z
reset_timeZreset_time_formattedZremaining_displayr7   r=   r   r1   r    _raise_rate_limit_errora   sV   
z/_PROXY_BatchRateLimiter._raise_rate_limit_erroruser_api_key_dictdatac                    s^  ddl m} | jj||dddd}| jj||jddI dH }|d D ]&}|d	 }|d
 }	|dkr4|jn|dkr;|jnd}
|
|	krI| |||| q#g }|D ]N}|d }|d }|	d}|du rbqN|	ddur| jj
||dd}||||j| jjd |	ddur| jj
||dd}||||j| jjd qN|r| jj||jdI dH  dS dS )z
        Check rate limits and increment counters by the batch amounts.
        
        Raises HTTPException if any limit would be exceeded.
        r   )RedisPipelineIncrementOperationNF)rJ   rK   Zrpm_limit_typeZtpm_limit_typeZmodel_has_failuresT)r'   parent_otel_spanZ	read_onlyZstatusesr;   r6   r8   tokensr,   r4   r5   Zrequests_per_unit)r,   r4   r;   )r,   Zincrement_valuettlZtokens_per_unit)pipeline_operationsrM   )Zlitellm.types.cachingrL   r#   Z_create_rate_limit_descriptorsZshould_rate_limitrM   r   r   rI   r-   Zcreate_rate_limit_keysappendrC   Z,async_increment_tokens_with_ttl_preservation)r$   rJ   rK   r(   rL   r'   Zrate_limit_responser&   r;   r6   Zrequired_capacityrP   rH   r,   r4   r5   Zrpm_keyZtpm_keyr   r   r    #_check_and_increment_batch_counters   s|   	
		z;_PROXY_BatchRateLimiter._check_and_increment_batch_countersopenaifile_idcustom_llm_provider)rS   ZazureZ	vertex_aic              
      s   z=ddl m} ||}|r|dur| j||dI dH }ntj|||dI dH }t|j}t||d}t|}	t	|j
|	dW S  tyX }
 ztd| d	t|
   d}
~
ww )
a  
        Count number of requests and tokens in a batch input file.
        
        Args:
            file_id: The file ID to read
            custom_llm_provider: The custom LLM provider to use for token encoding
            user_api_key_dict: User authentication information for file access (required for managed files)
            
        Returns:
            BatchFileUsage with total_tokens and request_count
        r   )"_is_base64_encoded_unified_file_idN)rT   rJ   rT   rU   rJ   )Zfile_content_dictionaryrU   )r   r   z$Error counting input file usage for r:   )Z1litellm.proxy.openai_files_endpoints.common_utilsrV   _fetch_managed_file_contentlitellmafile_contentr   contentr   lenr   r   	Exceptionr   errorrG   )r$   rT   rU   rJ   rV   Zis_managed_filefile_contentZfile_content_as_dictZinput_file_usager   er   r   r    count_input_file_usage   sB   z._PROXY_BatchRateLimiter.count_input_file_usagec           	   
      s   ddl m} z
ddlm}m} W n ty' } z
tdt| dd}~ww |du r0td|d}|du r=td	t	||sFtd
|du rNtd|j
||j|dI dH }|S )a  
        Fetch file content from managed files hook.
        
        This is needed for managed files because they require proper user context
        to verify file ownership and access permissions.
        
        Args:
            file_id: The managed file ID (base64 encoded)
            user_api_key_dict: User authentication information
            
        Returns:
            HttpxBinaryResponseContent with the file content
        r   )BaseFileEndpoints)
llm_routerproxy_logging_objz)Cannot import proxy_server dependencies: z7. Managed files require proxy_server to be initialized.NzBproxy_logging_obj not available. Cannot access managed files hook.Zmanaged_filesz9Managed files hook not found. Cannot access managed file.z7Managed files hook is not a BaseFileEndpoints instance.z6llm_router not available. Cannot access managed files.)rT   Zlitellm_parent_otel_spanrc   )Z*litellm.llms.base_llm.files.transformationrb   Zlitellm.proxy.proxy_serverrc   rd   ImportError
ValueErrorrG   Zget_proxy_hook
isinstancerZ   rM   )	r$   rT   rJ   rb   rc   rd   r`   Zmanaged_files_objr_   r   r   r    rX   *  sB   

z3_PROXY_BatchRateLimiter._fetch_managed_file_contentcache	call_typec           	   
      s  |dkrt d|  |S t d zR|d}|s$t d |W S |dd}t d|  | j|||d	I d
H }t d|j d|j  |j|d< |j|d< | j|||dI d
H  t d |W S  tyn     ty } zt j	dt
| dd |W  Y d
}~S d
}~ww )aR  
        Pre-call hook for batch operations.
        
        Only handles batch creation (acreate_batch):
        - Reads input file
        - Counts tokens and requests
        - Reserves rate limit capacity via parallel_request_limiter

        Args:
            user_api_key_dict: User authentication information
            cache: Cache instance (not used directly)
            data: Request data
            call_type: Type of call being made

        Returns:
            Modified data dict or None

        Raises:
            HTTPException: 429 if rate limit would be exceeded
        Zacreate_batchzMBatch rate limiter: Not handling batch creation rate limiting for call type: z9Batch rate limiter: Handling batch creation rate limitinginput_file_idz9No input_file_id in batch request, skipping rate limitingrU   rS   z'Counting tokens from batch input file: rW   Nz!Batch input file usage - Tokens: z, Requests: Z_batch_token_countZ_batch_request_count)rJ   rK   r(   z3Batch rate limit check passed, counters incrementedzError in batch rate limiting: T)exc_info)r   debugr-   ra   r   r   rR   r	   r]   r^   rG   )	r$   rJ   rh   rK   ri   rj   rU   r(   r`   r   r   r    async_pre_call_hookg  s`   




z+_PROXY_BatchRateLimiter.async_pre_call_hook)rS   N)r   r   r   r   r   ParallelRequestLimiterr%   r   r   rG   rI   r   r   rR   r   r   ra   r   rX   r   r]   rm   r   r   r   r    r!   F   sp    

5
]
:
=r!   )+r   typingr   r   r   r   r   r   r   Zfastapir	   Zpydanticr
   rY   Zlitellm._loggingr   Zlitellm.batches.batch_utilsr   r   Z"litellm.integrations.custom_loggerr   Zlitellm.proxy._typesr   Zopentelemetry.tracer   _SpanZ/litellm.proxy.hooks.parallel_request_limiter_v3r   Z_RateLimitDescriptorr   Z_RateLimitStatusr   Z_ParallelRequestLimiterZlitellm.proxy.utilsr   Z_InternalUsageCacheZlitellm.routerr   Z_Routerrn   rG   r   r!   r   r   r   r    <module>   s<    $