# litellm/proxy/common_request_processing.py
# Reconstructed skeleton: this file was recovered from a compiled (.pyc) dump, so only
# the import list, the signatures (approximate), and the docstrings below survive.
# "..." marks a function body that could not be recovered. Helpers suffixed with
# "_sketch" are illustrative reconstructions, not the original implementations.

import asyncio
import json
import logging
import time
import traceback
from datetime import datetime
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncGenerator,
    Callable,
    Literal,
    Optional,
    Tuple,
    Union,
)

import httpx
import litellm
from fastapi import HTTPException, Request, status
from fastapi.responses import JSONResponse, Response, StreamingResponse

from litellm._logging import verbose_proxy_logger
from litellm._uuid import uuid
from litellm.constants import (
    DD_TRACER_STREAMING_CHUNK_YIELD_RESOURCE,
    LITELLM_DETAILED_TIMING,
    MAX_PAYLOAD_SIZE_FOR_DEBUG_LOG,
    STREAM_SSE_DATA_PREFIX,
)
from litellm.litellm_core_utils.dd_tracing import tracer
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.llm_response_utils.get_headers import (
    get_response_headers,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.proxy._types import ProxyException, UserAPIKeyAuth
from litellm.proxy.auth.auth_utils import check_response_size_is_safe
from litellm.proxy.common_utils.callback_utils import (
    get_logging_caching_headers,
    get_remaining_tokens_and_requests_from_request_data,
)
from litellm.proxy.dd_span_tagger import DDSpanTagger
from litellm.proxy.route_llm_request import route_request
from litellm.proxy.utils import ProxyLogging
from litellm.router import Router
from litellm.types.utils import ModelResponse, ModelResponseStream, ServerToolUse, Usage

StreamChunkSerializer = Callable[[Any], str]
StreamErrorSerializer = Callable[[ProxyException], str]

if TYPE_CHECKING:
    from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig

    ProxyConfig = _ProxyConfig
else:
    ProxyConfig = Any

from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
def _parse_event_data_for_error(event_line: Union[str, bytes]) -> Optional[int]:
    """Parses an event line and returns an error code if present, else None."""
    ...
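# Illustrative sketch, not the recovered implementation: the docstring above and the
# string constants visible in the dump ("utf-8", "data: ", "[DONE]", "error", "code",
# the 100-599 bound) suggest parsing along these lines. The helper name is hypothetical.
def _parse_event_data_for_error_sketch(event_line: Union[str, bytes]) -> Optional[int]:
    line = event_line.decode("utf-8") if isinstance(event_line, bytes) else event_line
    if not line.startswith("data: "):
        return None
    payload = line[len("data: "):].strip()
    if not payload or payload == "[DONE]":
        return None
    try:
        data = json.loads(payload)
    except json.JSONDecodeError:
        return None
    error = data.get("error") if isinstance(data, dict) else None
    if not isinstance(error, dict):
        return None
    try:
        code = int(error.get("code"))
    except (TypeError, ValueError):
        return None
    # Only HTTP-style status codes are treated as errors.
    return code if 100 <= code <= 599 else None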
def _extract_error_from_sse_chunk(event_line: Union[str, bytes]) -> dict:
    """
    Extract error dictionary from SSE format chunk.

    Args:
        event_line: SSE format event line, e.g. "data: {"error": {...}}"

    Returns:
        Error dictionary in OpenAI API format
    """
    ...
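# Illustrative sketch: the dump shows a default error of
# {"message": "Unknown error", "type": "internal_server_error", "param": None, "code": "500"}
# returned whenever no parsable error is found in the "data: " line. The helper name is
# hypothetical and details beyond the visible constants are assumptions.
def _extract_error_from_sse_chunk_sketch(event_line: Union[str, bytes]) -> dict:
    line = event_line.decode("utf-8") if isinstance(event_line, bytes) else event_line
    default_error = {
        "message": "Unknown error",
        "type": "internal_server_error",
        "param": None,
        "code": "500",
    }
    if not line.startswith("data: "):
        return default_error
    payload = line[len("data: "):].strip()
    if not payload or payload == "[DONE]":
        return default_error
    try:
        data = json.loads(payload)
        if isinstance(data, dict) and isinstance(data.get("error"), dict):
            return data["error"]
    except json.JSONDecodeError:
        pass
    return default_error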
async def create_response(
    generator: AsyncGenerator[str, None],
    media_type: str,
    headers: dict,
    default_status_code: int = status.HTTP_200_OK,
) -> Union[Response, StreamingResponse]:
    """
    Create streaming response, checking if the first chunk is an error.
    If the first chunk is an error, return a standard JSON error response.
    Otherwise, return StreamingResponse and stream all content.
    """
    ...
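# Illustrative sketch of the first-chunk check described above: pull one chunk, return a
# JSON error response if it carries an error code, otherwise put the consumed chunk back
# in front of the remaining stream. Error handling and tracing in the real module are
# not recoverable here; the helper name is hypothetical.
async def create_response_sketch(
    generator: AsyncGenerator[str, None],
    media_type: str,
    headers: dict,
    default_status_code: int = status.HTTP_200_OK,
) -> Union[Response, StreamingResponse]:
    try:
        first_chunk = await generator.__anext__()
    except StopAsyncIteration:
        first_chunk = None

    if first_chunk is not None:
        error_code = _parse_event_data_for_error_sketch(first_chunk)
        if error_code is not None:
            error_dict = _extract_error_from_sse_chunk_sketch(first_chunk)
            await generator.aclose()
            return JSONResponse(
                status_code=error_code,
                content={"error": error_dict},
                headers=headers,
            )

    async def combined_generator() -> AsyncGenerator[str, None]:
        # Re-emit the chunk consumed during the error check, then the rest of the stream.
        if first_chunk is not None:
            yield first_chunk
        async for chunk in generator:
            yield chunk

    return StreamingResponse(
        combined_generator(),
        media_type=media_type,
        headers=headers,
        status_code=default_status_code,
    )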
def _override_openai_response_model(
    response_obj: Any, requested_model: str, log_context: str
) -> None:
    """
    Force the OpenAI-compatible `model` field in the response to match what the client requested.

    LiteLLM internally prefixes some provider/deployment model identifiers (e.g. `hosted_vllm/...`).
    That internal identifier should not be returned to clients in the OpenAI `model` field.

    Note: This is intentionally verbose. A model mismatch is a useful signal that an internal
    model identifier is being stamped/preserved somewhere in the request/response pipeline.
    We log mismatches as warnings (and then restamp to the client-requested value) so these
    paths stay observable for maintainers/operators without breaking client compatibility.

    Errors are reserved for cases where the proxy cannot read/override the response model field.

    Exception: If a fallback occurred (indicated by x-litellm-attempted-fallbacks header),
    we should preserve the actual model that was used (the fallback model) rather than
    overriding it with the originally requested model.
    """
    ...
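# Illustrative sketch of the override described above, based on the strings recoverable
# from the dump (fallback detection via the x-litellm-attempted-fallbacks entry in
# _hidden_params.additional_headers, dict vs. attribute access, warning logs). Exact log
# wording is omitted; the helper name is hypothetical.
def _override_openai_response_model_sketch(
    response_obj: Any, requested_model: str, log_context: str
) -> None:
    if not requested_model:
        return

    hidden_params = getattr(response_obj, "_hidden_params", None) or {}
    additional_headers = (
        hidden_params.get("additional_headers") or {}
        if isinstance(hidden_params, dict)
        else {}
    )
    if additional_headers.get("x-litellm-attempted-fallbacks"):
        # A fallback happened; keep the model that actually served the request.
        return

    if isinstance(response_obj, dict):
        if response_obj.get("model") != requested_model:
            verbose_proxy_logger.warning(
                "%s: overriding response['model'] to requested model", log_context
            )
            response_obj["model"] = requested_model
        return

    if not hasattr(response_obj, "model"):
        verbose_proxy_logger.error(
            "%s: cannot override response model; missing `model` attribute", log_context
        )
        return

    if getattr(response_obj, "model", None) != requested_model:
        verbose_proxy_logger.warning(
            "%s: overriding response.model to requested model", log_context
        )
        try:
            setattr(response_obj, "model", requested_model)
        except Exception:
            verbose_proxy_logger.error(
                "%s: failed to override response.model", log_context, exc_info=True
            )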
    N_hidden_paramsadditional_headerszx-litellm-attempted-fallbacksr   zv%s: fallback detected (attempted_fallbacks=%d), preserving actual model used instead of overriding to requested model.modelzj%s: response model mismatch - requested=%r downstream=%r. Overriding response['model'] to requested model.zO%s: cannot override response model; missing `model` attribute. response_type=%szg%s: response model mismatch - requested=%r downstream=%r. Overriding response.model to requested model.zF%s: failed to override response.model=%r on response_type=%s. error=%sT)exc_info)getattrr2   r:   r;   r   ra   hasattrr/   rK   setattrrc   r=   )rh   ri   rj   hidden_paramsZfallback_headersZattempted_fallbacksZdownstream_modelrf   rD   rD   rE   _override_openai_response_model   sl   



rs   litellm_logging_objc                 C   sT   | rt | ds	dS | j}|sdS |d}|d}|d}|d}||||fS )z
def _get_cost_breakdown_from_logging_obj(
    litellm_logging_obj: Optional[LiteLLMLoggingObj],
) -> Tuple[Optional[float], Optional[float], Optional[float], Optional[float]]:
    """
    Extract discount and margin information from logging object's cost breakdown.

    Returns:
        Tuple of (original_cost, discount_amount, margin_total_amount, margin_percent)
    """
    ...
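# Illustrative sketch matching the docstring above; the cost_breakdown attribute and the
# four key names are visible in the dump. The helper name is hypothetical.
def _get_cost_breakdown_from_logging_obj_sketch(
    litellm_logging_obj: Optional[LiteLLMLoggingObj],
) -> Tuple[Optional[float], Optional[float], Optional[float], Optional[float]]:
    if litellm_logging_obj is None or not hasattr(litellm_logging_obj, "cost_breakdown"):
        return (None, None, None, None)
    cost_breakdown = litellm_logging_obj.cost_breakdown
    if not cost_breakdown:
        return (None, None, None, None)
    return (
        cost_breakdown.get("original_cost"),
        cost_breakdown.get("discount_amount"),
        cost_breakdown.get("margin_total_amount"),
        cost_breakdown.get("margin_percent"),
    )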
class ProxyBaseLLMRequestProcessing:
    def __init__(self, data: dict):
        self.data = data

    @staticmethod
    def get_custom_headers(
        user_api_key_dict: UserAPIKeyAuth,
        call_id: Optional[str] = None,
        model_id: Optional[str] = None,
        cache_key: Optional[str] = None,
        api_base: Optional[str] = None,
        version: Optional[str] = None,
        model_region: Optional[str] = None,
        response_cost: Optional[Union[float, str]] = None,
        hidden_params: Optional[dict] = None,
        fastest_response_batch_completion: Optional[bool] = None,
        request_data: Optional[dict] = None,
        timeout: Optional[Union[float, int, httpx.Timeout]] = None,
        litellm_logging_obj: Optional[LiteLLMLoggingObj] = None,
        **kwargs,
    ) -> dict:
        ...
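    # Illustrative sketch of the header construction in get_custom_headers above: the
    # x-litellm-* header names and the exclusion of empty/None values are visible in the
    # dump; the full method also emits key-limit, timing, and cost-breakdown headers.
    # This reduced helper is hypothetical.
    @staticmethod
    def get_custom_headers_sketch(
        user_api_key_dict: UserAPIKeyAuth,
        call_id: Optional[str] = None,
        model_id: Optional[str] = None,
        cache_key: Optional[str] = None,
        api_base: Optional[str] = None,
        version: Optional[str] = None,
        response_cost: Optional[Union[float, str]] = None,
    ) -> dict:
        exclude_values = {"", None, "None"}
        headers = {
            "x-litellm-call-id": call_id,
            "x-litellm-model-id": model_id,
            "x-litellm-cache-key": cache_key,
            "x-litellm-model-api-base": api_base,
            "x-litellm-version": version,
            "x-litellm-response-cost": response_cost,
            "x-litellm-key-spend": user_api_key_dict.spend,
        }
        # Stringify values and drop anything empty, mirroring the filtering seen in the dump.
        return {k: str(v) for k, v in headers.items() if v not in exclude_values}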
    async def common_processing_pre_call_logic(
        self,
        request: Request,
        general_settings: dict,
        user_api_key_dict: UserAPIKeyAuth,
        proxy_logging_obj: ProxyLogging,
        proxy_config: "ProxyConfig",
        route_type: Literal[
            "acompletion", "aembedding", "aresponses", "_arealtime", "_aresponses_websocket",
            "aget_responses", "adelete_responses", "acancel_responses", "acompact_responses",
            "acreate_batch", "aretrieve_batch", "alist_batches", "acancel_batch",
            "afile_content", "afile_retrieve", "afile_delete", "atext_completion",
            "acreate_fine_tuning_job", "acancel_fine_tuning_job", "alist_fine_tuning_jobs",
            "aretrieve_fine_tuning_job", "alist_input_items", "aimage_edit",
            "agenerate_content", "agenerate_content_stream", "allm_passthrough_route",
            "avector_store_search", "avector_store_create", "avector_store_file_create",
            "avector_store_file_list", "avector_store_file_retrieve",
            "avector_store_file_content", "avector_store_file_update",
            "avector_store_file_delete", "aocr", "asearch", "avideo_generation",
            "avideo_list", "avideo_status", "avideo_content", "avideo_remix",
            "acreate_container", "alist_containers", "aingest", "aretrieve_container",
            "adelete_container", "acreate_skill", "alist_skills", "aget_skill",
            "adelete_skill", "anthropic_messages", "acreate_interaction",
            "aget_interaction", "adelete_interaction", "acancel_interaction",
            "asend_message", "call_mcp_tool", "acreate_eval", "alist_evals", "aget_eval",
            "aupdate_eval", "adelete_eval", "acancel_eval", "acreate_run", "alist_runs",
            "aget_run", "acancel_run", "adelete_run",
        ],
        version: Optional[str] = None,
        user_model: Optional[str] = None,
        user_temperature: Optional[float] = None,
        user_request_timeout: Optional[float] = None,
        user_max_tokens: Optional[int] = None,
        user_api_base: Optional[str] = None,
        model: Optional[str] = None,
        llm_router: Optional[Router] = None,
    ) -> Tuple[dict, LiteLLMLoggingObj]:
        ...

    @staticmethod
    def _get_model_id_from_response(hidden_params: dict) -> Optional[str]:
        """Extract model_id from hidden_params with fallback to litellm_metadata."""
        ...

    def _debug_log_request_payload(self) -> None:
        """Log request payload at DEBUG level, truncating if too large."""
        ...
    async def base_process_llm_request(
        self,
        request: Request,
        fastapi_response: Response,
        user_api_key_dict: UserAPIKeyAuth,
        route_type: str,  # same Literal of route names as common_processing_pre_call_logic
        proxy_logging_obj: ProxyLogging,
        general_settings: dict,
        proxy_config: "ProxyConfig",
        select_data_generator: Callable,
        llm_router: Optional[Router] = None,
        model: Optional[str] = None,
        user_model: Optional[str] = None,
        user_temperature: Optional[float] = None,
        user_request_timeout: Optional[float] = None,
        user_max_tokens: Optional[int] = None,
        user_api_base: Optional[str] = None,
        version: Optional[str] = None,
        is_streaming_request: Optional[bool] = False,
        contents: Optional[Any] = None,
    ) -> Any:
        """
        Common request processing logic for both chat completions and responses API endpoints
        """
        ...

    async def base_passthrough_process_llm_request(
        self,
        request: Request,
        fastapi_response: Response,
        user_api_key_dict: UserAPIKeyAuth,
        route_type: str,
        proxy_logging_obj: ProxyLogging,
        general_settings: dict,
        proxy_config: "ProxyConfig",
        llm_router: Optional[Router] = None,
        model: Optional[str] = None,
        user_model: Optional[str] = None,
        user_temperature: Optional[float] = None,
        user_request_timeout: Optional[float] = None,
        user_max_tokens: Optional[int] = None,
        user_api_base: Optional[str] = None,
        version: Optional[str] = None,
    ) -> Response:
        ...
    def _is_streaming_response(self, response: Any) -> bool:
        """
        Check if the response object is actually a streaming response by inspecting its type.

        This uses standard Python inspection to detect streaming/async iterator objects
        rather than relying on specific wrapper classes.
        """
        ...
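    # Illustrative sketch of the inspection described above; the dump shows
    # inspect.isasyncgen plus isinstance checks against AsyncGenerator/AsyncIterator.
    @staticmethod
    def _is_streaming_response_sketch(response: Any) -> bool:
        import inspect
        from collections.abc import AsyncGenerator as ABCAsyncGenerator, AsyncIterator

        if inspect.isasyncgen(response):
            return True
        return isinstance(response, (ABCAsyncGenerator, AsyncIterator))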
    def _is_streaming_request(self, data: dict, is_streaming_request: Optional[bool]) -> bool:
        """
        Check if the request is a streaming request.

        1. is_streaming_request is a dynamic param passed in
        2. if "stream" in data and data["stream"] is True
        """
        ...
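    # Illustrative sketch of the two checks listed in the docstring above; name is hypothetical.
    @staticmethod
    def _is_streaming_request_sketch(data: dict, is_streaming_request: Optional[bool]) -> bool:
        if is_streaming_request is True:
            return True
        return "stream" in data and data["stream"] is True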
    async def _handle_llm_api_exception(
        self,
        e: Exception,
        user_api_key_dict: UserAPIKeyAuth,
        proxy_logging_obj: ProxyLogging,
        litellm_logging_obj: Optional[LiteLLMLoggingObj] = None,
    ):
        """Raises ProxyException (OpenAI API compatible) if an exception is raised"""
        ...
    @staticmethod
    def return_sse_chunk(chunk: Union[str, dict]) -> str:
        """
        Helper function to format streaming chunks for Anthropic API format

        Args:
            chunk: A string or dictionary to be returned in SSE format

        Returns:
            str: A properly formatted SSE chunk string
        """
        ...
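    # Illustrative sketch: dict chunks are JSON-encoded, then wrapped in an SSE "data:" frame.
    @staticmethod
    def return_sse_chunk_sketch(chunk: Union[str, dict]) -> str:
        chunk_str = safe_dumps(chunk) if isinstance(chunk, dict) else chunk
        return f"data: {chunk_str}\n\n"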
    @staticmethod
    async def async_streaming_data_generator(
        response: Any,
        user_api_key_dict: UserAPIKeyAuth,
        request_data: dict,
        proxy_logging_obj: ProxyLogging,
        *,
        serialize_chunk: StreamChunkSerializer,
        serialize_error: StreamErrorSerializer,
    ) -> AsyncGenerator[str, None]:
        """
        Shared streaming data generator: runs proxy iterator hook, per-chunk hook,
        cost injection, then yields chunks via serialize_chunk; on exception runs
        failure hook and yields via serialize_error. Use for SSE or NDJSON.
        """
        ...
        zinside generatorr   )r   r
  r   Nz3async_data_generator: received streaming chunk - {})r   r
  rC   
str_so_far)rh   
model_dumprA   T)modeZexclude_nonerS   rm   zIlitellm.proxy.proxy_server.async_data_generator(): Exception occured - {}r!  z[1;31mAn error occurred: zP

 Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`rV   rJ   rK   r   rL   rR   r(  rI   )r   ra   Z'async_post_call_streaming_iterator_hookformatZasync_post_call_streaming_hookr2   r'   r(   r   Zget_response_stringrp   r5  r:   r=   r;   rc   r{   "_process_chunk_with_cost_injectionre   r)  r   	traceback
format_excr   ro   )r
  r   r   r   r2  r3  r4  r[   Zresponse_strd
model_namerf   r-  Zerror_tracebackr/  Zproxy_exceptionrD   rD   rE   async_streaming_data_generator  s   




#





z<ProxyBaseLLMRequestProcessing.async_streaming_data_generatorc                 C  s8   t j| |||t jdd d2 z	3 dH W }|V  q6 dS )z
    @staticmethod
    async def async_sse_data_generator(
        response: Any,
        user_api_key_dict: UserAPIKeyAuth,
        request_data: dict,
        proxy_logging_obj: ProxyLogging,
    ) -> AsyncGenerator[str, None]:
        """
        Anthropic /messages and Google /generateContent streaming data generator require SSE events.
        Delegates to async_streaming_data_generator with SSE serializers.
        """
        ...
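    # Illustrative sketch of the SSE serializers described above: chunks are wrapped in
    # "data:" frames, errors are emitted as a "data: {'error': ...}" frame built from
    # ProxyException.to_dict() (both shapes are visible in the dump). The exact wiring
    # into async_streaming_data_generator is an assumption.
    @staticmethod
    def _sse_serializers_sketch() -> Tuple[StreamChunkSerializer, StreamErrorSerializer]:
        def serialize_chunk(chunk: Any) -> str:
            chunk_str = safe_dumps(chunk) if isinstance(chunk, dict) else str(chunk)
            return f"data: {chunk_str}\n\n"

        def serialize_error(exc: ProxyException) -> str:
            return f"data: {json.dumps({'error': exc.to_dict()})}\n\n"

        return serialize_chunk, serialize_error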
    @staticmethod
    def _process_chunk_with_cost_injection(chunk: Any, model_name: Optional[str]) -> Any:
        """
        Process a streaming chunk and inject cost information if enabled.

        Args:
            chunk: The streaming chunk (dict, str, bytes, or bytearray)
            model_name: Model name for cost calculation

        Returns:
            The processed chunk with cost information injected if applicable
        """
        ...
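    # Illustrative sketch of the dispatch described above: dict chunks go through the
    # usage-dict helper, str/bytes SSE frames are round-tripped through the frame helper.
    # The include_cost_in_streaming_usage flag name is visible in the dump; looking it up
    # via getattr on litellm is an assumption.
    @staticmethod
    def _process_chunk_with_cost_injection_sketch(chunk: Any, model_name: Optional[str]) -> Any:
        if not getattr(litellm, "include_cost_in_streaming_usage", False):
            return chunk
        try:
            if isinstance(chunk, dict):
                modified = ProxyBaseLLMRequestProcessing._inject_cost_into_usage_dict(
                    chunk, model_name
                )
                return modified if modified is not None else chunk
            if isinstance(chunk, (bytes, bytearray)):
                frame = chunk.decode("utf-8", errors="ignore")
                modified = ProxyBaseLLMRequestProcessing._inject_cost_into_sse_frame_str(
                    frame, model_name
                )
                return modified.encode("utf-8") if modified is not None else chunk
            if isinstance(chunk, str):
                modified = ProxyBaseLLMRequestProcessing._inject_cost_into_sse_frame_str(
                    chunk, model_name
                )
                return modified if modified is not None else chunk
        except Exception:
            pass
        return chunk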
        Zinclude_cost_in_streaming_usageFNr,   ignore)errorsrV   r   )ro   r   r2   r:   r{   _inject_cost_into_usage_dictr3   	bytearrayr4   _inject_cost_into_sse_frame_strendswithencoderc   r=   )r[   r<  maybe_modifiedsZ	maybe_modrD   rD   rE   r8  j  s`   
#
z@ProxyBaseLLMRequestProcessing._process_chunk_with_cost_injection	frame_strc           	      C   s   zJ|  d}t|D ]=\}}| }|drG| ddd  }|rG|dkrGt|}t||}|durGdt| ||< d	|  W S q
W dS  t
yT   Y dS w )aj  
    @staticmethod
    def _inject_cost_into_sse_frame_str(frame_str: str, model_name: Optional[str]) -> Optional[str]:
        """
        Inject cost information into an SSE frame string by modifying the JSON in the 'data:' line.

        Args:
            frame_str: SSE frame string that may contain multiple lines
            model_name: Model name for cost calculation

        Returns:
            Modified SSE frame string with cost injected, or None if no modification needed
        """
        ...
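    # Illustrative sketch of the line-level rewrite described above: find the "data:"
    # line, parse its JSON, delegate to the usage-dict helper, and re-serialize the frame.
    @staticmethod
    def _inject_cost_into_sse_frame_str_sketch(
        frame_str: str, model_name: Optional[str]
    ) -> Optional[str]:
        try:
            lines = frame_str.split("\n")
            for idx, line in enumerate(lines):
                stripped = line.strip()
                if not stripped.startswith("data:"):
                    continue
                json_part = stripped[len("data:"):].strip()
                if not json_part or json_part == "[DONE]":
                    continue
                obj = json.loads(json_part)
                modified = ProxyBaseLLMRequestProcessing._inject_cost_into_usage_dict(
                    obj, model_name
                )
                if modified is not None:
                    lines[idx] = f"data: {json.dumps(modified)}"
                    return "\n".join(lines)
        except Exception:
            return None
        return None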
        
zdata:r  r.   Nr-   )r   	enumerater7   r5   rA   r9   r{   rB  r   joinrc   )	rI  r<  linesidxlnZstripped_lnZ	json_partobjrG  rD   rD   rE   rD    s(   


z=ProxyBaseLLMRequestProcessing._inject_cost_into_sse_frame_strrP  c                 C   s\  |  ddkrt|  dtr| d }t| ddpd}t| ddp%d}t| d|| p3|| }| d}| d	}| d
}| d}	| d}
|||d}|	dur\|	|d< |
durd|
|d< |durot|d|d< |durw||d< |dur||d	< ttdi |d}z	tj||d}W n t	y   d}Y nw |dur|| 
di d< | S dS )aD  
    @staticmethod
    def _inject_cost_into_usage_dict(obj: dict, model_name: Optional[str]) -> Optional[dict]:
        """
        Inject cost information into a usage dictionary for message_delta events.

        Args:
            obj: Dictionary containing the SSE event data
            model_name: Model name for cost calculation

        Returns:
            Modified dictionary with cost injected, or None if no modification needed
        """
        ...
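    # Illustrative sketch for the message_delta case described above: rebuild a Usage
    # object from the Anthropic-style token counts visible in the dump and price it with
    # litellm.completion_cost; placing the result under usage["cost"] follows the dump.
    # Cache/web-search token fields handled by the original are omitted here.
    @staticmethod
    def _inject_cost_into_usage_dict_sketch(obj: dict, model_name: Optional[str]) -> Optional[dict]:
        if obj.get("type") != "message_delta" or not isinstance(obj.get("usage"), dict):
            return None
        _usage = obj["usage"]
        prompt_tokens = int(_usage.get("input_tokens") or 0)
        completion_tokens = int(_usage.get("output_tokens") or 0)
        usage = Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=int(_usage.get("total_tokens") or prompt_tokens + completion_tokens),
        )
        try:
            cost = litellm.completion_cost(
                completion_response=ModelResponse(usage=usage), model=model_name
            )
        except Exception:
            return None
        if cost is None:
            return None
        obj["usage"]["cost"] = cost
        return obj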
    def maybe_get_model_id(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]:
        """
        Get model_id from logging object or request metadata.

        The router sets model_info.id when selecting a deployment. This tries multiple locations
        where the ID might be stored depending on the request lifecycle stage.
        """
        ...
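    # Illustrative sketch of the lookup order described above: the logging object's
    # litellm_params/model_info, then its metadata, then the request body's metadata.
    def maybe_get_model_id_sketch(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]:
        if _logging_obj is not None:
            litellm_params = getattr(_logging_obj, "litellm_params", None) or {}
            model_id = (litellm_params.get("model_info") or {}).get("id")
            if model_id:
                return model_id
            metadata = litellm_params.get("metadata") or {}
            model_id = (metadata.get("model_info") or {}).get("id")
            if model_id:
                return model_id
        metadata = self.data.get("metadata") or {}
        return (metadata.get("model_info") or {}).get("id")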
        Nlitellm_paramsr   r   metadatar   r   )rp   r\  r;   ro   rC   )r|   r[  r   r   r]  _kwargsr\  r   rD   rD   rE   r*    s0   	z0ProxyBaseLLMRequestProcessing.maybe_get_model_id)NNNNNNNN)r+   N)NNNNNNNNNFN)FrT   ).r   
__module____qualname__r:   r}   staticmethodr   r   r=   r
   r   boolr<   r+  TimeoutLiteLLMLoggingObjr   r   r"   r%   r   r#   r	   r   r   r  r   r   r   r   r  r  r  r  rc   r0  r1  StreamChunkSerializerStreamErrorSerializerr   r=  r  r8  rD  rB  r*  rD   rD   rD   rE   r{   e  s   	
 NMNOPQRST
U
 Z
	V?@ABCDEFGHIJKLM
  9	

7

 	
Q
:#Dr{   )Tr^   rA   r   r   r9  r   typingr   r   r   r   r   r   r	   r
   r+  r8   Zfastapir   r   r   Zfastapi.responsesr   r   r   r   Zlitellm._loggingr   Zlitellm._uuidr   Zlitellm.constantsr   r   r   r   Z%litellm.litellm_core_utils.dd_tracingr   Z*litellm.litellm_core_utils.litellm_loggingr   rd  Z9litellm.litellm_core_utils.llm_response_utils.get_headersr   Z*litellm.litellm_core_utils.safe_json_dumpsr   Zlitellm.proxy._typesr   r   Zlitellm.proxy.auth.auth_utilsr   Z)litellm.proxy.common_utils.callback_utilsr   r   Zlitellm.proxy.dd_span_taggerr    Zlitellm.proxy.route_llm_requestr!   Zlitellm.proxy.utilsr"   Zlitellm.routerr#   Zlitellm.types.utilsr$   r=   re  rf  r   r%   Z_ProxyConfigr   r&   r'   r(   r)   r3   r<   rF   r:   rM   r  rg   rs   r   rz   r{   rD   rD   rD   rE   <module>   s    (.+


c
T
