o
    ưid                 +   @   s<	  U d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlZd dlZd dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZmZm Z m!Z!m"Z"m#Z# d dl$m%Z& d dl'm%Z( d dl)m%Z* d dl+m,Z, d dl-m%Z. d dl/m%Z0 d dl1m%Z2 d dl3m%Z4 d dl5m%Z6 d dl7m%Z8 d dl9m:Z; d dl9m%Z< d dl=m%Z> d dl?m@Z@ d dlAmBZC d dlAm%ZD d dlAmEZF d dlGm%ZH d dlImJZJ d dlKmLZL d dlMmNZNmOZOmPZPmQZQmRZRmSZSmTZTmUZU d dlVmWZWmXZX d dlYmZZZm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZc d dldmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZmmnZnmoZo er5d dlpmqZr neZreseejtjueejvjuhZweseejxjueejyjueejzjueej{juhZ|eseej}jueej~juhZeseejjueejjuhZeseejjueejjuhZeseejjueejjuhZeejjuZeejjuZ	 	 			ddedede	e de	ef de	e de	e
eef  fdd Zd!ed"e	e dedede	e f
d#d$Zd%e	eb defd&d'Z	(	 	 						 	 						)				dd!ededede	e d"e	e d*e	e d+e	e d,e	e d-e	e de	ef de	e d.e	e d/e	eb d0e	eW d1eZd2ed3e	e d4e	e d5e	e de
eef f(d6d7Z%dd8efd9d:Zd;edefd<d=Z	dd!e	e d"e	e de	e fd>d?Z				dd!e	e d8e	e d@e	e dAe	e d"e	e dBe	e de	e fdCdDZeedEd!edefdFdGZd8ede	e fdHdIZdJdKdKddLZeedM< dNe	e de	e fdOdPZd8ede	eb fdQdRZdSdT Zd1e	eZ d8ede	eZ fdUdVZdWed"e	e de
eeef fdXdYZdWed"e	e de
eeeef fdZd[Z							dd\e	er d]ed^ed_ed`edae	e dbe	e dce	e dde	e dee	e dfe	e dge	e ddfdhdiZddd(g d(dddddddddddddddddfd!e	e djedke	e d1e	eZ dle	e dme	e dne	e de	ef de	e doe	e dAe	e d@e	e dpe	e_ dqe	e dBe	e d\e	er d3e	e def$drdsZdteeef de	e fdudvZ				(					ddweeiegehemeleNeXeUe[ePee,f d!ed"e	e d1edx doedye	e d@e	e dAe	e dzedpe	e_ dqe	e dBe	e d\e	er d3e	e defd{d|Z	dd!ed"e	e d4e	e de
eef fd}d~Zd!e	e d"ed4ecde
eef fddZ	dd!ed"e	e de	eW de
eef fddZd!ed"e	e dede
eef fddZ					dd!ed"e	e dme	e dne	e dle	e doe	e defddZ		dd!eded"e	e de	e^ def
ddZ		ddebd!ed"e	e de	e^ de
eef f
ddZG dd dZG dd deZdeQdebd"edqedef
ddZdS )    N)	lru_cache)TYPE_CHECKINGAnyListLiteralOptionalTupleUnioncast)Response)	BaseModel)verbose_logger)DEFAULT_MAX_LRU_CACHE_SIZE&DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND)StandardBuiltInToolCostTracking)&TranscriptionUsageObjectTransformation)CostCalculatorUtils_generic_cost_per_character_get_service_tier_cost_key_parse_prompt_tokens_detailscalculate_cost_componentgeneric_cost_per_tokenget_billable_input_tokensselect_cost_metric_for_modelcost_per_token)SearchResponse)cost_per_second)get_model_params_and_category)cost_per_character)cost_router)ResponseAPILoggingUtils)LiteLLMSendMessageResponse)HttpxBinaryResponseContentImageGenerationRequestQualityOpenAIModerationResponseOpenAIRealtimeStreamList&OpenAIRealtimeStreamResponseBaseObject!OpenAIRealtimeStreamSessionEventsResponseAPIUsageResponsesAPIResponse)RerankBilledUnitsRerankResponse)
CallTypesLiteral"LiteLLMRealtimeStreamLoggingObjectLlmProvidersLlmProvidersSet	ModelInfoStandardBuiltInToolsParams TranscriptionUsageDurationObjectTranscriptionUsageTokensObjectUsageVectorStoreSearchResponse)	CallTypesCostPerTokenEmbeddingResponseImageResponseModelResponseModelResponseStreamProviderConfigManagerTextCompletionResponseTranscriptionResponse_cached_get_model_info_helpertoken_counter)Logging        prompt_tokenscompletion_tokensresponse_time_mscustom_cost_per_tokencustom_cost_per_secondreturnc                 C   s\   |du r
|du r
dS |dur|d |  }|d | }||fS |dur,|| d }d|fS dS )zFInternal helper function for calculating cost, if custom pricing givenNinput_cost_per_tokenoutput_cost_per_token  r    )rD   rE   rF   rG   rH   
input_costoutput_costrM   rM   N/home/app/Keep/.python/lib/python3.10/site-packages/litellm/cost_calculator.py%_cost_per_token_custom_pricing_helper   s   	rQ   modelcustom_llm_providerc              
   C   s   |sdS z'd}|dkrddl m} || }|r&t|dr)|j| ||dW S W dS W dS  tyF } ztd|  W Y d}~dS d}~ww )a  
    Calculate additional costs beyond standard token costs.

    This function delegates to provider-specific config classes to calculate
    any additional costs like routing fees, infrastructure costs, etc.

    Args:
        model: The model name
        custom_llm_provider: The provider name (optional)
        prompt_tokens: Number of prompt tokens
        completion_tokens: Number of completion tokens

    Returns:
        Optional dictionary with cost names and amounts, or None if no additional costs
    Nazure_air   )AzureFoundryModelInfocalculate_additional_costs)rR   rD   rE   z$Error calculating additional costs: )Z"litellm.llms.azure_ai.common_utilsrU   Zget_azure_ai_config_for_modelhasattrrV   	Exceptionr   debug)rR   rS   rD   rE   Zconfig_classrU   erM   rM   rP   _get_additional_costs   s,   
		r[   usage_blockc                 C   s   | d u rdS t | ddpd}t | ddpd}t | dd }|d ur:t |ddp'd}t |ddp/d}|dks8|dkr:dS |dkpA|dkS )	NFrD   r   rE   prompt_tokens_detailsZaudio_tokensZtext_tokensT)getattr)r\   Zprompt_tokens_valZcompletion_tokens_valZprompt_detailsZaudio_token_countZtext_token_countrM   rM   rP   &_transcription_usage_has_token_details   s   r_    
completionprompt_characterscompletion_characterscache_creation_input_tokenscache_read_input_tokensnumber_of_queriesusage_objectrerank_billed_units	call_type!audio_transcription_file_durationservice_tierresponserequest_modelc           (   	   C   sr  | du rt d|dur|}nt|||| ||	d}t|||||
d}|dur/|d |d fS d}d}tj}| }|durW|d |  }|durV| d| d|  }||v rV|}n
tj| d\}}}}| }| dd}t|dkrt|d }n| }	 ||v r~|} n| |v r| } n||v r|} |d	ks|d
krtj||d}t	|}d} d}!|dkr|du rt
d|| ||t|||dddd\}"}#|"du s|#du rt
d|"|#|||||"} |#}!| |!fS |dkrt||||d\} }!| |!fS |dks|dkrt| ||dS |dks|dkrt| |tt|dS |dks|dkr"t| ||dS |dks8|dks8|tjks8|tjkr?t|| |dS |dksI|dkr\t|rUt|||d S t|||d!S |d"ksf|d#krdd$lm}$ |$| ||psd|rt|d%r|jd&S dd&S |d'krt|||d(}%|%dkrt|||||d)S |%dkrt ||||d*S dS |d+krt!| |d,S |d-krt"| ||d S |d.krt| ||d S |d/krt#| |d,S |d0krt$| |d,S |d1krt%| ||d2S |d3krt&| ||d S |d4kr
t'| |d,S |d5krt(| |d,S |d6kr t)| |d,S |d7kr+t*| |d,S |d8kr<dd9l+m,}& |&| |d,S |d:krIt-| |||d;S t.| |d}'|'/d<ddksa|'/d=ddkrit| |||dS |'/d>ddur|durt01d?| |'/d>d| |'d> | d@ }|'/dAddur|durt01dB| |'/dAd| |'dA | d@ }t01dC| || ||fS )Dar  
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Parameters:
        model (str): The name of the model to use. Default is ""
        prompt_tokens (int): The number of tokens in the prompt.
        completion_tokens (int): The number of tokens in the completion.
        response_time (float): The amount of time, in milliseconds, it took the call to complete.
        prompt_characters (float): The number of characters in the prompt. Used for vertex ai cost calculation.
        completion_characters (float): The number of characters in the completion response. Used for vertex ai cost calculation.
        custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
        custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
        custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
        call_type: Optional[str]: the call type

    Returns:
        tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
    Nz"Invalid arg. Model cannot be none.)rD   rE   total_tokensrd   re   )rD   rE   rF   rH   rG   r      /rR   speechaspeechrR   rS   rC   r   zvprompt_characters must be provided for tts calls. prompt_characters={}, model={}, custom_llm_provider={}, call_type={})rR   rS   rb   rc   Zcustom_prompt_costZcustom_completion_costzcost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}r   )rR   usagerS   rk   arerankrerank)rR   rS   billed_unitsZavector_store_searchZvector_store_search)rR   rS   rl   ZocrZaocraretrieve_batchretrieve_batch)ru   rR   rS   atranscriptiontranscription)rR   ru   rk   rR   rS   durationsearchasearchsearch_provider_cost_per_query_hidden_paramsrR   rS   rf   optional_params	vertex_ai)rR   rS   ri   )rR   rS   rb   rc   ru   )rR   rS   ru   rk   Z	anthropic)rR   ru   ZbedrockopenaiZ
databricksZfireworks_aiazure)rR   ru   rF   geminiZdeepseekZ
perplexityZxaiZlemonadeZ	dashscoper   rT   )rR   ru   rF   rm   rJ   rK   Zinput_cost_per_secondz;For model=%s - input_cost_per_second: %s; response time: %srL   output_cost_per_secondz<For model=%s - output_cost_per_second: %s; response time: %szlReturned custom cost for model=%s - prompt_tokens_cost_usd_dollar: %s, completion_tokens_cost_usd_dollar: %s)2rX   r5   rQ   litellm
model_costget_llm_providersplitlenget_model_infor   
ValueErrorformatr   r   rerank_costvector_store_search_costr
   r6   ocr_costr7   ry   rz   batch_cost_calculatorr_   openai_cost_per_tokenopenai_cost_per_secondlitellm.searchr   rW   r   google_cost_routergoogle_cost_per_charactergoogle_cost_per_tokenanthropic_cost_per_tokenbedrock_cost_per_tokendatabricks_cost_per_tokenfireworks_ai_cost_per_tokenazure_openai_cost_per_tokengemini_cost_per_tokendeepseek_cost_per_tokenperplexity_cost_per_tokenxai_cost_per_tokenlemonade_cost_per_tokenZ&litellm.llms.dashscope.cost_calculatorr   azure_ai_cost_per_tokenr@   getr   rY   )(rR   rD   rE   rF   rS   region_namerb   rc   rd   re   rG   rH   rf   rg   rh   ri   rj   rk   rl   rm   r\   response_costprompt_tokens_cost_usd_dollar!completion_tokens_cost_usd_dollarZmodel_cost_refZmodel_with_providerZmodel_with_provider_and_region_Zmodel_without_prefixZmodel_partsZspeech_model_infoZcost_metricprompt_costcompletion_costZ_prompt_costZ_completion_costr   r    Zdashscope_cost_per_token
model_inforM   rM   rP   r      s  2	


	
























r   completion_responsec                 C   s@   t }|dkr| dt }t| dt }|| }|| d S )NrC   createdZendedrL   )r   r   timer^   )r   
total_timeZ!a100_80gb_price_per_second_public
start_timeend_timerM   rM   rP    get_replicate_completion_pricingF  s   r   objc                 C   s
   t | dS )Nr   )rW   )r   rM   rM   rP   has_hidden_paramsR  s   
r   c              
   C   sn   |d ur|S | d u rd S zt j| d\}}}}W |S  ty6 } ztdt|  W Y d }~d S d }~ww )Nrq   zblitellm.cost_calculator.py::_get_provider_for_cost_calc() - Error inferring custom_llm_provider - )r   r   rX   r   rY   str)rR   rS   r   rZ   rM   rM   rP   _get_provider_for_cost_calcV  s   r   
base_modelcustom_pricingrouter_model_idc           
      C   sb  d}d}t | |d}d}|dur't|trt|dd}nt|tr'|dd}t|dd}	|du r@|dur=|tjv r=|}n;| }n8|durG|}n1|du rf|	durf|	ddduret|	d dkre|	d| }n|	durx|	dddurx|	dd}|du r|dur|}|du r| dur| }|dur|durt	|s|dur| d| d| }|S | d| }|S )	z
    1. If custom pricing is true, return received model name
    2. If base_model is set (e.g. for azure models), return that
    3. If completion response has model set return that
    4. Check if model is passed in return that
    Nrt   rR   r   Tr   r   rp   )
r   
isinstancer   r^   dictr   r   r   r   "_model_contains_known_llm_provider)
rR   r   r   r   rS   r   Zreturn_modelr   Zcompletion_response_modelhidden_paramsrM   rM   rP    _select_model_name_for_cost_calci  sL   

r   )maxsizec                 C   s   |  dd }|tv S )z:
    Check if the model contains a known llm provider
    rp   r   )r   r0   )rR   Z_provider_prefixrM   rM   rP   r     s   r   c                 C   s<   | du rdS t | trt| ddS t | tr| ddS dS )z
    Extract the model name from a completion response object.

    Used as a fallback for cost calculation when the input model name
    doesn't exist in model_cost (e.g., Azure Model Router).
    NrR   )r   r   r^   r   r   r   rM   rM   rP   _get_response_model  s   

r   priorityZflex)ZON_DEMAND_PRIORITYZFLEXZBATCHZ	ON_DEMAND$_GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIERtraffic_typec                 C   s   | du rdS t |  }|S )a  
    Map a Gemini usageMetadata.trafficType value to a LiteLLM service_tier string.

    This allows the same `_priority` / `_flex` cost-key suffix logic used for
    OpenAI/Azure to work for Gemini and Vertex AI models.

    trafficType values seen in practice
    ------------------------------------
    ON_DEMAND          -> standard pricing  (service_tier = None)
    ON_DEMAND_PRIORITY -> priority pricing  (service_tier = "priority")
    FLEX / BATCH       -> batch/flex pricing (service_tier = "flex")
    N)r   r   upper)r   rk   rM   rM   rP   !_map_traffic_type_to_service_tier  s   r   c                 C   s   t tttttf t| tr| dn	t| ddd d}|d u r#d S t|tr*|S |d urBt|ts8t|trBt	
|rBt	|S t|rStt tttf |S t|tr_tdi |S t|trmtdi | S tdt| d|  d S )Nru   r   c                 S   s   d S NrM   )xrM   rM   rP   <lambda>  s    z#_get_usage_object.<locals>.<lambda>zUnknown usage object type: z, usage_obj: rM   )r
   r	   r5   r)   r   r   r   r   r^   r!   _is_response_api_usage+_transform_response_api_usage_to_chat_usager   is_transcription_usage_object$transform_transcription_usage_objectr3   r4   
model_dumpr   rY   type)r   	usage_objrM   rM   rP   _get_usage_object  sP   	



r   c                 C   s    t | tjpt | tpt| S )z3Returns True if the usage obj is a known Usage type)r   r   r5   r)   r   r   r   rM   rM   rP   _is_known_usage_objects  s   r   c                 C   s   | d ur| S |d u rd S t |tst |trdS t |trdS t |tr&dS t |tr-dS t |tr4dS t |tr;dS t |trBdS t |t	rIdS | S )	Nra   	embeddingr|   rr   rw   image_generationtext_completionsend_message)
r   r;   r<   r9   r?   r#   r,   r:   r>   r"   )ri   r   rM   rM   rP   _infer_call_type!  s.   






r   	base_costc                 C   s   | }d}d}|r?|t jv r?t j| }|| }|| }ttjr:td|d  d| d|dd|dd|dd	 |||fS | ||fS )
a  
    Apply provider-specific cost discount from module-level config.

    Args:
        base_cost: The base cost before discount
        custom_llm_provider: The LLM provider name

    Returns:
        Tuple of (final_cost, discount_percent, discount_amount)
    rC   zApplied d   z% discount to : $.6f -> $z	 (saved $))r   cost_discount_configr   isEnabledForloggingDEBUGrY   )r   rS   original_costdiscount_percentdiscount_amount
final_costrM   rM   rP   _apply_cost_discount@  s&   


r   c                 C   s  | }d}d}d}d}|r(|t jv r(t j| }ttjr'td| d|  n/dt jv rAt jd }ttjr@td|  nttjrWtd| dtt j   |durt	|t
tfrkt|}|| }n#t	|trd	|v rt|d	 }||| 7 }d
|v rt|d
 }||7 }|| }ttjrtd|pd d|dd|dd|dkr|d nd d|dd|dd ||||fS | |||fS )aC  
    Apply provider-specific or global cost margin from module-level config.

    Args:
        base_cost: The base cost before margin (after discount if applicable)
        custom_llm_provider: The LLM provider name

    Returns:
        Tuple of (final_cost, margin_percent, margin_fixed_amount, margin_total_amount)
    rC   Nz*Found provider-specific margin config for z: globalzUsing global margin config: z"No margin config found. Provider: z, Available configs: 
percentageZfixed_amountzApplied margin to r   r   r   z
 (margin: r   r   z% + $z = $r   )r   cost_margin_configr   r   r   r   rY   listkeysr   intfloatr   )r   rS   r   margin_percentmargin_fixed_amountmargin_total_amountZmargin_configr   rM   rM   rP   _apply_cost_marginb  sf   




r   litellm_logging_objr   r   'cost_for_built_in_tools_cost_usd_dollartotal_cost_usd_dollaradditional_costsr   r   r   r   r   r   c                 C   sn   | du rdS z| j |||||||||	|
|d W dS  ty6 } ztdt|  W Y d}~dS d}~ww )ar  
    Helper function to store cost breakdown in the logging object.

    Args:
        litellm_logging_obj: The logging object to store breakdown in
        prompt_tokens_cost_usd_dollar: Cost of input tokens
        completion_tokens_cost_usd_dollar: Cost of completion tokens (includes reasoning if applicable)
        cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
        total_cost_usd_dollar: Total cost of request
        additional_costs: Free-form additional costs dict (e.g., {"azure_model_router_flat_cost": 0.00014})
        original_cost: Cost before discount
        discount_percent: Discount percentage applied (0.05 = 5%)
        discount_amount: Discount amount in USD
        margin_percent: Margin percentage applied (0.10 = 10%)
        margin_fixed_amount: Fixed margin amount in USD
        margin_total_amount: Total margin added in USD
    N)rN   rO   
total_costr   r   r   r   r   r   r   r   zError storing cost breakdown: )Zset_cost_breakdownrX   r   rY   r   )r   r   r   r   r   r   r   r   r   r   r   r   Zbreakdown_errorrM   rM   rP   $_store_cost_breakdown_in_logging_obj  s*   r   messagesr   sizequalitynr   standard_built_in_tools_paramslitellm_model_namec           J      C   sV
  zt || pd}|dks|dkr&|dur&t|tr&t|dkr&|dkr&d}d}d}d}d}d}d}d}t| d	}d}|du rH|durH|d
}|du rf| durft| tr\t| d
d}n
t| trf| d
}|du r|durt|trzt|d
d}n
t|tr|d
}t	|| ||||d}|t
| g} |dur| | t| D ]r\}!}zDttjrtd|  | durt| tst| trt| tr| di }"nt| di }"t|"trt|"dst| dtjdWi |"  |"du ri }#nt|"tr|" }#n|"}#t|#rt|# }#nt|#r.ttttt f |#}$|$dur-|$ }#n|#}#|#dd}|#dd}|#dd}|#dd}d|#v re|#d i kre|#d re|#di }%|%dd}t| dd}t| dd}&|&dur|&d|p}d}|&d|}|du r|&dpi }'|'d}(|(rt!|(}n0|du rt"d|  d| t|dkrt#||d}nt|dkrt#||d}t#||d}|t$v rddl%m&}) |)j'|d W   W S |du rt"d|  d| |du r"ztj(|d!\}}}*}*W n t)y! }+ ztd"*t|+ W Y d}+~+nd}+~+ww t+,|r?t| t-r?t+j.||| |
||	||d#W   W S |t/v rd},|rh|durht|d$d}-|-durh|-d%i pai }.|.d&d},t| dd}"| dur|"rt|"tr|"d'd}/nt|"d'd}/|/durdd(l0m1}0 |0||/||,d)W   W S t2|d||,d)W   W S |t3v rtj4j5|d*}n|t6v rt| di pi }1|1d+t| d,d}n|t7v r| durt| t8r| j9}2|2dur|2d-i pi }3ni }3t:|3d.|3d/d0}|3d.p
d1}4|4}n|t;v rdd2l<m=}5 d1}6|dur7|d3}7t|7t>r0t|7}6n|7dur7d1}6|p;d4}8|rId5|8vrI| d6}8|5|8||6|d7\}9}:|9|: };|;}<t?|;|d8\};}=}>t@|;|d8\};}?}@}AtA||9|:d|;|<|=|>|?|@|Ad9 |;W   W S |tBkrt| tCr|du s|du rt"d:*||tD| jE|||d;W   W S |tFkrdd<lGmH}B |BjI|d W   W S d=|v sd>|v s|d>krtJ|tK|d?}n|tjLv sd@|v r|tjMvrtN| |W   W S |du rt"d|  d| |dur<|dAkr<t|dkr#tj4jOdB|iddC}Ctj4j5|Cd*}| dur<t| tPr<tj4jQ| dD}Dtj4j5|Dd*}d}E|durF|jR}EtSdWi dE|d|d|d|dF|d|dG|dH|dI|dJ|d|d|dK|dL|dM|dN|d
|dO| dP|E\}F}G|dQkrtT||||dR}Hnd}H|F|G };tUjV|| |||dS}I|;|I7 };|;}<tjWrt?|;|d8\};}=}>nd}=d}>tjXrt@|;|d8\};}?}@}And}?d}@d}A|durtA||F|G|I|;|<|H|=|>|?|@|AdT |;W   W S  t)y }+ ztdU*|t|+ |!t| d1 kr
|+W Y d}+~+qd}+~+ww t)dV*|  t)y* }+ z|+d}+~+ww )Xa  
    Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm.

    Parameters:
        completion_response (litellm.ModelResponses): [Required] The response received from a LiteLLM completion request.

        [OPTIONAL PARAMS]
        model (str): Optional. The name of the language model used in the completion calls
        prompt (str): Optional. The input prompt passed to the llm
        completion (str): Optional. The output completion text from the llm
        total_time (float, int): Optional. (Only used for Replicate LLMs) The total time used for the request in seconds
        custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
        custom_cost_per_second: Optional[float]: the cost per second for the llm api call.

    Returns:
        float: The cost in USD dollars for the completion based on the provided parameters.

    Exceptions:
        Raises exception if model not in the litellm model cost map. Register model, via custom pricing or PR - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json


    Note:
        - If completion_response is provided, the function extracts token information and the model name from it.
        - If completion_response is not provided, the function calculates token counts based on the model and input text.
        - The cost is calculated based on the model, prompt tokens, and completion tokens.
        - For certain models containing "togethercomputer" in the name, prices are based on the model size.
        - For un-mapped Replicate models, the cost is calculated based on the total time used for the request.
    ra   aimage_generationr   Nr   r   zdall-e-2rC   r   rk   )rR   r   rS   r   r   r   z*selected model name for cost calculation: ru   r   rD   rE   rd   re   r]   Zcached_tokensZ_response_msr   rS   r   Zprovider_specific_fieldsr   z[Model is None and does not exist in passed completion_response. Passed completion_response=z, model=)rR   r   )rR   text)A2ACostCalculator)r   rq   zXlitellm.cost_calculator.py::completion_cost() - Error inferring custom_llm_provider - {})rR   rS   r   r   r  r   r   ri   Zlitellm_paramsmetadatar   duration_seconds)video_generation_cost)rR   r  rS   r   )r  Zaudio_transcription_durationr~   rx   search_unitsrn   )r
  rn   ro   r   queryr`   rp   z/searchr   )r   rS   )r   r   r   r   r   r   r   r   r   r   r   zusage object and custom_llm_provider must be provided for realtime stream cost calculation. Got cost_per_token_usage_object={}, custom_llm_provider={})resultscombined_usage_objectrS   r  )MCPCostCalculatorZtogethercomputerZtogether_ai)ri   Z	replicater   r   )datari   )Zresponse_objrR   rF   rH   rG   rb   rc   rg   ri   rj   rh   rl   rm   rT   )rR   rS   rD   rE   )rR   response_objectru   r  rS   )r   r   r   r   r   r   r   r   r   r   r   r   zXlitellm.cost_calculator.py::completion_cost() - Error calculating cost for model={} - {}z?Unable to calculat cost for received potential model names - {}rM   )Yr   r   r   r   r   r   r   r^   r   r   r   append	enumerater   r   r   r   rY   r   setattrr   r5   r   r!   r   r   r   r   r   r
   r	   r3   r4   r   r   rA   _A2A_CALL_TYPESZ$litellm.a2a_protocol.cost_calculatorr  Zcalculate_a2a_costr   rX   r   r   Z_call_type_has_image_responser:   Z&route_image_generation_cost_calculator_VIDEO_CALL_TYPES$litellm.llms.openai.cost_calculationr	  default_video_cost_calculator_SPEECH_CALL_TYPESutilsZ_count_characters_TRANSCRIPTION_CALL_TYPES_RERANK_CALL_TYPESr,   metar+   _SEARCH_CALL_TYPESr   r   r   r   r   r   _AREALTIME_CALL_TYPEr.   'handle_realtime_stream_cost_calculationr  _MCP_CALL_TYPEZ6litellm.proxy._experimental.mcp_server.cost_calculatorr  Zcalculate_mcp_tool_call_costr   r7   Zreplicate_modelsr   r   Zget_formatted_promptr;   Zget_response_stringrR   r   r[   r   Zget_cost_for_built_in_toolsr   r   )Jr   rR   promptr   ra   r   ri   rS   r   r   r   r  rG   rH   r   r   r   r  r  r   r   rk   rD   rb   rE   rc   rd   re   rj   Zcost_per_token_usage_objectrh   Zselected_modelpotential_model_namesidxr   Z_usageZtr_usager]   r   Zprovider_specificZraw_traffic_typer  r   rZ   Z_video_model_infoZ_litellm_params	_metadatar  r	  Z_hiddenZmeta_objrx   r
  r   rf   r  Zsearch_modelr   Zcompletion_cost_resultZ_final_costr   r   r   r   r   r   r  Zprompt_stringZcompletion_stringZrequest_model_for_costr   r   r   Zcost_for_built_in_toolsrM   rM   rP   r     s  8













	
























	









	

	
r   r   c                 C   sT   t | tr
|  }n| }|di }|r(d|v r(|d }|d u r"d S t|d S d S )Nadditional_headersz$llm_provider-x-litellm-response-cost)r   r   r   r   r   )r   Z_hidden_params_dictr%  r   rM   rM   rP   $get_response_cost_from_hidden_params-  s   

r&  r  )r   Z
aembeddingra   ZacompletionZatext_completionr   r   r  Z
moderationZamoderationr{   r|   rs   rr   rw   rv   r   r   	cache_hitr!  c                 C   s   z>d}|dur|du rd}|W S t | tr+t| dr+|| jd< t| j}|dur+|W S t| ||||||||	|
|||d}|W S  tyL } z|d}~ww )z7
    Returns
    - float or None: cost of response
    rC   NTr   r   )r   rR   ri   rS   r   r   r   r!  r  r  r   r   rk   )r   r   rW   r   r&  r   rX   )r  rR   rS   ri   r   r'  r   r   r!  r  r  r   r   rk   r   Zprovider_response_costrZ   rM   rM   rP   response_cost_calculatorA  sB   5


r(  c                 C   s   ddl m} |du st||stdt| |jdu r!td|jj}|du r-tdz	tj| |d}W n t	yA   d}Y nw d}|durO|
d	pNd}|| }|dfS )
aJ  
    Args:
        model: str - model name
        custom_llm_provider: Optional[str] - custom LLM provider
        response: Optional[Any] - response object

    Returns:
        Tuple[float, float]: cost of OCR processing

        (Parent function requires a tuple, so we return a tuple. Cost is only in the first element.)
    r   )OCRResponseNz.response must be of type OCRResponse got type=zOCR response usage_info is Nonez$OCR response pages_processed is Nonert   rC   ocr_cost_per_page)Z(litellm.llms.base_llm.ocr.transformationr)  r   r   r   Z
usage_infopages_processedr   r   rX   r   )rR   rS   rl   r)  r+  r   r*  Ztotal_ocr_processing_costrM   rM   rP   r     s,   

r   c                 C   sn   d}|du rd}| durd| v rt j| d\}}}}tjt||d}|du r1td|  dS |j|dS )	zB
    Returns
    - float or None: cost of vector store search
    Nr   rp   rq   )providerapi_typez)Vector store search is not supported for rC   rC   )rl   )r   r   r=   Z!get_provider_vector_stores_configr/   r   rY   Zcalculate_vector_store_cost)rR   rS   rl   r-  r   configrM   rM   rP   r     s&   	r   rx   c              
   C   s   t j| |d\}}}}z*tj| dg t|d}z	t j| |d}W n ty+   d}Y nw |j| |||dW S  tyC } z|d}~ww )zI
    Returns
    - float or None: cost of response OR none if error.
    rt   N)rR   Zapi_baseZpresent_version_paramsr,  )rR   rS   rx   r   )r   r   r=   Zget_provider_rerank_configr/   r   rX   Zcalculate_rerank_cost)rR   rS   rx   r   r/  r   rZ   rM   rM   rP   r     s6   	
r   r~   c                 C   s   t | ||dS )Nr}   )r   r}   rM   rM   rP   transcription_cost  s   r0  ro   1024-x-1024c                 C   s  |pd}d|v rd|vr| ddn|}tt|d\}}| d|  }	d}
|rD| | drD|  | dd}
| d| d|
 }	|rM| d|	 n|	}tjj d|	 }t	d| d|	  | d| dd	  }|rx| d| n|}d}||	|||| |
g}|D ]}|dur|t
jv rt
j| } nq|du rtd
| d|v r|d dur|d | S d|v r|d dur|d | | | S td|  d| )a  
    Default image cost calculator for image generation

    Args:
        model (str): Model name
        image_response (ImageResponse): Response from image generation
        quality (Optional[str]): Image quality setting
        n (Optional[int]): Number of images generated
        size (Optional[str]): Image size (e.g. "1024x1024" or "1024-x-1024")

    Returns:
        float: Cost in USD for the image generation

    Raises:
        Exception: If model pricing not found in cost map
    r1  r   z-x-rp   Nr`   zLooking up cost for models: z, z,Model not found in cost map. Tried checking Zinput_cost_per_imageZinput_cost_per_pixelz'No pricing information found for model z. Tried checking )replacemapr   r   
startswithr$   ZHIGHvaluer   rY   r   r   rX   )rR   rS   r   r  r   r   Zsize_strheightwidthbase_model_name&model_name_without_custom_llm_providerZmodel_name_with_qualityZmodel_name_with_v2_qualitymodel_without_providerZ#model_with_quality_without_provider	cost_infomodels_to_check_modelrM   rM   rP   default_image_cost_calculator  sf   
	
r?  r  r   c                 C   s4  d}|durt |}n_| }d}|r)| | dr)| | dd}| d| }td|  | dd }|| ||g}|D ]}	|	durR|	tjv rRtj|	 } nq@|du rj|rj| d|  }
|
tjv rjtj|
 }|du rutd|  |	d}|dur|| S |	d}|dur|| S t
d	|  d
 dS )aN  
    Default video cost calculator for video generation

    Args:
        model (str): Model name
        duration_seconds (float): Duration of the generated video in seconds
        custom_llm_provider (Optional[str]): Custom LLM provider
        model_info (Optional[ModelInfo]): Deployment-level model info containing
            custom video pricing. When provided, used before falling back to
            the global litellm.model_cost lookup.

    Returns:
        float: Cost in USD for the video generation

    Raises:
        Exception: If model pricing not found in cost map
    Nrp   r`   z!Looking up cost for video model: r2  z&Model not found in cost map for model=Z output_cost_per_video_per_secondr   z*No cost information found for video model z<. Please add pricing to model_prices_and_context_window.jsonrC   )r   r5  r3  r   rY   r   r   r   rX   r   info)rR   r  rS   r   r<  r9  r:  r;  r=  r>  Zprefixed_modelZvideo_cost_per_secondr   rM   rM   rP   r  }  sR   







r  ru   c                 C   s  t j||d\}}}}td|| |du r+z	t j||d}W n ty*   d}Y nw |s/dS |d}|d}|d}|d}d	}	d	}
|rO| j| }	n!|rpt| | d
 }	t	| }|d }t
dd}|	t|||d
 7 }	|r{| j| }
|	|
fS |r| j| d
 }
|	|
fS )aF  
    Calculate the cost of a batch job.

    Args:
        model_info: Optional deployment-level model info containing custom
            batch pricing (e.g. input_cost_per_token_batches). When provided,
            skips the global litellm.get_model_info() lookup so that
            deployment-specific pricing is used.
    rt   zBCalculating batch cost per token. model=%s, custom_llm_provider=%sNr.  input_cost_per_token_batchesrJ   output_cost_per_token_batchesrK   rC      Zcache_hit_tokensZcache_read_input_token_cost)r   r   r   rY   r   rX   r   rD   r   r   r   r   rE   )ru   rR   rS   r   r   rA  rJ   rB  rK   Ztotal_prompt_costZtotal_completion_costdetailsZcache_read_tokensZcache_read_cost_keyrM   rM   rP   r     s\   






r   c                   @   s&   e Zd Zedee defddZdS )BaseTokenUsageProcessorusage_objectsrI   c           	      C   s  ddl m}m}m} | }| D ]}t|D ]4}|dsItt||sIt||d}t||d}|durIt|t	t
frIt|t	t
frIt||||  qt|dr|jrt|drZ|js^| |_t|jjD ];}t|j|r|dstt|j|st|j|dpd}t|j|dpd}|durt|t	t
frt|j|||  qdt|dr|jrt|dr|js| |_t|jjD ].}|dstt|j|st|j|d}t|j|d}|dur|durt|j|||  qq|S )zs
        Combine multiple Usage objects into a single Usage object, checking model keys for nested values.
        r   )CompletionTokensDetailsWrapperPromptTokensDetailsWrapperr5   r   Nr]   completion_tokens_details)litellm.types.utilsrG  rH  r5   dirr5  callabler^   r   r   r   r  rW   r]   r   Zmodel_fieldsrI  )	rF  rG  rH  r5   combinedru   attrZcurrent_valnew_valrM   rM   rP   combine_usage_objects  s~   

z-BaseTokenUsageProcessor.combine_usage_objectsN)__name__
__module____qualname__staticmethodr   r5   rP  rM   rM   rM   rP   rE    s    rE  c                   @   sV   e Zd Zededee fddZededefddZededede	fdd	Z
d
S )RealtimeAPITokenUsageProcessorr  rI   c                 C   sJ   t tt dd | D }g }|D ]}t|d di }|| q|S )z<
        Collect usage from realtime stream results
        c                 S   s   g | ]
}|d  dkr|qS )r   zresponse.donerM   ).0resultrM   rM   rP   
<listcomp>z  s    z]RealtimeAPITokenUsageProcessor.collect_usage_from_realtime_stream_results.<locals>.<listcomp>rl   ru   )r
   r   r'   r!   r   r   r  )r  Zresponse_done_eventsrF  rW  rg   rM   rM   rP   *collect_usage_from_realtime_stream_resultsq  s   zIRealtimeAPITokenUsageProcessor.collect_usage_from_realtime_stream_resultsc                 C   s   t | }t |}|S )zH
        Collect and combine usage from realtime stream results
        )rU  rY  rP  )r  Zcollected_usage_objectsr  rM   rM   rP   6collect_and_combine_usage_from_realtime_stream_results  s   zURealtimeAPITokenUsageProcessor.collect_and_combine_usage_from_realtime_stream_resultsru   c                 C   s   t | |dS )Nru   r  )r.   r[  rM   rM   rP   create_logging_realtime_object  s   z=RealtimeAPITokenUsageProcessor.create_logging_realtime_objectN)rQ  rR  rS  rT  r&   r   r5   rY  rZ  r.   r\  rM   rM   rM   rP   rU  p  s*    rU  r  r  c              	   C   s   d}g }| D ]}|d dkrt t|d dd}|| q|| d}d}|D ]%}	z|	du r3W q*t|	||d\}
}W n	 tyF   Y q*w ||
7 }||7 } || }|S )z
    Handles the cost calculation for realtime stream responses.

    Pick the 'response.done' events. Calculate total cost across all 'response.done' events.

    Args:
        results: A list of OpenAIRealtimeStreamBaseObject objects
    Nr   zsession.createdsessionrR   rC   )rR   ru   rS   )r
   r(   r   r  r   rX   )r  r  rS   r  Zreceived_modelr"  rW  rJ   rK   Z
model_nameZ_input_cost_per_tokenZ_output_cost_per_tokenr   rM   rM   rP   r    s>   

r  )r   r   rC   NN)r`   r   r   rC   NNNNr   r   NNNNNra   rC   NNN)rC   r   )NNNN)NNNNNNN)	NNNr`   NNNNN)NNro   r1  N)NN)r   r   	functoolsr   typingr   r   r   r   r   r   r	   r
   Zhttpxr   Zpydanticr   r   Zlitellm._loggingr   Zlitellm.constantsr   r   Z@litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_trackingr   ZDlitellm.litellm_core_utils.llm_cost_calc.usage_object_transformationr   Z.litellm.litellm_core_utils.llm_cost_calc.utilsr   r   r   r   r   r   r   r   Z'litellm.llms.anthropic.cost_calculationr   r   Z#litellm.llms.azure.cost_calculationr   Z%litellm.llms.azure_ai.cost_calculatorr   Z+litellm.llms.base_llm.search.transformationr   Z%litellm.llms.bedrock.cost_calculationr   Z'litellm.llms.databricks.cost_calculatorr   Z%litellm.llms.deepseek.cost_calculatorr   Z)litellm.llms.fireworks_ai.cost_calculatorr   Z#litellm.llms.gemini.cost_calculatorr   Z%litellm.llms.lemonade.cost_calculatorr   r  r   r   r   Z'litellm.llms.perplexity.cost_calculatorr   Z(litellm.llms.together_ai.cost_calculatorr   Z&litellm.llms.vertex_ai.cost_calculatorr   r   r   r    r   Z litellm.llms.xai.cost_calculatorr   Zlitellm.responses.utilsr!   Zlitellm.types.agentsr"   Zlitellm.types.llms.openair#   r$   r%   r&   r'   r(   r)   r*   Zlitellm.types.rerankr+   r,   rJ  r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   Zlitellm.utilsr7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   Z*litellm.litellm_core_utils.litellm_loggingrB   ZLitellmLoggingObject	frozensetZasend_messager6  r   r  Zcreate_videoZacreate_videoZvideo_remixZavideo_remixr  rr   rs   r  r{   r|   r  rw   rv   r  r   r   r  Z	arealtimer  Zcall_mcp_toolr   r   rQ   r   r   r   r[   boolr_   r   r   r   r   r   r   r   __annotations__r   r   r   r   r   r   r   r   r&  r(  r   r   r   r0  r?  r  r   rE  rU  r  rM   rM   rM   rP   <module>   s  
((
(
04

.
	

  Q

D
0

"
I	

9
    T

:%&'()*+,-/0
Z

0

%

&



h
V

HX1