o
    ưi+                     @   s   d Z ddlZddlmZmZmZmZmZmZm	Z	m
Z
 ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZmZmZmZm Z  ddl!m"Z" G dd deZ#G dd de#Z$G dd de#Z%G dd deZ&dS )z\
Translates from OpenAI's `/v1/chat/completions` endpoint to Triton's `/generate` endpoint.
    N)AnyAsyncIteratorDictIteratorListLiteralOptionalUnion)HeadersResponse)DEFAULT_MAX_TOKENS_FOR_TRITON)prompt_factory)BaseModelResponseIterator)
BaseConfigBaseLLMExceptionLiteLLMLoggingObj)AllMessageValues)ChatCompletionToolCallChunkChatCompletionUsageBlockChoicesGenericStreamingChunkMessageModelResponse   TritonErrorc                   @   s  e Zd ZdZdededeeef de	fddZ
		d,ded	ed
ee dededee dee defddZd	edefddZdeded	ededef
ddZ	d-dee dee d	edededee defddZ		d,d	ededededed
ee dedededee dee defd d!Zd	ed
ee dedededefd"d#Zdeded$ fd%d&Z	'd.d(eee ee ef d)edee defd*d+ZdS )/TritonConfigzx
    Base class for Triton configurations.

    Handles routing between /infer and /generate triton completion llms
    error_messagestatus_codeheadersreturnc                 C   s   t |||dS )N)r   messager   r   )selfr   r   r    r#   d/home/app/Keep/.python/lib/python3.10/site-packages/litellm/llms/triton/completion/transformation.pyget_error_class&   s   zTritonConfig.get_error_classNmodelmessagesoptional_paramslitellm_paramsapi_keyapi_basec                 C   s   ddiS )NzContent-Typezapplication/jsonr#   )r"   r   r&   r'   r(   r)   r*   r+   r#   r#   r$   validate_environment-   s   
z!TritonConfig.validate_environmentc                 C   s   ddgS N
max_tokensZmax_completion_tokensr#   )r"   r&   r#   r#   r$   get_supported_openai_params9   s   z(TritonConfig.get_supported_openai_paramsnon_default_paramsdrop_paramsc                 C   s.   |  D ]\}}|dks|dkr|||< q|S r-   )items)r"   r0   r(   r&   r1   paramvaluer#   r#   r$   map_openai_params<   s
   zTritonConfig.map_openai_paramsstreamc                 C   s2   |d u rt d| |}|dkr|r|d S |S )Nzapi_base is requiredgenerate_stream)
ValueError_get_triton_llm_type)r"   r+   r*   r&   r(   r)   r6   llm_typer#   r#   r$   get_complete_urlH   s   	
zTritonConfig.get_complete_urlraw_responsemodel_responselogging_objrequest_dataencoding	json_modec                 C   sn   | dd}| |}|dkr t j|||||||||	|
|dS |dkr5t j|||||||||	|
|dS |S )Nr+    r7   )r&   r=   r>   r?   r@   r'   r(   r)   rA   r*   rB   infer)getr:   TritonGenerateConfigtransform_responseTritonInferConfig)r"   r&   r=   r>   r?   r@   r'   r(   r)   rA   r*   rB   r+   r;   r#   r#   r$   rG   X   s>   
zTritonConfig.transform_responsec                 C   sV   | dd}| |}|dkrt j|||||dS |dkr)t j|||||dS i S )Nr+   rC   r7   )r&   r'   r(   r)   r   rD   )rE   r:   rF   transform_requestrH   )r"   r&   r'   r(   r)   r   r+   r;   r#   r#   r$   rI      s&   
zTritonConfig.transform_request)r7   rD   c                 C   s*   | drdS | drdS td| )Nz	/generater7   z/inferrD   zInvalid Triton API base: )endswithr9   )r"   r+   r#   r#   r$   r:      s
   

z!TritonConfig._get_triton_llm_typeFstreaming_responsesync_streamc                 C   s   t |||dS )N)rK   rL   rB   )TritonResponseIterator)r"   rK   rL   rB   r#   r#   r$   get_model_response_iterator   s
   z(TritonConfig.get_model_response_iteratorNN)N)F)__name__
__module____qualname____doc__strintr	   r   r
   r   r%   r   r   dictr   r,   r/   boolr5   r<   r   r   r   r   rG   rI   r   r:   r   r   rN   r#   r#   r#   r$   r      s    

	


	

.
r   c                   @      e Zd ZdZdedee dedededefdd	Z	
	
ddede	de
dededee dedededee dee de
fddZd
S )rF   zP
    Transformations for triton /generate endpoint (This is a trtllm model)
    r&   r'   r(   r)   r   r    c           	      C   sN   |  }|dd}t||ddt|dtit|d}|d | |S )Nr6   F)r&   r'   r.   )
text_input
parametersr6   rZ   )copypopr   rU   rE   r   rW   update)	r"   r&   r'   r(   r)   r   Zinference_paramsr6   data_for_tritonr#   r#   r$   rI      s   

	z&TritonGenerateConfig.transform_requestNr=   r>   r?   r@   rA   r*   rB   c                 C   sJ   z|  }W n ty   t|j|jdw tdt|d ddg|_|S )Nr!   r   r   text_outputcontentindexr!   )json	Exceptionr   textr   r   r   choices)r"   r&   r=   r>   r?   r@   r'   r(   r)   rA   r*   rB   raw_response_jsonr#   r#   r$   rG      s   z'TritonGenerateConfig.transform_responserO   rP   rQ   rR   rS   rT   r   r   rV   rI   r   r   r   r   r   r   rW   rG   r#   r#   r#   r$   rF      sT    
!	
rF   c                   @   rX   )rH   zj
    Transformations for triton /infer endpoint (his is an infer model with a custom model on triton)
    r&   r'   r(   r)   r   r    c                 C   s   |d  dd}dddgd|gdgi}| D ],\}}	|d	ksD|d
ksDt|	tr+dnd}
t|	tr4dn|
}
|d |dg|
|	gd qd|vrW|d ddgddgd |S )Nr   rb   rC   ZinputsrY      ZBYTES)nameshapedatatypedatar6   max_retriesZINT32ZFP32r.      )rE   r2   
isinstancerU   floatappend)r"   r&   r'   r(   r)   r   rY   r^   kvrn   r#   r#   r$   rI      s4   z#TritonInferConfig.transform_requestNr=   r>   r?   r@   rA   r*   rB   c                 C   st   z|  }W n ty   t|j|jdw |d d d }d }t|tr+d|}n|}tdt	|ddg|_
|S )Nr_   Zoutputsr   ro   rC   ra   rc   )re   rf   r   rg   r   rr   listjoinr   r   rh   )r"   r&   r=   r>   r?   r@   r'   r(   r)   rA   r*   rB   ri   Z_triton_response_dataZtriton_response_datar#   r#   r$   rG     s$   
z$TritonInferConfig.transform_responserO   rj   r#   r#   r#   r$   rH      sT    
2	
rH   c                   @   s   e Zd ZdedefddZdS )rM   chunkr    c           	   	   C   s   z2d}d }d}d}d }d }t |dd}|dd}|dd}|dd}t|||||||dW S  tjyA   td	| w )
NrC   Frd   r   r`   Zstop_reasonis_finished)rg   tool_userz   finish_reasonusagerd   provider_specific_fieldsz"Failed to decode JSON from chunk: )rU   rE   r   re   JSONDecodeErrorr9   )	r"   ry   rg   r{   rz   r|   r}   r~   rd   r#   r#   r$   chunk_parser@  s.   	z#TritonResponseIterator.chunk_parserN)rP   rQ   rR   rV   r   r   r#   r#   r#   r$   rM   ?  s    rM   )'rS   re   typingr   r   r   r   r   r   r   r	   Zhttpxr
   r   Zlitellm.constantsr   Z3litellm.litellm_core_utils.prompt_templates.factoryr   Z)litellm.llms.base_llm.base_model_iteratorr   Z)litellm.llms.base_llm.chat.transformationr   r   r   Zlitellm.types.llms.openair   Zlitellm.types.utilsr   r   r   r   r   r   Zcommon_utilsr   r   rF   rH   rM   r#   r#   r#   r$   <module>   s     ( 	 6R