
    lix                        d Z ddlmZ ddlZddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ  ee          Z G d d	          ZdS )
uU  LLM client using an OpenAI-compatible API (e.g. vLLM, Ollama, DashScope).

大语言模型客户端封装模块。
通过 OpenAI 兼容接口与 LLM 服务通信，支持流式对话、JSON 结构化输出
和单轮补全三种调用模式。可通过 extra_body 传递供应商专属参数
（如 DashScope 的 enable_thinking 开关）。
    )annotationsN)AnyAsyncIterator)AsyncOpenAI)settings)
get_loggerc                  n    e Zd ZdZddddddddZddZdddddd dZdddddd!dZddddddd"dZdS )#	LLMClientu  Async wrapper around an OpenAI-compatible chat completions API.

    Supports ``extra_body`` for provider-specific parameters such as
    DashScope's ``enable_thinking`` flag (set ``LLM_ENABLE_THINKING=false``
    in environment to disable chain-of-thought for Qwen thinking models).

    异步 LLM 客户端，封装了流式对话、JSON 输出和单轮补全三种调用方式。
    通过 trust_env=False 绕过系统代理，确保直连 LLM 服务端点。
    N)base_urlapi_keymodeltemperature
max_tokensr   
str | Noner   r   r   float | Noner   
int | NonereturnNonec               >   |pt           j        | _        ||nt           j        | _        |pt           j        | _        t          |pt           j        |pt           j	        t          j        d                    | _        i | _        t           j        sd| j        d<   d S d S )NF)	trust_env)r   r   http_clientenable_thinking)r   	llm_model_modelllm_temperature_temperaturellm_max_tokens_max_tokensr   llm_base_urlllm_api_keyhttpxAsyncClient_client_extra_bodyllm_enable_thinking)selfr   r   r   r   r   s         7D:\work\zm-rag\backend\app\infrastructure\llm_client.py__init__zLLMClient.__init__"   s     1x1+6+BKKH`%@)@ #6!63x3)E:::
 
 
 ,.+ 	827D.///	8 	8    c                   K   | j         j         }| j                                          d{V  |r#|j        s|                                 d{V  dS dS dS )u   关闭 LLM 客户端及底层 httpx 连接，释放资源。
        Close the OpenAI client and its underlying httpx.AsyncClient.N)r#   close	is_closedaclose)r&   r   s     r'   r+   zLLMClient.close:   s      
 l*l  """"""""" 	'{4 	'$$&&&&&&&&&&&	' 	' 	' 	'r)   )r   r   r   
extra_bodymessageslist[dict[str, str]]r.   dict[str, Any] | NoneAsyncIterator[str]c                K   i | j         |pi }| j        j        j                            |p| j        |||n| j        |p| j        d|r|nd           d{V }t          j	        d          4 d{V  |2 3 d{V }|j
        s|j
        d         j        }	|	j        r
|	j        W V  36 	 ddd          d{V  dS # 1 d{V swxY w Y   dS )a  Yield text chunks from a streaming chat completion.

        Parameters
        ----------
        messages:
            OpenAI-style message list, e.g.
            ``[{"role": "system", "content": "..."}, {"role": "user", "content": "..."}]``
        extra_body:
            Additional provider-specific body fields.  Merged with the
            instance-level ``_extra_body`` (per-call values take precedence).
        NTr   r/   r   r   streamr.   i,  r   )r$   r#   chatcompletionscreater   r   r   asynciotimeoutchoicesdeltacontent)
r&   r/   r   r   r   r.   merged_extraresponsechunkr<   s
             r'   r6   zLLMClient.chatF   s     ( B$*Az/?RA*6==&4;'2'>DDU!5T%5'3=|| > 
 
 
 
 
 
 
 
 ?3'' 	( 	( 	( 	( 	( 	( 	( 	(' ( ( ( ( ( ( (e} a(.= (-''''  (x	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(s   5C 8B+>.C  
C
C
dict[str, Any]c          	       K   ddl }i | j        |pi }| j        j        j                            |p| j        |||n| j        |p| j        ddid|r|nd           d{V }|j	        d         j
        j        pd}		 |                    |	          S # |j        $ r? t                              d|	dd	         
           t!          d|	dd                    w xY w)zRequest a single chat completion with ``response_format=json_object``.

        Returns the parsed JSON dict from the assistant's reply.
        r   Ntypejson_objectF)r   r/   r   r   response_formatr5   r.   z{}llm_json_parse_errori  )rawz#Failed to parse LLM JSON response:    )jsonr$   r#   r6   r7   r8   r   r   r   r;   messager=   loadsJSONDecodeErrorloggererror
ValueError)
r&   r/   r   r   r   r.   _jsonr>   r?   r=   s
             r'   	chat_jsonzLLMClient.chat_jsonp   s>      	A$*Az/?RA*6==&4;'2'>DDU!5T%5#]3'3=|| > 
 
 
 
 
 
 
 
 "1%-5=	;;w'''$ 	 	 	LL/WTcT]LCCC EgdsdmEE  		s   <B A	C)systemr   r   r   r.   promptstrrR   c               P  K   g }|r|                     d|d           |                     d|d           i | j        |pi }| j        j        j                            |p| j        |||n| j        |p| j        d|r|nd           d{V }	|	j	        d         j
        j        pdS )	z<Non-streaming single-shot completion. Returns the full text.rR   )roler=   userNFr4   r    )appendr$   r#   r6   r7   r8   r   r   r   r;   rJ   r=   )
r&   rS   rR   r   r   r   r.   r/   r>   r?   s
             r'   completezLLMClient.complete   s       *, 	COOX&AABBBF;;<<<A$*Az/?RA*6==&4;'2'>DDU!5T%5'3=|| > 
 
 
 
 
 
 
 
 "*28b8r)   )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   )r/   r0   r   r   r   r   r   r   r.   r1   r   r2   )r/   r0   r   r   r   r   r   r   r.   r1   r   rA   )rS   rT   rR   r   r   r   r   r   r   r   r.   r1   r   rT   )	__name__
__module____qualname____doc__r(   r+   r6   rQ   rZ    r)   r'   r
   r
      s           $" $(!%8 8 8 8 8 80' ' ' '  !$(!%,0&( &( &( &( &( &(\ !$(!%,0# # # # # #V " $(!%,09 9 9 9 9 9 9 9r)   r
   )r^   
__future__r   r9   typingr   r   r!   openair   
app.configr   app.utils.loggerr   r[   rM   r
   r_   r)   r'   <module>re      s     # " " " " "  % % % % % % % %              ' ' ' ' ' '	H		Y9 Y9 Y9 Y9 Y9 Y9 Y9 Y9 Y9 Y9r)   