
    =iM                        d Z ddlZddlZddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlZddlZddlZddlmZmZmZmZmZmZmZmZ ddlmZ ddlmZ d	d
lmZ d	dlmZmZ d	dl m!Z! d	dl"m#Z# d	dl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ dZ, ej-         e.ee,ej/                              ej0        e1          Z2e23                     e.ee,ej/                              e
e4          5                                j6        d	         dz  Z7	 e78                    dd          5 Z9 ej:        e9          Z;ddd           n# 1 swxY w Y   n# e<$ r i Z;Y nw xY w e=e;>                    dd                    Z? e=e;>                    de?                    Z@ eAe;>                    dd                    ZB eCe;>                    dd                    D                                E                                dv ZFdeGfdZHd ZIed efd!            ZJ eI            \  ZKZKZKZL ed"d#eJ$          ZMde!fd%ZN ed&          fd'edefd(ZOeMP                    d)           ed&           ed           ed          fd*ed'ed+eeC         d,eeC         def
d-            ZQeMR                    d.          d/efd0            ZSeMP                    d1           ed&          d2d	d3d4fd'ed5e=d6e=d7e=d8eCdefd9            ZTdS ):uN  gzzm 服务主入口。

职责概览：
- 暴露 HTTP `/transcribe` 与 WebSocket `/ws/transcribe` 两个转写接口。
- 在请求入口做参数校验、会话级资源初始化（SpeakerRegistry、队列）和结果汇总。
- 调用 services 中的统一流水线完成 ASR、二次切分、说话人匹配与时间戳合并。
    N)asynccontextmanager)Optional)Path)FastAPIFileFormHTTPExceptionRequest
UploadFile	WebSocketWebSocketDisconnect)JSONResponse)AudioSegment   )choose_device)	get_model
init_model)SpeakerRegistry)transcribe_audio_file)
QueueChunkSessionChunkQueuemerge_adjacent_same_speakerpreload_persisted_speakersprocess_chunk_pipelinesplit_file_to_segmentsfilter_time_stampsINFO)levelzgzzm_config.jsonrutf-8encodingchunk_seconds<   ws_chunk_secondsws_emit_secondsg       @enable_detailed_logF)1trueyesonreturnc                      	 t                               dd          5 } t          j        |           cd d d            S # 1 swxY w Y   d S # t          $ r i cY S w xY w)Nr   r    r!   )_config_pathopenjsonload	Exception)_fs    src\gzzm\app.py_load_gzzm_configr5   ?   s    sW55 	!9R==	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!   			s1   A ?A AA AA AAc                     t                      } t          |                     dd                    }t          |                     dd                    }|                     d|                     dd                    }t          |          }t	          |                     d          pd	                                          }t          |          }|d
k    rd}n|dk    rd}nd}||||fS )zReturn (similarity_threshold, soft_margin, disable_soft_reuse, speaker_device).

    Values are read from environment variables first, then from `gzzm_config.json`.
    This is intentionally lightweight so callers can call it per-request.
    speaker_sim_thresholdg      ?speaker_sim_soft_margingffffff?speaker_disable_soft_reuse disable_soft_reuse_for_persistedFmodel_device_speaker npuznpu:0cudazcuda:0cpu)r5   floatgetboolstrstripr   )cfgsim_thrsim_softdisable_softcfg_speaker_deviceprimaryspeaker_devices          r4   _get_runtime_speaker_settingsrL   G   s     

CCGG3S99::GSWW6==>>H777Acej9k9kllL%%L SWW%;<<BCCIIKK.//G% 	F		!HlN::    _c                0   K   t                       d W V  d S )N)r   )rN   s    r4   lifespanrP   `   s      LLL	EEEEEErM   zQwen3 ASR APIz1.0.0)titleversionrP   c                  N    t                      \  } }}}t          | |||          S )N)similarity_thresholdsoft_marginmodel_devicer:   )rL   r   )rF   rG   rH   rK   s       r4   _build_speaker_registryrW   n   s9    6S6U6U3GX|^$#)5	   rM   .filec                   K   t                      }|t          dd          | j        pd                                }|                    d          s&|                    d          st          dd	          |                    d          rdnd}t          j        d
|          5 }|                    |                                  d{V            |j	        }ddd           n# 1 swxY w Y   	 t          |t                    }t          |          	 t          j        |           S # t          $ r Y S w xY w# 	 t          j        |           w # t          $ r Y w w xY wxY w)z7Legacy helper: pure ASR without speaker identification.N  Model is not initializedstatus_codedetailr<   .wav.mp3  #Only wav or mp3 files are supportedFdeletesuffix)r   r	   filenamelowerendswithtempfileNamedTemporaryFilewritereadnamer   TRANSCRIBE_CHUNK_SECONDSr   osremover2   )rX   modelrf   re   tmptmp_pathresults          r4   transcribe_audio_file_handlerru   x   s     KKE}4NOOOO#**,,Hf%% [):):6)B)B [4YZZZZ((00<VVfF		$E&	A	A	A S				######$$$8              &x1IJJF##	Ih 	 	 	D		Ih 	 	 	D	sT   )5C**C.1C.6#D? D//
D<;D<?E&EE&
E# E&"E##E&z/transcriberequest
speakerIdsspeaker_idsc                 	  K   t                      }|t          dd          |j        pd                                }|                    d          s&|                    d          st          dd	          |                    d          rdnd}t          j        d
|          5 }|                    |                                 d{V            |j	        }ddd           n# 1 swxY w Y   t                      }	|p|}
|
s1| j        }|                    d          p|                    d          }
t                              d|
||           t          |	|
           t!          j                    }t%                      }	 t'          |t(                    }|D ]}|                    |           d}d}g }g }t-          |          }d}|                                s|                                }|nvt3          t-          |                    dz  }t4          r t                              d||||z              t7          ||t(          |	t8                    }||                    d          }|                    d          pd                                }|r|                    |           |                    d          pg }	 t?          |          }n# t@          $ r Y nw xY w|r|!                    |           t3          |                    d|          p|          }t4          r!t                              d||||z   |           ||z  }|dz  }|                                tE          |          }tG          |d$                    |                                          tK          t!          j                    |z
  d          ||d          	 |	&                                 n# t@          $ r Y nw xY w	 tO          j(        |           S # t@          $ r Y S w xY w# t          $ r  t@          $ r=}t          )                    d           t          dtU          |                    d}~ww xY w# 	 |	&                                 n# t@          $ r Y nw xY w	 tO          j(        |           w # t@          $ r Y w w xY wxY w)zgQueue-based /transcribe pipeline: preload speakers -> chunk queue -> ASR -> second split -> speaker id.NrZ   r[   r\   r<   r_   r`   ra   rb   Frc   rw   rx   zT/transcribe manual speakerIds resolved: %s (form speakerIds=%s, form speaker_ids=%s)        r   g     @@z?[detailed][/transcribe] start chunk index=%s range=[%.3f, %.3f])segment
offset_secr#   speaker_registryrK   languagetexttime_stampsduration_seczJ[detailed][/transcribe] result chunk index=%s range=[%.3f, %.3f] result=%sr       )r~   r   time_secchunksr   zTranscribe failed  )+r   r	   rf   rg   rh   ri   rj   rk   rl   rm   rW   query_paramsrA   loggerinfor   timeperf_counterr   r   rn   putlenemptytry_getr@   DETAILED_LOG_ENABLEDr   SPEAKER_DEVICErD   appendr   r2   extendr   r   joinroundclearro   rp   	exceptionrC   )rv   rX   rw   rx   rq   rf   re   rr   rs   r}   manual_speaker_idsqp
started_atqueuesegmentssegr|   r~   
text_partsenriched_tschunk_countchunk_indexestimated_duration_secchunk_resulttxtchunk_tschunk_duration_seces                               r4   transcribe_audior      s      KKE}4NOOOO#**,,Hf%% [):):6)B)B [4YZZZZ((00<VVfF		$E&	A	A	A S				######$$$8               /00#2{ K!VVL11JRVVM5J5J
KK^	   /1CDDD"$$J->-@-@EX)(4LMM 	 	CIIcNNNN

(mm++-- 1	--//C{%*3s88__v%="# U!77	   2%6!1-  L '++J77##F++1r88::C '!!#&&&#''66<"H-h77     -""8,,,!&|'7'7H^'_'_'ycy!z!z# `!33    ,,J1Kc ++-- 1	h 2+>>$,,2244!$"3"5"5
"BAFF%* 
 
	""$$$$ 	 	 	D		Ih 	 	 	D	     < < <,---CFF;;;;<	""$$$$ 	 	 	D		Ih 	 	 	D	s   )5C**C.1C.	EP* K!  P* !
K.+P* -K..C0P* O44
P PP
P'&P'*Q;>8Q66Q;;Q> >S RS
R"S!R""S&R;:S;
SSSSz/ws/transcribe	websocketc                    K                                      d{V  t                      }|9t                              d                                dd           d{V  dS 	                                   d{V }nH# t          $ r; t                              d                                dd           d{V  Y dS w xY w|                    d	d
          pd
}t          |                    dd                    t          dt          |                    dd                              t          |                    dd                    |d
k    r:t                              d|                                dd           d{V  dS t          dz  z            }t          dt          t                              }t          dt          t          |t          |          z                                }t                      t                      }t                      d|v r|                    d          n|                    d          }t                              d|           t#          |           d}	ddg ddt$          dt&          f fddt&          ff
d}
	 	                                   d{V }|                    d          dk    rEt                              d |                    d!          |                    d"          |           n"|                    d          d#k    r|                    d$          |                    d$          }|                    |           t-          |          |k    rt/          |d|                   }|d|=                     t3          |	|t          t-          |                    t          |          z  %                     |	dz  }	t-          |          |k     |
             d{V }|sn n|                    d          d#k    r|                    d&          r|                    d&          d'k    r|rwt/          |          }|                                                     t3          |	|t          t-          |                    t          |          z  %                     |	dz  }	 |
             d{V }|st                              d(           n0 d)dk    rdz
  ndt          d*          d+           d{V  n# t6          $ r:}t                              d,|j        t;          |d"d                     Y d}~nvd}~wt          $ rf t                              d-           	 t                              d.                                d/d0           d{V  n# t          $ r Y nw xY wY nw xY w	                                  dS # t          $ r Y dS w xY w# 	                                  w # t          $ r Y w w xY wxY w)1zqQueue-based WS pipeline. Client controls session lifecycle; server keeps connection open unless error/disconnect.Nz5WS closed before processing: model is not initializedi  r[   )codereasonz7WS closed due to invalid init payload: JSON is requiredi  z&Invalid init payload, JSON is requiredformat	pcm_s16lesample_rate>  r   channelssample_widthr   z'WS closed due to unsupported format: %szOnly pcm_s16le is supportedg?rw   rx   z$/ws/transcribe manual speakerIds: %sr   rz   payloadr,   c                    K   	                      |            d {V  dS # t          $ rN}t                              d|                     d          |                     d          |           Y d }~dS d }~ww xY w)NTz(WS send failed type=%s index=%s error=%stypeindexF)	send_jsonr2   r   warningrA   )r   r   r   s     r4   safe_send_jsonz,websocket_transcribe.<locals>.safe_send_jsonD  s      
	%%g.........4 	 	 	NN:F##G$$	   55555	s   " 
A:AA55A:c                    
K                                    s\                                } | nDt          r+t                              d| j        | j        z              t          j        t          j
        | j                  
          }t          j                    }	 |                    d t          |t           t"                     d {V }n:# t$          $ r-} dt'          |          d           d {V  Y d }~dS d }~ww xY w|                    d          |                    d          pd                                }|                    d	          pg }	 t-          |          }n# t$          $ r Y nw xY wt/          |          }|r$	                    |           t/          	          	t3          |                    d
| j                  p| j                  }t          r't                              d| j        |z   |            d||t5          |d          t5          |z   d          d           d {V }|sdS |z  dz                                   \dS )NzI[detailed][/ws/transcribe] start chunk index=%s seq=%s range=[%.3f, %.3f])r   
frame_rater   chunk_error)r   r   errorFr~   r   r<   r   r   zT[detailed][/ws/transcribe] result chunk index=%s seq=%s range=[%.3f, %.3f] result=%sr      )r   r   r   r   chunk_durationtotal_durationr   T)r   r   r   r   r   seqr   r   from_rawioBytesIOr   asyncioget_running_looprun_in_executorr   WS_CHUNK_SECONDSr   r2   rC   rA   rD   r   r   r   r@   r   )itemr{   loopr   r   
chunk_textr   r   okall_time_stampsr   r   r~   r|   r   r   r   r   r}   s            r4   process_queuez+websocket_transcribe.<locals>.process_queueQ  s"      ++-- N	==??D|# _H!22   #+
4<(()&!	  G +--D%)%9%9*$$"& &                $nmeVYZ[V\V\%]%]^^^^^^^^^uuuuu '++J77&**6228b??AAJ#''66<"H-h77    38<<H O&&x000"=o"N"N !1!1.$BS!T!T!iX\XijjL# jH-    &~*"&#+&+L!&<&<&+J,Eq&I&I 	 	 	 	 	 	 	 	B  u,&JQJE] ++-- N	` ts*   &0C 
D!"D		D)E9 9
FFTr   zwebsocket.disconnectz=WebSocket disconnected by client: code=%s reason=%s detail=%sr   r   zwebsocket.receivebytes)r   r   r   r   eofz;WS session stop after eof flush because send/process failedfinalr   )r   r   r   z<WebSocket client disconnected (exception): code=%s reason=%szWS handling failedz(WS closing with internal error code=1011i  zInternal error)acceptr   r   r   closereceive_jsonr2   rA   intmaxr@   WS_EMIT_SECONDSr   r   	bytearrayrW   r   r   dictrB   receiver   r   r   r   r   r   r   r   getattrr   )r   rq   initfmtbytes_per_secemit_secondschunk_bytespendingmanual_ws_speaker_idsr   r   msgdatar   r   r   r   r   r   r~   r|   r   r   r   r   r}   s   `               @@@@@@@@@@r4   websocket_transcriber     s      



KKE}NOOOoo40JoKKKKKKKKK++--------   PQQQoo40XoYYYYYYYYY
 88Hk**9kCdhh}e4455K1c$((:q112233Htxx2233L
k@#FFFoo40MoNNNNNNNNN;1L@AAMsE/2233LaU<%2F2F#FGGHHIIK+<+>+>EkkG.006Bd6J6JDHH\222PTPXPXYfPgPg
KK68MNNN  
 CEJOHd t      S S S S S S S S S S S S S S S SjI:	!))++++++++Cwwv"888SGGFOOGGH%%	   wwv"555#''':J:J:Vwww''t$$$ 'llk11#GL[L$9::G-II" #$+).s7||)<)<u]?S?S)S     1HC 'llk11 )=??******  $777CGGFOO7776??e++ 
!"'..		&$'(/-23w<<-@-@5CWCW-W     q,}......B 'deee(.$+27!))UQYY.3J.B.B         i:	v  x x xRTUTZ\cdegoqu\v\vwwwwwwww   -...	NNEFFF//t4D/EEEEEEEEEE 	 	 	D		""$$$$$ 	 	 	DD		""$$$$ 	 	 	D	s   2B ACCJU* )Y *
X40V)$Y )'X7X	X	
XXXXY XY  X6 6
YYY.	YY.
Y+(Y.*Y++Y.z/speaker/registerr   r   r   r   r   r   r   c                 X  K   |                                   d{V }d}| j        pd                                }|dv s|                    d          r	 t	          j        t          j        |                    }t          j                    }	|                    |	d           |		                                }nj# t          $ r}
t          dd|
 	          d}
~
ww xY w|d
v s|                    d          r|dk    rt          dd	          |}nt          dd	          t                      }	 |                    |||t                    }nJ# t          $ r t          dd	          t          $ r#}
t          dt!          |
          	          d}
~
ww xY wt#          d|i          S )z.Persist one speaker embedding and return UUID.Nr<   )wavmp3)r_   r`   raw)r   ra   zfailed to decode audio: r\   )pcmr   z.pcmr   z-only 16-bit PCM (sample_width=2) is supportedz6unsupported audio format; provide wav/mp3 or pcm_s16le)r   r   devicei  z2speechbrain not available for embedding extractionr   uuid)rl   rf   rg   rh   r   	from_filer   r   exportgetvaluer2   r	   rW   persist_from_pcmr   ImportErrorrC   r   )rX   r   r   r   r   r   	pcm_bytesfnamer   bufr   reguids                r4   speaker_registerr     s      DI] b''))E5>>2B#C#C	X(D)9)9::C*,,CJJs5J)))II 	X 	X 	XC8VST8V8VWWWW	X )))U^^F-C-C)q  #<kllllIIC8pqqqq
!
#
#C<""9+PXao"pp j j j4hiiii < < <CFF;;;;< &&&s1   A$B9 9
CCC2E $F5FF)U__doc__ro   ri   logging
contextlibr   typingr   r0   pathlibr   r   r   r   fastapir   r   r   r	   r
   r   r   r   fastapi.responsesr   pydubr   utils.device_utilsr   rq   r   r   utils.speaker_idr   
transcriber   servicesr   r   r   r   r   r   r   	LOG_LEVELbasicConfigr   r   	getLogger__name__r   setLevel__file__resolveparentsr.   r/   r3   r1   _GZZM_CONFIGr2   r   rA   rn   r   r@   r   rC   rD   rg   r   r   r5   rL   rP   rN   r   apprW   ru   postr   r   r   r    rM   r4   <module>r     s    
			   * * * * * *               				  k k k k k k k k k k k k k k k k k k k k * * * * * *       - - - - - - ( ( ( ( ( ( ( ( - - - - - - - - - - - -                  	  '''9glCC D D D D		8	$	$ GL99 : : :tH~~%%''/25GG			3		1	1 %R ty}}% % % % % % % % % % % % % % %   LLL 3|//DDEE 3|''(:<TUUVV %(():C@@AAs<++,A5IIJJPPRRXXZZ^xx 4    ; ; ;2 
g 
 
 
 
 8799 1a gOWxHHH     <@499  j     2 - tCyy $T

!%d	~ ~~
~ ~ #	~
 ~ ~ ~ ~B   [) [ [ [ ! [| 
tCyy$' $'
$'$' $' 	$'
 $' $' $' $' $' $' $'s6   E D5)E 5D99E <D9=E E
E