
    iY                     8   d Z ddlZddlZddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlZddlZddlZddlmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ d
dlmZ d
dlm Z m!Z! d
dl"m#Z# d
dl$m%Z%m&Z& d
dl'm(Z( d
dl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ d
dl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 dZ7 ejp                   e9ee7ejt                                ejv                  e<      Z=e=j}                   e9ee7ejt                                e
e?      j                         j                  d
   dz  ZB	 eBj                  dd      5 ZD ej                  eD      ZFddd        eHeFj                  dd            ZJ eHeFj                  deJ            ZK eLeFj                  dd            ZM eNeFj                  dd            j                         j                         dv ZQd eRfd!ZSd" ZTed#efd$       ZU eT       \  ZVZVZVZW ed%d&eU'      ZXd e#fd(ZYd)ed*eeN   d+eeN   d eeN   fd,ZZ ed-      fd.ed efd/Z[eXj                  d0       ed-       ed       ed      fd)ed.ed1eeN   d2eeN   d ef
d3       Z]eXj                  d4       ed-       ed       ed      fd)ed.ed1eeN   d2eeN   d ef
d5       Z^eXj                  d6      d7eNd efd8       Z_eXj                  d9      d:efd;       ZaeXj                  d<       ed-      d=d
d>d?fd.ed@eHdAeHdBeHdCeNd efdD       Zby# 1 sw Y   xY w# eG$ r i ZFY w xY w)EuN  gzzm 服务主入口。

职责概览：
- 暴露 HTTP `/transcribe` 与 WebSocket `/ws/transcribe` 两个转写接口。
- 在请求入口做参数校验、会话级资源初始化（SpeakerRegistry、队列）和结果汇总。
- 调用 services 中的统一流水线完成 ASR、二次切分、说话人匹配与时间戳合并。
    N)asynccontextmanager)Optional)Path)FastAPIFileFormHTTPExceptionRequest
UploadFile	WebSocketWebSocketDisconnect)JSONResponse)WebSocketState)AudioSegment   )choose_device)	get_model
init_model)SpeakerRegistry)run_async_transcribe_task run_transcribe_pipeline_for_file)transcribe_audio_file)
QueueChunkSessionChunkQueuemerge_adjacent_same_speakerpreload_persisted_speakersprocess_chunk_pipelinefilter_time_stamps)create_task_recordget_task_recordinit_task_dbmark_task_failedmark_task_runningmark_task_succeededINFO)levelzgzzm_config.jsonrutf-8encodingchunk_seconds<   ws_chunk_secondsws_emit_secondsg       @enable_detailed_logF)1trueyesonreturnc                      	 t         j                  dd      5 } t        j                  |       cd d d        S # 1 sw Y   y xY w# t        $ r i cY S w xY w)Nr'   r(   r)   )_config_pathopenjsonload	Exception)_fs    /app/gzzm/app.py_load_gzzm_configr=   H   sF    sW599R= 655 	s*   A 8	A AA A AAc                  z   t               } t        | j                  dd            }t        | j                  dd            }| j                  d| j                  dd            }t        |      }t	        | j                  d      xs d	      j                         }t        |      }|d
k(  rd}n
|dk(  rd}nd}||||fS )zReturn (similarity_threshold, soft_margin, disable_soft_reuse, speaker_device).

    Values are read from environment variables first, then from `gzzm_config.json`.
    This is intentionally lightweight so callers can call it per-request.
    speaker_sim_thresholdg      ?speaker_sim_soft_margingffffff?speaker_disable_soft_reuse disable_soft_reuse_for_persistedFmodel_device_speaker npuznpu:0cudazcuda:0cpu)r=   floatgetboolstrstripr   )cfgsim_thrsim_softdisable_softcfg_speaker_deviceprimaryspeaker_devices          r<   _get_runtime_speaker_settingsrT   P   s     
CCGG3S9:GSWW6=>H777Acej9klL%L SWW%;<BCIIK./G% 	F	!HlN::    _c                >   K   t                t                d  y wN)r   r!   )rV   s    r<   lifespanrY   i   s     LN	s   zQwen3 ASR APIz1.0.0)titleversionrY   c                  >    t               \  } }}}t        | |||      S )N)similarity_thresholdsoft_marginmodel_devicerB   )rT   r   )rN   rO   rP   rS   s       r<   _build_speaker_registryr`   x   s,    6S6U3GX|^$#)5	 rU   requestspeaker_ids_camelspeaker_ids_snakec                 v    |xs |}|r|S | j                   }|j                  d      xs |j                  d      S )N
speakerIdsspeaker_ids)query_paramsrI   )ra   rb   rc   manual_speaker_idsqps        r<   _resolve_manual_speaker_idsrj      s?    *?.?!!			B66,8266-#88rU   .filec                   K   t               }|t        dd      | j                  xs dj                         }|j	                  d      s|j	                  d      st        dd	      |j	                  d      rdnd}t        j                  d
|      5 }|j                  | j                          d{          |j                  }ddd       	 t        t              }t        |      	 t        j                  |       S 7 N# 1 sw Y   =xY w# t        $ r Y S w xY w# 	 t        j                         w # t        $ r Y w w xY wxY ww)z7Legacy helper: pure ASR without speaker identification.N  Model is not initializedstatus_codedetailrD   .wav.mp3  #Only wav or mp3 files are supportedFdeletesuffix)r   r	   filenamelowerendswithtempfileNamedTemporaryFilewritereadnamer   TRANSCRIBE_CHUNK_SECONDSr   osremover:   )rk   modelry   rx   tmptmp_pathresults          r<   transcribe_audio_file_handlerr      s    KE}4NOO#**,Hf%):):6)B4YZZ((0VfF		$	$E&	AS				#$88 
B&x1IJF#	IIh $ 
B	A  			IIh 		s   BE
D4D
5D	E
D  -DE
DD
E
	DE
DE
 E"D87E8	EEEEE
/transcribere   rf   c           
        K   t               }|t        dd      |j                  xs dj                         }|j	                  d      s|j	                  d      st        dd	      |j	                  d      rdnd}t        j                  d
|      5 }|j                  |j                          d{          |j                  }ddd       t        | ||      }	t        j                  d|	||       	 t        |	t        t        t         t"        t        d      }
t%        |
      	 t'        j(                  |       S 7 # 1 sw Y   yxY w# t*        $ r Y S w xY w# t        $ r  t*        $ r0}t        j-                  d       t        dt/        |            d}~ww xY w# 	 t'        j(                         w # t*        $ r Y w w xY wxY ww)zgQueue-based /transcribe pipeline: preload speakers -> chunk queue -> ASR -> second split -> speaker id.Nrm   rn   ro   rD   rr   rs   rt   ru   Frv   zT/transcribe manual speakerIds resolved: %s (form speakerIds=%s, form speaker_ids=%s)r   )r   rh   r+   rS   build_speaker_registrydetailed_log_enabledloggerlog_tagzTranscribe failed  )r   r	   ry   rz   r{   r|   r}   r~   r   r   rj   r   infor   r   SPEAKER_DEVICEr`   DETAILED_LOG_ENABLEDr   r   r   r:   	exceptionrK   )ra   rk   re   rf   r   ry   rx   r   r   rh   payloades               r<   transcribe_audior      s     KE}4NOO#**,Hf%):):6)B4YZZ((0VfF		$	$E&	AS				#$88 
B 5Wj+V
KK^	212)#:!5!	
 G$	IIh? $ 
B	AB  		   <,-CF;;<	IIh 		s   BGE4D?
5E	-G71E )E>G?EE
G	EGEGF.+FFF! !G#F98G9	GGGGGz/transcribe/asyncc                   K   t               }|t        dd      |j                  xs dj                         }|j	                  d      s|j	                  d      st        dd	      |j	                  d      rdnd}t        j                  d
|      5 }|j                  |j                          d{          |j                  }ddd       t        | ||      }	t        j                  d|	||       t        t        j                               }
	 t!        |
|j                  xs d|	       t+        j,                  t/        |
|	t0        t2        t4        t6        t8        t:        t<        t                     t?        |
dd|
 d      S 7 # 1 sw Y   xY w# t"        $ rP}	 t%        j&                         n# t"        $ r Y nw xY wt        j)                  d       t        dd|       d}~ww xY ww)z>Submit async transcribe task and return task UUID immediately.Nrm   rn   ro   rD   rr   rs   rt   ru   Frv   zZ/transcribe/async manual speakerIds resolved: %s (form speakerIds=%s, form speaker_ids=%s)z-Failed to create async transcribe task recordr   zfailed to create task: )task_idr   rh   mark_runningmark_succeededmark_failedr+   rS   r   r   r   queuedz/transcribe/async/)r   status
query_path) r   r	   ry   rz   r{   r|   r}   r~   r   r   rj   r   r   rK   uuiduuid4r   r:   r   r   r   asynciocreate_taskr   r#   r$   r"   r   r   r`   r   r   )ra   rk   re   rf   r   ry   rx   r   r   rh   r   r   s               r<   transcribe_audio_asyncr      s     KE}4NOO#**,Hf%):):6)B4YZZ((0VfF		$	$E&	AS				#$88 
B 5Wj+V
KKd	 $**,GS7DMM$7R9KL !1*.(2)#:!5	
  .wi8	
 M $ 
B	A  S	IIh 		HI6MaS4QRRSst   BG4F4F

5F	A
G4F /AG4
FFG4	G1"F87G,8	GG,G(G,,G11G4z/transcribe/async/{task_id}r   c                 T   K   t        |       }|t        dd      t        |      S w)z6Query async transcribe task status and result by UUID.i  ztask not foundro   )r    r	   r   )r   tasks     r<   get_transcribe_taskr     s0      7#D|4DEEs   &(z/ws/transcribe	websocketc                    K    j                          d{    t               }|1t        j                  d        j	                  dd       d{    y	  j                          d{   }|j                  d	d
      xs d
}t        |j                  dd            t        dt        |j                  dd                  t        |j                  dd            |d
k7  r2t        j                  d|        j	                  dd       d{    yt        dz  z        }t        dt        t                    }t        dt        t        |t        |      z                    }t               t               }t               d|v r|j                  d      n|j                  d      }t        j!                  d|       t#        |       d}	ddg ddt        dt$        fddt        dt$        ddf fd}
dt&        dt(        f fd dt*        t(        t,        t           t,        t$           f   f fd!}	 	  j/                          d{   }|j                  d"      d#k(  r8t        j!                  d$|j                  d      |j                  d      |       n|j                  d"      d%k(  r|j                  d&      |j                  d&      }|j1                  |       t3        |      |k\  rbt5        |d|       }|d|= j7                  t9        |	|t        t3        |            t        |      z  '             |	dz  }	t3        |      |k\  rb |        d{   \  }}}|s||r |
||       d{    n |j                  d"      d%k(  r|j                  d(      r|j                  d(      d)k(  r|r[t5        |      }|j;                          j7                  t9        |	|t        t3        |            t        |      z  '             |	dz  }	 |        d{   \  }}}|s+t        j                  d*       ||r |
||       d{    n* d+dkD  rdz
  ndt        d,      d-       d{    H	 j;                          y7 7 Y7 B# t        $ r4 t        j                  d        j	                  dd       d{  7   Y yw xY w7 7 7 7 k7 7 7 q# t<        $ r6}t        j!                  d.|j>                  tA        |dd             Y d}~d}~wt        $ r[ t        jC                  d/       	 t        j                  d0        j	                  d1d2       d{  7   n# t        $ r Y nw xY wY w xY w# t        $ r Y yw xY w# 	 j;                          w # t        $ r Y w w xY wxY ww)3zqQueue-based WS pipeline. Client controls session lifecycle; server keeps connection open unless error/disconnect.Nz5WS closed before processing: model is not initializedi  rn   codereasonz7WS closed due to invalid init payload: JSON is requiredi  z&Invalid init payload, JSON is requiredformat	pcm_s16lesample_rate>  r   channelssample_width   z'WS closed due to unsupported format: %szOnly pcm_s16le is supportedg?re   rf   z$/ws/transcribe manual speakerIds: %sr   g        r   r4   c                 R    t        |       j                         }|r|S t        |       S rX   )rK   rL   repr)r   texts     r<   _format_excz)websocket_transcribe.<locals>._format_excW  s"    1v||~KAwrU   r   r   c           	      h  K   t        dd       t        j                  k(  rt        j	                  d| |       y 	 j                  | |       d {    t        j                  d| |       y 7 # t        $ r6}t        j	                  d| |t        |      j                  |       Y d }~y d }~ww xY ww)Napplication_statezJWS close skipped because application_state=DISCONNECTED: code=%s reason=%sr   z&WS closed by server: code=%s reason=%szFWS close skipped/failed: code=%s reason=%s error_type=%s error_repr=%r)
getattrr   DISCONNECTEDr   r   closewarningr:   type__name__)r   r   close_errorr   s      r<   
safe_closez(websocket_transcribe.<locals>.safe_close]  s     9148N<W<WWKKdfjlrs
	//tF/;;;NNCT6R < 	KKX[!** 	s@   7B2A0 A.A0 -B2.A0 0	B/9,B*%B2*B//B2r   c                 ,  K   	 j                  |        d {    y7 # t        $ rn}t        j                  d| j	                  d      | j	                  d      |j
                  t        |dd       t        dd       t        dd              Y d }~yd }~wt        $ rr}t        j                  d	| j	                  d      | j	                  d      t        |      j                   |      t        dd       t        dd              Y d }~yd }~ww xY ww)
NTzlWS send skipped because client disconnected: type=%s index=%s code=%s reason=%s client_state=%s app_state=%sr   indexr   client_stater   FzSWS send failed type=%s index=%s error_type=%s error=%s client_state=%s app_state=%s)	send_jsonr   r   r   rI   r   r   r:   r   r   r   )r   r   r   r   s     r<   safe_send_jsonz,websocket_transcribe.<locals>.safe_send_jsonm  s     	%%g... /" 
	KK~F#G$8T*	>48	#6=  
	NNeF#G$Q  A	>48	#6= 
	sJ   D! ! D! 	DA$BDDA(DDDDc                    K   j                         sj                         } | 	 yt        r0t        j	                  d| j
                  | j                  z          t        j                  t        j                  | j                              }t        j                         }	 |j                  d t        |t         t"               d {   }|j'                  d
      |j'                  d      xs dj)                         }|j'                  d      xs g }	 t+        |      }t-        |      }|rj/                  |       t-              t1        |j'                  d| j                        xs | j                        }	t        r't        j	                  d| j
                  |	z   |        d||t3        |	d      t3        |	z   d      d       d {   }
|
s't5        dd       t6        j8                  k(  ryddd fS |	z  dz  j                         sy7 Y# t$        $ rA} |      } d|d       d {  7  }|sddd fcY d }~S ddd d	| fcY d }~S d }~ww xY w# t$        $ r Y Yw xY w7 w)NzI[detailed][/ws/transcribe] start chunk index=%s seq=%s range=[%.3f, %.3f])r   
frame_rater   chunk_error)r   r   errorF  z!chunk_error send failed at index=z!chunk processing failed at index=z: languager   rD   time_stampsduration_seczT[detailed][/ws/transcribe] result chunk index=%s seq=%s range=[%.3f, %.3f] result=%schunk_result   )r   r   r   r   chunk_durationtotal_durationr   )FNz.client disconnected while sending chunk_resultz"chunk_result send failed at index=r   )TNN)emptytry_getr   r   r   seqr   r   from_rawioBytesIOr   r   get_running_looprun_in_executorr   WS_CHUNK_SECONDSr   r:   rI   rL   r   r   extendrH   roundr   r   r   )itemsegmentloopr   r   
error_textsent
chunk_textchunk_tsr   okr   all_time_stampsr   r   r   
offset_secqueuer   r   r   speaker_registryr   s              r<   process_queuez+websocket_transcribe.<locals>.process_queue  s     ++-==?D|f  c $_HH!2!22 #++

4<<()&!	G ++-D^%)%9%9*$$"&    '++J7&**628b??AJ#''6<"H-h7 38<H&&x0"=o"N !1!1.$BSBS!T!iX\XiXijL#jHH-  &*"&#+&+L!&<&+J,Eq&I	 	B 9&94@ND_D__Xd&H$PPP,&JQJEi ++-l     ^(^
+]U]g,hiii $*KE7(SSSd&GwbQ[P\$]]]^   &	s   B&J*&H, H)H, AJ!I9 ,B2JJ	AJ(J)H, ,	I65I1II1I6J!
I1+I6,J1I66J9	JJJJr   zwebsocket.disconnectz=WebSocket disconnected by client: code=%s reason=%s detail=%szwebsocket.receivebytes)r   r   r   r   eofz;WS session stop after eof flush because send/process failedfinalr   )r   r   r   z<WebSocket client disconnected (exception): code=%s reason=%szWS handling failedz(WS closing with internal error code=1011r   zInternal error)"acceptr   r   r   r   receive_jsonr:   rI   intmaxrH   WS_EMIT_SECONDSr   r   	bytearrayr`   r   r   rK   dictrJ   tupler   receiver   lenr   putr   clearr   r   r   r   )r   r   initfmtbytes_per_secemit_secondschunk_bytespendingmanual_ws_speaker_idsr   r   r   msgdatar   r   
close_codeclose_reasonr   r   r   r   r   r   r   r   r   r   r   r   s   `                  @@@@@@@@@@@r<   websocket_transcriber  &  s     


KE}NOoo40JoKKK++-- 88Hk*9kCdhh}e45K1c$((:q123Htxx23L
k@#Foo40MoNNN;1L@AMsE/23LaU<%2F#FGHIK+<+>EkG.06Bd6JDHH\2PTPXPXYfPg
KK68MN
 CEJOHy S s C D  d t 6Y tXc]HSM'I!J Y  Y  Y vM!))++Cwwv"88SGGFOGGH%	 wwv"55#''':J:Vwww't$ 'lk1#GL[$9:G-II" #$+).s7|)<u]?S)S 1HC 'lk1 6C_/D,J!-,(\BBB$77CGGFO776?e+"'.		&$'(/-23w<-@5CW-W q9F3H0B
L'de%1l",Z"FFF($+27!)UQY.3J.B  q R	""$K 
 	L . PQoo40XoYYY 	OL ,6 0E C$ 4I G  xRTUTZTZ\cdegoqu\vww -.	NNEF//t4D/EEE 		  			""$ 		s  W'R4;W'R7W'%R= 8R:9R= =B"W'S= D8W'T -T .DT 5
T ?T T TB&T T	/T 2T3(T TT #V3 3W'7W':R= =4S:1S42S:7W'9S::W' T T T 	T T T 	V0,U	W 	!V0++VVVV0	V)&V0(V))V0,W /V00W 3	V?<W'>V??W'W$WW$	W!W$ W!!W$$W'z/speaker/registerr   r   r   r   r   r   r   c                   K   | j                          d{   }d}| j                  xs dj                         }|dv s|j                  d      ra	 t	        j
                  t        j                  |            }t        j                         }	|j                  |	d       |	j                         }n7|d
v s|j                  d      r|dk7  rt        dd	      |}nt        dd	      t               }	 |j                  |||t              }t#        d|i      S 7 # t        $ r}
t        dd|
 	      d}
~
ww xY w# t        $ r t        dd	      t        $ r}
t        dt!        |
      	      d}
~
ww xY ww)z.Persist one speaker embedding and return UUID.NrD   )wavmp3)rr   rs   raw)r   rt   zfailed to decode audio: ro   )pcmr   z.pcmr   z-only 16-bit PCM (sample_width=2) is supportedz6unsupported audio format; provide wav/mp3 or pcm_s16le)r   r   devicei  z2speechbrain not available for embedding extractionr   r   )r   ry   rz   r{   r   	from_filer   r   exportgetvaluer:   r	   r`   persist_from_pcmr   ImportErrorrK   r   )rk   r   r   r   r   r  	pcm_bytesfnamesegbufr   reguids                r<   speaker_registerr  3  sk     DI]] b'')E5>>2B#C	X((D)9:C**,CJJs5J)I ))U^^F-Cq #<kllIC8pqq
!
#C<""9+PXao"p &&9   	XC:RSTRU8VWW	X  j4hii <CF;;<sY   E9D9E9AD 0AE93D= E9	D:%D55D::E9=E6E11E66E9)c__doc__r   r|   logging
contextlibr   typingr   r8   pathlibr   r   r   r   fastapir   r   r   r	   r
   r   r   r   fastapi.responsesr   starlette.websocketsr   pydubr   utils.device_utilsr   r   r   r   utils.speaker_idr   async_transcriber   r   
transcriber   servicesr   r   r   r   r   r   services.async_task_storer   r    r!   r"   r#   r$   	LOG_LEVELbasicConfigr   r%   	getLoggerr   r   setLevel__file__resolveparentsr6   r7   r;   r9   _GZZM_CONFIGr:   r   rI   r   r   rH   r   rK   rL   rz   r   r   r=   rT   rY   rV   r   appr`   rj   r   postr   r   r   r   r  r   rU   r<   <module>r1     s   
   *     	  k k k * /  - ( - Y -   	   ''9gllC D			8	$ GLL9 :H~%%'//25GG			3		1R tyy} 
2 |//DE |''(:<TUV (():C@A<++,A5IJPPRXXZ^xx 4 ;2 
g 
 
 89 1a OWxH 9 9Xc] 9gopsgt 9  zB  CF  zG 9 <@9 j  2 - Cy $T
!%d	22
2 2 #	2
 2 2j 
 Cy $T
!%d	==
= = #	=
 = =@ 	&'s |  (  I) I !IX 
Cy$'
$'$' $' 	$'
 $' $' $'} 
2	1 Ls*   L /LL LL LL