
    =i<*                        d Z ddlZddlZddlZddlZddlmZmZmZ ddl	m
Z
 ddlmZmZmZ ddlmZ ddlZddlmZ  ee                                          j        d         d	z  Z	 e                    d
d          5 Z ej        e          Zddd           n# 1 swxY w Y   n# e$ r i ZY nw xY w ee                    dd                                                    Z  ej!         e"ee ej#                              ej$        e%          Z&e&'                     e"ee ej#                              e(e                    dd                    Z) e(e                    dd                    Z* e(e                    dd                    Z+deee         eee,         f         fdZ-de(fdZ.de(dee,         de/fdZ0d%dede1deee         eee,         e(f         fdZ2d&d!ed"e1d#e/de,fd$Z3dS )'u   文件转写核心逻辑。

职责概览：
- 对输入音频进行分块转写并拼接文本与时间戳。
- 当单块转写失败/空结果时自动降级为更小子块重试。
- 提供可配置的空结果容错模式（fail_on_empty）。
    N)ListOptionalTuple   )	get_model)split_audio_to_chunks!insert_punctuations_into_segmentsmerge_sentences_from_tokens)AudioSegment)Pathzgzzm_config.jsonrzutf-8)encoding	log_levelINFO)level&asr_min_chunk_sec_for_partial_fallback   !asr_partial_result_coverage_ratiog?asr_partial_result_tail_gap_sec   returnc                 ~   t          | dd          p+t          | t                    r|                     d          nd}t          | dd          p+t          | t                    r|                     d          nd}t          | dd          p-t          | t                    r|                     d          ndpg }|||fS )z=Normalize transcribe result to (language, text, time_stamps).languageNtext time_stamps)getattr
isinstancedictget)firstr   text_valts_lists       src\gzzm\transcribe.py_extract_transcribe_payloadr%   (   s    uj$//oZX]_cMdMd4nEIIj4I4I4IjnHufd++eZPUW[E\E\0d		&0A0A0AbdHe]D11zR\]bdhRiRi6seii6N6N6NoszxzGXw&&    c           
      ^   t          | t                    r?|                     d|                     d|                     dd                              }n7t          | dd           }|t          | dd           }|t          | dd          }	 t	          |pd          S # t
          $ r Y dS w xY w)Nendfinishend_time        )r   r   r    r   float	Exception)ts_itemend_vals     r$   _extract_end_secondsr0   0   s    '4   8++eW[[7;;zSV;W;W%X%XYY'5$//?gx66G?gz377GW^$$$   sss   B 
B,+B,chunk_durationr#   c                     | t           k     rdS |sdS t          d |D             d          }|dk    rdS |t          d|           z  }t          d| |z
            }|t          k     p
|t          k    S )NFc              3   4   K   | ]}t          |          V  d S Nr0   .0tss     r$   	<genexpr>z6_should_fallback_for_partial_result.<locals>.<genexpr>E   s+      BBB+B//BBBBBBr&   r+   defaultr   TgMbP?)"MIN_CHUNK_SEC_FOR_PARTIAL_FALLBACKmaxPARTIAL_RESULT_COVERAGE_RATIOPARTIAL_RESULT_TAIL_GAP_SEC)r1   r#   max_end_seccoverage_ratiotail_gap_secs        r$   #_should_fallback_for_partial_resultrC   ?   s    :::u uBB'BBBCPPPKat 3un#=#==NsN[899L99g\Lg=ggr&      
chunk_pathfallback_secondsc                 n	   t                      }|t          d          t          j        |           }t	          |          dz  }	 |                    | dd          }|rn|d         }t          |          \  }}}	|s|	rOt          ||	          r9t          d |	D             d	          }
t          
                    d
| ||
           n|||	|fS t          
                    d|            n+# t          $ r t                              d|            Y nw xY wt          dt          |                    dz  }d}g }g }t          dt	          |          |          D ]B}||||z            }t          j        dd          5 }|                    |j        d           |j        }ddd           n# 1 swxY w Y   	 |                    |dd          }|s'	 	 t'          j        |           # t          $ r Y w xY w|d         }t          |          \  }}}|p|}|r|                    |           |dz  }|D ]}t-          |t.                    rt1          |                    d|                    d|                    dd                                        }t1          |                    d|                    d|                    d|                                        }|                    d|                    dd                    }nt5          |dd          }t5          |dd          }| t7          |d          rt5          |d          }| t7          |d          rt5          |d          }t1          |pd          }t1          |p|          }t5          |dt5          |dd                    }|                    t9          ||z   d          t9          ||z   d          |d           nR# t          $ rE t                              d|           Y 	 t'          j        |           # t          $ r Y w xY ww xY w	 	 t'          j        |           	# t          $ r Y w xY w# 	 t'          j        |           w # t          $ r Y w w xY wxY w|d                    |          ||fS ) zTranscribe one chunk; if it fails, split it into smaller pieces and retry.

    Returns (language, text, time_stamps, chunk_duration_sec).
    N   模型尚未加载完成g     @@T)audior   return_time_stampsr   c              3   4   K   | ]}t          |          V  d S r4   r5   r6   s     r$   r9   z6_run_transcribe_chunk_with_fallback.<locals>.<genexpr>a   s+      &R&RB';B'?'?&R&R&R&R&R&Rr&   r+   r:   u]   检测到疑似截断结果，降级为小分片重试: chunk=%s duration=%.3fs max_end=%.3fsu8   切片转写结果为空，降级为小分片重试：%su;   模型对切片转写失败，降级为小分片重试：%sr   i  Fz.wav)deletesuffixwav)formatstartbegin
start_timer(   r)   r*   r   wordr      rP   r(   r   u#   降级子切片仍转写失败：%s)r   RuntimeErrorr   	from_filelen
transcriber%   rC   r=   loggerwarningr-   	exceptionintrangetempfileNamedTemporaryFileexportnameosremoveappendr   r   r,   r    r   hasattrroundjoin)rE   rF   MODELsegr1   resultsr!   r   r"   r#   r@   fallback_msmerged_languagemerged_texts	merged_tssub_start_mssub_segsub_tmpsub_pathsub_results	sub_firstsub_langsub_textsub_ts_list
sub_offsetr8   rP   r(   text_seg
start_attrend_attrs                                  r$   #_run_transcribe_chunk_with_fallbackr}   N   s   
 KKE}5666

 
,
,CXX&Nd""dW["\\ 	GAJE*Ee*L*L'Hh 
G7 
G6~wOO 	G"%&R&R'&R&R&R\_"`"`"`KNNw"&#	    $XwFFQS]^^^^ d d dVXbcccccd a-..//$6K%)O LIaS;77 , ,lL;$>>?(fEEE 	$NN7<N666|H	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$'	**D]a*bbK B	(####   E $AI.I).T.T+Hh-9O .##H---%.J!  b$'' L!"&&"&&"&&WXBYBY2Z2Z"["[\\EubffXrvvjRW?X?X.Y.Y Z Z[[C!vvfbffVR.@.@AAHH!(Wd!;!;J&r5$77H!)gb,.G.G)%,R%>%>
'GB
,C,C'#*2z#:#:!*"344E 1E22C&r672vr3J3JKKH  "5:#5q99 z!1155$" "    !*  	 	 	BHMMM	(####   	+2	(####   	(####    BGGL119nLLs   BC+ C+ +%DD $F00F4	7F4	<O9G--
G:9G:=G;O98Q49%QQ4 P66
QQQQ4Q##
Q10Q14R6R
R
R	RR	RTtmp_pathchunk_secondsfail_on_emptyc                 D   t                      }|t          d          t          j                    }t          | |          }t                              dt          |          |           g }d}d}g }		 |D ] }
t          |
|dk    r!t          dt          d|dz                      nd	
          \  }}}}|p|}|r|
                    |           |D ]}t          |t                    rt          |                    d|                    d|                    dd                                        }t          |                    d|                    d|                    d|                                        }|                    d|                    dd                    }nt          |dd          }t          |dd          }| t!          |d          rt          |d          }| t!          |d          rt          |d          }t          |pd          }t          |p|          }t          |dt          |dd                    }|	
                    t#          ||z   d          t#          ||z   d          |d           ||z  }"t          j                    |z
  }|s|r#t          dt          |           d| d          t                              dt          |                     |dt#          |d          t          |          g g d|D ]'}	 t'          j        |           # t*          $ r Y $w xY wS d                    |          }t/          |	|          }t1          |          }|d                    |                                          t#          |d          t          |          ||d|D ]'}	 t'          j        |           # t*          $ r Y $w xY wS # |D ]'}	 t'          j        |           # t*          $ r Y $w xY ww xY w)a  Transcribe a file at `tmp_path` using the global model.

    Returns a dict compatible with the previous JSONResponse payload.
    This function will remove any temporary chunk files it created, but
    does not remove `tmp_path` (caller is responsible).
    NrH   u    切分后得到 %d 个 chunk: %sr+   r      r      rD   )rF   rP   rQ   rR   r(   r)   r*   r   rS   r   rT   rU   u5   识别失败: 未从任何切片获得文本 (chunks=z, paths=)u8   本次转写未获得文本，返回空结果: chunks=%d   )r   r   time_secchunksr   time_stamps_tokens )r   rV   timer   rZ   debugrX   r}   r=   minre   r   r   r,   r    r   rf   rg   r[   rc   rd   r-   rh   r	   r
   strip)r~   r   r   ri   start_inferchunk_pathstextsr   start_offsetall_time_stampsrE   
chunk_lang
chunk_textchunk_ts_listr1   r8   rP   r(   rz   r{   r|   
infer_timep	full_textr   merged_time_stampss                             r$   transcribe_audio_filer      s    KKE}5666)++K'-@@K
LL3S5E5E{SSSE"HL"$OE%  	+  	+JDgHUXYHYHYQB0B(C(C!D!D!D_aE E EAJ
M>
  -:H )Z(((#  b$'' L!"&&"&&"&&WXBYBY2Z2Z"["[\\EubffXrvvjRW?X?X.Y.Y Z Z[[C!vvfbffVR.@.@AAHH!(Wd!;!;J&r5$77H!)gb,.G.G)%,R%>%>
'GB
,C,C'#*2z#:#:!*"344E 1E22C&r672vr3J3JKKH&&"5<#7;; |!3Q77$( (     N*LLY[[;.
 	 E"  $D[^_j[k[k  $D  $D  vA  $D  $D  $D  E  E  ENNUWZ[fWgWghhh$!*a00k**!&( .  	 	A	!   	 GGENN	>PYZZ89KLL !HHUOO))++j!,,+&&-"4
 
  	 	A	!   	 	 	A	!   	s\   5J,O3 &L;;
MMA<O3 O""
O/.O/3P9PP
P	PP	P)rD   )T)4__doc__rc   r   loggingr_   typingr   r   r   modelr   utils.audio_utilsr   r	   r
   pydubr   jsonpathlibr   __file__resolveparents_config_pathopen_fload_GZZM_CONFIGr-   strr    upper	LOG_LEVELbasicConfigr   r   	getLogger__name__rZ   setLevelr,   r<   r>   r?   r   r%   r0   boolrC   r]   r}   r    r&   r$   <module>r      s    
			    ( ( ( ( ( ( ( ( ( (       t t t t t t t t t t              tH~~%%''/25GG			3		1	1 %R ty}}% % % % % % % % % % % % % % %   LLL C  f5566<<>>	  '''9glCC D D D D		8	$	$ GL99 : : : &+U<+;+;<dfh+i+i%j%j " %l&6&67Z\_&`&` a a #eL$4$45VXY$Z$Z[[ '%sDJ0N*O ' ' ' 'U    h hT
 hW[ h h h hSM SMC SM3 SMX]^fgj^kmprvw{r|  D  _D  YE SM SM SM SMlY YC Y YD Y\` Y Y Y Y Y Ys6   )B)  BB) B!!B) $B!%B) )B32B3