
    Oi>                       d Z ddlmZ ddlmZmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)  e)e*          Z+ eddg          Z,	 dFdGd Z-dHd"Z.dd#dId(Z/d)d*dJd,Z0d)d*dKd/Z1e,2                    d0e1          dLd7            Z3 G d8 d9e          Z4e,2                    d:           ed;           ed;          fdMd>            Z5 G d? d@e          Z6e,2                    dA          dNdB            Z7e,8                    dCe1          dOdD            Z9dES )Pu  Document ingestion endpoints – trigger and monitor Celery tasks.

文档入库接口模块。
提供手动触发入库、OA 系统 Webhook 接收文档、权限变更通知，
以及入库任务状态查询等功能。所有入库操作均通过 Celery 异步执行。
    )annotations)datetimetimezone)Path)	AnnotatedAny)uuid4)	APIRouterDependsHTTPExceptionRequest
UploadFileFileForm)	BaseModel)UserContextget_current_user)IngestRequestIngestStatus)settings)IngestTraceRecorder)ESClient)ingest_document_taskupdate_permissions_task)detect_file_typeis_supported)
get_loggerz/ingestingest)prefixtags requestr   doc_idstr	file_pathmetadatadict[str, Any]task_id	file_typereturnNonec                2  K   	 | j         j        j        }t          j        t
          j                                                  }|pi }|                    dd          }	i d|d|                    dd          d|	d|                    dd          d|                    dd          d|                    dd          d|                    dd          d	|                    d	g           d
|                    d
d          d|                    d          pdd|                    dg           d|                    dd          d|                    dd          d|                    dd          d|d|dd|||d}
|j	        
                    t          j        ||
           d{V  t                              d||           dS # t          $ r5}t                              d|t#          |                     Y d}~dS d}~ww xY w)u   Write a minimal 'pending' record to the meta index when a task is queued.

    在任务入队时预写一条 pending 状态的元数据记录，
    使前端在 Worker 真正处理之前就能看到该文档。
    original_filenamer!   r#   title
doc_numberissuing_orgdoc_typedocument_scene_typesubject_wordssignerpublish_dateNacl_idsservice_guide_statusguide_profile_idguide_matter_namer%   r)   statuspending)r(   
created_at
updated_at)indexidbodypending_meta_written)r#   r(   pending_meta_write_failedr#   error)appstate	es_clientr   nowr   utc	isoformatgetrawr>   r   es_meta_indexloggerinfo	Exceptionwarningr$   )r"   r#   r%   r&   r(   r)   rG   now_isometar-   r@   excs               +D:\work\zm-rag\backend\app\api\v1\ingest.py_write_pending_metarV       s     "S%k/9	,x|,,6688~2 HH%8"== 
f 
TXXgr** 
  !2 
 $((<44	 

 488M266 
 R00 
 "488,A2#F#F 
 TXXor:: 
 dhhx,, 
 DHH^44< 
 txx	2.. 
 #DHH-CR$H$H 
 );R @ @ 
  *=r!B!B 
  
  ! 
" i# 
$ !!) 
  
  
, m!!( " 
 
 	
 	
 	
 	
 	
 	
 	

 	*67KKKKK S S S26SRRRRRRRRRSs   GG 
H!*HHr   c                J    | j         j        j        }t          ||||          S )uD   构造入库追踪记录器，用于在 API 层写入追踪事件。)trace_idr#   r(   )rE   rF   rG   r   )r"   r#   r(   rG   s       rU   _make_trace_recorderrY   R   s)     "++5Iy76SZ[[[[    )	file_sizerecordersource_typer[   intc                 K   	 |pi }|                      |||                    dd          |                    dd          |                    dd                     d{V  |r	|dz  dd	nd}|                     d
dd| d|rd| ndz   ||||                    dd          ||                    dd          |                    dd          |                    dd          |                    dd          |                    dg           |                    dd          dd           d{V  dS # t          $ r5}t                              d|t          |                     Y d}~dS d}~ww xY w)z<Create the trace record and write the upload_received event.r-   r!   r.   operator)r]   r)   r-   r.   r`   N   .1fz MBupload_received	completedu   收到u   上传u   ，文件大小 r/   r0   r1   r6   knowledge_category)r]   r)   r[   r-   r#   r.   r/   r0   r1   r6   re   apisummarydetailsservicetrace_initial_write_failedrC   )start_tracerK   recordrP   rN   rQ   r$   )	r\   r#   r&   r)   r]   r[   rS   size_strrT   s	            rU   _write_initial_tracero   \   s     T~2""#"hh':B??((7B''XXj"-- # 
 
 	
 	
 	
 	
 	
 	
 	
 ;DKi9-66666oo{0[000U]4e4Qx4Q4Q4Qcef*&&%)XX.A2%F%F '2.."hh|R88#xxr:: HHZ4488Ir22&*hh/CR&H&H  !  
 
 	
 	
 	
 	
 	
 	
 	
 	
 	
$  T T T3F#c((SSSSSSSSSTs   D3D9 9
E8*E33E8default)queuerq   c                  K   	 |                      ddd|dd          d||dd	           d{V  dS # t          $ r5}t                              d
|t	          |                     Y d}~dS d}~ww xY w)z4Record that the Celery task was queued successfully.task_queuedrd   u   Celery 任务已入队: N   z...r(   rq   rf   rg   trace_task_queued_write_failedrC   )rm   rP   rN   rQ   r$   )r\   r#   r(   rq   rT   s        rU   _record_task_queuedrw      s      Xoo;?wrr{??? '%88	  
 
 	
 	
 	
 	
 	
 	
 	
 	
 	
  X X X7cRUhhWWWWWWWWWXs   /5 
A4*A//A4rD   rP   c          	     J  K   	 t          |          }|                     ddd|dd          ||ddd|	           d{V  |                     dd|
           d{V  dS # t          $ r5}t                              d|t          |                     Y d}~dS d}~ww xY w)z=Record a queue failure without leaving a false-success trace.rs   failedu   Celery 任务入队失败: Nd   ru   rf   INGEST_TASK_QUEUE_FAILED)rh   ri   rj   
error_codeerror_message)r|   r}   $trace_task_queue_failed_write_failedrC   )r$   rm   finish_tracerP   rN   rQ   )r\   r#   r(   rD   rq   r}   rT   s          rU   _record_task_queue_failedr      s(     ^E

ooG-2EGG '%881'  
 
 	
 	
 	
 	
 	
 	
 	
 ##1' $ 
 
 	
 	
 	
 	
 	
 	
 	
 	
 	

  ^ ^ ^=fTWX[T\T\]]]]]]]]]^s   AA# #
B"-*BB"z/trigger)response_modelr@   r   user1Annotated[UserContext, Depends(get_current_user)]r   c           	     
  K   |j         }|st          dd          t          |          }|                                st          j        |z  }|                                }|                    t          t          j                                                            st          dd          |                                st          dd|           t          |          }t          |          s4t          dd| dd                    t          j                             |j        }t          t                                }t!          | |j        |          }t%          ||j        |j        |d	
           d{V  	 t)          j        |j        ||j        d|           nb# t,          $ rU}	t/          ||j        ||	           d{V  t0                              d|j                   t          dd          |	d}	~	ww xY wt5          | |j        ||j        ||           d{V  t7          ||j        |           d{V  t0                              d|j        ||           t;          |dd          S )u   Queue a document for ingestion.

    手动触发文档入库。校验文件存在性和类型后，将任务发送至 Celery 队列。
      zfile_path is requiredstatus_codedetailz7Invalid file path: must be within the storage directoryzFile not found: Unsupported file type: . Supported: , manual)r]   Nr#   r%   r&   kwargsr(   ingest_task_queue_failedr#        操作失败，请稍后重试ingest_triggered)r#   r(   r)   PENDING        r(   r:   progress)r%   r   r   is_absoluter   file_storage_pathresolveis_relative_toexistsr   r   joinsupported_file_typesnamer$   r	   rY   r#   ro   r&   r   apply_asyncrP   r   rN   	exceptionrV   rw   rO   r   )
r"   r@   r   raw_pathr%   r)   filenamer(   r\   rT   s
             rU   trigger_ingestr      s      ~H M4KLLLLXI  "" ;.:	 !!##I##D)C$D$D$L$L$N$NOO 
L
 
 
 	

  
0h00
 
 
 	
 !++I	"" 
oYooTYYxOlEmEmoo
 
 
 	

 ~H %''llG#GT['BBH
$+t}i         
	_("kdm\\	
 	
 	
 	
 	
  _ _ _'$+wLLLLLLLLL3DKHHH4TUUU[^^	_ gt{HdmWV_
`
````````
hW
=
========
KK"4;S\K]]]   s   	$F. .
H8AHHc                      e Zd ZU dZded<   dZded<   dZded<   dZded<   g Zded	<   dZ	ded
<   g Z
ded<   dZded<   dZded<   g Zded<   dS )WebhookDocumentRequestuo   Request body from OA webhook for new document.

    OA 系统推送新文档时使用的请求体结构。
    r$   r#   r!   r.   r/   r0   	list[str]receiving_orgsr1   r3   r4   r5   r6   N)__name__
__module____qualname____doc____annotations__r.   r/   r0   r   r1   r3   r4   r5   r6    rZ   rU   r   r      s           KKKEOOOOJK "N""""H!M!!!!FLGrZ   r   z/webhook/document.filer   c           
       K   ddl }	 |                    |          }n'# |j        $ r}t          dd|           |d}~ww xY w|                    d          }|st          dd          |                                 d{V }t          |          dz  }|t          j        k    r#t          dd	|d
dt          j         d          |j	        pd}	t          |	          j                            d                                          pd}
|	|d<   t          j        }|                    dd           |d| d|
 z  }	 t!          |d          5 }|                    |           ddd           n# 1 swxY w Y   n'# t$          $ r}t          dd|           |d}~ww xY wt'          |          }t)          |          sJ|                    d           t          dd| dd                    t          j                             t0                              d|t          |          |t5          |                     |j        }t5          t9                                }t;          | ||          }t=          ||||dt          |                     d{V  	 t?          j         |||d|            nn# t$          $ ra}tC          ||||           d{V  |                    d           t0          "                    d!|"           t          d#d$          |d}~ww xY wtG          | |||||           d{V  tI          |||           d{V  ||d%|d&d'S )(zReceive a document from OA system webhook.

    Expects multipart form data:
    - metadata: JSON string of document metadata (with acl_ids)
    - file: Document file (any supported format)

    Saves the file locally and queues ingestion.
    r   Nr   zInvalid metadata JSON: r   r#   zdoc_id is required in metadatara   zFile too large: rb   z	MB (max: zMB)document.binr-   T)parentsexist_ok_tmp_wbi  zFailed to save file: )
missing_okr   r   r   webhook_received)r#   r[   r)   r%   webhook)r]   r[   r   r    webhook_ingest_task_queue_failedr   r   r   queuedz+Document received and queued for processing)r#   r(   r:   r)   message)%jsonloadsJSONDecodeErrorr   rK   readlenr   max_file_size_mbr   r   suffixlstriplowerr   mkdiropenwriterP   r   r   unlinkr   r   rN   rO   r$   r   r	   rY   ro   r   r   r   r   rV   rw   )r"   r&   r   r   	meta_dicter#   contentfile_size_mboriginal_nameextfile_dirtmp_pathfr)   r   r(   r\   rT   s                      rU   webhook_receive_documentr     s      KKKZJJx((		 Z Z Z4Qa4Q4QRRRXYYZ ]]8$$F V4TUUUU IIKKGw<<;/Lh///_l___AZ___
 
 
 	
 M/ZM
}


$
+
+C
0
0
6
6
8
8
AEC &3I!" )HNN4$N///0&003000HX(D!! 	QGGG	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 X X X4OA4O4OPPPVWWX
 !**I	"" 
4(((oYooTYYxOlEmEmoo
 
 
 	

 KKg,,h--     }H %''llG#GVW==H
&)YW         

_($8SS	
 	
 	
 	
 	
  _ _ _'&'3GGGGGGGGG4(((;FKKK4TUUU[^^_ gvxGY
W
WWWWWWWW
h
8
88888888 @  sg    
A=AF E6*F 6E::F =E:>F 
F&F!!F&&K 
L,AL''L,c                  ,    e Zd ZU dZded<   g Zded<   dS )PermissionUpdateRequestuo   Request body for permission change webhook.

    OA 系统推送权限变更时使用的请求体结构。
    r$   r#   r   r6   N)r   r   r   r   r   r6   r   rZ   rU   r   r     s8           KKKGrZ   r   z/webhook/permissionc                   K   t          j        |j        |j                  }t                              d|j        |j        t          |j                             |j        |j        ddS )z)Receive permission update from OA system.)r#   r6   permission_update_queued)r#   r(   	acl_countr   )r#   r(   r:   )r   delayr#   r6   rN   rO   r?   r   )r"   r@   tasks      rU   webhook_update_permissionr     s~       #({  D
 KK"{dl##	     +7  rZ   z/status/{task_id}c           
       K   ddl m}  || t          j                  }|j        dk    rt          | dd          S |j        dk    r/|j        pi }t          | d|                    dd                    S |j        d	k    r|j        pi }|                    d
d          }ddd}|                    |d          }t          | |d|                    d          |                    dd          |                    d                    S |j        dk    r%t          | ddt          |j                            S t          | |j        d          S )u   Check the status of a running or completed ingest task.

    通过 Celery AsyncResult 查询任务状态，将内部状态映射为前端可理解的枚举值。
    r   )AsyncResult)rE   r   r   r   
PROCESSINGr   SUCCESSr:   ry   	COMPLETEDPARTIAL_FAILED)rd   partial_failedFAILEDg      ?rD   deduplicatedFcontent_hash)r(   r:   r   rD   r   r   FAILURE)r(   r:   r   rD   )
celery.resultr   r   rE   rF   r   rO   rK   resultr$   )r(   r   r   r   rO   
raw_status
status_mapmappeds           rU   get_ingest_statusr     s      *)))))[&:&>???F|y  GILLLL		%	%{ bXXj#..
 
 
 	

 
	"	"}"XXh11
$.
 

 
H55((7##.%88.11
 
 
 	
 
	"	"fm$$	
 
 
 	
 <
 
 
 	
rZ   N)r!   )r"   r   r#   r$   r%   r$   r&   r'   r(   r$   r)   r$   r*   r+   )r"   r   r#   r$   r(   r$   r*   r   )r\   r   r#   r$   r&   r'   r)   r$   r]   r$   r[   r^   r*   r+   )
r\   r   r#   r$   r(   r$   rq   r$   r*   r+   )r\   r   r#   r$   r(   r$   rD   rP   rq   r$   r*   r+   )r"   r   r@   r   r   r   r*   r   )r"   r   r&   r$   r   r   r*   r'   )r"   r   r@   r   r*   r'   )r(   r$   r   r   r*   r   ):r   
__future__r   r   r   pathlibr   typingr   r   uuidr	   fastapir
   r   r   r   r   r   r   pydanticr   app.api.depsr   r   app.api.schemas.ingestr   r   
app.configr   app.core.ingest_trace_recorderr   app.infrastructure.es_clientr   app.tasks.ingest_taskr   r   app.utils.file_typer   r   app.utils.loggerr   r   rN   routerrV   rY   ro   rw   r   postr   r   r   r   r   rK   r   r   rZ   rU   <module>r      s    # " " " " " ' ' ' ' ' ' ' '       ! ! ! ! ! ! ! !       V V V V V V V V V V V V V V V V V V       6 6 6 6 6 6 6 6 > > > > > > > >       > > > > > > 1 1 1 1 1 1 O O O O O O O O > > > > > > > > ' ' ' ' ' '	H			)8*	5	5	5 /S /S /S /S /Sd\ \ \ \" 'T 'T 'T 'T 'T 'T^ X X X X X X2 ^ ^ ^ ^ ^ ^: Z55F F F 65FR    Y   "  !! DIItCyyj j j j "!jZ    i    "##   $#0 ==1
 1
 1
 >=1
 1
 1
rZ   