
    ifM                    x   U d Z ddlmZ ddlZddlmZ ddlmZmZ ddl	m
Z
mZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$  e$e%          Z& e
ddg          Z'dWdZ(e')                    de          dXd"            Z*i d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d4d6d7d8d9d:d;d<d=d>d=d?d@Z+dAe,dB<   e')                    dC          dYdF            Z-e')                    dG          dYdH            Z.e')                    dI          dZdK            Z/e')                    dL          d[dN            Z0e')                    dOe          d\dQ            Z1e'2                    dO          d]dS            Z3e')                    dTe          d^dV            Z4dS )_u   Document detail, versions, graph, and file download endpoints.

文档详情接口模块。
提供文档元数据查询、同内容多版本列表、原始文件下载、
文档删除（含共享内容处理）以及单文档知识图谱子图查询。
    )annotationsN)Path)	AnnotatedAny)	APIRouterDependsHTTPExceptionRequest)FileResponseResponse)NotFoundError)UserContextget_current_user)DocumentDetailDocumentVersionGraphSubDataVersionListResponse)settings)PermissionService)ESClient)RedisClient)
get_loggerz	/documentdocument)prefixtagsdoc_acl_ids	list[str]user_acl_tokensreturnboolc                b    | sdS t          t          |          t          |           z            S )u4  Return True if the user has access to a document.

    Access is granted when:
      - The document has no acl_ids (public), OR
      - Any of the user's tokens appears in the document's acl_ids.

    ACL 权限校验：无 acl_ids 视为公开文档，否则要求用户令牌与文档 ACL 有交集。
    T)r    set)r   r   s     -D:\work\zm-rag\backend\app\api\v1\document.py_check_acl_accessr$   $   s4      tO$$s;'7'77888    z/versions/{content_hash})response_modelcontent_hashstruser1Annotated[UserContext, Depends(get_current_user)]requestr
   r   c           	     >  K   |j         j        j        }|j         j        j        }t	          |          }|                    |           d{V }|                                }|j                            t          j
        ddd| iidddiig|gdid	g d
dddiigd           d{V }t          |t                    r|n|j        }	|	                    di                               dg           }
d |
D             }t          | |t!          |                    S )zList all document versions sharing the same content, filtered by user ACL.

    Returns only versions the current user has permission to see.
    redis_clientNr    termr'   status	completed)mustfilteri  doc_idtitle
doc_numberissuing_orgdoc_typepublish_datesignerr:   orderdesc)querysize_sourcesort)indexbodyhitsc                   g | ]}t          |d                              d|d                   |d                              dd          |d                              d          |d                              d          |d                              d          |d                              d          |d                              d	          
          S )r@   r5   _idr6    r7   r8   r9   r:   r;   r4   )r   get).0hits     r#   
<listcomp>z!list_versions.<locals>.<listcomp>a   s         	y>%%hE
;;i.$$Wb119~)),77I**=99^''
33Y++N;;y>%%h//	
 	
 	
  r%   )r'   versionstotal)appstate	es_clientr.   r   resolvebuild_es_filterrawsearchr   es_meta_index
isinstancedictrC   rH   r   len)r'   r)   r+   rP   r.   perm_serviceperm
acl_filterresprS   rD   rL   s               r#   list_versionsr]   8   s      "++5I ' 1 >L$,???L%%d++++++++D%%''J%%$ .,!?@(K!89  *l     %w&789
 
 &        D( T4((
7$$diC7762""62..D    H !(mm   r%   pdfapplication/pdfdoczapplication/msworddocxzGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentxlszapplication/vnd.ms-excelxlsxzAapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheetpptzapplication/vnd.ms-powerpointpptxzIapplication/vnd.openxmlformats-officedocument.presentationml.presentationpngz	image/pngjpgz
image/jpegjpegtiffz
image/tiffbmpz	image/bmptxtz
text/plainmdztext/markdownmarkdowncsvztext/csvzdict[str, str]_FILE_TYPE_MIMEz/pdf/{doc_id}r5   r   c                4   K   t          | ||           d{V S )zKDownload the PDF file for a document (legacy endpoint, redirects to /file).N)download_file)r5   r)   r+   s      r#   download_pdfrr      s,       vtW555555555r%   z/file/{doc_id}c                  K   |j         j        j        }|j         j        j        }	 |j                            t          j        |            d{V }nk# t          $ r t          dd|  d          t          $ r@}t                              d| t          |                     t          d	d
          d}~ww xY wt          |t                    r|n|j        }|                    di           }t#          |          }	|	                    |           d{V }
|                    dg           }t'          ||
j                  st          dd          |                    dd          }|                    dd          }|st          dd          t+          j        d|          st          dd          t          j        | d| z  }|                                s9t          j        | dz  }|                                st          dd          d}|                    d|           }t+          j        dd|          }t4                              |d          }| d| }t7          t          |          ||          S ) zDownload the original file for a document.

    Checks ACL permissions before allowing download.
    Files are stored by content_hash under ``settings.file_storage_path``.
    rB   idN  	Document 
 not foundstatus_codedetailes_get_document_errorr5   error  Internal server errorr@   r-   acl_ids  z'No permission to download this documentr'   rG   	file_typer^   zFile not available	[a-f0-9]+  Invalid content hash format..pdfFile not found on diskr6   z[<>:"/\\|?*\x00-\x1f]_application/octet-stream)path
media_typefilename)rN   rO   rP   r.   rS   rH   r   rU   r   r	   	Exceptionloggerr~   r(   rV   rW   rC   r   rQ   r$   
acl_tokensre	fullmatchfile_storage_pathexistssubro   r   )r5   r)   r+   rP   r.   responseerS   sourcerY   rZ   r   r'   r   	file_pathr6   r   r   s                     r#   rq   rq      s      "++5I ' 1 >L	M"**( + 
 
 
 
 
 
 
 
  T T T4R4R4R4RSSSS M M M,V3q66JJJ4KLLLLM !400
C((hmCWWY##F %,???L%%d++++++++D**Y++K[$/:: _4]^^^^ ::nb11L

;..I J4HIIII <l33 S4QRRRR*-J-Jy-J-JJI .L1F1F1FF	!! 	RC8PQQQQ	JJw''EF+S%88E $$Y0JKKJ%%)%%H^^      ,A (B;;;B66B;z/preview/{doc_id}r   c                  K   |j         j        j        }	 |j                            t
          j        |            d{V }nk# t          $ r t          dd|  d          t          $ r@}t                              d| t          |                     t          d	d
          d}~ww xY wt          |t                    r|n|j        }|                    di           }|                    dd          }|                    dd          }|st          dd          t!          j        d|          st          dd          t
          j        | d| z  }	|	                                s9t
          j        | dz  }	|	                                st          dd          d}|dk    rt)          t          |	          d          S d| d| }
t
          j         d}|
||d}	 ddl}|                    t
          j        d          4 d{V }|                    ||           d{V }ddd          d{V  n# 1 d{V swxY w Y   |j        dk    rVt                              d | |j        |j        dd         !           t          |j        d"|j        dd                    |j                            d#d$          }|j        }d$|v rI|                    d%d&'          }|                    | d(d)| d*          }|                     d%          }tC          ||+          S # t          $ r@}t                              d,| t          |                     t          d-d.          d}~ww xY w)/uU  Preview a document by converting it to HTML (or serving PDF directly).

    通过 Java converter 服务将文档转换为 HTML 预览。PDF 直接返回，其他格式转为 HTML。
    Proxies to the Java converter's /api/preview endpoint.
    无需 JWT 认证——预览内容本身不包含敏感数据，文档 ID 不可枚举。
    rt   Nrv   rw   rx   ry   r|   r}   r   r   r@   r'   rG   r   r^   zFile not available for previewr   r   r   r   r   r   r_   )r   r   z/data/files/z/api/preview)filePathexthashr   timeoutproxy)params   preview_converter_error)r5   r0   rC   zPreview conversion failed: content-typez	text/htmlzutf-8replace)errorsz_files/z /api/ai/v1/document/preview-res//contentr   preview_request_error  zPreview service unavailable)"rN   rO   rP   rS   rH   r   rU   r   r	   r   r   r~   r(   rV   rW   rC   r   r   r   r   r   converter_base_urlhttpxAsyncClientconverter_timeoutrz   warningtextheadersr   decoder   encoder   )r5   r+   rP   r   r   rS   r   r'   r   r   docker_file_pathpreview_urlr   r   clientr\   content_type	html_body	html_texts                      r#   preview_documentr      s      "++5I	M"**( + 
 
 
 
 
 
 
 
  T T T4R4R4R4RSSSS M M M,V3q66JJJ4KLLLLM !400
C((hmCWWY##F ::nb11L

;..I V4TUUUU<l33 S4QRRRR*-J-Jy-J-JJI .L1F1F1FF	!! 	RC8PQQQQ	 EY(
 
 
 	
 Al@@Y@@0>>>K$ F'
$$X-Gt$TT 	@ 	@ 	@ 	@ 	@ 	@ 	@X^K????????D	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ s""NN)'Ytt_	       ,FTYtt_FF   
 |''DDL	 ,&&!(((CCI!))(((B<BBB I "((11I#
 
 
 	
  
 
 
,V3q66JJJ0
 
 
 	

sT   ,A (B**;B%%B*)+L6 I2L6 
IL6 IC#L6 6
N  ;M;;N z+/preview-res/{content_hash}/{filename:path}r   c           	       K   t          j        d|           st          dd          d|v s|                    d          rt          dd          t          j         d|  d| }	 d	d
l}|                    dd
          4 d
{V }|                    |           d
{V }d
d
d
          d
{V  n# 1 d
{V swxY w Y   |j	        dk    rt          |j	        d          |j
                            dd          }t          |j        |          S # t          $ r  t          $ rA}t                              d| |t!          |                     t          dd          d
}~ww xY w)u  Proxy sub-resources (images, CSS, HTML tabs) from converter preview cache.

    转换后 HTML（如 Excel）引用的子资源（图片、样式表、子页面）通过此端点代理。
    无需认证——资源本身不包含敏感信息，且路径中 hash 不可猜测。
    r   r   zInvalid content hashry   z..r   zInvalid filenamez/api/preview/resource/r   Ng      >@r   r   zResource not foundr   r   r   preview_resource_error)r   r   r~   r   zResource unavailable)r   r   r	   
startswithr   r   r   r   rH   rz   r   r   r   r   r   r~   r(   )r'   r   resource_urlr   r   r\   r   r   s           r#   preview_resourcer   A  s<      <l33 L4JKKKKx8..s334FGGGG1bbbbX`bbLL$$T$>> 	2 	2 	2 	2 	2 	2 	2&L11111111D	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2s""D,<EYZZZZ|''8RSSFFFF    L L L-L8[^_`[a[abbb4JKKKKLs=   (!D 	B7%D 7
CD CAD E/.<E**E/z	/{doc_id}r   c           	       K   |j         j        j        }|j         j        j        }	 |j                            t          j        |            d{V }nk# t          $ r t          dd|  d          t          $ r@}t                              d| t          |                     t          d	d
          d}~ww xY wt          |t                    r|n|j        }|                    di           }t#          |          }	|	                    |           d{V }
|                    dg           }t'          ||
j                  st          dd          t+          d!i d|                    d|           d|                    dd          d|                    dd          d|                    d          d|                    d          d|                    d          d|                    dg           d|                    d          d|                    d          d|                    d          d|                    d          d|                    d          d|                    d          d|                    d          d|                    d          d |                    d           S )"z}Retrieve full metadata for a single document.

    Checks that the user has permission to view the document via acl_ids.
    rt   Nrv   rw   rx   ry   r|   r}   r   r   r@   r-   r   r   #No permission to view this documentr5   r'   rG   r6   r7   r8   r9   subject_wordsr;   r:   summarychunk_count
page_countr   r   
created_at
updated_at )rN   rO   rP   r.   rS   rH   r   rU   r   r	   r   r   r~   r(   rV   rW   rC   r   rQ   r$   r   r   )r5   r)   r+   rP   r.   r   r   rS   r   rY   rZ   r   s               r#   get_documentr   d  sF      "++5I ' 1 >L	M"**( + 
 
 
 
 
 
 
 
  T T T4R4R4R4RSSSS M M M,V3q66JJJ4KLLLLM !400
C((hmCWWY##F %,???L%%d++++++++D**Y++K[$/:: [4YZZZZ   zz(F+++ZZ333 jj"%%% ::l+++	
 JJ}--- J''' jj"555 zz(### ZZ/// 

9%%% JJ}--- ::l+++ **[))) **[))) ::l+++  ::l+++! r   dict[str, Any]c                  K   |j         j        j        }|j         j        j        }|j         j        j        }	 |j                            t          j        |            d{V }nk# t          $ r t          dd|  d          t          $ r@}t                              d| t          |                     t          d	d
          d}~ww xY wt          |t                     r|n|j        }|                    di           }	t%          |          }
|
                    |           d{V }|	                    dg           }t)          ||j                  st          dd          |                    |            d{V }|	                    dd          }|r|                    dd          dk    r|	                    dd          }t          j        | d| z  }	 |                    d           |dk    r(t          j        | dz  }|                    d           d|d<   n# t          $ r d|d<   Y nw xY wd}	 |                    |            d{V }|                    dd          }nA# t          $ r4}t                              d| t          |                     Y d}~nd}~ww xY w||d<   t          j        d!d | i| d | i|S )"zDelete a document, handling shared content correctly.

    If other documents share the same content_hash, only the meta record
    is removed and the chunks' ACL is recomputed.  If this is the last
    reference, chunks and PDF are deleted entirely.
    rt   Nrv   rw   rx   ry   r|   r}   r   r   r@   r-   r   r   z%No permission to delete this documentr'   rG   deleted_chunksr   r   r^   r   T)
missing_okr   file_deletedFdeleted_nodesdelete_neo4j_errordeleted_graph_nodesdocument_deletedr5   )r   )rN   rO   rP   r.   neo4j_clientrS   rH   r   rU   r   r	   r   r   r~   r(   rV   rW   rC   r   rQ   r$   r   delete_documentr   unlinkdelete_document_graphr   info)r5   r)   r+   rP   r.   r   r   r   rS   r   rY   rZ   r   resultr'   r   r   
legacy_pdfr   neo4j_resultexcs                        r#   r   r     s      "++5I ' 1 >L;$1L	M"**( + 
 
 
 
 
 
 
 
  T T T4R4R4R4RSSSS M M M,V3q66JJJ4KLLLLM !400
C((hmCWWY##F$,???L%%d++++++++D**Y++K[$/:: ]4[\\\\ ,,V44444444F ::nb11L +

#3Q77!;;JJ{E22	.L1N1N91N1NN		+---E!!%7\:O:O:OO
!!T!222%)F>"" 	+ 	+ 	+%*F>"""	+ L)??GGGGGGGG*..BB L L L+F#c((KKKKKKKKL %8F !
K<<6<V<<<f'''sB   ,A$ $(C;CC"A	H, ,H>=H>1I6 6
J4 *J//J4z/{doc_id}/graphr   c                  K   ddl m} |j        j        j        }|j        j        j        }|j        j        j        }	 |j                            t          j
        |            d{V }n## t          $ r t          dd|  d          w xY wt          |t                    r|n|j        }|                    d	i           }	t!          |
          }
|
                    |           d{V }|	                    dg           }t%          ||j                  st          dd          	 |                    | |j                   d{V }t+          |                    dg           |                    dg                     S # t          $ rE}t,                              d| t1          |                     t+          g g           cY d}~S d}~ww xY w)zReturn the knowledge-graph neighbourhood for a document.

    Queries Neo4j for all entities and relationships connected to this document.
    r   )Neo4jClientrt   Nrv   rw   rx   ry   r@   r-   r   r   r   )r   nodesedges)r   r   graph_query_errorr}   )app.infrastructure.neo4j_clientr   rN   rO   rP   r.   r   rS   rH   r   rU   r   r	   rV   rW   rC   r   rQ   r$   r   query_document_graphr   r   r   r(   )r5   r)   r+   r   rP   r.   r   r   rS   r   rY   rZ   r   
graph_datar   s                  r#   get_document_graphr     sN      <;;;;;!++5I ' 1 >L ' 1 >LT"**( + 
 
 
 
 
 
 
 
  T T T4R4R4R4RSSSST !400
C((hmCWWY##F$,???L%%d++++++++D**Y++K[$/:: [4YZZZZ0'<< = 
 
 
 
 
 
 
 

 .."--.."--
 
 
 	
  0 0 0*6QHHH"B/////////0s*   ,A* * B
)AF 
G:GGG)r   r   r   r   r   r    )r'   r(   r)   r*   r+   r
   r   r   )r5   r(   r)   r*   r+   r
   r   r   )r5   r(   r+   r
   r   r   )r'   r(   r   r(   r   r   )r5   r(   r)   r*   r+   r
   r   r   )r5   r(   r)   r*   r+   r
   r   r   )r5   r(   r)   r*   r+   r
   r   r   )5__doc__
__future__r   r   pathlibr   typingr   r   fastapir   r   r	   r
   fastapi.responsesr   r   opensearchpy.exceptionsr   app.api.depsr   r   app.api.schemas.documentr   r   r   r   
app.configr   app.core.permissionr   app.infrastructure.es_clientr   app.infrastructure.redis_clientr   app.utils.loggerr   __name__r   routerr$   rH   r]   ro   __annotations__rr   rq   r   r   r   deleter   r   r   r%   r#   <module>r      s     # " " " " " 				       ! ! ! ! ! ! ! ! > > > > > > > > > > > > 4 4 4 4 4 4 4 4 1 1 1 1 1 1 6 6 6 6 6 6 6 6                   1 1 1 1 1 1 1 1 1 1 1 1 7 7 7 7 7 7 ' ' ' ' ' '	H			+ZL	9	9	99 9 9 9( &7JKK9 9 9 LK9z#	#	# U# 
%	#
 O# 
*# W# 
;# 
<# L# L# 
;# 
<# 	/# #  
:!#    ( O6 6 6 6 A A A AH   d
 d
 d
 ! d
N 9::L L L ;:LD K775 5 5 875p {B( B( B( B(J l;;+0 +0 +0 <;+0 +0 +0r%   