
    mi                        U d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	 ddl
mZ ddlZddlmZ  ee          Z e	e                                          j        j        dz  d	z  Z ej        d
          Z ed           G d d                      Zdaded<    ej                    ZddddZddZddZd dZdS )!u  Load graph schema definitions from ``config/graph_schema.yaml``.

Provides a cached, validated view of entity types, relationship types,
normalization rules, and knowledge-category mappings.  All other graph
modules should obtain type definitions through this loader rather than
hard-coding them.

Usage::

    from app.core.graph_schema_loader import get_schema

    schema = get_schema()
    entity_types = schema.entity_types          # list[dict]
    rel_types    = schema.relationship_types    # list[dict]
    norm_rules   = schema.normalization_rules   # dict[str, dict]

图谱 Schema 加载器模块。
从 config/graph_schema.yaml 读取并缓存实体类型、关系类型、
规范化规则和知识分类映射等配置。所有图谱相关模块都应通过
get_schema() 获取类型定义，而非硬编码。
支持按 active_phases 过滤类型定义，实现分阶段功能开放。
    )annotationsN)	dataclassfield)Path)Any)
get_loggerconfigzgraph_schema.yamlz^[A-Za-z_]\w{0,49}$T)frozenc                  B   e Zd ZU dZ ee          Zded<    ee          Zded<    ee	          Z
ded<    ee          Zded	<    ee	          Zd
ed<    ee          Zded<    ee          Zded<   ddZddZddZddZddZd dZddZddZdS )!GraphSchemau   Immutable snapshot of the graph schema configuration.

    图谱 Schema 的不可变快照，包含实体类型、关系类型、规范化规则等配置。
    )default_factoryzlist[dict[str, Any]]entity_typesrelationship_typesdict[str, dict[str, Any]]normalization_rulesz	list[str]document_propertieszdict[str, str]knowledge_category_mapping	doc_typesactive_phasesreturnset[str]c                $    d | j         D             S )z<Return set of active entity-type names (excluding Document).c                    h | ]
}|d          S name .0ets     6D:\work\zm-rag\backend\app\core\graph_schema_loader.py	<setcomp>z0GraphSchema.entity_type_names.<locals>.<setcomp>B   s    777r6
777    r   selfs    r    entity_type_nameszGraphSchema.entity_type_names@   s    77T%67777r"   c                $    d | j         D             S )z-Return set of active relationship-type names.c                    h | ]
}|d          S r   r   r   rts     r    r!   z-GraphSchema.rel_type_names.<locals>.<setcomp>F   s    ===r6
===r"   r   r$   s    r    rel_type_nameszGraphSchema.rel_type_namesD   s    ==T%<====r"   c                $    d | j         D             S )z<Return mapping ``name -> full definition`` for entity types.c                     i | ]}|d          |S r   r   r   s     r    
<dictcomp>z/GraphSchema.entity_type_map.<locals>.<dictcomp>J   s    ;;;26
B;;;r"   r#   r$   s    r    entity_type_mapzGraphSchema.entity_type_mapH   s    ;;):;;;;r"   c                $    d | j         D             S )zBReturn mapping ``name -> full definition`` for relationship types.c                     i | ]}|d          |S r   r   r)   s     r    r/   z,GraphSchema.rel_type_map.<locals>.<dictcomp>N   s    AAA26
BAAAr"   r+   r$   s    r    rel_type_mapzGraphSchema.rel_type_mapL   s    AA)@AAAAr"   entity_namestrdict[str, Any]c                8    | j                             |i           S )z;Return normalization rule for *entity_name*, or empty dict.)r   get)r%   r4   s     r    get_norm_rulezGraphSchema.get_norm_ruleP   s    '++K<<<r"   category_namec                8    | j                             |d          S )z2Map a Chinese knowledge-category name to its code. )r   r8   )r%   r:   s     r    knowledge_category_codez#GraphSchema.knowledge_category_codeT   s    .22="EEEr"   c                2    |                                  dhz  S )z4Return all valid node labels including ``Document``.Document)r&   r$   s    r    all_node_labelszGraphSchema.all_node_labelsX   s    %%'':,66r"   c                *    |                                  S )z)Return all valid relationship type names.)r,   r$   s    r    all_rel_typeszGraphSchema.all_rel_types\   s    ""$$$r"   N)r   r   )r   r   )r4   r5   r   r6   )r:   r5   r   r5   )__name__
__module____qualname____doc__r   listr   __annotations__r   dictr   r   r   r   r   r&   r,   r0   r3   r9   r=   r@   rB   r   r"   r    r   r   /   s         
 */t)D)D)DLDDDD/4uT/J/J/JJJJJ5:U45P5P5PPPPP%*U4%@%@%@@@@@16t1L1L1LLLLL 5666I6666$uT:::M::::8 8 8 8> > > >< < < <B B B B= = = =F F F F7 7 7 7% % % % % %r"   r   zGraphSchema | None_cacheFforce_reloadrL   boolr   c                    t           5  t          | st          cddd           S t                      at          cddd           S # 1 swxY w Y   dS )zReturn the cached :class:`GraphSchema`.

    On first call (or when *force_reload* is ``True``) the YAML file is
    read, validated and cached.  Subsequent calls return the same object.
    N)_cache_lockrJ   _load_and_validaterK   s    r    
get_schemarQ   k   s     
  l        $%%                 s   AAA
A
c                 "    t          d          S )z0Force-reload the schema from disk and return it.TrK   )rQ   r   r"   r    reload_schemarS   z   s    4((((r"   c            	     
   t                                           st          dt                      t          t           dd          5 } t	          j        |           pi }d d d            n# 1 swxY w Y   dD ]U}|                    |          }|<t          |t                    s't          d| dt          |          j                   V|                    d          pd	g}d
|v rd|vrt          d          |                    d          pg }g }|D ]U}|                    dd	          }||v r9t          |                    dd          d           |                    |           V|                    d          pg }	g }
|	D ]U}|                    dd	          }||v r9t          |                    dd          d           |
                    |           V|                    d          pi }|                    d          pg }|                    d          pi }|                    d          pg }t          ||
|||||          }t                               d|d |D             d |
D                        |S )NzGraph schema config not found: rzutf-8)encoding)r   r   zgraph_schema.yaml: 'z' must be a list, got r   phase_0phase_2bphase_2auZ   phase_2b depends on phase_2a — cannot enable phase_2b without phase_2a in active_phases.r   phaser   r<   entity_typer   relationship_typer   r   r   r   )r   r   r   r   r   r   r   graph_schema_loadedc                    g | ]
}|d          S r   r   r   s     r    
<listcomp>z&_load_and_validate.<locals>.<listcomp>   s    888Rbj888r"   c                    g | ]
}|d          S r   r   r)   s     r    r_   z&_load_and_validate.<locals>.<listcomp>   s    ;;;2BvJ;;;r"   )r   r   r   )_SCHEMA_PATHexistsFileNotFoundErroropenyaml	safe_loadr8   
isinstancerG   	TypeErrortyperC   
ValueError_validate_identifierappendr   loggerinfo)fhrawrequired_fieldvaluer   all_entity_typesr   r   rZ   rB   	rel_typesr*   
norm_rules	doc_props
kc_mappingr   schemas                    r    rP   rP      s7      
<l<<
 
 	
 
lC'	2	2	2 7b"nR006B7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 A  ''Zt%<%<.~ . .E{{+. .  
  #ww77FI;M ]""z'F'F1
 
 	
 $'77>#:#:#@b!L $ $w	**M!! !3!3]CCC### !$(< = = CMI ! !w	**M!! !3!35HIIIR   "%''*?"@"@"FBJ77#899?RI!$)E!F!F!L"J77;//52I!$&%#-#  F KK#88<888;;;;;	     Ms   A++A/2A/r   r5   kindNonec                    | st          d| d          t                              |           s"t          d| d|  dt          j                   d S )NzEmpty name for z in graph_schema.yamlzInvalid z name 'z#' in graph_schema.yaml. Must match )rj   _SAFE_IDENTIFIER_REmatchpattern)r   ry   s     r    rk   rk      s     HF4FFFGGG$$T** 
8t 8 8D 8 8-58 8
 
 	

 
r"   )rL   rM   r   r   )r   r   )r   r5   ry   r5   r   rz   ) rF   
__future__r   re	threadingdataclassesr   r   pathlibr   typingr   re   app.utils.loggerr   rC   rm   __file__resolveparentra   compiler|   r   rJ   rH   LockrO   rQ   rS   rP   rk   r   r"   r    <module>r      s    . # " " " " " 				     ( ( ( ( ( ( ( (              ' ' ' ' ' '	H		tH~~%%''.5@CVV bj!788  $.% .% .% .% .% .% .% .%j " ! ! ! ! in (-      ) ) ) )E E E EP
 
 
 
 
 
r"   