"""Unit tests for Phase 1 (事项图) implementation.

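Covers:
  - Schema activation: phase_1 entity and relationship types loaded correctly
  - Prompt generation: Phase 1 rules included when Matter entities are active
  - ID generation: _generate_entity_id() deterministic, type-safe
  - _AUTO_ID_FIELDS coverage
  - Pipeline auto-ID: build_graph() fills in matter_id when the LLM omits it
  - Matter→Region: APPLIES_TO_REGION is not auto-created for a Matter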

Run:  pytest tests/test_phase1_unit.py -v
"""

from __future__ import annotations

import textwrap
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
import yaml

from app.core.graph_builder import (
    _AUTO_ID_FIELDS,
    _generate_entity_id,
    GraphBuilder,
)
from app.prompts.entity_extraction import build_system_prompt


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

_SCHEMA_PATH = Path(__file__).resolve().parent.parent / "app" / "config" / "graph_schema.yaml"


def _load_schema_with_phases(phases: list[str]) -> dict:
    """Parse the on-disk graph_schema.yaml and swap in *phases* as ``active_phases``.

    Args:
        phases: Phase names to store under the ``active_phases`` key.

    Returns:
        The parsed YAML mapping with ``active_phases`` overwritten.
    """
    schema_data = yaml.safe_load(_SCHEMA_PATH.read_text(encoding="utf-8"))
    schema_data["active_phases"] = phases
    return schema_data


def _load_schema_obj(phases: list[str]):
    """Load a GraphSchema object with specific active phases.

    The real schema YAML is copied to a temporary file with ``active_phases``
    overridden, the loader module is pointed at that file, and the schema is
    force-reloaded from it.

    Args:
        phases: Phase names to activate for the returned schema.

    Returns:
        Whatever ``loader.get_schema(force_reload=True)`` returns while the
        loader's ``_SCHEMA_PATH`` points at the temporary file.
    """
    import tempfile
    import app.core.graph_schema_loader as loader

    data = _load_schema_with_phases(phases)
    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".yaml", delete=False, encoding="utf-8",
    )
    tmp_path = Path(tmp.name)
    try:
        # Close the handle even if serialization fails; delete=False keeps the
        # file on disk so the loader can reopen it by path afterwards.
        with tmp:
            yaml.dump(data, tmp, allow_unicode=True)

        original_path = loader._SCHEMA_PATH
        try:
            loader._SCHEMA_PATH = tmp_path
            return loader.get_schema(force_reload=True)
        finally:
            # Restore the module-level path and reload so later callers do not
            # keep using the temporary schema.
            loader._SCHEMA_PATH = original_path
            loader.get_schema(force_reload=True)
    finally:
        # Outermost cleanup: runs even when the dump or the restore reload
        # raises, so no temp file is left behind.
        tmp_path.unlink(missing_ok=True)


# ===========================================================================
# Test: Schema activation with Phase 1
# ===========================================================================


class TestPhase1SchemaActivation:
    """Check which entity/relationship types the schema exposes per active-phase set."""

    # Labels the assertions below treat as Phase 1 additions.
    _PHASE1_ENTITIES = ("Matter", "Condition", "Material", "TimeLimit", "TargetGroup")
    _PHASE1_RELATIONSHIPS = (
        "GOVERNS", "HAS_CONDITION", "REQUIRES_MATERIAL",
        "HAS_TIME_LIMIT", "APPLIES_TO_TARGET", "HANDLED_BY",
    )

    def test_phase1_entity_types_loaded(self):
        """Every Phase 1 entity label is present with phase_0 + phase_1 active."""
        loaded = _load_schema_obj(["phase_0", "phase_1"]).entity_type_names()
        for label in self._PHASE1_ENTITIES:
            assert label in loaded, f"{label} not in entity_type_names"

    def test_phase1_relationship_types_loaded(self):
        """Every Phase 1 relationship type is present with phase_0 + phase_1 active."""
        loaded = _load_schema_obj(["phase_0", "phase_1"]).rel_type_names()
        for rel_type in self._PHASE1_RELATIONSHIPS:
            assert rel_type in loaded, f"{rel_type} not in rel_type_names"

    def test_phase1_applies_to_region_loaded(self):
        """APPLIES_TO_REGION is available when phase_0 + phase_1 are active."""
        loaded = _load_schema_obj(["phase_0", "phase_1"]).rel_type_names()
        assert "APPLIES_TO_REGION" in loaded

    def test_phase0_only_excludes_phase1_entities(self):
        """With only phase_0 active, no Phase 1 entity label may be loaded."""
        loaded = _load_schema_obj(["phase_0"]).entity_type_names()
        for label in self._PHASE1_ENTITIES:
            assert label not in loaded, f"{label} should not be in phase_0-only schema"

    def test_phase0_only_excludes_phase1_relationships(self):
        """With only phase_0 active, no Phase 1 relationship type may be loaded."""
        loaded = _load_schema_obj(["phase_0"]).rel_type_names()
        for rel_type in self._PHASE1_RELATIONSHIPS:
            assert rel_type not in loaded, f"{rel_type} should not be in phase_0-only schema"

    def test_phase1_entity_count(self):
        """Phase 0+1 exposes exactly 8 entity types (3 from P0 + 5 from P1)."""
        entity_names = _load_schema_obj(["phase_0", "phase_1"]).entity_type_names()
        assert len(entity_names) == 8


# ===========================================================================
# Test: Prompt generation with Phase 1
# ===========================================================================


class TestPhase1PromptGeneration:
    """Check that build_system_prompt emits Phase 1 rules only for Phase 1 entity sets."""

    def _make_entity_types(self, include_phase1: bool = True) -> list[dict]:
        """Return Phase 0 entity descriptors, optionally followed by the Phase 1 ones."""
        phase0_types = [
            {"name": "Organization", "description": "政府机关"},
            {"name": "Region", "description": "行政地域"},
            {"name": "PolicyTheme", "description": "政策主题"},
        ]
        if not include_phase1:
            return phase0_types
        phase1_types = [
            {"name": "Matter", "description": "具体行政事项"},
            {"name": "Condition", "description": "适用条件"},
            {"name": "Material", "description": "申请材料"},
            {"name": "TimeLimit", "description": "时限要求"},
            {"name": "TargetGroup", "description": "适用对象类型"},
        ]
        return phase0_types + phase1_types

    def _make_rel_types(self) -> list[dict]:
        """Return the two relationship descriptors shared by every prompt test."""
        issued_by = {
            "name": "ISSUED_BY",
            "source_labels": ["Document"],
            "target_labels": ["Organization"],
            "description": "发布",
        }
        governs = {
            "name": "GOVERNS",
            "source_labels": ["Document"],
            "target_labels": ["Matter"],
            "description": "规范事项",
        }
        return [issued_by, governs]

    def _build_prompt(self, include_phase1: bool):
        """Build the system prompt for the chosen entity set and the fixed relationships."""
        return build_system_prompt(
            self._make_entity_types(include_phase1),
            self._make_rel_types(),
        )

    def test_phase1_rules_included_when_matter_present(self):
        prompt = self._build_prompt(True)
        for snippet in (
            "Matter 粒度控制",
            "TargetGroup 与 Organization 的区别",
            "APPLIES_TO_REGION",
            "不要虚构",
        ):
            assert snippet in prompt

    def test_phase1_rules_absent_when_only_phase0(self):
        prompt = self._build_prompt(False)
        for snippet in ("Matter 粒度控制", "不要虚构"):
            assert snippet not in prompt

    def test_phase1_example_in_prompt(self):
        prompt = self._build_prompt(True)
        for snippet in ("政府采购供应商资格审查", "REQUIRES_MATERIAL"):
            assert snippet in prompt

    def test_handled_by_vs_issued_by_distinction(self):
        prompt = self._build_prompt(True)
        for snippet in ("HANDLED_BY", "ISSUED_BY"):
            assert snippet in prompt


# ===========================================================================
# Test: _generate_entity_id
# ===========================================================================


class TestGenerateEntityId:
    """Check that _generate_entity_id yields stable, label-prefixed identifiers."""

    def test_deterministic(self):
        """Identical (label, name) input gives an identical ID."""
        first = _generate_entity_id("Matter", "建设用地审批")
        second = _generate_entity_id("Matter", "建设用地审批")
        assert first == second

    def test_different_names_different_ids(self):
        """Two names under the same label must not collide."""
        land_approval = _generate_entity_id("Matter", "建设用地审批")
        travel_expense = _generate_entity_id("Matter", "差旅报销")
        assert land_approval != travel_expense

    def test_different_types_different_ids(self):
        """The same name under two labels gives distinct, label-prefixed IDs."""
        shared_name = "建设用地审批"
        as_matter = _generate_entity_id("Matter", shared_name)
        as_target = _generate_entity_id("TargetGroup", shared_name)
        assert as_matter != as_target
        assert as_matter.startswith("matter_")
        assert as_target.startswith("targetgroup_")

    def test_format(self):
        """ID shape is ``<label_lower>_<12-character suffix>``."""
        entity_id = _generate_entity_id("Condition", "注册资本不低于100万元")
        assert entity_id.startswith("condition_")
        # Split on the first underscore only: label prefix, then the suffix.
        _prefix, separator, suffix = entity_id.partition("_")
        assert separator == "_"
        assert len(suffix) == 12

    def test_uses_normalized_name(self):
        """The function hashes its input verbatim; it does no normalization itself.

        Callers are expected to normalize names first, so a padded and an
        unpadded spelling are different inputs and must give different IDs.
        """
        unpadded = _generate_entity_id("Material", "营业执照副本")
        padded = _generate_entity_id("Material", " 营业执照副本 ")
        assert unpadded != padded


# ===========================================================================
# Test: _AUTO_ID_FIELDS coverage
# ===========================================================================


class TestAutoIdFields:
    """Check the label -> id-property mapping held in _AUTO_ID_FIELDS."""

    def test_all_phase1_entities_covered(self):
        """The mapping's keys are exactly the five Phase 1 entity labels."""
        actual_labels = set(_AUTO_ID_FIELDS.keys())
        assert actual_labels == {
            "Matter", "Condition", "Material", "TimeLimit", "TargetGroup",
        }

    def test_field_names(self):
        """Each Phase 1 label maps to its snake_case ``*_id`` property name."""
        expected_fields = {
            "Matter": "matter_id",
            "Condition": "condition_id",
            "Material": "material_id",
            "TimeLimit": "time_limit_id",
            "TargetGroup": "target_group_id",
        }
        for label, field_name in expected_fields.items():
            assert _AUTO_ID_FIELDS[label] == field_name

    def test_phase0_entities_not_in_auto_id(self):
        """Phase 0 labels and Document have no auto-generated id field."""
        phase0_labels = ("Organization", "Region", "PolicyTheme", "Document")
        for phase0_label in phase0_labels:
            assert phase0_label not in _AUTO_ID_FIELDS


# ===========================================================================
# Test: Auto ID generation in graph builder pipeline
# ===========================================================================


@pytest.mark.asyncio
class TestAutoIdInPipeline:
    """Check that build_graph hands Neo4j a Matter entity carrying a generated matter_id."""

    async def test_matter_gets_auto_id(self):
        """A Matter returned by the LLM without matter_id gets one before the write."""
        matter_name = "建设用地审批"
        extraction = {
            "entities": [
                {"type": "Matter", "name": matter_name, "properties": {"description": "审批建设用地"}},
            ],
            "relations": [
                {
                    "source_type": "Document",
                    "source_name": "CURRENT_DOC",
                    "relation": "GOVERNS",
                    "target_type": "Matter",
                    "target_name": matter_name,
                },
            ],
            "referenced_doc_numbers": [],
        }

        # Stub both collaborators so nothing leaves the process.
        fake_llm = AsyncMock()
        fake_llm.chat_json = AsyncMock(return_value=extraction)
        fake_neo4j = AsyncMock()
        fake_neo4j.merge_document_graph = AsyncMock()

        # Stand-in schema exposing the Phase 0 + Phase 1 type names.
        fake_schema = MagicMock()
        fake_schema.entity_type_names.return_value = {
            "Organization", "Region", "PolicyTheme",
            "Matter", "Condition", "Material", "TimeLimit", "TargetGroup",
        }
        fake_schema.rel_type_names.return_value = {
            "ISSUED_BY", "APPLIES_TO_REGION", "BELONGS_TO_THEME",
            "BASED_ON", "AMENDS", "REPEALS", "REFERENCES",
            "GOVERNS", "HAS_CONDITION", "REQUIRES_MATERIAL",
            "HAS_TIME_LIMIT", "APPLIES_TO_TARGET", "HANDLED_BY",
        }
        fake_schema.get_norm_rule.return_value = {}

        builder = GraphBuilder(llm_client=fake_llm, neo4j_client=fake_neo4j)

        with patch("app.core.graph_builder.get_schema", return_value=fake_schema):
            await builder.build_graph(
                doc_id="doc-p1-001",
                metadata={"title": "建设用地管理办法", "doc_code": "P1-001"},
                content="关于建设用地审批的管理办法。",
            )

        fake_neo4j.merge_document_graph.assert_called_once()
        written_entities = fake_neo4j.merge_document_graph.call_args.kwargs["entities"]

        # Exactly one Matter entity should have been written.
        matter_entities = [
            entity for entity in written_entities if entity["label"] == "Matter"
        ]
        assert len(matter_entities) == 1

        properties = matter_entities[0]["properties"]
        assert "matter_id" in properties
        generated_id = properties["matter_id"]
        assert generated_id.startswith("matter_")
        # The ID must match what the generator gives for the same label/name.
        assert generated_id == _generate_entity_id("Matter", matter_name)


# ===========================================================================
# Test: Matter→Region not generated without explicit constraint
# ===========================================================================


@pytest.mark.asyncio
class TestMatterRegionConstraint:
    """APPLIES_TO_REGION from Matter to Region should only appear
    when the LLM explicitly extracts it (indicating an independent
    geographic constraint). The pipeline itself does not auto-create
    Matter→Region — it passes through what the LLM returns."""

    async def test_no_auto_matter_region(self):
        """When LLM returns Document→Region but NOT Matter→Region,
        the pipeline should NOT auto-create Matter→Region."""
        llm_resp = {
            "entities": [
                {"type": "Region", "name": "广东省", "properties": {}},
                {"type": "Matter", "name": "差旅报销", "properties": {}},
            ],
            "relations": [
                {
                    "source_type": "Document",
                    "source_name": "CURRENT_DOC",
                    "relation": "APPLIES_TO_REGION",
                    "target_type": "Region",
                    "target_name": "广东省",
                },
                {
                    "source_type": "Document",
                    "source_name": "CURRENT_DOC",
                    "relation": "GOVERNS",
                    "target_type": "Matter",
                    "target_name": "差旅报销",
                },
                # NOTE: No Matter→Region relation here
            ],
            "referenced_doc_numbers": [],
        }

        # Stub the LLM and Neo4j clients so the test stays in-process.
        llm_mock = AsyncMock()
        llm_mock.chat_json = AsyncMock(return_value=llm_resp)
        neo4j_mock = AsyncMock()
        neo4j_mock.merge_document_graph = AsyncMock()

        builder = GraphBuilder(llm_client=llm_mock, neo4j_client=neo4j_mock)

        with patch("app.core.graph_builder.get_schema") as mock_schema:
            # Stand-in schema exposing the Phase 0 + Phase 1 type names.
            schema = MagicMock()
            schema.entity_type_names.return_value = {
                "Organization", "Region", "PolicyTheme",
                "Matter", "Condition", "Material", "TimeLimit", "TargetGroup",
            }
            schema.rel_type_names.return_value = {
                "ISSUED_BY", "APPLIES_TO_REGION", "BELONGS_TO_THEME",
                "BASED_ON", "AMENDS", "REPEALS", "REFERENCES",
                "GOVERNS", "HAS_CONDITION", "REQUIRES_MATERIAL",
                "HAS_TIME_LIMIT", "APPLIES_TO_TARGET", "HANDLED_BY",
            }
            schema.get_norm_rule.return_value = {}
            mock_schema.return_value = schema

            await builder.build_graph(
                doc_id="doc-p1-002",
                metadata={"title": "差旅管理办法", "doc_code": "P1-002"},
                content="本办法适用于广东省范围内的差旅报销。",
            )

        # Fail with a clear message if nothing was written, rather than with
        # an AttributeError on ``call_args`` being None.
        neo4j_mock.merge_document_graph.assert_called_once()
        call_kwargs = neo4j_mock.merge_document_graph.call_args.kwargs
        rels = call_kwargs["relationships"]

        # Document→Region should exist
        doc_region_rels = [
            r for r in rels
            if r["type"] == "APPLIES_TO_REGION"
            and r["from_label"] == "Document"
        ]
        assert len(doc_region_rels) == 1

        # Matter→Region should NOT exist (LLM didn't return it)
        matter_region_rels = [
            r for r in rels
            if r["type"] == "APPLIES_TO_REGION"
            and r["from_label"] == "Matter"
        ]
        assert len(matter_region_rels) == 0, (
            "Matter→Region should not be auto-created; "
            "only explicit LLM extraction should produce it"
        )
