"""Unit tests for graph_builder.py — doc_code unification and placeholder logic.

These tests mock LLM and Neo4j clients to verify:
  - Placeholder docs use doc_code (not doc_number)
  - REFERENCES relation properties use doc_code
  - own_code deduplication uses doc_code with doc_number fallback
  - REFERENCES dedup against typed relations (BASED_ON/AMENDS/REPEALS)

Run:  pytest tests/test_graph_builder_unit.py -v
"""

from __future__ import annotations

from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from app.core.graph_builder import GraphBuilder


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


def _make_builder(
    llm_response: dict[str, Any] | None = None,
) -> tuple[GraphBuilder, AsyncMock]:
    """Build a GraphBuilder wired to mocked LLM and Neo4j clients.

    Args:
        llm_response: Payload the mocked ``chat_json`` coroutine resolves to.
            Any falsy value (``None`` or an empty dict) is replaced by an
            extraction result with no entities, relations or references.

    Returns:
        ``(builder, neo4j_mock)``; the mock's ``merge_document_graph``
        attribute is an ``AsyncMock`` that tests can inspect for recorded calls.
    """
    empty_extraction: dict[str, Any] = {
        "entities": [],
        "relations": [],
        "referenced_doc_numbers": [],
    }
    canned_reply = llm_response if llm_response else empty_extraction

    llm_stub = AsyncMock()
    llm_stub.chat_json = AsyncMock(return_value=canned_reply)

    graph_store = AsyncMock()
    graph_store.merge_document_graph = AsyncMock()

    return GraphBuilder(llm_client=llm_stub, neo4j_client=graph_store), graph_store


def _get_merge_call_args(neo4j_mock: AsyncMock) -> dict[str, Any]:
    """Return the kwargs of the one and only ``merge_document_graph`` call.

    Raises:
        AssertionError: if ``merge_document_graph`` was not called exactly once.
    """
    merge = neo4j_mock.merge_document_graph
    merge.assert_called_once()
    recorded_call = merge.call_args
    return recorded_call.kwargs


# ===========================================================================
# Test: Placeholder documents use doc_code, not doc_number
# ===========================================================================


@pytest.mark.asyncio
class TestPlaceholderDocCode:
    """Placeholder Document nodes must be keyed by doc_code, never doc_number."""

    @staticmethod
    def _stub_schema() -> MagicMock:
        """Return the schema stand-in served by the patched ``get_schema``."""
        stub = MagicMock()
        stub.entity_type_names.return_value = {"Organization", "Region", "PolicyTheme"}
        stub.rel_type_names.return_value = {
            "ISSUED_BY", "APPLIES_TO_REGION", "BELONGS_TO_THEME",
            "BASED_ON", "AMENDS", "REPEALS", "REFERENCES",
        }
        stub.get_norm_rule.return_value = {}
        return stub

    async def test_placeholder_from_llm_relation_uses_doc_code(self):
        """A Document→Document relation returned by the LLM must yield exactly
        one placeholder whose properties carry doc_code and no doc_number."""
        based_on_relation = {
            "source_type": "Document",
            "source_name": "CURRENT_DOC",
            "relation": "BASED_ON",
            "target_type": "Document",
            "target_name": "国发〔2024〕15号",
        }
        builder, neo4j_mock = _make_builder({
            "entities": [],
            "relations": [based_on_relation],
            "referenced_doc_numbers": [],
        })

        with patch("app.core.graph_builder.get_schema", return_value=self._stub_schema()):
            await builder.build_graph(
                doc_id="doc-001",
                metadata={"title": "测试文件", "doc_code": "TEST-001"},
                content="根据《XX法》制定本办法。",
            )

        merge_kwargs = _get_merge_call_args(neo4j_mock)

        # Keep only the property dicts of Document nodes flagged as placeholders.
        placeholder_props = [
            node["properties"]
            for node in merge_kwargs["entities"]
            if node["label"] == "Document" and node["properties"].get("is_placeholder")
        ]
        assert len(placeholder_props) == 1
        props = placeholder_props[0]

        # doc_code is required; the legacy doc_number key must be absent.
        assert props["doc_code"] == "国发〔2024〕15号"
        assert "doc_number" not in props
        assert props["doc_id"] == "ref:国发〔2024〕15号"

    async def test_placeholder_from_regex_scan_uses_doc_code(self):
        """Placeholders and REFERENCES relations derived from the document
        body must use doc_code (not doc_number) in their properties."""
        builder, neo4j_mock = _make_builder({
            "entities": [],
            "relations": [],
            "referenced_doc_numbers": [],
        })

        with patch("app.core.graph_builder.get_schema", return_value=self._stub_schema()):
            # The body embeds a document number for the builder to pick up.
            await builder.build_graph(
                doc_id="doc-002",
                metadata={"title": "关于落实的通知", "doc_code": "TEST-002"},
                content="根据省政办〔2024〕3号文件精神。",
            )

        merge_kwargs = _get_merge_call_args(neo4j_mock)
        entities = merge_kwargs["entities"]
        rels = merge_kwargs["relationships"]

        # NOTE: both loops below are vacuous when the builder emits no
        # REFERENCES relation / placeholder; they only constrain what exists.
        reference_rels = [rel for rel in rels if rel["type"] == "REFERENCES"]
        for rel in reference_rels:
            props = rel.get("properties", {})
            # REFERENCES properties should use doc_code, not doc_number
            assert "doc_number" not in props, (
                f"REFERENCES relation still uses doc_number: {props}"
            )
            if "doc_code" in props:
                assert isinstance(props["doc_code"], str)

        placeholder_nodes = [
            node for node in entities
            if node["label"] == "Document" and node["properties"].get("is_placeholder")
        ]
        for ph in placeholder_nodes:
            assert "doc_number" not in ph["properties"], (
                f"Placeholder still has doc_number: {ph['properties']}"
            )
            assert "doc_code" in ph["properties"]


# ===========================================================================
# Test: own_code deduplication
# ===========================================================================


@pytest.mark.asyncio
class TestOwnCodeDedup:
    """A document must never reference itself, whether its own code comes
    from doc_code or from the legacy doc_number metadata key."""

    @staticmethod
    def _stub_schema() -> MagicMock:
        """Return the schema stand-in served by the patched ``get_schema``."""
        stub = MagicMock()
        stub.entity_type_names.return_value = {"Organization", "Region", "PolicyTheme"}
        stub.rel_type_names.return_value = {
            "ISSUED_BY", "APPLIES_TO_REGION", "BELONGS_TO_THEME",
            "BASED_ON", "AMENDS", "REPEALS", "REFERENCES",
        }
        stub.get_norm_rule.return_value = {}
        return stub

    async def test_no_self_reference_with_doc_code(self):
        """No REFERENCES relation is emitted when the LLM lists the document's
        own doc_code among the referenced numbers."""
        builder, neo4j_mock = _make_builder({
            "entities": [],
            "relations": [],
            "referenced_doc_numbers": ["TEST-003"],
        })

        with patch("app.core.graph_builder.get_schema", return_value=self._stub_schema()):
            await builder.build_graph(
                doc_id="doc-003",
                metadata={"title": "自引用测试", "doc_code": "TEST-003"},
                content="正文。",
            )

        merge_kwargs = _get_merge_call_args(neo4j_mock)
        self_refs = [
            rel for rel in merge_kwargs["relationships"] if rel["type"] == "REFERENCES"
        ]
        assert not self_refs, "Should not create REFERENCES to self"

    async def test_doc_number_fallback_for_self_dedup(self):
        """Self-dedup still applies when metadata carries only doc_number."""
        builder, neo4j_mock = _make_builder({
            "entities": [],
            "relations": [],
            "referenced_doc_numbers": ["OLD-NUM-001"],
        })

        with patch("app.core.graph_builder.get_schema", return_value=self._stub_schema()):
            await builder.build_graph(
                doc_id="doc-004",
                # Legacy metadata: doc_number is present, doc_code is not.
                metadata={"title": "回退测试", "doc_number": "OLD-NUM-001"},
                content="正文。",
            )

        merge_kwargs = _get_merge_call_args(neo4j_mock)
        self_refs = [
            rel for rel in merge_kwargs["relationships"] if rel["type"] == "REFERENCES"
        ]
        assert not self_refs, (
            "Should not create REFERENCES to self even when using doc_number fallback"
        )


# ===========================================================================
# Test: REFERENCES dedup against typed relations
# ===========================================================================


@pytest.mark.asyncio
class TestReferencesDedup:
    """A generic REFERENCES edge must not duplicate a typed relation
    (BASED_ON/AMENDS/REPEALS) that already points at the same document."""

    async def test_no_duplicate_references_for_based_on(self):
        """When the LLM reports BASED_ON for a target and also lists that
        target in referenced_doc_numbers, no REFERENCES relation is merged."""
        target_doc = "省政办〔2024〕3号"
        typed_relation = {
            "source_type": "Document",
            "source_name": "CURRENT_DOC",
            "relation": "BASED_ON",
            "target_type": "Document",
            "target_name": target_doc,
        }
        builder, neo4j_mock = _make_builder({
            "entities": [],
            "relations": [typed_relation],
            # The same document number is repeated in the plain reference list.
            "referenced_doc_numbers": [target_doc],
        })

        schema_stub = MagicMock()
        schema_stub.entity_type_names.return_value = {"Organization", "Region", "PolicyTheme"}
        schema_stub.rel_type_names.return_value = {
            "ISSUED_BY", "APPLIES_TO_REGION", "BELONGS_TO_THEME",
            "BASED_ON", "AMENDS", "REPEALS", "REFERENCES",
        }
        schema_stub.get_norm_rule.return_value = {}

        with patch("app.core.graph_builder.get_schema", return_value=schema_stub):
            # The body deliberately contains no document-number pattern, so a
            # regex scan cannot contribute a differently-shaped match.
            await builder.build_graph(
                doc_id="doc-005",
                metadata={"title": "去重测试", "doc_code": "TEST-005"},
                content="正文内容无文号引用。",
            )

        merge_kwargs = _get_merge_call_args(neo4j_mock)
        generic_refs = [
            rel for rel in merge_kwargs["relationships"] if rel["type"] == "REFERENCES"
        ]
        assert not generic_refs, (
            "REFERENCES should be suppressed when BASED_ON already targets the same doc"
        )
