import asyncio

from app.api.v1.lobster_kb import (
    _build_detail_chunk_search_body,
    _ensure_prefix,
    _fetch_detail_chunks_by_offset,
    _hit_from_qa_doc,
    _map_lobster_org_to_office_id,
    _render_detail_markdown,
    _render_detail_markdown_page,
    _safe_source_url,
    _select_detail_chunks,
)


def test_lobster_org_id_maps_to_office_id():
    """Check the org-id to office-id mapping for three input shapes.

    A bare id gains the ``O_`` prefix, an already-prefixed id comes back
    unchanged, and a whitespace-only id maps to the empty string.
    """
    expectations = (
        ("17", "O_17"),
        ("O_17", "O_17"),
        ("  ", ""),
    )
    for raw_org_id, office_id in expectations:
        assert _map_lobster_org_to_office_id(raw_org_id) == office_id


def test_ensure_prefix_keeps_empty_values_empty():
    """Check that empty inputs stay empty and the prefix is applied once.

    ``None`` and ``""`` both yield ``""``; a bare value gets the prefix and
    an already-prefixed value is not prefixed a second time.
    """
    prefix = "U_"
    expectations = (
        (None, ""),
        ("", ""),
        ("42", "U_42"),
        ("U_42", "U_42"),
    )
    for value, prefixed in expectations:
        assert _ensure_prefix(value, prefix) == prefixed


def test_hit_from_qa_doc_matches_lobster_contract():
    """Check the full QA-doc to hit field mapping.

    The expected hit uses camelCase keys, carries the source URL under both
    ``url`` and ``sourceUrl``, and takes its snippet from the second passage
    because the first passage is empty.
    """
    qa_doc = {
        "doc_id": "doc-1",
        "title": "Policy",
        "passages": ["", "usable snippet"],
        "score": 0.91,
        "source_type": "graph",
        "source_label": "Graph Evidence",
        "source_url": "https://example.com/policy",
        "publish_date": "2026-01-02",
    }
    expected_hit = {
        "docId": "doc-1",
        "title": "Policy",
        "snippet": "usable snippet",
        "score": 0.91,
        "source": "graph",
        "scopeName": "Graph Evidence",
        "url": "https://example.com/policy",
        "sourceUrl": "https://example.com/policy",
        "updatedAt": "2026-01-02",
    }

    assert _hit_from_qa_doc(qa_doc) == expected_hit


def test_hit_from_qa_doc_uses_source_metadata_url_fallback():
    """Check that the URL is taken from ``source_metadata`` as a fallback.

    The input has no top-level ``source_url``; both URL keys of the hit are
    expected to carry the value nested under ``source_metadata``.
    """
    meta_url = "https://example.com/from-meta"
    qa_doc = {
        "doc_id": "doc-1",
        "title": "Policy",
        "source_metadata": {"source_url": "https://example.com/from-meta"},
    }

    hit = _hit_from_qa_doc(qa_doc)

    for url_key in ("url", "sourceUrl"):
        assert hit[url_key] == meta_url


def test_hit_from_qa_doc_strips_html_highlight_tags_by_default():
    """Check that ``<em>`` highlight tags are removed by default.

    Both title and snippet lose their ``<em>``/``</em>`` markers, while the
    literal ``<policy-code>`` text is expected to survive untouched.
    """
    highlighted_title = "<em>广东省</em>人民政府 <policy-code>"
    highlighted_passage = (
        "关于<em>深化</em><em>产</em><em>教</em><em>融合</em> <policy-code>"
    )
    qa_doc = {
        "doc_id": "doc-1",
        "title": highlighted_title,
        "passages": [highlighted_passage],
    }

    hit = _hit_from_qa_doc(qa_doc)

    assert hit["title"] == "广东省人民政府 <policy-code>"
    assert hit["snippet"] == "关于深化产教融合 <policy-code>"


def test_hit_from_qa_doc_preserves_html_highlight_tags_when_requested():
    """Check that ``escape_html=False`` leaves highlight markup intact.

    Title and snippet are expected to equal the input strings exactly,
    ``<em>`` tags included.
    """
    highlighted_title = "<em>广东省</em>人民政府 <policy-code>"
    highlighted_passage = (
        "关于<em>深化</em><em>产</em><em>教</em><em>融合</em> <policy-code>"
    )
    qa_doc = {
        "doc_id": "doc-1",
        "title": highlighted_title,
        "passages": [highlighted_passage],
    }

    hit = _hit_from_qa_doc(qa_doc, escape_html=False)

    # With escaping disabled the output must match the input verbatim.
    assert hit["title"] == highlighted_title
    assert hit["snippet"] == highlighted_passage


def test_safe_source_url_rejects_non_http_schemes():
    """Check URL sanitising for two rejected schemes and one accepted URL.

    ``javascript:`` and ``file:`` URLs are expected to collapse to ``""``;
    an ``https`` URL is expected to pass through unchanged.
    """
    expectations = (
        ("javascript:alert(1)", ""),
        ("file:///etc/passwd", ""),
        ("https://example.com/path", "https://example.com/path"),
    )
    for candidate, sanitised in expectations:
        assert _safe_source_url(candidate) == sanitised


def test_render_detail_markdown_includes_metadata_and_chunks():
    """Check that rendered markdown contains metadata, summary and chunk text.

    The output is searched for the title heading, the doc-number and
    issuing-org bullets, the summary section, and the chunk's heading,
    page marker and content.
    """
    metadata = {
        "title": "Policy",
        "doc_number": "NO.1",
        "issuing_org": "Office",
        "summary": "Summary text",
    }
    chunk_hits = [
        {
            "_source": {
                "content": "Chunk text",
                "heading_hierarchy": ["Chapter 1"],
                "page_number": 3,
            }
        }
    ]

    rendered = _render_detail_markdown(metadata, chunk_hits, max_chars=10_000)

    expected_fragments = (
        "# Policy",
        "- Doc Number: NO.1",
        "- Issuing Org: Office",
        "## Summary",
        "Summary text",
        "### Chapter 1",
        "> Page: 3",
        "Chunk text",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_render_detail_markdown_truncates_large_content():
    """Check that oversized content is cut and marked as truncated.

    A 200-character chunk is rendered with ``max_chars=80``; the result
    must stay under 120 characters and end with the truncation marker.
    """
    metadata = {"title": "Policy"}
    oversized_chunk = {"_source": {"content": "x" * 200}}

    rendered = _render_detail_markdown(metadata, [oversized_chunk], max_chars=80)

    assert len(rendered) < 120
    assert rendered.endswith("...[content truncated]")


def test_detail_chunk_query_does_not_reapply_merged_acl_filter():
    """Check the default chunk search body: hash term query, no ACL, size 500.

    The query must be a plain ``content_hash`` term, the string ``acl_ids``
    must not appear anywhere in the body, and the page size must be 500.
    """
    content_hash = "hash-1"

    search_body = _build_detail_chunk_search_body(content_hash)

    expected_query = {"term": {"content_hash": "hash-1"}}
    assert search_body["query"] == expected_query
    # Stringify the whole body so a nested ACL filter would also be caught.
    assert "acl_ids" not in str(search_body)
    assert search_body["size"] == 500


def test_detail_chunk_scan_uses_search_after_when_provided():
    """Check that a supplied ``search_after`` cursor is placed in the body."""
    search_body = _build_detail_chunk_search_body("hash-1", search_after=[499])

    assert search_body["search_after"] == [499]


def test_fetch_detail_chunks_by_offset_can_read_past_first_scan_window():
    """Check that a window starting at offset 505 can be fetched.

    The stub backend holds 520 hits and honours ``search_after``; the three
    chunks returned for ``offset=505, limit=3`` must be 505, 506 and 507.
    """
    total_chunks = 520

    class Raw:
        """Stub search backend serving pre-built hits in index order."""

        def __init__(self):
            self.hits = []
            for idx in range(total_chunks):
                self.hits.append(
                    {"_source": {"chunk_index": idx, "content": str(idx)}, "sort": [idx]}
                )

        async def search(self, index, body):
            # Resume right after the last sort value when a cursor is given.
            cursor = body.get("search_after")
            if cursor:
                start = int(cursor[0]) + 1
            else:
                start = 0
            stop = start + body["size"]
            return {"hits": {"hits": self.hits[start:stop]}}

    class ES:
        """Stub client exposing the backend under ``raw``."""

        raw = Raw()

    fetch = _fetch_detail_chunks_by_offset(ES(), "hash-1", offset=505, limit=3)
    selected = asyncio.run(fetch)

    chunk_indexes = [hit["_source"]["chunk_index"] for hit in selected]
    assert chunk_indexes == [505, 506, 507]


def test_select_detail_chunks_can_locate_relevant_section_with_context():
    """Check query-based chunk selection and the page info it reports.

    For the query ``"procurement policy"`` with ``limit=2``, chunks 1 and 2
    are expected back, and the page info must report a query-based match
    at offset 2 / chunk index 2.
    """
    contents = (
        "general intro",
        "budget rules",
        "special procurement policy",
        "appendix",
    )
    hits = [
        {"_source": {"chunk_index": position, "content": text}}
        for position, text in enumerate(contents)
    ]

    selected, page = _select_detail_chunks(
        hits, offset=0, limit=2, query="procurement policy"
    )

    selected_indexes = [hit["_source"]["chunk_index"] for hit in selected]
    assert selected_indexes == [1, 2]
    assert page["locatedBy"] == "query"
    assert page["matchedOffset"] == 2
    assert page["matchedChunkIndex"] == 2


def test_render_detail_markdown_page_stops_on_char_budget_before_next_chunk():
    """Check that rendering stops before a chunk that would exceed the budget.

    With ``max_chars=120`` the short first chunk is rendered and the
    500-character second chunk is left out: one chunk returned, one
    completed, truncation flagged, and no character cursor.
    """
    metadata = {"title": "Policy"}
    small_chunk = {"_source": {"chunk_index": 0, "content": "first chunk"}}
    large_chunk = {"_source": {"chunk_index": 1, "content": "x" * 500}}

    page = _render_detail_markdown_page(
        metadata, [small_chunk, large_chunk], max_chars=120
    )
    content, returned, completed, truncated, next_char_offset = page

    assert "first chunk" in content
    assert returned == 1
    assert completed == 1
    assert truncated is True
    assert next_char_offset is None


def test_render_detail_markdown_page_returns_char_cursor_for_large_first_chunk():
    """Check that an oversized first chunk yields a positive char cursor.

    A single 600-character chunk rendered with ``max_chars=120`` must be
    partially returned (returned 1, completed 0, truncated) together with
    a positive integer cursor.
    """
    metadata = {"title": "Policy"}
    oversized_chunk = {"_source": {"chunk_index": 0, "content": "abcdef" * 100}}

    page = _render_detail_markdown_page(metadata, [oversized_chunk], max_chars=120)
    content, returned, completed, truncated, next_char_offset = page

    assert "abc" in content
    assert returned == 1
    assert completed == 0
    assert truncated is True
    assert isinstance(next_char_offset, int)
    assert next_char_offset > 0


def test_render_detail_markdown_page_continues_from_char_cursor():
    """Check that rendering resumes from a previously returned char cursor.

    The same oversized chunk is rendered twice with ``max_chars=120``: the
    first call supplies the cursor, the second call is given it as
    ``chunk_char_offset``. The second page must still be a partial render
    of the chunk and must report a cursor further along than the first.
    """
    metadata = {"title": "Policy"}
    oversized_chunk = {"_source": {"chunk_index": 0, "content": "abcdef" * 100}}

    _, _, _, _, next_char_offset = _render_detail_markdown_page(
        metadata,
        [oversized_chunk],
        max_chars=120,
    )
    # Validate the cursor before reusing it: a None cursor would otherwise
    # only surface later as a TypeError in the ordering comparison below.
    assert isinstance(next_char_offset, int)

    content, returned, completed, truncated, following_char_offset = _render_detail_markdown_page(
        metadata,
        [oversized_chunk],
        max_chars=120,
        chunk_char_offset=next_char_offset,
    )

    assert returned == 1
    assert completed == 0
    assert truncated is True
    # Same guard for the second cursor so a missing value fails as an assertion.
    assert isinstance(following_char_offset, int)
    assert following_char_offset > next_char_offset
    assert "abcdef" in content
