from __future__ import annotations

import json
from types import SimpleNamespace

from govcrawler.adapters import xinhua_xjp


class _Resp:
    status_code = 200

    def __init__(self, text: str, url: str):
        self.text = text
        self.url = url

    def json(self):
        return json.loads(self.text)

    def raise_for_status(self):
        pass


# Minimal entry-page HTML: the <ul class="xpage-content-list"> carries the
# load-more hints the adapter parses — "datasource:abc123" (datasource id)
# and preview="ds_" (JSON filename prefix). The test below expects these to
# combine into a ".../ds_abc123.json" URL, so keep the markup in sync with
# those assertions.
LIST_HTML = """
<html><body>
<ul class="xpage-content-list" data="datasource:abc123" preview="ds_">
  <li><div class="tit"><a href="/first.html">首屏</a></div></li>
</ul>
</body></html>
"""


def _rt() -> SimpleNamespace:
    return SimpleNamespace(
        site=SimpleNamespace(site_code="xinhua_xjp", base_url="https://www.news.cn"),
        target=SimpleNamespace(
            id=175,
            dept_id=None,
            entry_url="https://www.news.cn/politics/leaders/xijinping/jhqw.htm",
            channel_name="讲话全文",
            channel_path="新华网·习近平报道集/讲话全文",
            content_category="时政",
            content_subcategory="讲话全文",
        ),
    )


def test_fetch_list_page_reads_load_more_datasource(monkeypatch):
    """fetch_list_page should discover the load-more datasource from the entry
    HTML, fetch the derived ``ds_<id>.json`` URL with paging params, and map
    the JSON rows into list items carrying the target's channel metadata."""
    ds_payload = {
        "datasource": [
            {
                "contentId": "row-1",
                "showTitle": "<a href='https://www.news.cn/a.html'>第一篇</a>",
                "publishUrl": "/politics/leaders/202601/a/c.html",
                "publishTime": "2026-01-01 08:00:00",
            },
            {
                "contentId": "row-2",
                "title": "第二篇",
                "publishUrl": "https://www.news.cn/politics/leaders/202601/b/c.html",
                "publishTime": "2026-01-02 08:00:00",
            },
            {
                "contentId": "row-3",
                "showTitle": "<a href='https://www.news.cn/c.html'>第三篇</a>",
                "publishTime": "2026-01-03 08:00:00",
            },
        ]
    }
    requested_urls = []

    def _fake_get(url, **_kw):
        # Serve the entry page for the .htm URL, the datasource JSON otherwise.
        target_url = str(url)
        requested_urls.append(target_url)
        if target_url.endswith("jhqw.htm"):
            body = LIST_HTML
        else:
            body = json.dumps(ds_payload, ensure_ascii=False)
        return _Resp(body, target_url)

    monkeypatch.setattr(xinhua_xjp.httpx, "get", _fake_get)

    list_url, items, fr = xinhua_xjp.fetch_list_page(
        _rt(),
        page_num=2,
        params={"page_size": 2},
    )

    assert fr.status == 200
    # Second request must be the datasource JSON derived from the HTML hints.
    assert requested_urls[1] == "https://www.news.cn/politics/leaders/xijinping/ds_abc123.json"
    assert list_url.endswith("ds_abc123.json?page=2&pageSize=2")
    assert len(items) == 1
    parsed = items[0]
    assert parsed.native_post_id == "row-3"
    assert parsed.title == "第三篇"
    assert parsed.url == "https://www.news.cn/c.html"
    assert parsed.publish_date.isoformat() == "2026-01-03"
    assert parsed.channel_path == "新华网·习近平报道集/讲话全文"
    assert parsed.metadata_json["datasource_id"] == "abc123"


def test_extract_datasource_url_requires_datasource_marker():
    """_extract_datasource_url must reject HTML lacking the datasource hint."""
    try:
        xinhua_xjp._extract_datasource_url("<html></html>", "https://www.news.cn/a/b.htm")
    except ValueError as exc:
        message = str(exc)
    else:
        raise AssertionError("expected ValueError")
    # The error should name the missing marker so the failure is diagnosable.
    assert "datasource" in message
