"""Business flow tests: Research (deep Q&A) pipeline.

Tests the end-to-end research flow:
  1. Ensure documents are ingested (or skip)
  2. Ask a research question via SSE stream
  3. Parse the SSE chunks
  4. Verify the stream contains a 'done' event (expected order: thinking → reference → text → done)
  5. Test multi-turn conversation with session_id

Requires running Docker infra + Celery worker + LLM service.
"""

from __future__ import annotations

import json

import pytest
from httpx import AsyncClient


def _parse_sse_events(raw_text: str) -> list[dict]:
    """Parse SSE text/event-stream body into a list of JSON events."""
    events = []
    for line in raw_text.split("\n"):
        line = line.strip()
        if line.startswith("data:"):
            data_str = line[5:].strip()
            if data_str:
                try:
                    events.append(json.loads(data_str))
                except json.JSONDecodeError:
                    events.append({"raw": data_str})
    return events


@pytest.mark.asyncio
class TestResearchFlow:
    """End-to-end research Q&A flow against the ``/research`` endpoint.

    Every test first logs in through the mock login endpoint and sends the
    resulting bearer token along with its research requests.
    """

    async def _get_token(self, client: AsyncClient, api_prefix: str) -> str:
        """Log in via ``{api_prefix}/mock/login`` and return the access token.

        A failed login is reported with the HTTP status and response body
        instead of surfacing later as an opaque ``KeyError``.
        """
        resp = await client.post(
            f"{api_prefix}/mock/login",
            json={
                "username": "admin",
                "password": "admin123",
            },
        )
        assert 200 <= resp.status_code < 300, (
            f"Mock login failed with HTTP {resp.status_code}: {resp.text}"
        )
        payload = resp.json()
        assert "access_token" in payload, (
            f"Login response has no 'access_token': {payload}"
        )
        return payload["access_token"]

    async def _auth_headers(
        self,
        client: AsyncClient,
        api_prefix: str,
    ) -> dict[str, str]:
        """Return the ``Authorization`` header for a freshly logged-in user."""
        token = await self._get_token(client, api_prefix)
        return {"Authorization": f"Bearer {token}"}

    async def test_research_sse_stream(
        self,
        client: AsyncClient,
        api_prefix: str,
    ):
        """Research query should return an SSE stream containing a 'done' event."""
        headers = await self._auth_headers(client, api_prefix)

        resp = await client.post(
            f"{api_prefix}/research",
            json={"question": "数字政府改革建设的主要目标和措施是什么？"},
            headers=headers,
        )
        assert resp.status_code == 200

        # Parse SSE events from the fully-read response body
        events = _parse_sse_events(resp.text)

        if not events:
            pytest.skip("No SSE events returned (possibly no documents ingested)")

        # Only dict events carrying a "type" key take part in the check;
        # raw or otherwise untyped payloads are ignored.
        event_types = [
            e["type"] for e in events if isinstance(e, dict) and "type" in e
        ]
        assert "done" in event_types, f"Missing 'done' event. Got types: {event_types}"

    async def test_research_multiturn(
        self,
        client: AsyncClient,
        api_prefix: str,
    ):
        """Multi-turn research should accept the same session_id on both turns."""
        headers = await self._auth_headers(client, api_prefix)

        session_id = "test-multiturn-session"

        # First turn
        resp1 = await client.post(
            f"{api_prefix}/research",
            json={
                "question": "清远市的数字政府规划有哪些重点？",
                "session_id": session_id,
            },
            headers=headers,
        )
        assert resp1.status_code == 200

        # Second turn (follow-up) reuses the session_id from the first turn
        resp2 = await client.post(
            f"{api_prefix}/research",
            json={
                "question": "请详细介绍其中的技术架构部分",
                "session_id": session_id,
            },
            headers=headers,
        )
        assert resp2.status_code == 200

    async def test_research_empty_question_rejected(
        self,
        client: AsyncClient,
        api_prefix: str,
    ):
        """Empty question should be rejected with a client error."""
        headers = await self._auth_headers(client, api_prefix)

        resp = await client.post(
            f"{api_prefix}/research",
            json={"question": ""},
            headers=headers,
        )
        # Should be 422 or 400
        assert resp.status_code in (400, 422)
