"""Business flow tests: Document Ingest → Search → Detail.

These tests exercise the full lifecycle:
  1. Login with test user
  2. Upload a PDF via webhook
  3. Wait for ingestion to complete
  4. Search for the ingested document
  5. View document detail
  6. Clean up (delete the test document)

Requires running Docker infra (OpenSearch, Neo4j, Redis) + Celery worker.
Run with:  pytest tests/test_flow_ingest_search.py --base-url http://localhost:8900 -v
"""

from __future__ import annotations

import asyncio
import json
import time
from pathlib import Path

import pytest
from httpx import AsyncClient

# Maximum seconds to wait for ingestion to complete; used as the default
# ``timeout`` of ``TestIngestSearchFlow._wait_for_ingest``.
_INGEST_TIMEOUT = 180
# Seconds slept between two consecutive ingest-status polls.
_POLL_INTERVAL = 3


@pytest.mark.asyncio
class TestIngestSearchFlow:
    """End-to-end: Ingest → Search → Detail → Delete."""

    async def _get_admin_token(self, client: AsyncClient, api_prefix: str) -> str:
        """Log in as the admin test user and return the access token.

        Args:
            client: HTTP client used to reach the service under test.
            api_prefix: URL prefix prepended to every endpoint path.

        Returns:
            The ``access_token`` field of the login response.

        Raises:
            AssertionError: If the login endpoint does not answer with 200.
        """
        resp = await client.post(f"{api_prefix}/mock/login", json={
            "username": "admin",
            "password": "admin123",
        })
        assert resp.status_code == 200, f"Login failed: {resp.text}"
        return resp.json()["access_token"]

    async def _wait_for_ingest(
        self,
        client: AsyncClient,
        api_prefix: str,
        task_id: str,
        headers: dict,
        timeout: int = _INGEST_TIMEOUT,
    ) -> dict:
        """Poll the ingest status endpoint until the task finishes or time runs out.

        The status is fetched every ``_POLL_INTERVAL`` seconds. The most recent
        response body is remembered so that a failure message can report the
        last seen status and error instead of a bare timeout.

        Args:
            client: HTTP client used to reach the service under test.
            api_prefix: URL prefix prepended to every endpoint path.
            task_id: Identifier of the ingest task to watch.
            headers: Request headers sent with every poll.
            timeout: Maximum number of seconds to keep polling.

        Returns:
            The last status body, whose ``status`` is ``"COMPLETED"`` or
            ``"FAILED"``.

        Raises:
            AssertionError: If a status poll does not answer with 200.
            pytest.fail: If ``"RETRY"`` has been observed five times, or if the
                task has not finished within ``timeout`` seconds.
        """
        # Monotonic clock: a wall-clock adjustment while waiting must neither
        # stretch nor cut short the deadline.
        start = time.monotonic()
        last_body: dict = {}
        # Counts every poll that saw RETRY (cumulative, not consecutive).
        retry_count = 0
        while time.monotonic() - start < timeout:
            resp = await client.get(
                f"{api_prefix}/ingest/status/{task_id}",
                headers=headers,
            )
            assert resp.status_code == 200, (
                f"Status poll for task {task_id} failed: "
                f"{resp.status_code} {resp.text}"
            )
            last_body = resp.json()
            status = last_body.get("status", "")

            if status in ("COMPLETED", "FAILED"):
                return last_body

            # Detect stuck RETRY — fail fast with diagnostic info
            if status == "RETRY":
                retry_count += 1
                if retry_count >= 5:
                    error = last_body.get("error", "unknown")
                    pytest.fail(
                        f"Ingest task {task_id} is stuck in RETRY state.\n"
                        f"  Last status body: {last_body}\n"
                        "  This usually means the Celery task is repeatedly failing.\n"
                        "  Common causes:\n"
                        "    - OpenSearch API incompatibility (document= vs body=)\n"
                        "    - Connection errors to OpenSearch/Neo4j\n"
                        "    - LLM/Embedding service unavailable\n"
                        f"  Error: {error}"
                    )

            await asyncio.sleep(_POLL_INTERVAL)

        elapsed = round(time.monotonic() - start, 1)
        last_status = last_body.get("status", "unknown")
        last_error = last_body.get("error", "none")
        pytest.fail(
            f"Ingest task {task_id} did not complete within {timeout}s.\n"
            f"  Last status: {last_status}\n"
            f"  Last error: {last_error}\n"
            f"  Elapsed: {elapsed}s\n"
            f"  Full response: {last_body}"
        )

    async def test_full_ingest_search_flow(
        self,
        client: AsyncClient,
        api_prefix: str,
        example_pdf_path: Path,
    ):
        """Full business flow: ingest a PDF, search for it, view detail, delete.

        Once the upload has been accepted, the delete request is issued from a
        ``finally`` clause, so a failing intermediate step does not leave the
        test document behind. The delete response is only asserted on when all
        earlier steps passed; otherwise the original failure propagates.
        """

        # ── Step 1: Login as admin ────────────────────────────────────────
        token = await self._get_admin_token(client, api_prefix)
        headers = {"Authorization": f"Bearer {token}"}

        doc_id = f"flow-test-{int(time.time())}"
        # Reused for the upload metadata, the search query and the detail check.
        title = "自动化流程测试文档"

        # ── Step 2: Upload PDF via webhook ────────────────────────────────
        meta = json.dumps({
            "doc_id": doc_id,
            "title": title,
            "issuing_org": "测试单位",
            "doc_type": "通知",
            "acl_ids": ["A_01", "O_01", "D_01"],
        })
        with example_pdf_path.open("rb") as f:
            resp = await client.post(
                f"{api_prefix}/ingest/webhook/document",
                data={"metadata": meta},
                files={"file": (example_pdf_path.name, f, "application/pdf")},
            )
        assert resp.status_code == 200, f"Webhook upload failed: {resp.text}"

        try:
            task_id = resp.json()["task_id"]

            # ── Step 3: Wait for ingestion to complete ─────────────────────
            result = await self._wait_for_ingest(client, api_prefix, task_id, headers)
            assert result["status"] == "COMPLETED", f"Ingest failed: {result.get('error')}"

            # ── Step 4: Verify document appears in admin logs ──────────────
            resp = await client.get(
                f"{api_prefix}/admin/ingest-logs",
                params={"page": 1, "page_size": 50},
                headers=headers,
            )
            assert resp.status_code == 200, f"Ingest log listing failed: {resp.text}"
            logged_doc_ids = {record["doc_id"] for record in resp.json()["records"]}
            assert doc_id in logged_doc_ids, "Ingested doc not found in admin logs"

            # ── Step 5: Search for the document ────────────────────────────
            # Wait a moment for OpenSearch to refresh
            await asyncio.sleep(2)

            resp = await client.post(
                f"{api_prefix}/search",
                json={"query": title, "page": 1, "page_size": 10},
                headers=headers,
            )
            assert resp.status_code == 200, f"Search failed: {resp.text}"
            search_body = resp.json()
            # NOTE(review): this only proves that "total" is present and
            # numeric; it passes even when the document is not found.
            assert search_body["total"] >= 0  # May or may not find via text search
            # Broader search to find our document
            resp = await client.post(
                f"{api_prefix}/search",
                json={"query": "测试", "page_size": 50},
                headers=headers,
            )
            assert resp.status_code == 200, f"Broad search failed: {resp.text}"

            # ── Step 6: View document detail ───────────────────────────────
            resp = await client.get(
                f"{api_prefix}/document/{doc_id}",
                headers=headers,
            )
            assert resp.status_code == 200, f"Document detail failed: {resp.status_code}"
            detail = resp.json()
            assert detail["doc_id"] == doc_id
            assert detail["title"] == title

            # ── Step 7: Check document graph ───────────────────────────────
            resp = await client.get(
                f"{api_prefix}/document/{doc_id}/graph",
                headers=headers,
            )
            assert resp.status_code == 200, f"Document graph failed: {resp.text}"
        finally:
            # ── Step 8: Clean up — delete the document ─────────────────────
            # ``finally`` rather than ``except Exception``: ``pytest.fail``
            # raises an outcome exception that must also trigger the cleanup.
            delete_resp = await client.delete(
                f"{api_prefix}/admin/document/{doc_id}",
                headers=headers,
            )

        # Only reached when every step above passed.
        assert delete_resp.status_code == 200, f"Document delete failed: {delete_resp.text}"

        # Verify deletion
        resp = await client.get(
            f"{api_prefix}/document/{doc_id}",
            headers=headers,
        )
        assert resp.status_code in (404, 200)  # May be 404 or stale


@pytest.mark.asyncio
class TestMultiDocIngestFlow:
    """Upload several PDFs via the webhook and verify each ingest task is queued."""

    async def test_batch_ingest(
        self,
        client: AsyncClient,
        api_prefix: str,
        all_example_pdfs: list[Path],
    ):
        """Upload up to three example PDFs and verify they are accepted.

        The test checks that every upload returns a task id and that each
        task reports one of the statuses PENDING, PROCESSING or COMPLETED.
        It does not wait for ingestion to finish and does not delete the
        uploaded documents.
        """
        # Login
        resp = await client.post(f"{api_prefix}/mock/login", json={
            "username": "admin",
            "password": "admin123",
        })
        assert resp.status_code == 200, f"Login failed: {resp.text}"
        token = resp.json()["access_token"]
        headers = {"Authorization": f"Bearer {token}"}

        # One timestamp for the whole batch; the index keeps ids distinct even
        # when two files share the same first 15 characters of their stem.
        batch_stamp = int(time.time())

        task_ids = []
        for index, pdf in enumerate(all_example_pdfs[:3]):  # Limit to 3 for speed
            doc_id = f"batch-{batch_stamp}-{index}-{pdf.stem[:15]}"
            meta = json.dumps({
                "doc_id": doc_id,
                "title": pdf.stem,
                "acl_ids": ["A_01"],
            })
            with pdf.open("rb") as f:
                resp = await client.post(
                    f"{api_prefix}/ingest/webhook/document",
                    data={"metadata": meta},
                    files={"file": (pdf.name, f, "application/pdf")},
                )
            assert resp.status_code == 200, f"Upload failed for {pdf.name}: {resp.text}"
            task_ids.append(resp.json()["task_id"])

        assert task_ids, "No documents were uploaded"

        # Verify all tasks were queued
        for tid in task_ids:
            resp = await client.get(
                f"{api_prefix}/ingest/status/{tid}",
                headers=headers,
            )
            assert resp.status_code == 200, f"Status check failed for task {tid}: {resp.text}"
            status = resp.json()["status"]
            assert status in ("PENDING", "PROCESSING", "COMPLETED"), (
                f"Unexpected status for task {tid}: {status}"
            )
