"""Unit tests for the SSRF guard (is_safe_to_fetch) in compliance/paths.py.

These tests bypass the conftest auto-downgrade by importing is_safe_to_fetch
directly and monkeypatching the DNS resolver, so we exercise the real
decision tree (scheme / path / always-deny / IP-range / allowlist).
"""
from __future__ import annotations

import pytest

from govcrawler.compliance import paths as paths_mod
from govcrawler.compliance.paths import is_safe_to_fetch


@pytest.fixture(autouse=True)
def _clear_dns_cache():
    """Reset the resolver's cache ahead of every test in this module.

    The reset only happens when ``paths_mod._resolve_host`` exposes a
    ``cache_clear`` attribute; otherwise the fixture is a no-op. Nothing
    runs after the ``yield`` — the original author's note is that
    monkeypatch will already have restored the real function by then.
    """
    resolver = paths_mod._resolve_host
    if not hasattr(resolver, "cache_clear"):
        yield
        return
    resolver.cache_clear()
    yield


def _fake_resolver(monkeypatch, mapping: dict[str, tuple[str, ...]]):
    """Swap ``paths_mod._resolve_host`` for a lookup into *mapping*.

    Hosts missing from *mapping* resolve to an empty tuple. The patch is
    installed through *monkeypatch*, so it is undone when the test ends.
    """
    def _lookup(host: str) -> tuple[str, ...]:
        if host in mapping:
            return mapping[host]
        return ()

    monkeypatch.setattr(paths_mod, "_resolve_host", _lookup)


def test_rejects_non_http_scheme(monkeypatch):
    """URLs with file:, javascript: or ftp: schemes must be refused."""
    _fake_resolver(monkeypatch, {"a.com": ("8.8.8.8",)})
    bad_urls = (
        "file:///etc/passwd",
        "javascript:alert(1)",
        "ftp://a.com/x",
    )
    for url in bad_urls:
        # The URL doubles as the failure message so the offender is obvious.
        assert is_safe_to_fetch(url) is False, url


def test_rejects_loopback_literal(monkeypatch):
    """IPv4 and IPv6 loopback literals must be refused."""
    _fake_resolver(monkeypatch, {})
    for url in ("http://127.0.0.1/", "http://[::1]/"):
        assert is_safe_to_fetch(url) is False, url


def test_rejects_private_literal(monkeypatch):
    """Literal addresses in 10/8, 172.16/12 and 192.168/16 must be refused."""
    _fake_resolver(monkeypatch, {})
    private_ips = ("10.0.0.5", "172.16.1.1", "192.168.1.1")
    for ip in private_ips:
        url = f"http://{ip}/some"
        verdict = is_safe_to_fetch(url)
        assert verdict is False, ip


def test_rejects_aws_metadata_literal(monkeypatch):
    """169.254.169.254 = AWS / Alibaba Cloud / Tencent Cloud metadata service."""
    _fake_resolver(monkeypatch, {})
    metadata_url = "http://169.254.169.254/latest/meta-data/"
    verdict = is_safe_to_fetch(metadata_url)
    assert verdict is False


def test_rejects_localhost_alias(monkeypatch):
    """The name ``localhost`` (stubbed to resolve to 127.0.0.1) is refused."""
    loopback_dns = {"localhost": ("127.0.0.1",)}
    _fake_resolver(monkeypatch, loopback_dns)
    verdict = is_safe_to_fetch("http://localhost/admin")
    assert verdict is False


def test_rejects_dns_resolves_to_private(monkeypatch):
    """A harmless-looking host name is refused when DNS maps it to a private IP."""
    rebinding_dns = {"evil.example.com": ("10.0.0.5",)}
    _fake_resolver(monkeypatch, rebinding_dns)
    verdict = is_safe_to_fetch("http://evil.example.com/")
    assert verdict is False


def test_accepts_public_resolved(monkeypatch):
    """A host that resolves to a public IP is accepted when no allowlist is set.

    The allowlist environment variable is removed first so the outcome does
    not depend on whatever GOVCRAWLER_HOST_ALLOWLIST happens to be exported
    in the developer's shell or the CI environment.
    """
    monkeypatch.delenv("GOVCRAWLER_HOST_ALLOWLIST", raising=False)
    _fake_resolver(monkeypatch, {"www.gov.cn": ("103.235.46.40",)})
    assert is_safe_to_fetch("https://www.gov.cn/zhengce/xxgk/") is True


def test_rejects_when_dns_fails(monkeypatch):
    """An empty DNS answer (host could not be resolved) must fail closed.

    Rationale from the original author: refusing up front is better than
    letting httpx hang for 20s on a confusing error.
    """
    no_records: dict = {}
    _fake_resolver(monkeypatch, no_records)
    verdict = is_safe_to_fetch("https://nonexistent.example/")
    assert verdict is False


def test_path_blacklist_still_enforced(monkeypatch):
    """Denied paths are refused even on a host that resolves to a public IP."""
    _fake_resolver(monkeypatch, {"www.gov.cn": ("103.235.46.40",)})
    denied_urls = (
        "https://www.gov.cn/admin/users",
        "https://www.gov.cn/login",
    )
    for url in denied_urls:
        assert is_safe_to_fetch(url) is False, url


def test_allowlist_blocks_off_list(monkeypatch):
    """With an allowlist set, only hosts covered by it are accepted.

    Both hosts resolve to public IPs, so the differing verdicts come from
    the allowlist and not from the IP-range check.
    """
    monkeypatch.setenv("GOVCRAWLER_HOST_ALLOWLIST", "gov.cn,news.cn")
    public_dns = {
        "www.gov.cn": ("103.235.46.40",),
        "evil.com": ("8.8.8.8",),
    }
    _fake_resolver(monkeypatch, public_dns)

    on_list = is_safe_to_fetch("https://www.gov.cn/foo")
    assert on_list is True
    off_list = is_safe_to_fetch("https://evil.com/foo")
    assert off_list is False


def test_allowlist_unset_allows_any_public(monkeypatch):
    """With no allowlist variable set, any host resolving to a public IP passes."""
    monkeypatch.delenv("GOVCRAWLER_HOST_ALLOWLIST", raising=False)
    public_dns = {"x.com": ("8.8.8.8",)}
    _fake_resolver(monkeypatch, public_dns)
    verdict = is_safe_to_fetch("https://x.com/foo")
    assert verdict is True
