from govcrawler.storage.attachments import parse_disposition_filename, safe_filename
from govcrawler.storage.filenames import safe_filename as shared_safe_filename
from govcrawler.storage.filenames import with_best_extension, with_extension


def test_plain_filename():
    """A quoted ``filename=`` parameter is returned unchanged."""
    header = 'attachment; filename="notice.pdf"'
    parsed = parse_disposition_filename(header)
    assert parsed == "notice.pdf"


def test_rfc5987_utf8():
    """A percent-encoded ``filename*=UTF-8''...`` value is decoded to text."""
    header = "attachment; filename*=UTF-8''%E5%85%AC%E5%91%8A.pdf"
    expected = "公告.pdf"
    assert parse_disposition_filename(header) == expected


def test_regular_filename_url_encoded_utf8():
    """A plain ``filename=`` value that is percent-encoded is decoded too."""
    header = 'attachment; filename="%E5%85%AC%E5%91%8A%E5%85%A8%E6%96%87.pdf"'
    decoded = parse_disposition_filename(header)
    expected = "公告全文.pdf"
    assert decoded == expected


def test_missing_header_returns_none():
    """Both ``None`` and the empty string yield ``None``."""
    for absent_header in (None, ""):
        assert parse_disposition_filename(absent_header) is None


def test_safe_filename_strips_illegal():
    """Forward and backward slashes do not survive sanitising."""
    # (separator that must be absent, raw name containing it)
    cases = (
        ("/", "a/b.pdf"),
        ("\\", "a\\b.pdf"),
    )
    for separator, raw_name in cases:
        assert separator not in safe_filename(raw_name)


def test_safe_filename_keeps_chinese():
    """Chinese characters in the name are retained after sanitising."""
    cleaned = safe_filename("公告全文.pdf")
    assert "公告" in cleaned


def test_safe_filename_strips_cross_platform_special_chars():
    """Several special characters are removed while ``.pdf`` is kept."""
    cleaned = safe_filename('年度检查/100小时检查:*?"<>|.pdf')
    # NOTE(review): the input also contains '"', '<', '>' and '|', but only
    # the four characters below are asserted on — confirm whether the
    # remaining ones should be checked as well.
    for forbidden in ("/", ":", "*", "?"):
        assert forbidden not in cleaned
    assert cleaned.endswith(".pdf")


def test_with_extension_uses_url_extension_for_link_text():
    """Suffix-less link text gains the ``.pdf`` suffix of the second name."""
    link_text = "年度检查和100小时检查的范围和详细项目"
    url_name = "5265eed4db054814b58af67ecb2a2a14.pdf"
    expected = "年度检查和100小时检查的范围和详细项目.pdf"
    assert with_extension(link_text, url_name) == expected


def test_safe_filename_preserves_extension_when_truncating_long_names():
    """An over-long name is cut to ``max_len`` but still ends in ``.txt``."""
    limit = 80
    long_name = "很长的标题" * 100 + ".txt"
    truncated = shared_safe_filename(long_name, max_len=limit)
    assert len(truncated) <= limit
    assert truncated.endswith(".txt")


def test_with_best_extension_skips_extensionless_disposition_name():
    """A candidate without a suffix ("download") does not block ``.pdf``."""
    # Local names reflect the roles suggested by the test name and values;
    # presumably title, disposition name, URL name, content type — confirm
    # against the signature of with_best_extension.
    title = "年度检查和100小时检查的范围和详细项目"
    disposition_name = "download"
    url_name = "5265eed4db054814b58af67ecb2a2a14.pdf"
    content_type = "application/octet-stream"
    result = with_best_extension(title, disposition_name, url_name, content_type)
    assert result == "年度检查和100小时检查的范围和详细项目.pdf"


def test_with_best_extension_ignores_dots_in_link_text_titles():
    """A period inside the title does not stop ``.pdf`` from being appended."""
    title = "附件1. 年度检查和100小时检查的范围和详细项目"
    url_name = "5265eed4db054814b58af67ecb2a2a14.pdf"
    expected = "附件1. 年度检查和100小时检查的范围和详细项目.pdf"
    assert with_best_extension(title, url_name) == expected


def test_safe_filename_does_not_treat_title_period_as_extension():
    """The period in "1.0" is title text; only the final ``.txt`` is the suffix."""
    repeated_title = "关于做好1.0版工作的通知" * 20 + ".txt"
    shortened = shared_safe_filename(repeated_title, max_len=80)
    assert shortened.endswith(".txt")
    # The text following the "1." must still appear somewhere in the result.
    assert "0版工作的通知" in shortened
