from __future__ import annotations

import mimetypes
import re
from pathlib import Path

from slugify import slugify


# Device names that must not be used as a file's base name on Windows.
# Entries are stored upper-case, so callers have to upper-case a candidate
# before testing membership.
WINDOWS_RESERVED_NAMES = {"CON", "PRN", "AUX", "NUL"} | {
    f"{port}{index}" for port in ("COM", "LPT") for index in range(1, 10)
}

# Lower-case extensions (without the dot) that count as a real file suffix.
# A trailing ".xxx" on a name is only split off as an extension when its
# lower-cased form appears here; anything else stays part of the stem.
KNOWN_EXTENSIONS = {
    # office / document formats
    "doc", "docx", "ofd", "pdf", "txt", "wps", "xls", "xlsx",
    # web pages
    "htm", "html",
    # images
    "jpeg", "jpg", "png",
    # archives and opaque binary payloads
    "bin", "rar", "zip",
}

# Budget, in UTF-8 encoded bytes, that safe_filename allots to the stem plus
# its known extension; it is enforced in addition to the character-based
# ``max_len`` limit.
MAX_FILENAME_BYTES = 200


def _split_known_extension(name: str) -> tuple[str, str]:
    """Split *name* into ``(stem, ".ext")`` if it ends in a known extension.

    The text after the last dot is accepted as an extension only when it is
    1-8 ASCII letters/digits and its lower-cased form is in
    ``KNOWN_EXTENSIONS``.  The returned extension keeps the original casing
    and its leading dot.

    Returns:
        ``(stem, ".ext")`` on a match, otherwise ``(name, "")``.
    """
    head, sep, tail = name.rpartition(".")
    if not sep:
        # No dot at all, so there is nothing to split.
        return name, ""
    if re.fullmatch(r"[A-Za-z0-9]{1,8}", tail) is None:
        return name, ""
    if tail.lower() not in KNOWN_EXTENSIONS:
        return name, ""
    return head, "." + tail


def _truncate_utf8_bytes(text: str, max_bytes: int) -> str:
    total = 0
    out: list[str] = []
    for ch in text:
        size = len(ch.encode("utf-8"))
        if total + size > max_bytes:
            break
        out.append(ch)
        total += size
    return "".join(out)


def safe_filename(name: str, max_len: int = 180) -> str:
    """Return a sanitised file name derived from *name*.

    Steps, in order:

    1. ``slugify`` is called with Unicode allowed, ``_`` as separator and a
       pattern covering backslash, slash, ``: * ? " < > |`` and ASCII control
       characters.  (Any further normalisation is whatever the slugify
       library applies by default -- see its documentation.)
    2. Leading/trailing spaces, dots and underscores are stripped and runs of
       underscores are collapsed; an empty result becomes ``"attachment"``.
    3. A known extension is split off and the stem is truncated so that stem
       plus extension fits both ``max_len`` characters and
       ``MAX_FILENAME_BYTES`` UTF-8 bytes.  The stem always keeps at least
       one character, and falls back to ``"attachment"`` if truncation
       leaves nothing usable.
    4. If the stem's first dot-separated component is a Windows reserved
       device name, the stem is prefixed with ``_``.

    Args:
        name: Raw file name or title.
        max_len: Maximum number of characters for stem plus known extension.

    Returns:
        The sanitised name.  The ``_`` prefix from step 4 and the
        ``"attachment"`` fallback are added after the limits are applied, so
        the result may exceed them slightly in those cases.
    """
    cleaned = slugify(
        name,
        allow_unicode=True,
        separator="_",
        regex_pattern=r'[\\/:*?"<>|\x00-\x1f]',
    ).strip(" ._")
    cleaned = re.sub(r"_+", "_", cleaned)
    if not cleaned:
        cleaned = "attachment"

    stem, ext_part = _split_known_extension(cleaned)

    # Character limit first, then the (usually stricter for CJK) byte limit.
    max_stem_len = max(1, max_len - len(ext_part))
    if len(stem) > max_stem_len:
        stem = stem[:max_stem_len].rstrip(" ._") or "attachment"
    max_stem_bytes = max(1, MAX_FILENAME_BYTES - len(ext_part.encode("utf-8")))
    if len(stem.encode("utf-8")) > max_stem_bytes:
        stem = _truncate_utf8_bytes(stem, max_stem_bytes).rstrip(" ._") or "attachment"

    # Windows resolves a reserved device name followed by *any* extension(s)
    # to the device itself ("NUL.tar.gz" == "NUL"), so test only the part
    # before the first dot, ignoring trailing spaces, instead of the whole
    # stem.  This also covers names whose extension is not a known one.
    device_part = stem.partition(".")[0].rstrip(" ")
    if device_part.upper() in WINDOWS_RESERVED_NAMES:
        stem = f"_{stem}"
    return f"{stem}{ext_part}"


def _ext_of(name: str) -> str:
    """Return the known extension of *name*, lower-cased and without the dot.

    Returns an empty string when *name* does not end in a known extension.
    """
    suffix = _split_known_extension(name)[1]
    if not suffix:
        return ""
    # Drop the leading "." and normalise the case.
    return suffix[1:].lower()


def extension_from_content_type(content_type: str | None) -> str:
    """Map a content-type header value to a file extension without the dot.

    Anything after the first ``;`` (parameters such as ``charset``) is
    ignored and the media type is trimmed and lower-cased before being looked
    up with ``mimetypes.guess_extension``.

    Args:
        content_type: Header value such as ``"application/pdf; charset=x"``,
            or ``None``.

    Returns:
        The guessed extension (``".jpe"`` is reported as ``"jpg"``), or an
        empty string when the input is empty or no extension is known.
    """
    if not content_type:
        return ""
    media_type, _, _params = content_type.partition(";")
    guessed = mimetypes.guess_extension(media_type.strip().lower())
    if not guessed:
        return ""
    # Some mimetypes tables answer ".jpe" for JPEG; prefer the common spelling.
    return "jpg" if guessed == ".jpe" else guessed.lstrip(".")


def with_extension(name: str, ext_source: str | None) -> str:
    """Append an extension to *name* when it does not already have a known one.

    *ext_source* is interpreted first as a file name ending in a known
    extension and, failing that, as a content-type string.

    Args:
        name: File name to complete.
        ext_source: File name or content-type value to derive the extension
            from; ``None`` or an unrecognised value leaves *name* unchanged.

    Returns:
        *name* unchanged if it already ends in a known extension, if no
        extension can be derived, or if it already ends in the derived
        extension; otherwise ``"<name>.<ext>"``.
    """
    if _ext_of(name):
        return name
    ext = _ext_of(ext_source or "") or extension_from_content_type(ext_source)
    if not ext:
        return name
    # The content-type lookup can yield extensions outside KNOWN_EXTENSIONS
    # (e.g. "csv"), which the check above does not see; avoid turning
    # "data.csv" into "data.csv.csv".
    if name.lower().endswith(f".{ext.lower()}"):
        return name
    return f"{name}.{ext}"


def with_best_extension(name: str, *ext_sources: str | None) -> str:
    """Append the first derivable extension from *ext_sources* to *name*.

    Each source is interpreted first as a file name ending in a known
    extension and, failing that, as a content-type string.  Sources are tried
    in order and the first one that yields an extension wins.

    Args:
        name: File name to complete.
        *ext_sources: File names or content-type values; ``None`` and
            unrecognised values are skipped.

    Returns:
        *name* unchanged if it already ends in a known extension, if no
        source yields an extension, or if it already ends in the extension
        that was found; otherwise ``"<name>.<ext>"``.
    """
    if _ext_of(name):
        return name
    lowered = name.lower()
    for source in ext_sources:
        ext = _ext_of(source or "") or extension_from_content_type(source)
        if not ext:
            continue
        # The content-type lookup can yield extensions outside
        # KNOWN_EXTENSIONS (e.g. "csv"), which the check above does not see;
        # avoid turning "data.csv" into "data.csv.csv".
        if lowered.endswith(f".{ext.lower()}"):
            return name
        return f"{name}.{ext}"
    return name


def dedupe_filename(abs_dir: Path, name: str, digest: str) -> str:
    """Return a sanitised file name that avoids an existing entry in *abs_dir*.

    The name is first passed through ``safe_filename``.  If nothing with that
    name exists in *abs_dir* it is returned as is; otherwise the first 12
    characters of *digest* are appended to the part before the last dot (or
    to the whole name when it contains no dot).

    Args:
        abs_dir: Directory the file is going to be stored in.
        name: Raw file name.
        digest: String whose first 12 characters are used as the
            disambiguating suffix.

    Returns:
        The file name to use.  Only the sanitised name is checked for
        existence; the digest-suffixed name is returned without a check.
    """
    safe = safe_filename(name)
    if not (abs_dir / safe).exists():
        return safe
    marker = f"_{digest[:12]}"
    base, sep, tail = safe.rpartition(".")
    # Keep whatever follows the last dot at the end of the name.
    return f"{base}{marker}.{tail}" if sep else f"{safe}{marker}"
