"""Normalize stored attachment filenames and DB paths.

Older downloads prefixed every attachment with the article key. For URL-based
article keys and URL-encoded Content-Disposition filenames this produced names
like ``detail_%e4%b8%ad...docx``. This script decodes URL-escaped names and
removes the legacy ``detail_`` prefix when present.
"""
from __future__ import annotations

from pathlib import PurePosixPath
from urllib.parse import unquote

from govcrawler.db import get_sessionmaker
from govcrawler.models import Attachment
from govcrawler.settings import get_settings
from govcrawler.storage.attachments import safe_filename
from govcrawler.storage.paths import to_os_path


def _normalized_name(name: str) -> str:
    """Return the cleaned-up storage name for an attachment called *name*.

    Percent-escapes are decoded first, then one leading ``detail_`` prefix is
    dropped if present, and the remainder is passed through ``safe_filename``.
    A falsy *name* (``None`` or ``""``) is treated as the empty string.
    """
    legacy_prefix = "detail_"
    text = unquote(name or "")
    # partition() yields an empty head exactly when the text starts with the
    # prefix, so this strips at most one leading occurrence.
    head, found, remainder = text.partition(legacy_prefix)
    if found and not head:
        text = remainder
    return safe_filename(text)


def _suffixed_name(name: str, tag: str, attempt: int = 0) -> str:
    """Insert ``_<tag>`` (plus ``_<attempt>`` on retries) before the extension of *name*."""
    stem, dot, ext = name.rpartition(".")
    marker = f"_{tag}" if attempt == 0 else f"_{tag}_{attempt}"
    return f"{stem}{marker}.{ext}" if dot else f"{name}{marker}"


def main() -> None:
    """Rename every stored attachment to its normalized name and update the DB.

    For each ``Attachment`` row the normalized file name is computed with
    ``_normalized_name``. Rows whose name is already normalized (or normalizes
    to an empty string) are skipped. Rows without a ``file_path`` only get
    their ``file_name`` updated. For all other rows the file on disk is moved
    next to its old location under the new name and both ``file_name`` and
    ``file_path`` are updated.

    If the target name is already taken by a different file, a suffix built
    from the first 12 characters of ``file_hash`` (or the literal
    ``"attachment"``) is inserted before the extension; a numeric counter is
    appended as well if that name is taken too, so an existing file is never
    overwritten by the rename.

    A row whose file cannot be moved (``OSError``) is reported on stderr, left
    unchanged in the database and counted as skipped, so the single commit at
    the end still matches what is on disk.

    Prints a ``checked=... updated=... skipped=...`` summary line.
    """
    import sys  # local import: only needed for the warning stream

    settings = get_settings()
    Session = get_sessionmaker()
    checked = 0
    updated = 0
    skipped = 0

    with Session() as s:
        rows = s.query(Attachment).all()
        for att in rows:
            checked += 1
            old_name = att.file_name or ""
            new_name = _normalized_name(old_name)
            if not new_name or new_name == old_name:
                skipped += 1
                continue
            if not att.file_path:
                # Nothing on disk is referenced; only the display name changes.
                att.file_name = new_name
                updated += 1
                continue

            old_rel = PurePosixPath(att.file_path)
            new_rel = old_rel.with_name(new_name)
            # NOTE(review): to_os_path presumably resolves the stored relative
            # path under settings.data_dir -- confirm against its definition.
            old_abs = to_os_path(settings.data_dir, old_rel)
            new_abs = to_os_path(settings.data_dir, new_rel)

            if new_abs.exists() and old_abs != new_abs:
                # Target is occupied by another file: pick a free sibling name.
                # Path.rename() silently replaces an existing target on POSIX,
                # so keep trying until the candidate is unused (or is the
                # file's current location).
                base_name = new_name
                tag = (att.file_hash or "attachment")[:12]
                attempt = 0
                while True:
                    new_name = _suffixed_name(base_name, tag, attempt)
                    new_rel = old_rel.with_name(new_name)
                    new_abs = to_os_path(settings.data_dir, new_rel)
                    if new_abs == old_abs or not new_abs.exists():
                        break
                    attempt += 1

            if old_abs.exists() and old_abs != new_abs:
                try:
                    new_abs.parent.mkdir(parents=True, exist_ok=True)
                    old_abs.rename(new_abs)
                except OSError as exc:
                    # Keep the row pointing at the file that is still in
                    # place instead of aborting with earlier moves uncommitted.
                    print(
                        f"warning: could not move {old_abs} -> {new_abs}: {exc}",
                        file=sys.stderr,
                    )
                    skipped += 1
                    continue

            att.file_name = new_name
            att.file_path = str(new_rel)
            updated += 1
        s.commit()

    print(f"checked={checked} updated={updated} skipped={skipped}")


# Run the normalization only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
