#!/usr/bin/env python3
import argparse
import json
from pathlib import Path

from docx import Document
from pypdf import PdfReader, PdfWriter


def load_spec(manifest):
    """Load the catalog spec described by *manifest*.

    The manifest location is resolved with ``resolve_manifest_path`` and the
    file is parsed as JSON. Two layouts are accepted:

    * a JSON object whose ``items`` value is a list -- returned unchanged;
    * a JSON list -- converted with ``system_manifest_to_spec``.

    Raises:
        SystemExit: if the file cannot be read, is not valid JSON, or has
            neither of the accepted layouts.
    """
    path = resolve_manifest_path(manifest)
    try:
        # "utf-8-sig" drops a leading byte-order mark if the file has one.
        raw = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        raise SystemExit(f"cannot read manifest {path}: {exc}") from exc
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise SystemExit(f"manifest is not valid JSON: {path}: {exc}") from exc
    if isinstance(data, dict) and isinstance(data.get("items"), list):
        return data
    if isinstance(data, list):
        return system_manifest_to_spec(data)
    raise SystemExit(f"manifest must be a system input list or an items object: {path}")


def resolve_manifest_path(value):
    """Turn the ``--manifest`` argument into the path of a manifest file.

    An empty/missing value, or the keywords ``auto`` / ``manifest`` (matched
    case-insensitively), select ``/inputs/manifest.json``. A value naming an
    existing directory selects ``manifest.json`` inside it. Anything else is
    used as the path itself.
    """
    wants_default = not value or value.lower() in ("auto", "manifest")
    if wants_default:
        return Path("/inputs/manifest.json")
    candidate = Path(value)
    return candidate / "manifest.json" if candidate.is_dir() else candidate


def system_manifest_to_spec(items):
    """Convert a list-style manifest into the catalog spec used by this script.

    Each entry is read as a mapping with the keys ``path``, ``displayName``,
    ``mimeType`` and ``ref``. Entries without a ``path`` are skipped. The
    display name falls back to the file name taken from ``path``.

    Args:
        items: Iterable of manifest entries (dicts).

    Returns:
        ``{"title": "Material Catalog", "items": [...]}`` where every item has
        the keys ``path``, ``name``, ``category`` and ``note``.

    Raises:
        SystemExit: if an entry is not a JSON object (dict).
    """
    spec_items = []
    for position, item in enumerate(items, 1):
        if not isinstance(item, dict):
            # Fail with a readable message instead of an AttributeError on .get().
            raise SystemExit(
                f"manifest entry {position} must be an object, "
                f"got {type(item).__name__}"
            )
        path = item.get("path")
        if not path:
            continue
        spec_items.append({
            "path": path,
            "name": item.get("displayName") or Path(path).name,
            "category": item.get("mimeType") or "",
            "note": item.get("ref") or "",
        })
    return {"title": "Material Catalog", "items": spec_items}


def _cell_text(value):
    """Return *value* as table-cell text; falsy values (None, "", 0) become ""."""
    return str(value) if value else ""


def build_index(spec, out_docx):
    """Write a DOCX index table listing every item in *spec*.

    The document gets a heading (``spec["title"]`` or "Material Catalog")
    followed by a five-column table: running number, material name,
    category, file path and note. A missing name falls back to the file
    name taken from the item's path.

    Args:
        spec: Mapping with an optional ``title`` and an ``items`` list of
            mappings (keys ``name``, ``category``, ``path``, ``note``).
        out_docx: Destination passed to ``Document.save``.
    """
    doc = Document()
    doc.add_heading(spec.get("title") or "Material Catalog", 0)
    headers = ["No.", "Material Name", "Category", "File Path", "Note"]
    table = doc.add_table(rows=1, cols=len(headers))
    # Look the header row up once instead of once per column.
    for cell, header in zip(table.rows[0].cells, headers):
        cell.text = header
    for number, item in enumerate(spec.get("items") or [], 1):
        # Normalizing first keeps an explicit JSON null path from reaching
        # Path(), and keeps non-string values out of the cell text setter.
        path = _cell_text(item.get("path"))
        values = (
            str(number),
            _cell_text(item.get("name")) or Path(path).name,
            _cell_text(item.get("category")),
            path,
            _cell_text(item.get("note")),
        )
        for cell, value in zip(table.add_row().cells, values):
            cell.text = value
    doc.save(out_docx)


def merge_pdfs(spec, out_pdf):
    """Concatenate every PDF listed in *spec* into a single file.

    All items are validated before any PDF is opened, so a manifest with
    problems is rejected with the complete list of issues and without
    parsing files whose pages would be thrown away.

    Args:
        spec: Mapping with an ``items`` list; each item supplies ``path``
            and optionally ``name`` (used in error messages).
        out_pdf: Path of the merged PDF to write.

    Raises:
        SystemExit: if any item lacks a path, is not a ``.pdf`` file or
            does not exist, or if the manifest lists no PDFs at all.
    """
    pdf_paths = []
    errors = []
    for item in spec.get("items") or []:
        path = item.get("path")
        if not path:
            errors.append(f"{item.get('name') or '<unnamed>'}: missing path")
            continue
        p = Path(path)
        if p.suffix.lower() != ".pdf":
            errors.append(f"{path}: not a PDF; convert it before merging")
            continue
        if not p.is_file():
            errors.append(f"{path}: file not found")
            continue
        pdf_paths.append(p)
    if errors:
        raise SystemExit("cannot merge pdf:\n- " + "\n- ".join(errors))
    if not pdf_paths:
        raise SystemExit("cannot merge pdf: no PDF files found in manifest")

    # Only touch pypdf once the whole manifest is known to be mergeable.
    writer = PdfWriter()
    for p in pdf_paths:
        reader = PdfReader(str(p))
        for page in reader.pages:
            writer.add_page(page)
    with open(out_pdf, "wb") as f:
        writer.write(f)


def main():
    """Command-line entry point.

    Parses ``--manifest``, ``--index-docx`` (required) and ``--merged-pdf``,
    loads the spec, writes the DOCX index, and merges the PDFs only when
    ``--merged-pdf`` was supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--manifest",
        help="Business manifest path, /inputs directory, or 'auto'. Defaults to /inputs/manifest.json.",
    )
    parser.add_argument("--index-docx", required=True)
    parser.add_argument("--merged-pdf")
    options = parser.parse_args()

    spec = load_spec(options.manifest)
    build_index(spec, options.index_docx)

    merged_target = options.merged_pdf
    if merged_target:
        merge_pdfs(spec, merged_target)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
