#!/usr/bin/env python3
import argparse
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

plt.rcParams["font.sans-serif"] = [
    "Noto Sans CJK SC",
    "WenQuanYi Micro Hei",
    "WenQuanYi Zen Hei",
    "DejaVu Sans",
]
plt.rcParams["axes.unicode_minus"] = False

TABLE_EXTENSIONS = (".xlsx", ".xlsm", ".xls", ".csv", ".tsv")


def resolve_input_path(value):
    if value and value.lower() not in ("auto", "manifest"):
        p = Path(value)
        if p.is_file():
            return str(p)
        if p.is_dir():
            return select_from_manifest(p / "manifest.json")
        raise SystemExit(f"input path not found: {p}")
    return select_from_manifest(Path("/inputs/manifest.json"))


def select_from_manifest(path):
    if not path.exists():
        raise SystemExit(f"input manifest not found: {path}")
    items = json.loads(path.read_text(encoding="utf-8-sig"))
    if not isinstance(items, list):
        raise SystemExit(f"system input manifest must be a list: {path}")
    candidates = [
        item for item in items
        if str(item.get("path", "")).lower().endswith(TABLE_EXTENSIONS)
    ]
    if not candidates:
        raise SystemExit("no xlsx/xls/csv/tsv input found in /inputs/manifest.json")
    return candidates[0]["path"]


def read_table(path):
    suffix = Path(path).suffix.lower()
    if suffix in [".xlsx", ".xlsm", ".xls"]:
        return pd.read_excel(path)
    sep = "\t" if suffix == ".tsv" else ","
    return pd.read_csv(path, encoding="utf-8-sig", sep=sep)


def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", help="Table path, /inputs directory, or 'auto'. Defaults to first table in /inputs/manifest.json.")
    ap.add_argument("--category", required=True)
    ap.add_argument("--amount", required=True)
    ap.add_argument("--project")
    ap.add_argument("--out-xlsx", required=True)
    ap.add_argument("--out-md", required=True)
    ap.add_argument("--out-chart", required=True)
    args = ap.parse_args()

    df = read_table(resolve_input_path(args.input))
    df[args.amount] = pd.to_numeric(df[args.amount], errors="coerce")
    valid = df.dropna(subset=[args.amount])
    category = valid.groupby(args.category, as_index=False)[args.amount].sum().sort_values(args.amount, ascending=False)
    warnings = valid[valid[args.amount] < 0].copy()
    overview = pd.DataFrame([
        {"metric": "records", "value": len(df)},
        {"metric": "valid_amount_records", "value": len(valid)},
        {"metric": "total_amount", "value": valid[args.amount].sum()},
        {"metric": "negative_amount_records", "value": len(warnings)},
    ])

    with pd.ExcelWriter(args.out_xlsx, engine="xlsxwriter") as writer:
        overview.to_excel(writer, sheet_name="overview", index=False)
        category.to_excel(writer, sheet_name="category_summary", index=False)
        if args.project and args.project in valid.columns:
            project = valid.groupby(args.project, as_index=False)[args.amount].sum().sort_values(args.amount, ascending=False)
            project.to_excel(writer, sheet_name="project_ranking", index=False)
        warnings.to_excel(writer, sheet_name="items_to_review", index=False)

    plt.figure(figsize=(9, 5), dpi=160)
    sns.barplot(data=category.head(12), x=args.amount, y=args.category, color="#5B9BD5")
    plt.title("预算/支出分类汇总")
    plt.tight_layout()
    plt.savefig(args.out_chart)

    total = valid[args.amount].sum()
    top_line = ""
    if not category.empty:
        top = category.iloc[0]
        top_line = f"- 最大类别：{top[args.category]}，金额 {top[args.amount]:,.2f}。"
    md = "\n".join([
        "# Budget Analysis Summary",
        "",
        f"- Records: {len(df)}",
        f"- Valid amount records: {len(valid)}",
        f"- Total amount: {total:,.2f}",
        top_line,
        f"- Items requiring review: {len(warnings)}",
        "",
        "Note: 自动分析仅提示需复核事项，不替代财务审计结论。",
    ])
    Path(args.out_md).write_text(md, encoding="utf-8")


if __name__ == "__main__":
    main()
