package com.gzzm.lobster.parse;

import com.gzzm.lobster.config.LobsterConfig;
import com.gzzm.platform.commons.Tools;
import com.spire.doc.BuiltinDocumentProperties;
import com.spire.doc.Document;
import com.spire.doc.FileFormat;
import com.spire.doc.Section;
import com.spire.doc.Table;
import com.spire.doc.TableCell;
import com.spire.doc.TableRow;
import com.spire.doc.documents.DocumentObjectType;
import com.spire.doc.documents.ListType;
import com.spire.doc.documents.Paragraph;
import com.spire.doc.interfaces.ICompositeObject;
import com.spire.doc.interfaces.IDocumentObject;
import net.cyan.arachne.annotation.Service;

import java.io.InputStream;

import static com.gzzm.lobster.parse.ParserSupport.NEWLINES;
import static com.gzzm.lobster.parse.ParserSupport.escapeCell;
import static com.gzzm.lobster.parse.ParserSupport.escapeYaml;
import static com.gzzm.lobster.parse.ParserSupport.safeMsg;

/**
 * WordParser —— 基于 Spire.Doc 的 Word 家族解析 / Word-family parser on top of Spire.Doc.
 * <p>覆盖扩展：docx/doc/dotx/dot/docm/dotm/wps/wpt/rtf/odt/ott。
 * 具体格式由 {@link FileFormat#Auto} 根据 magic bytes 自动识别，加载侧不分流。
 *
 * <p>决策：
 * <ul>
 *   <li>标题层级：优先 {@link com.spire.doc.OutlineLevel}；缺失时回退到 style 名前缀匹配</li>
 *   <li>列表：{@link com.spire.doc.formatting.ListFormat} 判定；缩进按 listLevelNumber</li>
 *   <li>表格：markdown table，合并单元格取首格值向右/向下复制</li>
 *   <li>页眉/页脚：整块跳过，按设计文档 §3.1</li>
 *   <li>图片：占位 {@code [图片]}，第二期接图片抽取</li>
 *   <li>脚注/尾注：第一期先丢（Paragraph.getText() 默认不包含），待真实样本再补</li>
 * </ul>
 */

public class WordParser implements DocumentParser {

    /**
     * Returns the kind tag of this parser.
     *
     * @return the constant string {@code "docx"}
     */
    @Override
    public String kind() {
        return "docx";
    }

    /**
     * Loads a document from the given stream and renders it to Markdown.
     *
     * <p>The stream is loaded with {@link FileFormat#Auto}, so this method does not branch on
     * the file extension. The Spire document is closed in all cases once rendering has
     * finished or failed.
     *
     * @param in           stream holding the raw document bytes
     * @param originalName original file name, forwarded to the renderer
     * @param mimeType     MIME type, forwarded to the renderer
     * @return the rendered Markdown together with its outline
     * @throws Exception if loading or rendering fails
     */
    @Override
    public ParseResult parse(InputStream in, String originalName, String mimeType) throws Exception {
        final Document document = new Document();
        try {
            document.loadFromStream(in, FileFormat.Auto);
            ParseResult rendered = renderDocument(document, originalName, mimeType);
            return rendered;
        } finally {
            closeQuietly(document);
        }
    }

    /** Closes the Spire document and ignores anything it throws (close is known to throw occasionally). */
    private static void closeQuietly(Document document) {
        try {
            document.close();
        } catch (Throwable ignored) {
            // Best effort: a failing close must not mask the parse result or the original error.
        }
    }

    /**
     * Renders a loaded document into Markdown and builds the accompanying outline.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>read title/author from the built-in document properties (failures are logged and
     *       the file name is kept as title);</li>
     *   <li>write a YAML front-matter header;</li>
     *   <li>walk the body of every section and emit its children;</li>
     *   <li>close any section that is still open;</li>
     *   <li>if nothing was emitted, fall back to the plain text of the whole document;</li>
     *   <li>cap the Markdown at the configured maximum length.</li>
     * </ol>
     *
     * @param doc          the loaded Spire document
     * @param originalName original file name; may be {@code null}
     * @param mimeType     MIME type; may be {@code null}
     * @return the parse result carrying kind, Markdown and outline
     */
    private ParseResult renderDocument(Document doc, String originalName, String mimeType) {
        MarkdownBuilder mb = new MarkdownBuilder();
        String kind = kindFromName(originalName);
        Outline outline = new Outline(kind, originalName);

        // ---- metadata ----
        String title = originalName;
        String author = null;
        try {
            BuiltinDocumentProperties props = doc.getBuiltinDocumentProperties();
            if (props != null) {
                String docTitle = props.getTitle();
                if (docTitle != null && !docTitle.isEmpty()) title = docTitle;
                String docAuthor = props.getAuthor();
                if (docAuthor != null && !docAuthor.isEmpty()) author = docAuthor;
            }
        } catch (Throwable t) {
            logParseWarn("docx metadata", t);
        }
        outline.setTitle(title);
        appendFrontMatter(mb, kind, originalName, title, author, mimeType);

        // ---- body ----
        SectionCtx ctx = new SectionCtx();
        emitBody(doc, mb, outline, ctx);

        // Close the last section if it is still open.
        if (ctx.active != null && ctx.active.getEndChar() == 0) {
            mb.closeSection(ctx.active);
        }
        mb.closeAllOpen();

        // Fallback: when the structured walk produced no paragraph/table at all, dump the
        // document's plain text instead, so that hierarchy is lost but content is not.
        if (ctx.emitted == 0) {
            String fallback = safeDocText(doc);
            if (fallback != null && !fallback.isEmpty()) {
                mb.appendBlankLine();
                mb.appendLine("<!-- fallback: doc.getText() (结构化解析为空) -->");
                mb.appendLine(NEWLINES.matcher(fallback).replaceAll("\n").trim());
                mb.appendBlankLine();
            }
        }

        // ---- length cap ----
        String md = capMarkdown(mb.toMarkdown(), LobsterConfig.getParsedMarkdownMaxChars());

        outline.setTotalChars(md.length());
        outline.getStats().put("sections", outline.getSections().size());
        return new ParseResult(kind, md, outline);
    }

    /**
     * Writes the YAML front-matter block followed by a blank line.
     *
     * <p>Every free-text value (source, title, author, mimeType) goes through
     * {@code escapeYaml}; the file name and MIME type are caller-supplied strings and were
     * previously written raw. NOTE(review): {@code escapeYaml} lives in ParserSupport and is
     * assumed to return a YAML-safe scalar — confirm against its implementation.
     */
    private void appendFrontMatter(MarkdownBuilder mb, String kind, String originalName,
                                   String title, String author, String mimeType) {
        mb.appendLine("---");
        mb.appendLine("kind: " + kind);
        if (originalName != null) mb.appendLine("source: " + escapeYaml(originalName));
        if (title != null) mb.appendLine("title: " + escapeYaml(title));
        if (author != null) mb.appendLine("author: " + escapeYaml(author));
        if (mimeType != null) mb.appendLine("mimeType: " + escapeYaml(mimeType));
        mb.appendLine("---");
        mb.appendBlankLine();
    }

    /**
     * Walks the body of every section and emits each direct child object.
     *
     * <p>Only {@code section.getBody()} is traversed, so headers and footers are never
     * visited. A section whose body cannot be read is logged and skipped; a child that cannot
     * be fetched is recorded as an HTML comment in the output.
     */
    private void emitBody(Document doc, MarkdownBuilder mb, Outline outline, SectionCtx ctx) {
        int sectionCount = doc.getSections().getCount();
        for (int si = 0; si < sectionCount; si++) {
            Section section = doc.getSections().get(si);

            int bodyCount;
            try {
                bodyCount = section.getBody().getChildObjects().getCount();
            } catch (Throwable t) {
                logParseWarn("docx section body", t);
                continue;
            }

            for (int bi = 0; bi < bodyCount; bi++) {
                IDocumentObject obj;
                try {
                    obj = section.getBody().getChildObjects().get(bi);
                } catch (Throwable t) {
                    mb.appendLine("<!-- [parse error: child " + bi + ": " + safeMsg(t) + "] -->");
                    continue;
                }
                emitObject(mb, outline, obj, ctx, 0);
            }
        }
    }

    /**
     * Caps the Markdown at {@code cap} chars and appends a truncation notice when it was cut.
     *
     * <p>The cut point is moved back by one char when it would fall between the two halves of
     * a surrogate pair, so the output never ends in a dangling high surrogate. A negative cap
     * is treated as zero for the cut instead of making {@code substring} throw.
     */
    private static String capMarkdown(String md, int cap) {
        if (md.length() <= cap) return md;
        int end = Math.max(0, cap);
        if (end > 0
                && Character.isHighSurrogate(md.charAt(end - 1))
                && Character.isLowSurrogate(md.charAt(end))) {
            end--;
        }
        return md.substring(0, end)
                + "\n\n> 全文超过 " + cap + " 字符已截断。"
                + "调用 `read_file` 传 `sectionId` 或更大 offset 继续阅读。\n";
    }

    /**
     * Derives the kind tag from the original file name.
     *
     * <p>The extension after the last dot is lower-cased and returned when it is one of the
     * known Word-family extensions. In every other case (null name, no dot, trailing dot,
     * unknown extension) the generic tag {@code "docx"} is returned.
     *
     * @param name original file name; may be {@code null}
     * @return the recognised lower-case extension, or {@code "docx"}
     */
    private static String kindFromName(String name) {
        final String fallback = "docx";
        if (name == null) {
            return fallback;
        }
        int lastDot = name.lastIndexOf('.');
        boolean hasExtension = lastDot >= 0 && lastDot < name.length() - 1;
        if (!hasExtension) {
            return fallback;
        }
        String ext = name.substring(lastDot + 1).toLowerCase(java.util.Locale.ROOT);
        boolean known = java.util.Arrays.asList(
                "doc", "docx",
                "dot", "dotx",
                "docm", "dotm",
                "wps", "wpt",
                "rtf",
                "odt", "ott").contains(ext);
        return known ? ext : fallback;
    }

    /**
     * Mutable state shared across the parse loop.
     *
     * <p>The section counter is only advanced when a new heading section is opened, so the
     * generated section ids have no gaps.
     */
    private static class SectionCtx {
        /** Number of heading sections opened so far; used to build the next section id. */
        int seq;

        /** The most recently opened outline section, or {@code null} before the first heading. */
        OutlineSection active;

        /**
         * How many paragraphs/tables have been emitted; a value of zero after the walk means
         * the structured parse produced nothing and the plain-text fallback is needed.
         */
        int emitted;
    }

    /** Deepest container nesting level that {@code emitObject} still processes. */
    private static final int MAX_NEST_DEPTH = 8;

    /**
     * Recursive dispatcher for a single document object.
     *
     * <p>Paragraphs and tables are emitted directly; container types (as decided by
     * {@code isContainer}) that implement {@link ICompositeObject} are descended into with
     * {@code depth + 1}. All other object types are ignored. Any failure is written into the
     * Markdown as an HTML comment instead of being propagated.
     *
     * @param mb      Markdown sink
     * @param outline outline being built
     * @param obj     object to emit; {@code null} is ignored
     * @param ctx     running parse state
     * @param depth   current nesting depth; objects deeper than {@code MAX_NEST_DEPTH} are dropped
     */
    private void emitObject(MarkdownBuilder mb, Outline outline, IDocumentObject obj,
                            SectionCtx ctx, int depth) {
        if (obj == null) {
            return;
        }
        if (depth > MAX_NEST_DEPTH) {
            return;
        }

        DocumentObjectType type;
        try {
            type = obj.getDocumentObjectType();
        } catch (Throwable t) {
            mb.appendLine("<!-- [parse error: type: " + safeMsg(t) + "] -->");
            return;
        }

        try {
            if (type == DocumentObjectType.Paragraph) {
                emitParagraph(mb, outline, (Paragraph) obj, ctx);
                return;
            }
            if (type == DocumentObjectType.Table) {
                boolean rendered = emitTable(mb, (Table) obj);
                if (rendered) {
                    ctx.emitted++;
                }
                return;
            }
            if (!isContainer(type) || !(obj instanceof ICompositeObject)) {
                // Remaining leaf types (field marks, bookmarks, breaks, ...) are ignored.
                return;
            }
            ICompositeObject composite = (ICompositeObject) obj;
            int childCount = composite.getChildObjects().getCount();
            int idx = 0;
            while (idx < childCount) {
                emitObject(mb, outline, composite.getChildObjects().get(idx), ctx, depth + 1);
                idx++;
            }
        } catch (Throwable t) {
            mb.appendLine("<!-- [parse error: " + type + ": " + safeMsg(t) + "] -->");
        }
    }

    /** Object types that are treated as containers and descended into during emission. */
    private static final DocumentObjectType[] CONTAINER_TYPES = {
        DocumentObjectType.Structure_Document_Tag,
        DocumentObjectType.Structure_Document_Tag_Inline,
        DocumentObjectType.Structure_Document_Tag_Row,
        DocumentObjectType.Structure_Document_Tag_Cell,
        DocumentObjectType.SDT_Block_Content,
        DocumentObjectType.SDT_Inline_Content,
        DocumentObjectType.SDT_Row_Content,
        DocumentObjectType.SDT_Cell_Content,
        DocumentObjectType.Text_Box,
        DocumentObjectType.Shape_Group,
        DocumentObjectType.Shape,
        DocumentObjectType.Custom_Xml,
        DocumentObjectType.Sub_Document,
    };

    /**
     * Tells whether the given object type is one of the container types that may wrap
     * further paragraphs or tables and therefore needs to be descended into.
     *
     * @param type object type to test; {@code null} yields {@code false}
     * @return {@code true} when {@code type} is one of the listed container types
     */
    private static boolean isContainer(DocumentObjectType type) {
        for (DocumentObjectType candidate : CONTAINER_TYPES) {
            if (candidate == type) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the plain text of the whole document, or {@code null} when reading it fails.
     * A failure is logged through {@code logParseWarn} rather than propagated.
     */
    private static String safeDocText(Document doc) {
        String text;
        try {
            text = doc.getText();
        } catch (Throwable t) {
            logParseWarn("docx fallback getText", t);
            text = null;
        }
        return text;
    }

    /**
     * Emits one paragraph as a heading, a list item or a plain paragraph.
     *
     * <p>Paragraphs whose text is empty produce no output. A paragraph with a detected
     * heading level of 1..6 opens a new outline section; any other level is treated as body
     * text. Every emitted paragraph increments {@code ctx.emitted}.
     *
     * @param mb      Markdown sink
     * @param outline outline that receives newly opened sections
     * @param p       paragraph to emit
     * @param ctx     running parse state
     */
    private void emitParagraph(MarkdownBuilder mb, Outline outline, Paragraph p, SectionCtx ctx) {
        String text = safeText(p);
        int headingLevel = detectHeadingLevel(p);

        if (text.isEmpty()) {
            return;
        }

        boolean isHeading = headingLevel >= 1 && headingLevel <= 6;
        if (isHeading) {
            emitHeading(mb, outline, ctx, headingLevel, text);
            return;
        }

        // List item or ordinary paragraph.
        String listPrefix = detectListPrefix(p);
        if (listPrefix == null) {
            mb.appendLine(text);
            mb.appendBlankLine();
        } else {
            mb.appendLine(listPrefix + text);
        }
        ctx.emitted++;
    }

    /** Closes the previous section if still open, then opens and writes a new heading section. */
    private void emitHeading(MarkdownBuilder mb, Outline outline, SectionCtx ctx,
                             int level, String text) {
        OutlineSection previous = ctx.active;
        if (previous != null && previous.getEndChar() == 0) {
            mb.closeSection(previous);
        }
        // Leave a blank line before the heading so it does not stick to preceding content.
        if (mb.cursor() > 0) {
            mb.appendBlankLine();
        }

        ctx.seq++;
        String sectionId = "s" + ctx.seq;
        String shortTitle = truncate(text, 60);
        OutlineSection opened = mb.openSection(sectionId, level, shortTitle);

        int hashesLeft = level;
        while (hashesLeft > 0) {
            mb.append("#");
            hashesLeft--;
        }
        mb.append(" ").appendLine(text);
        mb.appendBlankLine();

        outline.getSections().add(opened);
        ctx.active = opened;
        ctx.emitted++;
    }

    /**
     * Renders a table as a Markdown table, using the first row as header.
     *
     * <p>The column count is the widest row of the table, not the width of the first row.
     * Rows with fewer cells are padded with empty cells. This keeps the cells of later rows
     * that are wider than the header row (for example when the first row is a single merged
     * title cell), which would otherwise be dropped. Nothing is written when the table has no
     * rows or no cells at all.
     *
     * @param mb    Markdown sink
     * @param table table to render
     * @return {@code true} when a table was written, {@code false} when it was empty
     */
    private boolean emitTable(MarkdownBuilder mb, Table table) {
        int rowCount = table.getRows().getCount();
        if (rowCount == 0) return false;

        // Widest row decides the column count so that no cell is cut off.
        int colCount = 0;
        for (int r = 0; r < rowCount; r++) {
            colCount = Math.max(colCount, table.getRows().get(r).getCells().getCount());
        }
        if (colCount == 0) return false;

        // Leading blank line, only once we know a table will actually be written.
        mb.appendBlankLine();

        // Header row followed by the separator row.
        appendTableRow(mb, table.getRows().get(0), colCount);
        mb.append("|");
        for (int c = 0; c < colCount; c++) mb.append("---|");
        mb.appendLine("");

        for (int r = 1; r < rowCount; r++) {
            appendTableRow(mb, table.getRows().get(r), colCount);
        }
        mb.appendBlankLine();
        return true;
    }

    /** Writes one Markdown table row with exactly {@code colCount} cells, padding with empty cells. */
    private void appendTableRow(MarkdownBuilder mb, TableRow row, int colCount) {
        int cellCount = row.getCells().getCount();
        mb.append("|");
        for (int c = 0; c < colCount; c++) {
            String cell = c < cellCount ? cellText(row.getCells().get(c)) : "";
            mb.append(" ").append(cell).append(" |");
        }
        mb.appendLine("");
    }

    /**
     * Collects the text of a table cell for use inside a Markdown table.
     *
     * <p>Only direct {@link Paragraph} children are read; their non-empty texts are joined
     * with {@code <br>} and the result is passed through {@code escapeCell}. Other children
     * (nested tables, images, ...) are ignored.
     *
     * @param cell the cell to read; {@code null} yields an empty string
     * @return the escaped cell text
     */
    private String cellText(TableCell cell) {
        if (cell == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        final int childCount = cell.getChildObjects().getCount();
        for (int idx = 0; idx < childCount; idx++) {
            IDocumentObject child = cell.getChildObjects().get(idx);
            if (!(child instanceof Paragraph)) {
                continue;
            }
            String line = safeText((Paragraph) child);
            if (line.isEmpty()) {
                continue;
            }
            if (joined.length() != 0) {
                joined.append("<br>");
            }
            joined.append(line);
        }
        return escapeCell(joined.toString());
    }

    /**
     * Determines the heading level of a paragraph.
     *
     * <p>Two sources are tried in order:
     * <ol>
     *   <li>the paragraph's outline level, when its enum name has the form {@code Level_N}
     *       with N in 1..6;</li>
     *   <li>the style name, when it starts with "heading" (case-insensitive) or "标题"; the
     *       last digit found in the style name is then used as the level.</li>
     * </ol>
     * Any exception raised while reading either source is swallowed and that source is
     * skipped.
     *
     * @param p paragraph to inspect
     * @return the detected level, or 0 when the paragraph is not recognised as a heading
     */
    private int detectHeadingLevel(Paragraph p) {
        // 1) Outline level takes precedence.
        try {
            com.spire.doc.OutlineLevel ol = p.getFormat().getOutlineLevel();
            if (ol != null) {
                String name = ol.name();
                // Enum names look like Level_1 / Level_2 / ... / Body_Text.
                if (name != null && name.startsWith("Level_")) {
                    try {
                        int lv = Integer.parseInt(name.substring("Level_".length()));
                        if (lv >= 1 && lv <= 6) return lv;
                    } catch (NumberFormatException ignored) {
                        // Not a numeric level name: fall through to the style-name check.
                    }
                }
            }
        } catch (Throwable ignored) {
            // Outline level unavailable: fall through to the style-name check.
        }

        // 2) Style-name prefix: English "Heading N" or Chinese "标题 N".
        try {
            String sn = p.getStyleName();
            if (sn != null) {
                // Locale.ROOT keeps the comparison independent of the default locale; with a
                // Turkish/Azeri default, "HEADING" would otherwise lower-case to "headıng".
                String lower = sn.toLowerCase(java.util.Locale.ROOT);
                if (lower.startsWith("heading") || sn.startsWith("标题")) {
                    char c = lastDigit(sn);
                    if (c != 0) return c - '0';
                }
            }
        } catch (Throwable ignored) {
            // Style name unavailable: treat as a non-heading.
        }

        return 0;
    }

    /**
     * Builds the Markdown list prefix for a paragraph, or returns {@code null} when the
     * paragraph is not a list item.
     *
     * <p>The prefix is two spaces of indent per list level followed by {@code "- "} for
     * bulleted lists and {@code "1. "} for every other list type. Any exception while reading
     * the list format results in {@code null}.
     *
     * @param p paragraph to inspect
     * @return the list prefix, or {@code null} for non-list paragraphs
     */
    private String detectListPrefix(Paragraph p) {
        try {
            if (p.getListFormat() == null) {
                return null;
            }
            ListType listType = p.getListFormat().getListType();
            if (listType == null || listType == ListType.No_List) {
                return null;
            }
            int level = p.getListFormat().getListLevelNumber();
            StringBuilder prefix = new StringBuilder();
            for (int remaining = level; remaining > 0; remaining--) {
                prefix.append("  ");
            }
            prefix.append(listType == ListType.Bulleted ? "- " : "1. ");
            return prefix.toString();
        } catch (Throwable ignored) {
            return null;
        }
    }

    /**
     * Returns the paragraph text on a single line.
     *
     * <p>Matches of the {@code NEWLINES} pattern are replaced by a single space and the
     * result is trimmed. A {@code null} text or any failure yields an empty string.
     *
     * @param p paragraph to read
     * @return the normalised text, never {@code null}
     */
    private static String safeText(Paragraph p) {
        try {
            final String raw = p.getText();
            return raw == null
                    ? ""
                    : NEWLINES.matcher(raw).replaceAll(" ").trim();
        } catch (Throwable ignored) {
            return "";
        }
    }

    /**
     * Shortens a string to at most {@code max} chars, ending it with an ellipsis when cut.
     *
     * <p>The cut point is moved back by one char when it would fall between the two halves
     * of a surrogate pair, so the result never contains a dangling high surrogate. A
     * non-positive {@code max} yields an empty string instead of an index error.
     *
     * @param s   text to shorten; {@code null} yields an empty string
     * @param max maximum length of the result in chars, ellipsis included
     * @return {@code s} itself when it fits, otherwise the shortened text plus "…"
     */
    private static String truncate(String s, int max) {
        if (s == null || max <= 0) return "";
        if (s.length() <= max) return s;
        int end = max - 1;
        // s.length() > max here, so charAt(end) is always in range.
        if (end > 0
                && Character.isHighSurrogate(s.charAt(end - 1))
                && Character.isLowSurrogate(s.charAt(end))) {
            end--;
        }
        return s.substring(0, end) + "…";
    }

    /**
     * Finds the last ASCII digit in a string.
     *
     * @param s string to scan from the end; must not be {@code null}
     * @return the last char in the range '0'..'9', or the NUL char (0) when there is none
     */
    private static char lastDigit(String s) {
        int idx = s.length();
        while (--idx >= 0) {
            char ch = s.charAt(idx);
            if ('0' <= ch && ch <= '9') {
                return ch;
            }
        }
        return (char) 0;
    }

    /**
     * Logs a parse warning through {@code Tools.log}; a failure of the logging call itself
     * is ignored so that logging can never break parsing.
     *
     * @param where short label of the step that failed
     * @param t     the failure to log
     */
    private static void logParseWarn(String where, Throwable t) {
        try {
            String message = "[WordParser] " + where + " failed";
            Tools.log(message, t);
        } catch (Throwable ignored) {
            // Deliberately ignored: logging is best effort.
        }
    }
}
