package com.gzzm.lobster.parse;

import com.gzzm.lobster.config.LobsterConfig;
import com.gzzm.platform.commons.Tools;
import com.spire.xls.CellRange;
import com.spire.xls.Workbook;
import com.spire.xls.Worksheet;

import java.io.InputStream;
import java.util.Locale;

import static com.gzzm.lobster.parse.ParserSupport.NEWLINES;
import static com.gzzm.lobster.parse.ParserSupport.escapeCell;
import static com.gzzm.lobster.parse.ParserSupport.escapeYaml;
import static com.gzzm.lobster.parse.ParserSupport.safeMsg;

/**
 * SpreadsheetParser —— 基于 Spire.Xls 的表格解析 / Spreadsheet parser on top of Spire.Xls.
 *
 * <p>覆盖扩展：xlsx/xls/xlsm/xlsb/xlt/xltx/xltm/ods。FileFormat 由 {@code Workbook.loadFromStream}
 * 自动识别，不分流。
 *
 * <p>决策（对齐 design doc）：
 * <ul>
 *   <li>每个 sheet 独立成一节（{@code ## Sheet N: <name>}），id {@code sh1}/{@code sh2}/...</li>
 *   <li>表格以 markdown table 渲染；首行做表头；空 sheet 保留 section 头但不画表</li>
 *   <li>行数超过 {@link LobsterConfig#getXlsxInlineRowLimit} 截断，留提示让 LLM 走 {@code read_sheet}</li>
 *   <li>单元格取 {@code getText(row,col)}（格式化后字符串，日期/公式都已 eval）</li>
 *   <li>合并单元格：Spire 已把主格值放到左上角，其余副格读出为空——这里不做展开</li>
 *   <li>图片/图表/注释第一期忽略</li>
 * </ul>
 */
public class SpreadsheetParser implements DocumentParser {

    @Override public String kind() { return "xlsx"; }

    @Override
    public ParseResult parse(InputStream in, String originalName, String mimeType) throws Exception {
        Workbook wb = new Workbook();
        try {
            wb.loadFromStream(in);
            return renderWorkbook(wb, originalName, mimeType);
        } finally {
            try { wb.dispose(); } catch (Throwable ignore) { /* Spire dispose 偶尔抛 */ }
        }
    }

    private ParseResult renderWorkbook(Workbook wb, String originalName, String mimeType) {
        String kind = kindFromName(originalName);
        MarkdownBuilder mb = new MarkdownBuilder();
        Outline outline = new Outline(kind, originalName);

        // ---- 元信息头 ----
        String title = originalName;
        String author = null;
        try {
            if (wb.getDocumentProperties() != null) {
                String t = wb.getDocumentProperties().getTitle();
                String a = wb.getDocumentProperties().getAuthor();
                if (t != null && !t.isEmpty()) title = t;
                if (a != null && !a.isEmpty()) author = a;
            }
        } catch (Throwable t) {
            logParseWarn("xlsx metadata", t);
        }
        outline.setTitle(title);

        int sheetCount = wb.getWorksheets().size();
        mb.appendLine("---");
        mb.appendLine("kind: " + kind);
        if (originalName != null) mb.appendLine("source: " + originalName);
        if (title != null) mb.appendLine("title: " + escapeYaml(title));
        if (author != null) mb.appendLine("author: " + escapeYaml(author));
        mb.appendLine("sheets: " + sheetCount);
        if (mimeType != null) mb.appendLine("mimeType: " + mimeType);
        mb.appendLine("---");
        mb.appendBlankLine();

        int rowLimit = LobsterConfig.getXlsxInlineRowLimit();
        long totalRenderedRows = 0;
        long totalTruncatedRows = 0;

        for (int si = 0; si < sheetCount; si++) {
            Worksheet sheet;
            try { sheet = wb.getWorksheets().get(si); }
            catch (Throwable t) {
                mb.appendLine("<!-- [parse error: sheet " + si + ": " + safeMsg(t) + "] -->");
                continue;
            }
            String name = safeName(sheet, si);
            String id = "sh" + (si + 1);
            OutlineSection sec = mb.openSection(id, 2, "Sheet " + (si + 1) + ": " + name);
            sec.putExtra("kind", "sheet");
            sec.putExtra("sheetIndex", si);
            sec.putExtra("sheetName", name);

            mb.appendLine("## Sheet " + (si + 1) + ": " + name);
            mb.appendBlankLine();

            SheetStats stats = emitSheet(mb, sheet, rowLimit);
            sec.putExtra("rows", stats.rows);
            sec.putExtra("cols", stats.cols);
            sec.putExtra("renderedRows", stats.rendered);
            // LLM 排障线索：首行是否被强当作 header 渲染了（纯数据 sheet 会被误导）
            sec.putExtra("assumedHeader", stats.headerAssumed);
            if (stats.truncated > 0) sec.putExtra("truncatedRows", stats.truncated);
            totalRenderedRows += stats.rendered;
            totalTruncatedRows += stats.truncated;

            mb.closeSection(sec);
            outline.getSections().add(sec);
        }
        mb.closeAllOpen();

        String md = mb.toMarkdown();
        int cap = LobsterConfig.getParsedMarkdownMaxChars();
        if (md.length() > cap) {
            md = md.substring(0, cap)
                    + "\n\n> 全文超过 " + cap + " 字符已截断。调用 `read_file` 传 `sectionId` 或更大 offset 继续阅读。\n";
        }
        outline.setTotalChars(md.length());
        outline.getStats().put("sheets", sheetCount);
        outline.getStats().put("renderedRows", totalRenderedRows);
        if (totalTruncatedRows > 0) outline.getStats().put("truncatedRows", totalTruncatedRows);
        outline.getStats().put("sections", outline.getSections().size());
        return new ParseResult(kind, md, outline);
    }

    /** 单 sheet 渲染；返回统计. */
    private SheetStats emitSheet(MarkdownBuilder mb, Worksheet sheet, int rowLimit) {
        SheetStats st = new SheetStats();
        int firstRow, lastRow, firstCol, lastCol;
        try {
            if (sheet.isEmpty()) {
                mb.appendLine("<!-- 空 sheet -->");
                mb.appendBlankLine();
                return st;
            }
            firstRow = sheet.getFirstRow();
            lastRow = sheet.getLastRow();
            firstCol = sheet.getFirstColumn();
            lastCol = sheet.getLastColumn();
        } catch (Throwable t) {
            mb.appendLine("<!-- [parse error: sheet bounds: " + safeMsg(t) + "] -->");
            return st;
        }
        if (lastRow < firstRow || lastCol < firstCol) {
            mb.appendLine("<!-- 空 sheet -->");
            mb.appendBlankLine();
            return st;
        }
        int totalRows = lastRow - firstRow + 1;
        int totalCols = lastCol - firstCol + 1;
        st.rows = totalRows;
        st.cols = totalCols;

        int renderEnd = Math.min(lastRow, firstRow + rowLimit - 1);

        if (totalRows < 2) {
            // 只有一行——不画分隔线，避免把唯一一行数据误当 header 渲染
            st.headerAssumed = false;
            mb.append("|");
            for (int c = firstCol; c <= lastCol; c++) {
                mb.append(" ").append(escapeCell(safeCell(sheet, firstRow, c))).append(" |");
            }
            mb.appendLine("");
            st.rendered = 1;
            mb.appendBlankLine();
            return st;
        }

        // 2+ 行：假定首行作 header（纯数据 sheet 会被误导，sec.extra.assumedHeader=true 给 LLM 排查线索）
        st.headerAssumed = true;
        mb.append("|");
        for (int c = firstCol; c <= lastCol; c++) {
            mb.append(" ").append(escapeCell(safeCell(sheet, firstRow, c))).append(" |");
        }
        mb.appendLine("");
        mb.append("|");
        for (int c = firstCol; c <= lastCol; c++) mb.append("---|");
        mb.appendLine("");

        for (int r = firstRow + 1; r <= renderEnd; r++) {
            mb.append("|");
            for (int c = firstCol; c <= lastCol; c++) {
                mb.append(" ").append(escapeCell(safeCell(sheet, r, c))).append(" |");
            }
            mb.appendLine("");
        }

        st.rendered = renderEnd - firstRow + 1; // 含 header
        if (renderEnd < lastRow) {
            int skipped = lastRow - renderEnd;
            st.truncated = skipped;
            mb.appendBlankLine();
            mb.appendLine("> 本表共 " + totalRows + " 行，已渲染前 " + st.rendered
                    + " 行；剩余 " + skipped + " 行请用 `read_sheet` 按 rowRange 查询。");
        }
        mb.appendBlankLine();
        return st;
    }

    private static class SheetStats {
        int rows;
        int cols;
        int rendered;
        int truncated;
        /** 是否把首行当 markdown table header 渲染（仅 rows>=2 时成立）. */
        boolean headerAssumed;
    }

    /**
     * 取单元格字符串，按"用户在 Excel 里看到什么就提取什么"优先级：
     * <ol>
     *   <li>{@code getDisplayedText()} —— 数字/日期/百分比走单元格格式化后的文本（首选）</li>
     *   <li>{@code getValue()} —— 原始字符串形式，兜底罕见场景</li>
     *   <li>{@code sheet.getText(row,col)} —— 仅字符串单元格可取，最后兜底</li>
     * </ol>
     * 用 {@code sheet.getText} 单独兜底的原因：{@link CellRange} 取不到时（罕见）退回一次。
     */
    private static String safeCell(Worksheet sheet, int row, int col) {
        try {
            CellRange cell = sheet.getCellRange(row, col);
            if (cell == null || cell.isBlank()) return "";
            String t = cell.getDisplayedText();
            if (t == null || t.isEmpty()) t = cell.getValue();
            if (t == null || t.isEmpty()) {
                String legacy = sheet.getText(row, col);
                t = legacy == null ? "" : legacy;
            }
            // 单元格内换行在 markdown table 里会破坏表格；压成 <br>
            return NEWLINES.matcher(t).replaceAll("<br>").trim();
        } catch (Throwable t) {
            return "";
        }
    }

    private static String safeName(Worksheet sheet, int idx) {
        try {
            String n = sheet.getName();
            if (n == null || n.isEmpty()) return "Sheet" + (idx + 1);
            return n;
        } catch (Throwable ignore) {
            return "Sheet" + (idx + 1);
        }
    }

    /** 从原始文件名推导 kind；识别不了时回退 xlsx 作通用标签. */
    private static String kindFromName(String name) {
        if (name == null) return "xlsx";
        int dot = name.lastIndexOf('.');
        if (dot < 0 || dot == name.length() - 1) return "xlsx";
        String ext = name.substring(dot + 1).toLowerCase(Locale.ROOT);
        switch (ext) {
            case "xls": case "xlsx":
            case "xlsm": case "xlsb":
            case "xlt": case "xltx": case "xltm":
            case "ods":
                return ext;
            default:
                return "xlsx";
        }
    }

    private static void logParseWarn(String where, Throwable t) {
        try { Tools.log("[SpreadsheetParser] " + where + " failed", t); } catch (Throwable ignore) { /* ignore */ }
    }
}
