package com.gzzm.lobster.tool.builtin;

import com.gzzm.lobster.common.ToolCategory;
import com.gzzm.lobster.common.ToolRiskLevel;
import com.gzzm.lobster.config.LobsterConfig;
import com.gzzm.lobster.identity.UserContext;
import com.gzzm.lobster.sandbox.SandboxException;
import com.gzzm.lobster.sandbox.SandboxRequest;
import com.gzzm.lobster.sandbox.SandboxResult;
import com.gzzm.lobster.sandbox.SandboxService;
import com.gzzm.lobster.skill.SkillDefinition;
import com.gzzm.lobster.skill.SkillService;
import com.gzzm.lobster.thread.ThreadRoom;
import com.gzzm.lobster.thread.ThreadService;
import com.gzzm.lobster.tool.BuiltinToolDefinition;
import com.gzzm.lobster.tool.SchemaBuilder;
import com.gzzm.lobster.tool.ToolContext;
import com.gzzm.lobster.tool.ToolExecutor;
import com.gzzm.lobster.tool.ToolRegistry;
import com.gzzm.lobster.tool.ToolResult;
import net.cyan.nest.annotation.Inject;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

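/**
 * CodeExecTool: the code sandbox tool / code execution tool.
 *
 * <p>LLM entry point: the model writes a Python or JavaScript script, the sandbox runs it,
 * and the produced files fall back into the Workspace as binary Artifacts.
 * Each call corresponds to exactly one {@link SandboxService#exec}: a one-shot, stateless container.
 *
 * <p>Audit redaction: {@link #redactAuditDetail} records only {@code code_sha256} / {@code code_length} /
 * {@code code_ref} / {@code input_refs} / {@code activated_skill} / {@code timeout_seconds} /
 * {@code exit_code} / {@code walltime_ms} / {@code error_category} / {@code output_artifact_ids}.
 * The raw code text never enters the audit record.
 */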
public class CodeExecTool implements ToolExecutor {

    /**
     * Inline code is for short throwaway snippets only. Longer scripts should be
     * persisted via write_file as CODE_SCRIPT and passed through code_ref.
     */
    private static final int INLINE_CODE_MAX_CHARS = 8000;
    private static final int INLINE_CODE_MAX_BYTES = 12 * 1024;

    @Inject private ThreadService threadService;
    @Inject private SandboxService sandboxService;
    @Inject private SkillService skillService;

    /**
     * Registers this executor in the given registry, paired with the
     * {@code code_exec} definition built by {@link #def()}.
     *
     * @param registry registry that receives the definition/executor pair
     */
    public void registerTo(ToolRegistry registry) {
        BuiltinToolDefinition definition = def();
        registry.register(definition, this);
    }

    /**
     * Builds the {@code code_exec} tool definition: name, LLM-facing description,
     * category/risk, rate limit, tool timeout and the JSON input schema.
     *
     * <p>The tool timeout is the configured sandbox maximum plus a 15 second margin.
     * The {@code timeout_seconds} schema text quotes the default and the cap straight
     * from {@link LobsterConfig}, the same source {@code execute} uses to default and
     * clamp the value, so the description cannot drift from the enforced limits.
     *
     * @return the definition registered by {@link #registerTo(ToolRegistry)}
     */
    private BuiltinToolDefinition def() {
        return BuiltinToolDefinition.builder()
                .name("code_exec")
                .displayName("代码沙箱（Python）")
                .description("在隔离 Docker 沙箱里跑 Python / JavaScript 脚本，生成 "
                        + "docx/xlsx/pptx/pdf/html 等产物，或处理 /inputs 里的资源。"
                        + "\n\n【职责边界】code_exec 是执行器，不是代码文件写入工具。除少量短小一次性片段外，"
                        + "完整业务脚本、生成 PPT/Word/Excel/PDF/HTML 的脚本、需要修改迭代的脚本，"
                        + "都必须先用 write_file 创建 CODE_SCRIPT，再用 code_ref 执行。"
                        + "\n\n【关键规则】"
                        + "\n- **code 和 code_ref 严格二选一**：短脚本只传 code；长脚本先 write_file，再只传 code_ref。"
                        + "同一次调用不要同时带 code_ref 和 code；如果只是读取上一轮产物，把 artifactId 放进 input_refs 即可"
                        + "\n- 产物必须写到 /outputs/（单次 ≤ 50MB），/inputs 只读，/work 可写，无出网"
                        + "\n- 给用户的最终交付文件默认使用简体中文业务文件名，保留正确扩展名；"
                        + "只有代码脚本、用户指定名称或技术约定必须英文时才使用英文文件名"
                        + "\n- **跨轮次读文件**：每次是独立容器，/outputs 启动时为空。要读上一轮产物，"
                        + "把上轮 tool_result.produced[].artifactId 放进本轮 input_refs，文件会被挂到 /inputs/"
                        + "（同理 resourceId / oaFileId）。直接读 /outputs/xxx 会 FileNotFoundError"
                        + "\n- **读取 input_refs**：/inputs/manifest.json 的顶层是 JSON 数组，不是 {\"inputs\": [...]} 对象。"
                        + "正确：`manifest = json.load(open('/inputs/manifest.json')); src = manifest[0]['path']`；"
                        + "错误：`manifest['inputs'][0]['path']`。读 manifest 的同一次调用必须带 input_refs，"
                        + "且禁止硬编码 `/inputs/<文件名>`，真实路径只以 manifest[i]['path'] 为准"
                        + "\n- 生成 .docx **首选 python-docx** 而不是 docx-js（docx-js 页码字段违反 OOXML schema，MS Office 拒开）"
                        + "\n- **脚本传递规则**：少量几十行的一次性脚本可用 code inline；较长、可复用、需迭代的脚本必须先用 "
                        + "write_file(artifactType=CODE_SCRIPT, content=<脚本>) 保存，再用 code_ref=artifactId 执行。"
                        + "不要把长脚本原文塞进 code 参数；已有 code_ref 时不要再附带 code"
                        + "\n\n【详细指南】调用**前**若不熟悉镜像预装、跨轮次示例、skill bundle 路径、常见坑，"
                        + "先 `use_skill('sys_code-exec-guide')` 加载完整指南（含 docx→PDF 等完整可跑示例）。")
                .category(ToolCategory.WORKSPACE)
                .risk(ToolRiskLevel.WRITE)
                .rateLimitPerMinute(LobsterConfig.getSandboxRatePerMinute())
                // Tool-level timeout = sandbox maximum + 15s of slack on top of the script's own limit.
                .timeoutMs(LobsterConfig.getSandboxMaxTimeoutSec() * 1000L + 15000L)
                .inputSchema(SchemaBuilder.obj()
                        .propEnum("language", "脚本语言，默认 python",
                                "python", "javascript", "node")
                        .propStringMax("code", "【code 和 code_ref 二选一】仅用于短小、一次性的 inline 脚本。"
                                + "如果填写了 code，就不要再填写 code_ref。"
                                + "经验阈值：少量几十行，且不超过 " + INLINE_CODE_MAX_CHARS + " 字符。"
                                + "禁止把完整业务脚本、生成 PPT/Word/Excel/PDF/HTML 的长脚本放在这里。"
                                + "较长脚本、需要复用/调试迭代的脚本，必须先调用 write_file("
                                + "artifactType=CODE_SCRIPT, mimeType=text/x-python 或 application/javascript, content=<脚本>)，"
                                + "再调用 code_exec(code_ref=<artifactId>)；不要把长脚本原文放到 code。",
                                INLINE_CODE_MAX_CHARS)
                        .prop("code_ref", "string", "【code 和 code_ref 二选一】脚本文件的 workspace 引用"
                                + "；如果填写了 code_ref，就不要再填写 code。"
                                + "（artifactId / resourceId / oaFileId）。长脚本首选：先 write_file 创建 CODE_SCRIPT，"
                                + "再把返回的 artifactId 放这里。生成 PPT/Word/Excel/PDF/HTML 的完整脚本默认走 code_ref。"
                                + "适合复用、调试迭代，也避免每轮重传 code。")
                        .propArray("input_refs", "要挂到 /inputs 的 workspace 引用列表（只读）. "
                                        + "接受三种 ID：resourceId（用户上传） / artifactId（art_ 前缀，**包括本 thread "
                                        + "之前 code_exec 产生的 artifact**——这是跨轮次读写同一个文件的唯一路径） / "
                                        + "oaFileId（oa_ 前缀，OA 平台文件）. "
                                        + "每个 ref 落到 /inputs/{NN}-{safeName}.{ext}，**NN 是零填充两位**（第一个是 00 不是 0）. "
                                        + "**实际路径必须以 /inputs/manifest.json 的 path 字段为准**，不要靠 idx 直接猜——"
                                        + "常见错把 `/inputs/0-xxx.xlsx` 当成第一个文件直接 FileNotFoundError，正确通常是 `/inputs/00-xxx.xlsx`，"
                                        + "但如果原始 displayName 已带编号，也可能变成 `/inputs/00-00-xxx.pdf`，所以不要硬编码 /inputs/<文件名>. "
                                        + "manifest 顶层是数组，每项含 index/ref/displayName/mimeType/size/path；"
                                        + "脚本里通常这么读：`json.load(open('/inputs/manifest.json'))[i]['path']`，"
                                        + "不要写成 `json.load(... )['inputs'][i]['path']`.",
                                singleton("string"))
                        .prop("activated_skill", "string", "激活的 skill id；bundle 挂到 /skill/<id>/")
                        // Quote the configured limits instead of literals so the schema text
                        // always matches what execute() actually enforces.
                        .propInt("timeout_seconds", "脚本超时（秒），默认 "
                                + LobsterConfig.getSandboxDefaultTimeoutSec()
                                + "，上限 " + LobsterConfig.getSandboxMaxTimeoutSec())
                        .prop("output_hint", "string", "仅用于产物 MIME 猜测；真正类型以扩展名为准")
                        .required("language")
                        .build())
                .build();
    }

    /**
     * Creates a fresh, mutable one-entry map of the form {@code {"type": type}},
     * used as the item schema of an array property.
     *
     * @param type value stored under the {@code "type"} key
     * @return a new insertion-ordered map containing only that entry
     */
    private static Map<String, Object> singleton(String type) {
        Map<String, Object> itemSchema = new LinkedHashMap<>();
        itemSchema.put("type", type);
        return itemSchema;
    }

    /**
     * Runs one {@code code_exec} call through a single {@link SandboxService#exec}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>normalise {@code language} (missing/empty means python; unknown values are rejected);</li>
     *   <li>require at least one of {@code code} / {@code code_ref};</li>
     *   <li>clamp {@code timeout_seconds} to the configured default/maximum;</li>
     *   <li>resolve the thread via {@code threadService.requireOwnedThread};</li>
     *   <li>if {@code activated_skill} is given, require that the skill exists and is in the
     *       thread's activated set;</li>
     *   <li>reject inline code above {@link #INLINE_CODE_MAX_CHARS} chars or
     *       {@link #INLINE_CODE_MAX_BYTES} UTF-8 bytes;</li>
     *   <li>emit a {@code sandbox.accepted} progress event and execute;</li>
     *   <li>map the sandbox outcome to a {@link ToolResult}.</li>
     * </ol>
     *
     * @param ctx  call context (user, thread id, run id, tool-call id, progress sink)
     * @param args raw tool arguments as supplied by the model
     * @return an error result for validation/sandbox failures, otherwise an ok result whose
     *         artifact ids are lifted to the top level
     * @throws Exception whatever the thread lookup propagates; failures of the sandbox call
     *                   itself are converted into error results instead of being thrown
     */
    @Override
    public ToolResult execute(ToolContext ctx, Map<String, Object> args) throws Exception {
        String language = asStr(args.get("language"));
        String lang = normalizeLanguage(language);
        if (lang == null) {
            return ToolResult.error("code_exec.unsupported_language: " + language
                    + " (supported: python, javascript)");
        }

        String code = asStr(args.get("code"));
        String codeRef = asStr(args.get("code_ref"));
        // The prompt/schema still asks for exactly one of code / code_ref. At runtime both are
        // tolerated so that a stray code_ref does not block an otherwise runnable inline script;
        // both values are forwarded to the sandbox request unchanged.
        boolean hasCode = code != null && !code.isEmpty();
        boolean hasCodeRef = codeRef != null && !codeRef.isEmpty();
        if (!hasCode && !hasCodeRef) {
            return ToolResult.error("code_exec.invalid: must provide either `code` (inline) or `code_ref` "
                    + "(artifactId / resourceId from write_file).");
        }

        List<String> inputRefs = asStringList(args.get("input_refs"));
        String skill = asStr(args.get("activated_skill"));
        int timeout = asInt(args.get("timeout_seconds"),
                LobsterConfig.getSandboxDefaultTimeoutSec());
        if (timeout <= 0) {
            timeout = LobsterConfig.getSandboxDefaultTimeoutSec();
        }
        timeout = Math.min(timeout, LobsterConfig.getSandboxMaxTimeoutSec());
        String outputHint = asStr(args.get("output_hint"));

        ThreadRoom thread = threadService.requireOwnedThread(ctx.getUserContext(), ctx.getThreadId());
        UserContext user = ctx.getUserContext();

        ToolResult skillError = verifySkillActivated(ctx, skill);
        if (skillError != null) {
            return skillError;
        }

        // Inline code is only for short throwaway scripts. Long scripts must be saved with
        // write_file as CODE_SCRIPT and run through code_ref, so large sources are not stuffed
        // into tool_call arguments.
        int inlineCodeLength = 0;
        if (hasCode) {
            inlineCodeLength = code.length();
            int inlineCodeBytes = code.getBytes(StandardCharsets.UTF_8).length;
            if (inlineCodeLength > INLINE_CODE_MAX_CHARS || inlineCodeBytes > INLINE_CODE_MAX_BYTES) {
                return rejectOversizedInline(code, inlineCodeLength, inlineCodeBytes);
            }
        }

        SandboxRequest req = SandboxRequest.builder()
                .thread(thread)
                .user(user)
                .runId(ctx.getRunId())
                .toolCallId(ctx.getToolCallId())
                .language(lang)
                .code(code)
                .codeRef(codeRef)
                .inputRefs(inputRefs)
                .activatedSkill(skill)
                .timeoutSec(timeout)
                .outputHint(outputHint)
                .build();

        // Emit a "preparing" event before the real work so the front end gets immediate feedback.
        String sourceDesc = hasCode
                ? ("inline，" + inlineCodeLength + " 字符")
                : ("来自 " + codeRef);
        ctx.getProgress().emit("sandbox.accepted",
                "⚙️ 代码沙箱：正在准备 " + lang + " 脚本（" + sourceDesc + "）"
                        + (inputRefs.isEmpty() ? "" : "，附 " + inputRefs.size() + " 个输入文件")
                        + (skill == null || skill.isEmpty() ? "" : "，激活 skill=" + skill),
                "language", lang);

        SandboxResult result;
        try {
            result = sandboxService.exec(req, ctx.getProgress());
        } catch (SandboxException se) {
            // Argument validation failed / docker unreachable / container creation failed.
            // Keep the message detailed so the LLM can relay it instead of a bare "sandbox unavailable".
            Map<String, Object> diag = new LinkedHashMap<>();
            diag.put("errorCategory", "rejected");
            diag.put("error_code", se.getCode());
            diag.put("code_ref", codeRef);
            String msg = se.getCode() + ": " + se.getMessage();
            if ("sandbox.docker_create".equals(se.getCode())
                    || "sandbox.docker_start".equals(se.getCode())) {
                msg = msg + "（排查：宿主 docker daemon 是否运行 / 镜像 "
                        + LobsterConfig.getSandboxImage()
                        + " 是否已 pull / Tomcat 用户是否有 docker 组权限）";
            }
            return ToolResult.errorData(msg, diag);
        } catch (Throwable t) {
            // Unexpected failure (NPE, missing injection, ...): surface the real cause to the LLM.
            try { com.gzzm.platform.commons.Tools.log("[CodeExecTool] sandboxService.exec threw", t); }
            catch (Throwable ignore) { /* logging is best-effort */ }
            if (t instanceof InterruptedException) {
                // Catching InterruptedException clears the interrupt flag; restore it so the
                // caller can still observe that this thread was asked to stop.
                Thread.currentThread().interrupt();
            }
            Map<String, Object> diag = new LinkedHashMap<>();
            diag.put("errorCategory", "rejected");
            diag.put("error_code", "sandbox.internal_error");
            diag.put("code_ref", codeRef);
            String cause = t.getMessage() == null ? t.getClass().getSimpleName() : t.getMessage();
            return ToolResult.errorData("sandbox.internal_error: " + cause, diag);
        }

        Map<String, Object> data = new LinkedHashMap<>();
        data.put("exit_code", result.getExitCode());
        data.put("walltime_ms", result.getWalltimeMs());
        String category = result.getErrorCategory();
        data.put("errorCategory", category);
        String stdout = truncate(result.getStdout(), 8000);
        String stderr = truncate(result.getStderr(), 8000);
        if (stdout != null) {
            data.put("stdout", stdout);
        }
        if (stderr != null) {
            data.put("stderr", stderr);
        }

        // One pass builds both the per-file rows and the list of non-null artifact ids.
        List<Map<String, Object>> produced = new ArrayList<>();
        List<String> artifactIds = new ArrayList<>();
        for (SandboxResult.Produced p : result.getProduced()) {
            Map<String, Object> row = new LinkedHashMap<>();
            row.put("resourceId", p.getResourceId());
            row.put("artifactId", p.getArtifactId());
            row.put("displayName", p.getDisplayName());
            row.put("mimeType", p.getMimeType());
            row.put("size", p.getSize());
            produced.add(row);
            if (p.getArtifactId() != null) {
                artifactIds.add(p.getArtifactId());
            }
        }
        data.put("produced", produced);

        if ("timeout".equals(category)) {
            return ToolResult.errorData("code_exec.timeout: script exceeded " + timeout + "s", data);
        }
        if ("oom".equals(category)) {
            return ToolResult.errorData("code_exec.oom: container out-of-memory", data);
        }
        if ("cancelled".equals(category)) {
            return ToolResult.errorData("code_exec.cancelled", data);
        }
        if ("rejected".equals(category)) {
            // Avoid the literal text "null" in the message when the sandbox reported no stderr.
            String msg = stderr == null
                    ? "code_exec.output_rejected"
                    : "code_exec.output_rejected: " + stderr;
            return ToolResult.errorData(msg, data);
        }
        if ("script_error".equals(category) || result.getExitCode() != 0) {
            // stderr goes back in structured form so the LLM can read the failure next round.
            // The most informative stderr line is also appended to the message, because the
            // model sometimes reads only the message summary and never expands data.stderr.
            String hint = extractFirstError(result.getStderr());
            String msg = "code_exec.script_error: exit=" + result.getExitCode();
            if (hint != null) {
                msg += " | " + hint;
            }
            return ToolResult.errorData(msg, data);
        }

        // Success: artifact ids are lifted to the top level of the ToolResult.
        // The message also reminds the LLM that /outputs is empty on the next call and that
        // produced files must be passed back through input_refs.
        StringBuilder okMsg = new StringBuilder("ok");
        if (!artifactIds.isEmpty()) {
            okMsg.append(". 下一轮若要继续读这些产物（如 docx → PDF），把 artifactId 填到 input_refs：")
                    .append(String.join(",", artifactIds))
                    .append(" —— 文件会挂到 /inputs/，而非保留在 /outputs/（每次容器启动时 /outputs 是空的）.");
        }
        return ToolResult.okWithArtifacts(okMsg.toString(), data, artifactIds);
    }

    /**
     * Maps the user-supplied language name to the canonical sandbox language.
     *
     * @param language raw argument value; {@code null} or empty selects python
     * @return {@code "python"} for python/py, {@code "javascript"} for javascript/js/node/nodejs
     *         (all case-insensitive), or {@code null} when the value is not supported
     */
    private static String normalizeLanguage(String language) {
        if (language == null || language.isEmpty()) {
            return "python";
        }
        if ("python".equalsIgnoreCase(language) || "py".equalsIgnoreCase(language)) {
            return "python";
        }
        if ("javascript".equalsIgnoreCase(language) || "js".equalsIgnoreCase(language)
                || "node".equalsIgnoreCase(language) || "nodejs".equalsIgnoreCase(language)) {
            return "javascript";
        }
        return null;
    }

    /**
     * Checks that the requested skill exists and is in the thread's activated set.
     * A skill must have gone through {@code use_skill} before it may be passed here.
     *
     * @param ctx   call context providing the thread id
     * @param skill requested skill id; {@code null} or empty means no skill was requested
     * @return an error result describing the problem, or {@code null} when there is nothing to reject
     */
    private ToolResult verifySkillActivated(ToolContext ctx, String skill) {
        if (skill == null || skill.isEmpty()) {
            return null;
        }
        SkillDefinition def;
        try {
            def = skillService.get(skill);
        } catch (Exception e) {
            return ToolResult.error("code_exec.skill_not_found: " + skill);
        }
        if (def == null) {
            return ToolResult.error("code_exec.skill_not_found: " + skill);
        }
        if (!skillService.activatedForThread(ctx.getThreadId()).contains(skill)) {
            return ToolResult.error(
                    "code_exec.skill_not_activated: call use_skill('" + skill + "') first");
        }
        return null;
    }

    /**
     * Builds the {@code code_exec.inline_too_long} error for inline code above the limits.
     * The diagnostics carry only the hash and sizes of the code, not its text.
     *
     * @param code        the oversized inline script
     * @param codeLength  its length in UTF-16 chars
     * @param codeBytes   its length in UTF-8 bytes
     * @return the error result telling the model to switch to write_file + code_ref
     */
    private static ToolResult rejectOversizedInline(String code, int codeLength, int codeBytes) {
        Map<String, Object> diag = new LinkedHashMap<>();
        diag.put("errorCategory", "rejected");
        diag.put("error_code", "code_exec.inline_too_long");
        diag.put("code_sha256", sha256(code));
        diag.put("code_length", codeLength);
        diag.put("code_bytes", codeBytes);
        diag.put("inline_max_chars", INLINE_CODE_MAX_CHARS);
        diag.put("inline_max_bytes", INLINE_CODE_MAX_BYTES);
        return ToolResult.errorData("code_exec.inline_too_long: inline code is "
                + codeLength + " chars / " + codeBytes + " bytes. "
                + "短脚本才放 code；长脚本请先调用 write_file("
                + "artifactType=CODE_SCRIPT, displayName='script.py', mimeType='text/x-python', "
                + "content=<脚本>)，然后用返回的 artifactId 调 code_exec(code_ref=artifactId)。",
                diag);
    }

    /**
     * Produces the audit view of one call. The code text itself is never copied; only its
     * SHA-256 and length are recorded, next to the reference/skill/timeout arguments and,
     * when the result carries data, the exit code, wall time, error category and the ids
     * of the produced artifacts.
     *
     * @param args   raw tool arguments of the call
     * @param result tool result, may be {@code null}
     * @return a new insertion-ordered map holding the redacted audit fields
     */
    @Override
    public Map<String, Object> redactAuditDetail(Map<String, Object> args, ToolResult result) {
        Map<String, Object> detail = new LinkedHashMap<>();

        // Hash + length are enough for de-duplication and tracing.
        String inlineCode = asStr(args.get("code"));
        if (inlineCode != null && !inlineCode.isEmpty()) {
            detail.put("code_sha256", sha256(inlineCode));
            detail.put("code_length", inlineCode.length());
        }

        String scriptRef = asStr(args.get("code_ref"));
        if (scriptRef != null && !scriptRef.isEmpty()) {
            detail.put("code_ref", scriptRef);
        }

        // These arguments are copied through verbatim (including absent -> null).
        for (String key : new String[] {"input_refs", "activated_skill", "timeout_seconds"}) {
            detail.put(key, args.get(key));
        }

        Map<String, Object> resultData = result == null ? null : result.getData();
        if (resultData == null) {
            return detail;
        }

        detail.put("exit_code", resultData.get("exit_code"));
        detail.put("walltime_ms", resultData.get("walltime_ms"));
        detail.put("error_category", resultData.get("errorCategory"));

        Object producedRows = resultData.get("produced");
        if (producedRows instanceof List) {
            List<String> artifactIds = new ArrayList<>();
            for (Object row : (List<?>) producedRows) {
                if (!(row instanceof Map)) {
                    continue;
                }
                Object artifactId = ((Map<?, ?>) row).get("artifactId");
                if (artifactId != null) {
                    artifactIds.add(String.valueOf(artifactId));
                }
            }
            detail.put("output_artifact_ids", artifactIds);
        }
        return detail;
    }

    // --- helpers ---
    /**
     * Null-preserving string conversion.
     *
     * @param o any value, may be {@code null}
     * @return {@code null} for {@code null}, otherwise {@link String#valueOf(Object)}
     */
    private static String asStr(Object o) {
        if (o == null) {
            return null;
        }
        return String.valueOf(o);
    }

    /**
     * Lenient integer coercion for loosely typed tool arguments.
     *
     * <p>Numbers are narrowed with {@link Number#intValue()}. Any other value is converted
     * to text, trimmed, and parsed as a decimal integer; surrounding whitespace (for example
     * {@code " 60 "}) therefore no longer falls back to the default.
     *
     * @param o   raw argument, may be {@code null}
     * @param def value returned when {@code o} is {@code null} or not parseable
     * @return the coerced integer, or {@code def}
     */
    private static int asInt(Object o, int def) {
        if (o == null) {
            return def;
        }
        if (o instanceof Number) {
            return ((Number) o).intValue();
        }
        String text = String.valueOf(o);
        if (text == null) {
            // toString() returned null; treat it like any other unparseable value.
            return def;
        }
        try {
            return Integer.parseInt(text.trim());
        } catch (NumberFormatException e) {
            return def;
        }
    }

    /**
     * Converts a loosely typed argument into a list of strings.
     *
     * <p>Only {@link List} inputs contribute elements; every non-null element is converted
     * with {@link String#valueOf(Object)} and {@code null} elements are skipped. Any other
     * input (including {@code null}) yields an empty list. Iterating through a wildcard
     * list avoids the unchecked cast the conversion does not need.
     *
     * @param o raw argument, may be {@code null}
     * @return a new mutable list, never {@code null}
     */
    private static List<String> asStringList(Object o) {
        List<String> out = new ArrayList<>();
        if (o instanceof List) {
            for (Object element : (List<?>) o) {
                if (element != null) {
                    out.add(String.valueOf(element));
                }
            }
        }
        return out;
    }

    /**
     * Computes the SHA-256 digest of the UTF-8 encoding of {@code s}.
     *
     * @param s text to hash, may be {@code null}
     * @return the digest as 64 lowercase hex characters, or {@code null} when {@code s} is
     *         {@code null} or the digest could not be computed
     */
    private static String sha256(String s) {
        if (s == null) {
            return null;
        }
        final byte[] digest;
        try {
            digest = MessageDigest.getInstance("SHA-256").digest(s.getBytes(StandardCharsets.UTF_8));
        } catch (Exception e) {
            // Best-effort: callers treat a missing hash as "not available".
            return null;
        }
        // Two hex digits per byte, high nibble first.
        char[] hex = new char[digest.length * 2];
        for (int i = 0; i < digest.length; i++) {
            int unsigned = digest[i] & 0xFF;
            hex[2 * i] = Character.forDigit(unsigned >>> 4, 16);
            hex[2 * i + 1] = Character.forDigit(unsigned & 0x0F, 16);
        }
        return new String(hex);
    }

    /** Line separator accepted in stderr: LF or CRLF. */
    private static final java.util.regex.Pattern STDERR_LINE_BREAK =
            java.util.regex.Pattern.compile("\\r?\\n");
    /** Trailing Node.js version banner, e.g. {@code Node.js v18.17.0}. */
    private static final java.util.regex.Pattern NODE_VERSION_LINE =
            java.util.regex.Pattern.compile("^Node\\.js v\\d+\\.\\d+\\.\\d+.*$");
    /** Lines made only of carets, tildes, braces and spaces (underline hints, object braces). */
    private static final java.util.regex.Pattern MARKER_ONLY_LINE =
            java.util.regex.Pattern.compile("^[\\^~{} ]+$");
    /** Structured error-object fields Node sometimes prints after the stack. */
    private static final java.util.regex.Pattern NODE_ERROR_FIELD_LINE =
            java.util.regex.Pattern.compile("^(code|requireStack|stack|errno|syscall|path)\\s*:\\s*.*$");
    /** A bare quoted literal, optionally followed by a comma. */
    private static final java.util.regex.Pattern QUOTED_LITERAL_LINE =
            java.util.regex.Pattern.compile("^['\"].*['\"]\\s*,?$");
    /** Maximum number of chars of the summary line kept before the ellipsis. */
    private static final int ERROR_SUMMARY_MAX_CHARS = 200;

    /**
     * Picks the single most informative line out of stderr, for use in the
     * {@code script_error} message.
     *
     * <p>The lines are scanned from the end towards the start, and the first line that is
     * not filtered out by {@link #isIgnorableErrorSummaryLine(String)} is returned. Despite
     * the method name, this is therefore the <em>last</em> meaningful line: a Python
     * traceback ends with {@code "ExceptionType: message"}, and Node's {@code "Error: ..."}
     * usually sits near the end as well.
     *
     * <p>The summary is capped at 200 chars followed by an ellipsis. The cut never falls
     * between the two halves of a surrogate pair, so the result stays well-formed UTF-16.
     *
     * @param stderr full stderr text, may be {@code null}
     * @return the trimmed summary line, or {@code null} when stderr is {@code null}/empty or
     *         contains only ignorable lines
     */
    static String extractFirstError(String stderr) {
        if (stderr == null || stderr.isEmpty()) {
            return null;
        }
        String[] lines = STDERR_LINE_BREAK.split(stderr);
        for (int i = lines.length - 1; i >= 0; i--) {
            String line = lines[i].trim();
            if (isIgnorableErrorSummaryLine(line)) {
                continue;
            }
            if (line.length() <= ERROR_SUMMARY_MAX_CHARS) {
                return line;
            }
            int cut = ERROR_SUMMARY_MAX_CHARS;
            // Step back one char when the cut would separate a high surrogate from its low half.
            if (Character.isHighSurrogate(line.charAt(cut - 1))
                    && Character.isLowSurrogate(line.charAt(cut))) {
                cut--;
            }
            return line.substring(0, cut) + "…";
        }
        return null;
    }

    /**
     * Tells whether a (trimmed) stderr line is navigation noise rather than an error summary.
     *
     * @param line one trimmed stderr line, may be {@code null}
     * @return {@code true} for null/empty lines, the Node.js version banner, Node
     *         {@code "at ..."} frames, Python {@code File "..."} frames, marker-only lines,
     *         Node error-object fields and bare quoted literals; {@code false} otherwise
     */
    private static boolean isIgnorableErrorSummaryLine(String line) {
        if (line == null || line.isEmpty()) {
            return true;
        }
        // Node stack: "    at Foo (file:1:2)"
        if (line.startsWith("at ")) {
            return true;
        }
        // Python stack: 'File "/path", line N, in <module>'
        if (line.startsWith("File \"")) {
            return true;
        }
        return NODE_VERSION_LINE.matcher(line).matches()
                || MARKER_ONLY_LINE.matcher(line).matches()
                || NODE_ERROR_FIELD_LINE.matcher(line).matches()
                || QUOTED_LITERAL_LINE.matcher(line).matches();
    }

    /**
     * Caps a string at {@code max} chars and appends a truncation marker when it was cut.
     *
     * <p>If the cut would fall between the two halves of a surrogate pair, one char less is
     * kept so the output does not end in a dangling high surrogate.
     *
     * @param s   text to cap, may be {@code null}
     * @param max maximum number of chars kept from {@code s}
     * @return {@code null} for {@code null}, {@code s} itself when it fits, otherwise the
     *         kept prefix followed by {@code "\n... [truncated]"}
     */
    private static String truncate(String s, int max) {
        if (s == null) {
            return null;
        }
        if (s.length() <= max) {
            return s;
        }
        int cut = max;
        // s.length() > max here, so charAt(cut) is always in range.
        if (cut > 0 && Character.isHighSurrogate(s.charAt(cut - 1))
                && Character.isLowSurrogate(s.charAt(cut))) {
            cut--;
        }
        return s.substring(0, cut) + "\n... [truncated]";
    }
}