package com.gzzm.lobster.sandbox;

import com.gzzm.lobster.artifact.Artifact;
import com.gzzm.lobster.artifact.ArtifactService;
import com.gzzm.lobster.common.ArtifactType;
import com.gzzm.lobster.common.IdGenerator;
import com.gzzm.lobster.common.JsonUtil;
import com.gzzm.lobster.config.LobsterConfig;
import com.gzzm.lobster.identity.UserContext;
import com.gzzm.lobster.skill.SkillAssetService;
import com.gzzm.lobster.thread.ThreadRoom;
import com.gzzm.lobster.workspace.ResolvedFile;
import com.gzzm.lobster.workspace.WorkspaceResource;
import com.gzzm.lobster.workspace.WorkspaceResourceResolver;
import com.gzzm.lobster.workspace.WorkspaceService;
import com.gzzm.platform.commons.Tools;
import net.cyan.arachne.annotation.Service;
import net.cyan.nest.annotation.Inject;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFilePermission;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Set;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * SandboxService —— 代码沙箱编排器 / Code sandbox orchestrator.
 *
 * <p>职责：
 * <ol>
 *   <li>入参校验（code 长度 / timeout 上限 / skill 存在）</li>
 *   <li>skill bundle 解压挂载到 {@code /skill/<skillId>}（只读）</li>
 *   <li>把 {@code input_refs} 解析成字节并按 {@code /inputs/{idx}-{safeName}.{ext}} 落盘，
 *       同时写 {@code manifest.json}</li>
 *   <li>Write {@code code} to {@code /work/entry.py} ({@code /work/entry.js} for JavaScript); {@code /work} is bind-mounted read-write as a whole</li>
 *   <li>docker run：一次性容器，runId→containerId 注册表支持外部 kill</li>
 *   <li>扫描 {@code /outputs}，逐个回落为 Artifact + WorkspaceResource，遵守 50MB 上限</li>
 *   <li>清理临时目录</li>
 * </ol>
 */
@Service
public class SandboxService {

    // Collaborators supplied through @Inject field injection.
    // dockerRunner runs one-off containers and kills them on cancellation.
    @Inject private DockerRunner dockerRunner;
    @Inject private ArtifactService artifactService;
    @Inject private WorkspaceService workspaceService;
    // resourceResolver turns a workspace ref (code_ref / input_refs entry) into a ResolvedFile.
    @Inject private WorkspaceResourceResolver resourceResolver;
    // skillAssetService extracts the activated skill bundle to a host directory for mounting.
    @Inject private SkillAssetService skillAssetService;
    // sandboxPoolService provides leased containers/directories when the sandbox pool is enabled.
    @Inject private SandboxPoolService sandboxPoolService;

    // Matches "/skill/<id>" where <id> starts with "sys_" and the match is immediately
    // followed by "/" (lookahead, not consumed). Group 1 is the skill id.
    private static final Pattern SKILL_PATH_PATTERN =
            Pattern.compile("/skill/(sys_[A-Za-z0-9_.-]+)(?=/)");
    // Matches "/skill/<id>/<relative path>". Group 1 is the skill id, group 2 the path, which
    // runs until whitespace or one of the delimiters ' " ` ) , } ].
    private static final Pattern SKILL_FILE_PATH_PATTERN =
            Pattern.compile("/skill/(sys_[A-Za-z0-9_.-]+)/([^\\s'\"`),}\\]]+)");
    // Matches any "/inputs/<something>" path except "/inputs/manifest.json" itself
    // (i.e. manifest.json followed by end of input or one of the delimiters above).
    private static final Pattern DIRECT_INPUT_FILE_PATTERN =
            Pattern.compile("/inputs/(?!manifest\\.json(?=$|[\\s'\"`),}\\]]))[^\\s'\"`),}\\]]+");

    /**
     * Registry mapping runId → cancel token, used by {@link AgentRuntime#cancel} to issue a docker kill.
     *
     * <p>Two lines of defence against cancellation races:
     * <ul>
     *   <li>{@code containerId}: filled in through the
     *       {@link DockerRunner.RunSpec#onContainerCreated} callback as soon as
     *       {@code docker create} returns — from then on {@code killByRun} calls
     *       {@code docker kill} with the real id, which is intended to work whether the
     *       container is in the created or the running state</li>
     *   <li>{@code cancelled}: once set, {@code DockerRunner} is expected to observe it at its
     *       three checkpoints (after create, before start, and the once-per-second poll after
     *       start) and end the flow on its own</li>
     * </ul>
     */
    private final ConcurrentHashMap<String, CancelToken> runToContainer = new ConcurrentHashMap<>();

    /** Per-run cancellation state shared between {@code exec} and {@code killByRun}. */
    private static final class CancelToken {
        /** Set once cancellation has been requested; never reset. */
        final AtomicBoolean cancelled = new AtomicBoolean(false);
        /** Pre-generated container name passed to docker create --name; also the kill fallback. */
        final String containerName;
        /** Filled in after docker create returns; {@code null} until then. */
        volatile String containerId;

        CancelToken(String containerName) {
            this.containerName = containerName;
        }
    }

    /**
     * Runs a sandbox request without progress reporting.
     *
     * <p>Delegates to the two-argument {@code exec} with {@code ProgressSink.NOOP} as the sink.
     *
     * @param req the sandbox request to execute
     * @return the result produced by the two-argument {@code exec}
     * @throws Exception whatever the two-argument {@code exec} throws
     */
    public SandboxResult exec(SandboxRequest req) throws Exception {
        return exec(req, com.gzzm.lobster.tool.ProgressSink.NOOP);
    }

    /**
     * Runs one sandbox request end to end, pushing stage events to {@code progress}
     * (this is the entry point used by CodeExecTool to bridge stage events to the front-end SSE).
     *
     * <p>Steps, in order: resolve {@code codeRef} into inline code, validate the request, prepare
     * the inputs / outputs / work directories (taken from a pool lease when the pool is enabled,
     * otherwise a fresh per-run directory), stage inputs plus {@code manifest.json}, write the
     * entry script, optionally mount the activated skill bundle, run the container, and finally
     * harvest {@code /outputs}. The pool lease is released, or the per-run directory deleted, in
     * {@code finally}.
     *
     * <p>Outputs are not harvested when the run was cancelled, timed out or was OOM-killed; they
     * are harvested after a non-zero exit because the script may have written files before failing.
     *
     * @param req      the request; carries either inline code or a code reference
     * @param progress sink for stage events; {@code null} is treated as {@code ProgressSink.NOOP}
     * @return the result with exit code, stdout/stderr, wall time and error category
     * @throws Exception on request rejection ({@link SandboxException}) or I/O / runner failures
     */
    public SandboxResult exec(SandboxRequest req, com.gzzm.lobster.tool.ProgressSink progress) throws Exception {
        final com.gzzm.lobster.tool.ProgressSink prog = progress == null
                ? com.gzzm.lobster.tool.ProgressSink.NOOP : progress;
        long startMs = System.currentTimeMillis();
        // If the LLM passed a codeRef (script stored as a workspace artifact, e.g. via write_file),
        // resolve it into a code string first. Inline code wins when both are present.
        req = resolveCodeRefIfNeeded(req, prog);
        validate(req);
        validateLanguageSpecificSyntaxHints(req.getLanguage(), req.getCode());
        validateSkillMountReferencesForCode(req.getCode(), req.getActivatedSkill());
        validateInputPathReferencesForCode(req.getCode(), req.getInputRefs());

        String runKey = (req.getRunId() == null || req.getRunId().isEmpty()) ? IdGenerator.uuid() : req.getRunId();
        String shortId = IdGenerator.uuid().substring(0, 12);
        boolean pooled = LobsterConfig.isSandboxPoolEnabled();
        SandboxPoolService.Lease poolLease = null;
        String containerName;
        Path runRoot;
        Path inputs;
        Path outputs;
        // /work: rw bind mount; the LLM uses it as scratch space (copying project templates,
        // generating intermediate files). The entry script also lives here, at /work/entry.* in
        // the container.
        Path work;
        if (pooled) {
            poolLease = sandboxPoolService.acquire();
            containerName = sandboxPoolService.containerName(poolLease);
            runRoot = sandboxPoolService.root(poolLease);
            inputs = sandboxPoolService.inputs(poolLease);
            outputs = sandboxPoolService.outputs(poolLease);
            work = sandboxPoolService.work(poolLease);
        } else {
            containerName = "lobster-sbx-" + shortId;
            runRoot = Paths.get(LobsterConfig.getSandboxWorkDir(), runKey + "-" + shortId).toAbsolutePath();
            inputs = runRoot.resolve("inputs");
            outputs = runRoot.resolve("outputs");
            work = runRoot.resolve("work");
        }

        DockerRunner.RunSpec lastSpec = null;
        try {
            if (!pooled) {
                Files.createDirectories(inputs);
                Files.createDirectories(outputs);
                Files.createDirectories(work);
            }
            // Linux bind-mount write-permission pitfall:
            // the host directories are owned by the Tomcat process user while the container runs
            // as the sandbox uid. If those differ, the container user cannot write outputs/ or
            // work/ and the script fails with "permission denied" on /outputs, which looks like
            // "script ran fine but produced nothing". The simplest robust approach is to open up
            // these three directories. This is acceptable because runRoot sits under
            // sandboxWorkDir and holds nothing sensitive. On non-POSIX file systems (Windows
            // Docker Desktop) setting POSIX permissions is unsupported and is meant to be ignored.
            relaxPermissions(inputs);
            relaxPermissions(outputs);
            relaxPermissions(work);

            // 1) stage inputs + manifest
            if (!req.getInputRefs().isEmpty()) {
                prog.emit("sandbox.staging",
                        "📥 正在准备 " + req.getInputRefs().size() + " 个输入文件...");
            }
            List<Map<String, Object>> manifest = stageInputs(req, inputs);
            Path manifestFile = inputs.resolve("manifest.json");
            Files.write(manifestFile, JsonUtil.toJson(manifest).getBytes(StandardCharsets.UTF_8));
            relaxFilePermissions(manifestFile);

            // 2) The entry script is written at the root of /work (bound into the container with
            //    the rest of /work). A single flag drives BOTH the file name and the entrypoint
            //    below, so anything that is not "javascript" is consistently treated as Python
            //    (previously an unknown/null language wrote entry.js but ran it with python).
            boolean javascript = "javascript".equals(req.getLanguage());
            String entryFileName = javascript ? "entry.js" : "entry.py";
            Path entry = work.resolve(entryFileName);
            Files.write(entry, req.getCode().getBytes(StandardCharsets.UTF_8));
            relaxFilePermissions(entry);

            // 3) skill bundle (optional)
            Path skillHost = null;
            if (req.getActivatedSkill() != null && !req.getActivatedSkill().isEmpty()) {
                prog.emit("sandbox.skill",
                        "📦 正在挂载 skill: " + req.getActivatedSkill());
                skillHost = skillAssetService.ensureExtracted(req.getActivatedSkill());
                validateReferencedSkillFilesExist(req.getCode(), req.getActivatedSkill(), skillHost);
            }

            // 4) docker run
            DockerRunner.RunSpec spec = new DockerRunner.RunSpec();
            spec.image = LobsterConfig.getSandboxImage();
            spec.containerName = containerName;
            spec.user = LobsterConfig.getSandboxUid() + ":" + LobsterConfig.getSandboxUid();
            spec.memoryMb = LobsterConfig.getSandboxMemoryMb();
            spec.cpus = LobsterConfig.getSandboxCpus();
            spec.pidsLimit = LobsterConfig.getSandboxPidsLimit();
            spec.walltimeSec = req.getTimeoutSec();
            spec.mounts.add(new DockerRunner.Mount(inputs.toString(), "/inputs", true));
            spec.mounts.add(new DockerRunner.Mount(outputs.toString(), "/outputs", false));
            // /work is bound rw as a whole, so node_modules, template copies, pnpm state etc. can
            // be written there.
            spec.mounts.add(new DockerRunner.Mount(work.toString(), "/work", false));
            if (skillHost != null) {
                spec.mounts.add(new DockerRunner.Mount(skillHost.toString(),
                        "/skill/" + req.getActivatedSkill(), true));
            }
            // /tmp 128m: general-purpose temp dir (matplotlib font cache, script temp files).
            spec.tmpfs.add("/tmp:size=128m");
            // /home/sandbox: writable layer for HOME. With a read-only rootfs and no tmpfs here,
            // LibreOffice cannot create its UserInstallation profile and pnpm cannot write its
            // cache. uid/gid make the sandbox user the owner; mode 0700 restricts access to it.
            int uid = LobsterConfig.getSandboxUid();
            spec.tmpfs.add("/home/sandbox:size=64m,uid=" + uid + ",gid=" + uid + ",mode=0700");
            // ENTRYPOINT and args are dispatched by language:
            //   python:     the image ENTRYPOINT (tini -- python) is kept; args=["/work/entry.py"]
            //   javascript: ENTRYPOINT is overridden with /usr/bin/tini,
            //               args=["--","node","/work/entry.js"]; tini stays PID 1 so Node child
            //               processes do not leave zombies behind.
            if (javascript) {
                spec.entrypoint = "/usr/bin/tini";
                spec.args.add("--");
                spec.args.add("node");
                spec.args.add("/work/" + entryFileName);
            } else {
                spec.args.add("/work/" + entryFileName);
            }

            CancelToken token = new CancelToken(containerName);
            spec.cancelCheck = token.cancelled::get;
            SandboxRequest finalReq = req;
            spec.onContainerCreated = cid -> {
                token.containerId = cid;
                // Tell the front end the moment the container really exists (12-char short id).
                String shortCid = cid.length() >= 12 ? cid.substring(0, 12) : cid;
                prog.emit("sandbox.container_created",
                        "🚀 容器已创建 " + shortCid + "，开始执行 " + finalReq.getLanguage() + " 脚本...",
                        "containerId", shortCid);
            };
            lastSpec = spec;
            runToContainer.put(runKey, token);

            // Heartbeat thread: while the run blocks, emit "running Xs" every 3 seconds so the
            // front end does not look frozen. It is a daemon thread and is stopped in the
            // finally block below.
            final long runStartMs = System.currentTimeMillis();
            final AtomicBoolean heartbeatStop = new AtomicBoolean(false);
            Thread heartbeat = new Thread(() -> {
                try {
                    while (!heartbeatStop.get()) {
                        Thread.sleep(3000);
                        if (heartbeatStop.get()) break;
                        long elapsed = (System.currentTimeMillis() - runStartMs) / 1000;
                        if (elapsed <= 0) continue;
                        // Check once more right before emitting to shrink the race window, so a
                        // stale "running" message is not appended after the main thread has
                        // already reported the final state.
                        if (heartbeatStop.get()) break;
                        prog.emit("sandbox.running",
                                "⏱ 脚本运行中 " + elapsed + "s / " + spec.walltimeSec + "s",
                                "elapsedSec", elapsed);
                    }
                } catch (InterruptedException ignore) { /* heartbeat finished */ }
            }, "sandbox-heartbeat-" + shortId);
            heartbeat.setDaemon(true);
            heartbeat.start();

            DockerRunner.RunOutcome outcome;
            try {
                // There is a tiny window between the registry put and docker create; check the
                // cancel flag once more to be safe.
                if (token.cancelled.get()) {
                    outcome = new DockerRunner.RunOutcome();
                    outcome.cancelled = true;
                    outcome.exitCode = DockerRunner.EXIT_TIMEOUT;
                } else if (pooled) {
                    outcome = sandboxPoolService.start(poolLease, spec);
                } else {
                    outcome = dockerRunner.run(spec);
                }
            } finally {
                heartbeatStop.set(true);
                heartbeat.interrupt();
                // Remove only OUR token: if another execution registered under the same runKey
                // in the meantime, its token must stay reachable for killByRun.
                runToContainer.remove(runKey, token);
            }

            // 5) scan outputs + persist them
            SandboxResult result = new SandboxResult();
            result.setExitCode(outcome.exitCode);
            result.setStdout(outcome.stdout);
            result.setStderr(outcome.stderr);
            result.setWalltimeMs(System.currentTimeMillis() - startMs);

            if (outcome.cancelled) {
                result.setErrorCategory("cancelled");
                prog.emit("sandbox.cancelled", "⛔ 已取消执行");
                return result;
            }
            if (outcome.timedOut) {
                result.setErrorCategory("timeout");
                prog.emit("sandbox.timeout",
                        "⏰ 超时（" + spec.walltimeSec + "s）被终止");
                return result;
            }
            if (outcome.oomKilled) {
                result.setErrorCategory("oom");
                prog.emit("sandbox.oom", "💥 内存溢出（OOM），容器被杀");
                return result;
            }
            if (outcome.exitCode != 0) {
                result.setErrorCategory("script_error");
                prog.emit("sandbox.script_error",
                        "❌ 脚本异常退出 exit=" + outcome.exitCode + "，扫描已产出部分",
                        "exitCode", outcome.exitCode);
                // stderr is already set; keep going and scan outputs, because the script may have
                // written some files before it failed.
            } else {
                result.setErrorCategory("ok");
                prog.emit("sandbox.harvest", "📤 脚本执行完成，正在回落产物...");
            }

            harvestOutputs(req, outputs, result);
            // Final report
            if (!result.getProduced().isEmpty()) {
                StringBuilder names = new StringBuilder();
                for (int i = 0; i < result.getProduced().size(); i++) {
                    if (i > 0) names.append(", ");
                    names.append(result.getProduced().get(i).getDisplayName());
                }
                prog.emit("sandbox.done",
                        "✅ 完成：产出 " + result.getProduced().size() + " 个文件（" + names + "）",
                        "producedCount", result.getProduced().size());
            } else if ("ok".equals(result.getErrorCategory())) {
                prog.emit("sandbox.done", "✅ 执行成功（未产出 /outputs 文件）");
            }
            return result;
        } finally {
            if (pooled) {
                sandboxPoolService.release(poolLease, lastSpec);
            } else {
                // Cleanup is best effort: a failure is logged, and a failure to log is ignored.
                try { deleteRecursive(runRoot); } catch (Throwable t) {
                    try { Tools.log("[SandboxService] cleanup failed: " + runRoot, t); } catch (Throwable ignore) { /* ignore */ }
                }
            }
        }
    }

    /**
     * External kill, used for cancellation propagation.
     *
     * <p>Does nothing when {@code runId} is null/empty or no execution is registered under it.
     * Otherwise:
     * <ol>
     *   <li>sets the token's {@code cancelled} flag, which the running {@code exec} hands to the
     *       runner as its cancel check;</li>
     *   <li>asks the runner to kill the container by id when the id is already known
     *       (docker create has completed);</li>
     *   <li>otherwise falls back to killing by the pre-generated container name. If the
     *       container does not exist yet the kill fails, but the flag is already set.</li>
     * </ol>
     * The kill itself is best effort: any failure is swallowed.
     *
     * @param runId id of the run whose sandbox execution should be cancelled
     */
    public void killByRun(String runId) {
        if (runId == null || runId.isEmpty()) {
            return;
        }
        final CancelToken registered = runToContainer.get(runId);
        if (registered == null) {
            return;
        }
        registered.cancelled.set(true);

        final String createdId = registered.containerId;
        final String killTarget = (createdId == null) ? registered.containerName : createdId;
        try {
            dockerRunner.kill(killTarget);
        } catch (Throwable ignore) {
            /* best effort */
        }
    }

    // ---------- 内部工具 ----------

    /**
     * When {@code req.code} is empty and {@code req.codeRef} is set, resolves the workspace ref
     * into script text and returns a new request carrying that code; in every other case the
     * request is returned unchanged. Resolution failures raise {@link SandboxException}.
     *
     * <p>Typical use: the LLM writes a long script as an Artifact with {@code write_file} and then
     * calls {@code code_exec(code_ref=art_xxx)}, avoiding the size limit of tool_call JSON arguments.
     *
     * <p>Checks performed:
     * <ul>
     *   <li>inline code and codeRef both present → the inline code is used and the ref ignored;</li>
     *   <li>mime deny-list: clearly binary formats are rejected here with a readable message
     *       rather than letting the interpreter fail on them inside the container;</li>
     *   <li>length: the resolved bytes are still bounded by {@code sandboxCodeMaxBytes}.</li>
     * </ul>
     *
     * @param req  the incoming request, possibly {@code null}
     * @param prog sink that receives the "resolving" / "resolved" events
     * @return {@code req} itself, or a copy with the resolved code
     */
    private SandboxRequest resolveCodeRefIfNeeded(SandboxRequest req,
                                                  com.gzzm.lobster.tool.ProgressSink prog) {
        if (req == null) {
            return null;
        }
        final String inline = req.getCode();
        final String ref = req.getCodeRef();
        if (inline != null && !inline.isEmpty()) {
            // Inline code takes precedence; the ref is not even looked at.
            return req;
        }
        if (ref == null || ref.isEmpty()) {
            // Neither is present; validate() reports "code is empty" later.
            return req;
        }

        final String rejected = "sandbox.rejected";
        prog.emit("sandbox.code_ref.resolving", "📄 读取脚本工件 " + ref + " …", "ref", ref);

        ResolvedFile resolved;
        try {
            resolved = resourceResolver.resolve(req.getUser(), req.getThread().getThreadId(), ref);
        } catch (Exception e) {
            throw new SandboxException(rejected,
                    "failed to resolve code_ref: " + ref + " — " + e.getMessage(), e);
        }
        if (resolved == null || resolved.getBytes() == null) {
            throw new SandboxException(rejected,
                    "code_ref resolved to empty content: " + ref);
        }

        // Deny-list check: obviously binary content must not be run as a script.
        final String mimeType = resolved.getMimeType();
        if (isBinaryMimeForCode(mimeType)) {
            throw new SandboxException(rejected,
                    "code_ref points to a binary artifact (mimeType=" + mimeType + ", ref=" + ref
                            + ")；code_ref 仅接受纯文本脚本. "
                            + "如果这是要给脚本读的数据文件（.docx / .pdf / .xlsx 等），"
                            + "把它放进 input_refs（会挂到 /inputs/），不是 code_ref.");
        }

        // The sandbox limit still applies to scripts that were stored as artifacts.
        final byte[] scriptBytes = resolved.getBytes();
        final int limit = LobsterConfig.getSandboxCodeMaxBytes();
        if (scriptBytes.length > limit) {
            throw new SandboxException(rejected,
                    "code_ref content too long: " + scriptBytes.length + " bytes > limit " + limit
                            + " bytes (ref=" + ref + "). 调 lobster.xml <sandboxCodeMaxBytes>，"
                            + "或把数据挪到 input_refs 里、脚本本身保持精简.");
        }

        Map<String, Object> detail = new LinkedHashMap<>();
        detail.put("ref", ref);
        detail.put("size", scriptBytes.length);
        prog.emit("sandbox.code_ref.resolved",
                "📄 脚本就绪（" + scriptBytes.length + " 字节，来自 " + ref + "）",
                detail);
        return req.withResolvedCode(new String(scriptBytes, StandardCharsets.UTF_8));
    }

    /**
     * Tells whether a mime type is one that must never be executed as a script.
     *
     * <p>This is a deny-list, not an allow-list: a {@code null} or empty mime and any type not
     * listed here (for example {@code application/octet-stream}, {@code text/*},
     * {@code application/json}) are let through, so scripts saved without an explicit mime type
     * still run.
     *
     * <p>Rejected: every {@code image/}, {@code audio/}, {@code video/} and {@code font/} type,
     * OOXML documents ({@code application/vnd.openxmlformats-*}), legacy Office documents, PDF,
     * zip / rar / 7z / tar / gzip archives and {@code application/x-msdownload}.
     * Matching is case-insensitive.
     *
     * @param mime the declared mime type, possibly {@code null}
     * @return {@code true} when the type is on the deny-list
     */
    private static boolean isBinaryMimeForCode(String mime) {
        if (mime == null || mime.isEmpty()) {
            return false;
        }
        final String normalized = mime.toLowerCase(java.util.Locale.ROOT);

        // Whole families that are binary by definition (the last one covers docx/xlsx/pptx).
        if (normalized.startsWith("image/")
                || normalized.startsWith("audio/")
                || normalized.startsWith("video/")
                || normalized.startsWith("font/")
                || normalized.startsWith("application/vnd.openxmlformats-")) {
            return true;
        }

        // Individual binary types.
        switch (normalized) {
            case "application/pdf":
            case "application/zip":
            case "application/x-zip-compressed":
            case "application/x-rar-compressed":
            case "application/vnd.rar":
            case "application/x-7z-compressed":
            case "application/x-tar":
            case "application/gzip":
            case "application/msword":
            case "application/vnd.ms-excel":
            case "application/vnd.ms-powerpoint":
            case "application/x-msdownload": // .exe / .dll
                return true;
            default:
                return false;
        }
    }

    /**
     * Rejects requests that cannot be executed.
     *
     * <p>Checks, in order: the request, its thread and its user are present; the code is
     * non-empty; the UTF-8 size of the code does not exceed {@code sandboxCodeMaxBytes}; the
     * timeout lies within {@code 1..sandboxMaxTimeoutSec}.
     *
     * @param req the request to check
     * @throws SandboxException with code {@code sandbox.rejected} on the first failed check
     */
    private void validate(SandboxRequest req) {
        final String rejected = "sandbox.rejected";
        if (req == null) {
            throw new SandboxException(rejected, "request is null");
        }
        boolean missingContext = req.getThread() == null || req.getUser() == null;
        if (missingContext) {
            throw new SandboxException(rejected, "thread/user required");
        }
        boolean noCode = req.getCode() == null || req.getCode().isEmpty();
        if (noCode) {
            throw new SandboxException(rejected, "code is empty");
        }

        final int byteLimit = LobsterConfig.getSandboxCodeMaxBytes();
        final int byteCount = req.getCode().getBytes(StandardCharsets.UTF_8).length;
        if (byteCount > byteLimit) {
            // Give the LLM actionable next steps so it does not keep hitting the same wall.
            String advice = "建议拆分：(1) 把大段数据（JSON / 表格原始数据 / 模板文本）写成单独文件，"
                    + "通过 input_refs 挂到 /inputs/；(2) 把通用样板代码（如 docx 政务公文骨架）"
                    + "放到 skill bundle 里，用 use_skill 激活后调 /skill/<id>/scripts/ 的绝对路径；"
                    + "(3) 把大脚本按功能拆成多次 code_exec 调用，中间产物走 artifactId 传递.";
            throw new SandboxException(rejected,
                    "code too long: " + byteCount + " bytes > limit " + byteLimit + " bytes. " + advice);
        }

        final int timeoutCeiling = LobsterConfig.getSandboxMaxTimeoutSec();
        boolean timeoutInRange = req.getTimeoutSec() > 0 && req.getTimeoutSec() <= timeoutCeiling;
        if (!timeoutInRange) {
            throw new SandboxException(rejected,
                    "timeout_seconds out of range (1.." + timeoutCeiling + ")");
        }
    }

    /**
     * Cheap pre-flight check that catches JavaScript sent with {@code language='python'}.
     *
     * <p>Only applies when {@code language} is {@code "python"}. A line whose first
     * non-whitespace characters are {@code //} is rejected, because that is a JavaScript-style
     * comment. Lines that lie inside a Python triple-quoted string are skipped, so a Python
     * script may embed JavaScript source (comments included) in a multi-line string literal.
     *
     * <p>Known limitation: a floor-division operator that starts a continuation line inside
     * brackets is still reported.
     *
     * @param language the requested language; anything other than {@code "python"} is ignored
     * @param code     the script text; {@code null} or empty is ignored
     * @throws SandboxException with code {@code code_exec.invalid_python_syntax} on a hit
     */
    static void validateLanguageSpecificSyntaxHints(String language, String code) {
        if (!"python".equals(language) || code == null || code.isEmpty()) return;
        String[] lines = code.split("\\R", -1);
        // Delimiter (""" or ''') of the triple-quoted string open at the start of the current
        // line, or null when the line starts in ordinary code.
        String openTripleQuote = null;
        for (int i = 0; i < lines.length; i++) {
            String line = lines[i];
            if (openTripleQuote == null && line.trim().startsWith("//")) {
                throw new SandboxException("code_exec.invalid_python_syntax",
                        "Python script contains JavaScript-style `//` comment at line " + (i + 1)
                                + ". Use `#` for Python comments, or set language='javascript' "
                                + "when the script is Node/pptxgenjs code.");
            }
            openTripleQuote = pythonTripleQuoteStateAfter(line, openTripleQuote);
        }
    }

    /**
     * Scans one source line and returns the triple-quote delimiter still open at its end.
     *
     * <p>Handles {@code #} comments, backslash escapes and single-line string literals so that
     * quote characters inside them do not open or close a triple-quoted string. An unterminated
     * single-line string is treated as ending at the end of the line.
     *
     * @param line            one line of Python source, without its line terminator
     * @param openTripleQuote delimiter open at the start of the line, or {@code null}
     * @return the delimiter open at the end of the line, or {@code null} when none is open
     */
    private static String pythonTripleQuoteStateAfter(String line, String openTripleQuote) {
        String open = openTripleQuote;
        int len = line.length();
        int pos = 0;
        while (pos < len) {
            char c = line.charAt(pos);
            if (open != null) {
                // Inside a triple-quoted string: only an unescaped delimiter ends it.
                if (c == '\\') {
                    pos += 2;
                } else if (line.startsWith(open, pos)) {
                    open = null;
                    pos += 3;
                } else {
                    pos++;
                }
                continue;
            }
            if (c == '#') {
                break; // the rest of the line is a comment
            }
            if (c != '\'' && c != '"') {
                pos++;
                continue;
            }
            String triple = "" + c + c + c;
            if (line.startsWith(triple, pos)) {
                open = triple;
                pos += 3;
                continue;
            }
            // Single-line string literal: skip to its closing quote (or the end of the line).
            pos++;
            while (pos < len) {
                char s = line.charAt(pos);
                if (s == '\\') {
                    pos += 2;
                    continue;
                }
                pos++;
                if (s == c) {
                    break;
                }
            }
        }
        return open;
    }

    /**
     * Checks that every {@code /skill/<id>/} path mentioned in the script refers to the one
     * skill bundle this call activates.
     *
     * <p>Nothing is checked when the script is empty or mentions no skill path.
     *
     * @param code           the script text, possibly {@code null}
     * @param activatedSkill the skill id passed with the call, possibly {@code null};
     *                       surrounding whitespace is ignored
     * @throws SandboxException {@code code_exec.skill_not_activated} when skill paths are used
     *         but no skill is activated; {@code code_exec.skill_mismatch} when the referenced
     *         ids are not exactly the activated one
     */
    static void validateSkillMountReferencesForCode(String code, String activatedSkill) {
        if (code == null || code.isEmpty()) {
            return;
        }
        // Insertion order is kept so the first referenced id is the one quoted in the message.
        Set<String> skillIds = new LinkedHashSet<>();
        for (Matcher m = SKILL_PATH_PATTERN.matcher(code); m.find(); ) {
            skillIds.add(m.group(1));
        }
        if (skillIds.isEmpty()) {
            return;
        }

        final String mounted = (activatedSkill == null) ? "" : activatedSkill.trim();
        if (mounted.isEmpty()) {
            final String first = skillIds.iterator().next();
            throw new SandboxException("code_exec.skill_not_activated",
                    "script references /skill/" + first + "/ but code_exec did not set "
                            + "activated_skill='" + first + "'. Call use_skill('" + first
                            + "') first, then pass activated_skill='" + first + "' in the same "
                            + "code_exec call.");
        }

        boolean onlyTheMountedSkill = skillIds.contains(mounted) && skillIds.size() <= 1;
        if (!onlyTheMountedSkill) {
            throw new SandboxException("code_exec.skill_mismatch",
                    "script references skill bundle path(s) " + skillIds
                            + " but code_exec activated_skill='" + mounted + "'. A code_exec call "
                            + "mounts only one skill bundle under /skill/<id>/; use the matching "
                            + "activated_skill or split the work into separate calls.");
        }
    }

    /**
     * Checks how the script refers to staged input files.
     *
     * <p>Two rules are enforced, in this order:
     * <ol>
     *   <li>a script that mentions {@code /inputs/manifest.json} needs at least one non-empty
     *       entry in {@code inputRefs};</li>
     *   <li>a script must not hard-code any other {@code /inputs/...} path, because staged file
     *       names are generated; the real path has to be read from the manifest.</li>
     * </ol>
     *
     * @param code      the script text, possibly {@code null}
     * @param inputRefs the refs passed with the call, possibly {@code null}
     * @throws SandboxException {@code code_exec.missing_input_refs} or
     *         {@code code_exec.invalid_input_path}
     */
    static void validateInputPathReferencesForCode(String code, List<String> inputRefs) {
        if (code == null || code.isEmpty()) {
            return;
        }

        boolean anyRef = inputRefs != null
                && inputRefs.stream().anyMatch(r -> r != null && !r.isEmpty());
        if (code.contains("/inputs/manifest.json") && !anyRef) {
            throw new SandboxException("code_exec.missing_input_refs",
                    "script reads /inputs/manifest.json but this code_exec call has no input_refs. "
                            + "Pass the needed artifactId/resourceId in input_refs, then read "
                            + "json.load(open('/inputs/manifest.json'))[0]['path'].");
        }

        Matcher hardCoded = DIRECT_INPUT_FILE_PATTERN.matcher(code);
        if (!hardCoded.find()) {
            return;
        }
        throw new SandboxException("code_exec.invalid_input_path",
                "script hard-codes input file path '" + hardCoded.group()
                        + "'. Input filenames are staged and may be prefixed or deduplicated "
                        + "(for example 00-00-name.pdf). Always read the actual path from "
                        + "/inputs/manifest.json via manifest[i]['path'].");
    }

    /**
     * Verifies that every {@code /skill/<activatedSkill>/<path>} mentioned in the script exists
     * inside the extracted bundle on the host.
     *
     * <p>Paths that belong to a different skill id are ignored here. A referenced path is
     * rejected when it normalizes to a location outside the bundle root or when nothing exists
     * at that location (symbolic links are not followed for the existence check).
     *
     * @param code           the script text, possibly {@code null}
     * @param activatedSkill the activated skill id; nothing is checked when null or empty
     * @param skillHost      host directory of the extracted bundle; nothing is checked when null
     * @throws SandboxException {@code code_exec.skill_file_missing} for the first bad reference
     */
    static void validateReferencedSkillFilesExist(String code, String activatedSkill, Path skillHost) {
        boolean nothingToCheck = code == null || code.isEmpty()
                || activatedSkill == null || activatedSkill.isEmpty()
                || skillHost == null;
        if (nothingToCheck) {
            return;
        }

        final Path bundleRoot = skillHost.toAbsolutePath().normalize();
        for (Matcher m = SKILL_FILE_PATH_PATTERN.matcher(code); m.find(); ) {
            if (!activatedSkill.equals(m.group(1))) {
                continue;
            }
            final String relativePath = m.group(2);
            final Path resolved = bundleRoot.resolve(relativePath).normalize();
            boolean present = resolved.startsWith(bundleRoot)
                    && Files.exists(resolved, LinkOption.NOFOLLOW_LINKS);
            if (present) {
                continue;
            }
            throw new SandboxException("code_exec.skill_file_missing",
                    "script references " + m.group() + ", but the activated skill bundle "
                            + activatedSkill + " does not contain " + relativePath + ". "
                            + "Check the deployed skill bundle or use a system command already in PATH "
                            + "such as `soffice` instead of a missing helper script.");
        }
    }

    /**
     * Resolves every entry of {@code input_refs}, writes its bytes into {@code inputsDir} and
     * returns the manifest rows describing the staged files.
     *
     * <p>Files are named {@code <2-digit index>-<sanitized display name>.<extension>}
     * ({@code bin} when the resolved file has no extension). Null or empty refs are skipped and
     * do not consume an index. Each manifest row holds {@code index}, {@code ref},
     * {@code displayName}, {@code mimeType}, {@code size} and the in-container {@code path}.
     *
     * @param req       the request providing user, thread and input refs
     * @param inputsDir host directory that is mounted as {@code /inputs}
     * @return manifest rows in staging order
     * @throws SandboxException with code {@code sandbox.rejected} when there are too many refs,
     *         a ref cannot be resolved or resolves to no content, or a per-file / total size
     *         limit is exceeded
     * @throws Exception on I/O failure while writing a staged file
     */
    private List<Map<String, Object>> stageInputs(SandboxRequest req, Path inputsDir) throws Exception {
        List<Map<String, Object>> manifest = new ArrayList<>();
        int maxFiles = LobsterConfig.getSandboxInputMaxFiles();
        long maxPerFile = LobsterConfig.getSandboxInputMaxBytes();
        long maxTotal = LobsterConfig.getSandboxInputTotalMaxBytes();
        if (req.getInputRefs().size() > maxFiles) {
            throw new SandboxException("sandbox.rejected",
                    "too many input_refs (" + req.getInputRefs().size() + " > " + maxFiles + ")");
        }
        long total = 0;
        int idx = 0;
        for (String ref : req.getInputRefs()) {
            if (ref == null || ref.isEmpty()) continue;
            ResolvedFile rf;
            try {
                rf = resourceResolver.resolve(req.getUser(), req.getThread().getThreadId(), ref);
            } catch (Exception e) {
                throw new SandboxException("sandbox.rejected",
                        "failed to resolve input ref: " + ref + " — " + e.getMessage(), e);
            }
            // Same contract as code_ref resolution: a ref that resolves to nothing is a clean
            // rejection, not a NullPointerException further down.
            if (rf == null) {
                throw new SandboxException("sandbox.rejected",
                        "input ref resolved to empty content: " + ref);
            }
            long size = rf.getSize();
            if (size > maxPerFile) {
                throw new SandboxException("sandbox.rejected",
                        "input ref exceeds per-file limit: " + ref + " (" + size + " > " + maxPerFile + ")");
            }
            total += size;
            if (total > maxTotal) {
                throw new SandboxException("sandbox.rejected",
                        "input refs exceed total limit: " + total + " > " + maxTotal);
            }
            // Bytes are fetched only after the size checks have passed.
            byte[] bytes = rf.getBytes();
            if (bytes == null) {
                throw new SandboxException("sandbox.rejected",
                        "input ref resolved to empty content: " + ref);
            }
            String safe = sanitize(rf.getDisplayName());
            String ext = rf.getExtension() == null ? "bin" : rf.getExtension();
            String fname = String.format("%02d-%s.%s", idx, safe, ext);
            Path target = inputsDir.resolve(fname);
            Files.write(target, bytes);
            relaxFilePermissions(target);

            Map<String, Object> row = new LinkedHashMap<>();
            row.put("index", idx);
            row.put("ref", ref);
            row.put("displayName", rf.getDisplayName());
            row.put("mimeType", rf.getMimeType());
            row.put("size", size);
            row.put("path", "/inputs/" + fname);
            manifest.add(row);
            idx++;
        }
        return manifest;
    }

    /**
     * Collects the files a sandbox run left under {@code outputs} and turns each one into an
     * Artifact plus a WorkspaceResource. The batch is all-or-nothing: if any entry is a symlink,
     * is not a regular file, or pushes the size past
     * {@link LobsterConfig#getSandboxOutputMaxBytes()}, nothing is harvested.
     *
     * <p><b>Security: symlinks are refused.</b> Code inside the container (uid 10001) can run
     * {@code ln -s /etc/passwd outputs/exfil.txt}; the Tomcat JVM can usually read the host's
     * /etc/passwd, so a symlink-following read would copy an arbitrary host file into an
     * Artifact. Every file API used here therefore passes NOFOLLOW_LINKS, and any symlink
     * rejects the whole batch.
     *
     * <p>On rejection the result's error category is set to {@code "rejected"} and one
     * explanatory line is appended to its stderr; no exception is thrown for that case.
     *
     * @param req     the sandbox request; supplies thread, user and run id for created artifacts
     * @param outputs host-side outputs directory; a missing directory is a silent no-op
     * @param result  receives one {@code Produced} entry per harvested file, or the rejection
     * @throws Exception if walking, reading, or artifact creation/registration fails
     */
    private void harvestOutputs(SandboxRequest req, Path outputs, SandboxResult result) throws Exception {
        if (!Files.exists(outputs, LinkOption.NOFOLLOW_LINKS)) {
            return;
        }
        final long maxBytes = LobsterConfig.getSandboxOutputMaxBytes();
        final Path root = outputs.toAbsolutePath().normalize();

        // Files.walk does not descend into symlinked directories by default, and that default
        // is kept. A returned Path may still itself be a symlink pointing outside the sandbox,
        // so every entry is re-checked below and later read with NOFOLLOW.
        final List<Path> entries = new ArrayList<>();
        try (Stream<Path> walk = Files.walk(outputs)) {
            walk.sorted().forEachOrdered(entries::add);
        }

        // Phase 1: safety and size validation. The first violation stops the scan.
        final List<Path> accepted = new ArrayList<>();
        Path rejected = null;
        String reason = null;
        long rejectedSize = 0;
        long runningTotal = 0;
        for (Path entry : entries) {
            if (entry.equals(outputs)) {
                continue;
            }
            // Containment: every entry must really live below the outputs root.
            if (!entry.toAbsolutePath().normalize().startsWith(root)) {
                rejected = entry;
                reason = "path escapes outputs root";
                break;
            }
            // Any symlink, whether to a file or a directory, is refused outright.
            if (Files.isSymbolicLink(entry)) {
                rejected = entry;
                reason = "symlink not allowed";
                break;
            }
            if (Files.isDirectory(entry, LinkOption.NOFOLLOW_LINKS)) {
                continue;
            }
            // FIFOs, device nodes and sockets are never harvested.
            if (!Files.isRegularFile(entry, LinkOption.NOFOLLOW_LINKS)) {
                rejected = entry;
                reason = "not a regular file";
                break;
            }
            // Files.size(entry) would follow a symlink and stat its target. The isSymbolicLink
            // check above already ran, but between the two syscalls the entry could be swapped
            // for a symlink to a sensitive host path (TOCTOU). Reading the attributes with
            // NOFOLLOW_LINKS in a single call closes that window.
            java.nio.file.attribute.BasicFileAttributes attrs = Files.readAttributes(entry,
                    java.nio.file.attribute.BasicFileAttributes.class,
                    LinkOption.NOFOLLOW_LINKS);
            long size = attrs.size();
            runningTotal += size;
            boolean withinLimit = size <= maxBytes && runningTotal <= maxBytes;
            if (!withinLimit) {
                rejected = entry;
                rejectedSize = size;
                reason = "size exceeds " + maxBytes + " bytes";
                break;
            }
            accepted.add(entry);
        }

        if (rejected != null) {
            result.setErrorCategory("rejected");
            String previous = result.getStderr();
            StringBuilder message = new StringBuilder(previous == null ? "" : previous);
            message.append("\n[sandbox] output rejected: ").append(reason);
            message.append(" (file=").append(rejected.getFileName());
            if (rejectedSize > 0) {
                message.append(", size=").append(rejectedSize);
            }
            message.append(")");
            result.setStderr(message.toString());
            return;
        }

        // Phase 2: persist. Reads use NOFOLLOW so a file swapped for a symlink between the
        // two phases is still refused.
        for (Path file : accepted) {
            byte[] content = readBytesNoFollow(file);
            // Only the last path component is kept as the display name.
            String displayName = file.getFileName().toString();
            String format = extOf(displayName);
            Artifact artifact = artifactService.createBinary(req.getThread(), req.getUser(),
                    ArtifactType.GENERATED_DOCUMENT, displayName, content, format,
                    null, req.getRunId(), null);
            WorkspaceResource resource =
                    workspaceService.registerArtifact(req.getThread(), req.getUser(), artifact);

            SandboxResult.Produced produced = new SandboxResult.Produced();
            produced.setResourceId(resource.getResourceId());
            produced.setArtifactId(artifact.getArtifactId());
            produced.setDisplayName(displayName);
            produced.setMimeType(resource.getMimeType());
            produced.setSize(content.length);
            result.addProduced(produced);
        }
    }

    /**
     * Reads the whole file into memory without following a symbolic link.
     * {@link Files#readAllBytes} has no LinkOption overload, so the stream is opened through
     * {@link Files#newInputStream} with {@link LinkOption#NOFOLLOW_LINKS} instead.
     *
     * @param p file to read
     * @return the file's complete contents
     * @throws IOException if the file cannot be opened or read
     * @throws SandboxException with code {@code sandbox.rejected} if {@code p} is a symlink
     */
    private static byte[] readBytesNoFollow(Path p) throws IOException {
        if (Files.isSymbolicLink(p)) {
            throw new SandboxException("sandbox.rejected", "symlink refused: " + p);
        }
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        try (InputStream source = Files.newInputStream(p, LinkOption.NOFOLLOW_LINKS)) {
            final byte[] buffer = new byte[64 * 1024];
            // Copy in 64 KiB chunks until the stream stops yielding data.
            for (int read = source.read(buffer); read > 0; read = source.read(buffer)) {
                collected.write(buffer, 0, read);
            }
        }
        return collected.toByteArray();
    }

    /**
     * Cleans a display name so it can be used as the base of a file name: strips path
     * traversal sequences, control characters and characters that are illegal in file names.
     *
     * <p>Everything from the last {@code '.'} onward is dropped (unless that dot is the first
     * character), because callers pass the extension separately. In what remains, ASCII
     * control characters and {@code / \ : * ? " < > |} become {@code '_'}, each {@code ".."}
     * pair collapses to {@code '_'}, and surrounding whitespace is trimmed. The result is
     * capped at 60 UTF-16 code units; the cut never lands inside a surrogate pair, so a
     * supplementary character (such as an emoji) at the boundary is dropped whole rather than
     * left as a lone surrogate.
     *
     * @param name raw display name; may be {@code null} or empty
     * @return the cleaned base name, never {@code null} or empty; {@code "unnamed"} when the
     *         input is {@code null}/empty or nothing is left after cleaning
     */
    static String sanitize(String name) {
        if (name == null || name.isEmpty()) return "unnamed";
        // Drop the extension (callers supply it separately); only the base is cleaned.
        int dot = name.lastIndexOf('.');
        String base = dot > 0 ? name.substring(0, dot) : name;
        StringBuilder sb = new StringBuilder(base.length());
        for (int i = 0; i < base.length(); i++) {
            char c = base.charAt(i);
            if (c < 0x20 || c == 0x7F) { sb.append('_'); continue; }
            switch (c) {
                case '/': case '\\': case ':': case '*': case '?':
                case '"': case '<': case '>': case '|':
                    sb.append('_'); break;
                default: sb.append(c);
            }
        }
        String out = sb.toString()
                .replace("..", "_")
                .trim();
        if (out.isEmpty()) return "unnamed";
        final int maxLength = 60;
        if (out.length() > maxLength) {
            int end = maxLength;
            // Cutting between a high and a low surrogate would leave half a character, which
            // cannot be encoded into a valid file name; back off by one code unit instead.
            if (Character.isHighSurrogate(out.charAt(end - 1))
                    && Character.isLowSurrogate(out.charAt(end))) {
                end--;
            }
            out = out.substring(0, end);
        }
        return out;
    }

    /**
     * Bind-mount permission fix for Linux: opens {@code dir} up to rwxrwxrwx so the container
     * process running as uid=10001 can write into it.
     *
     * <p>On file systems without POSIX attribute support (for example Windows NTFS under
     * Docker Desktop) this silently does nothing. Any failure while applying the permissions
     * is ignored as well — the call is strictly best effort.
     *
     * @param dir directory to make world-accessible
     */
    private static void relaxPermissions(Path dir) {
        boolean posixSupported =
                FileSystems.getDefault().supportedFileAttributeViews().contains("posix");
        if (!posixSupported) {
            return;
        }
        try {
            // All nine permission bits: read/write/execute for owner, group and others.
            Set<PosixFilePermission> everything = EnumSet.allOf(PosixFilePermission.class);
            Files.setPosixFilePermissions(dir, everything);
        } catch (Throwable ignore) {
            /* best effort */
        }
    }

    /**
     * Makes {@code file} readable by the sandbox user by setting its mode to rw-rw-r--.
     *
     * <p>Files created by Tomcat can inherit a restrictive umask (for example 0600), while the
     * sandbox process runs as uid=10001; bind-mounted inputs and entry scripts therefore need
     * their read bit opened explicitly. File systems without POSIX attribute support are
     * skipped, and any failure while applying the mode is ignored (best effort).
     *
     * @param file file whose permissions are relaxed
     */
    private static void relaxFilePermissions(Path file) {
        boolean posixSupported =
                FileSystems.getDefault().supportedFileAttributeViews().contains("posix");
        if (!posixSupported) {
            return;
        }
        try {
            // Owner and group read/write, others read-only; no execute bit anywhere.
            Set<PosixFilePermission> mode =
                    java.nio.file.attribute.PosixFilePermissions.fromString("rw-rw-r--");
            Files.setPosixFilePermissions(file, mode);
        } catch (Throwable ignore) {
            /* best effort */
        }
    }

    /**
     * Returns the lower-cased extension of {@code filename}, i.e. the text after its last
     * {@code '.'}.
     *
     * <p>Lower-casing uses {@link java.util.Locale#ROOT} so the result does not depend on the
     * JVM's default locale (under a Turkish locale, plain {@code toLowerCase()} turns
     * {@code "GIF"} into {@code "gıf"}).
     *
     * @param filename file name to inspect; may be {@code null}
     * @return the extension without the dot, or {@code "bin"} when {@code filename} is
     *         {@code null}, contains no dot, or ends with a dot
     */
    private static String extOf(String filename) {
        if (filename == null) return "bin";
        int dot = filename.lastIndexOf('.');
        if (dot < 0 || dot == filename.length() - 1) return "bin";
        return filename.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Deletes {@code root} and everything beneath it, children before parents.
     *
     * <p>Individual delete failures are ignored (best effort), so some entries may survive.
     * Symbolic links are never followed: a link is removed itself and its target is left
     * alone, and a dangling link passed as {@code root} is removed too.
     *
     * @param root file, directory or symlink to remove; a non-existent path is a no-op
     * @throws IOException if the tree cannot be traversed (for example an unreadable
     *                     directory); traversal errors that {@link Files#walk} reports as
     *                     {@link java.io.UncheckedIOException} are unwrapped to their cause
     */
    private static void deleteRecursive(Path root) throws IOException {
        // NOFOLLOW: a dangling symlink "does not exist" when followed, but the link itself
        // still has to be cleaned up.
        if (!Files.exists(root, LinkOption.NOFOLLOW_LINKS)) return;
        try (Stream<Path> w = Files.walk(root)) {
            // Reverse order visits the deepest paths first, so directories are empty by the
            // time they are deleted.
            w.sorted(Comparator.reverseOrder()).forEach(p -> {
                try { Files.deleteIfExists(p); } catch (IOException ignored) { /* best effort */ }
            });
        } catch (java.io.UncheckedIOException e) {
            // Files.walk reports traversal failures unchecked; surface them through the
            // declared checked exception so callers handling IOException see them.
            throw e.getCause();
        }
    }
}
