package com.gzzm.lobster.llm.adapter;

import com.gzzm.lobster.common.JsonUtil;
import com.gzzm.lobster.common.LobsterException;
import com.gzzm.lobster.common.MessageRole;
import com.gzzm.lobster.common.TokenEstimator;
import com.gzzm.lobster.llm.*;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;

/**
 * OpenAiCompatibleAdapter —— OpenAI / vLLM 兼容协议适配器 /
 * Adapter for OpenAI / vLLM compatible chat-completions API.
 *
 * <p>为了便于内网离线环境直接编译运行，这里使用标准 {@code HttpURLConnection}
 * 直接构造 OpenAI chat completions 请求，避免对额外网络依赖的强绑定；
 * 生产部署中可替换为 LangChain4j {@code OpenAiChatModel} 实现。
 *
 * <p>Uses plain HttpURLConnection so the code compiles without requiring
 * LangChain4j on the classpath. Can be swapped for {@code OpenAiChatModel}
 * in production.
 */
public class OpenAiCompatibleAdapter implements LobsterLlmAdapter {

    private static final AtomicLong TOOL_CALL_FALLBACK_SEQ = new AtomicLong();

    private final ModelProfile profile;

    /** Creates an adapter bound to the given model profile (endpoint, key, timeouts, feature flags). */
    public OpenAiCompatibleAdapter(ModelProfile profile) {
        this.profile = profile;
    }

    /** @return the model profile this adapter was constructed with */
    @Override
    public ModelProfile profile() {
        return profile;
    }

    /**
     * Synchronous (non-streaming) chat completion: builds the OpenAI-style
     * payload, performs the blocking HTTP call and parses the raw JSON body.
     */
    @Override
    public LlmResponse chat(List<LobsterMessage> messages, List<ToolSpec> tools) {
        return parse(doHttp(buildPayload(messages, tools, false)));
    }

    /**
     * Streaming chat completion over SSE. Reads OpenAI-style {@code data: ...}
     * frames line by line, forwards deltas to {@code handler}, and reports the
     * aggregated result via {@code onComplete}. All failures are routed through
     * {@code handler.onError} instead of being thrown.
     */
    @Override
    public void chatStream(List<LobsterMessage> messages, List<ToolSpec> tools,
                           StreamingResponseHandler handler) {
        // Real SSE streaming: parse the `data: ...` frames of OpenAI chat.completions directly.
        // Profiles without stable streaming fall back to the synchronous chat path
        // (common when nativeToolCalling=false).
        if (!Boolean.TRUE.equals(profile.getStreaming())) {
            fallbackSyncPlayback(messages, tools, handler);
            return;
        }
        Map<String, Object> payload = buildPayload(messages, tools, true);
        HttpURLConnection conn = null;
        try {
            String url = profile.getEndpoint();
            if (!url.contains("/chat/completions")) {
                // OpenAI-compatible clients usually configure base_url as ".../v1";
                // if the endpoint already ends with /v1 only append /chat/completions,
                // otherwise we would build /v1/v1/chat/completions and hit a 404.
                String base = url.endsWith("/") ? url.substring(0, url.length() - 1) : url;
                url = base.endsWith("/v1") ? (base + "/chat/completions") : (base + "/v1/chat/completions");
            }
            conn = (HttpURLConnection) new URL(url).openConnection();
            // first-token timeout doubles as the connect timeout; the total timeout is used as the read timeout
            conn.setConnectTimeout(profile.getFirstTokenTimeoutMs() == null ? 10000 : profile.getFirstTokenTimeoutMs());
            conn.setReadTimeout(profile.getTotalTimeoutMs() == null ? 120000 : profile.getTotalTimeoutMs());
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
            conn.setRequestProperty("Accept", "text/event-stream");
            if (profile.getApiKey() != null && !profile.getApiKey().isEmpty()) {
                conn.setRequestProperty("Authorization", "Bearer " + profile.getApiKey());
            }
            try (DataOutputStream os = new DataOutputStream(conn.getOutputStream())) {
                os.write(JsonUtil.toJson(payload).getBytes(StandardCharsets.UTF_8));
            }
            int code = conn.getResponseCode();
            if (code < 200 || code >= 300) {
                // getErrorStream() returns null when there is no response body
                // (bare HTTP 503 from nginx, connection-level errors, ...);
                // new InputStreamReader(null) would NPE — must null-check first.
                StringBuilder err = new StringBuilder();
                java.io.InputStream es = conn.getErrorStream();
                if (es != null) {
                    try (BufferedReader br = new BufferedReader(new InputStreamReader(es, StandardCharsets.UTF_8))) {
                        String line;
                        while ((line = br.readLine()) != null) err.append(line);
                    }
                } else {
                    err.append("(no response body)");
                }
                handler.onError(new LobsterException("llm.http", "HTTP " + code + " -> " + err));
                return;
            }

            StreamAggregator agg = new StreamAggregator(profile.getModelId());
            boolean cancelledByUpper = false;
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    conn.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                StringBuilder frame = new StringBuilder();
                while ((line = br.readLine()) != null) {
                    // Per-line cancellation check: when the upper layer flags cancelled,
                    // disconnect immediately instead of draining the remaining tokens.
                    if (handler.isCancelled()) {
                        cancelledByUpper = true;
                        try { conn.disconnect(); } catch (Throwable ignore) { /* ignore */ }
                        break;
                    }
                    if (line.isEmpty()) { // blank line = end of SSE frame
                        processFrame(frame.toString(), agg, handler);
                        frame.setLength(0);
                        continue;
                    }
                    frame.append(line).append('\n');
                }
                // Flush a trailing frame that was not terminated by a blank line.
                if (!cancelledByUpper && frame.length() > 0) {
                    processFrame(frame.toString(), agg, handler);
                }
            }
            if (cancelledByUpper) {
                handler.onError(new LobsterException("llm.cancelled", "cancelled by upper layer"));
            } else {
                handler.onComplete(agg.build());
            }
        } catch (Throwable t) {
            handler.onError(t);
        } finally {
            if (conn != null) try { conn.disconnect(); } catch (Throwable ignore) { /* ignore */ }
        }
    }

    /**
     * Fallback for profiles that do not support streaming: performs a synchronous
     * call and replays the result to the handler in ~20 character chunks so the
     * upper-layer streaming contract stays identical. Reasoning is replayed before
     * content (matching the order of real streams), then tool calls, then onComplete.
     * In production, nativeToolCalling + streaming should be enabled together so
     * this path is not hit.
     */
    private void fallbackSyncPlayback(List<LobsterMessage> messages, List<ToolSpec> tools,
                                      StreamingResponseHandler handler) {
        try {
            LlmResponse response = chat(messages, tools);
            // Replay thinking first (industry convention: thinking precedes content)
            // so the UI segment order matches real streaming.
            String reasoning = response.getReasoningContent();
            if (reasoning != null && !reasoning.isEmpty()) {
                int rstep = Math.max(1, reasoning.length() / 20);
                for (int i = 0; i < reasoning.length(); i += rstep) {
                    handler.onReasoningDelta(reasoning.substring(i, Math.min(reasoning.length(), i + rstep)));
                }
            }
            // Defensive: treat a null assistant text as empty instead of NPE-ing
            // mid-playback (the response object comes from a pluggable parse path).
            String text = response.getAssistantText();
            if (text == null) text = "";
            int step = Math.max(1, text.length() / 20);
            for (int i = 0; i < text.length(); i += step) {
                handler.onDelta(text.substring(i, Math.min(text.length(), i + step)));
            }
            List<ToolCall> calls = response.getToolCalls();
            if (calls != null) {
                for (ToolCall tc : calls) handler.onToolCall(tc);
            }
            handler.onComplete(response);
        } catch (Throwable t) {
            handler.onError(t);
        }
    }

    /**
     * Parses one SSE frame (multiple {@code data:...} lines plus optional {@code event:...}).
     * In the OpenAI format a frame carries only {@code data:} lines. Unparseable JSON
     * payloads are skipped silently; {@code [DONE]} terminates frame processing.
     */
    @SuppressWarnings("unchecked")
    private void processFrame(String frame, StreamAggregator agg, StreamingResponseHandler handler) {
        if (frame == null || frame.isEmpty()) return;
        for (String line : frame.split("\\r?\\n")) {
            if (!line.startsWith("data:")) continue;
            String data = line.substring(5).trim();
            if (data.isEmpty()) continue;
            if ("[DONE]".equals(data)) return;
            Map<String, Object> json;
            try { json = JsonUtil.fromJsonToMap(data); }
            catch (Throwable t) { continue; }
            // Top-level usage: most OpenAI-compatible streams attach it to the last
            // frame (DeepSeek / Qwen / vLLM all send it).
            Object usageObj = json.get("usage");
            if (usageObj instanceof Map) agg.mergeUsage((Map<String, Object>) usageObj);
            List<Map<String, Object>> choices = (List<Map<String, Object>>) json.get("choices");
            if (choices == null || choices.isEmpty()) continue;
            Map<String, Object> choice = choices.get(0);
            Map<String, Object> delta = (Map<String, Object>) choice.get("delta");
            if (delta == null) continue;

            Object content = delta.get("content");
            if (content instanceof String && !((String) content).isEmpty()) {
                agg.appendText((String) content);
                handler.onDelta((String) content);
            }
            // Thinking-mode models (deepseek-v4-flash / deepseek-reasoner / Qwen-QwQ etc.)
            // send reasoning_content separately in the delta — it must be accumulated and
            // sent back to the API on the next turn, otherwise the API returns 400.
            // The fragments are also forwarded via onReasoningDelta in real time so the
            // SSE layer can push assistant_thinking events.
            Object reasoning = delta.get("reasoning_content");
            if (reasoning instanceof String && !((String) reasoning).isEmpty()) {
                agg.appendReasoning((String) reasoning);
                handler.onReasoningDelta((String) reasoning);
            }
            Object toolCalls = delta.get("tool_calls");
            if (toolCalls instanceof List) {
                for (Object o : (List<Object>) toolCalls) {
                    if (o instanceof Map) {
                        // write_file gets live content previews extracted from the arguments stream
                        WriteFileContentDelta contentDelta = agg.mergeToolCallDelta((Map<String, Object>) o);
                        if (contentDelta != null) {
                            handler.onWriteFileContentDelta(
                                    contentDelta.toolCallId,
                                    contentDelta.toolIndex,
                                    contentDelta.contentDelta);
                        }
                    }
                }
            }
            Object finish = choice.get("finish_reason");
            if (finish != null) agg.setFinishReason(String.valueOf(finish));
        }
    }

    /**
     * OpenAI SSE sends tool_calls in delta-merge mode: later frames for the same
     * index append to the accumulated arguments. This aggregator merges per index
     * and returns everything at once through the final {@link LlmResponse}.
     */
    private static final class StreamAggregator {
        final String modelId;
        /** Prefix for synthesized tool-call ids when the provider omits "id". */
        final String fallbackIdPrefix;
        final StringBuilder text = new StringBuilder();
        /** Accumulator for reasoning_content of thinking-mode models (deepseek-v4-flash etc.). */
        final StringBuilder reasoning = new StringBuilder();
        /** Tool calls keyed by stream index; LinkedHashMap keeps arrival order. */
        final Map<Integer, ToolCallBuilder> toolCalls = new LinkedHashMap<>();
        String finishReason;
        int promptTokens = 0;
        int completionTokens = 0;
        int promptCacheHitTokens = 0;
        int promptCacheMissTokens = 0;

        StreamAggregator(String modelId) {
            this.modelId = modelId;
            this.fallbackIdPrefix = "tool_call_" + TOOL_CALL_FALLBACK_SEQ.incrementAndGet() + "_index_";
        }

        void appendText(String s) { text.append(s); }
        void appendReasoning(String s) { reasoning.append(s); }
        void setFinishReason(String r) { finishReason = r; }

        /** Absorbs a usage object from the stream (many compatible providers attach it to the last frame). */
        void mergeUsage(Map<String, Object> usage) {
            Object ct = usage.get("completion_tokens");
            promptTokens = promptTokenCount(usage);
            if (ct instanceof Number) completionTokens = ((Number) ct).intValue();
            CacheUsage cache = parseCacheUsage(usage, promptTokens);
            promptCacheHitTokens = cache.hitTokens;
            promptCacheMissTokens = cache.missTokens;
        }

        /**
         * Merges one tool_calls delta entry into the builder for its index.
         *
         * @return a freshly decoded write_file "content" fragment for live preview,
         *         or {@code null} when there is nothing new to preview
         */
        @SuppressWarnings("unchecked")
        WriteFileContentDelta mergeToolCallDelta(Map<String, Object> d) {
            Integer idx = d.get("index") instanceof Number
                    ? ((Number) d.get("index")).intValue() : 0;
            ToolCallBuilder b = toolCalls.get(idx);
            if (b == null) { b = new ToolCallBuilder(fallbackIdPrefix, idx); toolCalls.put(idx, b); }
            if (d.get("id") != null) b.id = String.valueOf(d.get("id"));
            Object fn = d.get("function");
            if (fn instanceof Map) {
                Map<String, Object> fnMap = (Map<String, Object>) fn;
                // The name usually arrives only in the first frame for an index; keep the first one seen.
                if (fnMap.get("name") != null && (b.name == null || b.name.isEmpty())) {
                    b.name = String.valueOf(fnMap.get("name"));
                }
                Object args = fnMap.get("arguments");
                if (args != null) {
                    b.args.append(String.valueOf(args));
                }
            }
            // Live preview: for write_file, run the incremental extractor over the
            // not-yet-consumed tail of the accumulated arguments.
            if ("write_file".equals(b.name) && b.contentExtractedChars < b.args.length()) {
                CharSequence argsDelta = b.args.subSequence(b.contentExtractedChars, b.args.length());
                b.contentExtractedChars = b.args.length();
                String contentDelta = b.contentExtractor.extractDelta(argsDelta);
                if (contentDelta != null && !contentDelta.isEmpty()) {
                    String id = b.stableIdForPreview();
                    return new WriteFileContentDelta(id, idx, contentDelta);
                }
            }
            return null;
        }

        /** Builds the final response from everything aggregated so far. */
        LlmResponse build() {
            List<ToolCall> out = new ArrayList<>();
            for (ToolCallBuilder b : toolCalls.values()) {
                String id = b.stableId();
                String args = b.args.length() == 0 ? "{}" : b.args.toString();
                out.add(new ToolCall(id, b.name, args));
            }
            String assistantText = text.toString();
            String reasoningText = reasoning.length() == 0 ? null : reasoning.toString();
            // Prefer SSE usage; otherwise estimate the output and leave input at 0
            // so LlmRuntime can apply its own fallback estimation.
            int inputTokens = promptTokens;
            int outputTokens = completionTokens > 0
                    ? completionTokens
                    : TokenEstimator.estimate(assistantText);
            return new LlmResponse(assistantText, out, inputTokens, outputTokens,
                    finishReason == null ? "stop" : finishReason, modelId, "", reasoningText,
                    promptCacheHitTokens, promptCacheMissTokens);
        }
    }

    /**
     * Accumulator for one streamed tool call: provider id, function name and the
     * growing JSON-arguments buffer, plus the incremental write_file preview state.
     */
    private static final class ToolCallBuilder {
        final String fallbackId;           // synthesized id used when the provider never sends one
        String id;                         // id as sent by the provider, may arrive late or never
        String publicId;                   // id pinned once a preview has been emitted under it
        String name;
        final StringBuilder args = new StringBuilder();
        final WriteFileContentExtractor contentExtractor = new WriteFileContentExtractor();
        int contentExtractedChars;         // how much of args the extractor has already consumed

        ToolCallBuilder(String fallbackIdPrefix, int index) {
            this.fallbackId = fallbackIdPrefix + index;
        }

        /** Id for the final response: preview id if one was handed out, else provider id, else fallback. */
        String stableId() {
            if (publicId != null) {
                return publicId;
            }
            if (id != null) {
                return id;
            }
            return fallbackId;
        }

        /** Pins the preview id on first use so a provider id arriving later cannot change it. */
        String stableIdForPreview() {
            if (publicId == null) {
                publicId = (id == null) ? fallbackId : id;
            }
            return publicId;
        }
    }

    /** Immutable carrier for one freshly decoded chunk of a write_file "content" preview. */
    private static final class WriteFileContentDelta {
        final String toolCallId;   // stable id of the originating tool call
        final int toolIndex;       // tool_calls index within the streamed choice
        final String contentDelta; // newly decoded content characters

        WriteFileContentDelta(String toolCallId, int toolIndex, String contentDelta) {
            this.toolCallId = toolCallId;
            this.toolIndex = toolIndex;
            this.contentDelta = contentDelta;
        }
    }

    /**
     * Extracts the partial JSON string value of the top-level "content" property from incomplete
     * write_file arguments. It emits only newly decoded content and ignores incomplete escapes.
     *
     * <p>Implemented as a character-level push parser: {@link #extractDelta} may be called with
     * arbitrary fragment boundaries (including mid-escape or mid-\\uXXXX) because all parse state
     * is kept across calls. Output is capped at {@link #PREVIEW_LIMIT} decoded characters.
     */
    private static final class WriteFileContentExtractor {
        private static final int PREVIEW_LIMIT = 30000;

        private enum State {
            SEEK_KEY,      // scanning for the next top-level key
            READ_KEY,      // inside a key string
            AFTER_KEY,     // key finished, expecting ':'
            AFTER_COLON,   // expecting the value
            SKIP_STRING,   // inside a string value of a non-"content" key
            READ_CONTENT   // inside the "content" string value — emit decoded chars
        }

        private State state = State.SEEK_KEY;
        // Depth of {}/[] nesting; keys are only recognized at depth 1 (the top-level object).
        private int nestingDepth = 0;
        private String lastKey;
        private final StringBuilder key = new StringBuilder();
        private boolean escaping;          // previous char was a backslash
        private int unicodeRemaining;      // hex digits still expected for a \\uXXXX escape
        private int unicodeValue;
        private int emittedChars;          // total decoded chars emitted, for PREVIEW_LIMIT

        /** Feeds the next raw-arguments fragment; returns only the newly decoded "content" chars. */
        String extractDelta(CharSequence delta) {
            if (delta == null || delta.length() == 0 || emittedChars >= PREVIEW_LIMIT) return "";
            StringBuilder out = new StringBuilder();
            for (int i = 0; i < delta.length(); i++) {
                char c = delta.charAt(i);
                switch (state) {
                    case SEEK_KEY:
                        if (c == '{' || c == '[') nestingDepth++;
                        else if (c == '}' || c == ']') nestingDepth = Math.max(0, nestingDepth - 1);
                        else if (nestingDepth == 1 && c == '"') {
                            key.setLength(0);
                            resetStringState();
                            state = State.READ_KEY;
                        }
                        break;
                    case READ_KEY:
                        if (consumeStringChar(c, key, null)) {
                            lastKey = key.toString();
                            state = State.AFTER_KEY;
                        }
                        break;
                    case AFTER_KEY:
                        if (Character.isWhitespace(c)) break;
                        state = c == ':' ? State.AFTER_COLON : State.SEEK_KEY;
                        break;
                    case AFTER_COLON:
                        if (Character.isWhitespace(c)) break;
                        if (c == '"') {
                            resetStringState();
                            state = "content".equals(lastKey) ? State.READ_CONTENT : State.SKIP_STRING;
                            lastKey = null;
                        } else {
                            // Non-string value: track nesting for objects/arrays and resume key scanning.
                            lastKey = null;
                            if (c == '{' || c == '[') nestingDepth++;
                            else if (c == '}' || c == ']') nestingDepth = Math.max(0, nestingDepth - 1);
                            state = State.SEEK_KEY;
                        }
                        break;
                    case SKIP_STRING:
                        if (consumeStringChar(c, null, null)) state = State.SEEK_KEY;
                        break;
                    case READ_CONTENT:
                        if (consumeStringChar(c, null, out)) state = State.SEEK_KEY;
                        break;
                    default:
                        state = State.SEEK_KEY;
                        break;
                }
            }
            return out.toString();
        }

        private void resetStringState() {
            escaping = false;
            unicodeRemaining = 0;
            unicodeValue = 0;
        }

        /**
         * Consumes one character of a JSON string, decoding escapes.
         *
         * @param sink optional buffer receiving decoded chars (used for keys)
         * @param out  optional buffer receiving decoded chars subject to PREVIEW_LIMIT (used for content)
         * @return true when the unescaped closing quote was reached
         */
        private boolean consumeStringChar(char c, StringBuilder sink, StringBuilder out) {
            if (unicodeRemaining > 0) {
                int hex = Character.digit(c, 16);
                if (hex >= 0) {
                    unicodeValue = (unicodeValue << 4) + hex;
                    unicodeRemaining--;
                    if (unicodeRemaining == 0) appendDecoded((char) unicodeValue, sink, out);
                    return false;
                }
                // Malformed \\uXXXX: abandon the escape and fall through to process c normally.
                unicodeRemaining = 0;
                unicodeValue = 0;
            }
            if (escaping) {
                escaping = false;
                switch (c) {
                    case '"': appendDecoded('"', sink, out); break;
                    case '\\': appendDecoded('\\', sink, out); break;
                    case '/': appendDecoded('/', sink, out); break;
                    case 'b': appendDecoded('\b', sink, out); break;
                    case 'f': appendDecoded('\f', sink, out); break;
                    case 'n': appendDecoded('\n', sink, out); break;
                    case 'r': appendDecoded('\r', sink, out); break;
                    case 't': appendDecoded('\t', sink, out); break;
                    case 'u':
                        unicodeRemaining = 4;
                        unicodeValue = 0;
                        break;
                    default:
                        // Unknown escape: emit the character verbatim (lenient handling).
                        appendDecoded(c, sink, out);
                        break;
                }
                return false;
            }
            if (c == '\\') {
                escaping = true;
                return false;
            }
            if (c == '"') {
                return true;
            }
            appendDecoded(c, sink, out);
            return false;
        }

        /** Appends a decoded char to the sinks, enforcing PREVIEW_LIMIT on the preview buffer only. */
        private void appendDecoded(char c, StringBuilder sink, StringBuilder out) {
            if (sink != null) sink.append(c);
            if (out != null && emittedChars < PREVIEW_LIMIT) {
                out.append(c);
                emittedChars++;
            }
        }
    }

    // ---- private helpers ----

    /**
     * Builds the OpenAI chat.completions request body: model, messages, optional
     * tools, stream flags and thinking-mode controls driven by the profile.
     */
    private Map<String, Object> buildPayload(List<LobsterMessage> messages, List<ToolSpec> tools, boolean stream) {
        Map<String, Object> payload = new LinkedHashMap<>();
        payload.put("model", profile.getModelId());
        payload.put("messages", toOpenAiMessages(messages));
        if (profile.getMaxOutputTokens() != null) payload.put("max_tokens", profile.getMaxOutputTokens());
        if (Boolean.TRUE.equals(profile.getNativeToolCalling()) && tools != null && !tools.isEmpty()) {
            payload.put("tools", toOpenAiTools(tools));
            payload.put("tool_choice", "auto");
        }
        payload.put("stream", stream);
        // Explicitly request usage in streaming responses: DeepSeek / some vLLM builds
        // do not send it by default; without this there is no prompt_tokens / completion_tokens.
        if (stream) {
            Map<String, Object> streamOpts = new LinkedHashMap<>();
            streamOpts.put("include_usage", true);
            payload.put("stream_options", streamOpts);
        }
        // Thinking mode: explicit tri-state control (auto/on/off). See ModelThinkingMode javadoc.
        // - on:  send reasoning_effort=high + thinking={type:enabled} (the Python SDK merges
        //        extra_body into the top-level body, so these are top-level here too)
        // - off: send a set of commonly recognized disable fields —
        //        enable_thinking=false (DashScope / Alibaba Cloud Bailian Qwen)
        //        chat_template_kwargs.enable_thinking=false (vLLM Qwen3 deployment convention)
        //        providers that recognize them use them, others ignore them; the real OpenAI
        //        API with its strict schema is the exception — for real OpenAI configure
        //        thinkingMode=auto, not off.
        // - auto: send no thinking fields at all → provider default behavior (most compatible).
        ModelThinkingMode mode = profile.resolveThinkingMode();
        if (mode == ModelThinkingMode.on) {
            payload.put("reasoning_effort", "high");
            Map<String, Object> thinking = new LinkedHashMap<>();
            thinking.put("type", "enabled");
            payload.put("thinking", thinking);
        } else if (mode == ModelThinkingMode.off) {
            payload.put("enable_thinking", false);
            Map<String, Object> tplKwargs = new LinkedHashMap<>();
            tplKwargs.put("enable_thinking", false);
            payload.put("chat_template_kwargs", tplKwargs);
        }
        // mode == auto → no fields sent
        return payload;
    }

    /**
     * Converts internal messages to OpenAI wire format. Branch order matters:
     * tool results first, then assistant messages carrying tool_calls, then
     * multimodal (image) messages, then plain text. Key insertion order is
     * deliberate (LinkedHashMap) to keep payloads readable and provider-friendly.
     */
    private List<Map<String, Object>> toOpenAiMessages(List<LobsterMessage> messages) {
        List<Map<String, Object>> list = new ArrayList<>(messages.size());
        for (LobsterMessage m : messages) {
            Map<String, Object> msg = new LinkedHashMap<>();
            msg.put("role", m.getRole().name());
            if (m.getRole() == MessageRole.tool) {
                msg.put("tool_call_id", m.getToolCallId());
                msg.put("name", m.getToolName());
                msg.put("content", m.getContent() == null ? "" : m.getContent());
            } else if (m.getRole() == MessageRole.assistant && m.hasToolCalls()) {
                // assistant messages may carry content and tool_calls at the same time
                if (m.getContent() != null) msg.put("content", m.getContent());
                // Thinking-mode models require historical reasoning_content to be sent back
                // (deepseek-v4-flash etc.; omitting it yields
                // 400 "reasoning_content in thinking mode must be passed back").
                if (m.hasReasoningContent()) msg.put("reasoning_content", m.getReasoningContent());
                List<Map<String, Object>> calls = new ArrayList<>();
                for (ToolCall tc : m.getToolCalls()) {
                    Map<String, Object> call = new LinkedHashMap<>();
                    call.put("id", tc.getId());
                    call.put("type", "function");
                    Map<String, Object> fn = new LinkedHashMap<>();
                    fn.put("name", tc.getName());
                    fn.put("arguments", tc.getArgumentsJson() == null ? "{}" : tc.getArgumentsJson());
                    call.put("function", fn);
                    calls.add(call);
                }
                msg.put("tool_calls", calls);
            } else if (m.getImageUrls() != null && !m.getImageUrls().isEmpty()) {
                // Multimodal message: content becomes a list of one text part plus image_url parts.
                List<Map<String, Object>> content = new ArrayList<>();
                Map<String, Object> textPart = new LinkedHashMap<>();
                textPart.put("type", "text");
                textPart.put("text", m.getContent() == null ? "" : m.getContent());
                content.add(textPart);
                for (String url : m.getImageUrls()) {
                    Map<String, Object> imgPart = new LinkedHashMap<>();
                    imgPart.put("type", "image_url");
                    Map<String, Object> img = new LinkedHashMap<>();
                    img.put("url", url);
                    imgPart.put("image_url", img);
                    content.add(imgPart);
                }
                msg.put("content", content);
            } else {
                msg.put("content", m.getContent() == null ? "" : m.getContent());
                // Thinking-mode assistant messages without tool_calls must also
                // pass reasoning_content back.
                if (m.getRole() == MessageRole.assistant && m.hasReasoningContent()) {
                    msg.put("reasoning_content", m.getReasoningContent());
                }
            }
            list.add(msg);
        }
        return list;
    }

    /** Converts internal tool specs to the OpenAI "function" tool schema. */
    private List<Map<String, Object>> toOpenAiTools(List<ToolSpec> tools) {
        List<Map<String, Object>> out = new ArrayList<>(tools.size());
        for (ToolSpec tool : tools) {
            Map<String, Object> fn = new LinkedHashMap<>();
            fn.put("name", tool.getName());
            fn.put("description", tool.getDescription());
            fn.put("parameters", tool.getParametersSchema());
            Map<String, Object> wrapper = new LinkedHashMap<>();
            wrapper.put("type", "function");
            wrapper.put("function", fn);
            out.add(wrapper);
        }
        return out;
    }

    /**
     * Synchronous HTTP POST of the JSON payload to the chat-completions endpoint.
     *
     * @return the raw response body on HTTP 2xx
     * @throws LobsterException carrying status + body on non-2xx, or wrapping the
     *         underlying cause on transport/serialization failures
     */
    private String doHttp(Map<String, Object> payload) {
        HttpURLConnection conn = null;
        try {
            String url = profile.getEndpoint();
            if (!url.contains("/chat/completions")) {
                // OpenAI-compatible clients usually configure base_url as ".../v1";
                // if the endpoint already ends with /v1 only append /chat/completions,
                // otherwise we would build /v1/v1/chat/completions and hit a 404.
                String base = url.endsWith("/") ? url.substring(0, url.length() - 1) : url;
                url = base.endsWith("/v1") ? (base + "/chat/completions") : (base + "/v1/chat/completions");
            }
            URL target = new URL(url);
            conn = (HttpURLConnection) target.openConnection();
            conn.setConnectTimeout(profile.getFirstTokenTimeoutMs() == null ? 10000 : profile.getFirstTokenTimeoutMs());
            conn.setReadTimeout(profile.getTotalTimeoutMs() == null ? 120000 : profile.getTotalTimeoutMs());
            conn.setDoOutput(true);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
            if (profile.getApiKey() != null && !profile.getApiKey().isEmpty()) {
                conn.setRequestProperty("Authorization", "Bearer " + profile.getApiKey());
            }
            try (DataOutputStream os = new DataOutputStream(conn.getOutputStream())) {
                os.write(JsonUtil.toJson(payload).getBytes(StandardCharsets.UTF_8));
            }
            int code = conn.getResponseCode();
            boolean ok = code >= 200 && code < 300;
            StringBuilder sb = new StringBuilder();
            // getErrorStream() returns null when the error response has no body
            // (bare HTTP 503, connection-level failures); reading it unguarded would
            // NPE inside InputStreamReader — same guard chatStream applies.
            java.io.InputStream body = ok ? conn.getInputStream() : conn.getErrorStream();
            if (body != null) {
                try (BufferedReader br = new BufferedReader(new InputStreamReader(body, StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = br.readLine()) != null) sb.append(line).append('\n');
                }
            } else if (!ok) {
                sb.append("(no response body)");
            }
            if (ok) {
                return sb.toString();
            }
            throw new LobsterException("llm.http", "HTTP " + code + " -> " + sb);
        } catch (LobsterException e) {
            throw e;
        } catch (Exception e) {
            throw new LobsterException("llm.http", "OpenAI call failed: " + e.getMessage(), e);
        } finally {
            if (conn != null) conn.disconnect();
        }
    }

    /**
     * Parses a non-streaming chat.completions JSON body into an {@link LlmResponse}.
     * Missing choices or a choice without a "message" object yield an empty response
     * instead of an NPE; missing usage falls back to token estimation.
     */
    @SuppressWarnings("unchecked")
    private LlmResponse parse(String raw) {
        Map<String, Object> json = JsonUtil.fromJsonToMap(raw);
        List<Map<String, Object>> choices = (List<Map<String, Object>>) json.get("choices");
        if (choices == null || choices.isEmpty()) {
            return new LlmResponse("", Collections.<ToolCall>emptyList(), 0, 0, "empty", profile.getModelId(), raw);
        }
        Map<String, Object> choice = choices.get(0);
        Map<String, Object> msg = (Map<String, Object>) choice.get("message");
        if (msg == null) {
            // Defensive: some gateways return a choice without "message" on failures.
            return new LlmResponse("", Collections.<ToolCall>emptyList(), 0, 0, "empty", profile.getModelId(), raw);
        }
        String content = msg.get("content") == null ? "" : String.valueOf(msg.get("content"));
        // Thinking-mode non-streaming responses also carry reasoning_content inside message.
        String reasoningContent = msg.get("reasoning_content") == null
                ? null : String.valueOf(msg.get("reasoning_content"));
        if (reasoningContent != null && reasoningContent.isEmpty()) reasoningContent = null;
        List<ToolCall> toolCalls = new ArrayList<>();
        Object rawCalls = msg.get("tool_calls");
        if (rawCalls instanceof List) {
            for (Object o : (List<Object>) rawCalls) {
                Map<String, Object> callMap = (Map<String, Object>) o;
                Map<String, Object> fn = (Map<String, Object>) callMap.get("function");
                if (fn == null) continue; // malformed entry: skip rather than NPE
                String id = String.valueOf(callMap.get("id"));
                String name = String.valueOf(fn.get("name"));
                Object args = fn.get("arguments");
                String argStr = args == null ? "{}" : (args instanceof String ? (String) args : JsonUtil.toJson(args));
                toolCalls.add(new ToolCall(id, name, argStr));
            }
        }
        String finishReason = choice.get("finish_reason") == null ? null : String.valueOf(choice.get("finish_reason"));
        Object usageObj = json.get("usage");
        if (usageObj instanceof Map) {
            Map<String, Object> usage = (Map<String, Object>) usageObj;
            int inputTokens = promptTokenCount(usage);
            int outputTokens = asInt(usage.get("completion_tokens"));
            CacheUsage cache = parseCacheUsage(usage, inputTokens);
            // Usage present but zeroed: fall back to estimation so accounting never reports 0.
            if (inputTokens == 0) inputTokens = TokenEstimator.estimate(raw);
            if (outputTokens == 0) outputTokens = TokenEstimator.estimate(content);
            return new LlmResponse(content, toolCalls, inputTokens, outputTokens, finishReason,
                    profile.getModelId(), raw, reasoningContent, cache.hitTokens, cache.missTokens);
        }
        // No usage object at all: estimate both sides from raw/content lengths.
        int inputTokens = TokenEstimator.estimate(raw);
        int outputTokens = TokenEstimator.estimate(content);
        return new LlmResponse(content, toolCalls, inputTokens, outputTokens, finishReason,
                profile.getModelId(), raw, reasoningContent);
    }

    /**
     * Prompt-side token count from a usage object. OpenAI-compatible providers use
     * "prompt_tokens"; some gateways report "input_tokens" instead.
     */
    static int promptTokenCount(Map<String, Object> usage) {
        return firstPositiveInt(usage, "prompt_tokens", "input_tokens");
    }

    /**
     * Extracts prompt-cache hit/miss token counts from a usage object, trying the
     * key spellings of several providers in priority order (DeepSeek, OpenAI,
     * Anthropic-style gateways), then falling back to the nested
     * prompt_tokens_details / input_tokens_details objects. When only the hit count
     * is known, the miss count is derived as promptTokens - hit.
     */
    @SuppressWarnings("unchecked")
    private static CacheUsage parseCacheUsage(Map<String, Object> usage, int promptTokens) {
        if (usage == null) return new CacheUsage(0, 0);

        int hit = firstPositiveInt(usage,
                "prompt_cache_hit_tokens",
                "prompt_cache_read_tokens",
                "cache_hit_tokens",
                "cached_tokens",
                "cache_read_input_tokens");
        int miss = firstPositiveInt(usage,
                "prompt_cache_miss_tokens",
                "prompt_cache_write_tokens",
                "cache_miss_tokens",
                "cache_creation_input_tokens");

        // Nested details objects (OpenAI: prompt_tokens_details.cached_tokens, etc.)
        // are only consulted for values the flat keys did not provide.
        Object detailsObj = usage.get("prompt_tokens_details");
        if (!(detailsObj instanceof Map)) detailsObj = usage.get("input_tokens_details");
        if (detailsObj instanceof Map) {
            Map<String, Object> details = (Map<String, Object>) detailsObj;
            if (hit <= 0) {
                hit = firstPositiveInt(details,
                        "cached_tokens",
                        "prompt_cache_hit_tokens",
                        "cache_read_input_tokens");
            }
            if (miss <= 0) {
                miss = firstPositiveInt(details,
                        "prompt_cache_miss_tokens",
                        "cache_creation_input_tokens");
            }
        }

        // Derive miss from the total when the provider only reported hits.
        if (hit > 0 && miss <= 0 && promptTokens > hit) miss = promptTokens - hit;
        return new CacheUsage(hit, miss);
    }

    /** Returns the first key (in order) whose value coerces to a positive int, or 0 when none does. */
    private static int firstPositiveInt(Map<String, Object> map, String... keys) {
        if (map == null || keys == null) return 0;
        for (String key : keys) {
            int value = asStaticInt(map.get(key));
            if (value > 0) {
                return value;
            }
        }
        return 0;
    }

    /** Lenient Object -> int coercion: Numbers via intValue, Strings via parseInt, everything else 0. */
    private static int asStaticInt(Object o) {
        if (o instanceof Number) {
            return ((Number) o).intValue();
        }
        if (o == null) {
            return 0;
        }
        try {
            return Integer.parseInt(String.valueOf(o));
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /** Immutable prompt-cache accounting pair; both counts are clamped to be non-negative. */
    private static final class CacheUsage {
        final int hitTokens;
        final int missTokens;

        CacheUsage(int hitTokens, int missTokens) {
            this.hitTokens = hitTokens < 0 ? 0 : hitTokens;
            this.missTokens = missTokens < 0 ? 0 : missTokens;
        }
    }

    /** Instance-scope lenient Object -> int coercion (mirrors the static helper used elsewhere). */
    private int asInt(Object o) {
        if (o instanceof Number) {
            return ((Number) o).intValue();
        }
        if (o == null) {
            return 0;
        }
        try {
            return Integer.parseInt(String.valueOf(o));
        } catch (Exception e) {
            return 0;
        }
    }
}
