"""说话人匹配服务。

职责概览：
- 对单段 PCM 音频执行 speaker 匹配/注册并返回说话人 ID（speaker_id）与相似度。
- 提供 AudioSegment 到 PCM 原始字节的转换方法，供流水线调用。
"""

import io
import json
from pathlib import Path
from typing import Optional, Tuple

import numpy as np
from pydub import AudioSegment

from ..utils.speaker_id import SpeakerRegistry

# Path of the optional JSON configuration file: "gzzm_config.json" located two
# directory levels above the directory that contains this module.
_config_path = Path(__file__).resolve().parents[2] / "gzzm_config.json"
def _load_gzzm_config() -> dict:
    """Load the optional JSON configuration file as a dictionary.

    The file referenced by the module-level ``_config_path`` is read as UTF-8
    JSON. Loading is best-effort: the configuration is optional, so any
    failure results in an empty dictionary and callers fall back to their
    built-in defaults.

    Returns:
        The parsed top-level JSON object, or ``{}`` when the file cannot be
        opened or parsed, or when its top-level value is not a JSON object.
    """
    try:
        with _config_path.open("r", encoding="utf-8") as config_file:
            loaded = json.load(config_file)
    except Exception:
        # Deliberately broad: a missing, unreadable or malformed config file
        # must never break the caller.
        return {}
    # Valid JSON can also be a list, string, number, ...; callers rely on
    # ``.get()``, so only a real mapping is handed back.
    return loaded if isinstance(loaded, dict) else {}


def _to_bool(value: object, default: bool) -> bool:
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        v = value.strip().lower()
        if v in ("1", "true", "yes", "on"):
            return True
        if v in ("0", "false", "no", "off"):
            return False
    if isinstance(value, (int, float)):
        return bool(value)
    return default




def _select_most_informative_pcm_window(
    pcm_bytes: bytes,
    sample_rate: int,
    channels: int,
    target_seconds: float = 5.0,
) -> bytes:
    """从完整 PCM 中选择信息量更高的窗口（优先配置时长，不足则返回全段）。"""
    if not pcm_bytes:
        return pcm_bytes
    if sample_rate <= 0 or channels <= 0:
        return pcm_bytes

    target_seconds = max(0.1, float(target_seconds))
    target_samples = int(round(target_seconds * float(sample_rate)))

    pcm = np.frombuffer(pcm_bytes, dtype=np.int16)
    usable = (pcm.size // channels) * channels
    if usable <= 0:
        return pcm_bytes
    if usable != pcm.size:
        pcm = pcm[:usable]

    total_samples = usable // channels
    if total_samples <= target_samples:
        return pcm.tobytes()

    mono = pcm.reshape(-1, channels).astype(np.float32).mean(axis=1) / 32768.0
    frame_samples = max(1, int(round(float(sample_rate) * 0.02)))  # 20ms
    frame_count = mono.size // frame_samples
    if frame_count < 2:
        return pcm.tobytes()

    trimmed = mono[: frame_count * frame_samples]
    frames = trimmed.reshape(frame_count, frame_samples)
    rms = np.sqrt(np.mean(np.square(frames), axis=1) + 1e-12)

    noise_floor = float(np.percentile(rms, 20))
    peak = float(np.percentile(rms, 95))
    activity_threshold = max(1e-4, noise_floor + 0.2 * max(0.0, peak - noise_floor))
    activity = (rms >= activity_threshold).astype(np.float32)

    window_frames = max(1, target_samples // frame_samples)
    if window_frames >= frame_count:
        return pcm.tobytes()

    # 用活动占比 + 能量均值综合打分，挑选更可能包含有效语音的窗口。
    c_activity = np.concatenate(([0.0], np.cumsum(activity)))
    c_rms = np.concatenate(([0.0], np.cumsum(rms)))

    active_ratio = (c_activity[window_frames:] - c_activity[:-window_frames]) / float(window_frames)
    mean_rms = (c_rms[window_frames:] - c_rms[:-window_frames]) / float(window_frames)
    norm_energy = np.clip(mean_rms / max(peak, 1e-8), 0.0, 1.0)
    score = (0.7 * active_ratio) + (0.3 * norm_energy)

    best_frame = int(np.argmax(score))
    start_sample = best_frame * frame_samples
    end_sample = min(total_samples, start_sample + target_samples)

    start_i16 = start_sample * channels
    end_i16 = end_sample * channels
    if end_i16 <= start_i16:
        return pcm.tobytes()

    return pcm[start_i16:end_i16].tobytes()


def identify_speaker_id_from_pcm(
    pcm_bytes: bytes,
    sample_rate: int,
    channels: int,
    registry: SpeakerRegistry,
    device: str,
) -> Tuple[Optional[str], Optional[float]]:
    """Identify, or register, the speaker of a single PCM audio segment.

    Configuration is read on every call via ``_load_gzzm_config()``:

    - ``speaker_match_window_seconds`` (default 5.0): window length used once
      the registry already contains speakers.
    - ``speaker_initial_baseline_seconds`` (default 10.0): window length used
      while the registry reports no speakers yet.
    - ``speaker_use_informative_window`` (default True): whether to pick the
      most informative window instead of passing the whole segment. It is
      forced on while the registry reports no speakers.

    Audio shorter than the chosen window is matched as a whole; there is no
    minimum-length cut-off in this function.

    Args:
        pcm_bytes: Raw PCM audio; the window selection treats it as
            interleaved 16-bit samples.
        sample_rate: Sample rate in Hz.
        channels: Number of channels.
        registry: Registry used to match or register the speaker.
        device: Device identifier passed through to
            ``registry.register_or_match``.

    Returns:
        ``(speaker_id, similarity)``. ``speaker_id`` is the matched or newly
        registered ID as a string, or ``None`` when the input is empty, the
        registry raises, or the registry reports no match (``None`` or
        ``-1``). ``similarity`` is a float, or ``None`` when it is unavailable
        or not convertible to a float.
    """
    if not pcm_bytes:
        return None, None

    cfg = _load_gzzm_config()
    if not isinstance(cfg, dict):
        # A config file whose top-level JSON value is not an object carries
        # no usable settings; fall back to the defaults.
        cfg = {}

    try:
        default_match_seconds = float(cfg.get("speaker_match_window_seconds", 5.0))
    except (TypeError, ValueError, OverflowError):
        default_match_seconds = 5.0
    try:
        initial_baseline_seconds = float(cfg.get("speaker_initial_baseline_seconds", 10.0))
    except (TypeError, ValueError, OverflowError):
        initial_baseline_seconds = 10.0
    use_informative = _to_bool(cfg.get("speaker_use_informative_window", True), True)

    # An empty registry means this segment may become the first baseline, so
    # the (configurable) initial baseline window is used and the informative
    # window selection is forced on. A failing stats() call is treated the
    # same way as an empty registry.
    try:
        reg_count = int(registry.stats().get("count", 0))
    except Exception:
        reg_count = 0
    if reg_count == 0:
        target_seconds = initial_baseline_seconds
        use_informative = True
    else:
        target_seconds = default_match_seconds

    pcm_for_match = pcm_bytes
    if use_informative:
        try:
            pcm_for_match = _select_most_informative_pcm_window(
                pcm_bytes=pcm_bytes,
                sample_rate=sample_rate,
                channels=channels,
                target_seconds=target_seconds,
            )
        except Exception:
            # Window selection is only an optimisation; if it fails (for
            # example on malformed PCM) match against the whole segment.
            pcm_for_match = pcm_bytes

    try:
        matched = registry.register_or_match(
            pcm_for_match,
            sample_rate=sample_rate,
            channels=channels,
            device=device,
            return_sim=True,
        )
    except Exception:
        # Best-effort: any registry failure, including an ImportError, is
        # reported as "no speaker".
        return None, None

    # The registry may answer with an (id, similarity) pair or a bare id;
    # normalise both shapes before validating them.
    if isinstance(matched, (tuple, list)):
        cand_id = matched[0] if len(matched) > 0 else None
        cand_sim = matched[1] if len(matched) > 1 else None
    else:
        cand_id, cand_sim = matched, None

    # None and -1 both mean "no match" and must not be stringified.
    if cand_id in (None, -1):
        return None, None

    try:
        sim = float(cand_sim) if cand_sim is not None else None
    except Exception:
        sim = None

    return str(cand_id), sim


def audio_segment_to_pcm_bytes(segment: AudioSegment) -> bytes:
    """Export a pydub ``AudioSegment`` as raw PCM bytes.

    The segment is exported with ``format="raw"`` into an in-memory buffer
    and the buffer's contents are returned. The audio metadata is not part of
    the result; callers can read it from ``segment.frame_rate``,
    ``segment.channels`` and ``segment.sample_width``.

    Args:
        segment: The audio segment to export.

    Returns:
        The exported raw PCM data.
    """
    with io.BytesIO() as sink:
        segment.export(sink, format="raw")
        return sink.getvalue()