"""Time-stamp 过滤工具。

职责：
- 丢弃那些文本为纯标点且 `rl` 为 None 的片段。
- 根据配置项 `enable_detailed_log` 控制是否输出详细日志。

该模块设计为幂等、无副作用：接受一个 time_stamps 列表并返回过滤后的新列表。
"""
from pathlib import Path
import json
import logging
from typing import List, Dict

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Location of gzzm_config.json: two directory levels above the directory that
# contains this file (the path is made absolute and symlinks resolved first).
_config_path = Path(__file__).resolve().parents[2] / "gzzm_config.json"


def _load_gzzm_config() -> dict:
    """Load the JSON configuration stored at ``_config_path``.

    Loading is best-effort: a missing or unreadable file, undecodable bytes
    and malformed JSON all produce an empty dict instead of an exception.
    A file whose top-level JSON value is not an object (for example a list
    or a bare number) is likewise treated as "no configuration", so callers
    can always rely on ``dict`` methods such as ``.get``.

    Returns:
        The parsed configuration mapping, or ``{}`` when it cannot be loaded.
    """
    try:
        with _config_path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: the file is missing or unreadable.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Valid JSON is not necessarily an object; never hand back a non-dict.
    return data if isinstance(data, dict) else {}


def _is_pure_punctuation(text: str) -> bool:
    """Tell whether *text* has content but no letters or digits at all.

    Surrounding whitespace is ignored. Empty, ``None`` and whitespace-only
    input is reported as ``False``. Otherwise the answer is ``True`` only
    when no character satisfies ``str.isalnum()``, which also accepts
    Chinese characters and other Unicode letters and digits.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return False
    try:
        # A single letter or digit is enough to rule the text out.
        return not any(ch.isalnum() for ch in stripped)
    except Exception:
        # Conservative fallback: if a character cannot be classified,
        # report "not pure punctuation" so the caller keeps the text.
        return False


def _detailed_log_enabled() -> bool:
    """Return True when the config turns on ``enable_detailed_log``.

    The flag is compared as text, so the JSON values ``true``, ``1``,
    ``"yes"`` and ``"on"`` (in any letter case) all enable detailed logging.
    """
    cfg = _load_gzzm_config()
    if not isinstance(cfg, dict):
        # A config whose top-level value is not an object means "off".
        return False
    flag = cfg.get("enable_detailed_log", False)
    return str(flag).strip().lower() in ("1", "true", "yes", "on")


def filter_time_stamps(time_stamps: List[Dict]) -> List[Dict]:
    """Drop punctuation-only segments that carry no ``rl`` value.

    A segment is removed when its ``text`` is a string accepted by
    ``_is_pure_punctuation`` and its ``rl`` entry is missing or ``None``.
    All other segments are kept in their original order; a segment whose
    ``text`` is not a string is always kept.

    When the ``enable_detailed_log`` config flag is on, the removed segments
    are reported through the module logger at INFO level. The config file is
    only consulted when at least one segment was removed.

    Args:
        time_stamps: Segment dicts. The keys ``text``, ``rl``, ``start`` and
            ``end`` are read; the dicts themselves are not modified.

    Returns:
        A new list holding the kept segments. The input list is never
        returned, so an empty or ``None`` input yields a fresh empty list.
    """
    if not time_stamps:
        # Hand back a fresh list so the caller cannot alias the input.
        return []

    kept: List[Dict] = []
    removed: List[Dict] = []
    for seg in time_stamps:
        text = seg.get("text")
        # Check ``rl`` first (cheap); only string text can be punctuation.
        if (
            seg.get("rl") is None
            and isinstance(text, str)
            and _is_pure_punctuation(text)
        ):
            removed.append(seg)
        else:
            kept.append(seg)

    if removed and _detailed_log_enabled():
        try:
            logger.info("time_stamp_filter: removed %d pure-punctuation segments with null rl", len(removed))
            for s in removed:
                logger.info(
                    "removed: start=%s end=%s text=%r rl=%r",
                    s.get("start"),
                    s.get("end"),
                    s.get("text"),
                    s.get("rl"),
                )
        except Exception:
            # Deliberate best-effort: a logging failure must never affect
            # the filtering result.
            pass

    return kept
