from __future__ import annotations
from simhash import Simhash


def content_simhash(text: str) -> str:
    """Return the 64-bit SimHash of *text* as 16 lowercase hex characters.

    The features fed to SimHash are overlapping character 2-grams taken over
    the whole string. This handles CJK text without a word segmenter and is
    adequate for mixed text (see RESEARCH, "Dont Hand-Roll"). Only character
    2-grams are produced; no separate word-level tokenisation is performed.

    2-grams that consist solely of whitespace are skipped. When no feature
    remains (falsy input, a single character, or whitespace-only input) the
    all-zero fingerprint ``"0" * 16`` is returned.
    """
    zero_fingerprint = "0" * 16
    if not text:
        return zero_fingerprint

    # Pair every character with its successor; zip stops at the shorter
    # operand, so exactly len(text) - 1 adjacent pairs are produced.
    bigrams = (left + right for left, right in zip(text, text[1:]))
    # Drop pairs that are entirely whitespace; they carry no content signal.
    shingles = [gram for gram in bigrams if gram.strip()]
    if not shingles:
        return zero_fingerprint

    fingerprint = Simhash(shingles, f=64).value
    # Zero-pad to 16 hex digits so every fingerprint has the same width.
    return format(fingerprint, "016x")
