from __future__ import annotations
import random
import time
from urllib.parse import urlparse

# Minimum spacing, in seconds, between two requests to the same host when the
# caller does not supply an interval (or supplies None).
DEFAULT_INTERVAL_S = 5.0
# Half-width of the random jitter applied to the interval, as a fraction:
# 0.20 means each wait uses an interval drawn from [0.8, 1.2] * interval_s.
JITTER_PCT = 0.20


class HostThrottle:
    """Per-host minimum interval gate. In-process only (Phase 2 will use Valkey).

    The time of the last request to each host is kept in an in-memory dict;
    ``wait()`` blocks until the (jittered) interval since that time has elapsed.
    """

    def __init__(
        self,
        interval_s: float | None = DEFAULT_INTERVAL_S,
        jitter_pct: float = JITTER_PCT,
    ):
        """Create a throttle.

        Args:
            interval_s: Base number of seconds between two requests to the
                same host. ``None`` falls back to ``DEFAULT_INTERVAL_S``.
            jitter_pct: Fractional jitter; every wait scales the interval by a
                factor drawn uniformly from ``[1 - jitter_pct, 1 + jitter_pct]``.
        """
        # Accept None explicitly — callers routinely pass `crawl_target.interval_sec`
        # which is nullable. Falling back to DEFAULT_INTERVAL_S avoids silent
        # TypeError("unsupported operand type(s) for *: 'NoneType' and 'float'")
        # on the second wait() call (first call skips math because last is None,
        # hiding the bug until an actual rate-limit hits).
        self.interval_s = DEFAULT_INTERVAL_S if interval_s is None else interval_s
        self.jitter_pct = jitter_pct
        # Maps throttle key (see _host) -> clock reading taken at the end of
        # the most recent wait() for that key.
        self._last_by_host: dict[str, float] = {}

    def _host(self, url: str) -> str:
        """Return the throttle key for *url*: its lowercased hostname.

        ``hostname`` (rather than ``netloc``) is used so that userinfo and port
        do not split one host into several buckets: ``http://example.com/``,
        ``http://user:pw@example.com/`` and ``http://example.com:80/`` all map
        to ``"example.com"``. This also keeps URL credentials out of the
        bookkeeping dict. If no hostname can be extracted, the lowercased
        netloc (possibly ``""``) is used instead.
        """
        parts = urlparse(url)
        return parts.hostname or parts.netloc.lower()

    def wait(self, url: str, *, sleep=time.sleep, now=time.monotonic) -> float:
        """Block until the host of *url* may be contacted again.

        The first call for a host never sleeps. Later calls sleep until
        ``last + interval_s * jitter`` where ``last`` is the clock reading
        recorded by the previous call for that host.

        Args:
            url: URL about to be requested; only its host is used.
            sleep: Callable taking a number of seconds; injectable for tests.
            now: Zero-argument callable returning the current clock reading in
                seconds; injectable for tests.

        Returns:
            The number of seconds this call asked ``sleep`` to wait
            (``0.0`` when no wait was needed).
        """
        host = self._host(url)
        now_t = now()
        last = self._last_by_host.get(host)
        sleep_s = 0.0
        if last is not None:
            # Fresh jitter factor per call so request spacing is not perfectly
            # periodic.
            jitter = 1.0 + random.uniform(-self.jitter_pct, self.jitter_pct)
            due = last + self.interval_s * jitter
            sleep_s = max(0.0, due - now_t)
            if sleep_s > 0:
                sleep(sleep_s)
        # Re-read the clock: after sleeping, now_t is stale, and the next
        # interval must be measured from when this call actually returned.
        self._last_by_host[host] = now()
        return sleep_s
