"""Hard-coded blacklist of path fragments we refuse to crawl (COMP-03).

Even if a site's YAML points a column at one of these, we reject. Only
explicit public-info paths belong in the crawler's reach.
"""
from __future__ import annotations

from urllib.parse import unquote, urlparse

# Lowercase path fragments that mark a URL as off-limits. Matching is a
# substring test against the decoded, lowercased URL path, so an entry without
# a trailing slash (e.g. "/admin") also covers longer names such as
# "/administrator", while an entry with a trailing slash (e.g. "/user/") only
# matches that exact path segment.
FORBIDDEN_PATH_PATTERNS: tuple[str, ...] = (
    "/admin",
    "/api/internal",
    "/internal/",
    "/backstage/",
    "/manage/",
    "/login",
    "/logout",
    "/user/",
    "/account/",
)


def is_public_path(url: str) -> bool:
    """Return False if the URL's path matches any forbidden fragment.

    Only the path component is inspected; scheme, host, query string and
    fragment are ignored. The path is percent-decoded and lowercased before
    matching, so ``/%61dmin`` and ``/ADMIN`` are rejected just like ``/admin``.

    Args:
        url: Absolute or relative URL to check.

    Returns:
        True when no entry of ``FORBIDDEN_PATH_PATTERNS`` occurs in the path,
        False otherwise.

    Raises:
        ValueError: Propagated from ``urlparse`` for malformed URLs
            (e.g. an unbalanced IPv6 bracket in the host).
    """
    # Decode percent-escapes first: servers decode the path before routing, so
    # an encoded spelling of a forbidden segment must not slip through.
    path = unquote(urlparse(url).path).lower()

    # Guarantee a trailing slash so that slash-terminated fragments such as
    # "/user/" also reject a URL whose *last* segment is the forbidden one
    # ("/user"). This cannot create new matches for fragments that do not end
    # in "/", and "/superuser" still does not match "/user/".
    if not path.endswith("/"):
        path += "/"

    return not any(frag in path for frag in FORBIDDEN_PATH_PATTERNS)
