from __future__ import annotations
from dataclasses import dataclass
from urllib.parse import urljoin
from parsel import Selector


@dataclass
class ListItem:
    """One entry extracted from a list (index) page."""

    # Link to the entry.
    url: str
    # Title text of the entry.
    title: str
    # Raw date string exactly as extracted; datetime parsing is left to
    # the storage layer (Plan 03).
    publish_time_raw: str


def parse_list(html: str, base_url: str, selectors: dict) -> list[ListItem]:
    """Extract list rows from a column index page.

    Args:
        html: Markup of the index page. A falsy value (``None`` or ``""``)
            is parsed as an empty string.
        base_url: URL each row's href is resolved against via ``urljoin``.
        selectors: CSS selectors, looked up by these keys:
            row   : CSS for each row element
            href  : CSS relative to row, ending with ::attr(href)
            title : CSS relative to row, ending with ::text
            date  : CSS relative to row, ending with ::text

    Returns:
        One ``ListItem`` per row whose href is non-blank, in the order the
        ``row`` selector yields them. ``title`` and ``publish_time_raw``
        are whitespace-stripped and fall back to ``""`` when their selector
        matches nothing.

    Raises:
        KeyError: If a selector key that is needed is missing from
            ``selectors``.
    """
    document = Selector(text=html or "")
    items: list[ListItem] = []
    for row in document.css(selectors["row"]):
        # HTML permits whitespace around an href value. Strip it before the
        # emptiness check so a blank href is skipped instead of resolving to
        # base_url, and so urljoin never sees padded input.
        href = (row.css(selectors["href"]).get() or "").strip()
        if not href:
            continue
        title = (row.css(selectors["title"]).get() or "").strip()
        date = (row.css(selectors["date"]).get() or "").strip()
        items.append(
            ListItem(
                url=urljoin(base_url, href),
                title=title,
                publish_time_raw=date,
            )
        )
    return items
