diff --git a/docs/USAGE.md b/docs/USAGE.md
index 0032f7e..9c8aa79 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -684,7 +684,7 @@ Platforms searched:
 
 - **YouTube** (`yt`) -- InnerTube search API (no auth required)
 - **Twitch** (`tw`) -- Public GQL endpoint: live streams and VODs (no auth required)
-- **SearXNG** (`sx`) -- Local SearXNG instance (no auth required)
+- **SearXNG** (`sx`) -- Local SearXNG instance, searches general/news/videos/social media categories filtered to last 24h (no auth required)
 - **Reddit** (`rd`) -- JSON search API, sorted by new, past week (no auth required)
 - **Mastodon** (`ft`) -- Public hashtag timeline across 4 instances (no auth required)
 
diff --git a/plugins/alert.py b/plugins/alert.py
index 8868477..b134776 100644
--- a/plugins/alert.py
+++ b/plugins/alert.py
@@ -337,31 +337,54 @@ def _search_twitch(keyword: str) -> list[dict]:
 
 # -- SearXNG search (blocking) ----------------------------------------------
 
+_SEARX_CATEGORIES = ["general", "news", "videos", "social media"]
+
+
 def _search_searx(keyword: str) -> list[dict]:
-    """Search SearXNG. Blocking."""
+    """Search SearXNG across multiple categories, filtered to last day. Blocking."""
     import urllib.parse
 
-    params = urllib.parse.urlencode({"q": keyword, "format": "json"})
-    url = f"{_SEARX_URL}?{params}"
-
-    req = urllib.request.Request(url, method="GET")
-    resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT)
-    raw = resp.read()
-    resp.close()
-
-    data = json.loads(raw)
     results: list[dict] = []
-    for item in data.get("results", []):
-        item_url = item.get("url", "")
-        title = item.get("title", "")
-        date = _parse_date(item.get("publishedDate") or "")
-        results.append({
-            "id": item_url,
-            "title": title,
-            "url": item_url,
-            "date": date,
-            "extra": "",
+    seen_urls: set[str] = set()
+
+    for category in _SEARX_CATEGORIES:
+        params = urllib.parse.urlencode({
+            "q": keyword,
+            "format": "json",
+            "categories": category,
+            "time_range": "day",
         })
+        url = f"{_SEARX_URL}?{params}"
+
+        req = urllib.request.Request(url, method="GET")
+        try:
+            resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT)
+            raw = resp.read()
+            resp.close()
+        except Exception as exc:
+            _log.debug("searx category %s failed: %s", category, exc)
+            continue
+
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError:
+            continue
+
+        for item in data.get("results", []):
+            item_url = item.get("url", "")
+            if not item_url or item_url in seen_urls:
+                continue
+            seen_urls.add(item_url)
+            title = item.get("title", "")
+            date = _parse_date(item.get("publishedDate") or "")
+            results.append({
+                "id": item_url,
+                "title": title,
+                "url": item_url,
+                "date": date,
+                "extra": "",
+            })
+
     return results
 
 