feat: search SearXNG across categories with day filter

Query general, news, videos, and social media categories
separately with time_range=day. Dedup results by URL across
categories to avoid announcing the same item twice.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-15 22:44:55 +01:00
parent f84723f66d
commit e70c22a510
2 changed files with 44 additions and 21 deletions

View File

@@ -337,31 +337,54 @@ def _search_twitch(keyword: str) -> list[dict]:
# -- SearXNG search (blocking) ----------------------------------------------
_SEARX_CATEGORIES = ["general", "news", "videos", "social media"]


def _search_searx(keyword: str) -> list[dict]:
    """Search SearXNG across multiple categories, filtered to the last day. Blocking.

    Queries each category in ``_SEARX_CATEGORIES`` separately with
    ``time_range=day`` and deduplicates results by URL so the same item
    is not returned twice when it appears in several categories.
    A failing category is logged and skipped; it never aborts the
    whole search.

    Args:
        keyword: The search query string.

    Returns:
        A list of result dicts with keys ``id``, ``title``, ``url``,
        ``date`` and ``extra`` (``id`` is the result URL; ``extra`` is
        always empty for SearXNG results).
    """
    import urllib.parse
    results: list[dict] = []
    seen_urls: set[str] = set()
    for category in _SEARX_CATEGORIES:
        params = urllib.parse.urlencode({
            "q": keyword,
            "format": "json",
            "categories": category,
            "time_range": "day",
        })
        url = f"{_SEARX_URL}?{params}"
        req = urllib.request.Request(url, method="GET")
        try:
            resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT)
            try:
                raw = resp.read()
            finally:
                # Close even if read() raises, so the connection is not leaked.
                resp.close()
        except Exception as exc:
            # Best effort per category: log at debug and move on.
            _log.debug("searx category %s failed: %s", category, exc)
            continue
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            # Instance returned non-JSON (e.g. an HTML error page); skip.
            continue
        for item in data.get("results", []):
            item_url = item.get("url", "")
            # Skip items without a URL and cross-category duplicates.
            if not item_url or item_url in seen_urls:
                continue
            seen_urls.add(item_url)
            results.append({
                "id": item_url,
                "title": item.get("title", ""),
                "url": item_url,
                "date": _parse_date(item.get("publishedDate") or ""),
                "extra": "",
            })
    return results