feat: search SearXNG across categories with day filter

Query general, news, videos, and social media categories
separately with time_range=day. Dedup results by URL across
categories to avoid announcing the same item twice.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-15 22:44:55 +01:00
parent f84723f66d
commit e70c22a510
2 changed files with 44 additions and 21 deletions

View File

@@ -337,31 +337,54 @@ def _search_twitch(keyword: str) -> list[dict]:
# -- SearXNG search (blocking) ----------------------------------------------
_SEARX_CATEGORIES = ["general", "news", "videos", "social media"]


def _search_searx(keyword: str) -> list[dict]:
    """Search SearXNG across multiple categories, filtered to the last day. Blocking.

    Queries each category in ``_SEARX_CATEGORIES`` separately with
    ``time_range=day`` and deduplicates results by URL so the same item
    is not returned twice when it appears in several categories.
    A failing category is logged and skipped; it never aborts the
    whole search.

    Args:
        keyword: The search query string.

    Returns:
        A list of result dicts with keys ``id``, ``title``, ``url``,
        ``date`` and ``extra`` (``id`` is the result URL; ``extra`` is
        always empty for SearXNG results).
    """
    import urllib.parse
    results: list[dict] = []
    seen_urls: set[str] = set()
    for category in _SEARX_CATEGORIES:
        params = urllib.parse.urlencode({
            "q": keyword,
            "format": "json",
            "categories": category,
            "time_range": "day",
        })
        url = f"{_SEARX_URL}?{params}"
        req = urllib.request.Request(url, method="GET")
        try:
            resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT)
            try:
                raw = resp.read()
            finally:
                # Close even if read() raises, so the connection is not leaked.
                resp.close()
        except Exception as exc:
            # Best effort per category: log at debug and move on.
            _log.debug("searx category %s failed: %s", category, exc)
            continue
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            # Instance returned non-JSON (e.g. an HTML error page); skip.
            continue
        for item in data.get("results", []):
            item_url = item.get("url", "")
            # Skip items without a URL and cross-category duplicates.
            if not item_url or item_url in seen_urls:
                continue
            seen_urls.add(item_url)
            results.append({
                "id": item_url,
                "title": item.get("title", ""),
                "url": item_url,
                "date": _parse_date(item.get("publishedDate") or ""),
                "extra": "",
            })
    return results