feat: search SearXNG across categories with day filter
Query general, news, videos, and social media categories separately with time_range=day. Dedup results by URL across categories to avoid announcing the same item twice. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -337,31 +337,54 @@ def _search_twitch(keyword: str) -> list[dict]:
|
||||
|
||||
# -- SearXNG search (blocking) ----------------------------------------------

# Categories are queried one at a time: SearXNG ranks and truncates results
# per request, so separate per-category requests surface more distinct items
# than a single combined query.
_SEARX_CATEGORIES = ["general", "news", "videos", "social media"]


def _search_searx(keyword: str) -> list[dict]:
    """Search SearXNG across multiple categories, filtered to last day. Blocking.

    Queries each category in ``_SEARX_CATEGORIES`` separately with
    ``time_range=day`` and deduplicates results by URL across categories,
    so the same item is never reported twice.

    Args:
        keyword: The search query string.

    Returns:
        A list of result dicts with keys ``id``, ``title``, ``url``,
        ``date`` and ``extra``. Best effort: a category whose request or
        JSON decode fails is skipped, so the list may be empty.
    """
    import urllib.parse

    results: list[dict] = []
    seen_urls: set[str] = set()

    for category in _SEARX_CATEGORIES:
        params = urllib.parse.urlencode({
            "q": keyword,
            "format": "json",
            "categories": category,
            "time_range": "day",
        })
        url = f"{_SEARX_URL}?{params}"

        req = urllib.request.Request(url, method="GET")
        try:
            # Context manager guarantees the response is closed even when
            # read() raises (the previous explicit close() leaked the
            # connection if the body read failed mid-transfer).
            with urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT) as resp:
                raw = resp.read()
        except Exception as exc:
            # Deliberately broad: one failing category must not abort the
            # remaining categories. Logged at debug only.
            _log.debug("searx category %s failed: %s", category, exc)
            continue

        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            # Malformed payload from this category; skip it.
            continue

        for item in data.get("results", []):
            item_url = item.get("url", "")
            # Dedup by URL across categories; also drop entries with no URL,
            # since the URL doubles as the result id.
            if not item_url or item_url in seen_urls:
                continue
            seen_urls.add(item_url)
            results.append({
                "id": item_url,
                "title": item.get("title", ""),
                "url": item_url,
                "date": _parse_date(item.get("publishedDate") or ""),
                "extra": "",
            })

    return results
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user