feat: search SearXNG across categories with day filter
Query the general, news, videos, and social media categories separately, each with time_range=day. Deduplicate results by URL across categories to avoid announcing the same item twice.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -684,7 +684,7 @@ Platforms searched:
|
|||||||
|
|
||||||
- **YouTube** (`yt`) -- InnerTube search API (no auth required)
|
- **YouTube** (`yt`) -- InnerTube search API (no auth required)
|
||||||
- **Twitch** (`tw`) -- Public GQL endpoint: live streams and VODs (no auth required)
|
- **Twitch** (`tw`) -- Public GQL endpoint: live streams and VODs (no auth required)
|
||||||
- **SearXNG** (`sx`) -- Local SearXNG instance (no auth required)
|
- **SearXNG** (`sx`) -- Local SearXNG instance, searches general/news/videos/social media categories filtered to last 24h (no auth required)
|
||||||
- **Reddit** (`rd`) -- JSON search API, sorted by new, past week (no auth required)
|
- **Reddit** (`rd`) -- JSON search API, sorted by new, past week (no auth required)
|
||||||
- **Mastodon** (`ft`) -- Public hashtag timeline across 4 instances (no auth required)
|
- **Mastodon** (`ft`) -- Public hashtag timeline across 4 instances (no auth required)
|
||||||
|
|
||||||
|
|||||||
@@ -337,31 +337,54 @@ def _search_twitch(keyword: str) -> list[dict]:
|
|||||||
|
|
||||||
# -- SearXNG search (blocking) ----------------------------------------------
|
# -- SearXNG search (blocking) ----------------------------------------------
|
||||||
|
|
||||||
|
_SEARX_CATEGORIES = ["general", "news", "videos", "social media"]
|
||||||
|
|
||||||
|
|
||||||
def _search_searx(keyword: str) -> list[dict]:
|
def _search_searx(keyword: str) -> list[dict]:
|
||||||
"""Search SearXNG. Blocking."""
|
"""Search SearXNG across multiple categories, filtered to last day. Blocking."""
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
params = urllib.parse.urlencode({"q": keyword, "format": "json"})
|
|
||||||
url = f"{_SEARX_URL}?{params}"
|
|
||||||
|
|
||||||
req = urllib.request.Request(url, method="GET")
|
|
||||||
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT)
|
|
||||||
raw = resp.read()
|
|
||||||
resp.close()
|
|
||||||
|
|
||||||
data = json.loads(raw)
|
|
||||||
results: list[dict] = []
|
results: list[dict] = []
|
||||||
for item in data.get("results", []):
|
seen_urls: set[str] = set()
|
||||||
item_url = item.get("url", "")
|
|
||||||
title = item.get("title", "")
|
for category in _SEARX_CATEGORIES:
|
||||||
date = _parse_date(item.get("publishedDate") or "")
|
params = urllib.parse.urlencode({
|
||||||
results.append({
|
"q": keyword,
|
||||||
"id": item_url,
|
"format": "json",
|
||||||
"title": title,
|
"categories": category,
|
||||||
"url": item_url,
|
"time_range": "day",
|
||||||
"date": date,
|
|
||||||
"extra": "",
|
|
||||||
})
|
})
|
||||||
|
url = f"{_SEARX_URL}?{params}"
|
||||||
|
|
||||||
|
req = urllib.request.Request(url, method="GET")
|
||||||
|
try:
|
||||||
|
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT)
|
||||||
|
raw = resp.read()
|
||||||
|
resp.close()
|
||||||
|
except Exception as exc:
|
||||||
|
_log.debug("searx category %s failed: %s", category, exc)
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for item in data.get("results", []):
|
||||||
|
item_url = item.get("url", "")
|
||||||
|
if not item_url or item_url in seen_urls:
|
||||||
|
continue
|
||||||
|
seen_urls.add(item_url)
|
||||||
|
title = item.get("title", "")
|
||||||
|
date = _parse_date(item.get("publishedDate") or "")
|
||||||
|
results.append({
|
||||||
|
"id": item_url,
|
||||||
|
"title": title,
|
||||||
|
"url": item_url,
|
||||||
|
"date": date,
|
||||||
|
"extra": "",
|
||||||
|
})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user