feat: add Bluesky, Lemmy, Odysee, and Archive.org alert backends

Bluesky (bs) searches public post API, constructs bsky.app URLs
from at:// URIs. Lemmy (ly) queries 4 instances (lemmy.ml,
lemmy.world, programming.dev, infosec.pub) with cross-instance
dedup. Odysee (od) uses LBRY JSON-RPC claim_search for video,
audio, and documents with lbry:// to odysee.com URL conversion.
Archive.org (ia) searches via advanced search API sorted by date.
All routed through SOCKS5 proxy via _urlopen.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-15 23:07:09 +01:00
parent 52c49609b3
commit f0b198d98a
3 changed files with 231 additions and 7 deletions

View File

@@ -345,12 +345,13 @@ No API credentials needed (uses public GQL endpoint).
!alert history <name> [n] # Show recent results (default 5)
```
Searches keywords across 10 backends: YouTube (yt), Twitch (tw), SearXNG (sx),
Searches keywords across 14 backends: YouTube (yt), Twitch (tw), SearXNG (sx),
Reddit (rd), Mastodon (ft), DuckDuckGo (dg), Google News (gn), Kick (kk),
Dailymotion (dm), PeerTube (pt). Names: lowercase alphanumeric + hyphens, 1-20
chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every 5min.
Format: `[name/yt] Title -- URL`, etc. No API credentials needed. Persists across
restarts. History stored in `data/alert_history.db`.
Dailymotion (dm), PeerTube (pt), Bluesky (bs), Lemmy (ly), Odysee (od),
Archive.org (ia). Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords:
1-100 chars. Max 20 alerts/channel. Polls every 5min. Format: `[name/yt] Title -- URL`,
etc. No API credentials needed. Persists across restarts. History stored in
`data/alert_history.db`.
## SearX

View File

@@ -692,13 +692,17 @@ Platforms searched:
- **Kick** (`kk`) -- Public search API: channels and livestreams (no auth required)
- **Dailymotion** (`dm`) -- Public video API, sorted by recent (no auth required)
- **PeerTube** (`pt`) -- Federated video search across 4 instances (no auth required)
- **Bluesky** (`bs`) -- Public post search API via SOCKS5 proxy (no auth required)
- **Lemmy** (`ly`) -- Federated post search across 4 instances (no auth required)
- **Odysee** (`od`) -- LBRY JSON-RPC claim search: video, audio, documents (no auth required)
- **Archive.org** (`ia`) -- Internet Archive advanced search, sorted by date (no auth required)
Polling and announcements:
- Alerts are polled every 5 minutes by default
- On `add`, existing results are recorded without announcing (prevents flood)
- New results announced as `[name/<tag>] Title -- URL` where tag is `yt`, `tw`,
`sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, or `pt`
- New results announced as `[name/<tag>] Title -- URL` where tag is one of:
`yt`, `tw`, `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, `pt`, `bs`, `ly`, `od`, `ia`
- Titles are truncated to 80 characters
- Each platform maintains its own seen list (capped at 200 per platform)
- 5 consecutive errors doubles the poll interval (max 1 hour)

View File

@@ -51,6 +51,16 @@ _PEERTUBE_INSTANCES = [
"diode.zone",
]
_PEERTUBE_TIMEOUT = 4
# Bluesky public AppView search endpoint (no auth required).
_BLUESKY_SEARCH_URL = "https://public.api.bsky.app/xrpc/app.bsky.feed.searchPosts"
# Lemmy instances queried in turn; results are deduped across instances
# by ActivityPub id in _search_lemmy.
_LEMMY_INSTANCES = [
    "lemmy.ml",
    "lemmy.world",
    "programming.dev",
    "infosec.pub",
]
# Per-instance request timeout (seconds) for Lemmy searches.
_LEMMY_TIMEOUT = 4
# Odysee/LBRY JSON-RPC proxy endpoint used for claim_search.
_ODYSEE_API = "https://api.na-backend.odysee.com/api/v1/proxy"
# Archive.org advanced-search endpoint (JSON output requested via params).
_ARCHIVE_SEARCH_URL = "https://archive.org/advancedsearch.php"
# -- Module-level tracking ---------------------------------------------------
@@ -787,6 +797,211 @@ def _search_peertube(keyword: str) -> list[dict]:
return results
# -- Bluesky search (blocking) ----------------------------------------------
def _search_bluesky(keyword: str) -> list[dict]:
    """Query Bluesky's public post-search endpoint. Blocking.

    Returns result dicts with id/title/url/date/extra keys; the id is
    the post's at:// URI and the url is the bsky.app web equivalent.
    """
    import urllib.parse
    query = urllib.parse.urlencode(
        {"q": keyword, "limit": "25", "sort": "latest"},
    )
    request = urllib.request.Request(
        f"{_BLUESKY_SEARCH_URL}?{query}", method="GET",
    )
    request.add_header("Accept", "application/json")
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()
    payload = json.loads(body)
    found: list[dict] = []
    for post in payload.get("posts") or []:
        at_uri = post.get("uri", "")
        if not at_uri:
            continue
        # at://did:plc:xxx/app.bsky.feed.post/<rkey> -- take the trailing rkey
        # (empty when the URI has no slash, which also yields an empty web URL).
        rkey = at_uri.rsplit("/", 1)[-1] if "/" in at_uri else ""
        profile = post.get("author") or {}
        handle = profile.get("handle", "")
        shown_name = profile.get("displayName") or handle
        post_record = post.get("record") or {}
        snippet = post_record.get("text", "")
        title = f"@{shown_name}: {_truncate(snippet, 60)}"
        when = _parse_date(post_record.get("createdAt", ""))
        web_url = f"https://bsky.app/profile/{handle}/post/{rkey}" if handle else ""
        found.append({
            "id": at_uri,
            "title": title,
            "url": web_url,
            "date": when,
            "extra": "",
        })
    return found
# -- Lemmy search (blocking) ------------------------------------------------
def _search_lemmy(keyword: str) -> list[dict]:
    """Search Lemmy instances via public API. Blocking.

    Queries each instance in _LEMMY_INSTANCES and merges the results,
    deduplicating by ActivityPub id (ap_id) so a post federated to
    several instances appears only once.  Per-instance failures are
    logged at debug level and skipped (best-effort fan-out).
    """
    import urllib.parse
    # The query string is identical for every instance; build it once
    # instead of re-encoding it on each loop iteration.
    params = urllib.parse.urlencode({
        "q": keyword, "type_": "Posts", "sort": "New", "limit": "25",
    })
    results: list[dict] = []
    seen_ids: set[str] = set()
    for instance in _LEMMY_INSTANCES:
        api_url = f"https://{instance}/api/v3/search?{params}"
        req = urllib.request.Request(api_url, method="GET")
        req.add_header("Accept", "application/json")
        req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
        try:
            resp = _urlopen(req, timeout=_LEMMY_TIMEOUT)
            raw = resp.read()
            resp.close()
        except Exception as exc:
            # One unreachable instance must not abort the whole search.
            _log.debug("lemmy %s failed: %s", instance, exc)
            continue
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            continue
        for entry in data.get("posts") or []:
            post = entry.get("post") or {}
            ap_id = post.get("ap_id", "")
            if not ap_id or ap_id in seen_ids:
                continue
            seen_ids.add(ap_id)
            name = post.get("name", "")
            community = (entry.get("community") or {}).get("name", "")
            title = f"{community}: {name}" if community else name
            date = _parse_date(post.get("published", ""))
            # Prefer the externally linked URL; fall back to the post's ap_id.
            post_url = post.get("url") or ap_id
            results.append({
                "id": ap_id,
                "title": title,
                "url": post_url,
                "date": date,
                "extra": "",
            })
    return results
# -- Odysee/LBRY search (blocking) ------------------------------------------
def _lbry_to_odysee_url(lbry_url: str) -> str:
"""Convert lbry:// URI to https://odysee.com/ web URL."""
if not lbry_url.startswith("lbry://"):
return lbry_url
return "https://odysee.com/" + lbry_url[7:].replace("#", ":")
def _search_odysee(keyword: str) -> list[dict]:
    """Search Odysee/LBRY via JSON-RPC claim_search. Blocking.

    Covers video, audio and document streams; lbry:// canonical URLs
    are rewritten to odysee.com web URLs for the announcement.
    """
    rpc_body = {
        "jsonrpc": "2.0",
        "method": "claim_search",
        "params": {
            "text": keyword,
            "order_by": ["release_time"],
            "page_size": 25,
            "stream_types": ["video", "audio", "document"],
        },
        "id": 1,
    }
    request = urllib.request.Request(
        f"{_ODYSEE_API}?m=claim_search",
        data=json.dumps(rpc_body).encode(),
        method="POST",
    )
    request.add_header("Content-Type", "application/json")
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    raw = response.read()
    response.close()
    parsed = json.loads(raw)
    claims = (parsed.get("result") or {}).get("items") or []
    found: list[dict] = []
    for claim in claims:
        claim_id = claim.get("claim_id", "")
        if not claim_id:
            continue
        meta = claim.get("value") or {}
        web_url = _lbry_to_odysee_url(claim.get("canonical_url", ""))
        # Date comes from the block timestamp (release_time can be bogus);
        # absurd values (>= 2e9 epoch secs, ~2033) are treated as unset.
        ts = claim.get("timestamp")
        when = ""
        if ts and isinstance(ts, int) and ts < 2000000000:
            try:
                when = datetime.fromtimestamp(
                    ts, tz=timezone.utc,
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
        found.append({
            "id": claim_id,
            "title": meta.get("title", ""),
            "url": web_url,
            "date": when,
            "extra": "",
        })
    return found
# -- Archive.org search (blocking) ------------------------------------------
def _search_archive(keyword: str) -> list[dict]:
    """Search Archive.org via the advanced search API. Blocking.

    Results are requested sorted by date descending; titles are
    prefixed with the item's media type when one is present.
    """
    import urllib.parse
    query = urllib.parse.urlencode({
        "q": keyword,
        "output": "json",
        "rows": "25",
        "sort[]": "date desc",
        "fl[]": "identifier,title,date,mediatype",
    })
    request = urllib.request.Request(
        f"{_ARCHIVE_SEARCH_URL}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    raw = response.read()
    response.close()
    parsed = json.loads(raw)
    docs = (parsed.get("response") or {}).get("docs") or []
    found: list[dict] = []
    for doc in docs:
        item_id = doc.get("identifier", "")
        if not item_id:
            continue
        label = doc.get("title", "")
        media = doc.get("mediatype", "")
        # e.g. "[movies] Some Film" -- makes the platform mix readable.
        if media:
            label = f"[{media}] {label}"
        found.append({
            "id": item_id,
            "title": label,
            "url": f"https://archive.org/details/{item_id}",
            "date": _parse_date(doc.get("date", "")),
            "extra": "",
        })
    return found
# -- Backend registry -------------------------------------------------------
_BACKENDS: dict[str, callable] = {
@@ -800,6 +1015,10 @@ _BACKENDS: dict[str, callable] = {
"kk": _search_kick,
"dm": _search_dailymotion,
"pt": _search_peertube,
"bs": _search_bluesky,
"ly": _search_lemmy,
"od": _search_odysee,
"ia": _search_archive,
}