feat: add Bluesky, Lemmy, Odysee, and Archive.org alert backends
Bluesky (bs) searches the public post API and constructs bsky.app URLs from at:// URIs. Lemmy (ly) queries 4 instances (lemmy.ml, lemmy.world, programming.dev, infosec.pub) with cross-instance dedup. Odysee (od) uses LBRY JSON-RPC claim_search for video, audio, and documents, converting lbry:// URIs to odysee.com URLs. Archive.org (ia) searches via the advanced search API sorted by date. All requests are routed through the SOCKS5 proxy via _urlopen.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -345,12 +345,13 @@ No API credentials needed (uses public GQL endpoint).
|
||||
!alert history <name> [n] # Show recent results (default 5)
|
||||
```
|
||||
|
||||
Searches keywords across 10 backends: YouTube (yt), Twitch (tw), SearXNG (sx),
|
||||
Searches keywords across 14 backends: YouTube (yt), Twitch (tw), SearXNG (sx),
|
||||
Reddit (rd), Mastodon (ft), DuckDuckGo (dg), Google News (gn), Kick (kk),
|
||||
Dailymotion (dm), PeerTube (pt). Names: lowercase alphanumeric + hyphens, 1-20
|
||||
chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every 5min.
|
||||
Format: `[name/yt] Title -- URL`, etc. No API credentials needed. Persists across
|
||||
restarts. History stored in `data/alert_history.db`.
|
||||
Dailymotion (dm), PeerTube (pt), Bluesky (bs), Lemmy (ly), Odysee (od),
|
||||
Archive.org (ia). Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords:
|
||||
1-100 chars. Max 20 alerts/channel. Polls every 5min. Format: `[name/yt] Title -- URL`,
|
||||
etc. No API credentials needed. Persists across restarts. History stored in
|
||||
`data/alert_history.db`.
|
||||
|
||||
## SearX
|
||||
|
||||
|
||||
@@ -692,13 +692,17 @@ Platforms searched:
|
||||
- **Kick** (`kk`) -- Public search API: channels and livestreams (no auth required)
|
||||
- **Dailymotion** (`dm`) -- Public video API, sorted by recent (no auth required)
|
||||
- **PeerTube** (`pt`) -- Federated video search across 4 instances (no auth required)
|
||||
- **Bluesky** (`bs`) -- Public post search API via SOCKS5 proxy (no auth required)
|
||||
- **Lemmy** (`ly`) -- Federated post search across 4 instances (no auth required)
|
||||
- **Odysee** (`od`) -- LBRY JSON-RPC claim search: video, audio, documents (no auth required)
|
||||
- **Archive.org** (`ia`) -- Internet Archive advanced search, sorted by date (no auth required)
|
||||
|
||||
Polling and announcements:
|
||||
|
||||
- Alerts are polled every 5 minutes by default
|
||||
- On `add`, existing results are recorded without announcing (prevents flood)
|
||||
- New results announced as `[name/<tag>] Title -- URL` where tag is `yt`, `tw`,
|
||||
`sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, or `pt`
|
||||
- New results announced as `[name/<tag>] Title -- URL` where tag is one of:
|
||||
`yt`, `tw`, `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, `pt`, `bs`, `ly`, `od`, `ia`
|
||||
- Titles are truncated to 80 characters
|
||||
- Each platform maintains its own seen list (capped at 200 per platform)
|
||||
- 5 consecutive errors doubles the poll interval (max 1 hour)
|
||||
|
||||
219
plugins/alert.py
219
plugins/alert.py
@@ -51,6 +51,16 @@ _PEERTUBE_INSTANCES = [
|
||||
"diode.zone",
|
||||
]
|
||||
_PEERTUBE_TIMEOUT = 4
|
||||
# Bluesky public AppView search endpoint (no auth required).
_BLUESKY_SEARCH_URL = "https://public.api.bsky.app/xrpc/app.bsky.feed.searchPosts"
# Lemmy instances queried for federated post search; results are
# deduplicated across instances by ActivityPub id (ap_id).
_LEMMY_INSTANCES = [
    "lemmy.ml",
    "lemmy.world",
    "programming.dev",
    "infosec.pub",
]
# Per-instance timeout (seconds) so one slow Lemmy host cannot stall a poll.
_LEMMY_TIMEOUT = 4
# Odysee/LBRY JSON-RPC proxy endpoint (claim_search method).
_ODYSEE_API = "https://api.na-backend.odysee.com/api/v1/proxy"
# Internet Archive advanced-search API endpoint.
_ARCHIVE_SEARCH_URL = "https://archive.org/advancedsearch.php"
|
||||
|
||||
# -- Module-level tracking ---------------------------------------------------
|
||||
|
||||
@@ -787,6 +797,211 @@ def _search_peertube(keyword: str) -> list[dict]:
|
||||
return results
|
||||
|
||||
|
||||
# -- Bluesky search (blocking) ----------------------------------------------
|
||||
|
||||
def _search_bluesky(keyword: str) -> list[dict]:
    """Search Bluesky via the public post-search API. Blocking.

    Queries app.bsky.feed.searchPosts (newest first, 25 results) and
    converts each at:// post URI into a https://bsky.app web URL.

    Returns a list of result dicts with keys: id, title, url, date, extra.
    """
    import urllib.parse

    params = urllib.parse.urlencode({"q": keyword, "limit": "25", "sort": "latest"})
    url = f"{_BLUESKY_SEARCH_URL}?{params}"

    req = urllib.request.Request(url, method="GET")
    req.add_header("Accept", "application/json")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even if read() raises so the connection is not leaked.
        resp.close()

    data = json.loads(raw)
    results: list[dict] = []
    for post in data.get("posts") or []:
        uri = post.get("uri", "")
        if not uri:
            continue
        # at:// URI format: at://did:plc:xxx/app.bsky.feed.post/<rkey>;
        # the rkey is the last path segment and keys the web URL.
        rkey = uri.rsplit("/", 1)[-1] if "/" in uri else ""
        author = post.get("author") or {}
        handle = author.get("handle", "")
        display = author.get("displayName") or handle
        record = post.get("record") or {}
        text = record.get("text", "")
        title = f"@{display}: {_truncate(text, 60)}"
        date = _parse_date(record.get("createdAt", ""))
        # Without a handle we cannot build a profile URL; leave it empty.
        post_url = f"https://bsky.app/profile/{handle}/post/{rkey}" if handle else ""
        results.append({
            "id": uri,
            "title": title,
            "url": post_url,
            "date": date,
            "extra": "",
        })
    return results
|
||||
|
||||
|
||||
# -- Lemmy search (blocking) ------------------------------------------------
|
||||
|
||||
def _search_lemmy(keyword: str) -> list[dict]:
    """Search Lemmy instances via the public /api/v3/search API. Blocking.

    Queries every instance in _LEMMY_INSTANCES and merges the results,
    deduplicating federated posts across instances by ActivityPub id
    (ap_id). Per-instance failures are logged at debug level and skipped
    so one dead host does not sink the whole search.

    Returns a list of result dicts with keys: id, title, url, date, extra.
    """
    import urllib.parse

    # The query string is identical for every instance -- build it once.
    params = urllib.parse.urlencode({
        "q": keyword, "type_": "Posts", "sort": "New", "limit": "25",
    })

    results: list[dict] = []
    seen_ids: set[str] = set()

    for instance in _LEMMY_INSTANCES:
        api_url = f"https://{instance}/api/v3/search?{params}"

        req = urllib.request.Request(api_url, method="GET")
        req.add_header("Accept", "application/json")
        req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
        try:
            resp = _urlopen(req, timeout=_LEMMY_TIMEOUT)
            try:
                raw = resp.read()
            finally:
                # Close even if read() raises so the connection is not leaked.
                resp.close()
        except Exception as exc:
            _log.debug("lemmy %s failed: %s", instance, exc)
            continue

        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            continue

        for entry in data.get("posts") or []:
            post = entry.get("post") or {}
            ap_id = post.get("ap_id", "")
            # ap_id is the canonical federated id: the same post seen from
            # another instance carries the same ap_id, so skip duplicates.
            if not ap_id or ap_id in seen_ids:
                continue
            seen_ids.add(ap_id)

            name = post.get("name", "")
            community = (entry.get("community") or {}).get("name", "")
            title = f"{community}: {name}" if community else name
            date = _parse_date(post.get("published", ""))
            # Prefer the externally linked URL; fall back to the post itself.
            post_url = post.get("url") or ap_id
            results.append({
                "id": ap_id,
                "title": title,
                "url": post_url,
                "date": date,
                "extra": "",
            })

    return results
|
||||
|
||||
|
||||
# -- Odysee/LBRY search (blocking) ------------------------------------------
|
||||
|
||||
def _lbry_to_odysee_url(lbry_url: str) -> str:
|
||||
"""Convert lbry:// URI to https://odysee.com/ web URL."""
|
||||
if not lbry_url.startswith("lbry://"):
|
||||
return lbry_url
|
||||
return "https://odysee.com/" + lbry_url[7:].replace("#", ":")
|
||||
|
||||
|
||||
def _search_odysee(keyword: str) -> list[dict]:
    """Search Odysee/LBRY via JSON-RPC claim_search. Blocking.

    Posts a claim_search request covering video, audio and document
    streams, newest first, and converts each canonical lbry:// URL to
    its odysee.com web URL.

    Returns a list of result dicts with keys: id, title, url, date, extra.
    """
    payload = json.dumps({
        "jsonrpc": "2.0",
        "method": "claim_search",
        "params": {
            "text": keyword,
            "order_by": ["release_time"],
            "page_size": 25,
            "stream_types": ["video", "audio", "document"],
        },
        "id": 1,
    }).encode()

    req = urllib.request.Request(
        f"{_ODYSEE_API}?m=claim_search", data=payload, method="POST",
    )
    req.add_header("Content-Type", "application/json")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even if read() raises so the connection is not leaked.
        resp.close()

    data = json.loads(raw)
    results: list[dict] = []
    for item in (data.get("result") or {}).get("items") or []:
        claim_id = item.get("claim_id", "")
        if not claim_id:
            continue
        value = item.get("value") or {}
        title = value.get("title", "")
        canonical = item.get("canonical_url", "")
        web_url = _lbry_to_odysee_url(canonical)
        # Use the block timestamp for the date (release_time can be bogus);
        # the upper bound (~year 2033) rejects obviously-garbage values.
        timestamp = item.get("timestamp")
        date = ""
        if timestamp and isinstance(timestamp, int) and timestamp < 2000000000:
            try:
                date = datetime.fromtimestamp(
                    timestamp, tz=timezone.utc,
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
        results.append({
            "id": claim_id,
            "title": title,
            "url": web_url,
            "date": date,
            "extra": "",
        })
    return results
|
||||
|
||||
|
||||
# -- Archive.org search (blocking) ------------------------------------------
|
||||
|
||||
def _search_archive(keyword: str) -> list[dict]:
    """Search Archive.org via the advanced-search API. Blocking.

    Queries archive.org sorted by date (newest first) and builds
    https://archive.org/details/<identifier> URLs. When the item has a
    mediatype it is prefixed to the title as "[mediatype]".

    Returns a list of result dicts with keys: id, title, url, date, extra.
    """
    import urllib.parse

    params = urllib.parse.urlencode({
        "q": keyword,
        "output": "json",
        "rows": "25",
        "sort[]": "date desc",
        "fl[]": "identifier,title,date,mediatype",
    })
    url = f"{_ARCHIVE_SEARCH_URL}?{params}"

    req = urllib.request.Request(url, method="GET")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even if read() raises so the connection is not leaked.
        resp.close()

    data = json.loads(raw)
    results: list[dict] = []
    for doc in (data.get("response") or {}).get("docs") or []:
        identifier = doc.get("identifier", "")
        if not identifier:
            continue
        title = doc.get("title", "")
        mediatype = doc.get("mediatype", "")
        if mediatype:
            title = f"[{mediatype}] {title}"
        date = _parse_date(doc.get("date", ""))
        results.append({
            "id": identifier,
            "title": title,
            "url": f"https://archive.org/details/{identifier}",
            "date": date,
            "extra": "",
        })
    return results
|
||||
|
||||
|
||||
# -- Backend registry -------------------------------------------------------
|
||||
|
||||
_BACKENDS: dict[str, callable] = {
|
||||
@@ -800,6 +1015,10 @@ _BACKENDS: dict[str, callable] = {
|
||||
"kk": _search_kick,
|
||||
"dm": _search_dailymotion,
|
||||
"pt": _search_peertube,
|
||||
"bs": _search_bluesky,
|
||||
"ly": _search_lemmy,
|
||||
"od": _search_odysee,
|
||||
"ia": _search_archive,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user