feat: add Bluesky, Lemmy, Odysee, and Archive.org alert backends
Bluesky (bs) searches public post API, constructs bsky.app URLs from at:// URIs. Lemmy (ly) queries 4 instances (lemmy.ml, lemmy.world, programming.dev, infosec.pub) with cross-instance dedup. Odysee (od) uses LBRY JSON-RPC claim_search for video, audio, and documents with lbry:// to odysee.com URL conversion. Archive.org (ia) searches via advanced search API sorted by date. All routed through SOCKS5 proxy via _urlopen. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -345,12 +345,13 @@ No API credentials needed (uses public GQL endpoint).
|
|||||||
!alert history <name> [n] # Show recent results (default 5)
|
!alert history <name> [n] # Show recent results (default 5)
|
||||||
```
|
```
|
||||||
|
|
||||||
Searches keywords across 10 backends: YouTube (yt), Twitch (tw), SearXNG (sx),
|
Searches keywords across 14 backends: YouTube (yt), Twitch (tw), SearXNG (sx),
|
||||||
Reddit (rd), Mastodon (ft), DuckDuckGo (dg), Google News (gn), Kick (kk),
|
Reddit (rd), Mastodon (ft), DuckDuckGo (dg), Google News (gn), Kick (kk),
|
||||||
Dailymotion (dm), PeerTube (pt). Names: lowercase alphanumeric + hyphens, 1-20
|
Dailymotion (dm), PeerTube (pt), Bluesky (bs), Lemmy (ly), Odysee (od),
|
||||||
chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every 5min.
|
Archive.org (ia). Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords:
|
||||||
Format: `[name/yt] Title -- URL`, etc. No API credentials needed. Persists across
|
1-100 chars. Max 20 alerts/channel. Polls every 5min. Format: `[name/yt] Title -- URL`,
|
||||||
restarts. History stored in `data/alert_history.db`.
|
etc. No API credentials needed. Persists across restarts. History stored in
|
||||||
|
`data/alert_history.db`.
|
||||||
|
|
||||||
## SearX
|
## SearX
|
||||||
|
|
||||||
|
|||||||
@@ -692,13 +692,17 @@ Platforms searched:
|
|||||||
- **Kick** (`kk`) -- Public search API: channels and livestreams (no auth required)
|
- **Kick** (`kk`) -- Public search API: channels and livestreams (no auth required)
|
||||||
- **Dailymotion** (`dm`) -- Public video API, sorted by recent (no auth required)
|
- **Dailymotion** (`dm`) -- Public video API, sorted by recent (no auth required)
|
||||||
- **PeerTube** (`pt`) -- Federated video search across 4 instances (no auth required)
|
- **PeerTube** (`pt`) -- Federated video search across 4 instances (no auth required)
|
||||||
|
- **Bluesky** (`bs`) -- Public post search API via SOCKS5 proxy (no auth required)
|
||||||
|
- **Lemmy** (`ly`) -- Federated post search across 4 instances (no auth required)
|
||||||
|
- **Odysee** (`od`) -- LBRY JSON-RPC claim search: video, audio, documents (no auth required)
|
||||||
|
- **Archive.org** (`ia`) -- Internet Archive advanced search, sorted by date (no auth required)
|
||||||
|
|
||||||
Polling and announcements:
|
Polling and announcements:
|
||||||
|
|
||||||
- Alerts are polled every 5 minutes by default
|
- Alerts are polled every 5 minutes by default
|
||||||
- On `add`, existing results are recorded without announcing (prevents flood)
|
- On `add`, existing results are recorded without announcing (prevents flood)
|
||||||
- New results announced as `[name/<tag>] Title -- URL` where tag is `yt`, `tw`,
|
- New results announced as `[name/<tag>] Title -- URL` where tag is one of:
|
||||||
`sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, or `pt`
|
`yt`, `tw`, `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, `pt`, `bs`, `ly`, `od`, `ia`
|
||||||
- Titles are truncated to 80 characters
|
- Titles are truncated to 80 characters
|
||||||
- Each platform maintains its own seen list (capped at 200 per platform)
|
- Each platform maintains its own seen list (capped at 200 per platform)
|
||||||
- 5 consecutive errors doubles the poll interval (max 1 hour)
|
- 5 consecutive errors doubles the poll interval (max 1 hour)
|
||||||
|
|||||||
219
plugins/alert.py
219
plugins/alert.py
@@ -51,6 +51,16 @@ _PEERTUBE_INSTANCES = [
|
|||||||
"diode.zone",
|
"diode.zone",
|
||||||
]
|
]
|
||||||
_PEERTUBE_TIMEOUT = 4
|
_PEERTUBE_TIMEOUT = 4
|
||||||
|
# Public Bluesky post-search endpoint (no auth; requests go through _urlopen,
# which per the commit description routes via the SOCKS5 proxy).
_BLUESKY_SEARCH_URL = "https://public.api.bsky.app/xrpc/app.bsky.feed.searchPosts"
# Lemmy instances queried by _search_lemmy; results are deduplicated across
# instances by ActivityPub id (ap_id).
_LEMMY_INSTANCES = [
    "lemmy.ml",
    "lemmy.world",
    "programming.dev",
    "infosec.pub",
]
# Per-instance request timeout in seconds (kept short: 4 instances are polled).
_LEMMY_TIMEOUT = 4
# LBRY JSON-RPC proxy used by _search_odysee (claim_search method).
_ODYSEE_API = "https://api.na-backend.odysee.com/api/v1/proxy"
# Internet Archive advanced-search endpoint used by _search_archive.
_ARCHIVE_SEARCH_URL = "https://archive.org/advancedsearch.php"
|
||||||
|
|
||||||
# -- Module-level tracking ---------------------------------------------------
|
# -- Module-level tracking ---------------------------------------------------
|
||||||
|
|
||||||
@@ -787,6 +797,211 @@ def _search_peertube(keyword: str) -> list[dict]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
# -- Bluesky search (blocking) ----------------------------------------------
|
||||||
|
|
||||||
|
def _search_bluesky(keyword: str) -> list[dict]:
    """Search Bluesky posts for *keyword* via the public search API. Blocking.

    Returns result dicts with keys ``id`` (the at:// URI), ``title``
    ("@display: text", text truncated to 60 chars), ``url`` (bsky.app
    permalink, or "" when the author handle is missing), ``date`` and
    ``extra`` (always "").

    Network or JSON errors propagate to the caller (the poller counts them).
    """
    import urllib.parse

    params = urllib.parse.urlencode({"q": keyword, "limit": "25", "sort": "latest"})
    url = f"{_BLUESKY_SEARCH_URL}?{params}"

    req = urllib.request.Request(url, method="GET")
    req.add_header("Accept", "application/json")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Release the connection even if read() raises mid-transfer;
        # previously a failed read leaked the response object.
        resp.close()

    data = json.loads(raw)
    results: list[dict] = []
    for post in data.get("posts") or []:
        uri = post.get("uri", "")
        if not uri:
            continue
        # at:// URI format: at://did:plc:xxx/app.bsky.feed.post/<rkey>.
        # The rkey (last path segment) forms the bsky.app web permalink.
        rkey = uri.rsplit("/", 1)[-1] if "/" in uri else ""
        author = post.get("author") or {}
        handle = author.get("handle", "")
        display = author.get("displayName") or handle
        record = post.get("record") or {}
        text = record.get("text", "")
        title = f"@{display}: {_truncate(text, 60)}"
        date = _parse_date(record.get("createdAt", ""))
        post_url = f"https://bsky.app/profile/{handle}/post/{rkey}" if handle else ""
        results.append({
            "id": uri,
            "title": title,
            "url": post_url,
            "date": date,
            "extra": "",
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
|
# -- Lemmy search (blocking) ------------------------------------------------
|
||||||
|
|
||||||
|
def _search_lemmy(keyword: str) -> list[dict]:
    """Search Lemmy instances for *keyword* via the public v3 search API. Blocking.

    Queries every host in ``_LEMMY_INSTANCES``; a failure on one instance is
    logged at debug level and skipped so the remaining instances still
    contribute. Results are deduplicated across instances by ActivityPub id
    (``ap_id``), since federated posts appear on multiple hosts.

    Returns result dicts with keys ``id``, ``title`` ("community: name"),
    ``url`` (the linked article/media URL, falling back to the post page),
    ``date`` and ``extra`` (always "").
    """
    import urllib.parse

    # The query string is independent of the instance: build it once
    # instead of re-encoding it on every loop iteration.
    params = urllib.parse.urlencode({
        "q": keyword, "type_": "Posts", "sort": "New", "limit": "25",
    })

    results: list[dict] = []
    seen_ids: set[str] = set()

    for instance in _LEMMY_INSTANCES:
        api_url = f"https://{instance}/api/v3/search?{params}"

        req = urllib.request.Request(api_url, method="GET")
        req.add_header("Accept", "application/json")
        req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
        try:
            resp = _urlopen(req, timeout=_LEMMY_TIMEOUT)
            try:
                raw = resp.read()
            finally:
                # Close even when read() raises: the outer except swallows
                # the error, so without this the response object leaked.
                resp.close()
        except Exception as exc:
            _log.debug("lemmy %s failed: %s", instance, exc)
            continue

        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            continue

        for entry in data.get("posts") or []:
            post = entry.get("post") or {}
            ap_id = post.get("ap_id", "")
            if not ap_id or ap_id in seen_ids:
                continue
            seen_ids.add(ap_id)

            name = post.get("name", "")
            community = (entry.get("community") or {}).get("name", "")
            title = f"{community}: {name}" if community else name
            date = _parse_date(post.get("published", ""))
            # Use linked URL if present, otherwise the post's ap_id
            post_url = post.get("url") or ap_id
            results.append({
                "id": ap_id,
                "title": title,
                "url": post_url,
                "date": date,
                "extra": "",
            })

    return results
|
||||||
|
|
||||||
|
|
||||||
|
# -- Odysee/LBRY search (blocking) ------------------------------------------
|
||||||
|
|
||||||
|
def _lbry_to_odysee_url(lbry_url: str) -> str:
|
||||||
|
"""Convert lbry:// URI to https://odysee.com/ web URL."""
|
||||||
|
if not lbry_url.startswith("lbry://"):
|
||||||
|
return lbry_url
|
||||||
|
return "https://odysee.com/" + lbry_url[7:].replace("#", ":")
|
||||||
|
|
||||||
|
|
||||||
|
def _search_odysee(keyword: str) -> list[dict]:
    """Search Odysee/LBRY via the JSON-RPC ``claim_search`` method. Blocking.

    Requests video/audio/document streams ordered by release time and
    converts canonical lbry:// URLs to odysee.com web links.

    Returns result dicts with keys ``id`` (claim_id), ``title``, ``url``,
    ``date`` ("" when no usable timestamp) and ``extra`` (always "").
    Network or JSON errors propagate to the caller.
    """
    payload = json.dumps({
        "jsonrpc": "2.0",
        "method": "claim_search",
        "params": {
            "text": keyword,
            "order_by": ["release_time"],
            "page_size": 25,
            "stream_types": ["video", "audio", "document"],
        },
        "id": 1,
    }).encode()

    req = urllib.request.Request(
        f"{_ODYSEE_API}?m=claim_search", data=payload, method="POST",
    )
    req.add_header("Content-Type", "application/json")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Release the connection even if read() raises mid-transfer;
        # previously a failed read leaked the response object.
        resp.close()

    data = json.loads(raw)
    results: list[dict] = []
    for item in (data.get("result") or {}).get("items") or []:
        claim_id = item.get("claim_id", "")
        if not claim_id:
            continue
        value = item.get("value") or {}
        title = value.get("title", "")
        canonical = item.get("canonical_url", "")
        web_url = _lbry_to_odysee_url(canonical)
        # Use block timestamp for date (release_time can be bogus); the
        # < 2e9 bound rejects obviously corrupt far-future values.
        timestamp = item.get("timestamp")
        date = ""
        if timestamp and isinstance(timestamp, int) and timestamp < 2000000000:
            try:
                date = datetime.fromtimestamp(
                    timestamp, tz=timezone.utc,
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
        results.append({
            "id": claim_id,
            "title": title,
            "url": web_url,
            "date": date,
            "extra": "",
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
|
# -- Archive.org search (blocking) ------------------------------------------
|
||||||
|
|
||||||
|
def _search_archive(keyword: str) -> list[dict]:
    """Search Archive.org via the advanced search API, newest first. Blocking.

    Returns result dicts with keys ``id`` (item identifier), ``title``
    (prefixed "[mediatype] " when present), ``url`` (archive.org details
    page), ``date`` and ``extra`` (always ""). Network or JSON errors
    propagate to the caller.
    """
    import urllib.parse

    params = urllib.parse.urlencode({
        "q": keyword,
        "output": "json",
        "rows": "25",
        "sort[]": "date desc",
        "fl[]": "identifier,title,date,mediatype",
    })
    url = f"{_ARCHIVE_SEARCH_URL}?{params}"

    req = urllib.request.Request(url, method="GET")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Release the connection even if read() raises mid-transfer;
        # previously a failed read leaked the response object.
        resp.close()

    data = json.loads(raw)
    results: list[dict] = []
    for doc in (data.get("response") or {}).get("docs") or []:
        identifier = doc.get("identifier", "")
        if not identifier:
            continue
        title = doc.get("title", "")
        mediatype = doc.get("mediatype", "")
        if mediatype:
            title = f"[{mediatype}] {title}"
        date = _parse_date(doc.get("date", ""))
        results.append({
            "id": identifier,
            "title": title,
            "url": f"https://archive.org/details/{identifier}",
            "date": date,
            "extra": "",
        })
    return results
|
||||||
|
|
||||||
|
|
||||||
# -- Backend registry -------------------------------------------------------
|
# -- Backend registry -------------------------------------------------------
|
||||||
|
|
||||||
_BACKENDS: dict[str, callable] = {
|
_BACKENDS: dict[str, callable] = {
|
||||||
@@ -800,6 +1015,10 @@ _BACKENDS: dict[str, callable] = {
|
|||||||
"kk": _search_kick,
|
"kk": _search_kick,
|
||||||
"dm": _search_dailymotion,
|
"dm": _search_dailymotion,
|
||||||
"pt": _search_peertube,
|
"pt": _search_peertube,
|
||||||
|
"bs": _search_bluesky,
|
||||||
|
"ly": _search_lemmy,
|
||||||
|
"od": _search_odysee,
|
||||||
|
"ia": _search_archive,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user