From f0b198d98ae4c32b1eb8c64578bb3d77cb483ae2 Mon Sep 17 00:00:00 2001 From: user Date: Sun, 15 Feb 2026 23:07:09 +0100 Subject: [PATCH] feat: add Bluesky, Lemmy, Odysee, and Archive.org alert backends Bluesky (bs) searches public post API, constructs bsky.app URLs from at:// URIs. Lemmy (ly) queries 4 instances (lemmy.ml, lemmy.world, programming.dev, infosec.pub) with cross-instance dedup. Odysee (od) uses LBRY JSON-RPC claim_search for video, audio, and documents with lbry:// to odysee.com URL conversion. Archive.org (ia) searches via advanced search API sorted by date. All routed through SOCKS5 proxy via _urlopen. Co-Authored-By: Claude Opus 4.6 --- docs/CHEATSHEET.md | 11 +-- docs/USAGE.md | 8 +- plugins/alert.py | 219 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+), 7 deletions(-) diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md index cb4f612..fb58640 100644 --- a/docs/CHEATSHEET.md +++ b/docs/CHEATSHEET.md @@ -345,12 +345,13 @@ No API credentials needed (uses public GQL endpoint). !alert history [n] # Show recent results (default 5) ``` -Searches keywords across 10 backends: YouTube (yt), Twitch (tw), SearXNG (sx), +Searches keywords across 14 backends: YouTube (yt), Twitch (tw), SearXNG (sx), Reddit (rd), Mastodon (ft), DuckDuckGo (dg), Google News (gn), Kick (kk), -Dailymotion (dm), PeerTube (pt). Names: lowercase alphanumeric + hyphens, 1-20 -chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every 5min. -Format: `[name/yt] Title -- URL`, etc. No API credentials needed. Persists across -restarts. History stored in `data/alert_history.db`. +Dailymotion (dm), PeerTube (pt), Bluesky (bs), Lemmy (ly), Odysee (od), +Archive.org (ia). Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords: +1-100 chars. Max 20 alerts/channel. Polls every 5min. Format: `[name/yt] Title -- URL`, +etc. No API credentials needed. Persists across restarts. History stored in +`data/alert_history.db`. 
## SearX diff --git a/docs/USAGE.md b/docs/USAGE.md index a0b163e..a7110d6 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -692,13 +692,17 @@ Platforms searched: - **Kick** (`kk`) -- Public search API: channels and livestreams (no auth required) - **Dailymotion** (`dm`) -- Public video API, sorted by recent (no auth required) - **PeerTube** (`pt`) -- Federated video search across 4 instances (no auth required) +- **Bluesky** (`bs`) -- Public post search API via SOCKS5 proxy (no auth required) +- **Lemmy** (`ly`) -- Federated post search across 4 instances (no auth required) +- **Odysee** (`od`) -- LBRY JSON-RPC claim search: video, audio, documents (no auth required) +- **Archive.org** (`ia`) -- Internet Archive advanced search, sorted by date (no auth required) Polling and announcements: - Alerts are polled every 5 minutes by default - On `add`, existing results are recorded without announcing (prevents flood) -- New results announced as `[name/<tag>] Title -- URL` where tag is `yt`, `tw`, - `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, or `pt` +- New results announced as `[name/<tag>] Title -- URL` where tag is one of: + `yt`, `tw`, `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, `pt`, `bs`, `ly`, `od`, `ia` - Titles are truncated to 80 characters - Each platform maintains its own seen list (capped at 200 per platform) - 5 consecutive errors doubles the poll interval (max 1 hour) diff --git a/plugins/alert.py b/plugins/alert.py index 10a9c16..c526f97 100644 --- a/plugins/alert.py +++ b/plugins/alert.py @@ -51,6 +51,16 @@ _PEERTUBE_INSTANCES = [ "diode.zone", ] _PEERTUBE_TIMEOUT = 4 +_BLUESKY_SEARCH_URL = "https://public.api.bsky.app/xrpc/app.bsky.feed.searchPosts" +_LEMMY_INSTANCES = [ + "lemmy.ml", + "lemmy.world", + "programming.dev", + "infosec.pub", +] +_LEMMY_TIMEOUT = 4 +_ODYSEE_API = "https://api.na-backend.odysee.com/api/v1/proxy" +_ARCHIVE_SEARCH_URL = "https://archive.org/advancedsearch.php" # -- Module-level tracking --------------------------------------------------- 
@@ -787,6 +797,211 @@ def _search_peertube(keyword: str) -> list[dict]: return results +# -- Bluesky search (blocking) ---------------------------------------------- + +def _search_bluesky(keyword: str) -> list[dict]: + """Search Bluesky via public search API. Blocking.""" + import urllib.parse + + params = urllib.parse.urlencode({"q": keyword, "limit": "25", "sort": "latest"}) + url = f"{_BLUESKY_SEARCH_URL}?{params}" + + req = urllib.request.Request(url, method="GET") + req.add_header("Accept", "application/json") + req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)") + + resp = _urlopen(req, timeout=_FETCH_TIMEOUT) + raw = resp.read() + resp.close() + + data = json.loads(raw) + results: list[dict] = [] + for post in data.get("posts") or []: + uri = post.get("uri", "") + if not uri: + continue + # Extract rkey from at:// URI for web URL + # URI format: at://did:plc:xxx/app.bsky.feed.post/rkey + rkey = uri.rsplit("/", 1)[-1] if "/" in uri else "" + author = post.get("author") or {} + handle = author.get("handle", "") + display = author.get("displayName") or handle + record = post.get("record") or {} + text = record.get("text", "") + title = f"@{display}: {_truncate(text, 60)}" + date = _parse_date(record.get("createdAt", "")) + post_url = f"https://bsky.app/profile/{handle}/post/{rkey}" if handle else "" + results.append({ + "id": uri, + "title": title, + "url": post_url, + "date": date, + "extra": "", + }) + return results + + +# -- Lemmy search (blocking) ------------------------------------------------ + +def _search_lemmy(keyword: str) -> list[dict]: + """Search Lemmy instances via public API. 
Blocking.""" + import urllib.parse + + results: list[dict] = [] + seen_ids: set[str] = set() + + for instance in _LEMMY_INSTANCES: + params = urllib.parse.urlencode({ + "q": keyword, "type_": "Posts", "sort": "New", "limit": "25", + }) + api_url = f"https://{instance}/api/v3/search?{params}" + + req = urllib.request.Request(api_url, method="GET") + req.add_header("Accept", "application/json") + req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)") + try: + resp = _urlopen(req, timeout=_LEMMY_TIMEOUT) + raw = resp.read() + resp.close() + except Exception as exc: + _log.debug("lemmy %s failed: %s", instance, exc) + continue + + try: + data = json.loads(raw) + except json.JSONDecodeError: + continue + + for entry in data.get("posts") or []: + post = entry.get("post") or {} + ap_id = post.get("ap_id", "") + if not ap_id or ap_id in seen_ids: + continue + seen_ids.add(ap_id) + + name = post.get("name", "") + community = (entry.get("community") or {}).get("name", "") + title = f"{community}: {name}" if community else name + date = _parse_date(post.get("published", "")) + # Use linked URL if present, otherwise the post's ap_id + post_url = post.get("url") or ap_id + results.append({ + "id": ap_id, + "title": title, + "url": post_url, + "date": date, + "extra": "", + }) + + return results + + +# -- Odysee/LBRY search (blocking) ------------------------------------------ + +def _lbry_to_odysee_url(lbry_url: str) -> str: + """Convert lbry:// URI to https://odysee.com/ web URL.""" + if not lbry_url.startswith("lbry://"): + return lbry_url + return "https://odysee.com/" + lbry_url[7:].replace("#", ":") + + +def _search_odysee(keyword: str) -> list[dict]: + """Search Odysee/LBRY via JSON-RPC claim_search. 
Blocking.""" + payload = json.dumps({ + "jsonrpc": "2.0", + "method": "claim_search", + "params": { + "text": keyword, + "order_by": ["release_time"], + "page_size": 25, + "stream_types": ["video", "audio", "document"], + }, + "id": 1, + }).encode() + + req = urllib.request.Request( + f"{_ODYSEE_API}?m=claim_search", data=payload, method="POST", + ) + req.add_header("Content-Type", "application/json") + req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)") + + resp = _urlopen(req, timeout=_FETCH_TIMEOUT) + raw = resp.read() + resp.close() + + data = json.loads(raw) + results: list[dict] = [] + for item in (data.get("result") or {}).get("items") or []: + claim_id = item.get("claim_id", "") + if not claim_id: + continue + value = item.get("value") or {} + title = value.get("title", "") + canonical = item.get("canonical_url", "") + web_url = _lbry_to_odysee_url(canonical) + # Use block timestamp for date (release_time can be bogus) + timestamp = item.get("timestamp") + date = "" + if timestamp and isinstance(timestamp, int) and timestamp < 2000000000: + try: + date = datetime.fromtimestamp( + timestamp, tz=timezone.utc, + ).strftime("%Y-%m-%d") + except (ValueError, OSError): + pass + results.append({ + "id": claim_id, + "title": title, + "url": web_url, + "date": date, + "extra": "", + }) + return results + + +# -- Archive.org search (blocking) ------------------------------------------ + +def _search_archive(keyword: str) -> list[dict]: + """Search Archive.org via advanced search API. 
Blocking.""" + import urllib.parse + + params = urllib.parse.urlencode({ + "q": keyword, + "output": "json", + "rows": "25", + "sort[]": "date desc", + "fl[]": "identifier,title,date,mediatype", + }) + url = f"{_ARCHIVE_SEARCH_URL}?{params}" + + req = urllib.request.Request(url, method="GET") + req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)") + + resp = _urlopen(req, timeout=_FETCH_TIMEOUT) + raw = resp.read() + resp.close() + + data = json.loads(raw) + results: list[dict] = [] + for doc in (data.get("response") or {}).get("docs") or []: + identifier = doc.get("identifier", "") + if not identifier: + continue + title = doc.get("title", "") + mediatype = doc.get("mediatype", "") + if mediatype: + title = f"[{mediatype}] {title}" + date = _parse_date(doc.get("date", "")) + results.append({ + "id": identifier, + "title": title, + "url": f"https://archive.org/details/{identifier}", + "date": date, + "extra": "", + }) + return results + + # -- Backend registry ------------------------------------------------------- _BACKENDS: dict[str, callable] = { @@ -800,6 +1015,10 @@ _BACKENDS: dict[str, callable] = { "kk": _search_kick, "dm": _search_dailymotion, "pt": _search_peertube, + "bs": _search_bluesky, + "ly": _search_lemmy, + "od": _search_odysee, + "ia": _search_archive, }