feat: add Reddit and Mastodon backends to alert plugin

Search Reddit posts (rd) via JSON API and Mastodon hashtag
timelines (ft) across 4 fediverse instances. Both public,
no auth required.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 22:42:06 +01:00
parent 83a1d37b98
commit f84723f66d
3 changed files with 131 additions and 13 deletions

@@ -345,10 +345,10 @@ No API credentials needed (uses public GQL endpoint).
!alert history <name> [n] # Show recent results (default 5)
```
Searches keywords across YouTube (InnerTube), Twitch (GQL), and SearXNG simultaneously.
Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords: 1-100 chars.
Max 20 alerts/channel. Polls every 5min. Max 5 announcements per platform per cycle.
Format: `[name/yt] Title -- URL`, `[name/tw] Title -- URL`, or `[name/sx] Title -- URL`.
Searches keywords across YouTube (yt), Twitch (tw), SearXNG (sx), Reddit (rd),
and Mastodon/Fediverse (ft) simultaneously. Names: lowercase alphanumeric + hyphens,
1-20 chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every 5min.
Format: `[name/yt] Title -- URL`, `[name/rd] Title -- URL`, etc.
No API credentials needed. Persists across restarts. History stored in `data/alert_history.db`.
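The `[name/platform] Title -- URL` shape documented above can be sketched with a small helper (`format_announcement` is a hypothetical name, not the plugin's actual function; only the line shape and the 80-character title cap come from the docs):

```python
def format_announcement(name: str, platform: str, title: str, url: str) -> str:
    """Render one alert line as `[name/platform] Title -- URL`.

    Hypothetical helper; the truncation mirrors the documented 80-char cap.
    """
    if len(title) > 80:
        title = title[:79] + "…"  # keep the line IRC-friendly
    return f"[{name}/{platform}] {title} -- {url}"
```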
## SearX

@@ -660,10 +660,10 @@ Title Three -- https://example.com/page3
### `!alert` -- Keyword Alert Subscriptions
Search keywords across multiple platforms (YouTube, Twitch, SearXNG) and
announce new results. Unlike `!rss`/`!yt`/`!twitch` which follow specific
channels/feeds, `!alert` searches keywords across all supported platforms
simultaneously.
Search keywords across multiple platforms (YouTube, Twitch, SearXNG, Reddit,
Mastodon/Fediverse) and announce new results. Unlike `!rss`/`!yt`/`!twitch`
which follow specific channels/feeds, `!alert` searches keywords across all
supported platforms simultaneously.
```
!alert add <name> <keyword...> Add keyword alert (admin)
@@ -682,17 +682,18 @@ simultaneously.
Platforms searched:
- **YouTube** -- InnerTube search API (no auth required)
- **Twitch** -- Public GQL endpoint: live streams and VODs (no auth required)
- **SearXNG** -- Local SearXNG instance (no auth required)
- **YouTube** (`yt`) -- InnerTube search API (no auth required)
- **Twitch** (`tw`) -- Public GQL endpoint: live streams and VODs (no auth required)
- **SearXNG** (`sx`) -- Local SearXNG instance (no auth required)
- **Reddit** (`rd`) -- JSON search API, sorted by new, past week (no auth required)
- **Mastodon** (`ft`) -- Public hashtag timeline across 4 instances (no auth required)
Polling and announcements:
- Alerts are polled every 5 minutes by default
- On `add`, existing results are recorded without announcing (prevents flood)
- New results announced as `[name/yt] Title -- URL`, `[name/tw] Title -- URL`,
or `[name/sx] Title -- URL`
`[name/sx] Title -- URL`, `[name/rd] Title -- URL`, or `[name/ft] Title -- URL`
- Maximum 5 items announced per platform per poll; excess shown as `... and N more`
- Titles are truncated to 80 characters
- Each platform maintains its own seen list (capped at 200 per platform)
- After 5 consecutive errors the poll interval doubles (max 1 hour)
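The error backoff described above might look like the following sketch (`next_poll_interval` and the exact doubling rule are assumptions; the docs only state the trigger count and the one-hour cap):

```python
def next_poll_interval(base: int = 300, consecutive_errors: int = 0, cap: int = 3600) -> int:
    """Sketch of the documented backoff: every run of 5 consecutive errors
    doubles the base interval, capped at one hour. Names and the per-5
    doubling rule are assumptions, not the plugin's actual code."""
    doublings = consecutive_errors // 5
    return min(base * (2 ** doublings), cap)
```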

@@ -32,6 +32,14 @@ _YT_CLIENT_VERSION = "2.20250101.00.00"
_GQL_URL = "https://gql.twitch.tv/gql"
_GQL_CLIENT_ID = "kimne78kx3ncx6brgo4mv6wki5h1ko"
_SEARX_URL = "https://searx.mymx.me/search"
_REDDIT_SEARCH_URL = "https://old.reddit.com/search.json"
_MASTODON_INSTANCES = [
    "mastodon.social",
    "fosstodon.org",
    "hachyderm.io",
    "infosec.exchange",
]
_MASTODON_TAG_TIMEOUT = 4
# -- Module-level tracking ---------------------------------------------------
@@ -153,6 +161,11 @@ def _parse_date(raw: str) -> str:
    return m.group(0) if m else ""


def _strip_html(text: str) -> str:
    """Remove HTML tags from text."""
    return re.sub(r"<[^>]+>", "", text).strip()
def _fetch_og(url: str) -> tuple[str, str, str]:
    """Fetch og:title, og:description, and published date from a URL.
@@ -352,12 +365,116 @@ def _search_searx(keyword: str) -> list[dict]:
    return results
# -- Reddit search (blocking) ------------------------------------------------
def _search_reddit(keyword: str) -> list[dict]:
    """Search Reddit via JSON API. Blocking."""
    import urllib.parse

    params = urllib.parse.urlencode({
        "q": keyword, "sort": "new", "limit": "25", "t": "week",
    })
    url = f"{_REDDIT_SEARCH_URL}?{params}"
    req = urllib.request.Request(url, method="GET")
    req.add_header("User-Agent", "derp-bot/1.0 (IRC keyword alert)")
    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    raw = resp.read()
    resp.close()
    data = json.loads(raw)
    results: list[dict] = []
    for child in (data.get("data") or {}).get("children") or []:
        post = child.get("data") or {}
        post_id = post.get("name", "")
        permalink = post.get("permalink", "")
        title = post.get("title", "")
        created = post.get("created_utc")
        date = ""
        if created:
            try:
                date = datetime.fromtimestamp(
                    float(created), tz=timezone.utc,
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
        results.append({
            "id": post_id,
            "title": title,
            "url": f"https://www.reddit.com{permalink}" if permalink else "",
            "date": date,
            "extra": "",
        })
    return results
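The listing shape `_search_reddit` consumes can be exercised against a minimal hand-built payload (field names follow Reddit's public JSON listing format; the values here are invented):

```python
import json
from datetime import datetime, timezone

# Invented sample mimicking the `data.children[].data` listing structure.
sample = json.loads("""
{"data": {"children": [
  {"data": {"name": "t3_abc123",
            "permalink": "/r/python/comments/abc123/example/",
            "title": "Example post",
            "created_utc": 1739577600}}
]}}
""")
post = sample["data"]["children"][0]["data"]
url = f"https://www.reddit.com{post['permalink']}"
date = datetime.fromtimestamp(post["created_utc"], tz=timezone.utc).strftime("%Y-%m-%d")
# url  -> "https://www.reddit.com/r/python/comments/abc123/example/"
# date -> "2025-02-15"
```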
# -- Mastodon/Fediverse search (blocking) -----------------------------------
def _search_mastodon(keyword: str) -> list[dict]:
    """Search Mastodon instances via public hashtag timeline. Blocking."""
    import urllib.parse

    # Sanitize keyword to alphanumeric for hashtag search
    hashtag = re.sub(r"[^a-zA-Z0-9]", "", keyword).lower()
    if not hashtag:
        return []
    results: list[dict] = []
    seen_urls: set[str] = set()
    for instance in _MASTODON_INSTANCES:
        tag_url = (
            f"https://{instance}/api/v1/timelines/tag/"
            f"{urllib.parse.quote(hashtag, safe='')}"
        )
        req = urllib.request.Request(tag_url, method="GET")
        req.add_header("User-Agent", "derp-bot/1.0 (IRC keyword alert)")
        try:
            resp = _urlopen(req, timeout=_MASTODON_TAG_TIMEOUT)
            raw = resp.read()
            resp.close()
        except Exception as exc:
            _log.debug("mastodon %s failed: %s", instance, exc)
            continue
        try:
            statuses = json.loads(raw)
        except json.JSONDecodeError:
            continue
        if not isinstance(statuses, list):
            continue
        for status in statuses:
            status_url = status.get("url") or status.get("uri", "")
            if not status_url or status_url in seen_urls:
                continue
            seen_urls.add(status_url)
            acct = (status.get("account") or {}).get("acct", "")
            content = _strip_html(status.get("content", ""))
            title = f"@{acct}: {_truncate(content, 60)}" if acct else content
            date = _parse_date(status.get("created_at", ""))
            results.append({
                "id": status_url,
                "title": title,
                "url": status_url,
                "date": date,
                "extra": "",
            })
    return results
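The keyword-to-hashtag sanitization above collapses multi-word keywords into a single tag, which is worth seeing in isolation (`to_hashtag` is a hypothetical name; the regex is the one used in `_search_mastodon`):

```python
import re

def to_hashtag(keyword: str) -> str:
    # Same rule as _search_mastodon: drop non-alphanumerics, lowercase.
    return re.sub(r"[^a-zA-Z0-9]", "", keyword).lower()
```

Note the consequence: a keyword like `Rust Lang` is searched as the fused hashtag `rustlang`, not as a phrase.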
# -- Backend registry -------------------------------------------------------
_BACKENDS: dict[str, callable] = {
    "yt": _search_youtube,
    "tw": _search_twitch,
    "sx": _search_searx,
    "rd": _search_reddit,
    "ft": _search_mastodon,
}
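One way the registry might be fanned out per keyword is a parallel dispatch over `_BACKENDS` (a sketch only: `search_all` and the thread-pool approach are assumptions, since the plugin's actual polling loop is not part of this diff; the backends are blocking, so they run off the main loop):

```python
from concurrent.futures import ThreadPoolExecutor

def search_all(keyword: str, backends: dict) -> dict:
    """Fan one keyword out to every backend in parallel; a failing backend
    contributes an empty list instead of aborting the whole cycle."""
    def run(item):
        code, fn = item
        try:
            return code, fn(keyword)
        except Exception:
            return code, []
    with ThreadPoolExecutor(max_workers=len(backends)) as pool:
        return dict(pool.map(run, backends.items()))
```

Isolating failures per backend matches the documented behavior of keeping other platforms alive while one errors and backs off.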