diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md index 838f447..7c75653 100644 --- a/docs/CHEATSHEET.md +++ b/docs/CHEATSHEET.md @@ -345,10 +345,10 @@ No API credentials needed (uses public GQL endpoint). !alert history [n] # Show recent results (default 5) ``` -Searches keywords across YouTube (InnerTube), Twitch (GQL), and SearXNG simultaneously. -Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords: 1-100 chars. -Max 20 alerts/channel. Polls every 5min. Max 5 announcements per platform per cycle. -Format: `[name/yt] Title -- URL`, `[name/tw] Title -- URL`, or `[name/sx] Title -- URL`. +Searches keywords across YouTube (yt), Twitch (tw), SearXNG (sx), Reddit (rd), +and Mastodon/Fediverse (ft) simultaneously. Names: lowercase alphanumeric + hyphens, +1-20 chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every 5min. +Format: `[name/yt] Title -- URL`, `[name/rd] Title -- URL`, etc. No API credentials needed. Persists across restarts. History stored in `data/alert_history.db`. ## SearX diff --git a/docs/USAGE.md b/docs/USAGE.md index 836d08a..0032f7e 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -660,10 +660,10 @@ Title Three -- https://example.com/page3 ### `!alert` -- Keyword Alert Subscriptions -Search keywords across multiple platforms (YouTube, Twitch, SearXNG) and -announce new results. Unlike `!rss`/`!yt`/`!twitch` which follow specific -channels/feeds, `!alert` searches keywords across all supported platforms -simultaneously. +Search keywords across multiple platforms (YouTube, Twitch, SearXNG, Reddit, +Mastodon/Fediverse) and announce new results. Unlike `!rss`/`!yt`/`!twitch` +which follow specific channels/feeds, `!alert` searches keywords across all +supported platforms simultaneously. ``` !alert add Add keyword alert (admin) @@ -682,17 +682,18 @@ simultaneously. 
Platforms searched:
 
-- **YouTube** -- InnerTube search API (no auth required)
-- **Twitch** -- Public GQL endpoint: live streams and VODs (no auth required)
-- **SearXNG** -- Local SearXNG instance (no auth required)
+- **YouTube** (`yt`) -- InnerTube search API (no auth required)
+- **Twitch** (`tw`) -- Public GQL endpoint: live streams and VODs (no auth required)
+- **SearXNG** (`sx`) -- Local SearXNG instance (no auth required)
+- **Reddit** (`rd`) -- JSON search API, sorted by new, past week (no auth required)
+- **Mastodon** (`ft`) -- Public hashtag timeline across 4 instances; the keyword is reduced to its letters and digits and looked up as a single hashtag (no auth required)
 
 Polling and announcements:
 
 - Alerts are polled every 5 minutes by default
 - On `add`, existing results are recorded without announcing (prevents flood)
 - New results announced as `[name/yt] Title -- URL`, `[name/tw] Title -- URL`,
-  or `[name/sx] Title -- URL`
-- Maximum 5 items announced per platform per poll; excess shown as `... and N more`
+  `[name/sx] Title -- URL`, `[name/rd] Title -- URL`, or `[name/ft] Title -- URL`
 - Titles are truncated to 80 characters
 - Each platform maintains its own seen list (capped at 200 per platform)
 - 5 consecutive errors doubles the poll interval (max 1 hour)
diff --git a/plugins/alert.py b/plugins/alert.py
index be2ce5a..8868477 100644
--- a/plugins/alert.py
+++ b/plugins/alert.py
@@ -32,6 +32,14 @@ _YT_CLIENT_VERSION = "2.20250101.00.00"
 _GQL_URL = "https://gql.twitch.tv/gql"
 _GQL_CLIENT_ID = "kimne78kx3ncx6brgo4mv6wki5h1ko"
 _SEARX_URL = "https://searx.mymx.me/search"
+_REDDIT_SEARCH_URL = "https://old.reddit.com/search.json"
+_MASTODON_INSTANCES = [
+    "mastodon.social",
+    "fosstodon.org",
+    "hachyderm.io",
+    "infosec.exchange",
+]
+_MASTODON_TAG_TIMEOUT = 4
 
 # -- Module-level tracking ---------------------------------------------------
 
@@ -153,6 +161,21 @@ def _parse_date(raw: str) -> str:
     return m.group(0) if m else ""
 
 
+def _strip_html(text: str) -> str:
+    """Remove HTML tags from text and decode character references.
+
+    ``<br>`` and ``</p>`` become a space so adjacent lines do not run
+    together; every other tag is removed without a replacement.
+    """
+    import html
+
+    text = re.sub(r"(?i)<br\s*/?>|</p\s*>", " ", text)
+    text = re.sub(r"<[^>]+>", "", text)
+    # Decode entities only after the tags are gone, so escaped markup
+    # such as "&lt;b&gt;" survives as visible text.
+    return html.unescape(text).strip()
+
+
 def _fetch_og(url: str) -> tuple[str, str, str]:
     """Fetch og:title, og:description, and published date from a URL.
 
@@ -352,12 +375,163 @@ def _search_searx(keyword: str) -> list[dict]:
     return results
 
 
+# -- Reddit search (blocking) ------------------------------------------------
+
+def _search_reddit(keyword: str) -> list[dict]:
+    """Search Reddit via JSON API. Blocking.
+
+    Requests up to 25 posts from the past week, newest first.
+
+    Args:
+        keyword: Search phrase, sent as the ``q`` query parameter.
+
+    Returns:
+        One dict per post with ``id`` (the post's ``name`` field), ``title``,
+        ``url`` (absolute permalink, or "" if absent), ``date`` (UTC
+        ``YYYY-MM-DD``, or "" if unknown) and ``extra`` (always "").
+
+    Raises:
+        Network and JSON decoding errors are not caught here; they
+        propagate to the caller.
+    """
+    import urllib.parse
+
+    params = urllib.parse.urlencode({
+        "q": keyword, "sort": "new", "limit": "25", "t": "week",
+    })
+    url = f"{_REDDIT_SEARCH_URL}?{params}"
+
+    req = urllib.request.Request(url, method="GET")
+    req.add_header("User-Agent", "derp-bot/1.0 (IRC keyword alert)")
+
+    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
+    try:
+        raw = resp.read()
+    finally:
+        # Release the connection even when the read fails part-way.
+        resp.close()
+
+    data = json.loads(raw)
+    if not isinstance(data, dict):
+        # An unexpected top-level JSON value carries no listing.
+        return []
+
+    results: list[dict] = []
+    for child in (data.get("data") or {}).get("children") or []:
+        if not isinstance(child, dict):
+            continue
+        post = child.get("data") or {}
+        post_id = post.get("name", "")
+        if not post_id:
+            # An entry without an id cannot be told apart from others.
+            continue
+        permalink = post.get("permalink", "")
+        created = post.get("created_utc")
+        date = ""
+        if created:
+            try:
+                date = datetime.fromtimestamp(
+                    float(created), tz=timezone.utc,
+                ).strftime("%Y-%m-%d")
+            except (TypeError, ValueError, OverflowError, OSError):
+                # Malformed or out-of-range timestamp: keep post, no date.
+                pass
+        results.append({
+            "id": post_id,
+            "title": post.get("title", ""),
+            "url": f"https://www.reddit.com{permalink}" if permalink else "",
+            "date": date,
+            "extra": "",
+        })
+    return results
+
+
+# -- Mastodon/Fediverse search (blocking) -----------------------------------
+
+def _search_mastodon(keyword: str) -> list[dict]:
+    """Search Mastodon instances via public hashtag timeline. Blocking.
+
+    The keyword is stripped to its ASCII letters and digits, lowercased and
+    looked up as a hashtag on every host in ``_MASTODON_INSTANCES``. An
+    instance that fails or answers with something unexpected is skipped
+    so the remaining ones can still contribute.
+
+    Args:
+        keyword: Alert keyword; one with no letters or digits yields [].
+
+    Returns:
+        One dict per distinct status URL with ``id``, ``title``, ``url``,
+        ``date`` and ``extra`` (always "").
+    """
+    import urllib.parse
+
+    # Sanitize keyword to alphanumeric for hashtag search
+    hashtag = re.sub(r"[^a-zA-Z0-9]", "", keyword).lower()
+    if not hashtag:
+        return []
+
+    results: list[dict] = []
+    seen_urls: set[str] = set()
+
+    for instance in _MASTODON_INSTANCES:
+        tag_url = (
+            f"https://{instance}/api/v1/timelines/tag/"
+            f"{urllib.parse.quote(hashtag, safe='')}"
+        )
+        req = urllib.request.Request(tag_url, method="GET")
+        req.add_header("User-Agent", "derp-bot/1.0 (IRC keyword alert)")
+        try:
+            resp = _urlopen(req, timeout=_MASTODON_TAG_TIMEOUT)
+            try:
+                raw = resp.read()
+            finally:
+                # Release the connection even when the read fails part-way.
+                resp.close()
+        except Exception as exc:
+            # Best effort: one failing instance must not abort the search.
+            _log.debug("mastodon %s failed: %s", instance, exc)
+            continue
+
+        try:
+            statuses = json.loads(raw)
+        except ValueError as exc:
+            # ValueError covers JSONDecodeError and undecodable bytes.
+            _log.debug("mastodon %s bad JSON: %s", instance, exc)
+            continue
+
+        if not isinstance(statuses, list):
+            continue
+
+        for status in statuses:
+            if not isinstance(status, dict):
+                continue
+            status_url = status.get("url") or status.get("uri") or ""
+            if not status_url or status_url in seen_urls:
+                continue
+            seen_urls.add(status_url)
+
+            acct = (status.get("account") or {}).get("acct", "")
+            # Tolerate null fields, and bound the text with or without acct.
+            snippet = _truncate(_strip_html(status.get("content") or ""), 60)
+            results.append({
+                "id": status_url,
+                "title": f"@{acct}: {snippet}" if acct else snippet,
+                "url": status_url,
+                "date": _parse_date(status.get("created_at") or ""),
+                "extra": "",
+            })
+
+    return results
+
+
 # -- Backend registry -------------------------------------------------------
 
 _BACKENDS: dict[str, callable] = {
     "yt": _search_youtube,
     "tw": _search_twitch,
     "sx": _search_searx,
+    "rd": _search_reddit,
+    "ft": _search_mastodon,
 }
 
 