feat: add Reddit and Mastodon backends to alert plugin

Search Reddit posts (rd) via JSON API and Mastodon hashtag
timelines (ft) across 4 fediverse instances. Both public,
no auth required.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-15 22:42:06 +01:00
parent 83a1d37b98
commit f84723f66d
3 changed files with 131 additions and 13 deletions

View File

@@ -32,6 +32,14 @@ _YT_CLIENT_VERSION = "2.20250101.00.00"
_GQL_URL = "https://gql.twitch.tv/gql"
_GQL_CLIENT_ID = "kimne78kx3ncx6brgo4mv6wki5h1ko"
_SEARX_URL = "https://searx.mymx.me/search"
_REDDIT_SEARCH_URL = "https://old.reddit.com/search.json"
_MASTODON_INSTANCES = [
"mastodon.social",
"fosstodon.org",
"hachyderm.io",
"infosec.exchange",
]
_MASTODON_TAG_TIMEOUT = 4
# -- Module-level tracking ---------------------------------------------------
@@ -153,6 +161,11 @@ def _parse_date(raw: str) -> str:
return m.group(0) if m else ""
def _strip_html(text: str) -> str:
"""Remove HTML tags from text."""
return re.sub(r"<[^>]+>", "", text).strip()
def _fetch_og(url: str) -> tuple[str, str, str]:
"""Fetch og:title, og:description, and published date from a URL.
@@ -352,12 +365,116 @@ def _search_searx(keyword: str) -> list[dict]:
return results
# -- Reddit search (blocking) ------------------------------------------------
def _search_reddit(keyword: str) -> list[dict]:
"""Search Reddit via JSON API. Blocking."""
import urllib.parse
params = urllib.parse.urlencode({
"q": keyword, "sort": "new", "limit": "25", "t": "week",
})
url = f"{_REDDIT_SEARCH_URL}?{params}"
req = urllib.request.Request(url, method="GET")
req.add_header("User-Agent", "derp-bot/1.0 (IRC keyword alert)")
resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
raw = resp.read()
resp.close()
data = json.loads(raw)
results: list[dict] = []
for child in (data.get("data") or {}).get("children") or []:
post = child.get("data") or {}
post_id = post.get("name", "")
permalink = post.get("permalink", "")
title = post.get("title", "")
created = post.get("created_utc")
date = ""
if created:
try:
date = datetime.fromtimestamp(
float(created), tz=timezone.utc,
).strftime("%Y-%m-%d")
except (ValueError, OSError):
pass
results.append({
"id": post_id,
"title": title,
"url": f"https://www.reddit.com{permalink}" if permalink else "",
"date": date,
"extra": "",
})
return results
# -- Mastodon/Fediverse search (blocking) -----------------------------------
def _search_mastodon(keyword: str) -> list[dict]:
"""Search Mastodon instances via public hashtag timeline. Blocking."""
import urllib.parse
# Sanitize keyword to alphanumeric for hashtag search
hashtag = re.sub(r"[^a-zA-Z0-9]", "", keyword).lower()
if not hashtag:
return []
results: list[dict] = []
seen_urls: set[str] = set()
for instance in _MASTODON_INSTANCES:
tag_url = (
f"https://{instance}/api/v1/timelines/tag/"
f"{urllib.parse.quote(hashtag, safe='')}"
)
req = urllib.request.Request(tag_url, method="GET")
req.add_header("User-Agent", "derp-bot/1.0 (IRC keyword alert)")
try:
resp = _urlopen(req, timeout=_MASTODON_TAG_TIMEOUT)
raw = resp.read()
resp.close()
except Exception as exc:
_log.debug("mastodon %s failed: %s", instance, exc)
continue
try:
statuses = json.loads(raw)
except json.JSONDecodeError:
continue
if not isinstance(statuses, list):
continue
for status in statuses:
status_url = status.get("url") or status.get("uri", "")
if not status_url or status_url in seen_urls:
continue
seen_urls.add(status_url)
acct = (status.get("account") or {}).get("acct", "")
content = _strip_html(status.get("content", ""))
title = f"@{acct}: {_truncate(content, 60)}" if acct else content
date = _parse_date(status.get("created_at", ""))
results.append({
"id": status_url,
"title": title,
"url": status_url,
"date": date,
"extra": "",
})
return results
# -- Backend registry -------------------------------------------------------
_BACKENDS: dict[str, callable] = {
"yt": _search_youtube,
"tw": _search_twitch,
"sx": _search_searx,
"rd": _search_reddit,
"ft": _search_mastodon,
}