feat: metadata enrichment for alerts and subscription plugins

Alert backends now populate a structured `extra` field with engagement
metrics (views, stars, votes, etc.) instead of embedding them in titles.
Subscription plugins show richer announcements: Twitch viewer counts,
YouTube views/likes/dates, RSS published dates.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-19 10:00:17 +01:00
parent c3b19feb0f
commit 1fe7da9ed8
10 changed files with 614 additions and 52 deletions

View File

@@ -148,6 +148,7 @@ def _db() -> sqlite3.Connection:
for col, default in [
("short_id", "''"),
("short_url", "''"),
("extra", "''"),
]:
try:
_conn.execute(
@@ -181,8 +182,8 @@ def _save_result(channel: str, alert: str, backend: str, item: dict,
db.execute(
"INSERT INTO results"
" (channel, alert, backend, item_id, title, url, date, found_at,"
" short_id, short_url)"
" VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
" short_id, short_url, extra)"
" VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(
channel,
alert,
@@ -194,6 +195,7 @@ def _save_result(channel: str, alert: str, backend: str, item: dict,
datetime.now(timezone.utc).isoformat(),
short_id,
short_url,
item.get("extra", ""),
),
)
db.commit()
@@ -286,6 +288,15 @@ class _DDGParser(HTMLParser):
self.results.append((self._url, title))
def _compact_num(n: int) -> str:
"""Format large numbers compactly: 1234 -> 1.2k, 1234567 -> 1.2M."""
if n >= 1_000_000:
return f"{n / 1_000_000:.1f}M".replace(".0M", "M")
if n >= 1_000:
return f"{n / 1_000:.1f}k".replace(".0k", "k")
return str(n)
def _make_short_id(backend: str, item_id: str) -> str:
"""Deterministic 8-char base36 hash from backend:item_id."""
digest = hashlib.sha256(f"{backend}:{item_id}".encode()).digest()
@@ -469,12 +480,14 @@ def _search_twitch(keyword: str) -> list[dict]:
line = f"{display} is live: {title}"
if game:
line += f" ({game})"
viewers = item.get("viewersCount", 0)
extra = f"{_compact_num(viewers)} viewers" if viewers else ""
results.append({
"id": f"stream:{stream_id}",
"title": line,
"url": f"https://twitch.tv/{login}",
"date": "",
"extra": "",
"extra": extra,
})
# VODs
@@ -484,12 +497,14 @@ def _search_twitch(keyword: str) -> list[dict]:
if not vod_id:
continue
title = item.get("title", "")
views = item.get("viewCount", 0)
extra = f"{_compact_num(views)} views" if views else ""
results.append({
"id": f"vod:{vod_id}",
"title": title,
"url": f"https://twitch.tv/videos/{vod_id}",
"date": "",
"extra": "",
"extra": extra,
})
return results
@@ -579,12 +594,19 @@ def _search_reddit(keyword: str) -> list[dict]:
).strftime("%Y-%m-%d")
except (ValueError, OSError):
pass
score = post.get("score", 0)
num_comments = post.get("num_comments", 0)
parts = []
if score:
parts.append(f"+{_compact_num(score)}")
if num_comments:
parts.append(f"{_compact_num(num_comments)}c")
results.append({
"id": post_id,
"title": title,
"url": f"https://www.reddit.com{permalink}" if permalink else "",
"date": date,
"extra": "",
"extra": " ".join(parts),
})
return results
@@ -622,12 +644,19 @@ def _search_mastodon(keyword: str) -> list[dict]:
acct = (status.get("account") or {}).get("acct", "")
content = _strip_html(status.get("content", ""))
title = f"@{acct}: {content}" if acct else content
reblogs = status.get("reblogs_count", 0)
favs = status.get("favourites_count", 0)
parts = []
if reblogs:
parts.append(f"{_compact_num(reblogs)}rb")
if favs:
parts.append(f"{_compact_num(favs)}fav")
items.append({
"id": status_url,
"title": title,
"url": status_url,
"date": _parse_date(status.get("created_at", "")),
"extra": "",
"extra": " ".join(parts),
})
return items
@@ -783,15 +812,13 @@ def _search_kick(keyword: str) -> list[dict]:
channel = stream.get("channel") or {}
slug = channel.get("slug", "")
viewers = stream.get("viewer_count", 0)
title = session_title
if viewers:
title += f" ({viewers} viewers)"
extra = f"{_compact_num(viewers)} viewers" if viewers else ""
results.append({
"id": f"live:{stream_id}",
"title": title,
"title": session_title,
"url": f"https://kick.com/{slug}" if slug else "",
"date": _parse_date(stream.get("start_time", "")),
"extra": "",
"extra": extra,
})
return results
@@ -807,7 +834,7 @@ def _search_dailymotion(keyword: str) -> list[dict]:
"search": keyword,
"sort": "recent",
"limit": "25",
"fields": "id,title,url,created_time",
"fields": "id,title,url,created_time,views_total",
})
url = f"{_DAILYMOTION_API}?{params}"
@@ -833,12 +860,14 @@ def _search_dailymotion(keyword: str) -> list[dict]:
).strftime("%Y-%m-%d")
except (ValueError, OSError):
pass
views = item.get("views_total", 0)
extra = f"{_compact_num(views)} views" if views else ""
results.append({
"id": video_id,
"title": title,
"url": video_url,
"date": date,
"extra": "",
"extra": extra,
})
return results
@@ -872,12 +901,19 @@ def _search_peertube(keyword: str) -> list[dict]:
name = video.get("name", "")
acct = (video.get("account") or {}).get("displayName", "")
title = f"{acct}: {name}" if acct else name
views = video.get("views", 0)
likes = video.get("likes", 0)
parts = []
if views:
parts.append(f"{_compact_num(views)}v")
if likes:
parts.append(f"{_compact_num(likes)}lk")
items.append({
"id": video_url,
"title": title,
"url": video_url,
"date": _parse_date(video.get("publishedAt", "")),
"extra": "",
"extra": " ".join(parts),
})
return items
@@ -923,12 +959,19 @@ def _search_bluesky(keyword: str) -> list[dict]:
title = f"@{display}: {text}"
date = _parse_date(record.get("createdAt", ""))
post_url = f"https://bsky.app/profile/{handle}/post/{rkey}" if handle else ""
like_count = post.get("likeCount", 0)
repost_count = post.get("repostCount", 0)
parts = []
if like_count:
parts.append(f"{_compact_num(like_count)}lk")
if repost_count:
parts.append(f"{_compact_num(repost_count)}rp")
results.append({
"id": uri,
"title": title,
"url": post_url,
"date": date,
"extra": "",
"extra": " ".join(parts),
})
return results
@@ -965,12 +1008,20 @@ def _search_lemmy(keyword: str) -> list[dict]:
community = (entry.get("community") or {}).get("name", "")
title = f"{community}: {name}" if community else name
post_url = post.get("url") or ap_id
counts = entry.get("counts") or {}
score = counts.get("score", 0)
comments = counts.get("comments", 0)
parts = []
if score:
parts.append(f"+{_compact_num(score)}")
if comments:
parts.append(f"{_compact_num(comments)}c")
items.append({
"id": ap_id,
"title": title,
"url": post_url,
"date": _parse_date(post.get("published", "")),
"extra": "",
"extra": " ".join(parts),
})
return items
@@ -1116,15 +1167,19 @@ def _search_hackernews(keyword: str) -> list[dict]:
# External URL if available, otherwise HN discussion link
item_url = hit.get("url") or f"https://news.ycombinator.com/item?id={object_id}"
date = _parse_date(hit.get("created_at", ""))
points = hit.get("points")
points = hit.get("points", 0)
num_comments = hit.get("num_comments", 0)
parts = []
if points:
title += f" ({points}pts)"
parts.append(f"{_compact_num(points)}pt")
if num_comments:
parts.append(f"{_compact_num(num_comments)}c")
results.append({
"id": object_id,
"title": title,
"url": item_url,
"date": date,
"extra": "",
"extra": " ".join(parts),
})
return results
@@ -1158,18 +1213,22 @@ def _search_github(keyword: str) -> list[dict]:
description = repo.get("description") or ""
html_url = repo.get("html_url", "")
stars = repo.get("stargazers_count", 0)
forks = repo.get("forks_count", 0)
title = full_name
if description:
title += f": {description}"
parts = []
if stars:
title += f" [{stars}*]"
parts.append(f"{_compact_num(stars)}*")
if forks:
parts.append(f"{_compact_num(forks)}fk")
date = _parse_date(repo.get("updated_at", ""))
results.append({
"id": repo_id,
"title": title,
"url": html_url,
"date": date,
"extra": "",
"extra": " ".join(parts),
})
return results
@@ -1248,8 +1307,15 @@ def _search_stackexchange(keyword: str) -> list[dict]:
title = _strip_html(item.get("title", ""))
link = item.get("link", "")
score = item.get("score", 0)
answer_count = item.get("answer_count", 0)
view_count = item.get("view_count", 0)
parts = []
if score:
title += f" [{score}v]"
parts.append(f"+{_compact_num(score)}")
if answer_count:
parts.append(f"{_compact_num(answer_count)}a")
if view_count:
parts.append(f"{_compact_num(view_count)}v")
created = item.get("creation_date")
date = ""
if created:
@@ -1261,7 +1327,7 @@ def _search_stackexchange(keyword: str) -> list[dict]:
pass
results.append({
"id": qid, "title": title, "url": link,
"date": date, "extra": "",
"date": date, "extra": " ".join(parts),
})
return results
@@ -1295,15 +1361,19 @@ def _search_gitlab(keyword: str) -> list[dict]:
description = repo.get("description") or ""
web_url = repo.get("web_url", "")
stars = repo.get("star_count", 0)
forks = repo.get("forks_count", 0)
title = name
if description:
title += f": {description}"
parts = []
if stars:
title += f" [{stars}*]"
parts.append(f"{_compact_num(stars)}*")
if forks:
parts.append(f"{_compact_num(forks)}fk")
date = _parse_date(repo.get("last_activity_at", ""))
results.append({
"id": rid, "title": title, "url": web_url,
"date": date, "extra": "",
"date": date, "extra": " ".join(parts),
})
return results
@@ -1408,18 +1478,22 @@ def _search_dockerhub(keyword: str) -> list[dict]:
continue
description = item.get("short_description") or ""
stars = item.get("star_count", 0)
pulls = item.get("pull_count", 0)
title = name
if description:
title += f": {description}"
parts = []
if stars:
title += f" [{stars}*]"
parts.append(f"{_compact_num(stars)}*")
if pulls:
parts.append(f"{_compact_num(pulls)} pulls")
hub_url = (
f"https://hub.docker.com/r/{name}" if "/" in name
else f"https://hub.docker.com/_/{name}"
)
results.append({
"id": name, "title": title, "url": hub_url,
"date": "", "extra": "",
"date": "", "extra": " ".join(parts),
})
return results
@@ -1574,10 +1648,17 @@ def _search_devto(keyword: str) -> list[dict]:
author = ""
if author:
title = f"{author}: {title}"
reactions = item.get("positive_reactions_count", 0)
comments = item.get("comments_count", 0)
parts = []
if reactions:
parts.append(f"+{_compact_num(reactions)}")
if comments:
parts.append(f"{_compact_num(comments)}c")
date = _parse_date(item.get("published_at", ""))
results.append({
"id": article_id, "title": title, "url": article_url,
"date": date, "extra": "",
"date": date, "extra": " ".join(parts),
})
return results
@@ -1656,17 +1737,18 @@ def _search_huggingface(keyword: str) -> list[dict]:
downloads = model.get("downloads", 0)
likes = model.get("likes", 0)
title = model_id
parts = []
if downloads:
title += f" [{downloads} dl]"
elif likes:
title += f" [{likes} likes]"
parts.append(f"{_compact_num(downloads)}dl")
if likes:
parts.append(f"{_compact_num(likes)}lk")
date = _parse_date(model.get("lastModified", ""))
results.append({
"id": model_id,
"title": title,
"url": f"https://huggingface.co/{model_id}",
"date": date,
"extra": "",
"extra": " ".join(parts),
})
return results
@@ -1836,6 +1918,9 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
channel, name, tag, item, short_url=short_url,
)
title = item["title"] or "(no title)"
extra = item.get("extra", "")
if extra:
title = f"{title} | {extra}"
date = item.get("date", "")
meta = f"[{name}/{tag}/{short_id}]"
if date:
@@ -2003,7 +2088,7 @@ async def cmd_alert(bot, message):
db = _db()
rows = db.execute(
"SELECT id, backend, title, url, date, found_at, short_id,"
" short_url FROM results"
" short_url, extra FROM results"
" WHERE channel = ? AND alert = ? ORDER BY id DESC LIMIT ?",
(channel, name, limit),
).fetchall()
@@ -2013,9 +2098,12 @@ async def cmd_alert(bot, message):
loop = asyncio.get_running_loop()
fp = bot.registry._modules.get("flaskpaste")
history_lines = []
for row_id, backend, title, url, date, found_at, short_id, short_url in reversed(rows):
for (row_id, backend, title, url, date, found_at,
short_id, short_url, extra) in reversed(rows):
ts = found_at[:10]
title = _truncate(title) if title else "(no title)"
if extra:
title = f"{title} | {extra}"
display_url = short_url or url
if fp and url and not short_url:
try:
@@ -2050,15 +2138,19 @@ async def cmd_alert(bot, message):
channel = message.target
db = _db()
row = db.execute(
"SELECT alert, backend, title, url, date, found_at, short_id"
"SELECT alert, backend, title, url, date, found_at, short_id,"
" extra"
" FROM results WHERE short_id = ? AND channel = ? LIMIT 1",
(short_id, channel),
).fetchone()
if not row:
await bot.reply(message, f"No result with id '{short_id}'")
return
alert, backend, title, url, date, found_at, sid = row
await bot.reply(message, f"[{alert}/{backend}/{sid}] {title or '(no title)'}")
alert, backend, title, url, date, found_at, sid, extra = row
display = title or "(no title)"
if extra:
display = f"{display} | {extra}"
await bot.reply(message, f"[{alert}/{backend}/{sid}] {display}")
if url:
await bot.reply(message, url)
await bot.reply(

View File

@@ -135,6 +135,21 @@ def _fetch_feed(url: str, etag: str = "", last_modified: str = "") -> dict:
# -- Feed parsing ------------------------------------------------------------
def _parse_date(raw: str) -> str:
"""Try to extract a YYYY-MM-DD date from a raw date string."""
import re as _re
m = _re.search(r"\d{4}-\d{2}-\d{2}", raw)
if m:
return m.group(0)
# Try RFC 2822 (common in RSS pubDate)
from email.utils import parsedate_to_datetime
try:
dt = parsedate_to_datetime(raw)
return dt.strftime("%Y-%m-%d")
except (ValueError, TypeError):
return ""
def _parse_rss(root: ET.Element) -> tuple[str, list[dict]]:
"""Parse RSS 2.0 feed."""
channel = root.find("channel")
@@ -146,8 +161,13 @@ def _parse_rss(root: ET.Element) -> tuple[str, list[dict]]:
item_id = item.findtext("guid") or item.findtext("link") or ""
item_title = (item.findtext("title") or "").strip()
item_link = (item.findtext("link") or "").strip()
pub_date = (item.findtext("pubDate") or "").strip()
date = _parse_date(pub_date) if pub_date else ""
if item_id:
items.append({"id": item_id, "title": item_title, "link": item_link})
items.append({
"id": item_id, "title": item_title,
"link": item_link, "date": date,
})
return (title, items)
@@ -162,8 +182,14 @@ def _parse_atom(root: ET.Element) -> tuple[str, list[dict]]:
if not entry_id:
entry_id = entry_link
entry_title = (entry.findtext(f"{_ATOM_NS}title") or "").strip()
published = (entry.findtext(f"{_ATOM_NS}published") or "").strip()
updated = (entry.findtext(f"{_ATOM_NS}updated") or "").strip()
date = _parse_date(published or updated)
if entry_id:
items.append({"id": entry_id, "title": entry_title, "link": entry_link})
items.append({
"id": entry_id, "title": entry_title,
"link": entry_link, "date": date,
})
return (title, items)
@@ -246,7 +272,10 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
for item in shown:
title = _truncate(item["title"]) if item["title"] else "(no title)"
link = item["link"]
date = item.get("date", "")
line = f"[{name}] {title}"
if date:
line += f" | {date}"
if link:
line += f" -- {link}"
await bot.send(channel, line)

View File

@@ -49,6 +49,15 @@ def _truncate(text: str, max_len: int = _MAX_TITLE_LEN) -> str:
return text[: max_len - 3].rstrip() + "..."
def _compact_num(n: int) -> str:
"""Format large numbers compactly: 1234 -> 1.2k, 1234567 -> 1.2M."""
if n >= 1_000_000:
return f"{n / 1_000_000:.1f}M".replace(".0M", "M")
if n >= 1_000:
return f"{n / 1_000:.1f}k".replace(".0k", "k")
return str(n)
# -- Blocking helpers (for executor) -----------------------------------------
def _query_stream(login: str) -> dict:
@@ -172,15 +181,19 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
new_stream_id = result["stream_id"]
data["last_title"] = result["title"]
data["last_game"] = result["game"]
data["last_viewers"] = result["viewers"]
if announce and (not was_live or new_stream_id != old_stream_id):
channel = data["channel"]
name = data["name"]
title = _truncate(result["title"]) if result["title"] else "(no title)"
game = result["game"]
viewers = result["viewers"]
line = f"[{name}] is live: {title}"
if game:
line += f" ({game})"
if viewers:
line += f" | {_compact_num(viewers)} viewers"
line += f" -- https://twitch.tv/{login}"
await bot.send(channel, line)
@@ -286,7 +299,13 @@ async def cmd_twitch(bot, message):
if err:
streamers.append(f"{name} (error)")
elif live:
streamers.append(f"{name} (live)")
viewers = data.get("last_viewers", 0)
if viewers:
streamers.append(
f"{name} (live, {_compact_num(viewers)})"
)
else:
streamers.append(f"{name} (live)")
else:
streamers.append(name)
if not streamers:
@@ -318,9 +337,12 @@ async def cmd_twitch(bot, message):
elif data.get("was_live"):
title = _truncate(data.get("last_title", ""))
game = data.get("last_game", "")
viewers = data.get("last_viewers", 0)
line = f"{name}: live -- {title}"
if game:
line += f" ({game})"
if viewers:
line += f" | {_compact_num(viewers)} viewers"
await bot.reply(message, line)
else:
await bot.reply(message, f"{name}: offline")

View File

@@ -27,6 +27,7 @@ _YT_PLAYER_URL = "https://www.youtube.com/youtubei/v1/player"
_YT_CLIENT_VERSION = "2.20250101.00.00"
_ATOM_NS = "{http://www.w3.org/2005/Atom}"
_YT_NS = "{http://www.youtube.com/xml/schemas/2015}"
_MEDIA_NS = "{http://search.yahoo.com/mrss/}"
_MAX_SEEN = 200
_MAX_ANNOUNCE = 5
_DEFAULT_INTERVAL = 600
@@ -74,6 +75,15 @@ def _truncate(text: str, max_len: int = _MAX_TITLE_LEN) -> str:
return text[: max_len - 3].rstrip() + "..."
def _compact_num(n: int) -> str:
"""Format large numbers compactly: 1234 -> 1.2k, 1234567 -> 1.2M."""
if n >= 1_000_000:
return f"{n / 1_000_000:.1f}M".replace(".0M", "M")
if n >= 1_000:
return f"{n / 1_000:.1f}k".replace(".0k", "k")
return str(n)
def _is_youtube_url(url: str) -> bool:
"""Check if URL is a YouTube domain."""
try:
@@ -213,8 +223,33 @@ def _parse_feed(body: bytes) -> tuple[str, list[dict]]:
link = (link_el.get("href", "") if link_el is not None else "").strip()
if not entry_id:
entry_id = link
# Published date
published = (entry.findtext(f"{_ATOM_NS}published") or "").strip()
date = published[:10] if len(published) >= 10 else ""
# media:statistics views + media:starRating count (likes)
views = 0
likes = 0
group = entry.find(f"{_MEDIA_NS}group")
if group is not None:
community = group.find(f"{_MEDIA_NS}community")
if community is not None:
stats_el = community.find(f"{_MEDIA_NS}statistics")
if stats_el is not None:
try:
views = int(stats_el.get("views", "0"))
except (ValueError, TypeError):
pass
rating_el = community.find(f"{_MEDIA_NS}starRating")
if rating_el is not None:
try:
likes = int(rating_el.get("count", "0"))
except (ValueError, TypeError):
pass
if entry_id:
items.append({"id": entry_id, "title": entry_title, "link": link})
items.append({
"id": entry_id, "title": entry_title, "link": link,
"date": date, "views": views, "likes": likes,
})
return (channel_name, items)
@@ -305,7 +340,21 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
for item in shown:
title = _truncate(item["title"]) if item["title"] else "(no title)"
link = item["link"]
# Build metadata suffix
parts = []
views = item.get("views", 0)
likes = item.get("likes", 0)
if views:
parts.append(f"{_compact_num(views)}v")
if likes:
parts.append(f"{_compact_num(likes)}lk")
date = item.get("date", "")
if date:
parts.append(date)
extra = " ".join(parts)
line = f"[{name}] {title}"
if extra:
line += f" | {extra}"
if link:
line += f" -- {link}"
await bot.send(channel, line)