feat: metadata enrichment for alerts and subscription plugins

Alert backends now populate a structured `extra` field with engagement metrics (views, stars, votes, etc.) instead of embedding them in titles. Subscription plugins show richer announcements: Twitch viewer counts, YouTube views/likes/dates, and RSS published dates.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 10:00:17 +01:00
parent c3b19feb0f
commit 1fe7da9ed8
10 changed files with 614 additions and 52 deletions
@@ -148,6 +148,7 @@ def _db() -> sqlite3.Connection:
    for col, default in [
        ("short_id", "''"),
        ("short_url", "''"),
+        ("extra", "''"),
    ]:
        try:
            _conn.execute(
@@ -181,8 +182,8 @@ def _save_result(channel: str, alert: str, backend: str, item: dict,
    db.execute(
        "INSERT INTO results"
        " (channel, alert, backend, item_id, title, url, date, found_at,"
-        "  short_id, short_url)"
-        " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+        "  short_id, short_url, extra)"
+        " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        (
            channel,
            alert,
@@ -194,6 +195,7 @@ def _save_result(channel: str, alert: str, backend: str, item: dict,
            datetime.now(timezone.utc).isoformat(),
            short_id,
            short_url,
+            item.get("extra", ""),
        ),
    )
    db.commit()
@@ -286,6 +288,15 @@ class _DDGParser(HTMLParser):
                self.results.append((self._url, title))


+def _compact_num(n: int) -> str:
+    """Format compactly: 1234 -> 1.2k, 999950 -> 1M, 1234567 -> 1.2M."""
+    if n >= 999_950:  # 999950+ would otherwise round up to "1000k"
+        return f"{n / 1_000_000:.1f}M".replace(".0M", "M")
+    if n >= 1_000:
+        return f"{n / 1_000:.1f}k".replace(".0k", "k")
+    return str(n)
+
+
 def _make_short_id(backend: str, item_id: str) -> str:
    """Deterministic 8-char base36 hash from backend:item_id."""
    digest = hashlib.sha256(f"{backend}:{item_id}".encode()).digest()
@@ -469,12 +480,14 @@ def _search_twitch(keyword: str) -> list[dict]:
        line = f"{display} is live: {title}"
        if game:
            line += f" ({game})"
+        viewers = item.get("viewersCount", 0)
+        extra = f"{_compact_num(viewers)} viewers" if viewers else ""
        results.append({
            "id": f"stream:{stream_id}",
            "title": line,
            "url": f"https://twitch.tv/{login}",
            "date": "",
-            "extra": "",
+            "extra": extra,
        })

    # VODs
@@ -484,12 +497,14 @@ def _search_twitch(keyword: str) -> list[dict]:
        if not vod_id:
            continue
        title = item.get("title", "")
+        views = item.get("viewCount", 0)
+        extra = f"{_compact_num(views)} views" if views else ""
        results.append({
            "id": f"vod:{vod_id}",
            "title": title,
            "url": f"https://twitch.tv/videos/{vod_id}",
            "date": "",
-            "extra": "",
+            "extra": extra,
        })

    return results
@@ -579,12 +594,19 @@ def _search_reddit(keyword: str) -> list[dict]:
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
+        score = post.get("score", 0)
+        num_comments = post.get("num_comments", 0)
+        parts = []
+        if score:
+            parts.append(f"+{_compact_num(score)}")
+        if num_comments:
+            parts.append(f"{_compact_num(num_comments)}c")
        results.append({
            "id": post_id,
            "title": title,
            "url": f"https://www.reddit.com{permalink}" if permalink else "",
            "date": date,
-            "extra": "",
+            "extra": " ".join(parts),
        })
    return results

@@ -622,12 +644,19 @@ def _search_mastodon(keyword: str) -> list[dict]:
            acct = (status.get("account") or {}).get("acct", "")
            content = _strip_html(status.get("content", ""))
            title = f"@{acct}: {content}" if acct else content
+            reblogs = status.get("reblogs_count", 0)
+            favs = status.get("favourites_count", 0)
+            parts = []
+            if reblogs:
+                parts.append(f"{_compact_num(reblogs)}rb")
+            if favs:
+                parts.append(f"{_compact_num(favs)}fav")
            items.append({
                "id": status_url,
                "title": title,
                "url": status_url,
                "date": _parse_date(status.get("created_at", "")),
-                "extra": "",
+                "extra": " ".join(parts),
            })
        return items

@@ -783,15 +812,13 @@ def _search_kick(keyword: str) -> list[dict]:
        channel = stream.get("channel") or {}
        slug = channel.get("slug", "")
        viewers = stream.get("viewer_count", 0)
-        title = session_title
-        if viewers:
-            title += f" ({viewers} viewers)"
+        extra = f"{_compact_num(viewers)} viewers" if viewers else ""
        results.append({
            "id": f"live:{stream_id}",
-            "title": title,
+            "title": session_title,
            "url": f"https://kick.com/{slug}" if slug else "",
            "date": _parse_date(stream.get("start_time", "")),
-            "extra": "",
+            "extra": extra,
        })

    return results
@@ -807,7 +834,7 @@ def _search_dailymotion(keyword: str) -> list[dict]:
        "search": keyword,
        "sort": "recent",
        "limit": "25",
-        "fields": "id,title,url,created_time",
+        "fields": "id,title,url,created_time,views_total",
    })
    url = f"{_DAILYMOTION_API}?{params}"

@@ -833,12 +860,14 @@ def _search_dailymotion(keyword: str) -> list[dict]:
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
+        views = item.get("views_total", 0)
+        extra = f"{_compact_num(views)} views" if views else ""
        results.append({
            "id": video_id,
            "title": title,
            "url": video_url,
            "date": date,
-            "extra": "",
+            "extra": extra,
        })
    return results

@@ -872,12 +901,19 @@ def _search_peertube(keyword: str) -> list[dict]:
            name = video.get("name", "")
            acct = (video.get("account") or {}).get("displayName", "")
            title = f"{acct}: {name}" if acct else name
+            views = video.get("views", 0)
+            likes = video.get("likes", 0)
+            parts = []
+            if views:
+                parts.append(f"{_compact_num(views)}v")
+            if likes:
+                parts.append(f"{_compact_num(likes)}lk")
            items.append({
                "id": video_url,
                "title": title,
                "url": video_url,
                "date": _parse_date(video.get("publishedAt", "")),
-                "extra": "",
+                "extra": " ".join(parts),
            })
        return items

@@ -923,12 +959,19 @@ def _search_bluesky(keyword: str) -> list[dict]:
        title = f"@{display}: {text}"
        date = _parse_date(record.get("createdAt", ""))
        post_url = f"https://bsky.app/profile/{handle}/post/{rkey}" if handle else ""
+        like_count = post.get("likeCount", 0)
+        repost_count = post.get("repostCount", 0)
+        parts = []
+        if like_count:
+            parts.append(f"{_compact_num(like_count)}lk")
+        if repost_count:
+            parts.append(f"{_compact_num(repost_count)}rp")
        results.append({
            "id": uri,
            "title": title,
            "url": post_url,
            "date": date,
-            "extra": "",
+            "extra": " ".join(parts),
        })
    return results

@@ -965,12 +1008,20 @@ def _search_lemmy(keyword: str) -> list[dict]:
            community = (entry.get("community") or {}).get("name", "")
            title = f"{community}: {name}" if community else name
            post_url = post.get("url") or ap_id
+            counts = entry.get("counts") or {}
+            score = counts.get("score", 0)
+            comments = counts.get("comments", 0)
+            parts = []
+            if score:
+                parts.append(f"+{_compact_num(score)}")
+            if comments:
+                parts.append(f"{_compact_num(comments)}c")
            items.append({
                "id": ap_id,
                "title": title,
                "url": post_url,
                "date": _parse_date(post.get("published", "")),
-                "extra": "",
+                "extra": " ".join(parts),
            })
        return items

@@ -1116,15 +1167,19 @@ def _search_hackernews(keyword: str) -> list[dict]:
        # External URL if available, otherwise HN discussion link
        item_url = hit.get("url") or f"https://news.ycombinator.com/item?id={object_id}"
        date = _parse_date(hit.get("created_at", ""))
-        points = hit.get("points")
+        points = hit.get("points", 0)
+        num_comments = hit.get("num_comments", 0)
+        parts = []
        if points:
-            title += f" ({points}pts)"
+            parts.append(f"{_compact_num(points)}pt")
+        if num_comments:
+            parts.append(f"{_compact_num(num_comments)}c")
        results.append({
            "id": object_id,
            "title": title,
            "url": item_url,
            "date": date,
-            "extra": "",
+            "extra": " ".join(parts),
        })
    return results

@@ -1158,18 +1213,22 @@ def _search_github(keyword: str) -> list[dict]:
        description = repo.get("description") or ""
        html_url = repo.get("html_url", "")
        stars = repo.get("stargazers_count", 0)
+        forks = repo.get("forks_count", 0)
        title = full_name
        if description:
            title += f": {description}"
+        parts = []
        if stars:
-            title += f" [{stars}*]"
+            parts.append(f"{_compact_num(stars)}*")
+        if forks:
+            parts.append(f"{_compact_num(forks)}fk")
        date = _parse_date(repo.get("updated_at", ""))
        results.append({
            "id": repo_id,
            "title": title,
            "url": html_url,
            "date": date,
-            "extra": "",
+            "extra": " ".join(parts),
        })
    return results

@@ -1248,8 +1307,15 @@ def _search_stackexchange(keyword: str) -> list[dict]:
        title = _strip_html(item.get("title", ""))
        link = item.get("link", "")
        score = item.get("score", 0)
+        answer_count = item.get("answer_count", 0)
+        view_count = item.get("view_count", 0)
+        parts = []
        if score:
-            title += f" [{score}v]"
+            parts.append(f"+{_compact_num(score)}")
+        if answer_count:
+            parts.append(f"{_compact_num(answer_count)}a")
+        if view_count:
+            parts.append(f"{_compact_num(view_count)}v")
        created = item.get("creation_date")
        date = ""
        if created:
@@ -1261,7 +1327,7 @@ def _search_stackexchange(keyword: str) -> list[dict]:
                pass
        results.append({
            "id": qid, "title": title, "url": link,
-            "date": date, "extra": "",
+            "date": date, "extra": " ".join(parts),
        })
    return results

@@ -1295,15 +1361,19 @@ def _search_gitlab(keyword: str) -> list[dict]:
        description = repo.get("description") or ""
        web_url = repo.get("web_url", "")
        stars = repo.get("star_count", 0)
+        forks = repo.get("forks_count", 0)
        title = name
        if description:
            title += f": {description}"
+        parts = []
        if stars:
-            title += f" [{stars}*]"
+            parts.append(f"{_compact_num(stars)}*")
+        if forks:
+            parts.append(f"{_compact_num(forks)}fk")
        date = _parse_date(repo.get("last_activity_at", ""))
        results.append({
            "id": rid, "title": title, "url": web_url,
-            "date": date, "extra": "",
+            "date": date, "extra": " ".join(parts),
        })
    return results

@@ -1408,18 +1478,22 @@ def _search_dockerhub(keyword: str) -> list[dict]:
            continue
        description = item.get("short_description") or ""
        stars = item.get("star_count", 0)
+        pulls = item.get("pull_count", 0)
        title = name
        if description:
            title += f": {description}"
+        parts = []
        if stars:
-            title += f" [{stars}*]"
+            parts.append(f"{_compact_num(stars)}*")
+        if pulls:
+            parts.append(f"{_compact_num(pulls)} pulls")
        hub_url = (
            f"https://hub.docker.com/r/{name}" if "/" in name
            else f"https://hub.docker.com/_/{name}"
        )
        results.append({
            "id": name, "title": title, "url": hub_url,
-            "date": "", "extra": "",
+            "date": "", "extra": " ".join(parts),
        })
    return results

@@ -1574,10 +1648,17 @@ def _search_devto(keyword: str) -> list[dict]:
            author = ""
        if author:
            title = f"{author}: {title}"
+        reactions = item.get("positive_reactions_count", 0)
+        comments = item.get("comments_count", 0)
+        parts = []
+        if reactions:
+            parts.append(f"+{_compact_num(reactions)}")
+        if comments:
+            parts.append(f"{_compact_num(comments)}c")
        date = _parse_date(item.get("published_at", ""))
        results.append({
            "id": article_id, "title": title, "url": article_url,
-            "date": date, "extra": "",
+            "date": date, "extra": " ".join(parts),
        })
    return results

@@ -1656,17 +1737,18 @@ def _search_huggingface(keyword: str) -> list[dict]:
        downloads = model.get("downloads", 0)
        likes = model.get("likes", 0)
        title = model_id
+        parts = []
        if downloads:
-            title += f" [{downloads} dl]"
-        elif likes:
-            title += f" [{likes} likes]"
+            parts.append(f"{_compact_num(downloads)}dl")
+        if likes:
+            parts.append(f"{_compact_num(likes)}lk")
        date = _parse_date(model.get("lastModified", ""))
        results.append({
            "id": model_id,
            "title": title,
            "url": f"https://huggingface.co/{model_id}",
            "date": date,
-            "extra": "",
+            "extra": " ".join(parts),
        })
    return results

@@ -1836,6 +1918,9 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
                    channel, name, tag, item, short_url=short_url,
                )
                title = item["title"] or "(no title)"
+                extra = item.get("extra", "")
+                if extra:
+                    title = f"{title} | {extra}"
                date = item.get("date", "")
                meta = f"[{name}/{tag}/{short_id}]"
                if date:
@@ -2003,7 +2088,7 @@ async def cmd_alert(bot, message):
        db = _db()
        rows = db.execute(
            "SELECT id, backend, title, url, date, found_at, short_id,"
-            " short_url FROM results"
+            " short_url, extra FROM results"
            " WHERE channel = ? AND alert = ? ORDER BY id DESC LIMIT ?",
            (channel, name, limit),
        ).fetchall()
@@ -2013,9 +2098,12 @@ async def cmd_alert(bot, message):
        loop = asyncio.get_running_loop()
        fp = bot.registry._modules.get("flaskpaste")
        history_lines = []
-        for row_id, backend, title, url, date, found_at, short_id, short_url in reversed(rows):
+        for (row_id, backend, title, url, date, found_at,
+             short_id, short_url, extra) in reversed(rows):
            ts = found_at[:10]
            title = _truncate(title) if title else "(no title)"
+            if extra:
+                title = f"{title} | {extra}"
            display_url = short_url or url
            if fp and url and not short_url:
                try:
@@ -2050,15 +2138,19 @@ async def cmd_alert(bot, message):
        channel = message.target
        db = _db()
        row = db.execute(
-            "SELECT alert, backend, title, url, date, found_at, short_id"
+            "SELECT alert, backend, title, url, date, found_at, short_id,"
+            " extra"
            " FROM results WHERE short_id = ? AND channel = ? LIMIT 1",
            (short_id, channel),
        ).fetchone()
        if not row:
            await bot.reply(message, f"No result with id '{short_id}'")
            return
-        alert, backend, title, url, date, found_at, sid = row
-        await bot.reply(message, f"[{alert}/{backend}/{sid}] {title or '(no title)'}")
+        alert, backend, title, url, date, found_at, sid, extra = row
+        display = title or "(no title)"
+        if extra:
+            display = f"{display} | {extra}"
+        await bot.reply(message, f"[{alert}/{backend}/{sid}] {display}")
        if url:
            await bot.reply(message, url)
        await bot.reply(
@@ -135,6 +135,21 @@ def _fetch_feed(url: str, etag: str = "", last_modified: str = "") -> dict:

 # -- Feed parsing ------------------------------------------------------------

+def _parse_date(raw: str) -> str:
+    """Return the YYYY-MM-DD date found in *raw*, or "" if none parses."""
+    import re as _re
+    from email.utils import parsedate_to_datetime
+    iso = _re.search(r"\d{4}-\d{2}-\d{2}", raw)
+    if iso is not None:
+        return iso.group(0)
+    # No ISO-style date in the text: fall back to RFC 2822, the format
+    # commonly used by RSS pubDate.
+    try:
+        return parsedate_to_datetime(raw).strftime("%Y-%m-%d")
+    except (ValueError, TypeError):
+        return ""
+
+
 def _parse_rss(root: ET.Element) -> tuple[str, list[dict]]:
    """Parse RSS 2.0 feed."""
    channel = root.find("channel")
@@ -146,8 +161,13 @@ def _parse_rss(root: ET.Element) -> tuple[str, list[dict]]:
        item_id = item.findtext("guid") or item.findtext("link") or ""
        item_title = (item.findtext("title") or "").strip()
        item_link = (item.findtext("link") or "").strip()
+        pub_date = (item.findtext("pubDate") or "").strip()
+        date = _parse_date(pub_date) if pub_date else ""
        if item_id:
-            items.append({"id": item_id, "title": item_title, "link": item_link})
+            items.append({
+                "id": item_id, "title": item_title,
+                "link": item_link, "date": date,
+            })
    return (title, items)


@@ -162,8 +182,14 @@ def _parse_atom(root: ET.Element) -> tuple[str, list[dict]]:
        if not entry_id:
            entry_id = entry_link
        entry_title = (entry.findtext(f"{_ATOM_NS}title") or "").strip()
+        published = (entry.findtext(f"{_ATOM_NS}published") or "").strip()
+        updated = (entry.findtext(f"{_ATOM_NS}updated") or "").strip()
+        date = _parse_date(published or updated)
        if entry_id:
-            items.append({"id": entry_id, "title": entry_title, "link": entry_link})
+            items.append({
+                "id": entry_id, "title": entry_title,
+                "link": entry_link, "date": date,
+            })
    return (title, items)


@@ -246,7 +272,10 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
        for item in shown:
            title = _truncate(item["title"]) if item["title"] else "(no title)"
            link = item["link"]
+            date = item.get("date", "")
            line = f"[{name}] {title}"
+            if date:
+                line += f" | {date}"
            if link:
                line += f" -- {link}"
            await bot.send(channel, line)
@@ -49,6 +49,15 @@ def _truncate(text: str, max_len: int = _MAX_TITLE_LEN) -> str:
    return text[: max_len - 3].rstrip() + "..."


+def _compact_num(n: int) -> str:
+    """Format compactly: 1234 -> 1.2k, 999950 -> 1M, 1234567 -> 1.2M."""
+    if n >= 999_950:  # 999950+ would otherwise round up to "1000k"
+        return f"{n / 1_000_000:.1f}M".replace(".0M", "M")
+    if n >= 1_000:
+        return f"{n / 1_000:.1f}k".replace(".0k", "k")
+    return str(n)
+
+
 # -- Blocking helpers (for executor) -----------------------------------------

 def _query_stream(login: str) -> dict:
@@ -172,15 +181,19 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
        new_stream_id = result["stream_id"]
        data["last_title"] = result["title"]
        data["last_game"] = result["game"]
+        data["last_viewers"] = result["viewers"]

        if announce and (not was_live or new_stream_id != old_stream_id):
            channel = data["channel"]
            name = data["name"]
            title = _truncate(result["title"]) if result["title"] else "(no title)"
            game = result["game"]
+            viewers = result["viewers"]
            line = f"[{name}] is live: {title}"
            if game:
                line += f" ({game})"
+            if viewers:
+                line += f" | {_compact_num(viewers)} viewers"
            line += f" -- https://twitch.tv/{login}"
            await bot.send(channel, line)

@@ -286,7 +299,13 @@ async def cmd_twitch(bot, message):
                    if err:
                        streamers.append(f"{name} (error)")
                    elif live:
-                        streamers.append(f"{name} (live)")
+                        viewers = data.get("last_viewers", 0)
+                        if viewers:
+                            streamers.append(
+                                f"{name} (live, {_compact_num(viewers)})"
+                            )
+                        else:
+                            streamers.append(f"{name} (live)")
                    else:
                        streamers.append(name)
        if not streamers:
@@ -318,9 +337,12 @@ async def cmd_twitch(bot, message):
        elif data.get("was_live"):
            title = _truncate(data.get("last_title", ""))
            game = data.get("last_game", "")
+            viewers = data.get("last_viewers", 0)
            line = f"{name}: live -- {title}"
            if game:
                line += f" ({game})"
+            if viewers:
+                line += f" | {_compact_num(viewers)} viewers"
            await bot.reply(message, line)
        else:
            await bot.reply(message, f"{name}: offline")
@@ -27,6 +27,7 @@ _YT_PLAYER_URL = "https://www.youtube.com/youtubei/v1/player"
 _YT_CLIENT_VERSION = "2.20250101.00.00"
 _ATOM_NS = "{http://www.w3.org/2005/Atom}"
 _YT_NS = "{http://www.youtube.com/xml/schemas/2015}"
+_MEDIA_NS = "{http://search.yahoo.com/mrss/}"
 _MAX_SEEN = 200
 _MAX_ANNOUNCE = 5
 _DEFAULT_INTERVAL = 600
@@ -74,6 +75,15 @@ def _truncate(text: str, max_len: int = _MAX_TITLE_LEN) -> str:
    return text[: max_len - 3].rstrip() + "..."


+def _compact_num(n: int) -> str:
+    """Format compactly: 1234 -> 1.2k, 999950 -> 1M, 1234567 -> 1.2M."""
+    if n >= 999_950:  # 999950+ would otherwise round up to "1000k"
+        return f"{n / 1_000_000:.1f}M".replace(".0M", "M")
+    if n >= 1_000:
+        return f"{n / 1_000:.1f}k".replace(".0k", "k")
+    return str(n)
+
+
 def _is_youtube_url(url: str) -> bool:
    """Check if URL is a YouTube domain."""
    try:
@@ -213,8 +223,33 @@ def _parse_feed(body: bytes) -> tuple[str, list[dict]]:
            link = (link_el.get("href", "") if link_el is not None else "").strip()
        if not entry_id:
            entry_id = link
+        # Published date
+        published = (entry.findtext(f"{_ATOM_NS}published") or "").strip()
+        date = published[:10] if len(published) >= 10 else ""
+        # media:statistics views + media:starRating count (likes)
+        views = 0
+        likes = 0
+        group = entry.find(f"{_MEDIA_NS}group")
+        if group is not None:
+            community = group.find(f"{_MEDIA_NS}community")
+            if community is not None:
+                stats_el = community.find(f"{_MEDIA_NS}statistics")
+                if stats_el is not None:
+                    try:
+                        views = int(stats_el.get("views", "0"))
+                    except (ValueError, TypeError):
+                        pass
+                rating_el = community.find(f"{_MEDIA_NS}starRating")
+                if rating_el is not None:
+                    try:
+                        likes = int(rating_el.get("count", "0"))
+                    except (ValueError, TypeError):
+                        pass
        if entry_id:
-            items.append({"id": entry_id, "title": entry_title, "link": link})
+            items.append({
+                "id": entry_id, "title": entry_title, "link": link,
+                "date": date, "views": views, "likes": likes,
+            })
    return (channel_name, items)


@@ -305,7 +340,21 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
        for item in shown:
            title = _truncate(item["title"]) if item["title"] else "(no title)"
            link = item["link"]
+            # Build metadata suffix
+            parts = []
+            views = item.get("views", 0)
+            likes = item.get("likes", 0)
+            if views:
+                parts.append(f"{_compact_num(views)}v")
+            if likes:
+                parts.append(f"{_compact_num(likes)}lk")
+            date = item.get("date", "")
+            if date:
+                parts.append(date)
+            extra = " ".join(parts)
            line = f"[{name}] {title}"
+            if extra:
+                line += f" | {extra}"
            if link:
                line += f" -- {link}"
            await bot.send(channel, line)