feat: add 11 alert backends and fix PyPI/DEV.to search
Add Wikipedia, Stack Exchange, GitLab, npm, PyPI, Docker Hub, arXiv, Lobsters, DEV.to, Medium, and Hugging Face backends to the alert plugin (16 -> 27 total). Fix PyPI backend to use RSS updates feed (web search now requires JS challenge). Fix DEV.to to use public articles API (feed_content endpoint returns empty). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
519
plugins/alert.py
519
plugins/alert.py
@@ -64,6 +64,17 @@ _ODYSEE_API = "https://api.na-backend.odysee.com/api/v1/proxy"
|
||||
# Search/API endpoints for the alert backends. Each is a public,
# unauthenticated HTTP endpoint queried by the matching _search_* helper.
_ARCHIVE_SEARCH_URL = "https://archive.org/advancedsearch.php"
_HN_SEARCH_URL = "https://hn.algolia.com/api/v1/search_by_date"
_GITHUB_SEARCH_URL = "https://api.github.com/search/repositories"
_WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php"
_STACKEXCHANGE_URL = "https://api.stackexchange.com/2.3/search"
_GITLAB_SEARCH_URL = "https://gitlab.com/api/v4/projects"
_NPM_SEARCH_URL = "https://registry.npmjs.org/-/v1/search"
# Recent-releases RSS feed; PyPI's web search now sits behind a JS challenge.
_PYPI_RSS_URL = "https://pypi.org/rss/updates.xml"
_DOCKERHUB_SEARCH_URL = "https://hub.docker.com/v2/search/repositories/"
_ARXIV_API = "https://export.arxiv.org/api/query"
# HTML search page, scraped with an HTMLParser subclass (see _LobstersParser).
_LOBSTERS_SEARCH_URL = "https://lobste.rs/search"
_DEVTO_API = "https://dev.to/api/articles"
_MEDIUM_FEED_URL = "https://medium.com/feed/tag"
_HUGGINGFACE_API = "https://huggingface.co/api/models"


# -- Module-level tracking ---------------------------------------------------
|
||||
|
||||
@@ -1125,6 +1136,503 @@ def _search_github(keyword: str) -> list[dict]:
|
||||
return results
|
||||
|
||||
|
||||
# -- Wikipedia search (blocking) --------------------------------------------


def _search_wikipedia(keyword: str) -> list[dict]:
    """Query the public Wikipedia search API for matching articles. Blocking."""
    import urllib.parse

    query = urllib.parse.urlencode({
        "action": "query", "list": "search", "srsearch": keyword,
        "srlimit": "25", "format": "json",
    })
    request = urllib.request.Request(f"{_WIKIPEDIA_API}?{query}", method="GET")
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    payload = json.loads(body)
    hits = (payload.get("query") or {}).get("search") or []
    found: list[dict] = []
    for hit in hits:
        page_id = str(hit.get("pageid", ""))
        if not page_id:
            continue
        article = hit.get("title", "")
        found.append({
            "id": page_id,
            "title": article,
            # Article URLs use underscores where the title has spaces.
            "url": "https://en.wikipedia.org/wiki/" + article.replace(" ", "_"),
            "date": _parse_date(hit.get("timestamp", "")),
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- Stack Exchange search (blocking) ---------------------------------------


def _search_stackexchange(keyword: str) -> list[dict]:
    """Search Stack Overflow questions via the public API. Blocking."""
    import gzip
    import io
    import urllib.parse

    query = urllib.parse.urlencode({
        "order": "desc", "sort": "creation", "intitle": keyword,
        "site": "stackoverflow", "pagesize": "25",
    })
    request = urllib.request.Request(
        f"{_STACKEXCHANGE_URL}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")
    request.add_header("Accept-Encoding", "gzip")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    # The API gzips its responses; keep the raw bytes if it did not.
    try:
        body = gzip.GzipFile(fileobj=io.BytesIO(body)).read()
    except OSError:
        pass

    payload = json.loads(body)
    found: list[dict] = []
    for question in payload.get("items") or []:
        question_id = str(question.get("question_id", ""))
        if not question_id:
            continue
        label = _strip_html(question.get("title", ""))
        votes = question.get("score", 0)
        if votes:
            label += f" [{votes}v]"
        posted = question.get("creation_date")
        when = ""
        if posted:
            try:
                when = datetime.fromtimestamp(
                    int(posted), tz=timezone.utc,
                ).strftime("%Y-%m-%d")
            except (ValueError, OSError):
                pass
        found.append({
            "id": question_id, "title": label,
            "url": question.get("link", ""),
            "date": when, "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- GitLab search (blocking) ----------------------------------------------


def _search_gitlab(keyword: str) -> list[dict]:
    """Search GitLab projects via the public API. Blocking."""
    import urllib.parse

    query = urllib.parse.urlencode({
        "search": keyword, "order_by": "updated_at",
        "sort": "desc", "per_page": "25",
    })
    request = urllib.request.Request(
        f"{_GITLAB_SEARCH_URL}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    payload = json.loads(body)
    projects = payload if isinstance(payload, list) else []
    found: list[dict] = []
    for project in projects:
        project_id = str(project.get("id", ""))
        if not project_id:
            continue
        label = project.get("path_with_namespace", "")
        blurb = project.get("description") or ""
        if blurb:
            label += f": {_truncate(blurb, 50)}"
        stars = project.get("star_count", 0)
        if stars:
            label += f" [{stars}*]"
        found.append({
            "id": project_id,
            "title": label,
            "url": project.get("web_url", ""),
            "date": _parse_date(project.get("last_activity_at", "")),
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- npm search (blocking) -------------------------------------------------


def _search_npm(keyword: str) -> list[dict]:
    """Search npm packages via the registry API. Blocking."""
    import urllib.parse

    query = urllib.parse.urlencode({"text": keyword, "size": "25"})
    request = urllib.request.Request(
        f"{_NPM_SEARCH_URL}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    payload = json.loads(body)
    found: list[dict] = []
    for entry in payload.get("objects") or []:
        package = entry.get("package") or {}
        name = package.get("name", "")
        if not name:
            continue
        version = package.get("version", "")
        label = f"{name}@{version}" if version else name
        blurb = package.get("description") or ""
        if blurb:
            label += f": {_truncate(blurb, 50)}"
        link_map = package.get("links") or {}
        found.append({
            "id": name,
            "title": label,
            # Fall back to the canonical package page when no link is given.
            "url": link_map.get("npm", f"https://www.npmjs.com/package/{name}"),
            "date": _parse_date(package.get("date", "")),
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- PyPI search (blocking) ------------------------------------------------


def _search_pypi(keyword: str) -> list[dict]:
    """Search PyPI recent updates via RSS feed, filtered by keyword. Blocking.

    PyPI's web search requires a JS challenge, so we scan the public
    "updates" RSS feed and keep items whose title or description mention
    *keyword* (case-insensitive).
    """
    import xml.etree.ElementTree as ET
    from email.utils import parsedate_to_datetime

    req = urllib.request.Request(_PYPI_RSS_URL, method="GET")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    raw = resp.read()
    resp.close()

    root = ET.fromstring(raw)
    kw_lower = keyword.lower()
    results: list[dict] = []
    for item in root.findall(".//item"):
        title = (item.findtext("title") or "").strip()
        link = (item.findtext("link") or "").strip()
        desc = (item.findtext("description") or "").strip()
        if not title or not link:
            continue
        if kw_lower not in title.lower() and kw_lower not in desc.lower():
            continue
        # Feed titles look like "name 1.2.3"; the first token is the package.
        pkg_name = title.split()[0]
        display = title
        if desc:
            display += f": {_truncate(desc, 50)}"
        # Fix: populate "date" from the feed's RFC 2822 pubDate (was always
        # ""), matching the other backends that fill date when available.
        date = ""
        pub_date = (item.findtext("pubDate") or "").strip()
        if pub_date:
            try:
                date = parsedate_to_datetime(pub_date).strftime("%Y-%m-%d")
            except (ValueError, TypeError):
                pass
        results.append({
            "id": pkg_name or link,
            "title": display,
            "url": link,
            "date": date,
            "extra": "",
        })
    return results
|
||||
|
||||
|
||||
# -- Docker Hub search (blocking) ------------------------------------------


def _search_dockerhub(keyword: str) -> list[dict]:
    """Search Docker Hub repositories via the public API. Blocking."""
    import urllib.parse

    query = urllib.parse.urlencode({"query": keyword, "page_size": "25"})
    request = urllib.request.Request(
        f"{_DOCKERHUB_SEARCH_URL}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    payload = json.loads(body)
    found: list[dict] = []
    for repo in payload.get("results") or []:
        name = repo.get("repo_name", "")
        if not name:
            continue
        label = name
        blurb = repo.get("short_description") or ""
        if blurb:
            label += f": {_truncate(blurb, 50)}"
        stars = repo.get("star_count", 0)
        if stars:
            label += f" [{stars}*]"
        # Official images (no namespace) live under /_/, the rest under /r/.
        if "/" in name:
            page = f"https://hub.docker.com/r/{name}"
        else:
            page = f"https://hub.docker.com/_/{name}"
        found.append({
            "id": name, "title": label, "url": page,
            "date": "", "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- arXiv search (blocking) -----------------------------------------------


def _search_arxiv(keyword: str) -> list[dict]:
    """Search arXiv preprints via the Atom API. Blocking."""
    import urllib.parse
    import xml.etree.ElementTree as ET

    query = urllib.parse.urlencode({
        "search_query": f"all:{keyword}",
        "sortBy": "submittedDate", "sortOrder": "descending",
        "max_results": "25",
    })
    request = urllib.request.Request(f"{_ARXIV_API}?{query}", method="GET")
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    atom = {"a": "http://www.w3.org/2005/Atom"}
    feed = ET.fromstring(body)
    found: list[dict] = []
    for entry in feed.findall("a:entry", atom):
        raw_id = (entry.findtext("a:id", "", atom) or "").strip()
        # Titles can span lines in the Atom feed; collapse the whitespace.
        heading = " ".join(
            ((entry.findtext("a:title", "", atom) or "").strip()).split()
        )
        if not heading:
            continue
        page = ""
        for link in entry.findall("a:link", atom):
            if link.get("type") == "text/html":
                page = link.get("href", "")
                break
        if not page:
            page = raw_id
        if "/abs/" in raw_id:
            short_id = raw_id.rsplit("/abs/", 1)[-1]
        else:
            short_id = raw_id
        found.append({
            "id": short_id, "title": heading, "url": page,
            "date": _parse_date(entry.findtext("a:published", "", atom) or ""),
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- Lobsters search (blocking) --------------------------------------------
|
||||
|
||||
class _LobstersParser(HTMLParser):
|
||||
"""Extract story links from Lobsters search HTML."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.results: list[tuple[str, str]] = []
|
||||
self._in_link = False
|
||||
self._url = ""
|
||||
self._title_parts: list[str] = []
|
||||
|
||||
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
||||
if tag != "a":
|
||||
return
|
||||
attr_map = {k: (v or "") for k, v in attrs}
|
||||
cls = attr_map.get("class", "")
|
||||
if "u-url" in cls:
|
||||
self._in_link = True
|
||||
self._url = attr_map.get("href", "")
|
||||
self._title_parts = []
|
||||
|
||||
def handle_data(self, data: str) -> None:
|
||||
if self._in_link:
|
||||
self._title_parts.append(data)
|
||||
|
||||
def handle_endtag(self, tag: str) -> None:
|
||||
if tag == "a" and self._in_link:
|
||||
self._in_link = False
|
||||
title = "".join(self._title_parts).strip()
|
||||
if self._url and title:
|
||||
self.results.append((self._url, title))
|
||||
|
||||
|
||||
def _search_lobsters(keyword: str) -> list[dict]:
    """Search Lobsters stories by scraping the HTML search page. Blocking."""
    import urllib.parse

    query = urllib.parse.urlencode({
        "q": keyword, "what": "stories", "order": "newest",
    })
    request = urllib.request.Request(
        f"{_LOBSTERS_SEARCH_URL}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    parser = _LobstersParser()
    parser.feed(body.decode("utf-8", errors="replace"))

    found: list[dict] = []
    seen: set[str] = set()
    for story_url, story_title in parser.results:
        # The page can repeat a story link; keep the first occurrence only.
        if story_url in seen:
            continue
        seen.add(story_url)
        found.append({
            "id": story_url,
            "title": story_title,
            "url": story_url,
            "date": "",
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- DEV.to search (blocking) ----------------------------------------------


def _search_devto(keyword: str) -> list[dict]:
    """Search DEV.to articles via the public articles API. Blocking."""
    import urllib.parse

    # The articles API filters by tag; reduce the keyword to a tag slug.
    tag = re.sub(r"[^a-zA-Z0-9]", "", keyword).lower()
    query = urllib.parse.urlencode({"per_page": "25", "tag": tag})
    request = urllib.request.Request(f"{_DEVTO_API}?{query}", method="GET")
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    payload = json.loads(body)
    if not isinstance(payload, list):
        return []
    found: list[dict] = []
    for article in payload:
        article_id = str(article.get("id", ""))
        if not article_id:
            continue
        heading = article.get("title", "")
        writer = article.get("user", {})
        username = writer.get("username", "") if isinstance(writer, dict) else ""
        if username:
            heading = f"{username}: {heading}"
        found.append({
            "id": article_id,
            "title": heading,
            "url": article.get("url", ""),
            "date": _parse_date(article.get("published_at", "")),
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- Medium tag feed search (blocking) -------------------------------------


def _search_medium(keyword: str) -> list[dict]:
    """Search Medium via tag RSS feed. Blocking.

    *keyword* is normalized to a Medium tag slug (alphanumerics and
    hyphens); an empty slug short-circuits to no results.
    """
    import urllib.parse
    import xml.etree.ElementTree as ET
    # Hoisted out of the item loop (was re-imported on every iteration).
    from email.utils import parsedate_to_datetime

    tag = re.sub(r"[^a-zA-Z0-9-]", "-", keyword).lower().strip("-")
    if not tag:
        return []
    url = f"{_MEDIUM_FEED_URL}/{urllib.parse.quote(tag, safe='')}"

    req = urllib.request.Request(url, method="GET")
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    raw = resp.read()
    resp.close()

    root = ET.fromstring(raw)
    results: list[dict] = []
    for item in root.iter("item"):
        title = (item.findtext("title") or "").strip()
        link = (item.findtext("link") or "").strip()
        if not link:
            continue
        guid = (item.findtext("guid") or link).strip()
        creator = item.findtext("{http://purl.org/dc/elements/1.1/}creator") or ""
        if creator:
            title = f"{creator}: {title}"
        pub_date = item.findtext("pubDate") or ""
        date = _parse_date(pub_date)
        if not date and pub_date:
            # RSS pubDate is RFC 2822; fall back to email-style parsing.
            try:
                date = parsedate_to_datetime(pub_date).strftime("%Y-%m-%d")
            except (ValueError, TypeError):
                pass
        results.append({
            "id": guid, "title": title, "url": link,
            "date": date, "extra": "",
        })
    return results
|
||||
|
||||
|
||||
# -- Hugging Face search (blocking) ----------------------------------------


def _search_huggingface(keyword: str) -> list[dict]:
    """Search Hugging Face models via the public API. Blocking."""
    import urllib.parse

    query = urllib.parse.urlencode({
        "search": keyword, "sort": "lastModified",
        "direction": "-1", "limit": "25",
    })
    request = urllib.request.Request(
        f"{_HUGGINGFACE_API}?{query}", method="GET",
    )
    request.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)")

    response = _urlopen(request, timeout=_FETCH_TIMEOUT)
    body = response.read()
    response.close()

    payload = json.loads(body)
    models = payload if isinstance(payload, list) else []
    found: list[dict] = []
    for model in models:
        model_id = model.get("modelId") or model.get("id", "")
        if not model_id:
            continue
        label = model_id
        downloads = model.get("downloads", 0)
        likes = model.get("likes", 0)
        # Prefer the download count as the popularity marker, then likes.
        if downloads:
            label += f" [{downloads} dl]"
        elif likes:
            label += f" [{likes} likes]"
        found.append({
            "id": model_id,
            "title": label,
            "url": f"https://huggingface.co/{model_id}",
            "date": _parse_date(model.get("lastModified", "")),
            "extra": "",
        })
    return found
|
||||
|
||||
|
||||
# -- Backend registry -------------------------------------------------------
|
||||
|
||||
_BACKENDS: dict[str, callable] = {
|
||||
@@ -1144,6 +1652,17 @@ _BACKENDS: dict[str, callable] = {
|
||||
"ia": _search_archive,
|
||||
"hn": _search_hackernews,
|
||||
"gh": _search_github,
|
||||
"wp": _search_wikipedia,
|
||||
"se": _search_stackexchange,
|
||||
"gl": _search_gitlab,
|
||||
"nm": _search_npm,
|
||||
"pp": _search_pypi,
|
||||
"dh": _search_dockerhub,
|
||||
"ax": _search_arxiv,
|
||||
"lb": _search_lobsters,
|
||||
"dv": _search_devto,
|
||||
"md": _search_medium,
|
||||
"hf": _search_huggingface,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user