diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md index fb58640..e789428 100644 --- a/docs/CHEATSHEET.md +++ b/docs/CHEATSHEET.md @@ -345,13 +345,13 @@ No API credentials needed (uses public GQL endpoint). !alert history [n] # Show recent results (default 5) ``` -Searches keywords across 14 backends: YouTube (yt), Twitch (tw), SearXNG (sx), +Searches keywords across 16 backends: YouTube (yt), Twitch (tw), SearXNG (sx), Reddit (rd), Mastodon (ft), DuckDuckGo (dg), Google News (gn), Kick (kk), Dailymotion (dm), PeerTube (pt), Bluesky (bs), Lemmy (ly), Odysee (od), -Archive.org (ia). Names: lowercase alphanumeric + hyphens, 1-20 chars. Keywords: -1-100 chars. Max 20 alerts/channel. Polls every 5min. Format: `[name/yt] Title -- URL`, -etc. No API credentials needed. Persists across restarts. History stored in -`data/alert_history.db`. +Archive.org (ia), Hacker News (hn), GitHub (gh). Names: lowercase alphanumeric + +hyphens, 1-20 chars. Keywords: 1-100 chars. Max 20 alerts/channel. Polls every +5min. Format: `[name/yt] Title -- URL`, etc. No API credentials needed. Persists +across restarts. History stored in `data/alert_history.db`. 
## SearX diff --git a/docs/USAGE.md b/docs/USAGE.md index a7110d6..c89ef57 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -696,13 +696,16 @@ Platforms searched: - **Lemmy** (`ly`) -- Federated post search across 4 instances (no auth required) - **Odysee** (`od`) -- LBRY JSON-RPC claim search: video, audio, documents (no auth required) - **Archive.org** (`ia`) -- Internet Archive advanced search, sorted by date (no auth required) +- **Hacker News** (`hn`) -- Algolia search API, sorted by date (no auth required) +- **GitHub** (`gh`) -- Repository search API, sorted by recently updated (no auth required) Polling and announcements: - Alerts are polled every 5 minutes by default - On `add`, existing results are recorded without announcing (prevents flood) - New results announced as `[name/<tag>] Title -- URL` where tag is one of: - `yt`, `tw`, `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, `pt`, `bs`, `ly`, `od`, `ia` + `yt`, `tw`, `sx`, `rd`, `ft`, `dg`, `gn`, `kk`, `dm`, `pt`, `bs`, `ly`, `od`, `ia`, + `hn`, `gh` - Titles are truncated to 80 characters - Each platform maintains its own seen list (capped at 200 per platform) - 5 consecutive errors doubles the poll interval (max 1 hour) diff --git a/plugins/alert.py b/plugins/alert.py index c526f97..1ae8b53 100644 --- a/plugins/alert.py +++ b/plugins/alert.py @@ -61,6 +61,8 @@ _LEMMY_INSTANCES = [ _LEMMY_TIMEOUT = 4 _ODYSEE_API = "https://api.na-backend.odysee.com/api/v1/proxy" _ARCHIVE_SEARCH_URL = "https://archive.org/advancedsearch.php" +_HN_SEARCH_URL = "https://hn.algolia.com/api/v1/search_by_date" +_GITHUB_SEARCH_URL = "https://api.github.com/search/repositories" # -- Module-level tracking --------------------------------------------------- @@ -1002,6 +1004,92 @@ def _search_archive(keyword: str) -> list[dict]: return results + +# -- Hacker News search (blocking) ------------------------------------------ + +def _search_hackernews(keyword: str) -> list[dict]: + """Search Hacker News via Algolia API, sorted by date. 
Blocking.""" + import urllib.parse + + params = urllib.parse.urlencode({ + "query": keyword, "tags": "story", "hitsPerPage": "25", + }) + url = f"{_HN_SEARCH_URL}?{params}" + + req = urllib.request.Request(url, method="GET") + req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)") + + resp = _urlopen(req, timeout=_FETCH_TIMEOUT) + raw = resp.read() + resp.close() + + data = json.loads(raw) + results: list[dict] = [] + for hit in data.get("hits") or []: + object_id = hit.get("objectID", "") + if not object_id: + continue + title = hit.get("title", "") + # External URL if available, otherwise HN discussion link + item_url = hit.get("url") or f"https://news.ycombinator.com/item?id={object_id}" + date = _parse_date(hit.get("created_at", "")) + points = hit.get("points") + if points: + title += f" ({points}pts)" + results.append({ + "id": object_id, + "title": title, + "url": item_url, + "date": date, + "extra": "", + }) + return results + + +# -- GitHub search (blocking) ----------------------------------------------- + +def _search_github(keyword: str) -> list[dict]: + """Search GitHub repositories via public API. 
Blocking.""" + import urllib.parse + + params = urllib.parse.urlencode({ + "q": keyword, "sort": "updated", "order": "desc", "per_page": "25", + }) + url = f"{_GITHUB_SEARCH_URL}?{params}" + + req = urllib.request.Request(url, method="GET") + req.add_header("Accept", "application/vnd.github+json") + req.add_header("User-Agent", "Mozilla/5.0 (compatible; derp-bot)") + + resp = _urlopen(req, timeout=_FETCH_TIMEOUT) + raw = resp.read() + resp.close() + + data = json.loads(raw) + results: list[dict] = [] + for repo in data.get("items") or []: + repo_id = str(repo.get("id", "")) + if not repo_id: + continue + full_name = repo.get("full_name", "") + description = repo.get("description") or "" + html_url = repo.get("html_url", "") + stars = repo.get("stargazers_count", 0) + title = full_name + if description: + title += f": {_truncate(description, 50)}" + if stars: + title += f" [{stars}*]" + date = _parse_date(repo.get("updated_at", "")) + results.append({ + "id": repo_id, + "title": title, + "url": html_url, + "date": date, + "extra": "", + }) + return results + + # -- Backend registry ------------------------------------------------------- _BACKENDS: dict[str, callable] = { @@ -1019,6 +1107,8 @@ _BACKENDS: dict[str, callable] = { "ly": _search_lemmy, "od": _search_odysee, "ia": _search_archive, + "hn": _search_hackernews, + "gh": _search_github, }