diff --git a/CLAUDE.md b/CLAUDE.md index 4b40bb8..3c2ffaf 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -42,6 +42,7 @@ tools/ ppf-logs view container logs ppf-service manage containers (status/start/stop/restart) ppf-db database operations (stats/purge-proxies/vacuum) + ppf-status cluster overview (containers, workers, queue) playbooks/ deploy.yml ansible playbook (sync, compose, restart) inventory.ini hosts with WireGuard IPs + SSH key @@ -107,6 +108,13 @@ ppf-db purge-proxies # stop odin, delete all proxies, restart ppf-db vacuum # reclaim disk space ``` +### Cluster Status + +```bash +ppf-status # full overview: containers, DB, workers, queue +ppf-status --json # raw JSON from odin API +``` + ### Direct Ansible (for operations not covered by tools) Use the toolkit inventory for ad-hoc commands over WireGuard: diff --git a/README.md b/README.md index bf760fc..5844c64 100644 --- a/README.md +++ b/README.md @@ -227,6 +227,7 @@ ppf-deploy --check # dry run with diff ppf-logs [node] # view container logs (-f to follow) ppf-service [nodes...] # status / start / stop / restart ppf-db # stats / purge-proxies / vacuum +ppf-status # cluster overview (containers, workers, queue) ``` See `--help` on each tool. diff --git a/tools/ppf-status b/tools/ppf-status new file mode 100755 index 0000000..7b14881 --- /dev/null +++ b/tools/ppf-status @@ -0,0 +1,246 @@ +#!/bin/bash +# ppf-status -- PPF cluster overview +# +# Usage: +# ppf-status [options] + +set -eu + +# Resolve to real path (handles symlinks from ~/.local/bin/) +SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")" +SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")" +# shellcheck disable=SC1091 +. 
"$SCRIPT_DIR/lib/ppf-common.sh" + +ODIN_URL="http://127.0.0.1:8081" +PROXY_DB="/home/podman/ppf/data/proxies.sqlite" +URL_DB="/home/podman/ppf/data/websites.sqlite" + +# --------------------------------------------------------------------------- +# Usage +# --------------------------------------------------------------------------- +usage() { + cat </dev/null || echo '{}'" \ + 2>/dev/null | sed 's/Shared connection.*closed\.\?//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d') + +if [ "$RAW_JSON" -eq 1 ]; then + echo "$api_json" + exit 0 +fi + +# Check if we got valid data +if ! echo "$api_json" | python3 -c "import sys,json; json.load(sys.stdin)" 2>/dev/null; then + die "Failed to fetch API data from odin" +fi + +# --------------------------------------------------------------------------- +# Container health +# --------------------------------------------------------------------------- +section "Containers" + +for host in $ALL_HOSTS; do + output=$(compose_cmd "$host" "ps" 2>/dev/null) || true + if echo "$output" | grep -qi "up\|running"; then + log_ok "$host" + elif echo "$output" | grep -qi "exit"; then + log_err "$host (exited)" + else + log_warn "$host (unknown)" + fi +done + +# --------------------------------------------------------------------------- +# Database summary (quick counts from odin) +# --------------------------------------------------------------------------- +section "Database" + +proxy_count=$(ansible_cmd "$MASTER" -m raw -a \ + "sudo -u podman sqlite3 '$PROXY_DB' 'SELECT COUNT(*) FROM proxylist;'" 2>/dev/null \ + | sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?') +working_count=$(ansible_cmd "$MASTER" -m raw -a \ + "sudo -u podman sqlite3 '$PROXY_DB' 'SELECT COUNT(*) FROM proxylist WHERE failed=0 AND proto IS NOT NULL;'" 2>/dev/null \ + | sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?') +url_count=$(ansible_cmd "$MASTER" -m raw -a \ + "sudo -u podman sqlite3 '$URL_DB' 
'SELECT COUNT(*) FROM uris;'" 2>/dev/null \
+    | sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?')
+
+log_info "Proxies: ${proxy_count} total, ${working_count} working"
+log_info "URLs: ${url_count}"
+
+# ---------------------------------------------------------------------------
+# Parse and display via Python for clean formatting
+# ---------------------------------------------------------------------------
+echo "$api_json" | python3 -c "
+import sys, json
+
+NO_COLOR = __import__('os').environ.get('NO_COLOR', '')
+
+# Colors (disabled when NO_COLOR is set or stdout is not a tty)
+if not NO_COLOR and sys.stdout.isatty():
+    RST = '\033[0m'
+    DIM = '\033[2m'
+    BOLD = '\033[1m'
+    RED = '\033[38;5;167m'
+    GREEN = '\033[38;5;114m'
+    YELLOW = '\033[38;5;180m'
+    BLUE = '\033[38;5;110m'
+    CYAN = '\033[38;5;116m'
+else:
+    RST = DIM = BOLD = RED = GREEN = YELLOW = BLUE = CYAN = ''
+
+def ok(s): return GREEN + s + RST
+def err(s): return RED + s + RST
+def warn(s): return YELLOW + s + RST
+def dim(s): return DIM + s + RST
+def bold(s): return BOLD + CYAN + s + RST
+
+# FIX: fmt() was defined inside the worker loop but is also used by the
+# TOTAL summary line, the Manager section and the Queue section, which
+# raised NameError whenever no workers were connected.  Define it once
+# at top level instead of redefining it per loop iteration.
+def fmt(n):
+    if n >= 1000000: return '%.1fM' % (n / 1000000)
+    if n >= 1000: return '%.1fk' % (n / 1000)
+    return str(n)
+
+# Shared success-rate colouring; the same >=30 / >=10 thresholds were
+# duplicated for worker rows and for the manager block.
+def rate_colored(rate):
+    if rate >= 30: return ok('%.1f%%' % rate)
+    if rate >= 10: return warn('%.1f%%' % rate)
+    return err('%.1f%%' % rate)
+
+try:
+    data = json.load(sys.stdin)
+except json.JSONDecodeError:  # narrowed from a bare except; input is pre-validated upstream
+    sys.exit(0)
+
+workers = data.get('workers', [])
+summary = data.get('summary', {})
+queue = data.get('queue', {})
+manager = data.get('manager', {})
+
+# Workers table
+print()
+print(bold(' Workers'))
+if workers:
+    # Header
+    print(dim(' %-12s %7s %9s %9s %7s %6s %s' % (
+        'NAME', 'TESTED', 'WORKING', 'FAILED', 'RATE', 'ACT', 'STATUS')))
+    for w in sorted(workers, key=lambda x: x.get('name', '')):
+        name = w.get('name', w.get('ip', '?'))
+        tested = w.get('proxies_tested', 0)
+        working = w.get('proxies_working', 0)
+        failed = w.get('proxies_failed', 0)
+        rate = w.get('success_rate', 0)
+        active = w.get('active', False)
+
+        act_str = ok('yes') if active else err('no')
+        rate_str = rate_colored(rate)
+
+        # A worker silent for >5 minutes that is not actively testing is stale.
+        age = w.get('age', 0)
+        if age > 300 and not active:
+            status = err('stale (%dm)' % (age // 60))
+        elif active:
+            status = ok('testing')
+        else:
+            status = dim('idle')
+
+        print(' %-12s %7s %9s %9s %7s %6s %s' % (
+            name, fmt(tested), fmt(working), fmt(failed),
+            rate_str, act_str, status))
+
+    # Summary line
+    total_t = summary.get('total_tested', 0)
+    total_w = summary.get('total_working', 0)
+    total_f = summary.get('total_failed', 0)
+    overall = summary.get('overall_success_rate', 0)
+    active_count = data.get('active', 0)
+    total_count = data.get('total', 0)
+    print(dim(' %-12s %7s %9s %9s %7s %6s' % (
+        'TOTAL',
+        fmt(total_t) if total_t else '-',
+        fmt(total_w) if total_w else '-',
+        fmt(total_f) if total_f else '-',
+        '%.1f%%' % overall,
+        '%d/%d' % (active_count, total_count))))
+else:
+    print(err(' no workers connected'))
+
+# Manager (odin verification)
+if manager:
+    print()
+    print(bold(' Odin Verification'))
+    m_rate = manager.get('success_rate', 0)
+    m_tested = manager.get('tested', 0)
+    m_passed = manager.get('passed', 0)
+    m_threads = manager.get('threads', 0)
+    m_speed = manager.get('rate', 0)
+    m_queue = manager.get('queue_size', 0)
+    m_uptime = manager.get('uptime', 0)
+
+    def fmt_time(s):
+        if s >= 3600: return '%dh%dm' % (s // 3600, (s % 3600) // 60)
+        if s >= 60: return '%dm%ds' % (s // 60, s % 60)
+        return '%ds' % s
+
+    rate_str = rate_colored(m_rate)
+
+    print(' threads: %d rate: %.2f/s uptime: %s' % (m_threads, m_speed, fmt_time(m_uptime)))
+    print(' tested: %s passed: %s success: %s' % (fmt(m_tested), fmt(m_passed), rate_str))
+    print(' queue: %d jobs' % m_queue)
+
+# Queue
+if queue:
+    print()
+    print(bold(' Proxy Queue'))
+    print(' total: %d due: %d pending: %d claimed: %d' % (
+        queue.get('total', 0), queue.get('due', 0),
+        queue.get('pending', 0), queue.get('claimed', 0)))
+    sess_tested = queue.get('session_tested', 0)
+    sess_pct = queue.get('session_pct', 0)
+    if sess_tested:
+        print(' session: %s tested (%.1f%%)' % (fmt(sess_tested), sess_pct))
+
+print()
+"