tools: add ppf-status cluster overview
This commit is contained in:
@@ -42,6 +42,7 @@ tools/
|
|||||||
ppf-logs view container logs
|
ppf-logs view container logs
|
||||||
ppf-service manage containers (status/start/stop/restart)
|
ppf-service manage containers (status/start/stop/restart)
|
||||||
ppf-db database operations (stats/purge-proxies/vacuum)
|
ppf-db database operations (stats/purge-proxies/vacuum)
|
||||||
|
ppf-status cluster overview (containers, workers, queue)
|
||||||
playbooks/
|
playbooks/
|
||||||
deploy.yml ansible playbook (sync, compose, restart)
|
deploy.yml ansible playbook (sync, compose, restart)
|
||||||
inventory.ini hosts with WireGuard IPs + SSH key
|
inventory.ini hosts with WireGuard IPs + SSH key
|
||||||
@@ -107,6 +108,13 @@ ppf-db purge-proxies # stop odin, delete all proxies, restart
|
|||||||
ppf-db vacuum # reclaim disk space
|
ppf-db vacuum # reclaim disk space
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Cluster Status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ppf-status # full overview: containers, DB, workers, queue
|
||||||
|
ppf-status --json # raw JSON from odin API
|
||||||
|
```
|
||||||
|
|
||||||
### Direct Ansible (for operations not covered by tools)
|
### Direct Ansible (for operations not covered by tools)
|
||||||
|
|
||||||
Use the toolkit inventory for ad-hoc commands over WireGuard:
|
Use the toolkit inventory for ad-hoc commands over WireGuard:
|
||||||
|
|||||||
@@ -227,6 +227,7 @@ ppf-deploy --check # dry run with diff
|
|||||||
ppf-logs [node] # view container logs (-f to follow)
|
ppf-logs [node] # view container logs (-f to follow)
|
||||||
ppf-service <cmd> [nodes...] # status / start / stop / restart
|
ppf-service <cmd> [nodes...] # status / start / stop / restart
|
||||||
ppf-db <cmd> # stats / purge-proxies / vacuum
|
ppf-db <cmd> # stats / purge-proxies / vacuum
|
||||||
|
ppf-status # cluster overview (containers, workers, queue)
|
||||||
```
|
```
|
||||||
|
|
||||||
See `--help` on each tool.
|
See `--help` on each tool.
|
||||||
|
|||||||
246
tools/ppf-status
Executable file
246
tools/ppf-status
Executable file
@@ -0,0 +1,246 @@
|
|||||||
|
#!/bin/bash
# ppf-status -- PPF cluster overview
#
# Usage:
#   ppf-status [options]

set -eu

# Resolve to the real path (handles symlinks from ~/.local/bin/) so the
# shared library can be found next to the actual script file.
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"

# Odin API endpoint (reached via ansible on the master, hence localhost)
# and the sqlite databases that live on the master node.
ODIN_URL="http://127.0.0.1:8081"
PROXY_DB="/home/podman/ppf/data/proxies.sqlite"
URL_DB="/home/podman/ppf/data/websites.sqlite"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
# Print help text and exit successfully.  The heredoc delimiter is quoted:
# the text contains no expansions, so output is unchanged, but this makes
# the "static text" intent explicit.
usage() {
    cat <<'EOF'
Usage: ppf-status [options]

Show PPF cluster overview.

Options:
  --json           raw JSON from API
  --help           show this help
  --version        show version

Displays:
  - Container health per node
  - Worker stats (tested, working, rate, active)
  - Odin manager stats (verification, queue)
  - Database counts (proxies, URLs)

EOF
    exit 0
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
RAW_JSON=0

while [ "$#" -gt 0 ]; do
    case "$1" in
        --help|-h)    usage ;;
        --version|-V) echo "ppf-status $PPF_TOOLS_VERSION"; exit 0 ;;
        --json)       RAW_JSON=1 ;;
        -*)           die "Unknown option: $1" ;;
        *)            die "Unknown argument: $1" ;;
    esac
    shift
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Fetch API data from odin (run on odin via curl to localhost)
# ---------------------------------------------------------------------------
# ansible -m raw mixes connection chatter into stdout; the sed filter strips
# the "Shared connection ... closed." trailer, blank lines, and the
# odin/CHANGED/SUCCESS status lines so only the JSON payload survives.
# The remote curl falls back to '{}' so a down API still yields valid JSON.
api_json=$(ansible_cmd "$MASTER" -m raw -a \
    "curl -sf --max-time 5 ${ODIN_URL}/api/workers 2>/dev/null || echo '{}'" \
    2>/dev/null | sed 's/Shared connection.*closed\.\?//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d')

# --json: emit the raw payload untouched and stop.
if [ "$RAW_JSON" -eq 1 ]; then
    echo "$api_json"
    exit 0
fi

# Bail out early if the payload is not parseable JSON.
if ! echo "$api_json" | python3 -c "import sys,json; json.load(sys.stdin)" 2>/dev/null; then
    die "Failed to fetch API data from odin"
fi
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Container health
# ---------------------------------------------------------------------------
section "Containers"

for host in $ALL_HOSTS; do
    # compose ps can fail on an unreachable node; treat that as "unknown".
    ps_out=$(compose_cmd "$host" "ps" 2>/dev/null) || true
    if echo "$ps_out" | grep -qiE 'up|running'; then
        log_ok "$host"
    elif echo "$ps_out" | grep -qi 'exit'; then
        log_err "$host (exited)"
    else
        log_warn "$host (unknown)"
    fi
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Database summary (quick counts from odin)
# ---------------------------------------------------------------------------
section "Database"

# db_count DB SQL -- run a single-value sqlite3 query on the master as the
# podman user and print the result, or '?' when the query or the connection
# fails.  The sed filter drops ansible raw-mode connection chatter.
# (Extracted: this pipeline was copy-pasted three times below.)
db_count() {
    ansible_cmd "$MASTER" -m raw -a \
        "sudo -u podman sqlite3 '$1' '$2'" 2>/dev/null \
        | sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?'
}

proxy_count=$(db_count "$PROXY_DB" 'SELECT COUNT(*) FROM proxylist;')
working_count=$(db_count "$PROXY_DB" 'SELECT COUNT(*) FROM proxylist WHERE failed=0 AND proto IS NOT NULL;')
url_count=$(db_count "$URL_DB" 'SELECT COUNT(*) FROM uris;')

log_info "Proxies: ${proxy_count} total, ${working_count} working"
log_info "URLs: ${url_count}"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse and display via Python for clean formatting
# ---------------------------------------------------------------------------
# NOTE: the program is embedded in a double-quoted bash string, so it must
# avoid double quotes and unescaped $.  FIX: fmt() was previously defined
# inside the worker loop, so with zero workers the manager/queue sections
# crashed with NameError -- helpers are now module-level.
echo "$api_json" | python3 -c "
import sys, json, os

NO_COLOR = os.environ.get('NO_COLOR', '')

# Colors (disabled when NO_COLOR is set or stdout is not a tty)
if not NO_COLOR and sys.stdout.isatty():
    RST = '\033[0m'
    DIM = '\033[2m'
    BOLD = '\033[1m'
    RED = '\033[38;5;167m'
    GREEN = '\033[38;5;114m'
    YELLOW = '\033[38;5;180m'
    BLUE = '\033[38;5;110m'
    CYAN = '\033[38;5;116m'
else:
    RST = DIM = BOLD = RED = GREEN = YELLOW = BLUE = CYAN = ''

def ok(s): return GREEN + s + RST
def err(s): return RED + s + RST
def warn(s): return YELLOW + s + RST
def dim(s): return DIM + s + RST
def bold(s): return BOLD + CYAN + s + RST

def fmt(n):
    # Compact human number: 1234 -> '1.2k', 2500000 -> '2.5M'.
    if n >= 1000000: return '%.1fM' % (n / 1000000)
    if n >= 1000: return '%.1fk' % (n / 1000)
    return str(n)

def color_rate(r):
    # Success-rate coloring shared by the worker table and manager section:
    # >=30%% green, >=10%% yellow, else red.
    if r >= 30: return ok('%.1f%%' % r)
    if r >= 10: return warn('%.1f%%' % r)
    return err('%.1f%%' % r)

def fmt_time(s):
    # Seconds -> '2h05m' / '3m12s' / '45s' style uptime string.
    if s >= 3600: return '%dh%dm' % (s // 3600, (s % 3600) // 60)
    if s >= 60: return '%dm%ds' % (s // 60, s % 60)
    return '%ds' % s

try:
    data = json.load(sys.stdin)
except ValueError:
    # Caller already validated the JSON; exit quietly if it raced away.
    sys.exit(0)

workers = data.get('workers', [])
summary = data.get('summary', {})
queue = data.get('queue', {})
manager = data.get('manager', {})

# Workers table
print()
print(bold(' Workers'))
if workers:
    # Header
    print(dim(' %-12s %7s %9s %9s %7s %6s %s' % (
        'NAME', 'TESTED', 'WORKING', 'FAILED', 'RATE', 'ACT', 'STATUS')))
    for w in sorted(workers, key=lambda x: x.get('name', '')):
        name = w.get('name', w.get('ip', '?'))
        tested = w.get('proxies_tested', 0)
        working = w.get('proxies_working', 0)
        failed = w.get('proxies_failed', 0)
        rate = w.get('success_rate', 0)
        active = w.get('active', False)

        act_str = ok('yes') if active else err('no')
        rate_str = color_rate(rate)

        # A worker that has not reported for 5 minutes and is not testing
        # is considered stale.
        age = w.get('age', 0)
        if age > 300 and not active:
            status = err('stale (%dm)' % (age // 60))
        elif active:
            status = ok('testing')
        else:
            status = dim('idle')

        print(' %-12s %7s %9s %9s %7s %6s %s' % (
            name, fmt(tested), fmt(working), fmt(failed),
            rate_str, act_str, status))

    # Summary line
    total_t = summary.get('total_tested', 0)
    total_w = summary.get('total_working', 0)
    total_f = summary.get('total_failed', 0)
    overall = summary.get('overall_success_rate', 0)
    active_count = data.get('active', 0)
    total_count = data.get('total', 0)
    print(dim(' %-12s %7s %9s %9s %7s %6s' % (
        'TOTAL',
        fmt(total_t) if total_t else '-',
        fmt(total_w) if total_w else '-',
        fmt(total_f) if total_f else '-',
        '%.1f%%' % overall,
        '%d/%d' % (active_count, total_count))))
else:
    print(err(' no workers connected'))

# Manager (odin verification)
if manager:
    print()
    print(bold(' Odin Verification'))
    m_rate = manager.get('success_rate', 0)
    m_tested = manager.get('tested', 0)
    m_passed = manager.get('passed', 0)
    m_threads = manager.get('threads', 0)
    m_speed = manager.get('rate', 0)
    m_queue = manager.get('queue_size', 0)
    m_uptime = manager.get('uptime', 0)

    rate_str = color_rate(m_rate)

    print(' threads: %d rate: %.2f/s uptime: %s' % (m_threads, m_speed, fmt_time(m_uptime)))
    print(' tested: %s passed: %s success: %s' % (fmt(m_tested), fmt(m_passed), rate_str))
    print(' queue: %d jobs' % m_queue)

# Queue
if queue:
    print()
    print(bold(' Proxy Queue'))
    print(' total: %d due: %d pending: %d claimed: %d' % (
        queue.get('total', 0), queue.get('due', 0),
        queue.get('pending', 0), queue.get('claimed', 0)))
    sess_tested = queue.get('session_tested', 0)
    sess_pct = queue.get('session_pct', 0)
    if sess_tested:
        print(' session: %s tested (%.1f%%)' % (fmt(sess_tested), sess_pct))

print()
"
|
||||||
Reference in New Issue
Block a user