Compare commits


95 Commits

Author SHA1 Message Date
Username
361b70ace9 dbs: expand seed sources to 111 URLs
All checks were successful
CI / validate (push) Successful in 20s
Add 21 new proxy source URLs: missing protocol variants from
existing repos, 4 new GitHub repos, openproxylist.xyz and
spys.me APIs, 5 web scraper targets, 2 Telegram channels.
2026-02-22 17:14:47 +01:00
Username
9c7b7ba070 add compose-based test runner for Python 2.7
All checks were successful
CI / validate (push) Successful in 20s
Dockerfile.test builds production image with pytest baked in.
compose.test.yml mounts source as volume for fast iteration.
Usage: podman-compose -f compose.test.yml run --rm test
2026-02-22 15:38:00 +01:00
Username
0669b38782 docs: update roadmap and tasklist with completed items 2026-02-22 15:37:54 +01:00
Username
6130b196b1 dbs: add SOCKS5-specific proxy sources
New sources: zevtyardt, UptimerBot, Anonym0usWork1221, ErcinDedeoglu,
proxy-list.download API, sockslist.us, mtpro.xyz, proxy-tools.com.
Addresses structural SOCKS5 coverage gap (78% HTTP in pool).
2026-02-22 15:37:50 +01:00
Username
ce2d28ab07 httpd: cache sqlite connections per-greenlet, lazy-load ASN, sharpen URL scoring
- threading.local() caches proxy_db and url_db per greenlet (eliminates
  ~2.7k redundant sqlite3.connect + PRAGMA calls per session on odin)
- ASN database now lazy-loaded on first lookup (defers ~3.6s startup cost)
- URL claim error penalty increased from 0.3*error(cap 2) to 0.5*error(cap 4)
  and stale penalty from 0.1*stale(cap 1) to 0.2*stale(cap 1.5) to reduce
  worker cycles wasted on erroring URLs (71% of 7,158 URLs erroring)
2026-02-22 15:37:43 +01:00
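A minimal sketch of the per-greenlet caching idea described above, assuming gevent's monkey-patching makes `threading.local()` greenlet-local; the database path and PRAGMA set are illustrative, not the project's actual values:

```python
import sqlite3
import threading

# Under gevent monkey-patching, threading.local() is greenlet-local, so each
# greenlet creates exactly one connection handle and reuses it afterwards.
_local = threading.local()

def get_proxy_db(path="data/proxies.sqlite"):
    """Return the calling greenlet's cached connection, creating it on first use."""
    conn = getattr(_local, "proxy_db", None)
    if conn is None:
        conn = sqlite3.connect(path)
        # PRAGMAs run once per connection instead of once per query.
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        _local.proxy_db = conn
    return conn
```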
Username
93eb395727 docs: update roadmap, todo, and add tasklist
All checks were successful
CI / validate (push) Successful in 21s
Restructure roadmap into phases. Clean up todo as intake buffer.
Add execution tasklist with prioritized items.
2026-02-22 13:58:37 +01:00
Username
f9d237fe0d httpd: add protocol-aware source weighting
Boost SOCKS sources in claim_urls scoring when SOCKS proxies
are underrepresented (<40% of pool). Dynamic 0-1.0 boost based
on current protocol distribution.
2026-02-22 13:58:32 +01:00
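A sketch of how such a dynamic boost could be computed; the 40% threshold and 0-1.0 range come from the commit message, while the function name and linear scaling are assumptions:

```python
def socks_boost(socks_count, total_count, threshold=0.40, max_boost=1.0):
    """Score bonus for SOCKS-flavoured sources; zero once SOCKS share >= threshold."""
    if total_count == 0:
        return max_boost  # empty pool: favour SOCKS sources outright
    share = float(socks_count) / total_count
    if share >= threshold:
        return 0.0
    # scale linearly with how far below the threshold the pool currently sits
    return max_boost * (threshold - share) / threshold

# e.g. a pool that is 78% HTTP (22% SOCKS) gets a boost of ~0.45
print(socks_boost(22, 100))
```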
Username
0f1fe981ef dbs: expand seed sources from 37 to 100+
Add GitHub raw lists, API endpoints, web scrapers, and Telegram
channels. Extra SOCKS5 sources to address protocol imbalance.
2026-02-22 13:58:26 +01:00
Username
0a53e4457f rocksock: skip shutdown on never-connected sockets
Track connection state with _connected flag. Only call
socket.shutdown() on successfully connected sockets.
Saves ~39s/session on workers (974k disconnect calls).
2026-02-22 13:58:20 +01:00
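A toy illustration of the `_connected` guard; the class and method names are hypothetical, not rocksock's actual interface:

```python
import socket

class Connection(object):
    """Toy wrapper showing the shutdown guard on never-connected sockets."""

    def __init__(self):
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._connected = False

    def connect(self, host, port, timeout=9):
        self.sock.settimeout(timeout)
        self.sock.connect((host, port))
        self._connected = True

    def disconnect(self):
        # shutdown() on a never-connected socket only raises ENOTCONN and
        # burns a syscall for every dead proxy attempt, so skip it.
        if self._connected:
            try:
                self.sock.shutdown(socket.SHUT_RDWR)
            except socket.error:
                pass
        self.sock.close()
```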
Username
2ea7eb41b7 tests: add extraction short-circuit and integration tests
All checks were successful
CI / validate (push) Successful in 19s
Cover short-circuit guards, table/JSON/hint extraction,
and full extract_proxies() integration (82 tests, all passing).
2026-02-22 13:50:34 +01:00
Username
98b232f3d3 fetch: add short-circuit guards to extraction functions
Skip expensive regex scans when content lacks required markers:
- extract_auth_proxies: skip if no '@' in content
- extract_proxies_from_table: skip if no '<table' tag
- extract_proxies_from_json: skip if no '{' or '['
- Hoist table regexes to module-level precompiled constants
2026-02-22 13:50:29 +01:00
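The guards amount to cheap substring checks before any regex work. A hedged sketch follows; the extractor names come from the commit, but the regexes and return shapes are placeholders:

```python
import re

# Hoisted to module level so the patterns compile once, not per call.
TABLE_RE = re.compile(r"<table[^>]*>.*?</table>", re.S | re.I)
AUTH_RE = re.compile(r"\S+:\S+@\d{1,3}(?:\.\d{1,3}){3}:\d+")

def extract_auth_proxies(content):
    if "@" not in content:            # user:pass@host:port always contains '@'
        return []
    return AUTH_RE.findall(content)

def extract_proxies_from_table(content):
    if "<table" not in content.lower():
        return []
    return TABLE_RE.findall(content)

def extract_proxies_from_json(content):
    if "{" not in content and "[" not in content:
        return []
    # full JSON parsing only runs when it can possibly succeed (elided here)
    return []
```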
Username
b300afed6c httpd: expose URL pipeline stats in /api/stats
All checks were successful
CI / validate (push) Successful in 19s
Add urls section with total/healthy/dead/erroring counts, fetch
activity, productive source count, aggregate yield, and top sources
ranked by working_ratio.
2026-02-22 11:53:57 +01:00
Username
eeadf656f5 httpd: add ASN enrichment for worker-reported proxies
All checks were successful
CI / validate (push) Successful in 20s
Load pyasn database in httpd and look up ASN when workers report
working proxies. Falls back to a pure-Python ipasn.dat reader when
the pyasn C extension is unavailable (Python 2.7 containers).
Backfills existing proxies with null ASN on startup.
2026-02-22 11:30:01 +01:00
Username
7ae0ac0c26 ppf: add periodic re-seeding of proxy source URLs
Seed sources that error out are permanently excluded from claiming.
Over time this starves the pipeline. Re-seed every 6 hours with
error reset for exhausted sources, preventing the starvation loop
that caused the previous outage.
2026-02-22 11:18:45 +01:00
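A rough sketch of such a re-seed pass; the 6-hour cadence follows the commit, while the `uris` table layout and `error` column are assumptions made for illustration:

```python
import time

RESEED_INTERVAL = 6 * 3600  # matches the 6-hour cadence from the commit

def reseed_sources(db, seed_urls):
    """Re-insert seed URLs and clear error counters on exhausted sources.

    Without this, a source that errored past the claim threshold is never
    offered to workers again and the pipeline slowly starves.
    """
    cur = db.cursor()
    for url in seed_urls:
        # table/column names are assumed for illustration
        cur.execute("INSERT OR IGNORE INTO uris (uri) VALUES (?)", (url,))
    cur.execute("UPDATE uris SET error = 0 WHERE error > 0")
    db.commit()

def maybe_reseed(db, seed_urls, last_reseed):
    now = time.time()
    if now - last_reseed >= RESEED_INTERVAL:
        reseed_sources(db, seed_urls)
        return now
    return last_reseed
```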
Username
35285a84bf watchd: update last_seen on successful proxy verification
All checks were successful
CI / validate (push) Successful in 20s
Serving endpoints filter by last_seen >= now - 3600, but watchd
never set last_seen -- only worker reports did. This caused the
API to return 0 proxies despite 70+ passing verification.
2026-02-22 10:04:28 +01:00
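The fix boils down to stamping `last_seen` on verification so the serving query's one-hour window sees the proxies. A sketch assuming a `proxylist` table keyed by ip/port (column names beyond `last_seen` are assumptions):

```python
import time

FRESHNESS_WINDOW = 3600  # serving endpoints only return proxies seen in the last hour

def mark_verified(db, ip, port):
    """Stamp last_seen whenever watchd verifies a proxy, not only on worker reports."""
    db.execute("UPDATE proxylist SET last_seen = ? WHERE ip = ? AND port = ?",
               (int(time.time()), ip, port))
    db.commit()

def serveable_proxies(db):
    cutoff = int(time.time()) - FRESHNESS_WINDOW
    return db.execute("SELECT ip, port FROM proxylist WHERE last_seen >= ?",
                      (cutoff,)).fetchall()
```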
Username
438e956be9 httpd: log report-proxies 500 errors
All checks were successful
CI / validate (push) Successful in 19s
2026-02-20 09:30:53 +01:00
Username
5dd9060c2b ppf: add periodic heartbeat and proxy reporting during test phase
All checks were successful
CI / validate (push) Successful in 19s
2026-02-19 18:17:19 +01:00
Username
304cdb3b4c comboparse: replace SafeConfigParser with ConfigParser
All checks were successful
CI / validate (push) Successful in 19s
2026-02-18 21:01:10 +01:00
Username
9f926f4ab5 ci: consolidate jobs, expand import check, add yaml lint
Some checks failed
CI / validate (push) Failing after 19s
2026-02-18 20:59:49 +01:00
Username
7705ef54f6 ci: run syntax-check in container (fix missing git)
All checks were successful
CI / syntax-check (push) Successful in 17s
CI / memory-leak-check (push) Successful in 16s
2026-02-18 18:34:54 +01:00
Username
f5b9037763 docs: update roadmap with completed target health tracking
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-18 18:23:58 +01:00
Username
56accde90d httpd: add mitm field to JSON proxy list endpoints
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 16s
2026-02-18 18:21:58 +01:00
Username
e985f52fe6 watchd: add target health tracking for all target pools
Generalizes JudgeStats into TargetStats with cooldown-based filtering
for head targets, SSL targets, and IRC servers. Targets that repeatedly
block or fail are temporarily avoided, preventing unfair proxy failures
when a target goes down. Exposes per-pool health via /api/stats.
2026-02-18 18:21:53 +01:00
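A minimal sketch of cooldown-based health tracking; `TargetStats` is named in the commit, but the fields, threshold, and cooldown length here are placeholders:

```python
import time

class TargetStats(object):
    """Cooldown-based health for one target (illustrative fields and values)."""

    def __init__(self, cooldown=600, fail_threshold=5):
        self.fails = 0
        self.blocked_until = 0
        self.cooldown = cooldown
        self.fail_threshold = fail_threshold

    def record(self, ok):
        if ok:
            self.fails = 0
            return
        self.fails += 1
        if self.fails >= self.fail_threshold:
            # Target looks down or blocking: avoid it for a while so its
            # outage is not charged against the proxies being tested.
            self.blocked_until = time.time() + self.cooldown
            self.fails = 0

    def available(self):
        return time.time() >= self.blocked_until
```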
Username
3e5c486e7e watchd: adaptive ssl for secondary checks
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 18s
Use SSL error reason from primary handshake to decide whether
the secondary check should use SSL or plain HTTP. Protocol errors
(proxy can't TLS) fall back to plain HTTP; other failures retry
with SSL sans cert verification.
2026-02-18 09:49:40 +01:00
Username
727ed86692 compose: add k8s-file logging driver to master
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-18 08:35:04 +01:00
Username
821ade95ef worker: add local proxy test cache
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 16s
Skip redundant proxy tests across URL batches using a memory-only
TTL cache (default 300s, configurable via worker.cache_ttl).
2026-02-18 01:37:09 +01:00
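A sketch of a memory-only TTL cache of this shape; the class name and key layout are assumptions, while the 300-second default mirrors the commit:

```python
import time

class ProxyTestCache(object):
    """Memory-only TTL cache keyed by e.g. (ip, port, proto)."""

    def __init__(self, ttl=300):
        self.ttl = ttl
        self._entries = {}  # key -> (result, expiry)

    def get(self, key):
        hit = self._entries.get(key)
        if hit is None:
            return None
        result, expiry = hit
        if time.time() > expiry:
            del self._entries[key]  # lazily evict expired entries
            return None
        return result

    def put(self, key, result):
        self._entries[key] = (result, time.time() + self.ttl)
```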
Username
01b91836c4 tools: fix ansible output filter in ppf-db
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 16s
2026-02-18 01:02:45 +01:00
Username
04fb362181 tools: add ppf-status cluster overview 2026-02-18 01:02:42 +01:00
Username
304830e382 watchd: delete proxies immediately on max_fail instead of marking stale
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-18 00:50:00 +01:00
Username
752ef359b5 docs: update odin role to SSL-only verification
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-18 00:46:51 +01:00
Username
af6e27bd77 config: allow ppf.threads = 0 to disable URL cycling
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-18 00:33:16 +01:00
Username
c091216afc docs: add ppf-db to README operations toolkit
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 16s
2026-02-18 00:29:28 +01:00
Username
4cefdf976c docs: update CLAUDE.md for ppf-db and corrected odin role 2026-02-18 00:29:25 +01:00
Username
98c2e74412 ppf: skip URL cycling when ppf.threads = 0 2026-02-18 00:28:37 +01:00
Username
24d6f345f6 tools: add ppf-db for database operations 2026-02-18 00:28:27 +01:00
Username
1ca096c78a ppf-service: use down+up for restart to pick up code changes 2026-02-18 00:22:55 +01:00
Username
15a7f0bb6a ppf-common: fix compose_cmd to run as podman user 2026-02-18 00:22:52 +01:00
Username
b6045bd05c tools: use down+up in deploy handler to pick up code changes 2026-02-18 00:22:48 +01:00
Username
d7b004f0ac httpd: include protocol in /proxies plain text format 2026-02-18 00:18:58 +01:00
Username
00952b7947 fix: call evaluate() in worker mode before checking results
failcount was initialized to 0 and never updated because
evaluate() was skipped, causing all proxies to pass.
2026-02-18 00:16:35 +01:00
Username
6800995361 docs: reflect podman-compose on all nodes
Remove stale systemd unit and standalone podman run references.
All nodes now managed exclusively via compose.
2026-02-17 23:44:35 +01:00
Username
7a271896a8 ppf-common: fix ad-hoc ansible for toolkit inventory
Add --become to ansible_cmd (needed when connecting as
ansible user). Add cd /tmp to podman_cmd so sudo -u podman
doesn't fail on inaccessible /home/ansible cwd.
2026-02-17 23:38:13 +01:00
Username
8779979780 tools: use compose up -d for ppf-service restart 2026-02-17 23:23:10 +01:00
Username
195d25c653 tools: use compose up -d instead of restart in handler
compose restart reuses the existing container config; up -d
recreates from compose.yml, picking up changes like renamed
CLI flags.
2026-02-17 23:22:33 +01:00
Username
9b8be9d302 tools: use toolkit inventory for all ansible commands
Route ansible_cmd through ppf inventory instead of /opt/ansible
default. Eliminates dynamic inventory warnings and connects
via WireGuard IPs.
2026-02-17 23:22:29 +01:00
Username
9eff4496d6 docs: update README and ROADMAP for playbook deployment 2026-02-17 23:19:59 +01:00
Username
b1de91a969 docs: update CLAUDE.md for playbook-based deployment
Document WireGuard connectivity, playbook architecture, --check
flag, parallel execution, and updated ad-hoc ansible commands
using toolkit inventory.
2026-02-17 23:19:54 +01:00
Username
df2078c7f7 tools: fix symlink resolution in ppf-logs and ppf-service 2026-02-17 23:18:50 +01:00
Username
782deab95d tools: rewrite ppf-deploy as playbook wrapper
Replace sequential ansible ad-hoc calls with ansible-playbook.
Add ansible_playbook_cmd to shared library. Supports --check
for dry runs.
2026-02-17 23:18:46 +01:00
Username
8208670fc1 tools: add ansible deploy playbook
Parallel execution across hosts, handler-based restart on change,
role-aware paths via group_vars. Connects over WireGuard with
dedicated inventory and SSH key.
2026-02-17 23:18:41 +01:00
Username
d902ecafff docs: add tools to ROADMAP.md file reference 2026-02-17 22:53:01 +01:00
Username
fdb761f9f1 docs: add operations toolkit to README.md 2026-02-17 22:52:58 +01:00
Username
12f6b1d8eb docs: update CLAUDE.md for operations toolkit
Replace verbose ansible deployment commands with ppf-deploy,
ppf-logs, and ppf-service references. Keep raw ansible only
for ad-hoc config operations not covered by tools.
2026-02-17 22:52:54 +01:00
Username
1f14173595 tools: add ppf-service
Status, start, stop, restart for PPF containers. Status includes
compose ps, master health check, and worker API query.
2026-02-17 22:50:42 +01:00
Username
2128814a41 tools: add ppf-logs
View container logs with -f follow and -n line count. Resolves
dynamic UID and container name per node role.
2026-02-17 22:50:38 +01:00
Username
7f59cae05c tools: add ppf-deploy
Validates syntax, rsyncs code, copies compose files, fixes ownership,
restarts containers. Supports --no-restart and per-host targeting.
2026-02-17 22:50:34 +01:00
Username
9b7ca20728 tools: add shared library ppf-common.sh
Host topology, ansible/podman/compose wrappers, color helpers,
syntax validation, and target resolution for the PPF ops toolkit.
2026-02-17 22:50:30 +01:00
Username
82c909d7c0 rename --worker-v2 to --worker
No V1 means no need for the suffix. Update flag, function name,
compose command, log messages, and docs.
2026-02-17 22:30:09 +01:00
Username
cb52a978e9 todo: mark V1 worker deprecation complete 2026-02-17 22:13:49 +01:00
Username
224d3642f9 config: remove V1 worker options
Drop --worker flag, batch_size, and claim_timeout. V2 is the
only worker protocol; --worker-v2 and --register remain.
2026-02-17 22:13:31 +01:00
Username
d184dc2926 httpd: remove V1 work distribution and result submission
Drop _work_claims tracking, claim_work(), submit_results(),
get_due_proxy_count(), calculate_fair_batch_size(), and the
/api/work + /api/results endpoint handlers.
2026-02-17 22:12:57 +01:00
Username
2782e6d754 ppf: remove V1 worker functions and main loop
Drop worker_get_work(), worker_submit_results(), and the entire
worker_main() V1 loop. Rewire --register to use worker_v2_main().
2026-02-17 22:10:38 +01:00
Username
0c3c7278f5 rocksock: cache SSL contexts to avoid repeated CA store loads
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
set_default_verify_paths was called per connection (2k+/cycle),
spending ~24s reloading the CA store from disk. Cache two shared
contexts (verify/noverify) at module level instead.
2026-02-17 21:28:36 +01:00
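A sketch of module-level context caching, assuming a Python 2.7.9+/3.x `ssl` module: `create_default_context()` loads the CA store once at import time, so the per-connection reload disappears. Function name and structure are illustrative:

```python
import ssl

# Built once at import; loading the CA store here is the per-connection
# cost (~24s/cycle) that the commit removes from the hot path.
_CTX_VERIFY = ssl.create_default_context()

_CTX_NOVERIFY = ssl.create_default_context()
_CTX_NOVERIFY.check_hostname = False
_CTX_NOVERIFY.verify_mode = ssl.CERT_NONE

def get_ssl_context(verify=True):
    """Hand out one of the two shared contexts instead of building per connection."""
    return _CTX_VERIFY if verify else _CTX_NOVERIFY
```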
Username
ff21c75a7a add .venv to .gitignore
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-17 21:07:16 +01:00
Username
e0e56935f2 todo: add V1 worker deprecation task 2026-02-17 21:06:35 +01:00
Username
9ecf7d89bd servers: refresh list from mirc.com (128 servers) 2026-02-17 21:06:31 +01:00
Username
ba9553f4aa httpd: add freshness filter, mitm param, and provenance to upsert
Export endpoints now require last_seen within 60 minutes. Add optional
mitm=0|1 query parameter to filter MITM proxies. Fix upsert to track
success_count, consecutive_success, last_check, and last_target.
2026-02-17 21:06:27 +01:00
Username
dfcd8f0c00 add test provenance columns and worker report fields
Add last_check/last_target columns to proxylist schema with migration.
Include checktype and target in V2 worker report payload.
2026-02-17 21:06:21 +01:00
Username
4c5f4fa01d watchd: add protocol fingerprint probes and fix nullable counters
Add lightweight SOCKS5/SOCKS4/HTTP handshake probes to identify proxy
protocol before full testing. Guard consecutive_success, success_count,
and total_duration against NoneType from worker-reported upserts.
Track last_check and last_target for test provenance.
2026-02-17 21:06:16 +01:00
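As an illustration, a minimal SOCKS5 greeting probe might look like the sketch below; the function name and timeout are hypothetical, and the SOCKS4/HTTP probes mentioned in the commit would follow the same pattern:

```python
import socket

def probe_socks5(ip, port, timeout=5):
    """Send a minimal SOCKS5 greeting and check the 2-byte method-selection reply.

    A SOCKS5 server answers version byte 0x05 followed by the chosen auth
    method (0x00 = no auth, 0xff = no acceptable method).
    """
    try:
        s = socket.create_connection((ip, port), timeout)
    except (socket.error, socket.timeout):
        return False
    try:
        s.sendall(b"\x05\x01\x00")   # version 5, one method offered: no-auth
        reply = s.recv(2)
        return len(reply) == 2 and reply[0:1] == b"\x05"
    except (socket.error, socket.timeout):
        return False
    finally:
        s.close()
```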
Username
31bdb76a97 config: add fingerprint option for protocol probing 2026-02-17 21:06:10 +01:00
Username
672c1bc1f8 docs: update deployment to use podman-compose
Some checks failed
CI / syntax-check (push) Failing after 1s
CI / memory-leak-check (push) Successful in 17s
2026-02-17 18:52:58 +01:00
Username
fab1e1d110 compose: rewrite master and worker compose files
Some checks failed
CI / syntax-check (pull_request) Failing after 0s
CI / syntax-check (push) Failing after 0s
CI / memory-leak-check (pull_request) Failing after 16s
CI / memory-leak-check (push) Successful in 16s
Drop deprecated version key, add SELinux volume labels, SIGTERM
handling with 30s grace period, configurable master URL via
PPF_MASTER_URL env var, and usage documentation in headers.
2026-02-17 18:37:49 +01:00
Username
716d60898b config: allow checktype = none to disable secondary check
Accepts none/false/off/disabled as checktype value, normalized to
'none' internally. When set, ssl_first is forced on and no Phase 2
check runs -- only successful TLS handshakes count as working.
2026-02-17 18:37:44 +01:00
Username
2e3ce149f9 watchd: tighten secondary check validation
- judge blocks are recorded as neutral (judge_block category), not success;
  evaluate() filters them out so they affect neither pass nor fail count
- require HTTP/1.x response line for non-IRC checks; non-HTTP garbage
  (captive portals, proxy error pages) fails immediately
- add is_public_ip() rejecting RFC 1918, loopback, link-local, and
  multicast ranges from judge exit IP extraction
- remove 5 weak HEAD regex targets whose fingerprint headers appear on
  error pages and captive portals (p3p, X-XSS-Protection,
  x-frame-options, referrer-policy, X-UA-Compatible)
2026-02-17 18:37:38 +01:00
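A sketch of what an `is_public_ip()` along these lines could look like; the rejected ranges follow the commit, the implementation is illustrative:

```python
import socket
import struct

_NON_PUBLIC_RANGES = [
    ("10.0.0.0", 8), ("172.16.0.0", 12), ("192.168.0.0", 16),  # RFC 1918
    ("127.0.0.0", 8),      # loopback
    ("169.254.0.0", 16),   # link-local
    ("224.0.0.0", 4),      # multicast
]

def _to_int(ip):
    return struct.unpack("!I", socket.inet_aton(ip))[0]

def is_public_ip(ip):
    """Reject addresses that can never be a judge-reported exit IP."""
    try:
        addr = _to_int(ip)
    except socket.error:
        return False
    for net, bits in _NON_PUBLIC_RANGES:
        mask = (0xFFFFFFFF << (32 - bits)) & 0xFFFFFFFF
        if addr & mask == _to_int(net):
            return False
    return True
```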
Username
1236ddbd2d add compose files for container management
Replace raw podman run with declarative compose.yml per host type.
Master (odin) gets compose.master.yml, workers get compose.worker.yml.
2026-02-17 18:17:12 +01:00
Username
0311abb46a fetch: encode unicode URLs to bytes before HTTP/SOCKS ops
When URLs arrive as unicode (e.g. from JSON API responses), the unicode
type propagates through _parse_url into the SOCKS5 packet construction
in rocksock. Port bytes > 127 formatted via %c in a unicode string
produce non-ASCII characters that fail during socket sendall()'s implicit
ASCII encode.

Encode URLs to UTF-8 bytes at fetch entry points to keep the entire
request pipeline in str (bytes) domain.
2026-02-17 16:43:26 +01:00
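A sketch of the entry-point normalization; the helper names are hypothetical, the point is that everything downstream of fetch only ever sees bytes:

```python
def to_bytes(url, encoding="utf-8"):
    """Force a URL into the str/bytes domain before it reaches rocksock.

    Under Python 2, mixing unicode into '%c' packet formatting turns the whole
    SOCKS5 request into unicode, and sendall() then attempts an implicit ASCII
    encode that blows up on port bytes > 127.
    """
    if isinstance(url, bytes):
        return url
    return url.encode(encoding)

def fetch(url):
    url = to_bytes(url)  # normalize once at the entry point
    # ...hand the bytes URL to _parse_url() / rocksock from here on
    return url
```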
Username
e74782ad3f ppf: fix worker_id undefined when using --worker-key 2026-02-17 16:15:04 +01:00
Username
c710555aad ppf: pass url scoring config to httpd module 2026-02-17 15:20:15 +01:00
Username
c5287073bf httpd: add score-based url scheduling with EMA tracking
Replace ORDER BY RANDOM() in claim_urls with composite score:
age/interval ratio, yield bonus, quality bonus, error/stale penalties.

Rewrite submit_url_reports with adaptive check_interval and EMA for
avg_fetch_time and yield_rate. Add working_ratio correlation in
submit_proxy_reports via pending count tracking.
2026-02-17 15:20:07 +01:00
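A compact sketch of the scoring and smoothing ideas; the weights and alpha are illustrative, and the error/stale penalty caps shown follow the later "sharpen URL scoring" commit rather than this one's original values:

```python
def ema(previous, sample, alpha=0.3):
    """Exponential moving average; alpha here is a placeholder, not the repo's value."""
    if previous is None:
        return sample
    return alpha * sample + (1.0 - alpha) * previous

def claim_score(age, check_interval, yield_rate, working_ratio, errors, stale):
    """Composite claim score: overdue URLs rise, unproductive ones sink."""
    score = age / float(check_interval or 1)   # age/interval ratio
    score += min(yield_rate, 2.0)              # yield bonus
    score += working_ratio                     # quality bonus
    score -= min(0.5 * errors, 4.0)            # error penalty (later cap)
    score -= min(0.2 * stale, 1.5)             # stale penalty (later cap)
    return score

# e.g. smoothing avg_fetch_time across successive reports for one URL
avg_fetch_time = None
for fetch_time in (2.1, 1.8, 6.0, 2.0):
    avg_fetch_time = ema(avg_fetch_time, fetch_time)
```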
Username
66441f9292 dbs: add url scoring columns to uris table
Migration functions for check_interval, working_ratio, avg_fetch_time,
last_worker, and yield_rate columns with sensible defaults.
2026-02-17 15:19:59 +01:00
Username
862eeed5c8 ppf: add worker_v2_main() for URL-driven discovery 2026-02-17 14:23:58 +01:00
Username
0685c2bc4c ppf: add HTTP client functions for V2 worker endpoints 2026-02-17 14:23:44 +01:00
Username
4a5210f9f7 config: add worker V2 config items and --worker-v2 flag 2026-02-17 14:23:13 +01:00
Username
18c7118ed8 docs: update worker hosts to cassius, edge, sentinel 2026-02-17 14:05:29 +01:00
Username
6c111af630 httpd: add /api/report-proxies endpoint 2026-02-17 13:44:57 +01:00
Username
66157b5216 httpd: add /api/report-urls endpoint 2026-02-17 13:43:56 +01:00
Username
3162c65549 httpd: add /api/claim-urls endpoint 2026-02-17 13:42:59 +01:00
Username
5197c3b7e6 httpd: pass url database to api server 2026-02-17 13:42:01 +01:00
Username
da832d94b7 dbs: add last_seen column to proxylist 2026-02-17 13:41:25 +01:00
Username
96e6f06e0d docs: add worker-driven discovery design doc
Architecture proposal to move proxy list fetching from master to
workers. Workers claim URLs, fetch lists, extract and test proxies,
report working proxies and URL health back to master. Trust-based
model: workers report working proxies only, no consensus needed.
2026-02-17 13:32:42 +01:00
Username
c19959cda2 dbs: add 19 proxy sources from 7 new repositories
Expand PROXY_SOURCES with proxifly, vakhov, prxchk, sunny9577,
officialputuid, hookzof, and iplocate lists. Add source_proto
and protos_working schema columns for protocol intelligence.
Remove completed proxy source expansion task from roadmap.
2026-02-17 13:13:23 +01:00
Username
e6b736a577 docs: remove completed items from TODO and ROADMAP 2026-02-17 12:06:49 +01:00
Username
00afd141ae httpd: add /proxies/all endpoint for unlimited proxy list 2026-02-15 12:27:55 +01:00
Username
6ba4b3e1e9 httpd: exclude untested proxies from results
Filter out entries with proto IS NULL from /proxies and /proxies/count
endpoints. These are proxies added to the database but never validated,
leaking into results with null proto, asn, and zero latency.
2026-02-15 04:02:00 +01:00
Username
2960458825 httpd: fix wsgi /proxies route ignoring query params
The WSGI _handle_route had a hardcoded LIMIT 100 query for /proxies,
ignoring limit, proto, country, asn, and format parameters. Align
with the BaseHTTPRequestHandler path that already supported them.
2026-02-15 03:58:57 +01:00
35 changed files with 4313 additions and 2142 deletions

View File

@@ -8,11 +8,14 @@ on:
workflow_dispatch:
jobs:
syntax-check:
validate:
runs-on: dotfiles
container:
image: python:3-slim
steps:
- name: Checkout
run: |
apt-get update -qq && apt-get install -y -qq git >/dev/null
git clone --depth 1 --branch "${GITHUB_REF_NAME}" \
"https://oauth2:${{ github.token }}@${GITHUB_SERVER_URL#https://}/${GITHUB_REPOSITORY}.git" .
@@ -30,70 +33,30 @@ jobs:
done
exit $failed
memory-leak-check:
runs-on: dotfiles
container:
image: python:3-slim
steps:
- name: Checkout
- name: Import validation
run: |
apt-get update && apt-get install -y git
git clone --depth 1 --branch "${GITHUB_REF_NAME}" \
"https://oauth2:${{ github.token }}@${GITHUB_SERVER_URL#https://}/${GITHUB_REPOSITORY}.git" .
- name: Check for memory leak patterns
run: |
echo "Scanning for common memory leak patterns..."
echo "Verifying module imports..."
failed=0
# Check for unbounded list/dict growth without limits
echo "Checking for unbounded collections..."
for f in ppf.py proxywatchd.py scraper.py httpd.py; do
if [ -f "$f" ]; then
# Look for .append() without corresponding size limits
if grep -n "\.append(" "$f" | grep -v "# bounded" | grep -v "_max\|max_\|limit\|[:]\|pop(" > /tmp/unbounded 2>/dev/null; then
count=$(wc -l < /tmp/unbounded)
if [ "$count" -gt 20 ]; then
echo "WARN $f: $count potential unbounded appends"
fi
fi
for mod in comboparse config dbs job misc mysqlite network_stats stats translations; do
if python3 -c "import sys; sys.path.insert(0,'.'); import $mod; print('OK $mod')"; then
:
else
echo "FAIL $mod"
failed=1
fi
done
exit $failed
# Check for circular references
echo "Checking for potential circular references..."
for f in ppf.py proxywatchd.py scraper.py httpd.py connection_pool.py; do
if [ -f "$f" ]; then
if grep -n "self\.\w* = self" "$f" 2>/dev/null; then
echo "WARN $f: potential self-reference"
fi
fi
done
# Check for __del__ methods (often problematic)
echo "Checking for __del__ methods..."
for f in *.py; do
if grep -n "def __del__" "$f" 2>/dev/null; then
echo "WARN $f: has __del__ method (may cause leaks)"
fi
done
# Check that gc is imported where needed
echo "Checking gc module usage..."
for f in proxywatchd.py httpd.py; do
if [ -f "$f" ]; then
if ! grep -q "^import gc" "$f" && ! grep -q "^from gc" "$f"; then
echo "INFO $f: gc module not imported"
fi
fi
done
echo "Memory leak pattern scan complete"
- name: Static import check
- name: YAML lint
run: |
echo "Verifying imports..."
python3 -c "import sys; sys.path.insert(0,'.'); import config; print('OK config')" || echo "FAIL config"
python3 -c "import sys; sys.path.insert(0,'.'); import misc; print('OK misc')" || echo "FAIL misc"
python3 -c "import sys; sys.path.insert(0,'.'); import mysqlite; print('OK mysqlite')" || echo "FAIL mysqlite"
echo "Checking YAML files for tabs..."
failed=0
for f in compose.master.yml compose.worker.yml .gitea/workflows/ci.yml; do
if grep -qP '\t' "$f"; then
echo "FAIL $f: contains tabs"
failed=1
else
echo "OK $f"
fi
done
exit $failed

1
.gitignore vendored
View File

@@ -6,4 +6,5 @@ __pycache__/
*.sqlite-shm
*.sqlite-wal
.claude/
.venv/
data/

285
CLAUDE.md
View File

@@ -6,17 +6,17 @@
┌──────────┬─────────────┬────────────────────────────────────────────────────────┐
│ Host │ Role │ Notes
├──────────┼─────────────┼────────────────────────────────────────────────────────┤
│ odin │ Master │ Scrapes proxy lists, verifies conflicts, port 8081
forge │ Worker │ Tests proxies, reports to master via WireGuard
hermes │ Worker │ Tests proxies, reports to master via WireGuard
janus │ Worker │ Tests proxies, reports to master via WireGuard
│ odin │ Master │ API server + SSL-only proxy verification, port 8081
cassius │ Worker │ Tests proxies, reports to master via WireGuard
edge │ Worker │ Tests proxies, reports to master via WireGuard
sentinel │ Worker │ Tests proxies, reports to master via WireGuard
└──────────┴─────────────┴────────────────────────────────────────────────────────┘
```
### Role Separation
- **Odin (Master)**: Scrapes proxy sources, does verification tests only. No routine testing. Local Tor only.
- **Workers**: All routine proxy testing. Each uses only local Tor (127.0.0.1:9050).
- **Odin (Master)**: API server + SSL-only proxy verification (10 threads). No URL cycling (workers handle it via `/api/claim-urls`). Local Tor only.
- **Workers**: All URL fetching (via `/api/claim-urls`) and proxy testing. Each uses only local Tor (127.0.0.1:9050).
## CRITICAL: Directory Structure Differences
@@ -25,95 +25,111 @@
│ Host │ Code Location │ Container Mount
├──────────┼─────────────────────────┼──────────────────────────────────────────┤
│ odin │ /home/podman/ppf/*.py │ Mounts ppf/ directly to /app
│ workers │ /home/podman/ppf/src/ │ Mounts ppf/src/ to /app (via systemd)
│ workers │ /home/podman/ppf/src/ │ Mounts ppf/src/ to /app (via compose)
└──────────┴─────────────────────────┴──────────────────────────────────────────┘
```
**ODIN uses root ppf/ directory. WORKERS use ppf/src/ subdirectory.**
## Host Access
## Operations Toolkit
**ALWAYS use Ansible from `/opt/ansible` with venv activated:**
All deployment and service management is handled by `tools/`:
```
tools/
lib/ppf-common.sh shared library (hosts, wrappers, colors)
ppf-deploy deploy wrapper (local validation + playbook)
ppf-logs view container logs
ppf-service manage containers (status/start/stop/restart)
ppf-db database operations (stats/purge-proxies/vacuum)
ppf-status cluster overview (containers, workers, queue)
playbooks/
deploy.yml ansible playbook (sync, compose, restart)
inventory.ini hosts with WireGuard IPs + SSH key
group_vars/
all.yml shared vars (ppf_base, ppf_owner)
master.yml odin paths + compose file
workers.yml worker paths + compose file
```
Symlinked to `~/.local/bin/` for direct use.
### Connectivity
All tools connect over WireGuard (`10.200.1.0/24`) as user `ansible`
with the SSH key at `/opt/ansible/secrets/ssh/ansible`.
### Deployment
`ppf-deploy` validates syntax locally, then runs the Ansible playbook.
Hosts execute in parallel; containers restart only when files change.
```bash
ppf-deploy # all nodes: validate, sync, restart
ppf-deploy odin # master only
ppf-deploy workers # cassius, edge, sentinel
ppf-deploy cassius edge # specific hosts
ppf-deploy --no-restart # sync only, skip restart
ppf-deploy --check # dry run (ansible --check --diff)
ppf-deploy -v # verbose ansible output
```
Playbook steps (per host, in parallel):
1. Rsync `*.py` + `servers.txt` (role-aware destination via group_vars)
2. Copy compose file per role (`compose.master.yml` / `compose.worker.yml`)
3. Fix ownership (`podman:podman`, recursive)
4. Restart containers via handler (only if files changed)
5. Show container status
### Container Logs
```bash
ppf-logs # last 40 lines from odin
ppf-logs cassius # specific worker
ppf-logs -f edge # follow mode
ppf-logs -n 100 sentinel # last N lines
```
### Service Management
```bash
ppf-service status # all nodes: compose ps + health
ppf-service status workers # workers only
ppf-service restart odin # restart master
ppf-service stop cassius # stop specific worker
ppf-service start workers # start all workers
```
### Database Management
```bash
ppf-db stats # proxy and URL counts
ppf-db purge-proxies # stop odin, delete all proxies, restart
ppf-db vacuum # reclaim disk space
```
### Cluster Status
```bash
ppf-status # full overview: containers, DB, workers, queue
ppf-status --json # raw JSON from odin API
```
### Direct Ansible (for operations not covered by tools)
Use the toolkit inventory for ad-hoc commands over WireGuard:
```bash
cd /opt/ansible && source venv/bin/activate
```
### Quick Reference Commands
```bash
# Check worker status
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible forge,hermes,janus -m shell -a "hostname"
INV=/home/user/git/ppf/tools/playbooks/inventory.ini
# Check worker config
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible forge,hermes,janus -m shell -a "grep -E 'threads|timeout|ssl' /home/podman/ppf/config.ini"
# Check worker logs
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible forge -m shell -a "sudo -u podman journalctl --user -u ppf-worker -n 20"
ansible -i $INV workers -m shell \
-a "grep -E 'threads|timeout|ssl' /home/podman/ppf/config.ini"
# Modify config option
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible forge,hermes,janus -m lineinfile -a "path=/home/podman/ppf/config.ini line='ssl_only = 1' insertafter='ssl_first'"
# Restart workers (different UIDs!)
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible janus,forge -m raw -a "sudo -u podman XDG_RUNTIME_DIR=/run/user/996 systemctl --user restart ppf-worker"
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible hermes -m raw -a "sudo -u podman XDG_RUNTIME_DIR=/run/user/1001 systemctl --user restart ppf-worker"
```
## Full Deployment Procedure
### Step 1: Validate Syntax Locally
```bash
cd /home/user/git/ppf
for f in *.py; do python3 -m py_compile "$f" && echo "OK: $f"; done
```
### Step 2: Deploy to ALL Hosts
```bash
cd /opt/ansible && source venv/bin/activate
# Deploy to ODIN (root ppf/ directory)
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible odin -m synchronize \
-a "src=/home/user/git/ppf/ dest=/home/podman/ppf/ rsync_opts='--include=*.py,--include=servers.txt,--exclude=*'"
# Deploy to WORKERS (ppf/src/ subdirectory)
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible forge,hermes,janus -m synchronize \
-a "src=/home/user/git/ppf/ dest=/home/podman/ppf/src/ rsync_opts='--include=*.py,--include=servers.txt,--exclude=*'"
# CRITICAL: Fix ownership on ALL hosts (rsync uses ansible user, containers need podman)
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible odin,forge,hermes,janus -m raw \
-a "chown -R podman:podman /home/podman/ppf/"
```
**Note:** Ownership must be fixed after every deploy. rsync runs as ansible user, but containers run as podman user. Missing ownership fix causes `ImportError: No module named X` errors.
### Step 3: Restart Services
```bash
# Restart ODIN (UID 1005)
ansible odin -m raw \
-a "cd /tmp && XDG_RUNTIME_DIR=/run/user/1005 runuser -u podman -- podman restart ppf"
# Restart WORKERS (note different UIDs)
ansible janus,forge -m raw \
-a "sudo -u podman XDG_RUNTIME_DIR=/run/user/996 systemctl --user restart ppf-worker"
ansible hermes -m raw \
-a "sudo -u podman XDG_RUNTIME_DIR=/run/user/1001 systemctl --user restart ppf-worker"
```
### Step 4: Verify All Running
```bash
# Check odin (UID 1005)
ansible odin -m raw \
-a "cd /tmp && XDG_RUNTIME_DIR=/run/user/1005 runuser -u podman -- podman ps"
# Check workers
ansible janus,forge -m raw \
-a "sudo -u podman XDG_RUNTIME_DIR=/run/user/996 systemctl --user is-active ppf-worker"
ansible hermes -m raw \
-a "sudo -u podman XDG_RUNTIME_DIR=/run/user/1001 systemctl --user is-active ppf-worker"
ansible -i $INV workers -m lineinfile \
-a "path=/home/podman/ppf/config.ini line='ssl_only = 1' insertafter='ssl_first'"
```
## Podman User IDs
@@ -123,12 +139,14 @@ ansible hermes -m raw \
│ Host │ UID │ XDG_RUNTIME_DIR
├──────────┼───────┼─────────────────────────────┤
│ odin │ 1005 │ /run/user/1005
hermes │ 1001 │ /run/user/1001
janus │ 996 │ /run/user/996
forge │ 996 │ /run/user/996
cassius │ 993 │ /run/user/993
edge │ 993 │ /run/user/993
sentinel │ 992 │ /run/user/992
└──────────┴───────┴─────────────────────────────┘
```
**Prefer dynamic UID discovery** (`uid=$(id -u podman)`) over hardcoded values.
## Configuration
### Odin config.ini
@@ -138,11 +156,19 @@ ansible hermes -m raw \
tor_hosts = 127.0.0.1:9050 # Local Tor ONLY
[watchd]
threads = 0 # NO routine testing
database = data/ppf.sqlite
threads = 10 # SSL-only verification of worker-reported proxies
timeout = 7
checktype = none # No secondary check
ssl_first = 1
ssl_only = 1
database = data/proxies.sqlite
[ppf]
threads = 0 # NO URL cycling (workers handle it)
database = data/websites.sqlite
[scraper]
threads = 10
enabled = 0 # Disabled on master
```
### Worker config.ini
@@ -156,7 +182,7 @@ threads = 35
timeout = 9
ssl_first = 1 # Try SSL handshake first
ssl_only = 0 # Set to 1 to skip secondary check on SSL failure
checktype = head # Secondary check type: head, irc, judges
checktype = head # Secondary check: head, irc, judges, none (SSL-only)
```
### Config Options
@@ -167,7 +193,7 @@ checktype = head # Secondary check type: head, irc, judges
├───────────────┼─────────┼────────────────────────────────────────────────────┤
│ ssl_first │ 1 │ Try SSL handshake first, fallback to checktype
│ ssl_only │ 0 │ Skip secondary check when SSL fails (faster)
│ checktype │ head │ Secondary check: head, irc, judges
│ checktype │ head │ Secondary check: head, irc, judges, none/false
│ threads │ 20 │ Number of test threads
│ timeout │ 15 │ Socket timeout in seconds
└───────────────┴─────────┴────────────────────────────────────────────────────┘
@@ -186,41 +212,21 @@ batch_size = clamp(fair_share, min=100, max=1000)
- Workers shuffle their batch locally to avoid testing same proxies simultaneously
- Claims expire after 5 minutes if not completed
## Worker systemd Unit
## Container Management
Located at `/home/podman/.config/systemd/user/ppf-worker.service`:
All nodes run via `podman-compose` with role-specific compose files:
```ini
[Unit]
Description=PPF Worker Container
After=network-online.target tor.service
- **Odin**: `compose.master.yml` -> deployed as `compose.yml`
- **Workers**: `compose.worker.yml` -> deployed as `compose.yml`
[Service]
Type=simple
Restart=on-failure
RestartSec=10
WorkingDirectory=%h
ExecStartPre=-/usr/bin/podman stop -t 10 ppf-worker
ExecStartPre=-/usr/bin/podman rm -f ppf-worker
ExecStart=/usr/bin/podman run \
--name ppf-worker --rm --log-driver=journald --network=host \
-v %h/ppf/src:/app:ro \
-v %h/ppf/data:/app/data \
-v %h/ppf/config.ini:/app/config.ini:ro \
-e PYTHONUNBUFFERED=1 \
localhost/ppf-worker:latest \
python -u ppf.py --worker --server http://10.200.1.250:8081
ExecStop=/usr/bin/podman stop -t 10 ppf-worker
[Install]
WantedBy=default.target
```
Containers are managed exclusively through compose. No systemd user
services or standalone `podman run` commands.
## Rebuilding Images
```bash
# Workers - from ppf/ directory (Dockerfile copies from src/)
ansible forge,hermes,janus -m raw \
ansible cassius,edge,sentinel -m raw \
-a "cd /home/podman/ppf && sudo -u podman podman build -t localhost/ppf-worker:latest ."
# Odin - from ppf/ directory
@@ -231,24 +237,25 @@ ansible odin -m raw \
## API Endpoints
```
/dashboard Web UI with live statistics
/map Interactive world map
/health Health check
/api/stats Runtime statistics (JSON)
/api/workers Connected worker status
/api/memory Memory profiling data
/api/countries Proxy counts by country
/proxies Working proxies list
/dashboard Web UI with live statistics
/map Interactive world map
/health Health check
/api/stats Runtime statistics (JSON)
/api/workers Connected worker status
/api/countries Proxy counts by country
/api/claim-urls Claim URL batch for worker-driven fetching (GET)
/api/report-urls Report URL fetch results (POST)
/api/report-proxies Report working proxies (POST)
/proxies Working proxies list
```
## Troubleshooting
### Missing servers.txt
Workers need `servers.txt` in src/:
Redeploy syncs `servers.txt` automatically:
```bash
ansible forge,hermes,janus -m copy \
-a "src=/home/user/git/ppf/servers.txt dest=/home/podman/ppf/src/servers.txt owner=podman group=podman"
ppf-deploy workers
```
### Exit Code 126 (Permission/Storage)
@@ -260,25 +267,23 @@ sudo -u podman podman system reset --force
### Dashboard Shows NaN or Missing Data
Odin likely running old code. Redeploy to odin:
Odin likely running old code:
```bash
ansible odin -m synchronize \
-a "src=/home/user/git/ppf/ dest=/home/podman/ppf/ rsync_opts='--include=*.py,--include=servers.txt,--exclude=*'"
ansible odin -m raw -a "chown -R podman:podman /home/podman/ppf/"
ansible odin -m raw -a "cd /tmp; sudo -u podman podman restart ppf"
ppf-deploy odin
```
### Worker Keeps Crashing
1. Check systemd status with correct UID
2. Verify servers.txt exists in src/
3. Check ownership
4. Run manually to see error:
1. Check status: `ppf-service status workers`
2. Check logs: `ppf-logs -n 50 cassius`
3. Redeploy (fixes ownership + servers.txt): `ppf-deploy cassius`
4. If still failing, run manually on the host to see error:
```bash
sudo -u podman podman run --rm --network=host \
-v /home/podman/ppf/src:/app:ro \
-v /home/podman/ppf/data:/app/data \
-v /home/podman/ppf/config.ini:/app/config.ini:ro \
-v /home/podman/ppf/src:/app:ro,Z \
-v /home/podman/ppf/data:/app/data:Z \
-v /home/podman/ppf/config.ini:/app/config.ini:ro,Z \
-v /home/podman/ppf/servers.txt:/app/servers.txt:ro,Z \
localhost/ppf-worker:latest \
python -u ppf.py --worker --server http://10.200.1.250:8081
```

29
Dockerfile.test Normal file
View File

@@ -0,0 +1,29 @@
FROM python:2.7-slim
WORKDIR /app
RUN sed -i 's/deb.debian.org/archive.debian.org/g' /etc/apt/sources.list && \
sed -i 's/security.debian.org/archive.debian.org/g' /etc/apt/sources.list && \
sed -i '/buster-updates/d' /etc/apt/sources.list && \
echo 'deb http://archive.debian.org/debian-security buster/updates main' >> /etc/apt/sources.list && \
apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends gcc libc-dev && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade "pip<21" "setuptools<45" "wheel<0.38"
COPY requirements.txt .
RUN pip install -r requirements.txt || true
RUN pip install pytest
RUN mkdir -p /app/data && \
python -c "import pyasn" 2>/dev/null && \
pyasn_util_download.py --latest && \
pyasn_util_convert.py --single rib.*.bz2 /app/data/ipasn.dat && \
rm -f rib.*.bz2 || \
echo "pyasn database setup skipped"
RUN apt-get purge -y gcc libc-dev && apt-get autoremove -y || true
CMD ["python", "-m", "pytest", "tests/", "-v", "--tb=short"]

View File

@@ -197,46 +197,40 @@ stale_count INT -- checks without new proxies
## Deployment
### Systemd Service
```ini
[Unit]
Description=PPF Python Proxy Finder
After=network-online.target tor.service
Wants=network-online.target
[Service]
Type=simple
User=ppf
WorkingDirectory=/opt/ppf
# ppf.py is the main entry point (runs harvester + validator)
ExecStart=/usr/bin/python2 ppf.py
Restart=on-failure
RestartSec=30
[Install]
WantedBy=multi-user.target
```
### Container Deployment
All nodes use `podman-compose` with role-specific compose files
(rootless, as `podman` user). `--network=host` required for Tor
access at 127.0.0.1:9050.
```sh
# Build
# Build image
podman build -t ppf:latest .
# Run with persistent storage
# IMPORTANT: Use ppf.py as entry point (runs both harvester + validator)
podman run -d --name ppf \
--network=host \
-v ./data:/app/data:Z \
-v ./config.ini:/app/config.ini:ro \
ppf:latest python ppf.py
# Start via compose
podman-compose up -d
# Generate systemd unit
podman generate systemd --name ppf --files --new
# View logs / stop
podman-compose logs -f
podman-compose down
```
Note: `--network=host` required for Tor access at 127.0.0.1:9050.
### Operations Toolkit
The `tools/` directory provides CLI wrappers for multi-node operations.
Deployment uses an Ansible playbook over WireGuard for parallel execution
and handler-based restarts.
```sh
ppf-deploy [targets...] # validate + deploy + restart (playbook)
ppf-deploy --check # dry run with diff
ppf-logs [node] # view container logs (-f to follow)
ppf-service <cmd> [nodes...] # status / start / stop / restart
ppf-db <cmd> # stats / purge-proxies / vacuum
ppf-status # cluster overview (containers, workers, queue)
```
See `--help` on each tool.
## Troubleshooting

View File

@@ -1,411 +1,113 @@
# PPF Project Roadmap
# PPF Roadmap
## Project Purpose
PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework designed to:
1. **Discover** proxy addresses by crawling websites and search engines
2. **Validate** proxies through multi-target testing via Tor
3. **Maintain** a database of working proxies with protocol detection (SOCKS4/SOCKS5/HTTP)
## Architecture Overview
## Architecture
```
┌─────────────────────────────────────────────────────────────────────────────┐
PPF Architecture
├─────────────────────────────────────────────────────────────────────────────┤
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ scraper.py │ │ ppf.py │ │proxywatchd │ │
│ │ │ │ │
│ Searx query │───>│ URL harvest │───>│ Proxy test
│ URL finding │ │ Proxy extract│ │ Validation │
─────────────┘ └───────────── ─────────────┘ │
v v v
│ ┌─────────────────────────────────────────────────────────────────┐
│ │ SQLite Databases │ │
│ │ uris.db (URLs) proxies.db (proxy list) │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │
│ ┌─────────────────────────────────────────────────────────────────┐ │
│ │ Network Layer │ │
│ │ rocksock.py ─── Tor SOCKS ─── Test Proxy ─── Target Server │ │
│ └─────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
──────────────────────────────────────────┐
Odin (Master)
│ httpd.py ─ API + SSL-only verification │
proxywatchd.py ─ proxy recheck daemon
SQLite: proxies.db, websites.db
└──────────┬───────────────────────────────┘
WireGuard (10.200.1.0/24)
┌────────────────┼────────────────┐
v v v
───────────┐ ┌─────────── ───────────
cassius edge │ │ sentinel
│ Worker Worker Worker
│ ppf.py │ │ ppf.py │ ppf.py
└───────────┘ └───────────┘ └───────────┘
```
Workers claim URLs, extract proxies, test them, report back.
Master verifies (SSL-only), serves API, coordinates distribution.
## Constraints
- **Python 2.7** compatibility required
- **Minimal external dependencies** (avoid adding new modules)
- Current dependencies: beautifulsoup4, pyasn, IP2Location
- Data files: IP2LOCATION-LITE-DB1.BIN (country), ipasn.dat (ASN)
- Python 2.7 runtime (container-based)
- Minimal external dependencies
- All traffic via Tor
---
## Phase 1: Stability & Code Quality
## Phase 1: Performance and Quality (current)
**Objective:** Establish a solid, maintainable codebase
Profiling-driven optimizations and source pipeline hardening.
### 1.1 Error Handling Improvements
| Item | Status | Description |
|------|--------|-------------|
| Extraction short-circuits | done | Guard clauses in fetch.py extractors |
| Skip shutdown on failed sockets | done | Track _connected flag, skip shutdown on dead sockets |
| SQLite connection reuse (odin) | done | Per-greenlet cached handles via threading.local |
| Lazy-load ASN database | done | Defer ipasn.dat parsing to first lookup |
| Add more seed sources (100+) | done | Expanded to 120+ URLs with SOCKS5-specific sources |
| Protocol-aware source weighting | done | Dynamic SOCKS boost in claim_urls scoring |
| Sharpen error penalty in URL scoring | done | Reduce erroring URL claim frequency |
| Task | Description | File(s) |
|------|-------------|---------|
| Add connection retry logic | Implement exponential backoff for failed connections | rocksock.py, fetch.py |
| Graceful database errors | Handle SQLite lock/busy errors with retry | mysqlite.py |
| Timeout standardization | Consistent timeout handling across all network ops | proxywatchd.py, fetch.py |
| Exception logging | Log exceptions with context, not just silently catch | all files |
## Phase 2: Proxy Diversity and Consumer API
### 1.2 Code Consolidation
Address customer-reported quality gaps.
| Task | Description | File(s) |
|------|-------------|---------|
| Unify _known_proxies | Single source of truth for known proxy cache | ppf.py, fetch.py |
| Extract proxy utils | Create proxy_utils.py with cleanse/validate functions | new file |
| Remove global config pattern | Pass config explicitly instead of set_config() | fetch.py |
| Standardize logging | Consistent _log() usage with levels across all modules | all files |
| Item | Status | Description |
|------|--------|-------------|
| ASN diversity scoring | pending | Deprioritize over-represented ASNs in testing |
| Graduated recheck intervals | pending | Fresh proxies rechecked more often than stale |
| API filters (proto/country/ASN/latency) | pending | Consumer-facing query parameters on /proxies |
| Latency-based ranking | pending | Expose latency percentiles per proxy |
### 1.3 Testing Infrastructure
## Phase 3: Self-Expanding Source Pool
| Task | Description | File(s) |
|------|-------------|---------|
| Add unit tests | Test proxy parsing, URL extraction, IP validation | tests/ |
| Mock network layer | Allow testing without live network/Tor | tests/ |
| Validation test suite | Verify multi-target voting logic | tests/ |
Worker-driven link discovery from productive pages.
| Item | Status | Description |
|------|--------|-------------|
| Link extraction from productive pages | pending | Parse HTML for links when page yields proxies |
| Report discovered URLs to master | pending | New endpoint for worker URL submissions |
| Conditional discovery | pending | Only extract links from confirmed-productive pages |
## Phase 4: Long-Term
| Item | Status | Description |
|------|--------|-------------|
| Python 3 migration | deferred | Unblocks modern deps, security patches, pyasn native |
| Worker trust scoring | pending | Activate spot-check verification framework |
| Dynamic target pool | pending | Auto-discover and rotate validation targets |
| Geographic target spread | pending | Ensure targets span multiple regions |
---
## Phase 2: Performance Optimization
## Completed
**Objective:** Improve throughput and resource efficiency
### 2.1 Connection Pooling
| Task | Description | File(s) |
|------|-------------|---------|
| Tor connection reuse | Pool Tor SOCKS connections instead of reconnecting | proxywatchd.py |
| HTTP keep-alive | Reuse connections to same target servers | http2.py |
| Connection warm-up | Pre-establish connections before job assignment | proxywatchd.py |
### 2.2 Database Optimization
| Task | Description | File(s) |
|------|-------------|---------|
| Batch inserts | Group INSERT operations (already partial) | dbs.py |
| Index optimization | Add indexes for frequent query patterns | dbs.py |
| WAL mode | Enable Write-Ahead Logging for better concurrency | mysqlite.py |
| Prepared statements | Cache compiled SQL statements | mysqlite.py |
### 2.3 Threading Improvements
| Task | Description | File(s) |
|------|-------------|---------|
| Dynamic thread scaling | Adjust thread count based on success rate | proxywatchd.py |
| Priority queue | Test high-value proxies (low fail count) first | proxywatchd.py |
| Stale proxy cleanup | Background thread to remove long-dead proxies | proxywatchd.py |
---
## Phase 3: Reliability & Accuracy
**Objective:** Improve proxy validation accuracy and system reliability
### 3.1 Enhanced Validation
| Task | Description | File(s) |
|------|-------------|---------|
| Latency tracking | Store and use connection latency metrics | proxywatchd.py, dbs.py |
| Geographic validation | Verify proxy actually routes through claimed location | proxywatchd.py |
| Protocol fingerprinting | Better SOCKS4/SOCKS5/HTTP detection | rocksock.py |
| HTTPS/SSL testing | Validate SSL proxy capabilities | proxywatchd.py |
### 3.2 Target Management
| Task | Description | File(s) |
|------|-------------|---------|
| Dynamic target pool | Auto-discover and rotate validation targets | proxywatchd.py |
| Target health tracking | Remove unresponsive targets from pool | proxywatchd.py |
| Geographic target spread | Ensure targets span multiple regions | config.py |
### 3.3 Failure Analysis
| Task | Description | File(s) |
|------|-------------|---------|
| Failure categorization | Distinguish timeout vs refused vs auth-fail | proxywatchd.py |
| Retry strategies | Different retry logic per failure type | proxywatchd.py |
| Dead proxy quarantine | Separate storage for likely-dead proxies | dbs.py |
---
## Phase 4: Features & Usability
**Objective:** Add useful features while maintaining simplicity
### 4.1 Reporting & Monitoring
| Task | Description | File(s) |
|------|-------------|---------|
| Statistics collection | Track success rates, throughput, latency | proxywatchd.py |
| Periodic status output | Log summary stats every N minutes | ppf.py, proxywatchd.py |
| Export functionality | Export working proxies to file (txt, json) | new: export.py |
### 4.2 Configuration
| Task | Description | File(s) |
|------|-------------|---------|
| Config validation | Validate config.ini on startup | config.py |
| Runtime reconfiguration | Reload config without restart (SIGHUP) | proxywatchd.py |
| Sensible defaults | Document and improve default values | config.py |
### 4.3 Proxy Source Expansion
| Task | Description | File(s) |
|------|-------------|---------|
| Additional scrapers | Support more search engines beyond Searx | scraper.py |
| API sources | Integrate free proxy API endpoints | new: api_sources.py |
| Import formats | Support various proxy list formats | ppf.py |
---
## Implementation Priority
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ Priority Matrix │
├──────────────────────────┬──────────────────────────────────────────────────┤
│ HIGH IMPACT / LOW EFFORT │ HIGH IMPACT / HIGH EFFORT │
│ │ │
│ [x] Unify _known_proxies │ [x] Connection pooling │
│ [x] Graceful DB errors │ [x] Dynamic thread scaling │
│ [x] Batch inserts │ [x] Unit test infrastructure │
│ [x] WAL mode for SQLite │ [x] Latency tracking │
│ │ │
├──────────────────────────┼──────────────────────────────────────────────────┤
│ LOW IMPACT / LOW EFFORT │ LOW IMPACT / HIGH EFFORT │
│ │ │
│ [x] Standardize logging │ [x] Geographic validation │
│ [x] Config validation │ [x] Additional scrapers (Bing, Yahoo, Mojeek) │
│ [x] Export functionality │ [ ] API sources │
│ [x] Status output │ [ ] Protocol fingerprinting │
│ │ │
└──────────────────────────┴──────────────────────────────────────────────────┘
```
---
## Completed Work
### Multi-Target Validation (Done)
- [x] Work-stealing queue with shared Queue.Queue()
- [x] Multi-target validation (2/3 majority voting)
- [x] Interleaved testing (jobs shuffled across proxies)
- [x] ProxyTestState and TargetTestJob classes
### Code Cleanup (Done)
- [x] Removed dead HTTP server code from ppf.py
- [x] Removed dead gumbo code from soup_parser.py
- [x] Removed test code from comboparse.py
- [x] Removed unused functions from misc.py
- [x] Fixed IP/port cleansing in ppf.py extract_proxies()
- [x] Updated .gitignore, removed .pyc files
### Database Optimization (Done)
- [x] Enable SQLite WAL mode for better concurrency
- [x] Add indexes for common query patterns (failed, tested, proto, error, check_time)
- [x] Optimize batch inserts (remove redundant SELECT before INSERT OR IGNORE)
### Dependency Reduction (Done)
- [x] Make lxml optional (removed from requirements)
- [x] Make IP2Location optional (graceful fallback)
- [x] Add --nobs flag for stdlib HTMLParser fallback (bs4 optional)
### Rate Limiting & Stability (Done)
- [x] InstanceTracker class in scraper.py with exponential backoff
- [x] Configurable backoff_base, backoff_max, fail_threshold
- [x] Exception logging with context (replaced bare except blocks)
- [x] Unified _known_proxies cache in fetch.py
### Monitoring & Maintenance (Done)
- [x] Stats class in proxywatchd.py (tested/passed/failed tracking)
- [x] Periodic stats reporting (configurable stats_interval)
- [x] Stale proxy cleanup (cleanup_stale() with configurable stale_days)
- [x] Timeout config options (timeout_connect, timeout_read)
### Connection Pooling (Done)
- [x] TorHostState class tracking per-host health and latency
- [x] TorConnectionPool with worker affinity for circuit reuse
- [x] Exponential backoff (5s, 10s, 20s, 40s, max 60s) on failures
- [x] Pool warmup and health status reporting
### Priority Queue (Done)
- [x] PriorityJobQueue class with heap-based ordering
- [x] calculate_priority() assigns priority 0-4 by proxy state
- [x] New proxies tested first, high-fail proxies last
### Dynamic Thread Scaling (Done)
- [x] ThreadScaler class adjusts thread count dynamically
- [x] Scales up when queue deep and success rate acceptable
- [x] Scales down when queue shallow or success rate drops
- [x] Respects min/max bounds with cooldown period
### Latency Tracking (Done)
- [x] avg_latency, latency_samples columns in proxylist
- [x] Exponential moving average calculation
- [x] Migration function for existing databases
- [x] Latency recorded for successful proxy tests
### Container Support (Done)
- [x] Dockerfile with Python 2.7-slim base
- [x] docker-compose.yml for local development
- [x] Rootless podman deployment documentation
- [x] Volume mounts for persistent data
### Code Style (Done)
- [x] Normalized indentation (4-space, no tabs)
- [x] Removed dead code and unused imports
- [x] Added docstrings to classes and functions
- [x] Python 2/3 compatible imports (Queue/queue)
### Geographic Validation (Done)
- [x] IP2Location integration for country lookup
- [x] pyasn integration for ASN lookup
- [x] Graceful fallback when database files missing
- [x] Country codes displayed in test output: `(US)`, `(IN)`, etc.
- [x] Data files: IP2LOCATION-LITE-DB1.BIN, ipasn.dat
### SSL Proxy Testing (Done)
- [x] Default checktype changed to 'ssl'
- [x] ssl_targets list with major HTTPS sites
- [x] TLS handshake validation with certificate verification
- [x] Detects MITM proxies that intercept SSL connections
### Export Functionality (Done)
- [x] export.py CLI tool for exporting working proxies
- [x] Multiple formats: txt, json, csv, len (length-prefixed)
- [x] Filters: proto, country, anonymity, max_latency
- [x] Sort options: latency, added, tested, success
- [x] Output to stdout or file
### Web Dashboard (Done)
- [x] /dashboard endpoint with dark theme HTML UI
- [x] /api/stats endpoint for JSON runtime statistics
- [x] Auto-refresh with JavaScript fetch every 3 seconds
- [x] Stats provider callback from proxywatchd.py to httpd.py
- [x] Displays: tested/passed/success rate, thread count, uptime
- [x] Tor pool health: per-host latency, success rate, availability
- [x] Failure categories breakdown: timeout, proxy, ssl, closed
### Dashboard Enhancements v2 (Done)
- [x] Prominent check type badge in header (SSL/JUDGES/HTTP/IRC)
- [x] System monitor bar: load, memory, disk, process RSS
- [x] Anonymity breakdown: elite/anonymous/transparent counts
- [x] Database health: size, tested/hour, added/day, dead count
- [x] Enhanced Tor pool stats: requests, success rate, healthy nodes, latency
- [x] SQLite ANALYZE/VACUUM functions for query optimization
- [x] Lightweight design: client-side polling, minimal DOM updates
### Dashboard Enhancements v3 (Done)
- [x] Electric cyan theme with translucent glass-morphism effects
- [x] Unified wrapper styling (.chart-wrap, .histo-wrap, .stats-wrap, .lb-wrap, .pie-wrap)
- [x] Consistent backdrop-filter blur and electric glow borders
- [x] Tor Exit Nodes cards with hover effects (.tor-card)
- [x] Lighter background/tile color scheme (#1e2738 bg, #181f2a card)
- [x] Map endpoint restyled to match dashboard (electric cyan theme)
- [x] Map markers updated from gold to cyan for approximate locations
### Memory Profiling & Analysis (Done)
- [x] /api/memory endpoint with comprehensive memory stats
- [x] objgraph integration for object type counting
- [x] pympler integration for memory summaries
- [x] Memory sample history tracking (RSS over time)
- [x] Process memory from /proc/self/status (VmRSS, VmPeak, VmData, etc.)
- [x] GC statistics (collections, objects, thresholds)
### MITM Detection Optimization (Done)
- [x] MITM re-test skip optimization - avoid redundant SSL checks for known MITM proxies
- [x] mitm_retest_skipped stats counter for tracking optimization effectiveness
- [x] Content hash deduplication for stale proxy list detection
- [x] stale_count reset when content hash changes
### Distributed Workers (Done)
- [x] Worker registration and heartbeat system
- [x] /api/workers endpoint for worker status monitoring
- [x] Tor connectivity check before workers claim work
- [x] Worker test rate tracking with sliding window calculation
- [x] Combined rate aggregation across all workers
- [x] Dashboard worker cards showing per-worker stats
### Dashboard Performance (Done)
- [x] Keyboard shortcuts: r=refresh, 1-9=tabs, t=theme, p=pause
- [x] Tab-aware chart rendering - skip expensive renders for hidden tabs
- [x] Visibility API - pause polling when browser tab hidden
- [x] Dark/muted-dark/light theme cycling
- [x] Stats export endpoint (/api/stats/export?format=json|csv)
### Proxy Validation Cache (Done)
- [x] LRU cache for is_usable_proxy() using OrderedDict
- [x] Thread-safe with lock for concurrent access
- [x] Proper LRU eviction (move_to_end on hits, popitem oldest when full)
### Database Context Manager (Done)
- [x] Refactored all DB operations to use `_db_context()` context manager
- [x] Connections guaranteed to close even on exceptions
- [x] Removed deprecated `_prep_db()` and `_close_db()` methods
- [x] `fetch_rows()` now accepts db parameter for cleaner dependency injection
### Additional Search Engines (Done)
- [x] Bing and Yahoo engine implementations in scraper.py
- [x] Engine rotation for rate limit avoidance
- [x] engines.py module with SearchEngine base class
### Worker Health Improvements (Done)
- [x] Tor connectivity check before workers claim work
- [x] Fixed interval Tor check (30s) instead of exponential backoff
- [x] Graceful handling when Tor unavailable
### Memory Optimization (Done)
- [x] `__slots__` on ProxyTestState (27 attrs) and TargetTestJob (4 attrs)
- [x] Reduced per-object memory overhead for hot path objects
---
## Technical Debt
| Item | Description | Risk |
|------|-------------|------|
| ~~Dual _known_proxies~~ | ~~ppf.py and fetch.py maintain separate caches~~ | **Resolved** |
| Global config in fetch.py | set_config() pattern is fragile | Low - works but not clean |
| ~~No input validation~~ | ~~Proxy strings parsed without validation~~ | **Resolved** |
| ~~Silent exception catching~~ | ~~Some except: pass patterns hide errors~~ | **Resolved** |
| ~~Hardcoded timeouts~~ | ~~Various timeout values scattered in code~~ | **Resolved** |
---
## Recent Changes
| Item | Date | Description |
|------|------|-------------|
| Sharpen URL error penalty | 2026-02-22 | error*0.5 cap 4.0 + stale*0.2 cap 1.5 (sketch below) |
| SOCKS5 source expansion | 2026-02-22 | Added 10 new SOCKS5-specific sources |
| SQLite connection reuse | 2026-02-22 | Per-greenlet cached handles via threading.local |
| Lazy-load ASN database | 2026-02-22 | Deferred ipasn.dat to first lookup |
| Socket shutdown skip | 2026-02-22 | _connected flag, skip shutdown on dead sockets |
| Protocol-aware weighting | 2026-02-22 | Dynamic SOCKS boost in claim_urls scoring |
| Seed sources expanded | 2026-02-22 | 37 -> 120+ URLs |
| last_seen freshness fix | 2026-02-22 | Watchd updates last_seen on verification |
| Periodic re-seeding | 2026-02-22 | Reset errored sources every 6h |
| ASN enrichment | 2026-02-22 | Pure-Python ipasn.dat reader + backfill |
| URL pipeline stats | 2026-02-22 | /api/stats exposes source health metrics |
| Extraction short-circuits | 2026-02-22 | Guard clauses + precompiled table regexes |
| Target health tracking | prior | Cooldown-based health for all target pools |
| MITM field in proxy list | prior | Expose mitm boolean in JSON endpoints |
| V1 worker protocol removal | prior | Cleaned up legacy --worker code path |
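The penalty arithmetic from the "Sharpen URL error penalty" row, written out as a rough sketch (the function name and how the penalty feeds into the overall claim score are assumptions; only the coefficients and caps come from the table):
```python
def url_claim_penalty(error_count, stale_count):
    """Illustrative penalty subtracted from a URL's claim score."""
    error_penalty = min(0.5 * error_count, 4.0)   # error*0.5, capped at 4.0
    stale_penalty = min(0.2 * stale_count, 1.5)   # stale*0.2, capped at 1.5
    return error_penalty + stale_penalty

# e.g. a source with 5 consecutive errors and 3 stale fetches:
# min(2.5, 4.0) + min(0.6, 1.5) = 3.1 knocked off its score
```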
---
## File Reference
| File | Purpose | Status |
|------|---------|--------|
| ppf.py | Main URL harvester daemon | Active, cleaned |
| proxywatchd.py | Proxy validation daemon | Active, enhanced |
| scraper.py | Searx search integration | Active, cleaned |
| fetch.py | HTTP fetching with proxy support | Active, LRU cache |
| dbs.py | Database schema and inserts | Active |
| mysqlite.py | SQLite wrapper | Active |
| rocksock.py | Socket/proxy abstraction (3rd party) | Stable |
| http2.py | HTTP client implementation | Stable |
| httpd.py | Web dashboard and REST API server | Active, enhanced |
| config.py | Configuration management | Active |
| comboparse.py | Config/arg parser framework | Stable, cleaned |
| soup_parser.py | BeautifulSoup wrapper | Stable, cleaned |
| misc.py | Utilities (timestamp, logging) | Stable, cleaned |
| export.py | Proxy export CLI tool | Active |
| engines.py | Search engine implementations | Active |
| connection_pool.py | Tor connection pooling | Active |
| network_stats.py | Network statistics tracking | Active |
| dns.py | DNS resolution with caching | Active |
| mitm.py | MITM certificate detection | Active |
| job.py | Priority job queue | Active |
| static/dashboard.js | Dashboard frontend logic | Active, enhanced |
| static/dashboard.html | Dashboard HTML template | Active |
| File | Purpose |
|------|---------|
| ppf.py | URL harvester, worker main loop |
| proxywatchd.py | Proxy validation daemon |
| fetch.py | HTTP fetching, proxy extraction |
| httpd.py | API server, worker coordination |
| dbs.py | Database schema, seed sources |
| config.py | Configuration management |
| rocksock.py | Socket/proxy abstraction |
| http2.py | HTTP client implementation |
| tools/ppf-deploy | Deployment wrapper |

34
TASKLIST.md Normal file

@@ -0,0 +1,34 @@
# PPF Tasklist
Active execution queue. Ordered by priority.
---
## In Progress
| # | Task | File(s) | Notes |
|---|------|---------|-------|
## Queued
| # | Task | File(s) | Notes |
|---|------|---------|-------|
| 12 | API filters on /proxies (proto/country/ASN) | httpd.py | Consumer query params |
| 8 | Graduated recheck intervals | proxywatchd.py | Fresh proxies checked more often |
## Done
| # | Task | Date |
|---|------|------|
| - | Sharpen URL error penalty scoring | 2026-02-22 |
| - | Add SOCKS5-specific sources (10 new) | 2026-02-22 |
| 3 | Lazy-load ASN database | 2026-02-22 |
| 2 | SQLite connection reuse on odin | 2026-02-22 |
| 1 | Skip socket.shutdown on failed connections | 2026-02-22 |
| 4 | Add more seed sources (100+) | 2026-02-22 |
| 6 | Protocol-aware source weighting | 2026-02-22 |
| - | Extraction short-circuits | 2026-02-22 |
| - | last_seen freshness fix | 2026-02-22 |
| - | Periodic re-seeding | 2026-02-22 |
| - | ASN enrichment | 2026-02-22 |
| - | URL pipeline stats | 2026-02-22 |

865
TODO.md

@@ -1,866 +1,35 @@
# PPF Implementation Tasks
# PPF TODO
## Legend
```
[ ] Not started
[~] In progress
[x] Completed
[!] Blocked/needs discussion
```
Intake buffer. Items refined here move to TASKLIST.md.
---
## Immediate Priority (Next Sprint)
## Dashboard
### [x] 1. Unify _known_proxies Cache
**Completed.** Added `init_known_proxies()`, `add_known_proxies()`, `is_known_proxy()`
to fetch.py. Updated ppf.py to use these functions instead of local cache.
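A minimal sketch of the shared helpers named above, assuming a plain module-level set in fetch.py (the real structure and any locking are not shown):
```python
# fetch.py -- shared cache of proxies already seen this session (illustrative)
_known_proxies = set()

def init_known_proxies(addrs):
    """Seed the cache, e.g. with proxies already stored in the database."""
    _known_proxies.update(addrs)

def add_known_proxies(addrs):
    """Record freshly extracted proxies so later pages can skip them."""
    _known_proxies.update(addrs)

def is_known_proxy(addr):
    """True if this address has already been seen."""
    return addr in _known_proxies
```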
---
### [x] 2. Graceful SQLite Error Handling
**Completed.** mysqlite.py now retries on "locked" errors with exponential backoff.
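A sketch of the retry idea (function name, attempt count, and delays are illustrative, not the actual mysqlite.py code):
```python
import sqlite3
import time

def execute_with_retry(conn, sql, params=(), attempts=5, base_delay=0.1):
    """Retry a statement when SQLite reports 'database is locked'."""
    for attempt in range(attempts):
        try:
            return conn.execute(sql, params)
        except sqlite3.OperationalError as exc:
            if 'locked' not in str(exc) or attempt == attempts - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))  # 0.1s, 0.2s, 0.4s, ...
```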
---
### [x] 3. Enable SQLite WAL Mode
**Completed.** mysqlite.py enables WAL mode and NORMAL synchronous on init.
---
### [x] 4. Batch Database Inserts
**Completed.** dbs.py uses executemany() for batch inserts.
---
### [x] 5. Add Database Indexes
**Completed.** dbs.py creates indexes on failed, tested, proto, error, check_time.
---
## Short Term (This Month)
### [x] 6. Log Level Filtering
**Completed.** Added log level filtering with -q/--quiet and -v/--verbose CLI flags.
- misc.py: LOG_LEVELS dict, set_log_level(), get_log_level()
- config.py: Added -q/--quiet and -v/--verbose arguments
- Log levels: debug=0, info=1, warn=2, error=3
- --quiet: only show warn/error
- --verbose: show debug messages
---
### [x] 7. Connection Timeout Standardization
**Completed.** Added timeout_connect and timeout_read to [common] section in config.py.
---
### [x] 8. Failure Categorization
**Completed.** Added failure categorization for proxy errors.
- misc.py: categorize_error() function, FAIL_* constants
- Categories: timeout, refused, auth, unreachable, dns, ssl, closed, proxy, other
- proxywatchd.py: Stats.record() now accepts category parameter
- Stats.report() shows failure breakdown by category
- ProxyTestState.evaluate() returns (success, category) tuple
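A sketch of how such a categorizer might map error text onto a few of the FAIL_* buckets (the matched substrings and the subset of categories shown are assumptions):
```python
FAIL_TIMEOUT = 'timeout'
FAIL_REFUSED = 'refused'
FAIL_DNS = 'dns'
FAIL_SSL = 'ssl'
FAIL_OTHER = 'other'

def categorize_error(err_msg):
    """Best-effort failure bucket for a proxy error message (illustrative)."""
    msg = err_msg.lower()
    if 'timed out' in msg or 'timeout' in msg:
        return FAIL_TIMEOUT
    if 'refused' in msg:
        return FAIL_REFUSED
    if 'getaddrinfo' in msg or 'name or service not known' in msg:
        return FAIL_DNS
    if 'ssl' in msg or 'certificate' in msg:
        return FAIL_SSL
    return FAIL_OTHER
```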
---
### [x] 9. Priority Queue for Proxy Testing
**Completed.** Added priority-based job scheduling for proxy tests.
- PriorityJobQueue class with heap-based ordering
- calculate_priority() assigns priority 0-4 based on proxy state
- Priority 0: New proxies (never tested)
- Priority 1: Working proxies (no failures)
- Priority 2: Low fail count (< 3)
- Priority 3-4: Medium/high fail count
- Integrated into prepare_jobs() for automatic prioritization
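A compact sketch of the scheduling described above (the exact thresholds splitting priorities 3 and 4, and the tie-breaking, are assumptions):
```python
import heapq
import itertools

def calculate_priority(tested, failed):
    """Map proxy state to priority 0 (test first) .. 4 (test last)."""
    if not tested:
        return 0                      # never tested
    if failed == 0:
        return 1                      # currently working
    if failed < 3:
        return 2                      # low fail count
    return 3 if failed < 6 else 4     # medium / high fail count

class PriorityJobQueue(object):
    """Heap-ordered queue; insertion order breaks ties within a priority."""
    def __init__(self):
        self._heap = []
        self._counter = itertools.count()

    def push(self, priority, job):
        heapq.heappush(self._heap, (priority, next(self._counter), job))

    def pop(self):
        return heapq.heappop(self._heap)[2]
```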
---
### [x] 10. Periodic Statistics Output
**Completed.** Added Stats class to proxywatchd.py with record(), should_report(),
and report() methods. Integrated into main loop with configurable stats_interval.
---
## Medium Term (Next Quarter)
### [x] 11. Tor Connection Pooling
**Completed.** Added connection pooling with worker-Tor affinity and health monitoring.
- connection_pool.py: TorHostState class tracks per-host health, latency, backoff
- connection_pool.py: TorConnectionPool with worker affinity, warmup, statistics
- proxywatchd.py: Workers get consistent Tor host assignment for circuit reuse
- Success/failure tracking with exponential backoff (5s, 10s, 20s, 40s, max 60s)
- Latency tracking with rolling averages
- Pool status reported alongside periodic stats
---
### [x] 12. Dynamic Thread Scaling
**Completed.** Added dynamic thread scaling based on queue depth and success rate.
- ThreadScaler class in proxywatchd.py with should_scale(), status_line()
- Scales up when queue is deep (2x target) and success rate > 10%
- Scales down when queue is shallow or success rate drops
- Min/max threads derived from config.watchd.threads (1/4x to 2x)
- 30-second cooldown between scaling decisions
- _spawn_thread(), _remove_thread(), _adjust_threads() helper methods
- Scaler status reported alongside periodic stats
---
### [x] 13. Latency Tracking
**Completed.** Added per-proxy latency tracking with exponential moving average.
- dbs.py: avg_latency, latency_samples columns added to proxylist schema
- dbs.py: _migrate_latency_columns() for backward-compatible migration
- dbs.py: update_proxy_latency() with EMA (alpha = 2/(samples+1))
- proxywatchd.py: ProxyTestState.last_latency_ms field
- proxywatchd.py: evaluate() calculates average latency from successful tests
- proxywatchd.py: submit_collected() records latency for passing proxies
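The EMA update from the list above, written out (only the alpha formula comes from the item; the function shape is a sketch):
```python
def update_latency_ema(avg_latency, latency_samples, new_latency_ms):
    """Fold one new measurement into the stored moving average."""
    latency_samples += 1
    alpha = 2.0 / (latency_samples + 1)      # alpha = 2 / (samples + 1)
    if latency_samples == 1:
        avg_latency = float(new_latency_ms)  # first sample seeds the average
    else:
        avg_latency = alpha * new_latency_ms + (1.0 - alpha) * avg_latency
    return avg_latency, latency_samples

# e.g. samples of 200ms then 400ms -> 200.0, then 2/3*400 + 1/3*200 = ~333.3
```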
---
### [x] 14. Export Functionality
**Completed.** Added export.py CLI tool for exporting working proxies.
- Formats: txt (default), json, csv, len (length-prefixed)
- Filters: --proto, --country, --anonymity, --max-latency
- Options: --sort (latency, added, tested, success), --limit, --pretty
- Output: stdout or --output file
- Usage: `python export.py --proto http --country US --sort latency --limit 100`
---
### [x] 15. Unit Test Infrastructure
**Completed.** Added pytest-based test suite with comprehensive coverage.
- tests/conftest.py: Shared fixtures (temp_db, proxy_db, sample_proxies, etc.)
- tests/test_dbs.py: 40 tests for database operations (CDN filtering, latency, stats)
- tests/test_fetch.py: 60 tests for proxy validation (skipped in Python 3)
- tests/test_misc.py: 39 tests for utilities (timestamp, log levels, SSL errors)
- tests/mock_network.py: Network mocking infrastructure
```
Test Results: 79 passed, 60 skipped (Python 2 only)
Run with: python3 -m pytest tests/ -v
```
---
## Long Term (Future)
### [x] 16. Geographic Validation
**Completed.** Added IP2Location and pyasn for proxy geolocation.
- requirements.txt: Added IP2Location package
- proxywatchd.py: IP2Location for country lookup, pyasn for ASN lookup
- proxywatchd.py: Fixed ValueError handling when database files missing
- data/: IP2LOCATION-LITE-DB1.BIN (2.7M), ipasn.dat (23M)
- Output shows country codes: `http://1.2.3.4:8080 (US)` or `(IN)`, `(DE)`, etc.
---
### [x] 17. SSL Proxy Testing
**Completed.** Added SSL checktype for TLS handshake validation.
- config.py: Default checktype changed to 'ssl'
- proxywatchd.py: ssl_targets list with major HTTPS sites
- Validates TLS handshake with certificate verification
- Detects MITM proxies that intercept SSL connections
### [x] 18. Additional Search Engines
**Completed.** Added modular search engine architecture.
- engines.py: SearchEngine base class with build_url(), extract_urls(), is_rate_limited()
- Engines: DuckDuckGo, Startpage, Mojeek (UK), Qwant (FR), Yandex (RU), Ecosia, Brave
- Git hosters: GitHub, GitLab, Codeberg, Gitea
- scraper.py: EngineTracker class for multi-engine rate limiting
- Config: [scraper] engines, max_pages settings
- searx.instances: Updated with 51 active SearXNG instances
### [x] 19. REST API
**Completed.** Added HTTP API server for querying working proxies.
- httpd.py: ProxyAPIServer class with BaseHTTPServer
- Endpoints: /proxies, /proxies/count, /health
- Params: limit, proto, country, format (json/plain)
- Integrated into proxywatchd.py (starts when httpd.enabled=True)
- Config: [httpd] section with listenip, port, enabled
### [x] 20. Web Dashboard
**Completed.** Added web dashboard with live statistics.
- httpd.py: DASHBOARD_HTML template with dark theme UI
- Endpoint: /dashboard (HTML page with auto-refresh)
- Endpoint: /api/stats (JSON runtime statistics)
- Stats include: tested/passed counts, success rate, thread count, uptime
- Tor pool health: per-host latency, success rate, availability
- Failure categories: timeout, proxy, ssl, closed, etc.
- proxywatchd.py: get_runtime_stats() method provides stats callback
### [x] 21. Dashboard Enhancements (v2)
**Completed.** Major dashboard improvements for better visibility.
- Prominent check type badge in header (SSL/JUDGES/HTTP/IRC with color coding)
- System monitor bar: load average, memory usage, disk usage, process RSS
- Anonymity breakdown: elite/anonymous/transparent proxy counts
- Database health indicators: size, tested/hour, added/day, dead count
- Enhanced Tor pool: total requests, success rate, healthy nodes, avg latency
- SQLite ANALYZE/VACUUM functions for query optimization (dbs.py)
- Database statistics API (get_database_stats())
### [x] 22. Completion Queue Optimization
**Completed.** Eliminated polling bottleneck in proxy test collection.
- Added `completion_queue` for event-driven state signaling
- `ProxyTestState.record_result()` signals when all targets complete
- `collect_work()` drains queue instead of polling all pending states
- Changed `pending_states` from list to dict for O(1) removal
- Result: `is_complete()` eliminated from hot path, `collect_work()` 54x faster
---
### [x] 23. Batch API Endpoint
**Completed.** Added `/api/dashboard` batch endpoint combining stats, workers, and countries.
**Implementation:**
- `httpd.py`: New `/api/dashboard` endpoint returns combined data in single response
- `httpd.py`: Refactored `/api/workers` to use `_get_workers_data()` helper method
- `dashboard.js`: Updated `fetchStats()` to use batch endpoint instead of multiple calls
**Response Structure:**
```json
{
"stats": { /* same as /api/stats */ },
"workers": { /* same as /api/workers */ },
"countries": { /* same as /api/countries */ }
}
```
**Benefit:**
- Reduces dashboard polling from 2 HTTP requests to 1 per poll cycle
- Lower RTT impact over SSH tunnels and high-latency connections
- Single database connection serves all data
---
## Profiling-Based Performance Optimizations
**Baseline:** 30-minute profiling session, 25.6M function calls, 1842s runtime
The following optimizations were identified through cProfile analysis. Each is
assessed for real-world impact based on measured data.
### [x] 1. SQLite Query Batching
**Completed.** Added batch update functions and optimized submit_collected().
**Implementation:**
- `batch_update_proxy_latency()`: Single SELECT with IN clause, compute EMA in Python,
batch UPDATE with executemany()
- `batch_update_proxy_anonymity()`: Batch all anonymity updates in single executemany()
- `submit_collected()`: Uses batch functions instead of per-proxy loops
**Previous State:**
- 18,182 execute() calls consuming 50.6s (2.7% of runtime)
- Individual UPDATE for each proxy latency and anonymity
**Improvement:**
- Reduced from N execute() + N commit() to 1 SELECT + 1 executemany() per batch
- Estimated 15-25% reduction in SQLite overhead
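The batching shape, sketched (column names follow the latency-tracking items earlier in this file; `update_latency_ema` is the helper sketched under item 13; the real function handles more columns):
```python
def batch_update_proxy_latency(db, results):
    """results: list of (proxy_addr, latency_ms). One SELECT, one executemany."""
    if not results:
        return
    addrs = [addr for addr, _ in results]
    placeholders = ','.join('?' * len(addrs))
    rows = db.execute(
        'SELECT proxy, avg_latency, latency_samples FROM proxylist '
        'WHERE proxy IN (%s)' % placeholders, addrs).fetchall()
    current = {proxy: (avg or 0.0, samples or 0) for proxy, avg, samples in rows}

    updates = []
    for addr, latency_ms in results:
        avg, samples = current.get(addr, (0.0, 0))
        avg, samples = update_latency_ema(avg, samples, latency_ms)
        updates.append((avg, samples, addr))

    db.executemany(
        'UPDATE proxylist SET avg_latency = ?, latency_samples = ? WHERE proxy = ?',
        updates)
    db.commit()
```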
---
### [x] 2. Proxy Validation Caching
**Completed.** Converted is_usable_proxy() cache to proper LRU with OrderedDict.
**Implementation:**
- fetch.py: Changed _proxy_valid_cache from dict to OrderedDict
- Added thread-safe _proxy_valid_cache_lock
- move_to_end() on cache hits to maintain LRU order
- Evict oldest entries when cache reaches max size (10,000)
- Proper LRU eviction instead of stopping inserts when full
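A sketch matching the description above (written against Python 3's `OrderedDict.move_to_end()`, as named in the items; under Python 2.7 the same effect needs a pop-and-reinsert; `_validate()` stands in for the real uncached check):
```python
from collections import OrderedDict
from threading import Lock

_CACHE_MAX = 10000
_proxy_valid_cache = OrderedDict()        # addr -> bool
_proxy_valid_cache_lock = Lock()

def is_usable_proxy(addr):
    """Validate a proxy address, memoized behind a thread-safe LRU cache."""
    with _proxy_valid_cache_lock:
        if addr in _proxy_valid_cache:
            _proxy_valid_cache.move_to_end(addr)      # keep hot entries fresh
            return _proxy_valid_cache[addr]
    result = _validate(addr)                          # placeholder for the real check
    with _proxy_valid_cache_lock:
        _proxy_valid_cache[addr] = result
        if len(_proxy_valid_cache) > _CACHE_MAX:
            _proxy_valid_cache.popitem(last=False)    # evict least recently used
    return result
```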
---
### [x] 3. Regex Pattern Pre-compilation
**Completed.** Pre-compiled proxy extraction pattern at module load.
**Implementation:**
- `fetch.py`: Added `PROXY_PATTERN = re.compile(r'...')` at module level
- `extract_proxies()`: Changed `re.findall(pattern, ...)` to `PROXY_PATTERN.findall(...)`
- Pattern compiled once at import, not on each call
**Previous State:**
- `extract_proxies()`: 166 calls, 2.87s total (17.3ms each)
- Pattern recompiled on each call
**Improvement:**
- Eliminated per-call regex compilation overhead
- Estimated 30-50% reduction in extract_proxies() time
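The shape of the change, illustratively (the pattern shown is a stand-in, not the project's actual regex):
```python
import re

# Compiled once at import time instead of on every extract_proxies() call.
PROXY_PATTERN = re.compile(r'(\d{1,3}(?:\.\d{1,3}){3}):(\d{2,5})')

def extract_proxies(content):
    return PROXY_PATTERN.findall(content)
```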
---
### [ ] 4. JSON Stats Response Caching
**Current State:**
- 1.9M calls to JSON encoder functions
- `_iterencode_dict`: 1.4s, `_iterencode_list`: 0.8s
- Dashboard polls every 3 seconds = 600 requests per 30min
- Most stats data unchanged between requests
**Proposed Change:**
- Cache serialized JSON response with short TTL (1-2 seconds)
- Only regenerate when underlying stats change
- Use ETag/If-None-Match for client-side caching
**Assessment:**
```
Current cost: ~5.5s per 30min (JSON encoding overhead)
Potential saving: 60-80% = 3.3-4.4s per 30min = 6.6-8.8s/hour
Effort: Medium (add caching layer to httpd.py)
Risk: Low (stale stats for 1-2 seconds acceptable)
```
**Verdict:** LOW PRIORITY. Only matters with frequent dashboard access.
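If this item is ever picked up, the caching layer could stay very small (sketch only; class name, API, and TTL are assumptions, not the project's code):
```python
import json
import time

class CachedStatsResponse(object):
    """Serialize the stats payload at most once per TTL window."""
    def __init__(self, ttl=2.0):
        self.ttl = ttl
        self._body = None
        self._expires = 0.0

    def get(self, build_stats):
        now = time.time()
        if self._body is None or now >= self._expires:
            self._body = json.dumps(build_stats())   # build_stats: callable -> dict
            self._expires = now + self.ttl
        return self._body
```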
---
### [ ] 5. Object Pooling for Test States
**Current State:**
- `__new__` calls: 43,413 at 10.1s total
- `ProxyTestState.__init__`: 18,150 calls, 0.87s
- `TargetTestJob` creation: similar overhead
- Objects created and discarded each test cycle
**Proposed Change:**
- Implement object pool for ProxyTestState and TargetTestJob
- Reset and reuse objects instead of creating new
- Pool size: 2x thread count
**Assessment:**
```
Current cost: ~11s per 30min = 22s/hour = ~8.8min/day
Potential saving: 50-70% = 5.5-7.7s per 30min = 11-15s/hour = ~4.4-6min/day
Effort: High (significant refactoring, reset logic needed)
Risk: Medium (state leakage bugs if reset incomplete)
```
**Verdict:** NOT RECOMMENDED. High effort, medium risk, modest gain.
Python's object creation is already optimized. Focus elsewhere.
---
### [ ] 6. SQLite Connection Reuse
**Current State:**
- 718 connection opens in 30min session
- Each open: 0.26ms (total 0.18s for connects)
- Connection per operation pattern in mysqlite.py
**Proposed Change:**
- Maintain persistent connection per thread
- Implement connection pool with health checks
- Reuse connections across operations
**Assessment:**
```
Current cost: 0.18s per 30min (connection overhead only)
Potential saving: 90% = 0.16s per 30min = 0.32s/hour
Effort: Medium (thread-local storage, lifecycle management)
Risk: Medium (connection state, locking issues)
```
**Verdict:** NOT RECOMMENDED. Negligible time savings (0.16s per 30min).
SQLite's lightweight connections don't justify pooling complexity.
---
### Summary: Optimization Priority Matrix
```
┌──────────────────────────────┬────────┬────────┬──────────┬────────┐
│ Optimization                 │ Effort │ Risk   │ Savings  │ Status │
├──────────────────────────────┼────────┼────────┼──────────┼────────┤
│ 1. SQLite Query Batching     │ Low    │ Low    │ 20-34s/h │ DONE   │
│ 2. Proxy Validation Caching  │ V.Low  │ None   │ 5-8s/h   │ DONE   │
│ 3. Regex Pre-compilation     │ Low    │ None   │ 5-8s/h   │ DONE   │
│ 4. JSON Response Caching     │ Medium │ Low    │ 7-9s/h   │ Later  │
│ 5. Object Pooling            │ High   │ Medium │ 11-15s/h │ Skip   │
│ 6. SQLite Connection Reuse   │ Medium │ Medium │ 0.3s/h   │ Skip   │
└──────────────────────────────┴────────┴────────┴──────────┴────────┘
Completed: 1 (SQLite Batching), 2 (Proxy Caching), 3 (Regex Pre-compilation)
Remaining: 4 (JSON Caching - Later)
Realized savings from completed optimizations:
Per hour: 25-42 seconds saved
Per day: 10-17 minutes saved
Per week: 1.2-2.0 hours saved
Note: 68.7% of runtime is socket I/O (recv/send) which cannot be optimized
without changing the fundamental network architecture. The optimizations
above target the remaining 31.3% of CPU-bound operations.
```
---
## Potential Dashboard Improvements
### [ ] Dashboard Performance Optimizations
**Goal:** Ensure dashboard remains lightweight and doesn't impact system performance.
**Current safeguards:**
- No polling on server side (client-initiated via fetch)
- 3-second refresh interval (configurable)
- Minimal DOM updates (targeted element updates, not full re-render)
- Static CSS/JS (no server-side templating per request)
- No persistent connections (stateless HTTP)
**Future considerations:**
- [x] Add rate limiting on /api/stats endpoint (300 req/60s sliding window)
- [ ] Cache expensive DB queries (top countries, protocol breakdown)
- [ ] Lazy-load historical data (only when scrolled into view)
- [ ] WebSocket option for push updates (reduce polling overhead)
- [ ] Configurable refresh interval via URL param or localStorage
- [x] Pause polling when browser tab not visible (Page Visibility API)
- [x] Skip chart rendering for inactive dashboard tabs (reduces CPU)
- [ ] Batch API endpoints - combine /api/stats, /api/workers, /api/countries into
single /api/dashboard call to reduce round-trips (helps SSH tunnel latency)
### [ ] Dashboard Feature Ideas
**Low priority - consider when time permits:**
- [x] Geographic map visualization - /map endpoint with Leaflet.js
- [x] Dark/light/muted theme toggle - t key cycles themes
- [x] Export stats as CSV/JSON from dashboard (/api/stats/export?format=json|csv)
- [ ] Historical graphs (24h, 7d) using stats_history table
- [ ] Per-ASN performance analysis
- [ ] Alert thresholds (success rate < X%, MITM detected)
- [ ] WebSocket push updates (reduce polling overhead)
- [ ] Mobile-responsive improvements
- [x] Keyboard shortcuts (r=refresh, 1-9=tabs, t=theme, p=pause)
### [x] Local JS Library Serving
## Memory
**Completed.** All JavaScript libraries now served locally from /static/lib/ endpoint.
- [ ] Lock consolidation (260k LockType objects at scale)
- [ ] Leaner state objects per job
**Bundled libraries (static/lib/):**
- Leaflet.js 1.9.4 (leaflet.js, leaflet.css)
- Leaflet.markercluster (MarkerCluster.Default.css)
- Chart.js 4.x (chart.umd.min.js)
- uPlot (uPlot.iife.min.js, uPlot.min.css)
Memory scales ~4.5 KB/job. No leaks detected. Optimize only if constrained.
**Candidate libraries for future enhancements:**
## Source Pipeline
```
┌─────────────────┬─────────┬───────────────────────────────────────────────┐
│ Library │ Size │ Use Case
├─────────────────┼─────────┼───────────────────────────────────────────────┤
│ Chart.js │ 65 KB │ Line/bar/pie charts (simpler API than D3)
│ uPlot │ 15 KB │ Fast time-series charts (minimal, performant)
│ ApexCharts │ 125 KB │ Modern charts with animations
│ Frappe Charts │ 25 KB │ Simple, modern SVG charts
│ Sparkline │ 2 KB │ Tiny inline charts (already have custom impl)
├─────────────────┼─────────┼───────────────────────────────────────────────┤
│ D3.js │ 85 KB │ Full control, complex visualizations
│ D3-geo │ 30 KB │ Geographic projections (alternative to Leaflet)
├─────────────────┼─────────┼───────────────────────────────────────────────┤
│ Leaflet │ 40 KB │ Interactive maps (already using)
│ Leaflet.heat │ 5 KB │ Heatmap layer for proxy density
│ Leaflet.cluster │ 10 KB │ Marker clustering for many points
└─────────────────┴─────────┴───────────────────────────────────────────────┘
- [ ] PasteBin/GitHub API scrapers for proxy lists
- [ ] Telegram channel scrapers (beyond t.me/s/ HTML)
- [ ] Source quality decay tracking (flag sources going stale)
- [ ] Deduplication of sources across different URL forms
Recommendations:
● uPlot - Best for time-series (rate history, success rate history)
● Chart.js - Best for pie/bar charts (failure breakdown, protocol stats)
● Leaflet - Keep for maps, add heatmap plugin for density viz
```
## Known Issues
**Current custom implementations (no library):**
- Sparkline charts (Test Rate History, Success Rate History) - inline SVG
- Histogram bars (Response Time Distribution) - CSS divs
- Pie charts (Failure Breakdown, Protocol Stats) - CSS conic-gradient
### [!] Podman Container Metadata Disappears
**Decision:** Current custom implementations are lightweight and sufficient.
Add libraries only when custom becomes unmaintainable or new features needed.
### [ ] Memory Optimization Candidates
**Based on memory analysis (production metrics):**
```
Current State (260k queue):
Start RSS: 442 MB
Current RSS: 1,615 MB
Per-job: ~4.5 KB overhead
Object Distribution:
259,863 TargetTestJob (1 per job)
259,863 ProxyTestState (1 per job)
259,950 LockType (1 per job - threading locks)
523,395 dict (2 per job - state + metadata)
522,807 list (2 per job - results + targets)
```
**Potential optimizations:**
- [ ] Lock consolidation - reduce per-proxy locks (260k LockType objects)
- [ ] Leaner state objects - reduce dict/list count per job
- [x] Slot-based classes - use `__slots__` on ProxyTestState (27 attrs), TargetTestJob (4 attrs)
- [ ] Object pooling - reuse ProxyTestState/TargetTestJob objects (not recommended)
**Verdict:** Memory scales linearly with queue (~4.5 KB/job). No leaks detected.
Current usage acceptable for production workloads. Optimize only if memory
becomes a constraint.
---
## Completed
### [x] Work-Stealing Queue
- Implemented shared Queue.Queue() for job distribution
- Workers pull from shared queue instead of pre-assigned lists
- Better utilization across threads
### [x] Multi-Target Validation
- Test each proxy against 3 random targets
- 2/3 majority required for success
- Reduces false negatives from single target failures
### [x] Interleaved Testing
- Jobs shuffled across all proxies before queueing
- Prevents burst of 3 connections to same proxy
- ProxyTestState accumulates results from TargetTestJobs
### [x] Code Cleanup
- Removed 93 lines of dead HTTP server code (ppf.py)
- Removed dead gumbo parser (soup_parser.py)
- Removed test code (comboparse.py)
- Removed unused functions (misc.py)
- Fixed IP/port cleansing (ppf.py)
- Updated .gitignore
### [x] Rate Limiting & Instance Tracking (scraper.py)
- InstanceTracker class with exponential backoff
- Configurable backoff_base, backoff_max, fail_threshold
- Instance cycling when rate limited
### [x] Exception Logging with Context
- Replaced bare `except:` with typed exceptions across all files
- Added context logging to exception handlers (e.g., URL, error message)
### [x] Timeout Standardization
- Added timeout_connect, timeout_read to [common] config section
- Added stale_days, stats_interval to [watchd] config section
### [x] Periodic Stats & Stale Cleanup (proxywatchd.py)
- Stats class tracks tested/passed/failed with thread-safe counters
- Configurable stats_interval (default: 300s)
- cleanup_stale() removes dead proxies older than stale_days (default: 30)
### [x] Unified Proxy Cache
- Moved _known_proxies to fetch.py with helper functions
- init_known_proxies(), add_known_proxies(), is_known_proxy()
- ppf.py now uses shared cache via fetch module
### [x] Config Validation
- config.py: validate() method checks config values on startup
- Validates: port ranges, timeout values, thread counts, engine names
- Warns on missing source_file, unknown engines
- Errors on unwritable database directories
- Integrated into ppf.py, proxywatchd.py, scraper.py main entry points
### [x] Profiling Support
- config.py: Added --profile CLI argument
- ppf.py: Refactored main logic into main() function
- ppf.py: cProfile wrapper with stats output to profile.stats
- Prints top 20 functions by cumulative time on exit
- Usage: `python2 ppf.py --profile`
### [x] SIGTERM Graceful Shutdown
- ppf.py: Added signal handler converting SIGTERM to KeyboardInterrupt
- Ensures profile stats are written before container exit
- Allows clean thread shutdown in containerized environments
- Podman stop now triggers proper cleanup instead of SIGKILL
### [x] Unicode Exception Handling (Python 2)
- Problem: `repr(e)` on exceptions with unicode content caused encoding errors
- Files affected: ppf.py, scraper.py (3 exception handlers)
- Solution: Check `isinstance(err_msg, unicode)` then encode with 'backslashreplace'
- Pattern applied:
```python
try:
err_msg = repr(e)
if isinstance(err_msg, unicode):
err_msg = err_msg.encode('ascii', 'backslashreplace')
except:
err_msg = type(e).__name__
```
- Handles Korean/CJK characters in search queries without crashing
### [x] Interactive World Map (/map endpoint)
- Added Leaflet.js interactive map showing proxy distribution by country
- Modern glassmorphism UI with `backdrop-filter: blur(12px)`
- CartoDB dark tiles for dark theme
- Circle markers sized proportionally to proxy count per country
- Hover effects with smooth transitions
- Stats overlay showing total countries/proxies
- Legend with proxy count scale
- Country coordinates and names lookup tables
### [x] Dashboard v3 - Electric Cyan Theme
- Translucent glass-morphism effects with `backdrop-filter: blur()`
- Electric cyan glow borders `rgba(56,189,248,...)` on all graph wrappers
- Gradient overlays using `::before` pseudo-elements
- Unified styling across: .chart-wrap, .histo-wrap, .stats-wrap, .lb-wrap, .pie-wrap
- New .tor-card wrapper for Tor Exit Nodes with hover effects
- Lighter background color scheme (#1e2738 bg, #181f2a card)
### [x] Map Endpoint Styling Update
- Converted from gold/bronze theme (#c8b48c) to electric cyan (#38bdf8)
- Glass panels with electric glow matching dashboard
- Map markers for approximate locations now cyan instead of gold
- Unified map_bg color with dashboard background (#1e2738)
- Updated Leaflet controls, popups, and legend to cyan theme
### [x] MITM Re-test Optimization
- Skip redundant SSL checks for proxies already known to be MITM
- Added `mitm_retest_skipped` counter to Stats class
- Optimization in `_try_ssl_check()` checks existing MITM flag before testing
- Avoids 6k+ unnecessary re-tests per session (based on production metrics)
### [x] Memory Profiling Endpoint
- /api/memory endpoint with comprehensive memory analysis
- objgraph integration for object type distribution
- pympler integration for memory summaries
- Memory sample history tracking (RSS over time)
- Process memory from /proc/self/status
- GC statistics and collection counts
### [x] Database Context Manager Refactoring
- Refactored all DB operations to use `_db_context()` context manager
- `prepare_jobs()`, `submit_collected()`, `_run()` now use `with self._db_context() as db:`
- `fetch_rows()` accepts db parameter for dependency injection
- Removed deprecated `_prep_db()` and `_close_db()` methods
- Connections guaranteed to close even on exceptions
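A minimal sketch of the pattern (the host class and connection details are illustrative; only the `_db_context()` / `fetch_rows(db, ...)` usage follows the notes above):
```python
from contextlib import contextmanager
import sqlite3

class Watchd(object):                     # illustrative host class
    def __init__(self, db_path):
        self.db_path = db_path

    @contextmanager
    def _db_context(self):
        """Yield a connection and guarantee close() even if the body raises."""
        db = sqlite3.connect(self.db_path)
        try:
            yield db
        finally:
            db.close()

# usage inside prepare_jobs() / submit_collected():
#     with self._db_context() as db:
#         rows = fetch_rows(db, ...)
```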
---
## Deployment Troubleshooting Log
### [x] Container Crash on Startup (2024-12-24)
**Symptoms:**
- Container starts then immediately disappears
- `podman ps` shows no running containers
- `podman logs ppf` returns "no such container"
- Port 8081 not listening
**Debugging Process:**
1. **Initial diagnosis** - SSH to odin, checked container state:
```bash
sudo -u podman podman ps -a # Empty
sudo ss -tlnp | grep 8081 # Nothing listening
```
2. **Ran container in foreground** to capture output:
```bash
sudo -u podman bash -c 'cd /home/podman/ppf && \
timeout 25 podman run --rm --name ppf --network=host \
-v ./src:/app:ro -v ./data:/app/data \
-v ./config.ini:/app/config.ini:ro \
localhost/ppf python2 -u proxywatchd.py 2>&1'
```
3. **Found the error** in httpd thread startup:
```
error: [Errno 98] Address already in use: ('0.0.0.0', 8081)
```
The container had started, but the httpd thread failed to bind, so the process kept running with no HTTP endpoints.
4. **Identified root cause** - orphaned processes from previous debug attempts:
```bash
ps aux | grep -E "[p]pf|[p]roxy"
# Found: python2 ppf.py (PID 6421) still running, holding port 8081
# Found: conmon, timeout, bash processes from stale container
```
5. **Why orphans existed:**
- Previous `timeout 15 podman run` commands timed out
- `podman rm -f` doesn't kill processes when container metadata is corrupted
- Orphaned python2 process kept running with port bound
**Root Cause:**
Stale container processes from interrupted debug sessions held port 8081.
The container started successfully but httpd thread failed to bind,
causing silent failure (no HTTP endpoints) while proxy testing continued.
**Fix Applied:**
```bash
# Force kill all orphaned processes
sudo pkill -9 -f "ppf.py"
sudo pkill -9 -f "proxywatchd.py"
sudo pkill -9 -f "conmon.*ppf"
sleep 2
# Verify port is free
sudo ss -tlnp | grep 8081 # Should show nothing
# Clean podman state
sudo -u podman podman rm -f -a
sudo -u podman podman container prune -f
# Start fresh
sudo -u podman bash -c 'cd /home/podman/ppf && \
podman run -d --rm --name ppf --network=host \
-v ./src:/app:ro -v ./data:/app/data \
-v ./config.ini:/app/config.ini:ro \
localhost/ppf python2 -u proxywatchd.py'
```
**Verification:**
```bash
curl -sf http://localhost:8081/health
# {"status": "ok", "timestamp": 1766573885}
```
**Prevention:**
- Use `podman-compose` for reliable container management
- Use `pkill -9 -f` to kill orphaned processes before restart
- Check port availability before starting: `ss -tlnp | grep 8081`
- Run container foreground first to capture startup errors
**Correct Deployment Procedure:**
```bash
# As root or with sudo
sudo -i -u podman bash
cd /home/podman/ppf
podman-compose down
podman-compose up -d
podman ps
podman logs -f ppf
```
**docker-compose.yml (updated):**
```yaml
version: '3.8'
services:
ppf:
image: localhost/ppf:latest
container_name: ppf
network_mode: host
volumes:
- ./src:/app:ro
- ./data:/app/data
- ./config.ini:/app/config.ini:ro
command: python2 -u proxywatchd.py
restart: unless-stopped
environment:
- PYTHONUNBUFFERED=1
```
---
### [x] SSH Connection Flooding / fail2ban (2024-12-24)
**Symptoms:**
- SSH connections timing out or reset
- "Connection refused" errors
- Intermittent access to odin
**Root Cause:**
Multiple individual SSH commands triggered fail2ban rate limiting.
**Fix Applied:**
Created `~/.claude/rules/ssh-usage.md` with batching best practices.
**Key Pattern:**
```bash
# BAD: 5 separate connections
ssh host 'cmd1'
ssh host 'cmd2'
ssh host 'cmd3'
# GOOD: 1 connection, all commands
ssh host bash <<'EOF'
cmd1
cmd2
cmd3
EOF
```
---
### [!] Podman Container Metadata Disappears (2024-12-24)
**Symptoms:**
- `podman ps -a` shows empty even though process is running
- `podman logs ppf` returns "no such container"
- Port is listening and service responds to health checks
**Observed Behavior:**
```
# Container starts
podman run -d --name ppf ...
# Returns container ID: dc55f0a218b7...
# Immediately after
podman ps -a # Empty!
ss -tlnp | grep 8081 # Shows python2 listening
curl localhost:8081/health # {"status": "ok"}
```
**Analysis:**
- The process runs correctly inside the container namespace
- Container metadata in podman's database is lost/corrupted
- May be related to `--rm` flag interaction with detached mode
- Rootless podman with overlayfs can have state sync issues
**Workaround:**
Service works despite missing metadata. Monitor via:
- `ss -tlnp | grep 8081` - port listening
- `ps aux | grep proxywatchd` - process running
- `curl localhost:8081/health` - service responding
**Impact:** Low. Service functions correctly. Only `podman logs` unavailable.
---
### Container Debugging Checklist
When container fails to start or crashes:
```
┌───┬─────────────────────────────────────────────────────────────────────────┐
│ 1 │ Check for orphans: ps aux | grep -E "[p]rocess_name"
│ 2 │ Check port conflicts: ss -tlnp | grep PORT
│ 3 │ Run foreground: podman run --rm (no -d) to see output
│ 4 │ Check podman state: podman ps -a
│ 5 │ Clean stale: pkill -9 -f "pattern" && podman rm -f -a
│ 6 │ Verify deps: config files, data dirs, volumes exist
│ 7 │ Check logs: podman logs container_name 2>&1 | tail -50
│ 8 │ Health check: curl -sf http://localhost:PORT/health
└───┴─────────────────────────────────────────────────────────────────────────┘
Note: If podman ps shows empty but port is listening and health check passes,
the service is running correctly despite metadata issues. See "Podman Container
Metadata Disappears" section above.
```
`podman ps -a` shows empty even though process is running.
Monitor via `ss -tlnp`, `ps aux`, or `curl localhost:8081/health`.

View File

@@ -3,9 +3,9 @@
"""Combined config file and argument parser."""
try:
from ConfigParser import SafeConfigParser, NoOptionError
from ConfigParser import SafeConfigParser as ConfigParser, NoOptionError
except ImportError:
from configparser import SafeConfigParser, NoOptionError
from configparser import ConfigParser, NoOptionError
from argparse import ArgumentParser
import sys
@@ -23,7 +23,7 @@ class ComboParser(object):
def __init__(self, ini):
self.items = []
self.cparser = SafeConfigParser()
self.cparser = ConfigParser()
self.aparser = ArgumentParser()
self.ini = ini
self.loaded = False

32
compose.master.yml Normal file

@@ -0,0 +1,32 @@
# PPF master node (odin)
#
# Scrapes proxy sources, runs verification, serves API/dashboard.
# No routine proxy testing -- workers handle that.
#
# Prerequisites:
# - config.ini (not tracked, host-specific)
# - data/ (created automatically)
#
# Usage:
# podman-compose -f compose.master.yml up -d
# podman-compose -f compose.master.yml logs -f
# podman-compose -f compose.master.yml down
services:
ppf:
container_name: ppf
image: localhost/ppf:latest
build: .
network_mode: host
restart: unless-stopped
logging:
driver: k8s-file
stop_signal: SIGTERM
stop_grace_period: 30s
environment:
PYTHONUNBUFFERED: "1"
volumes:
- .:/app:ro,Z
- ./data:/app/data:Z
- ./config.ini:/app/config.ini:ro,Z
command: python -u ppf.py

18
compose.test.yml Normal file

@@ -0,0 +1,18 @@
# PPF test runner (Python 2.7, production deps + pytest)
#
# Mounts source and tests as volumes so no rebuild needed between runs.
#
# Usage:
# podman-compose -f compose.test.yml run --rm test
# podman-compose -f compose.test.yml run --rm test python -m pytest tests/test_fetch.py -v
services:
test:
container_name: ppf-test
build:
context: .
dockerfile: Dockerfile.test
volumes:
- .:/app:ro,Z
working_dir: /app
command: python -m pytest tests/ -v --tb=short

38
compose.worker.yml Normal file

@@ -0,0 +1,38 @@
# PPF worker node (cassius, edge, sentinel, ...)
#
# Tests proxies and reports results to master via WireGuard.
# Each worker uses only local Tor (127.0.0.1:9050).
#
# Prerequisites:
# - config.ini (not tracked, host-specific)
# - servers.txt (deploy from repo)
# - src/ (deploy *.py from repo root into src/)
# - data/ (created automatically)
#
# Usage:
# PPF_MASTER_URL=http://10.200.1.250:8081 podman-compose -f compose.worker.yml up -d
# podman-compose -f compose.worker.yml logs -f
# podman-compose -f compose.worker.yml down
#
# The master URL defaults to http://10.200.1.250:8081 (odin via WireGuard).
# Override with PPF_MASTER_URL env var or edit .env file.
services:
ppf-worker:
container_name: ppf-worker
image: localhost/ppf-worker:latest
build: .
network_mode: host
restart: unless-stopped
stop_signal: SIGTERM
stop_grace_period: 30s
logging:
driver: k8s-file
environment:
PYTHONUNBUFFERED: "1"
volumes:
- ./src:/app:ro,Z
- ./data:/app/data:Z
- ./config.ini:/app/config.ini:ro,Z
- ./servers.txt:/app/servers.txt:ro,Z
command: python -u ppf.py --worker --server ${PPF_MASTER_URL:-http://10.200.1.250:8081}

View File

@@ -11,7 +11,12 @@ class Config(ComboParser):
with open(self.watchd.source_file, 'r') as handle:
self.servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0]
# Parse checktypes as comma-separated list
self.watchd.checktypes = [t.strip() for t in self.watchd.checktype.split(',') if t.strip()]
# Normalize: 'false'/'off'/'disabled' -> 'none' (SSL-only mode)
raw_types = [t.strip().lower() for t in self.watchd.checktype.split(',') if t.strip()]
self.watchd.checktypes = ['none' if t in ('false', 'off', 'disabled') else t for t in raw_types]
# SSL-only mode: force ssl_first when secondary check is disabled
if self.watchd.checktypes == ['none']:
self.watchd.ssl_first = True
# Apply log level from CLI flags
if self.args.quiet:
set_log_level('warn')
@@ -40,10 +45,10 @@ class Config(ComboParser):
# Validate thread counts (0 allowed for watchd to disable local testing)
if self.watchd.threads < 0:
errors.append('watchd.threads must be >= 0')
if self.ppf.threads < 1:
errors.append('ppf.threads must be >= 1')
if self.scraper.threads < 1:
errors.append('scraper.threads must be >= 1')
if self.ppf.threads < 0:
errors.append('ppf.threads must be >= 0')
if self.scraper.enabled and self.scraper.threads < 1:
errors.append('scraper.threads must be >= 1 when scraper is enabled')
# Validate max_fail
if self.watchd.max_fail < 1:
@@ -52,12 +57,15 @@ class Config(ComboParser):
errors.append('ppf.max_fail must be >= 1')
# Validate checktypes (secondary check types, ssl is handled by ssl_first)
valid_checktypes = {'irc', 'head', 'judges'}
# 'none' = SSL-only mode (no secondary check)
valid_checktypes = {'irc', 'head', 'judges', 'none'}
for ct in self.watchd.checktypes:
if ct not in valid_checktypes:
errors.append('watchd.checktype "%s" invalid, must be one of: %s' % (ct, ', '.join(sorted(valid_checktypes))))
if not self.watchd.checktypes:
errors.append('watchd.checktype must specify at least one valid type')
if 'none' in self.watchd.checktypes and len(self.watchd.checktypes) > 1:
errors.append('watchd.checktype "none" cannot be combined with other types')
# Validate engine names
valid_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
@@ -112,9 +120,10 @@ class Config(ComboParser):
self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
self.add_item(section, 'checktype', str, 'head', 'secondary check type: irc, head, judges (used when ssl_first fails)', False)
self.add_item(section, 'checktype', str, 'head', 'secondary check type: head, irc, judges, none/false (none = SSL-only)', False)
self.add_item(section, 'ssl_first', bool, True, 'try SSL handshake first, fallback to checktype on failure (default: True)', False)
self.add_item(section, 'ssl_only', bool, False, 'when ssl_first enabled, skip secondary check on SSL failure (default: False)', False)
self.add_item(section, 'fingerprint', bool, True, 'probe proxy protocol before testing (default: True)', False)
self.add_item(section, 'scale_cooldown', int, 10, 'seconds between thread scaling decisions (default: 10)', False)
self.add_item(section, 'scale_threshold', float, 10.0, 'min success rate % to scale up threads (default: 10.0)', False)
@@ -158,17 +167,18 @@ class Config(ComboParser):
self.add_item(section, 'spot_check_pct', float, 1.0, 'percent of working proxies to spot-check (default: 1.0)', False)
section = 'worker'
self.add_item(section, 'batch_size', int, 100, 'proxies per work batch (default: 100)', False)
self.add_item(section, 'heartbeat', int, 60, 'heartbeat interval in seconds (default: 60)', False)
self.add_item(section, 'claim_timeout', int, 300, 'seconds before unclaimed work is released (default: 300)', False)
self.add_item(section, 'url_batch_size', int, 5, 'URLs per claim cycle (default: 5)', False)
self.add_item(section, 'fetch_timeout', int, 30, 'timeout for URL fetching (default: 30)', False)
self.add_item(section, 'cache_ttl', int, 300, 'local proxy test cache TTL in seconds, 0 to disable (default: 300)', False)
self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)
self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)
self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)", type=str, default='')
self.aparser.add_argument("--worker-key", help="worker authentication key", type=str, default='')
self.aparser.add_argument("--register", help="register as worker with master server", action='store_true', default=False)
self.aparser.add_argument("--worker-name", help="worker name for registration (default: hostname)", type=str, default='')
self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)

304
dbs.py

@@ -66,6 +66,93 @@ def _migrate_confidence_column(sqlite):
sqlite.commit()
def _migrate_source_proto(sqlite):
"""Add source_proto columns to preserve scraper-detected protocol intelligence."""
try:
sqlite.execute('SELECT source_proto FROM proxylist LIMIT 1')
except Exception:
# source_proto: protocol detected by scraper (never overwritten by tests)
sqlite.execute('ALTER TABLE proxylist ADD COLUMN source_proto TEXT')
# source_confidence: scraper confidence score (0-100)
sqlite.execute('ALTER TABLE proxylist ADD COLUMN source_confidence INT DEFAULT 0')
sqlite.commit()
def _migrate_protos_working(sqlite):
"""Add protos_working column for multi-protocol storage."""
try:
sqlite.execute('SELECT protos_working FROM proxylist LIMIT 1')
except Exception:
# protos_working: comma-separated list of working protos (e.g. "http,socks5")
sqlite.execute('ALTER TABLE proxylist ADD COLUMN protos_working TEXT')
sqlite.commit()
def _migrate_last_seen(sqlite):
"""Add last_seen column for worker-reported proxy freshness tracking."""
try:
sqlite.execute('SELECT last_seen FROM proxylist LIMIT 1')
except Exception:
# last_seen: unix timestamp of most recent "working" report from any worker
sqlite.execute('ALTER TABLE proxylist ADD COLUMN last_seen INT DEFAULT 0')
sqlite.commit()
def _migrate_last_check_columns(sqlite):
"""Add last_check and last_target columns for test provenance tracking."""
for col, typedef in (('last_check', 'TEXT'), ('last_target', 'TEXT')):
try:
sqlite.execute('SELECT %s FROM proxylist LIMIT 1' % col)
except Exception:
sqlite.execute('ALTER TABLE proxylist ADD COLUMN %s %s' % (col, typedef))
sqlite.commit()
def _migrate_uri_check_interval(sqlite):
"""Add adaptive check_interval column to uris table."""
try:
sqlite.execute('SELECT check_interval FROM uris LIMIT 1')
except Exception:
sqlite.execute('ALTER TABLE uris ADD COLUMN check_interval INT DEFAULT 3600')
sqlite.commit()
def _migrate_uri_working_ratio(sqlite):
"""Add working_ratio column to uris table for proxy quality tracking."""
try:
sqlite.execute('SELECT working_ratio FROM uris LIMIT 1')
except Exception:
sqlite.execute('ALTER TABLE uris ADD COLUMN working_ratio REAL DEFAULT 0.0')
sqlite.commit()
def _migrate_uri_avg_fetch_time(sqlite):
"""Add avg_fetch_time column to uris table for fetch latency EMA."""
try:
sqlite.execute('SELECT avg_fetch_time FROM uris LIMIT 1')
except Exception:
sqlite.execute('ALTER TABLE uris ADD COLUMN avg_fetch_time INT DEFAULT 0')
sqlite.commit()
def _migrate_uri_last_worker(sqlite):
"""Add last_worker column to uris table."""
try:
sqlite.execute('SELECT last_worker FROM uris LIMIT 1')
except Exception:
sqlite.execute('ALTER TABLE uris ADD COLUMN last_worker TEXT')
sqlite.commit()
def _migrate_uri_yield_rate(sqlite):
"""Add yield_rate column to uris table for proxy yield EMA."""
try:
sqlite.execute('SELECT yield_rate FROM uris LIMIT 1')
except Exception:
sqlite.execute('ALTER TABLE uris ADD COLUMN yield_rate REAL DEFAULT 0.0')
sqlite.commit()
def compute_proxy_list_hash(proxies):
"""Compute MD5 hash of sorted proxy list for change detection.
@@ -290,13 +377,23 @@ def create_table_if_not_exists(sqlite, dbname):
asn INT,
latitude REAL,
longitude REAL,
confidence INT DEFAULT 30)""")
confidence INT DEFAULT 30,
source_proto TEXT,
source_confidence INT DEFAULT 0,
protos_working TEXT,
last_seen INT DEFAULT 0,
last_check TEXT,
last_target TEXT)""")
# Migration: add columns to existing databases (must run before creating indexes)
_migrate_latency_columns(sqlite)
_migrate_anonymity_columns(sqlite)
_migrate_asn_column(sqlite)
_migrate_geolocation_columns(sqlite)
_migrate_confidence_column(sqlite)
_migrate_source_proto(sqlite)
_migrate_protos_working(sqlite)
_migrate_last_seen(sqlite)
_migrate_last_check_columns(sqlite)
# Indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
@@ -317,6 +414,11 @@ def create_table_if_not_exists(sqlite, dbname):
content_hash TEXT)""")
# Migration for existing databases
_migrate_content_hash_column(sqlite)
_migrate_uri_check_interval(sqlite)
_migrate_uri_working_ratio(sqlite)
_migrate_uri_avg_fetch_time(sqlite)
_migrate_uri_last_worker(sqlite)
_migrate_uri_yield_rate(sqlite)
# Indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
@@ -444,11 +546,11 @@ def insert_proxies(proxydb, proxies, url):
filtered += 1
continue
rows.append((timestamp, addr, ip, port, proto, 1, 0, 0, 0, 0, 0, confidence))
rows.append((timestamp, addr, ip, port, proto, 1, 0, 0, 0, 0, 0, confidence, proto, confidence))
proxydb.executemany(
'INSERT OR IGNORE INTO proxylist '
'(added,proxy,ip,port,proto,failed,tested,success_count,total_duration,mitm,consecutive_success,confidence) '
'VALUES (?,?,?,?,?,?,?,?,?,?,?,?)',
'(added,proxy,ip,port,proto,failed,tested,success_count,total_duration,mitm,consecutive_success,confidence,source_proto,source_confidence) '
'VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
rows
)
proxydb.commit()
@@ -480,41 +582,195 @@ def insert_urls(urls, search, sqlite):
# Known proxy list sources (GitHub raw lists, APIs)
PROXY_SOURCES = [
# --- GitHub raw lists (sorted by update frequency) ---
# TheSpeedX/PROXY-List - large, hourly updates
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
# clarketm/proxy-list - curated, daily
'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
# monosans/proxy-list - hourly updates
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
# prxchk/proxy-list - 10 min updates
'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
# jetkai/proxy-list - 10 min updates
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',
# roosterkid/openproxylist
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
# ShiftyTR/Proxy-List
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
# hookzof/socks5_list - hourly, SOCKS5 focused
'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
# mmpx12/proxy-list
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt',
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
# proxyscrape API
# ShiftyTR/Proxy-List
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
# roosterkid/openproxylist
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
# clarketm/proxy-list - curated, daily
'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
# officialputuid/KangProxy - 4-6 hour updates
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/https/https.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks4/socks4.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
# iplocate/free-proxy-list - 30 min updates
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/http.txt',
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks4.txt',
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks5.txt',
# ErcinDedeworken/proxy-list - hourly
'https://raw.githubusercontent.com/ErcinDedeworken/proxy-list/main/proxy-list/data.txt',
# MuRongPIG/Proxy-Master - 10 min updates
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
# zloi-user/hideip.me - hourly
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/http.txt',
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks4.txt',
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks5.txt',
# FLAVIEN-music/proxy-list - 30 min updates
'https://raw.githubusercontent.com/FLAVIEN-music/proxy-list/main/proxies/http.txt',
'https://raw.githubusercontent.com/FLAVIEN-music/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/FLAVIEN-music/proxy-list/main/proxies/socks5.txt',
# Zaeem20/FREE_PROXIES_LIST - 30 min updates
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/http.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/https.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt',
# r00tee/Proxy-List - hourly
'https://raw.githubusercontent.com/r00tee/Proxy-List/main/Https.txt',
'https://raw.githubusercontent.com/r00tee/Proxy-List/main/Socks4.txt',
'https://raw.githubusercontent.com/r00tee/Proxy-List/main/Socks5.txt',
# casals-ar/proxy-list
'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',
'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks4',
'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks5',
# yemixzy/proxy-list
'https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/http.txt',
'https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/socks5.txt',
# opsxcq/proxy-list
'https://raw.githubusercontent.com/opsxcq/proxy-list/master/list.txt',
# im-razvan/proxy_list - 10 min updates
'https://raw.githubusercontent.com/im-razvan/proxy_list/main/http.txt',
'https://raw.githubusercontent.com/im-razvan/proxy_list/main/socks4.txt',
'https://raw.githubusercontent.com/im-razvan/proxy_list/main/socks5.txt',
# zevtyardt/proxy-list - daily SOCKS5
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt',
# UptimerBot/proxy-list - 15 min updates
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt',
# Anonym0usWork1221/Free-Proxies
'https://raw.githubusercontent.com/Anonym0usWork1221/Free-Proxies/main/proxy_files/https_proxies.txt',
'https://raw.githubusercontent.com/Anonym0usWork1221/Free-Proxies/main/proxy_files/socks4_proxies.txt',
'https://raw.githubusercontent.com/Anonym0usWork1221/Free-Proxies/main/proxy_files/socks5_proxies.txt',
# ErcinDedeoglu/proxies - hourly
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt',
# dinoz0rg/proxy-list - daily, all protocols
'https://raw.githubusercontent.com/dinoz0rg/proxy-list/main/all.txt',
# elliottophellia/proxylist - SOCKS5
'https://raw.githubusercontent.com/elliottophellia/proxylist/master/results/socks5/global/socks5_len.txt',
# gfpcom/free-proxy-list - SOCKS5
'https://raw.githubusercontent.com/gfpcom/free-proxy-list/main/socks5.txt',
# databay-labs/free-proxy-list - SOCKS5
'https://raw.githubusercontent.com/databay-labs/free-proxy-list/master/socks5.txt',
# --- GitHub Pages / CDN hosted ---
# proxifly/free-proxy-list - 5 min updates (jsDelivr CDN)
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/http/data.txt',
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/socks4/data.txt',
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/socks5/data.txt',
# vakhov/fresh-proxy-list - 5-20 min updates (GitHub Pages)
'https://vakhov.github.io/fresh-proxy-list/http.txt',
'https://vakhov.github.io/fresh-proxy-list/socks4.txt',
'https://vakhov.github.io/fresh-proxy-list/socks5.txt',
# sunny9577/proxy-scraper - 3 hour updates (GitHub Pages)
'https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt',
'https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt',
'https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt',
# --- API endpoints ---
# proxyscrape
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all',
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all',
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all',
# proxy-list.download - SOCKS5 API
'https://www.proxy-list.download/api/v1/get?type=socks5',
'https://www.proxy-list.download/api/v1/get?type=socks4',
# openproxylist.xyz - plain text
'https://api.openproxylist.xyz/http.txt',
'https://api.openproxylist.xyz/socks4.txt',
'https://api.openproxylist.xyz/socks5.txt',
# spys.me - plain text, 30 min updates
'http://spys.me/proxy.txt',
'http://spys.me/socks.txt',
# --- Web scrapers (HTML pages) ---
# spys.one - mixed protocols, requires parsing
'https://spys.one/en/free-proxy-list/',
'https://spys.one/en/socks-proxy-list/',
'https://spys.one/en/https-ssl-proxy/',
# free-proxy-list.net
'https://free-proxy-list.net/',
'https://www.sslproxies.org/',
'https://www.socks-proxy.net/',
# sockslist.us - SOCKS5 focused
'https://sockslist.us/',
# mtpro.xyz - SOCKS5, updated every 5 min
'https://mtpro.xyz/socks5',
# proxy-tools.com - SOCKS5 filtered
'https://proxy-tools.com/proxy/socks5',
# hidemy.name - all protocols, paginated
'https://hide.mn/en/proxy-list/',
# advanced.name - SOCKS5 filtered
'https://advanced.name/freeproxy?type=socks5',
# proxynova.com - by country
'https://www.proxynova.com/proxy-server-list/',
# freeproxy.world - SOCKS5 filtered
'https://www.freeproxy.world/?type=socks5',
# proxydb.net - all protocols
'http://proxydb.net/',
# geonode
'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=http',
'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=socks4',
'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=socks5',
# openproxy.space
'https://openproxy.space/list/http',
'https://openproxy.space/list/socks4',
'https://openproxy.space/list/socks5',
# --- Telegram channels (public HTML view) ---
'https://t.me/s/spys_one',
'https://t.me/s/proxyfree1',
'https://t.me/s/proxylist4free',
'https://t.me/s/proxy_lists',
'https://t.me/s/Proxies4ForYou',
]
def seed_proxy_sources(sqlite):
"""Seed known proxy list sources into uris table."""
def seed_proxy_sources(sqlite, reset_errors=False):
"""Seed known proxy list sources into uris table.
Args:
sqlite: Database connection
reset_errors: If True, reset error/stale counts on existing seed
sources that have errored out, allowing them to be
retried. Safe to call periodically.
"""
timestamp = int(time.time())
added = 0
reset = 0
for url in PROXY_SOURCES:
try:
sqlite.execute(
@@ -525,11 +781,21 @@ def seed_proxy_sources(sqlite):
)
if sqlite.cursor.rowcount > 0:
added += 1
elif reset_errors:
# Reset errored-out seed sources so they get reclaimed
sqlite.execute(
'UPDATE uris SET error = 0, stale_count = 0, '
'check_interval = 3600, check_time = 0 '
'WHERE url = ? AND error >= 5',
(url,)
)
if sqlite.cursor.rowcount > 0:
reset += 1
except Exception as e:
_log('seed_urls insert error for %s: %s' % (url, e), 'warn')
sqlite.commit()
if added > 0:
_log('seeded %d proxy source URLs' % added, 'info')
if added > 0 or reset > 0:
_log('seed sources: %d new, %d reset' % (added, reset), 'info')
def save_session_state(sqlite, stats):

View File

@@ -1,11 +0,0 @@
version: '3.8'
services:
ppf:
build: .
volumes:
- .:/app
working_dir: /app
command: python ppf.py
environment:
- PYTHONUNBUFFERED=1

View File

@@ -0,0 +1,572 @@
# Design: Worker-Driven Discovery
## Status
**Proposal** -- Not yet implemented.
## Problem
The current architecture centralizes all proxy list fetching on the master
node (odin). Workers only test proxies handed to them. This creates several
issues:
1. **Single point of fetch** -- If odin can't reach a source (blocked IP,
transient failure), that source is dead for everyone.
2. **Bandwidth concentration** -- Odin fetches 40 proxy lists every cycle,
extracts proxies, deduplicates, and stores them before workers ever see
them.
3. **Wasted vantage points** -- Workers sit behind different Tor exits and
IPs, but never use that diversity for fetching.
4. **Tight coupling** -- Workers can't operate at all without the master's
claim queue. If odin restarts, all workers stall.
## Proposed Architecture
Move proxy list fetching to workers. Master becomes a coordinator and
aggregator rather than a fetcher.
```
Current:

  Master
    Fetch URLs --------+
    Extract proxies    |
    Store proxylist    |
    Serve /api/work ---+--> Workers
    <- /api/results

Proposed:

  Master
    Manage URL database
    Score URLs from feedback
    Aggregate working proxies
    Serve /api/claim-urls ----> Workers
    <- /api/report-urls ------+
    <- /api/report-proxies ---+
```
### Role Changes
```
+--------+---------------------------+----------------------------------+
| Host   | Current Role              | New Role                         |
+--------+---------------------------+----------------------------------+
| odin   | Fetch URLs                | Maintain URL database            |
|        | Extract proxies           | Score URLs from worker feedback  |
|        | Store proxylist           | Aggregate reported proxies       |
|        | Distribute proxy batches  | Distribute URL batches           |
|        | Collect test results      | Collect URL + proxy reports      |
+--------+---------------------------+----------------------------------+
| worker | Claim proxy batch         | Claim URL batch                  |
|        | Test each proxy           | Fetch URL, extract proxies       |
|        | Report pass/fail          | Test extracted proxies           |
|        |                           | Report URL health + proxy results|
+--------+---------------------------+----------------------------------+
```
## Data Flow
### Phase 1: URL Claiming
Worker requests a batch of URLs to process.
```
Worker                          Master
|                               |
| GET /api/claim-urls           |
|   ?key=...&count=5            |
|------------------------------>|
|                               | Select due URLs from uris table
|                               | Mark as claimed (in-memory)
| [{url, last_hash, proto_hint}, ...]
|<------------------------------|
```
**Claim response:**
```json
{
  "worker_id": "abc123",
  "urls": [
    {
      "url": "https://raw.githubusercontent.com/.../http.txt",
      "last_hash": "a1b2c3d4...",
      "proto_hint": "http",
      "priority": 1
    }
  ]
}
```
Fields:
- `last_hash` -- MD5 of last extracted proxy list. Worker can skip
extraction and report "unchanged" if hash matches, saving CPU.
- `proto_hint` -- Protocol inferred from the URL path (one way to derive it
  is sketched below). Worker uses this for extraction confidence scoring.
- `priority` -- Higher = fetch sooner. Based on URL score.
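
The claim handler has to fill `proto_hint` somehow. A minimal sketch of
deriving it from the URL path at claim time; the helper name and keyword
matching are assumptions for illustration (the worker-side code in this
changeset calls an existing `fetch.detect_proto_from_path()` with similar
intent):

```python
# Hypothetical helper -- not part of the existing code base.
_PROTO_KEYWORDS = ('socks5', 'socks4', 'https', 'http')

def guess_proto_hint(url):
    """Return the protocol a source URL advertises in its path/query, or None."""
    # Drop the scheme so 'https://' does not shadow a protocol named in the path.
    path = url.lower().split('://', 1)[-1]
    for proto in _PROTO_KEYWORDS:
        if proto in path:
            return proto
    return None
```

URLs that name no protocol (aggregate lists, HTML pages) would simply get
`proto_hint: null`, and the worker falls back to extraction-time detection.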
### Phase 2: Fetch and Extract
Worker fetches each URL through Tor, extracts proxies using the existing
`fetch.extract_proxies()` pipeline.
```
Worker
  |
  |  For each claimed URL:
  |    1. Fetch through Tor (fetch_contents)
  |    2. Compute content hash (MD5)
  |    3. If hash == last_hash: skip extraction, report unchanged
  |    4. Else: extract_proxies() -> list of (addr, proto, confidence)
  |    5. Queue extracted proxies for testing
  |
```
### Phase 3: URL Feedback
Worker reports fetch results for each URL back to master.
```
Worker                          Master
|                               |
| POST /api/report-urls         |
|   {reports: [...]}            |
|------------------------------>|
|                               | Update uris table:
|                               |   check_time, error, stale_count,
|                               |   retrievals, proxies_added,
|                               |   content_hash, worker_scores
| {ok: true}                    |
|<------------------------------|
```
**URL report payload:**
```json
{
  "reports": [
    {
      "url": "https://...",
      "success": true,
      "content_hash": "a1b2c3d4...",
      "proxy_count": 1523,
      "fetch_time_ms": 2340,
      "changed": true,
      "error": null
    },
    {
      "url": "https://...",
      "success": false,
      "content_hash": null,
      "proxy_count": 0,
      "fetch_time_ms": 0,
      "changed": false,
      "error": "timeout"
    }
  ]
}
```
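
A minimal sketch of how the master might fold one of these reports into the
`uris` table, assuming the new columns added later in this proposal and a
simple exponential moving average; the function, the `alpha` value, and the
exact column set are illustrative, not the existing `httpd.py` code:

```python
import time

def _ema(old, new, alpha=0.3):
    # Exponential moving average; alpha=0.3 is an assumed smoothing factor.
    return float(new) if not old else alpha * new + (1.0 - alpha) * old

def apply_url_report(db, report):
    """Fold one worker URL report into its uris row (sketch)."""
    row = db.execute(
        'SELECT error, stale_count, retrievals, proxies_added, '
        'avg_fetch_time, yield_rate FROM uris WHERE url = ?',
        (report['url'],)).fetchone()
    if row is None:
        return
    error, stale, retrievals, added, avg_ms, yield_rate = row
    if report['success']:
        error = 0
        retrievals += 1
        stale = 0 if report['changed'] else stale + 1
        added += report['proxy_count']
        avg_ms = int(_ema(avg_ms, report['fetch_time_ms']))
        yield_rate = _ema(yield_rate, report['proxy_count'])
    else:
        error += 1
    db.execute(
        'UPDATE uris SET check_time = ?, error = ?, stale_count = ?, '
        'retrievals = ?, proxies_added = ?, avg_fetch_time = ?, '
        'yield_rate = ?, content_hash = COALESCE(?, content_hash) '
        'WHERE url = ?',
        (int(time.time()), error, stale, retrievals, added, avg_ms,
         yield_rate, report.get('content_hash'), report['url']))
```

`working_ratio` is deliberately not touched here; under the trust model below
it would be fed from the proxy reports (via `source_url`), not from URL
reports.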
### Phase 4: Proxy Testing and Reporting
Worker tests extracted proxies locally using the existing `TargetTestJob`
pipeline. **Only working proxies are reported to master.** Failed proxies
are discarded silently -- no point wasting bandwidth on negatives.
Workers are trusted. If a worker says a proxy works, master accepts it.
```
Worker                          Master
|                               |
| Test proxies locally          |
| (same TargetTestJob flow)     |
| Discard failures              |
|                               |
| POST /api/report-proxies      |
|   {proxies: [...]}            |  (working only)
|------------------------------>|
|                               | Upsert into proxylist:
|                               |   INSERT OR REPLACE
|                               |   Set failed=0, update last_seen
| {ok: true}                    |
|<------------------------------|
```
**Proxy report payload (working only):**
```json
{
  "proxies": [
    {
      "ip": "1.2.3.4",
      "port": 8080,
      "proto": "socks5",
      "source_proto": "socks5",
      "latency": 1.234,
      "exit_ip": "5.6.7.8",
      "anonymity": "elite",
      "source_url": "https://..."
    }
  ]
}
```
No `working` field needed -- everything in the report is working by
definition. The `source_url` links proxy provenance to the URL that
yielded it, enabling URL quality scoring.
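
On the master side the corresponding upsert can stay very small. A sketch
under the trust model described above; only the `proxy`, `proto`, `failed`,
`tested`, and new `last_seen` columns are relied on here, anything beyond that
is an assumption:

```python
import time

def apply_proxy_report(db, p):
    """Upsert one working-proxy report into proxylist (sketch)."""
    now = int(time.time())
    addr = '%s:%d' % (p['ip'], p['port'])
    # INSERT OR REPLACE per the aggregation section below: latest report wins.
    db.execute(
        'INSERT OR REPLACE INTO proxylist (proxy, proto, failed, tested, last_seen) '
        'VALUES (?, ?, 0, ?, ?)',
        (addr, p['proto'], now, now))
    # p['source_url'] could additionally feed a working_ratio EMA on the
    # matching uris row, which is what makes URL quality scoring possible.
```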
### Complete Cycle
```
Worker main loop:
  1. GET /api/claim-urls         Claim batch of URLs
  2. For each URL:
       a. Fetch through Tor
       b. Extract proxies (or skip if unchanged)
       c. Test extracted proxies
  3. POST /api/report-urls       Report URL health
  4. POST /api/report-proxies    Report proxy results
  5. POST /api/heartbeat         Health check
  6. Sleep, repeat
```
## Master-Side Changes
### URL Scheduling
Current `Leechered` threads fetch URLs on a timer based on error/stale
count. Replace with a scoring system that workers consume.
**URL score** (higher = fetch sooner):
```
score = base_score
      + freshness_bonus   # High-frequency sources score higher
      - error_penalty     # Consecutive errors reduce score
      - stale_penalty     # Unchanged content reduces score
      + yield_bonus       # URLs that produce many proxies score higher
      + quality_bonus     # URLs whose proxies actually work score higher
```
Concrete formula:
```python
def url_score(url_row):
    age = now - url_row.check_time
    base = age / url_row.check_interval  # 1.0 when due
    # Yield: proxies found per fetch (rolling average)
    yield_rate = url_row.proxies_added / max(url_row.retrievals, 1)
    yield_bonus = min(yield_rate / 100.0, 1.0)  # Cap at 1.0
    # Quality: what % of extracted proxies actually worked
    quality_bonus = url_row.working_ratio * 0.5  # 0.0 to 0.5
    # Penalties
    error_penalty = min(url_row.error * 0.3, 2.0)
    stale_penalty = min(url_row.stale_count * 0.1, 1.0)
    return base + yield_bonus + quality_bonus - error_penalty - stale_penalty
```
URLs with `score >= 1.0` are due for fetching. Claimed URLs are locked
in memory for `claim_timeout` seconds (existing pattern).
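
Pulling the pieces together, claim selection could look roughly like this;
`_claims`, the `claim_timeout` default, and the `error < 5` cutoff are
assumptions, and rows are assumed to expose named columns (including the ones
`url_score()` reads, e.g. via a small row wrapper):

```python
import time

_claims = {}  # url -> unix time the URL was handed to a worker

def claim_due_urls(db, count=5, claim_timeout=600):
    """Return the highest-scoring due URLs not claimed recently (sketch)."""
    now = time.time()
    # Expire stale claims so a stalled worker cannot block a URL forever.
    for url, ts in list(_claims.items()):
        if now - ts > claim_timeout:
            del _claims[url]
    due = []
    for row in db.execute('SELECT * FROM uris WHERE error < 5').fetchall():
        if row['url'] in _claims:
            continue
        score = url_score(row)
        if score >= 1.0:
            due.append((score, row))
    due.sort(reverse=True, key=lambda item: item[0])
    claimed = []
    for score, row in due[:count]:
        _claims[row['url']] = now
        claimed.append({
            'url': row['url'],
            'last_hash': row['content_hash'],  # hash of the last extracted list
            'proto_hint': None,                # derived from the URL path, see Phase 1
            'priority': score,
        })
    return claimed
```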
### New uris Columns
```sql
ALTER TABLE uris ADD COLUMN check_interval INT DEFAULT 3600;
ALTER TABLE uris ADD COLUMN working_ratio REAL DEFAULT 0.0;
ALTER TABLE uris ADD COLUMN avg_fetch_time INT DEFAULT 0;
ALTER TABLE uris ADD COLUMN last_worker TEXT;
ALTER TABLE uris ADD COLUMN yield_rate REAL DEFAULT 0.0;
```
- `check_interval` -- Adaptive: decreases for high-yield URLs, increases for
  stale/erroring ones (one possible adaptation rule is sketched after this
  list). Replaces the `checktime + error * perfail` formula with a persisted
  value.
- `working_ratio` -- EMA of (working_proxies / total_proxies) from worker
feedback. URLs that yield dead proxies get deprioritized.
- `avg_fetch_time` -- EMA of fetch duration in ms. Helps identify slow
sources.
- `last_worker` -- Which worker last fetched this URL. Useful for
debugging, and to distribute URLs across workers evenly.
- `yield_rate` -- EMA of proxies extracted per fetch.
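
The adaptation rule for `check_interval` is not pinned down above. One
plausible policy, with the bounds and multipliers as assumptions:

```python
MIN_INTERVAL = 600      # 10 min floor for productive sources (assumed)
MAX_INTERVAL = 86400    # 24 h ceiling for dead or stale sources (assumed)

def adapt_check_interval(interval, success, changed, proxy_count):
    """Nudge a URL's check_interval after each worker report (sketch)."""
    if not success:
        interval *= 2                    # back off on errors
    elif changed and proxy_count > 0:
        interval = int(interval * 0.75)  # fresh and productive: poll sooner
    else:
        interval = int(interval * 1.5)   # unchanged content: poll less often
    return max(MIN_INTERVAL, min(int(interval), MAX_INTERVAL))
```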
### Proxy Aggregation
Trust model: **workers are trusted.** If any worker reports a proxy as
working, master accepts it. Failed proxies are never reported -- workers
discard them locally.
```
Worker A reports: 1.2.3.4:8080 working, latency 1.2s
Worker B reports: 1.2.3.4:8080 working, latency 1.5s
Master action:
- INSERT OR REPLACE with latest report
- Update last_seen, latency EMA
- Set failed = 0
```
No consensus, no voting, no trust scoring. A proxy lives as long as at
least one worker keeps confirming it. It dies when nobody reports it for
`proxy_ttl` seconds.
New `proxylist` column:
```sql
ALTER TABLE proxylist ADD COLUMN last_seen INT DEFAULT 0;
```
- `last_seen` -- Unix timestamp of most recent "working" report. Proxies
not seen in N hours are expired by the master's periodic cleanup.
### Proxy Expiry
Working proxies that haven't been reported by any worker within
`proxy_ttl` (default: 4 hours) are marked stale and re-queued for
testing. After `proxy_ttl * 3` with no reports, they're marked failed.
```python
def expire_stale_proxies(db, proxy_ttl):
    cutoff_stale = now - proxy_ttl
    cutoff_dead = now - (proxy_ttl * 3)
    # Mark stale proxies for retesting
    db.execute('''
        UPDATE proxylist SET failed = 1
        WHERE failed = 0 AND last_seen < ? AND last_seen > 0
    ''', (cutoff_stale,))
    # Kill proxies not seen in a long time
    db.execute('''
        UPDATE proxylist SET failed = -1
        WHERE failed > 0 AND last_seen < ? AND last_seen > 0
    ''', (cutoff_dead,))
```
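
Wiring the sweep into the master is a timer check in the main loop, in the
same spirit as the periodic re-seeding this changeset adds to `ppf.py`; the
interval and names below are illustrative:

```python
import time

PROXY_TTL = 14400       # matches the proxy_ttl default above (4 h)
EXPIRY_INTERVAL = 900   # run the sweep every 15 minutes (assumed)
last_expiry = time.time()

# inside the master's periodic maintenance loop:
if time.time() - last_expiry >= EXPIRY_INTERVAL:
    expire_stale_proxies(proxydb, PROXY_TTL)
    proxydb.commit()  # assuming the connection wrapper exposes commit()
    last_expiry = time.time()
```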
## Worker-Side Changes
### New Worker Loop
Replace the current claim-test-report loop with a two-phase loop:
```python
def worker_main_v2(config):
    register()
    verify_tor()
    while True:
        # Phase 1: Fetch URLs and extract proxies
        urls = claim_urls(server, key, count=5)
        url_reports = []
        proxy_batch = []
        for url_info in urls:
            report, proxies = fetch_and_extract(url_info)
            url_reports.append(report)
            proxy_batch.extend(proxies)
        report_urls(server, key, url_reports)
        # Phase 2: Test extracted proxies, report working only
        if proxy_batch:
            working = test_proxies(proxy_batch)
            if working:
                report_proxies(server, key, working)
        heartbeat(server, key)
        sleep(1)
```
### fetch_and_extract()
New function that combines fetching + extraction on the worker side:
```python
def fetch_and_extract(url_info):
    url = url_info['url']
    last_hash = url_info.get('last_hash')
    proto_hint = url_info.get('proto_hint')
    start = time.time()
    try:
        content = fetch_contents(url, head=False, proxy=tor_proxy)
    except Exception as e:
        return {'url': url, 'success': False, 'error': str(e)}, []
    elapsed = int((time.time() - start) * 1000)
    content_hash = hashlib.md5(content).hexdigest()
    if content_hash == last_hash:
        return {
            'url': url, 'success': True, 'content_hash': content_hash,
            'proxy_count': 0, 'fetch_time_ms': elapsed,
            'changed': False, 'error': None
        }, []
    proxies = extract_proxies(content, url)
    return {
        'url': url, 'success': True, 'content_hash': content_hash,
        'proxy_count': len(proxies), 'fetch_time_ms': elapsed,
        'changed': True, 'error': None
    }, proxies
```
### Deduplication
Workers may extract the same proxies from different URLs. Local
deduplication before testing:
```python
seen = set()
unique = []
for addr, proto, confidence in proxy_batch:
    if addr not in seen:
        seen.add(addr)
        unique.append((addr, proto, confidence))
proxy_batch = unique
```
### Proxy Testing
Reuse the existing `TargetTestJob` / `WorkerThread` pipeline. The only
change: proxies come from local extraction instead of master's claim
response. The test loop, result collection, and evaluation logic remain
identical.
## API Changes Summary
### New Endpoints
| Endpoint | Method | Purpose |
|----------|--------|---------|
| `/api/claim-urls` | GET | Worker claims batch of due URLs |
| `/api/report-urls` | POST | Worker reports URL fetch results |
| `/api/report-proxies` | POST | Worker reports proxy test results |
### Modified Endpoints
| Endpoint | Change |
|----------|--------|
| `/api/work` | Deprecated but kept for backward compatibility |
| `/api/results` | Deprecated but kept for backward compatibility |
### Unchanged Endpoints
| Endpoint | Reason |
|----------|--------|
| `/api/register` | Same registration flow |
| `/api/heartbeat` | Same health reporting |
| `/dashboard` | Still reads from same DB |
| `/proxies` | Still reads from proxylist |
## Schema Changes
### uris table additions
```sql
ALTER TABLE uris ADD COLUMN check_interval INT DEFAULT 3600;
ALTER TABLE uris ADD COLUMN working_ratio REAL DEFAULT 0.0;
ALTER TABLE uris ADD COLUMN avg_fetch_time INT DEFAULT 0;
ALTER TABLE uris ADD COLUMN last_worker TEXT;
ALTER TABLE uris ADD COLUMN yield_rate REAL DEFAULT 0.0;
```
### proxylist table additions
```sql
ALTER TABLE proxylist ADD COLUMN last_seen INT DEFAULT 0;
```
## Migration Strategy
### Phase 1: Add New Endpoints (non-breaking)
Add `/api/claim-urls`, `/api/report-urls`, `/api/report-proxies` to
`httpd.py`. Keep all existing endpoints working. Master still runs its
own `Leechered` threads.
Files: `httpd.py`, `dbs.py` (migrations)
### Phase 2: Worker V2 Mode
Add `--worker-v2` flag to `ppf.py`. When set, worker uses the new
URL-claiming loop instead of the proxy-claiming loop. Both modes coexist.
Old workers (`--worker`) continue working against `/api/work` and
`/api/results`. New workers (`--worker-v2`) use the new endpoints.
Files: `ppf.py`, `config.py`
### Phase 3: URL Scoring
Implement URL scoring in master based on worker feedback. Replace
`Leechered` timer-based scheduling with score-based scheduling. Master's
own fetching becomes a fallback for URLs no worker has claimed recently.
Files: `httpd.py`, `dbs.py`
### Phase 4: Remove Legacy
Once all workers run V2, remove `/api/work`, `/api/results`, and
master-side `Leechered` threads. Master no longer fetches proxy lists
directly.
Files: `ppf.py`, `httpd.py`
## Configuration
### New config.ini Options
```ini
[worker]
# V2 mode: worker fetches URLs instead of proxy batches
mode = v2 # v1 (legacy) or v2 (url-driven)
url_batch_size = 5 # URLs per claim cycle
max_proxies_per_cycle = 500 # Cap on proxies tested per cycle
fetch_timeout = 30 # Timeout for URL fetching (seconds)
[ppf]
# URL scoring weights
score_yield_weight = 1.0
score_quality_weight = 0.5
score_error_penalty = 0.3
score_stale_penalty = 0.1
# Proxy expiry
proxy_ttl = 14400 # Seconds before unseen proxy goes stale (4h)
proxy_ttl_dead = 43200 # Seconds before unseen proxy is killed (12h)
# Fallback: master fetches URLs not claimed by any worker
fallback_fetch = true
fallback_interval = 7200 # Seconds before master fetches unclaimed URL
```
## Risks and Mitigations
| Risk | Impact | Mitigation |
|------|--------|------------|
| Workers extract different proxy counts from same URL | Inconsistent proxy_count in reports | Use content_hash for dedup; only update yield_rate when hash changes |
| Tor exit blocks a source for one worker | Worker reports error for a working URL | Require 2+ consecutive errors before incrementing URL error count |
| Workers test same proxies redundantly | Wasted CPU | Master tracks which URLs are assigned to which workers; avoid assigning same URL to multiple workers in same cycle |
| Large proxy lists overwhelm worker memory | OOM on worker | Cap `max_proxies_per_cycle`; worker discards excess after dedup |
| Master restart loses claim state | Workers refetch recently-fetched URLs | Harmless -- just a redundant fetch. content_hash prevents duplicate work |
| `fetch.py` imports unavailable on worker image | ImportError | Verify worker Dockerfile includes fetch.py and dependencies |
## What Stays the Same
- `rocksock.py` -- No changes to proxy chain logic
- `connection_pool.py` -- Tor host selection unchanged
- `proxywatchd.py` core -- `TargetTestJob`, `WorkerThread`, `ProxyTestState`
remain identical. Only the job source changes.
- `fetch.py` -- Used on workers now, but the code itself doesn't change
- `httpd.py` dashboard/proxies -- Still reads from same `proxylist` table
- SQLite as storage -- No database engine change
## Open Questions
1. **Should workers share extracted proxy lists with each other?** Peer
exchange would reduce redundant fetching but adds protocol complexity.
Recommendation: no, keep it simple. Master deduplicates via
`INSERT OR REPLACE`.
2. **Should URL claiming be weighted by worker geography?** Some sources
may be accessible from certain Tor exits but not others.
Recommendation: defer. Let natural retries handle this; track
per-worker URL success rates for future optimization.
3. **What's the right `proxy_ttl`?** Too short and we churn proxies
needlessly. Too long and we serve stale data. Start with 4 hours,
tune based on observed proxy lifetime distribution.

View File

@@ -56,6 +56,8 @@ class FetchSession(object):
def fetch(self, url, head=False):
"""Fetch URL, reusing connection if possible."""
network_stats.set_category('scraper')
if isinstance(url, unicode):
url = url.encode('utf-8')
host, port, ssl, uri = _parse_url(url)
# Check if we can reuse existing connection
@@ -219,6 +221,10 @@ def extract_auth_proxies(content):
"""
proxies = []
# Short-circuit: auth proxies always contain @
if '@' not in content:
return proxies
# IPv4 auth proxies
for match in AUTH_PROXY_PATTERN.finditer(content):
proto_str, user, passwd, ip, port = match.groups()
@@ -254,6 +260,12 @@ TABLE_PORT_HEADERS = ('port',)
TABLE_PROTO_HEADERS = ('type', 'protocol', 'proto', 'scheme')
_TABLE_PATTERN = re.compile(r'<table[^>]*>(.*?)</table>', re.IGNORECASE | re.DOTALL)
_ROW_PATTERN = re.compile(r'<tr[^>]*>(.*?)</tr>', re.IGNORECASE | re.DOTALL)
_CELL_PATTERN = re.compile(r'<t[hd][^>]*>(.*?)</t[hd]>', re.IGNORECASE | re.DOTALL)
_TAG_STRIP = re.compile(r'<[^>]+>')
def extract_proxies_from_table(content):
"""Extract proxies from HTML tables with IP/Port/Protocol columns.
@@ -267,26 +279,23 @@ def extract_proxies_from_table(content):
"""
proxies = []
# Simple regex-based table parsing (works without BeautifulSoup)
# Find all tables
table_pattern = re.compile(r'<table[^>]*>(.*?)</table>', re.IGNORECASE | re.DOTALL)
row_pattern = re.compile(r'<tr[^>]*>(.*?)</tr>', re.IGNORECASE | re.DOTALL)
cell_pattern = re.compile(r'<t[hd][^>]*>(.*?)</t[hd]>', re.IGNORECASE | re.DOTALL)
tag_strip = re.compile(r'<[^>]+>')
# Short-circuit: no HTML tables in plain text content
if '<table' not in content and '<TABLE' not in content:
return proxies
for table_match in table_pattern.finditer(content):
for table_match in _TABLE_PATTERN.finditer(content):
table_html = table_match.group(1)
rows = row_pattern.findall(table_html)
rows = _ROW_PATTERN.findall(table_html)
if not rows:
continue
# Parse header row to find column indices
ip_col = port_col = proto_col = -1
header_row = rows[0]
headers = cell_pattern.findall(header_row)
headers = _CELL_PATTERN.findall(header_row)
for i, cell in enumerate(headers):
cell_text = tag_strip.sub('', cell).strip().lower()
cell_text = _TAG_STRIP.sub('', cell).strip().lower()
if ip_col < 0 and any(h in cell_text for h in TABLE_IP_HEADERS):
ip_col = i
elif port_col < 0 and any(h in cell_text for h in TABLE_PORT_HEADERS):
@@ -300,11 +309,11 @@ def extract_proxies_from_table(content):
# Parse data rows
for row in rows[1:]:
cells = cell_pattern.findall(row)
cells = _CELL_PATTERN.findall(row)
if len(cells) <= ip_col:
continue
ip_cell = tag_strip.sub('', cells[ip_col]).strip()
ip_cell = _TAG_STRIP.sub('', cells[ip_col]).strip()
# Check if IP cell contains port (ip:port format)
if ':' in ip_cell and port_col < 0:
@@ -313,7 +322,7 @@ def extract_proxies_from_table(content):
ip, port = match.groups()
proto = None
if proto_col >= 0 and len(cells) > proto_col:
proto = _normalize_proto(tag_strip.sub('', cells[proto_col]).strip())
proto = _normalize_proto(_TAG_STRIP.sub('', cells[proto_col]).strip())
addr = '%s:%s' % (ip, port)
if is_usable_proxy(addr):
proxies.append((addr, proto))
@@ -321,7 +330,7 @@ def extract_proxies_from_table(content):
# Separate IP and Port columns
if port_col >= 0 and len(cells) > port_col:
port_cell = tag_strip.sub('', cells[port_col]).strip()
port_cell = _TAG_STRIP.sub('', cells[port_col]).strip()
try:
port = int(port_cell)
except ValueError:
@@ -333,7 +342,7 @@ def extract_proxies_from_table(content):
proto = None
if proto_col >= 0 and len(cells) > proto_col:
proto = _normalize_proto(tag_strip.sub('', cells[proto_col]).strip())
proto = _normalize_proto(_TAG_STRIP.sub('', cells[proto_col]).strip())
addr = '%s:%d' % (ip_cell, port)
if is_usable_proxy(addr):
@@ -356,6 +365,10 @@ def extract_proxies_from_json(content):
"""
proxies = []
# Short-circuit: content must contain JSON delimiters
if '{' not in content and '[' not in content:
return proxies
# Try to find JSON in content (may be embedded in HTML)
json_matches = []
@@ -489,6 +502,8 @@ def fetch_contents(url, head=False, proxy=None):
retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded')
def _fetch_contents(url, head = False, proxy=None):
network_stats.set_category('scraper')
if isinstance(url, unicode):
url = url.encode('utf-8')
host, port, ssl, uri = _parse_url(url)
headers=[
'Accept-Language: en-US,en;q=0.8',

1107
httpd.py

File diff suppressed because it is too large

509
ppf.py
View File

@@ -5,10 +5,10 @@ __version__ = '2.0.0'
import sys
import os
# Worker mode requires gevent - must monkey-patch before other imports
if '--worker' in sys.argv or '--register' in sys.argv:
from gevent import monkey
monkey.patch_all()
# Gevent monkey-patch MUST happen before any other imports
# Both master (httpd) and worker modes use gevent for async I/O
from gevent import monkey
monkey.patch_all()
import cProfile
import pstats
@@ -303,48 +303,6 @@ class NeedReregister(Exception):
pass
def worker_get_work(server_url, worker_key, count=100):
"""Fetch batch of proxies from master."""
url = '%s/api/work?key=%s&count=%d' % (server_url.rstrip('/'), worker_key, count)
try:
resp = urllib2.urlopen(url, timeout=30)
result = json.loads(resp.read())
return result.get('proxies', [])
except urllib2.HTTPError as e:
if e.code == 403:
_log('worker key rejected (403), need to re-register', 'warn')
raise NeedReregister()
_log('failed to get work: %s' % e, 'error')
return []
except Exception as e:
_log('failed to get work: %s' % e, 'error')
return []
def worker_submit_results(server_url, worker_key, results):
"""Submit test results to master."""
url = '%s/api/results?key=%s' % (server_url.rstrip('/'), worker_key)
data = json.dumps({'results': results})
req = urllib2.Request(url, data)
req.add_header('Content-Type', 'application/json')
try:
resp = urllib2.urlopen(req, timeout=30)
result = json.loads(resp.read())
return result.get('processed', 0)
except urllib2.HTTPError as e:
if e.code == 403:
_log('worker key rejected (403), need to re-register', 'warn')
raise NeedReregister()
_log('failed to submit results: %s' % e, 'error')
return 0
except Exception as e:
_log('failed to submit results: %s' % e, 'error')
return 0
def worker_send_heartbeat(server_url, worker_key, tor_ok, tor_ip=None, profiling=False, threads=0):
"""Send heartbeat with Tor status to master."""
url = '%s/api/heartbeat?key=%s' % (server_url.rstrip('/'), worker_key)
@@ -369,6 +327,71 @@ def worker_send_heartbeat(server_url, worker_key, tor_ok, tor_ip=None, profiling
return False
def worker_claim_urls(server_url, worker_key, count=5):
"""Claim batch of URLs for worker mode."""
url = '%s/api/claim-urls?key=%s&count=%d' % (server_url.rstrip('/'), worker_key, count)
try:
resp = urllib2.urlopen(url, timeout=30)
result = json.loads(resp.read())
return result.get('urls', [])
except urllib2.HTTPError as e:
if e.code == 403:
_log('worker key rejected (403), need to re-register', 'warn')
raise NeedReregister()
_log('failed to claim urls: %s' % e, 'error')
return []
except Exception as e:
_log('failed to claim urls: %s' % e, 'error')
return []
def worker_report_urls(server_url, worker_key, reports):
"""Report URL fetch results to master."""
url = '%s/api/report-urls?key=%s' % (server_url.rstrip('/'), worker_key)
data = json.dumps({'reports': reports})
req = urllib2.Request(url, data)
req.add_header('Content-Type', 'application/json')
try:
resp = urllib2.urlopen(req, timeout=30)
result = json.loads(resp.read())
return result.get('processed', 0)
except urllib2.HTTPError as e:
if e.code == 403:
_log('worker key rejected (403), need to re-register', 'warn')
raise NeedReregister()
_log('failed to report urls: %s' % e, 'error')
return 0
except Exception as e:
_log('failed to report urls: %s' % e, 'error')
return 0
def worker_report_proxies(server_url, worker_key, proxies):
"""Report working proxies to master."""
url = '%s/api/report-proxies?key=%s' % (server_url.rstrip('/'), worker_key)
data = json.dumps({'proxies': proxies})
req = urllib2.Request(url, data)
req.add_header('Content-Type', 'application/json')
try:
resp = urllib2.urlopen(req, timeout=30)
result = json.loads(resp.read())
return result.get('processed', 0)
except urllib2.HTTPError as e:
if e.code == 403:
_log('worker key rejected (403), need to re-register', 'warn')
raise NeedReregister()
_log('failed to report proxies: %s' % e, 'error')
return 0
except Exception as e:
_log('failed to report proxies: %s' % e, 'error')
return 0
def check_tor_connectivity(tor_hosts):
"""Test Tor connectivity. Returns (working_hosts, tor_ip)."""
import socket
@@ -406,7 +429,11 @@ def check_tor_connectivity(tor_hosts):
def worker_main(config):
"""Worker mode main loop - uses proxywatchd multi-threaded testing."""
"""Worker mode -- URL-driven discovery.
Claims URLs from master, fetches through Tor, extracts and tests proxies,
reports working proxies back to master.
"""
import json
global urllib2
@@ -415,7 +442,6 @@ def worker_main(config):
except ImportError:
import queue as Queue
# Import proxywatchd for multi-threaded testing (gevent already patched at top)
import proxywatchd
proxywatchd.set_config(config)
@@ -426,8 +452,9 @@ def worker_main(config):
worker_key = config.args.worker_key
worker_name = config.args.worker_name or os.uname()[1]
batch_size = config.worker.batch_size
num_threads = config.watchd.threads
url_batch_size = config.worker.url_batch_size
worker_id = None
# Register if --register flag or no key provided
if config.args.register or not worker_key:
@@ -441,24 +468,22 @@ def worker_main(config):
_log('save this key with --worker-key for future runs', 'info')
if config.args.register:
# Just register and exit
return
_log('starting worker mode', 'info')
_log('starting worker mode (URL-driven)', 'info')
_log(' server: %s' % server_url, 'info')
_log(' threads: %d' % num_threads, 'info')
_log(' batch size: %d' % batch_size, 'info')
_log(' url batch: %d' % url_batch_size, 'info')
_log(' cache ttl: %s' % ('%ds' % config.worker.cache_ttl if config.worker.cache_ttl > 0 else 'disabled'), 'info')
_log(' tor hosts: %s' % config.common.tor_hosts, 'info')
# Verify Tor connectivity before claiming work
import socket
# Verify Tor connectivity before starting
import socks
working_tor_hosts = []
for tor_host in config.torhosts:
host, port = tor_host.split(':')
port = int(port)
try:
# Test SOCKS connection
test_sock = socks.socksocket()
test_sock.set_proxy(socks.SOCKS5, host, port)
test_sock.settimeout(10)
@@ -466,13 +491,12 @@ def worker_main(config):
test_sock.send(b'GET / HTTP/1.0\r\nHost: check.torproject.org\r\n\r\n')
resp = test_sock.recv(512)
test_sock.close()
# Accept any HTTP response (200, 301, 302, etc.)
if resp and (b'HTTP/' in resp or len(resp) > 0):
status = resp.split(b'\r\n')[0] if b'\r\n' in resp else resp[:50]
_log('tor host %s:%d OK (%s)' % (host, port, status), 'info')
working_tor_hosts.append(tor_host)
else:
_log('tor host %s:%d no response (recv=%d bytes)' % (host, port, len(resp) if resp else 0), 'warn')
_log('tor host %s:%d no response' % (host, port), 'warn')
except Exception as e:
_log('tor host %s:%d failed: %s' % (host, port, e), 'warn')
@@ -486,25 +510,33 @@ def worker_main(config):
job_queue = proxywatchd.PriorityJobQueue()
completion_queue = Queue.Queue()
# Spawn worker threads with stagger to avoid overwhelming Tor
# Spawn worker threads
threads = []
for i in range(num_threads):
wt = proxywatchd.WorkerThread('w%d' % i, job_queue)
wt.start_thread()
threads.append(wt)
time.sleep(random.random() / 10) # 0-100ms stagger per thread
time.sleep(random.random() / 10)
_log('spawned %d worker threads' % len(threads), 'info')
jobs_completed = 0
proxies_tested = 0
# Session for fetching URLs through Tor
session = fetch.FetchSession()
cycles = 0
urls_fetched = 0
proxies_found = 0
proxies_working = 0
start_time = time.time()
current_tor_ip = None
consecutive_tor_failures = 0
worker_profiling = config.args.profile or config.common.profiling
# Use dict to allow mutation in nested function (Python 2 compatible)
wstate = {'worker_key': worker_key, 'worker_id': worker_id, 'backoff': 10}
# Local proxy test cache: addr -> (timestamp, success, result_dict_or_None)
cache_ttl = config.worker.cache_ttl
proxy_cache = {} if cache_ttl > 0 else None
def do_register():
"""Register with master, with exponential backoff on failure."""
while True:
@@ -513,13 +545,13 @@ def worker_main(config):
if new_key:
wstate['worker_id'] = new_id
wstate['worker_key'] = new_key
wstate['backoff'] = 10 # Reset backoff on success
wstate['backoff'] = 10
_log('registered as %s (id: %s)' % (worker_name, new_id), 'info')
return True
else:
_log('registration failed, retrying in %ds' % wstate['backoff'], 'warn')
time.sleep(wstate['backoff'])
wstate['backoff'] = min(wstate['backoff'] * 2, 300) # Max 5 min backoff
wstate['backoff'] = min(wstate['backoff'] * 2, 300)
def wait_for_tor():
"""Wait for Tor to become available, checking every 30 seconds."""
@@ -528,14 +560,12 @@ def worker_main(config):
working, tor_ip = check_tor_connectivity(config.torhosts)
if working:
_log('tor recovered: %s (%s)' % (working[0], tor_ip or 'unknown'), 'info')
# Send heartbeat to manager
try:
worker_send_heartbeat(server_url, wstate['worker_key'], True, tor_ip, worker_profiling, num_threads)
except NeedReregister:
do_register()
return working, tor_ip
_log('tor still down, retrying in %ds' % check_interval, 'warn')
# Send heartbeat with tor_ok=False
try:
worker_send_heartbeat(server_url, wstate['worker_key'], False, None, worker_profiling, num_threads)
except NeedReregister:
@@ -544,17 +574,17 @@ def worker_main(config):
try:
while True:
# Tor check before claiming work - don't claim if Tor is down
# Tor connectivity check
working, tor_ip = check_tor_connectivity(config.torhosts)
if not working:
consecutive_tor_failures += 1
_log('tor down before claiming work (consecutive: %d)' % consecutive_tor_failures, 'warn')
_log('tor down before claiming URLs (consecutive: %d)' % consecutive_tor_failures, 'warn')
try:
worker_send_heartbeat(server_url, wstate['worker_key'], False, None, worker_profiling, num_threads)
except NeedReregister:
do_register()
if consecutive_tor_failures >= 2:
_log('tor appears down, waiting before claiming work', 'error')
_log('tor appears down, waiting before claiming URLs', 'error')
working, current_tor_ip = wait_for_tor()
consecutive_tor_failures = 0
else:
@@ -566,55 +596,187 @@ def worker_main(config):
if current_tor_ip:
_log('tor circuit rotated: %s' % tor_ip, 'info')
current_tor_ip = tor_ip
# Send heartbeat to manager
try:
worker_send_heartbeat(server_url, wstate['worker_key'], True, tor_ip, worker_profiling, num_threads)
except NeedReregister:
do_register()
# Get work from master
# Claim URLs from master
try:
proxies = worker_get_work(server_url, wstate['worker_key'], batch_size)
url_infos = worker_claim_urls(server_url, wstate['worker_key'], url_batch_size)
except NeedReregister:
do_register()
continue
if not proxies:
_log('no work available, sleeping 30s', 'info')
if not url_infos:
_log('no URLs available, sleeping 30s', 'info')
time.sleep(30)
continue
_log('received %d proxies to test' % len(proxies), 'info')
_log('claimed %d URLs to process' % len(url_infos), 'info')
# Create ProxyTestState and jobs for each proxy
# Phase 1: Fetch URLs and extract proxies
url_reports = []
all_extracted = [] # list of (addr, proto, confidence, source_url)
for url_info in url_infos:
url = url_info.get('url', '')
last_hash = url_info.get('last_hash')
proto_hint = url_info.get('proto_hint')
fetch_start = time.time()
try:
content = session.fetch(url)
except Exception as e:
_log('%s: fetch error: %s' % (url.split('/')[2] if '/' in url else url, e), 'error')
content = None
fetch_time_ms = int((time.time() - fetch_start) * 1000)
urls_fetched += 1
if not content:
url_reports.append({
'url': url,
'success': False,
'content_hash': None,
'proxy_count': 0,
'fetch_time_ms': fetch_time_ms,
'changed': False,
'error': 'fetch failed',
})
continue
# Detect protocol from URL path
proto = fetch.detect_proto_from_path(url) or proto_hint
# Extract proxies (no filter_known -- workers have no proxydb)
extracted = fetch.extract_proxies(content, filter_known=False, proto=proto)
# Compute hash of extracted proxy list
content_hash = dbs.compute_proxy_list_hash(extracted)
if content_hash and last_hash and content_hash == last_hash:
# Content unchanged
url_reports.append({
'url': url,
'success': True,
'content_hash': content_hash,
'proxy_count': len(extracted),
'fetch_time_ms': fetch_time_ms,
'changed': False,
'error': None,
})
host = url.split('/')[2] if '/' in url else url
_log('%s: unchanged (%d proxies, hash match)' % (host, len(extracted)), 'stale')
continue
# Content changed or first fetch
for addr, pr, conf in extracted:
all_extracted.append((addr, pr, conf, url))
url_reports.append({
'url': url,
'success': True,
'content_hash': content_hash,
'proxy_count': len(extracted),
'fetch_time_ms': fetch_time_ms,
'changed': True,
'error': None,
})
host = url.split('/')[2] if '/' in url else url
_log('%s: %d proxies extracted' % (host, len(extracted)), 'info')
# Report URL health to master
if url_reports:
try:
worker_report_urls(server_url, wstate['worker_key'], url_reports)
except NeedReregister:
do_register()
try:
worker_report_urls(server_url, wstate['worker_key'], url_reports)
except NeedReregister:
_log('still rejected after re-register, discarding url reports', 'error')
# Deduplicate extracted proxies by address
seen = set()
unique_proxies = []
source_map = {} # addr -> first source_url
for addr, pr, conf, source_url in all_extracted:
if addr not in seen:
seen.add(addr)
unique_proxies.append((addr, pr, conf))
source_map[addr] = source_url
proxies_found += len(unique_proxies)
if not unique_proxies:
cycles += 1
time.sleep(1)
continue
# Filter against local test cache
cached_working = []
if proxy_cache is not None:
now = time.time()
uncached = []
cache_hits = 0
for addr, pr, conf in unique_proxies:
# Normalize to ip:port for cache lookup (strip auth prefix)
cache_key = addr.split('@')[-1] if '@' in addr else addr
entry = proxy_cache.get(cache_key)
if entry and (now - entry[0]) < cache_ttl:
cache_hits += 1
if entry[1]: # cached success
cached_working.append(entry[2])
else:
uncached.append((addr, pr, conf))
if cache_hits:
_log('%d cached (%d working), %d to test' % (
cache_hits, len(cached_working), len(uncached)), 'info')
unique_proxies = uncached
if not unique_proxies:
# All proxies were cached, nothing to test
cycles += 1
time.sleep(1)
continue
_log('testing %d unique proxies' % len(unique_proxies), 'info')
# Phase 2: Test extracted proxies using worker thread pool
pending_states = {}
all_jobs = []
# Get checktype(s) from config
checktypes = config.watchd.checktypes
for proxy_info in proxies:
ip = proxy_info['ip']
port = proxy_info['port']
proto = proxy_info.get('proto', 'http')
failed = proxy_info.get('failed', 0)
for addr, pr, conf in unique_proxies:
# Parse ip:port from addr (may contain auth: user:pass@ip:port)
addr_part = addr.split('@')[-1] if '@' in addr else addr
# Handle IPv6 [ipv6]:port
if addr_part.startswith('['):
bracket_end = addr_part.index(']')
ip = addr_part[1:bracket_end]
port = int(addr_part[bracket_end+2:])
else:
ip, port_str = addr_part.rsplit(':', 1)
port = int(port_str)
proto = pr or 'http'
proxy_str = '%s:%d' % (ip, port)
# Create state for this proxy
state = proxywatchd.ProxyTestState(
ip, port, proto, failed,
ip, port, proto, 0,
success_count=0, total_duration=0.0,
country=None, mitm=0, consecutive_success=0,
asn=None, oldies=False,
completion_queue=completion_queue,
proxy_full=proxy_str
proxy_full=addr, source_proto=pr
)
pending_states[proxy_str] = state
# Select random checktype
checktype = random.choice(checktypes)
# Get target for this checktype
if checktype == 'judges':
available = proxywatchd.judge_stats.get_available_judges(
list(proxywatchd.judges.keys()))
@@ -630,80 +792,142 @@ def worker_main(config):
job = proxywatchd.TargetTestJob(state, target, checktype)
all_jobs.append(job)
# Shuffle and queue jobs
random.shuffle(all_jobs)
for job in all_jobs:
job_queue.put(job, priority=0)
# Wait for all jobs to complete
# Wait for completion
completed = 0
results = []
timeout_start = time.time()
timeout_seconds = config.watchd.timeout * 2 + 30 # generous timeout
timeout_seconds = max(config.watchd.timeout * 2 + 30, len(all_jobs) * 0.5)
working_results = []
last_heartbeat = time.time()
last_report = time.time()
while completed < len(proxies):
while completed < len(all_jobs):
try:
state = completion_queue.get(timeout=1)
completed += 1
# Build result from state (failcount == 0 means success)
is_working = state.failcount == 0
latency_sec = (state.last_latency_ms / 1000.0) if state.last_latency_ms else 0
result = {
'ip': state.ip,
'port': state.port,
'proto': state.proto,
'working': is_working,
'latency': round(latency_sec, 3) if is_working else 0,
'error': None if is_working else 'failed',
}
results.append(result)
success, _ = state.evaluate()
if success:
latency_sec = (state.last_latency_ms / 1000.0) if state.last_latency_ms else 0
proxy_addr = state.proxy
if state.auth:
proxy_addr = '%s@%s' % (state.auth, state.proxy)
# Progress logging
if completed % 20 == 0 or completed == len(proxies):
working = sum(1 for r in results if r.get('working'))
working_results.append({
'ip': state.ip,
'port': state.port,
'proto': state.proto,
'source_proto': state.source_proto,
'latency': round(latency_sec, 3),
'exit_ip': state.exit_ip,
'source_url': source_map.get(proxy_addr) or source_map.get(state.proxy, ''),
'checktype': state.last_check or '',
'target': state.last_target or '',
})
if completed % 50 == 0 or completed == len(all_jobs):
_log('tested %d/%d proxies (%d working)' % (
completed, len(proxies), working), 'info')
completed, len(all_jobs), len(working_results)), 'info')
except Queue.Empty:
if time.time() - timeout_start > timeout_seconds:
_log('batch timeout, %d/%d completed' % (completed, len(proxies)), 'warn')
_log('test timeout, %d/%d completed' % (completed, len(all_jobs)), 'warn')
break
# Periodic heartbeat to prevent stale detection
now = time.time()
if now - last_heartbeat >= 60:
try:
worker_send_heartbeat(server_url, wstate['worker_key'],
True, current_tor_ip, worker_profiling, num_threads)
except NeedReregister:
do_register()
last_heartbeat = now
# Periodic proxy report (flush working results every 5 minutes)
if working_results and now - last_report >= 300:
reported = False
try:
processed = worker_report_proxies(server_url, wstate['worker_key'],
working_results)
if processed > 0:
_log('interim report: %d proxies (%d submitted)' % (
len(working_results), processed), 'info')
reported = True
except NeedReregister:
do_register()
try:
processed = worker_report_proxies(server_url, wstate['worker_key'],
working_results)
if processed > 0:
reported = True
except NeedReregister:
pass
if reported:
working_results = []
last_report = now
continue
# Submit results
try:
processed = worker_submit_results(server_url, wstate['worker_key'], results)
except NeedReregister:
do_register()
# Retry submission with new key
# Populate proxy test cache from results
if proxy_cache is not None:
now = time.time()
working_addrs = set()
for r in working_results:
addr = '%s:%d' % (r['ip'], r['port'])
proxy_cache[addr] = (now, True, r)
working_addrs.add(addr)
# Cache failures for tested proxies that didn't succeed
for proxy_str in pending_states:
if proxy_str not in working_addrs:
proxy_cache[proxy_str] = (now, False, None)
proxies_working += len(working_results)
# Report working proxies to master
if working_results:
try:
processed = worker_submit_results(server_url, wstate['worker_key'], results)
processed = worker_report_proxies(server_url, wstate['worker_key'], working_results)
except NeedReregister:
_log('still rejected after re-register, discarding batch', 'error')
processed = 0
do_register()
try:
processed = worker_report_proxies(server_url, wstate['worker_key'], working_results)
except NeedReregister:
_log('still rejected after re-register, discarding proxy reports', 'error')
processed = 0
_log('reported %d working proxies (submitted %d)' % (len(working_results), processed), 'info')
jobs_completed += 1
proxies_tested += len(results)
cycles += 1
working = sum(1 for r in results if r.get('working'))
_log('batch %d: %d/%d working, submitted %d' % (
jobs_completed, working, len(results), processed), 'info')
# Periodic cache cleanup: evict expired entries every 10 cycles
if proxy_cache is not None and cycles % 10 == 0:
now = time.time()
expired = [k for k, v in proxy_cache.items() if (now - v[0]) >= cache_ttl]
if expired:
for k in expired:
del proxy_cache[k]
_log('cache cleanup: evicted %d expired, %d remaining' % (len(expired), len(proxy_cache)), 'info')
# Brief pause between batches
time.sleep(1)
except KeyboardInterrupt:
elapsed = time.time() - start_time
_log('worker stopping...', 'info')
# Stop threads
session.close()
for wt in threads:
wt.stop()
for wt in threads:
wt.term()
_log('worker stopped after %s' % format_duration(int(elapsed)), 'info')
_log(' jobs completed: %d' % jobs_completed, 'info')
_log(' proxies tested: %d' % proxies_tested, 'info')
_log(' cycles: %d' % cycles, 'info')
_log(' urls fetched: %d' % urls_fetched, 'info')
_log(' proxies found: %d' % proxies_found, 'info')
_log(' proxies working: %d' % proxies_working, 'info')
if proxy_cache is not None:
_log(' cache entries: %d' % len(proxy_cache), 'info')
def main():
@@ -718,7 +942,7 @@ def main():
else:
sys.exit(1)
# Worker mode: connect to master server instead of running locally
# Worker mode: URL-driven discovery
if config.args.worker or config.args.register:
worker_main(config)
return
@@ -747,8 +971,14 @@ def main():
watcherd = None
# Start httpd independently when watchd is disabled
if config.httpd.enabled:
from httpd import ProxyAPIServer
from httpd import ProxyAPIServer, configure_url_scoring
import network_stats
configure_url_scoring(
config.ppf.checktime,
config.ppf.perfail_checktime,
config.ppf.max_fail,
config.ppf.list_max_age_days
)
def httpd_stats_provider():
"""Stats provider for httpd-only mode (scraping without testing)."""
@@ -780,6 +1010,7 @@ def main():
config.watchd.database,
stats_provider=httpd_stats_provider,
profiling=profiling,
url_database=config.ppf.database,
)
httpd_server.start()
@@ -814,8 +1045,20 @@ def main():
statusmsg = time.time()
list_max_age_seconds = config.ppf.list_max_age_days * 86400
last_skip_log = 0
last_reseed = time.time()
reseed_interval = 6 * 3600 # re-seed sources every 6 hours
while True:
try:
# Periodic re-seeding: reset errored-out seed sources
if time.time() - last_reseed >= reseed_interval:
dbs.seed_proxy_sources(urldb, reset_errors=True)
last_reseed = time.time()
# When ppf threads = 0, skip URL fetching (workers handle it via /api/claim-urls)
if config.ppf.threads == 0:
time.sleep(60)
continue
time.sleep(random.random()/10)
if (time.time() - statusmsg) > 180:
_log('running %d thread(s) over %d' % (len(threads), config.ppf.threads), 'ppf')
@@ -838,9 +1081,6 @@ def main():
else:
_log('handing %d job(s) to %d thread(s)' % ( len(rows), config.ppf.threads ), 'ppf')
_proxylist = [ '%s://%s' % (p[0], p[1]) for p in proxydb.execute("SELECT proto,proxy from proxylist where failed=0 AND tested IS NOT NULL AND proto IN ('http','socks4','socks5')").fetchall() ]
if not _proxylist: _proxylist = None
for thread in threads:
if thread.status == 'ok':
url, proxylist, stale_count, error, retrievals, content_type, proxies_added, execute = thread.retrieve()
@@ -857,6 +1097,9 @@ def main():
threads = [ thread for thread in threads if thread.is_alive() ]
if len(threads) < config.ppf.threads and rows:
# Only query proxydb when actually starting a new thread (reduces GIL blocking)
_proxylist = [ '%s://%s' % (p[0], p[1]) for p in proxydb.execute("SELECT proto,proxy from proxylist where failed=0 AND tested IS NOT NULL AND proto IN ('http','socks4','socks5')").fetchall() ]
if not _proxylist: _proxylist = None
p = random.sample(_proxylist, min(5, len(_proxylist))) if _proxylist else None
row = random.choice(rows)
urldb.execute('UPDATE uris SET check_time=? where url=?', (time.time(), row[0]))

View File

@@ -44,7 +44,7 @@ import dns
from misc import _log, categorize_error, tor_proxy_url, is_ssl_protocol_error
import rocksock
import connection_pool
from stats import JudgeStats, Stats, regexes, ssl_targets, try_div
from stats import TargetStats, JudgeStats, Stats, regexes, ssl_targets, try_div
from mitm import MITMCertStats, extract_cert_info, get_mitm_certificate
from dns import socks4_resolve
from job import PriorityJobQueue, calculate_priority
@@ -142,6 +142,20 @@ def is_valid_ip(ip_str):
except (ValueError, AttributeError):
return False
def is_public_ip(ip_str):
"""Validate IP is a public, globally routable address."""
if not is_valid_ip(ip_str):
return False
parts = [int(p) for p in ip_str.split('.')]
if parts[0] == 0: return False # 0.0.0.0/8
if parts[0] == 10: return False # 10.0.0.0/8
if parts[0] == 127: return False # 127.0.0.0/8
if parts[0] == 169 and parts[1] == 254: return False # link-local
if parts[0] == 172 and 16 <= parts[1] <= 31: return False # 172.16/12
if parts[0] == 192 and parts[1] == 168: return False # 192.168/16
if parts[0] >= 224: return False # multicast + reserved
return True
# Pattern for header echo - if X-Forwarded-For or Via present, proxy reveals chain
HEADER_REVEAL_PATTERN = r'(X-Forwarded-For|Via|X-Real-Ip|Forwarded)'
@@ -150,9 +164,9 @@ DEAD_PROXY = -1
# Error categories that indicate proxy is definitely dead (not temporary failure)
FATAL_ERROR_CATEGORIES = ('refused', 'unreachable', 'auth')
# Patterns indicating judge is blocking the proxy (not a proxy failure)
# These should NOT count as proxy failures - retry with different judge
JUDGE_BLOCK_PATTERNS = [
# Patterns indicating HTTP target is blocking the proxy (not a proxy failure)
# These should NOT count as proxy failures - applies to judges and head targets
HTTP_BLOCK_PATTERNS = [
r'HTTP/1\.[01] 403', # Forbidden
r'HTTP/1\.[01] 429', # Too Many Requests
r'HTTP/1\.[01] 503', # Service Unavailable
@@ -165,7 +179,7 @@ JUDGE_BLOCK_PATTERNS = [
r'blocked', # Explicit block
r'Checking your browser', # Cloudflare JS challenge
]
JUDGE_BLOCK_RE = re.compile('|'.join(JUDGE_BLOCK_PATTERNS), re.IGNORECASE)
HTTP_BLOCK_RE = re.compile('|'.join(HTTP_BLOCK_PATTERNS), re.IGNORECASE)
# Check types: irc, http (header match), judges (body match), ssl (TLS handshake)
# Judge services - return IP in body (plain text, JSON, or HTML)
@@ -199,6 +213,9 @@ judges = {
# Global instances
judge_stats = JudgeStats()
head_target_stats = TargetStats(cooldown_seconds=300, block_threshold=3)
ssl_target_stats = TargetStats(cooldown_seconds=300, block_threshold=3)
irc_target_stats = TargetStats(cooldown_seconds=300, block_threshold=3)
mitm_cert_stats = MITMCertStats()
@@ -285,12 +302,13 @@ class ProxyTestState(object):
'asn', 'isoldies', 'completion_queue', 'lock', 'results', 'completed',
'evaluated', 'last_latency_ms', 'exit_ip', 'reveals_headers',
'last_fail_category', 'original_failcount', 'had_ssl_test', 'ssl_success',
'cert_error'
'cert_error', 'source_proto', 'protos_working',
'last_check', 'last_target'
)
def __init__(self, ip, port, proto, failcount, success_count, total_duration,
country, mitm, consecutive_success, asn=None, oldies=False,
completion_queue=None, proxy_full=None):
completion_queue=None, proxy_full=None, source_proto=None):
self.ip = ip
self.port = int(port)
self.proxy = '%s:%s' % (ip, port)
@@ -326,6 +344,12 @@ class ProxyTestState(object):
self.had_ssl_test = False
self.ssl_success = False
self.cert_error = False
# Protocol fingerprinting
self.source_proto = source_proto
self.protos_working = None
# Test provenance
self.last_check = None
self.last_target = None
def record_result(self, success, proto=None, duration=0, srv=None, tor=None, ssl=None, category=None, exit_ip=None, reveals_headers=None):
"""Record a single target test result. Thread-safe.
@@ -390,10 +414,21 @@ class ProxyTestState(object):
self.evaluated = True
self.checktime = int(time.time())
successes = [r for r in self.results if r['success']]
failures = [r for r in self.results if not r['success']]
# Filter out target_block results (inconclusive, neither pass nor fail)
block_cats = ('judge_block', 'target_block')
real_results = [r for r in self.results if r.get('category') not in block_cats]
successes = [r for r in real_results if r['success']]
failures = [r for r in real_results if not r['success']]
num_success = len(successes)
_dbg('evaluate: %d success, %d fail, results=%d' % (num_success, len(failures), len(self.results)), self.proxy)
target_blocks = len(self.results) - len(real_results)
_dbg('evaluate: %d success, %d fail, %d target_block, results=%d' % (
num_success, len(failures), target_blocks, len(self.results)), self.proxy)
# All results were target blocks: inconclusive, preserve current state
if not real_results and self.results:
_dbg('all results inconclusive (target_block), no state change', self.proxy)
self.failcount = self.original_failcount
return (self.original_failcount == 0, None)
# Determine dominant failure category
fail_category = None
@@ -432,15 +467,30 @@ class ProxyTestState(object):
if config.watchd.debug:
_log('ASN lookup failed for %s: %s' % (self.ip, e), 'debug')
self.proto = last_good['proto']
# Collect all distinct working protocols
working_protos = set()
for s in successes:
if s.get('proto'):
working_protos.add(s['proto'])
if working_protos:
self.protos_working = ','.join(sorted(working_protos))
# Pick most specific protocol: socks5 > socks4 > http
for best in ('socks5', 'socks4', 'http'):
if best in working_protos:
self.proto = best
break
else:
self.proto = last_good['proto']
else:
self.proto = last_good['proto']
self.failcount = 0
# Only reset mitm after 3 consecutive clean successes (not on first success)
# and only if this test didn't detect MITM
if self.consecutive_success > 0 and (self.consecutive_success % 3) == 0 and not self.cert_error:
self.mitm = 0
self.consecutive_success += 1
self.success_count += 1
self.total_duration += int(last_good['duration'] * 1000)
self.consecutive_success = (self.consecutive_success or 0) + 1
self.success_count = (self.success_count or 0) + 1
self.total_duration = (self.total_duration or 0) + int(last_good['duration'] * 1000)
# Calculate average latency from successful tests (in ms)
durations = [s['duration'] for s in successes if s['duration']]
@@ -501,6 +551,9 @@ class TargetTestJob(object):
_log('JOB RUN #%d: %s -> %s (%s)' % (_sample_debug_counter,
self.proxy_state.proxy, self.target_srv, self.checktype), 'info')
network_stats.set_category('proxy')
# Track test provenance (overwritten on each attempt, last success wins)
self.proxy_state.last_check = self.checktype
self.proxy_state.last_target = self.target_srv
_dbg('test start: %s via %s' % (self.target_srv, self.checktype), self.proxy_state.proxy)
sock, proto, duration, tor, srv, failinc, is_ssl, err_cat = self._connect_and_test()
_dbg('connect result: sock=%s proto=%s err=%s' % (bool(sock), proto, err_cat), self.proxy_state.proxy)
@@ -529,6 +582,12 @@ class TargetTestJob(object):
recv = sock.recv(-1)
_sample_dbg('RECV: %d bytes from %s, first 80: %r' % (len(recv), srv, recv[:80]), self.proxy_state.proxy)
# Validate HTTP response for non-IRC checks
if self.checktype != 'irc' and not recv.startswith('HTTP/'):
_dbg('not an HTTP response, failing (first 40: %r)' % recv[:40], self.proxy_state.proxy)
self.proxy_state.record_result(False, category='bad_response')
return
# Select regex based on check type (or fallback target)
if 'check.torproject.org' in srv:
# Tor API fallback (judge using torproject.org)
@@ -553,7 +612,7 @@ class TargetTestJob(object):
reveals_headers = None
if self.checktype == 'judges' or 'check.torproject.org' in srv:
ip_match = re.search(IP_PATTERN, recv)
if ip_match and is_valid_ip(ip_match.group(0)):
if ip_match and is_public_ip(ip_match.group(0)):
exit_ip = ip_match.group(0)
if self.checktype == 'judges' and 'check.torproject.org' not in srv:
# Check for header echo judge (elite detection)
@@ -562,6 +621,10 @@ class TargetTestJob(object):
reveals_headers = bool(re.search(HEADER_REVEAL_PATTERN, recv, re.IGNORECASE))
# Record successful judge
judge_stats.record_success(srv)
elif self.checktype == 'head':
head_target_stats.record_success(srv)
elif self.checktype == 'irc':
irc_target_stats.record_success(srv)
self.proxy_state.record_result(
True, proto=proto, duration=elapsed,
srv=srv, tor=tor, ssl=is_ssl, exit_ip=exit_ip,
@@ -569,25 +632,28 @@ class TargetTestJob(object):
)
else:
_dbg('regex NO MATCH, recv[:100]=%r' % recv[:100], self.proxy_state.proxy)
# Check if judge is blocking us (not a proxy failure)
if self.checktype == 'judges' and JUDGE_BLOCK_RE.search(recv):
judge_stats.record_block(srv)
# Judge block = proxy worked, we got HTTP response, just no IP
# Count as success without exit_ip
block_elapsed = time.time() - duration
_dbg('judge BLOCK detected, counting as success', self.proxy_state.proxy)
# Check if HTTP target is blocking us (not a proxy failure)
if self.checktype in ('judges', 'head') and HTTP_BLOCK_RE.search(recv):
if self.checktype == 'judges':
judge_stats.record_block(srv)
else:
head_target_stats.record_block(srv)
_dbg('target BLOCK detected, skipping (neutral)', self.proxy_state.proxy)
self.proxy_state.record_result(
True, proto=proto, duration=block_elapsed,
srv=srv, tor=tor, ssl=is_ssl, exit_ip=None,
reveals_headers=None
False, category='target_block', proto=proto,
srv=srv, tor=tor, ssl=is_ssl
)
if config.watchd.debug:
_log('judge %s challenged proxy %s (counted as success)' % (
srv, self.proxy_state.proxy), 'debug')
_log('%s %s challenged proxy %s (neutral, skipped)' % (
self.checktype, srv, self.proxy_state.proxy), 'debug')
else:
_dbg('FAIL: no match, no block', self.proxy_state.proxy)
if self.checktype == 'judges':
judge_stats.record_failure(srv)
elif self.checktype == 'head':
head_target_stats.record_failure(srv)
elif self.checktype == 'irc':
irc_target_stats.record_failure(srv)
self.proxy_state.record_result(False, category='other')
except KeyboardInterrupt as e:
@@ -598,6 +664,129 @@ class TargetTestJob(object):
finally:
sock.disconnect()
def _build_proto_order(self):
"""Build smart protocol test order based on available intelligence.
Priority:
1. Previously successful proto (if set)
2. Source-detected proto (if different, confidence >= 60)
3. Remaining protos in default order: socks5, socks4, http
For failing proxies (failcount > 0 and proto known), only retest
with the known proto to save resources.
"""
ps = self.proxy_state
default_order = ['socks5', 'socks4', 'http']
# Known proto from previous test: only retest that
if ps.proto is not None:
# For failing proxies, skip multi-proto discovery
if ps.failcount > 0:
return [ps.proto]
# For working proxies, lead with known proto but try others
protos = [ps.proto]
# Add source hint if different
if ps.source_proto and ps.source_proto != ps.proto:
protos.append(ps.source_proto)
# Fill remaining
for p in default_order:
if p not in protos:
protos.append(p)
return protos
# Unknown proto: use source hint if available
protos = []
if ps.source_proto:
protos.append(ps.source_proto)
for p in default_order:
if p not in protos:
protos.append(p)
return protos
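A free-function restatement of _build_proto_order(), handy for eyeballing the resulting test orders (an illustrative sketch mirroring the method, not the method itself):

def build_proto_order(proto, failcount, source_proto):
    default_order = ['socks5', 'socks4', 'http']
    if proto is not None:
        if failcount > 0:
            return [proto]                 # failing proxy: retest known proto only
        order = [proto]
        if source_proto and source_proto != proto:
            order.append(source_proto)     # source hint second
    else:
        order = [source_proto] if source_proto else []
    order += [p for p in default_order if p not in order]
    return order

assert build_proto_order('http', 0, 'socks5') == ['http', 'socks5', 'socks4']
assert build_proto_order('socks4', 2, None) == ['socks4']
assert build_proto_order(None, 0, 'socks5') == ['socks5', 'socks4', 'http']
assert build_proto_order(None, 0, None) == ['socks5', 'socks4', 'http']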
def _fingerprint_protocol(self, pool):
"""Identify proxy protocol via lightweight handshake probes.
Sends protocol-specific greeting bytes directly to the proxy
and identifies the protocol from the response pattern.
Returns: 'socks5', 'socks4', 'http', or None
"""
ps = self.proxy_state
fp_timeout = min(config.watchd.timeout, 5)
torhost = pool.get_tor_host(self.worker_id) if pool else random.choice(config.torhosts)
for probe_fn, name in (
(self._probe_socks5, 'socks5'),
(self._probe_socks4, 'socks4'),
(self._probe_http, 'http'),
):
result = probe_fn(ps, torhost, fp_timeout)
if result:
_sample_dbg('fingerprint: %s detected' % result, ps.proxy)
return result
return None
def _probe_socks5(self, ps, torhost, timeout):
"""Probe for SOCKS5 protocol. Returns 'socks5' or None."""
try:
sock = rocksock.Rocksock(
host=ps.ip, port=int(ps.port),
proxies=[rocksock.RocksockProxyFromURL(tor_proxy_url(torhost))],
timeout=timeout
)
sock.connect()
sock.send('\x05\x01\x00')
res = sock.recv(2)
sock.disconnect()
if len(res) >= 1 and res[0] == '\x05':
return 'socks5'
except rocksock.RocksockException:
pass
except KeyboardInterrupt:
raise
return None
def _probe_socks4(self, ps, torhost, timeout):
"""Probe for SOCKS4 protocol. Returns 'socks4' or None."""
try:
sock = rocksock.Rocksock(
host=ps.ip, port=int(ps.port),
proxies=[rocksock.RocksockProxyFromURL(tor_proxy_url(torhost))],
timeout=timeout
)
sock.connect()
# CONNECT 1.1.1.1:80
sock.send('\x04\x01\x00\x50\x01\x01\x01\x01\x00')
res = sock.recv(2)
sock.disconnect()
if len(res) >= 2 and ord(res[0]) == 0 and ord(res[1]) in (0x5a, 0x5b, 0x5c, 0x5d):
return 'socks4'
except rocksock.RocksockException:
pass
except KeyboardInterrupt:
raise
return None
def _probe_http(self, ps, torhost, timeout):
"""Probe for HTTP CONNECT protocol. Returns 'http' or None."""
try:
sock = rocksock.Rocksock(
host=ps.ip, port=int(ps.port),
proxies=[rocksock.RocksockProxyFromURL(tor_proxy_url(torhost))],
timeout=timeout
)
sock.connect()
sock.send('CONNECT 1.1.1.1:80 HTTP/1.1\r\nHost: 1.1.1.1:80\r\n\r\n')
res = sock.recv(13)
sock.disconnect()
if res.startswith('HTTP/'):
return 'http'
except rocksock.RocksockException:
pass
except KeyboardInterrupt:
raise
return None
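For reference, the probe payloads above are the minimal greetings of each protocol; the byte layout follows standard SOCKS4/SOCKS5/HTTP framing:

# SOCKS5 greeting: version 5, one auth method offered, method 0x00 (no auth).
SOCKS5_GREETING = '\x05\x01\x00'            # a SOCKS5 proxy replies starting with '\x05'
# SOCKS4 CONNECT: version 4, command 1 (connect), port 0x0050 (80), IP 1.1.1.1, empty userid.
SOCKS4_CONNECT = '\x04\x01\x00\x50\x01\x01\x01\x01\x00'   # reply: '\x00' then 0x5a..0x5d
# HTTP CONNECT: any HTTP proxy answers with a status line.
HTTP_CONNECT = 'CONNECT 1.1.1.1:80 HTTP/1.1\r\nHost: 1.1.1.1:80\r\n\r\n'  # reply starts with 'HTTP/'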
def _connect_and_test(self):
"""Connect to target through the proxy and send test packet.
@@ -615,33 +804,43 @@ class TargetTestJob(object):
_log('FIRST TEST: proxy=%s target=%s check=%s ssl_first=%s' % (
ps.proxy, self.target_srv, self.checktype, config.watchd.ssl_first), 'info')
protos = ['http', 'socks5', 'socks4'] if ps.proto is None else [ps.proto]
protos = self._build_proto_order()
pool = connection_pool.get_pool()
# Phase 1: SSL handshake (if ssl_first enabled)
if config.watchd.ssl_first:
result = self._try_ssl_handshake(protos, pool)
# Fingerprint unknown proxies to avoid brute-force protocol guessing
if ps.proto is None and config.watchd.fingerprint:
detected = self._fingerprint_protocol(pool)
if detected:
protos = [detected] + [p for p in protos if p != detected]
# Phase 1: SSL handshake (if ssl_first enabled or SSL-only mode)
ssl_reason = None
if config.watchd.ssl_first or self.checktype == 'none':
result, ssl_reason = self._try_ssl_handshake(protos, pool)
if result is not None:
return result # SSL succeeded or MITM detected
# SSL failed for all protocols
if config.watchd.ssl_only:
# ssl_only mode: skip secondary check, mark as failed
_dbg('SSL failed, ssl_only mode, skipping secondary check', ps.proxy)
if config.watchd.ssl_only or self.checktype == 'none':
_dbg('SSL failed, no secondary check', ps.proxy)
return (None, None, 0, None, None, 1, 0, 'ssl_only')
_dbg('SSL failed, trying secondary check: %s' % self.checktype, ps.proxy)
# Phase 2: Secondary check (configured checktype)
return self._try_secondary_check(protos, pool)
return self._try_secondary_check(protos, pool, ssl_reason)
def _try_ssl_handshake(self, protos, pool):
"""Attempt SSL handshake to verify proxy works with TLS.
Returns:
Tuple on success/MITM, None on failure (should try secondary check)
(result, ssl_reason) where result is a tuple on success/MITM
or None on failure, and ssl_reason is the last SSL error reason
string (for secondary check SSL/plain decision).
"""
ps = self.proxy_state
ssl_target = random.choice(ssl_targets)
available_ssl = ssl_target_stats.get_available(ssl_targets) or ssl_targets
ssl_target = random.choice(available_ssl)
last_error_category = None
last_ssl_reason = None
for proto in protos:
if pool:
@@ -682,15 +881,22 @@ class TargetTestJob(object):
elapsed = time.time() - duration
if pool:
pool.record_success(torhost, elapsed)
ssl_target_stats.record_success(ssl_target)
sock.disconnect()
_dbg('SSL handshake OK', ps.proxy)
return None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_ok'
return (None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_ok'), None
except rocksock.RocksockException as e:
last_error_category = categorize_error(e)
et = e.get_errortype()
err = e.get_error()
# Track SSL reason for secondary check decision
if et == rocksock.RS_ET_SSL:
reason = e.get_failedproxy()
if isinstance(reason, str):
last_ssl_reason = reason
try:
sock.disconnect()
except:
@@ -703,7 +909,7 @@ class TargetTestJob(object):
if pool:
pool.record_success(torhost, elapsed)
_dbg('SSL MITM detected', ps.proxy)
return None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_mitm'
return (None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_mitm'), None
if config.watchd.debug:
_log('SSL handshake failed: %s://%s:%d: %s' % (
@@ -711,18 +917,32 @@ class TargetTestJob(object):
# Check for Tor connection issues
if et == rocksock.RS_ET_OWN:
if e.get_failedproxy() == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
fp = e.get_failedproxy()
if fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
if pool:
pool.record_failure(torhost)
elif fp == 1 and (err == rocksock.RS_E_REMOTE_DISCONNECTED or
err == rocksock.RS_E_HIT_TIMEOUT):
# Target-side failure
ssl_target_stats.record_failure(ssl_target)
elif et == rocksock.RS_ET_GAI:
# DNS failure -- target unresolvable
ssl_target_stats.record_block(ssl_target)
except KeyboardInterrupt:
raise
# All protocols failed SSL
return None
return None, last_ssl_reason
def _try_secondary_check(self, protos, pool):
"""Try the configured secondary checktype (head, judges, irc)."""
def _try_secondary_check(self, protos, pool, ssl_reason=None):
"""Try the configured secondary checktype (head, judges, irc).
ssl_reason: last SSL error reason from _try_ssl_handshake, used to
decide whether to use SSL or plain HTTP for the secondary check.
Protocol errors (proxy doesn't speak TLS) -> plain HTTP.
Other errors (cert, timeout, etc.) -> SSL without cert verification.
"""
ps = self.proxy_state
_sample_dbg('TEST START: proxy=%s target=%s check=%s' % (
ps.proxy, self.target_srv, self.checktype), ps.proxy)
@@ -734,13 +954,26 @@ class TargetTestJob(object):
else:
connect_host = srvname
# Secondary checks: always use plain HTTP
use_ssl = 0
# Decide SSL based on why the primary handshake failed:
# - protocol error (proxy can't TLS) -> plain HTTP
# - other error (cert, timeout) -> SSL without cert verification
# - no ssl_reason (ssl_first off) -> plain HTTP (no prior info)
protocol_error = is_ssl_protocol_error(ssl_reason) if ssl_reason else True
verifycert = False
if self.checktype == 'irc':
server_port = 6667
if protocol_error:
use_ssl = 0
if self.checktype == 'irc':
server_port = 6667
else:
server_port = 80
_dbg('secondary: plain (ssl protocol error)', ps.proxy)
else:
server_port = 80
use_ssl = 1
if self.checktype == 'irc':
server_port = 6697
else:
server_port = 443
_dbg('secondary: ssl/no-verify (non-protocol ssl error)', ps.proxy)
last_error_category = None
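is_ssl_protocol_error() is not shown in this diff; one plausible shape, assuming it classifies OpenSSL reason strings, would be:

# Hypothetical sketch -- the real is_ssl_protocol_error() is defined elsewhere.
_PROTOCOL_ERROR_MARKERS = ('wrong version number', 'unknown protocol',
                           'http request', 'unexpected message')

def is_ssl_protocol_error_sketch(reason):
    # True when the failure means the proxy never spoke TLS at all,
    # as opposed to certificate or timeout problems.
    if not reason:
        return False
    reason = reason.lower()
    return any(marker in reason for marker in _PROTOCOL_ERROR_MARKERS)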
@@ -816,6 +1049,11 @@ class TargetTestJob(object):
if et == rocksock.RS_ET_OWN:
if fp == 1 and (err == rocksock.RS_E_REMOTE_DISCONNECTED or
err == rocksock.RS_E_HIT_TIMEOUT):
# Target-side failure -- proxy reached target but it's down
if self.checktype == 'head':
head_target_stats.record_failure(srvname)
elif self.checktype == 'irc':
irc_target_stats.record_failure(srvname)
break
elif fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
# Tor connection failed - record in pool
@@ -825,6 +1063,11 @@ class TargetTestJob(object):
_log("could not connect to tor, sleep 5s", "ERROR")
time.sleep(5)
elif et == rocksock.RS_ET_GAI:
# DNS failure -- target hostname unresolvable (hard failure)
if self.checktype == 'head':
head_target_stats.record_block(connect_host)
elif self.checktype == 'irc':
irc_target_stats.record_block(srvname)
_log("could not resolve connection target %s" % connect_host, "ERROR")
break
elif et == rocksock.RS_ET_SSL and err == rocksock.RS_E_SSL_CERTIFICATE_ERROR:
@@ -882,7 +1125,15 @@ class WorkerThread():
nao = time.time()
# Assign worker ID for connection pool affinity
job.worker_id = self.id
job.run()
try:
job.run()
except Exception as e:
# Ensure state completes on unexpected exceptions (prevents memory leak)
_log('job exception: %s' % e, 'error')
try:
job.proxy_state.record_result(False, category='exception')
except Exception:
pass # State may already be completed
spent = time.time() - nao
job_count += 1
duration_total += spent
@@ -1041,16 +1292,17 @@ class VerificationThread(threading.Thread):
dbs.update_worker_trust(db, worker_a, was_correct)
# Update proxy status with authoritative result
now_int = int(time.time())
if result:
db.execute('''
UPDATE proxylist SET failed = 0, tested = ?
UPDATE proxylist SET failed = 0, tested = ?, last_seen = ?
WHERE proxy = ?
''', (int(time.time()), proxy))
''', (now_int, now_int, proxy))
else:
db.execute('''
UPDATE proxylist SET failed = failed + 1, tested = ?
WHERE proxy = ?
''', (int(time.time()), proxy))
''', (now_int, proxy))
# Remove from verification queue
dbs.remove_from_verification_queue(db, proxy)
@@ -1251,7 +1503,7 @@ class Proxywatchd():
# Build due condition using new schedule formula
due_sql, due_params = _build_due_sql()
q = '''SELECT ip,port,proto,failed,success_count,total_duration,country,mitm,
consecutive_success,asn,proxy FROM proxylist WHERE %s ORDER BY RANDOM()''' % due_sql
consecutive_success,asn,proxy,source_proto FROM proxylist WHERE %s ORDER BY RANDOM()''' % due_sql
_dbg('fetch_rows: working=%d fail_interval=%d backoff=%s max_fail=%d' % (
config.watchd.working_checktime, config.watchd.fail_retry_interval,
config.watchd.fail_retry_backoff, config.watchd.max_fail))
@@ -1271,7 +1523,7 @@ class Proxywatchd():
now = time.time()
oldies_max = config.watchd.max_fail + round(config.watchd.max_fail / 2)
q_oldies = '''SELECT ip,port,proto,failed,success_count,total_duration,country,
mitm,consecutive_success,asn,proxy FROM proxylist
mitm,consecutive_success,asn,proxy,source_proto FROM proxylist
WHERE failed >= ? AND failed < ? AND (tested + ?) < ?
ORDER BY RANDOM()'''
rows = db.execute(q_oldies, (config.watchd.max_fail, oldies_max,
@@ -1289,23 +1541,31 @@ class Proxywatchd():
_dbg('prepare_jobs: %d rows, checktypes=%s' % (len(rows), config.watchd.checktypes))
checktypes = config.watchd.checktypes
# Build target pools for each checktype
# Build target pools for each checktype (filter out targets in cooldown)
target_pools = {}
for ct in checktypes:
if ct == 'irc':
target_pools[ct] = config.servers
_dbg('target_pool[irc]: %d servers' % len(config.servers))
if ct == 'none':
# SSL-only mode: use ssl_targets as placeholder
target_pools[ct] = ssl_targets
_dbg('target_pool[none]: SSL-only mode, %d ssl targets' % len(ssl_targets))
elif ct == 'irc':
all_servers = config.servers
available = irc_target_stats.get_available(all_servers)
target_pools[ct] = available if available else all_servers
_dbg('target_pool[irc]: %d/%d servers available' % (len(target_pools[ct]), len(all_servers)))
elif ct == 'judges':
# Filter out judges in cooldown (blocked/rate-limited)
all_judges = list(judges.keys())
available = judge_stats.get_available_judges(all_judges)
available = judge_stats.get_available(all_judges)
target_pools[ct] = available if available else all_judges
elif ct == 'ssl':
target_pools[ct] = ssl_targets
_dbg('target_pool[ssl]: %d targets' % len(ssl_targets))
available = ssl_target_stats.get_available(ssl_targets)
target_pools[ct] = available if available else ssl_targets
_dbg('target_pool[ssl]: %d/%d targets available' % (len(target_pools[ct]), len(ssl_targets)))
else: # head
target_pools[ct] = list(regexes.keys())
_dbg('target_pool[%s]: %d targets' % (ct, len(regexes)))
all_targets = list(regexes.keys())
available = head_target_stats.get_available(all_targets)
target_pools[ct] = available if available else all_targets
_dbg('target_pool[%s]: %d/%d targets available' % (ct, len(target_pools[ct]), len(all_targets)))
# create all jobs first, then shuffle for interleaving
all_jobs = []
@@ -1314,12 +1574,12 @@ class Proxywatchd():
for row in rows:
# create shared state for this proxy
# row: ip, port, proto, failed, success_count, total_duration,
# country, mitm, consecutive_success, asn, proxy
# country, mitm, consecutive_success, asn, proxy, source_proto
state = ProxyTestState(
row[0], row[1], row[2], row[3], row[4], row[5],
row[6], row[7], row[8], asn=row[9],
oldies=self.isoldies, completion_queue=self.completion_queue,
proxy_full=row[10]
proxy_full=row[10], source_proto=row[11]
)
new_states.append(state)
@@ -1424,7 +1684,8 @@ class Proxywatchd():
dead_count += 1
args.append((effective_failcount, job.checktime, 1, job.country, job.proto,
job.success_count, job.total_duration, job.mitm,
job.consecutive_success, job.asn, job.proxy))
job.consecutive_success, job.asn, job.protos_working,
job.last_check, job.last_target, effective_failcount, job.proxy))
success_rate = (float(sc) / len(self.collected)) * 100
ret = True
@@ -1438,7 +1699,8 @@ class Proxywatchd():
if job.failcount == 0:
args.append((job.failcount, job.checktime, 1, job.country, job.proto,
job.success_count, job.total_duration, job.mitm,
job.consecutive_success, job.asn, job.proxy))
job.consecutive_success, job.asn, job.protos_working,
job.last_check, job.last_target, job.failcount, job.proxy))
if job.last_latency_ms is not None:
latency_updates.append((job.proxy, job.last_latency_ms))
ret = False
@@ -1454,9 +1716,20 @@ class Proxywatchd():
for job in self.collected
if job.failcount == 0 and job.exit_ip]
# Separate dead proxies for deletion
dead_proxies = [a[-1] for a in args if a[0] == DEAD_PROXY or a[0] >= max_fail]
live_args = [a for a in args if a[0] != DEAD_PROXY and a[0] < max_fail]
with self._db_context() as db:
query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,success_count=?,total_duration=?,mitm=?,consecutive_success=?,asn=? WHERE proxy=?'
db.executemany(query, args)
query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,success_count=?,total_duration=?,mitm=?,consecutive_success=?,asn=?,protos_working=?,last_check=?,last_target=?,last_seen=CASE WHEN ?=0 THEN strftime("%s","now") ELSE last_seen END WHERE proxy=?'
if live_args:
db.executemany(query, live_args)
# Delete proxies that reached max_fail
if dead_proxies:
db.executemany('DELETE FROM proxylist WHERE proxy=?',
[(p,) for p in dead_proxies])
_log('deleted %d dead proxies' % len(dead_proxies), 'watchd')
# Batch update latency metrics for successful proxies
if latency_updates:
@@ -1618,15 +1891,25 @@ class Proxywatchd():
# Judge stats (when using judges checktype)
if 'judges' in config.watchd.checktypes:
js = judge_stats.get_stats()
# Remap 'target' -> 'judge' for dashboard compatibility
top = [dict(j, judge=j['target']) for j in js.get('top', [])[:5]]
stats_data['judges'] = {
'total': js.get('total', 0),
'available': js.get('available', 0),
'in_cooldown': js.get('in_cooldown', 0),
'top_judges': js.get('top', [])[:5] # top 5 most successful
'top_judges': top,
}
else:
stats_data['judges'] = None
# Target health stats (all target pools)
stats_data['target_health'] = {
'head': head_target_stats.get_stats(),
'ssl': ssl_target_stats.get_stats(),
'irc': irc_target_stats.get_stats(),
'judges': judge_stats.get_stats(),
}
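The resulting /api/stats fragment would look roughly like this (shape inferred from get_stats() and the remap above; all numbers illustrative):

stats_data_fragment = {
    'judges': {
        'total': 12, 'available': 10, 'in_cooldown': 2,
        'top_judges': [{'target': 'judge-a.example', 'judge': 'judge-a.example',
                        'success': 540, 'tests': 600, 'rate': 90.0}],
    },
    'target_health': {
        'head': {'total': 25, 'available': 24, 'in_cooldown': 1, 'top': []},
        'ssl': {'total': 6, 'available': 6, 'in_cooldown': 0, 'top': []},
        'irc': {'total': 130, 'available': 128, 'in_cooldown': 2, 'top': []},
        'judges': {'total': 12, 'available': 10, 'in_cooldown': 2, 'top': []},
    },
}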
# Scraper/engine stats
if scraper_available:
scraper_stats = scraper_module.get_scraper_stats()
@@ -1687,7 +1970,7 @@ class Proxywatchd():
# Start HTTP API server if enabled
if config.httpd.enabled:
from httpd import ProxyAPIServer, configure_schedule
from httpd import ProxyAPIServer, configure_schedule, configure_url_scoring
# Pass schedule config to httpd module
configure_schedule(
config.watchd.working_checktime,
@@ -1695,11 +1978,18 @@ class Proxywatchd():
config.watchd.fail_retry_backoff,
config.watchd.max_fail
)
configure_url_scoring(
config.ppf.checktime,
config.ppf.perfail_checktime,
config.ppf.max_fail,
config.ppf.list_max_age_days
)
self.httpd_server = ProxyAPIServer(
config.httpd.listenip,
config.httpd.port,
config.watchd.database,
stats_provider=self.get_runtime_stats
stats_provider=self.get_runtime_stats,
url_database=config.ppf.database,
)
self.httpd_server.start()
@@ -1734,27 +2024,32 @@ class Proxywatchd():
sleeptime -= 1
continue
# check if job queue is empty (work-stealing: threads pull as needed)
if self.job_queue.empty():
# Skip job processing when threads=0 (master-only mode)
if config.watchd.threads > 0:
# check if job queue is empty (work-stealing: threads pull as needed)
if self.job_queue.empty():
self.collect_work()
if not self.submit_collected() and self.tor_safeguard:
_log("zzZzZzzZ sleeping 1 minute(s) due to tor issues", "watchd")
sleeptime = 60
else:
job_count = self.prepare_jobs()
if job_count == 0:
# no jobs available, wait before checking again
sleeptime = 10
if not self.in_background: # single_thread scenario
self.threads[0].workloop()
self.collect_work()
if not self.submit_collected() and self.tor_safeguard:
_log("zzZzZzzZ sleeping 1 minute(s) due to tor issues", "watchd")
sleeptime = 60
else:
job_count = self.prepare_jobs()
if job_count == 0:
# no jobs available, wait before checking again
sleeptime = 10
if not self.in_background: # single_thread scenario
self.threads[0].workloop()
self.collect_work()
if len(self.collected) > self.submit_after:
if not self.submit_collected() and self.tor_safeguard:
_log("zzZzZzzZ sleeping 1 minute(s) due to tor issues", "watchd")
sleeptime = 60
if len(self.collected) > self.submit_after:
if not self.submit_collected() and self.tor_safeguard:
_log("zzZzZzzZ sleeping 1 minute(s) due to tor issues", "watchd")
sleeptime = 60
else:
# Master-only mode: sleep to avoid busy loop
sleeptime = 10
# Update rate history for sparklines
self.stats.update_history()

View File

@@ -21,6 +21,25 @@
import socket, ssl, select, copy, errno
import network_stats
# Cached SSL contexts -- avoids reloading CA store from disk on every connection
_ssl_ctx_noverify = None
_ssl_ctx_verify = None
def _get_ssl_context(verifycert=False):
global _ssl_ctx_noverify, _ssl_ctx_verify
if verifycert:
if _ssl_ctx_verify is None:
_ssl_ctx_verify = ssl.create_default_context()
_ssl_ctx_verify.check_hostname = True
_ssl_ctx_verify.verify_mode = ssl.CERT_OPTIONAL
return _ssl_ctx_verify
else:
if _ssl_ctx_noverify is None:
_ssl_ctx_noverify = ssl.create_default_context()
_ssl_ctx_noverify.check_hostname = False
_ssl_ctx_noverify.verify_mode = ssl.CERT_NONE
return _ssl_ctx_noverify
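The two cached contexts are process-wide and reused across connections; a minimal usage sketch (hostname and socket are illustrative, relying on the module's existing socket/ssl imports):

def wrap_with_cached_context(raw_sock, hostname, verifycert=False):
    # Reuses the module-level context instead of rebuilding the CA store per connection.
    ctx = _get_ssl_context(verifycert)
    server_hostname = hostname if ctx.check_hostname else None
    return ctx.wrap_socket(raw_sock, server_hostname=server_hostname)

# Illustrative call:
#   raw = socket.create_connection(('example.org', 443), timeout=5)
#   tls = wrap_with_cached_context(raw, 'example.org')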
# rs_proxyType
RS_PT_NONE = 0
RS_PT_SOCKS4 = 1
@@ -210,12 +229,7 @@ def RocksockProxyFromURL(url):
class Rocksock():
def __init__(self, host=None, port=0, verifycert=False, timeout=0, proxies=None, **kwargs):
if 'ssl' in kwargs and kwargs['ssl'] == True:
self.sslcontext = ssl.create_default_context()
self.sslcontext.check_hostname = False
self.sslcontext.verify_mode = ssl.CERT_NONE
if verifycert:
self.sslcontext.verify_mode = ssl.CERT_OPTIONAL
self.sslcontext.check_hostname = True
self.sslcontext = _get_ssl_context(verifycert)
else:
self.sslcontext = None
self.proxychain = []
@@ -228,6 +242,7 @@ class Rocksock():
target = RocksockProxy(host, port, RS_PT_NONE)
self.proxychain.append(target)
self.sock = None
self._connected = False
self.timeout = timeout
def _translate_socket_error(self, e, pnum):
@@ -288,15 +303,18 @@ class Rocksock():
select.select([], [self.sock], [])
"""
self._connected = True
def disconnect(self):
if self.sock is None: return
try:
self.sock.shutdown(socket.SHUT_RDWR)
except socket.error:
pass
if self._connected:
try:
self.sock.shutdown(socket.SHUT_RDWR)
except socket.error:
pass
self.sock.close()
self.sock = None
self._connected = False
def canread(self):
return select.select([self.sock], [], [], 0)[0]
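With the _connected flag, disconnect() is now safe on sockets whose connect never completed; the intended lifecycle, sketched with a placeholder TEST-NET host:

sock = Rocksock(host='192.0.2.1', port=1080, timeout=5)
try:
    sock.connect()          # sets _connected = True only on success
except RocksockException:
    pass
sock.disconnect()           # skips the shutdown() syscall if never connected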

View File

@@ -1,69 +1,132 @@
irc.2600.net
irc.Undernet.Org
irc.abjects.net
irc.afternet.org
irc.allnetwork.org
irc.alphachat.net
irc.atrum.org
irc.austnet.org
irc.axon.pw
irc.ayochat.or.id
irc.azzurra.chat
irc.beyondirc.net
irc.bolchat.com
irc.brasirc.com.br
irc.canternet.org
irc.chat4all.org
irc.chatspike.net
irc.choopa.net
irc.coldfront.net
irc.cyberarmy.net
irc.chatzona.org
irc.cncirc.net
irc.coolsmile.net
irc.d-t-net.de
irc.dal.net
irc.darenet.org
irc.darkfasel.net
irc.darkmyst.org
irc.darkscience.net
irc.darkworld.network
irc.data.lt
irc.drlnet.com
irc.dynastynet.net
irc.dejatoons.net
irc.desirenet.org
irc.ecnet.org
irc.efnet.org
irc.efnet.pl
irc.enterthegame.com
irc.epiknet.org
irc.esper.net
irc.eu.dal.net
irc.eu.gamesurge.net
irc.euirc.net
irc.europnet.org
irc.eversible.com
irc.evolu.net
irc.explosionirc.net
irc.fdfnet.net
irc.fef.net
irc.financialchat.com
irc.forestnet.org
irc.freeunibg.eu
irc.gamesurge.net
irc.geeknode.org
irc.geekshed.net
irc.german-freakz.net
irc.german-elite.net
irc.gigairc.net
irc.gimp.org
irc.globalgamers.net
irc.greekirc.net
irc.goodchatting.com
irc.hackint.org
irc.hybridirc.com
irc.icq-chat.com
irc.immortal-anime.net
irc.indymedia.org
irc.irc-hispano.org
irc.irc2.hu
irc.irc4fun.net
irc.ircgate.it
irc.irchighway.net
irc.ircsource.net
irc.irctoo.net
irc.ircube.org
irc.ircworld.org
irc.irdsi.net
irc.kampungchat.org
irc.knightirc.net
irc.krey.net
irc.krono.net
irc.langochat.net
irc.krstarica.com
irc.libera.chat
irc.librairc.net
irc.lichtsnel.nl
irc.link-net.be
irc.lt-tech.org
irc.luatic.net
irc.maddshark.net
irc.newnet.net
irc.magicstar.net
irc.mibbit.net
irc.mindforge.org
irc.nationchat.org
irc.nightstar.net
irc.nullirc.net
irc.oftc.net
irc.onlinegamesnet.net
irc.othernet.org
irc.otherworlders.org
irc.oltreirc.net
irc.openjoke.org
irc.orixon.org
irc.oz.org
irc.p2pchat.net
irc.p2p-network.net
irc.perl.org
irc.phat-net.de
irc.pirc.pl
irc.ptnet.org
irc.quakenet.org
irc.recycled-irc.net
irc.retroit.org
irc.rezosup.org
irc.rizon.net
irc.rusnet.org.ru
irc.scarynet.org
irc.serenia.net
irc.scuttled.net
irc.serenity-irc.net
irc.servercentral.net
irc.shadowfire.org
irc.shadowworld.net
irc.simosnap.com
irc.skychatz.org
irc.skyrock.net
irc.slacknet.org
irc.slashnet.org
irc.snt.utwente.nl
irc.smurfnet.ch
irc.snoonet.org
irc.sorcery.net
irc.spacetronix.net
irc.spotchat.org
irc.st-city.net
irc.starlink-irc.org
irc.starlink.org
irc.staynet.org
irc.stormbit.net
irc.swiftirc.net
irc.teranova.net
irc.us.dal.net
irc.us.gamesurge.net
irc.synirc.net
irc.technet.chat
irc.tilde.chat
irc.tweakers.net
irc.undernet.org
irc.undermind.net
irc.wenet.ru
irc.whatnet.org
irc.wixchat.org
irc.worldirc.org
irc.xertion.org
irc.xevion.net
irc.zerofuzion.net
uk.quakenet.org
us.quakenet.org
open.ircnet.net
ssl.bongster.de

View File

@@ -14,60 +14,64 @@ def try_div(a, b):
return 0
class JudgeStats():
"""Track per-judge success/failure rates for reliability scoring.
class TargetStats():
"""Track per-target success/failure rates with cooldown.
Judges that frequently block or rate-limit are temporarily avoided.
Stats decay over time to allow recovery.
Targets that frequently block or fail are temporarily avoided.
Block counters reset on success or cooldown expiry.
Used for all target pools: judges, head targets, SSL targets, IRC servers.
"""
def __init__(self, cooldown_seconds=300, block_threshold=3):
self.lock = threading.Lock()
self.stats = {} # judge -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
self.cooldown_seconds = cooldown_seconds # seconds to avoid blocked judges
self.block_threshold = block_threshold # consecutive blocks before cooldown
self.stats = {} # target -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
self.cooldown_seconds = cooldown_seconds
self.block_threshold = block_threshold
def record_success(self, judge):
"""Record successful judge response."""
with self.lock:
if judge not in self.stats:
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
self.stats[judge]['success'] += 1
# Reset block count on success
self.stats[judge]['block'] = 0
def _ensure(self, target):
if target not in self.stats:
self.stats[target] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
def record_failure(self, judge):
"""Record judge failure (proxy failed, not judge block)."""
def record_success(self, target):
"""Record successful target response."""
with self.lock:
if judge not in self.stats:
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
self.stats[judge]['fail'] += 1
self._ensure(target)
self.stats[target]['success'] += 1
self.stats[target]['block'] = 0
def record_block(self, judge):
"""Record judge blocking the proxy (403, captcha, rate-limit)."""
def record_failure(self, target):
"""Record target failure (soft -- doesn't trigger cooldown)."""
with self.lock:
if judge not in self.stats:
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
self.stats[judge]['block'] += 1
self.stats[judge]['last_block'] = time.time()
self._ensure(target)
self.stats[target]['fail'] += 1
def is_available(self, judge):
"""Check if judge is available (not in cooldown)."""
def record_block(self, target):
"""Record target block (403, captcha, DNS failure, rate-limit)."""
with self.lock:
if judge not in self.stats:
self._ensure(target)
self.stats[target]['block'] += 1
self.stats[target]['last_block'] = time.time()
def is_available(self, target):
"""Check if target is available (not in cooldown)."""
with self.lock:
if target not in self.stats:
return True
s = self.stats[judge]
# Check if in cooldown period
s = self.stats[target]
if s['block'] >= self.block_threshold:
if (time.time() - s['last_block']) < self.cooldown_seconds:
return False
# Cooldown expired, reset block count
s['block'] = 0
return True
def get_available(self, target_list):
"""Return targets not in cooldown."""
return [t for t in target_list if self.is_available(t)]
def get_available_judges(self, judge_list):
"""Return list of judges not in cooldown."""
return [j for j in judge_list if self.is_available(j)]
"""Compat alias for get_available()."""
return self.get_available(judge_list)
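Cooldown behaviour of the class above in isolation (target names and counts are illustrative):

stats = TargetStats(cooldown_seconds=300, block_threshold=3)
targets = ['judge-a.example', 'judge-b.example']
for _ in range(3):
    stats.record_block('judge-a.example')     # third block starts the cooldown
assert stats.get_available(targets) == ['judge-b.example']
stats.record_success('judge-b.example')       # success resets that target's block counter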
def status_line(self):
"""Return status summary for logging."""
@@ -76,7 +80,7 @@ class JudgeStats():
blocked = sum(1 for s in self.stats.values()
if s['block'] >= self.block_threshold and
(time.time() - s['last_block']) < self.cooldown_seconds)
return 'judges: %d total, %d in cooldown' % (total, blocked)
return '%d total, %d in cooldown' % (total, blocked)
def get_stats(self):
"""Return statistics dict for API/dashboard."""
@@ -87,18 +91,21 @@ class JudgeStats():
if s['block'] >= self.block_threshold and
(now - s['last_block']) < self.cooldown_seconds)
available = total - in_cooldown
# Get top judges by success count
top = []
for judge, s in self.stats.items():
for target, s in self.stats.items():
total_tests = s['success'] + s['fail']
if total_tests > 0:
success_pct = (s['success'] * 100.0) / total_tests
top.append({'judge': judge, 'success': s['success'],
top.append({'target': target, 'success': s['success'],
'tests': total_tests, 'rate': round(success_pct, 1)})
top.sort(key=lambda x: x['success'], reverse=True)
return {'total': total, 'available': available, 'in_cooldown': in_cooldown, 'top': top}
# Backwards-compatible alias
JudgeStats = TargetStats
# HTTP targets - check for specific headers
regexes = {
'www.facebook.com': 'X-FB-Debug',
@@ -107,11 +114,9 @@ regexes = {
'www.twitter.com': 'x-connection-hash',
't.co': 'x-connection-hash',
'www.msn.com': 'x-aspnetmvc-version',
'www.bing.com': 'p3p',
'www.ask.com': 'x-served-by',
'www.hotmail.com': 'x-msedge-ref',
'www.bbc.co.uk': 'x-bbc-edge-cache-status',
'www.skype.com': 'X-XSS-Protection',
'www.alibaba.com': 'object-status',
'www.mozilla.org': 'cf-ray',
'www.cloudflare.com': 'cf-ray',
@@ -121,7 +126,6 @@ regexes = {
'www.netflix.com': 'X-Netflix.proxy.execution-time',
'www.amazon.de': 'x-amz-cf-id',
'www.reuters.com': 'x-amz-cf-id',
'www.ikea.com': 'x-frame-options',
'www.twitpic.com': 'timing-allow-origin',
'www.digg.com': 'cf-request-id',
'www.wikia.com': 'x-served-by',
@@ -133,8 +137,6 @@ regexes = {
'www.yelp.com': 'x-timer',
'www.ebay.com': 'x-envoy-upstream-service-time',
'www.wikihow.com': 'x-c',
'www.archive.org': 'referrer-policy',
'www.pandora.tv': 'X-UA-Compatible',
'www.w3.org': 'x-backend',
'www.time.com': 'x-amz-cf-pop'
}

View File

@@ -359,6 +359,198 @@ class TestExtractAuthProxies:
assert fetch.extract_auth_proxies('just some text') == []
class TestExtractAuthProxiesShortCircuit:
"""Tests for extract_auth_proxies() short-circuit on missing @."""
def test_no_at_sign_returns_empty(self):
"""Content without @ skips regex entirely."""
content = '1.2.3.4:8080 socks5://5.6.7.8:1080 plain text'
assert fetch.extract_auth_proxies(content) == []
def test_at_sign_still_extracts(self):
"""Content with @ still finds auth proxies."""
content = 'user:pass@1.2.3.4:8080'
result = fetch.extract_auth_proxies(content)
assert len(result) == 1
assert result[0][0] == 'user:pass@1.2.3.4:8080'
def test_at_sign_no_match_returns_empty(self):
"""Content with @ but no auth proxy pattern returns empty."""
content = 'email@example.com has no proxy'
assert fetch.extract_auth_proxies(content) == []
class TestExtractProxiesFromTable:
"""Tests for extract_proxies_from_table() with precompiled regexes."""
def test_no_table_returns_empty(self):
"""Plain text without <table> returns empty."""
content = '1.2.3.4:8080\n5.6.7.8:3128\n'
assert fetch.extract_proxies_from_table(content) == []
def test_simple_table(self):
"""Basic HTML table with IP/Port columns is parsed."""
content = '''
<table>
<tr><th>IP</th><th>Port</th><th>Type</th></tr>
<tr><td>1.2.3.4</td><td>8080</td><td>HTTP</td></tr>
<tr><td>5.6.7.8</td><td>1080</td><td>SOCKS5</td></tr>
</table>
'''
result = fetch.extract_proxies_from_table(content)
assert len(result) == 2
addrs = [r[0] for r in result]
assert '1.2.3.4:8080' in addrs
assert '5.6.7.8:1080' in addrs
def test_uppercase_table_tag(self):
"""<TABLE> (uppercase) is also detected."""
content = '''
<TABLE>
<TR><TH>IP</TH><TH>Port</TH></TR>
<TR><TD>1.2.3.4</TD><TD>8080</TD></TR>
</TABLE>
'''
result = fetch.extract_proxies_from_table(content)
assert len(result) == 1
def test_empty_table(self):
"""Table with headers but no data rows returns empty."""
content = '''
<table>
<tr><th>IP</th><th>Port</th></tr>
</table>
'''
result = fetch.extract_proxies_from_table(content)
assert result == []
class TestExtractProxiesFromJson:
"""Tests for extract_proxies_from_json() short-circuit."""
def test_no_braces_returns_empty(self):
"""Content without { or [ skips JSON parsing."""
content = '1.2.3.4:8080\n5.6.7.8:3128\n'
assert fetch.extract_proxies_from_json(content) == []
def test_json_array_of_objects(self):
"""JSON array with ip/port objects is parsed."""
content = '[{"ip": "1.2.3.4", "port": 8080}]'
result = fetch.extract_proxies_from_json(content)
assert len(result) >= 1
addrs = [r[0] for r in result]
assert '1.2.3.4:8080' in addrs
def test_json_array_of_strings(self):
"""JSON array of ip:port strings is parsed."""
content = '["1.2.3.4:8080", "5.6.7.8:3128"]'
result = fetch.extract_proxies_from_json(content)
addrs = [r[0] for r in result]
assert '1.2.3.4:8080' in addrs
assert '5.6.7.8:3128' in addrs
def test_plain_html_skips_json(self):
"""HTML without JSON delimiters returns empty."""
content = '<html><body>1.2.3.4:8080</body></html>'
# HTML has < and > but this function checks for { and [
# The < > chars won't trigger JSON parsing
result = fetch.extract_proxies_from_json(content)
# May or may not find anything depending on HTML structure
# but should not crash
assert isinstance(result, list)
class TestExtractProxiesWithHints:
"""Tests for extract_proxies_with_hints()."""
def test_proto_before_ip(self):
"""Protocol keyword before IP:PORT is detected."""
content = 'socks5 1.2.3.4:8080'
result = fetch.extract_proxies_with_hints(content)
assert '1.2.3.4:8080' in result
assert result['1.2.3.4:8080'] == 'socks5'
def test_proto_after_ip(self):
"""Protocol keyword after IP:PORT is detected."""
content = '1.2.3.4:8080 socks5'
result = fetch.extract_proxies_with_hints(content)
assert '1.2.3.4:8080' in result
def test_no_hints_returns_empty(self):
"""Plain IP:PORT without protocol hints returns empty."""
content = '1.2.3.4:8080'
result = fetch.extract_proxies_with_hints(content)
assert result == {}
class TestExtractProxiesIntegration:
"""Integration tests for extract_proxies() combining all extractors."""
def test_plain_text_proxy_list(self):
"""Plain text IP:PORT list extracts correctly."""
content = '1.2.3.4:8080\n5.6.7.8:3128\n9.10.11.12:1080\n'
result = fetch.extract_proxies(content, filter_known=False)
addrs = [r[0] for r in result]
assert '1.2.3.4:8080' in addrs
assert '5.6.7.8:3128' in addrs
assert '9.10.11.12:1080' in addrs
def test_auth_proxies_extracted(self):
"""Auth proxies found in mixed content."""
content = 'user:pass@1.2.3.4:8080\n5.6.7.8:3128\n'
result = fetch.extract_proxies(content, filter_known=False)
addrs = [r[0] for r in result]
assert 'user:pass@1.2.3.4:8080' in addrs
assert '5.6.7.8:3128' in addrs
def test_html_table_extraction(self):
"""Proxies extracted from HTML table."""
content = '''
<table>
<tr><th>IP</th><th>Port</th></tr>
<tr><td>1.2.3.4</td><td>8080</td></tr>
</table>
'''
result = fetch.extract_proxies(content, filter_known=False)
addrs = [r[0] for r in result]
assert '1.2.3.4:8080' in addrs
def test_json_extraction(self):
"""Proxies extracted from JSON content."""
content = '[{"ip": "1.2.3.4", "port": 8080}]'
result = fetch.extract_proxies(content, filter_known=False)
addrs = [r[0] for r in result]
assert '1.2.3.4:8080' in addrs
def test_empty_content(self):
"""Empty content returns no proxies."""
result = fetch.extract_proxies('', filter_known=False)
assert result == []
def test_private_ips_filtered(self):
"""Private IPs are not returned."""
content = '10.0.0.1:8080\n192.168.1.1:3128\n1.2.3.4:8080\n'
result = fetch.extract_proxies(content, filter_known=False)
addrs = [r[0] for r in result]
assert '10.0.0.1:8080' not in addrs
assert '192.168.1.1:3128' not in addrs
assert '1.2.3.4:8080' in addrs
def test_proto_from_hints(self):
"""Protocol hints are picked up."""
content = 'socks5 1.2.3.4:8080\n'
result = fetch.extract_proxies(content, filter_known=False)
protos = {r[0]: r[1] for r in result}
assert protos.get('1.2.3.4:8080') == 'socks5'
def test_proto_from_arg(self):
"""Fallback proto from argument is used."""
content = '1.2.3.4:8080\n'
result = fetch.extract_proxies(content, filter_known=False, proto='socks4')
protos = {r[0]: r[1] for r in result}
assert protos.get('1.2.3.4:8080') == 'socks4'
class TestConfidenceScoring:
"""Tests for confidence score constants."""

tools/lib/ppf-common.sh  (new file, 170 lines)

@@ -0,0 +1,170 @@
#!/bin/bash
# ppf-common.sh -- shared library for PPF operations toolkit
# Source this file; do not execute directly.
set -eu
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
PPF_DIR="${PPF_DIR:-$HOME/git/ppf}"
ANSIBLE_DIR="/opt/ansible"
ANSIBLE_VENV="${ANSIBLE_DIR}/venv/bin/activate"
PPF_INVENTORY="${PPF_DIR}/tools/playbooks/inventory.ini"
# ---------------------------------------------------------------------------
# Host topology
# ---------------------------------------------------------------------------
MASTER="odin"
WORKERS="cassius edge sentinel"
ALL_HOSTS="odin cassius edge sentinel"
# Container names per role
MASTER_CONTAINER="ppf"
WORKER_CONTAINER="ppf-worker"
# ---------------------------------------------------------------------------
# Colors (respects NO_COLOR -- https://no-color.org)
# ---------------------------------------------------------------------------
if [ -z "${NO_COLOR:-}" ] && [ -t 1 ]; then
C_RST='\033[0m'
C_DIM='\033[2m'
C_BOLD='\033[1m'
C_RED='\033[38;5;167m'
C_GREEN='\033[38;5;114m'
C_YELLOW='\033[38;5;180m'
C_BLUE='\033[38;5;110m'
C_CYAN='\033[38;5;116m'
else
C_RST='' C_DIM='' C_BOLD='' C_RED='' C_GREEN=''
C_YELLOW='' C_BLUE='' C_CYAN=''
fi
# ---------------------------------------------------------------------------
# Output helpers
# ---------------------------------------------------------------------------
log_ok() { printf "${C_GREEN}${C_RST} %s\n" "$*"; }
log_err() { printf "${C_RED}${C_RST} %s\n" "$*" >&2; }
log_warn() { printf "${C_YELLOW}${C_RST} %s\n" "$*"; }
log_info() { printf "${C_BLUE}${C_RST} %s\n" "$*"; }
log_dim() { printf "${C_DIM} %s${C_RST}\n" "$*"; }
die() { log_err "$@"; exit 1; }
# Section header
section() {
printf "\n${C_BOLD}${C_CYAN} %s${C_RST}\n" "$*"
}
# ---------------------------------------------------------------------------
# Host resolution helpers
# ---------------------------------------------------------------------------
is_master() { [ "$1" = "$MASTER" ]; }
is_worker() {
local h
for h in $WORKERS; do [ "$h" = "$1" ] && return 0; done
return 1
}
container_name() {
if is_master "$1"; then echo "$MASTER_CONTAINER"; else echo "$WORKER_CONTAINER"; fi
}
# Expand target aliases into host list
# "all" -> all hosts
# "workers" -> worker hosts
# "odin" -> just odin
# Multiple args are concatenated with comma
resolve_targets() {
local targets=""
local arg
for arg in "$@"; do
case "$arg" in
all) targets="${targets:+$targets }$ALL_HOSTS" ;;
workers) targets="${targets:+$targets }$WORKERS" ;;
master) targets="${targets:+$targets }$MASTER" ;;
*) targets="${targets:+$targets }$arg" ;;
esac
done
# Deduplicate while preserving order
echo "$targets" | tr ' ' '\n' | awk '!seen[$0]++' | tr '\n' ' ' | sed 's/ $//'
}
# Convert space-separated host list to comma-separated for ansible
hosts_csv() {
echo "$*" | tr ' ' ','
}
# ---------------------------------------------------------------------------
# Ansible wrapper
# ---------------------------------------------------------------------------
# Runs ansible with toolkit inventory via venv.
# Usage: ansible_cmd <ansible args...>
ansible_cmd() {
(
# shellcheck disable=SC1090
. "$ANSIBLE_VENV"
cd "$ANSIBLE_DIR"
ansible -i "$PPF_INVENTORY" --become "$@"
)
}
# Runs ansible-playbook with toolkit inventory via venv.
# Usage: ansible_playbook_cmd <ansible-playbook args...>
ansible_playbook_cmd() {
(
# shellcheck disable=SC1090
. "$ANSIBLE_VENV"
cd "$ANSIBLE_DIR"
ansible-playbook "$@"
)
}
# ---------------------------------------------------------------------------
# Remote podman/compose wrappers
# ---------------------------------------------------------------------------
# Run a podman command on a remote host as the podman user.
# Uses dynamic UID discovery.
# Usage: podman_cmd HOST "podman subcommand..."
podman_cmd() {
local host="$1"; shift
local cmd="$*"
ansible_cmd "$host" -m raw -a \
"uid=\$(id -u podman) && cd /tmp && sudo -u podman XDG_RUNTIME_DIR=/run/user/\$uid $cmd"
}
# Run a podman-compose subcommand on a remote host.
# Usage: compose_cmd HOST "subcommand [args]"
compose_cmd() {
local host="$1"; shift
local cmd="$*"
ansible_cmd "$host" -m raw -a \
"uid=\$(id -u podman) && sudo -u podman bash -c 'export XDG_RUNTIME_DIR=/run/user/'\$uid' && cd /home/podman/ppf && podman-compose $cmd'"
}
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
validate_syntax() {
local errors=0
local f
section "Validating Python syntax"
for f in "$PPF_DIR"/*.py; do
[ -f "$f" ] || continue
if python3 -m py_compile "$f" 2>/dev/null; then
log_dim "$(basename "$f")"
else
log_err "$(basename "$f")"
errors=$((errors + 1))
fi
done
if [ "$errors" -gt 0 ]; then
die "$errors file(s) failed syntax check"
fi
log_ok "All files valid"
}
# ---------------------------------------------------------------------------
# Version
# ---------------------------------------------------------------------------
PPF_TOOLS_VERSION="1.0.0"

View File

@@ -0,0 +1,58 @@
---
- name: Deploy PPF code
hosts: ppf
gather_facts: false
become: true
tasks:
- name: Sync Python code and support files
ansible.posix.synchronize:
src: "{{ ppf_src }}/"
dest: "{{ ppf_code_dest }}"
rsync_opts:
- "--include=*.py"
- "--include=servers.txt"
- "--include=Dockerfile"
- "--exclude=*"
register: sync_result
notify: restart containers
- name: Deploy compose file
ansible.builtin.copy:
src: "{{ ppf_src }}/{{ ppf_compose_src }}"
dest: "{{ ppf_base }}/compose.yml"
owner: "{{ ppf_owner }}"
group: "{{ ppf_owner }}"
register: compose_result
notify: restart containers
- name: Fix file ownership
ansible.builtin.file:
path: "{{ ppf_base }}"
owner: "{{ ppf_owner }}"
group: "{{ ppf_owner }}"
recurse: true
- name: Flush handlers before status check
ansible.builtin.meta: flush_handlers
- name: Wait for containers to settle
ansible.builtin.pause:
seconds: 2
when: >-
ppf_restart | bool and
(sync_result is changed or compose_result is changed)
- name: Check container status
ansible.builtin.raw: "uid=$(id -u {{ ppf_owner }}) && sudo -u {{ ppf_owner }} bash -c 'export XDG_RUNTIME_DIR=/run/user/'$uid' && cd {{ ppf_base }} && podman-compose ps'"
register: status_result
changed_when: false
- name: Show container status
ansible.builtin.debug:
msg: "{{ status_result.stdout_lines | default([]) }}"
handlers:
- name: restart containers
ansible.builtin.raw: "uid=$(id -u {{ ppf_owner }}) && sudo -u {{ ppf_owner }} bash -c 'export XDG_RUNTIME_DIR=/run/user/'$uid' && cd {{ ppf_base }} && podman-compose down && podman-compose up -d'"
when: ppf_restart | bool

View File

@@ -0,0 +1,3 @@
ppf_base: /home/podman/ppf
ppf_owner: podman
ppf_restart: true

View File

@@ -0,0 +1,2 @@
ppf_code_dest: /home/podman/ppf/
ppf_compose_src: compose.master.yml

View File

@@ -0,0 +1,2 @@
ppf_code_dest: /home/podman/ppf/src/
ppf_compose_src: compose.worker.yml

View File

@@ -0,0 +1,16 @@
[master]
odin ansible_host=10.200.1.250
[workers]
cassius ansible_host=10.200.1.13
edge ansible_host=10.200.1.254
sentinel ansible_host=10.200.1.1
[ppf:children]
master
workers
[ppf:vars]
ansible_user=ansible
ansible_ssh_private_key_file=/opt/ansible/secrets/ssh/ansible
ansible_remote_tmp=~/.ansible/tmp

tools/ppf-db  (new executable file, 154 lines)

@@ -0,0 +1,154 @@
#!/bin/bash
# ppf-db -- manage PPF databases
#
# Usage:
# ppf-db <command> [options]
#
# Commands: stats, purge-proxies, vacuum
set -eu
# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"
PROXY_DB="/home/podman/ppf/data/proxies.sqlite"
URL_DB="/home/podman/ppf/data/websites.sqlite"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
cat <<EOF
Usage: ppf-db <command> [options]
Manage PPF databases on odin (master).
Commands:
stats show proxy and URL counts
purge-proxies delete all proxies (keeps URLs)
vacuum reclaim disk space after purge
Options:
--help show this help
--version show version
Examples:
ppf-db stats
ppf-db purge-proxies
ppf-db vacuum
EOF
exit 0
}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
run_sql() {
local db="$1" sql="$2"
ansible_cmd "$MASTER" -m raw -a \
"sudo -u podman sqlite3 '$db' \"$sql\"" 2>/dev/null \
| sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d'
}
# ---------------------------------------------------------------------------
# Commands
# ---------------------------------------------------------------------------
cmd_stats() {
section "Database stats (odin)"
local proxies total_urls active_urls working
proxies=$(run_sql "$PROXY_DB" "SELECT COUNT(*) FROM proxylist;")
working=$(run_sql "$PROXY_DB" "SELECT COUNT(*) FROM proxylist WHERE failed=0 AND proto IS NOT NULL;")
total_urls=$(run_sql "$URL_DB" "SELECT COUNT(*) FROM uris;")
active_urls=$(run_sql "$URL_DB" "SELECT COUNT(*) FROM uris WHERE error=0;")
log_info "Proxies: ${proxies} total, ${working} working"
log_info "URLs: ${total_urls} total, ${active_urls} active"
}
cmd_purge_proxies() {
section "Purging proxies from odin"
# Get counts before
local before
before=$(run_sql "$PROXY_DB" "SELECT COUNT(*) FROM proxylist;")
log_info "Proxies before: $before"
# Stop container
log_info "Stopping container..."
compose_cmd "$MASTER" "down" > /dev/null 2>&1 \
&& log_ok "Container stopped" \
|| die "Failed to stop container"
# Delete proxies
log_info "Deleting proxylist rows..."
run_sql "$PROXY_DB" "DELETE FROM proxylist;" > /dev/null 2>&1
log_ok "Proxylist purged"
# Vacuum to reclaim space
log_info "Vacuuming database..."
run_sql "$PROXY_DB" "VACUUM;" > /dev/null 2>&1
log_ok "Database vacuumed"
# Verify URLs intact
local urls_after
urls_after=$(run_sql "$URL_DB" "SELECT COUNT(*) FROM uris;")
log_ok "URLs preserved: $urls_after"
# Start container
log_info "Starting container..."
compose_cmd "$MASTER" "up -d" > /dev/null 2>&1 \
&& log_ok "Container started" \
|| die "Failed to start container"
}
cmd_vacuum() {
section "Vacuuming database (odin)"
local before after
before=$(ansible_cmd "$MASTER" -m raw -a \
"sudo -u podman ls -lh '$PROXY_DB'" 2>/dev/null \
| grep -oE '[0-9]+[KMG]?' | head -1)
run_sql "$PROXY_DB" "VACUUM;" > /dev/null 2>&1
after=$(ansible_cmd "$MASTER" -m raw -a \
"sudo -u podman ls -lh '$PROXY_DB'" 2>/dev/null \
| grep -oE '[0-9]+[KMG]?' | head -1)
log_ok "Vacuumed: ${before:-?} -> ${after:-?}"
}
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
[ $# -eq 0 ] && usage
COMMAND=""
while [ $# -gt 0 ]; do
case "$1" in
--help|-h) usage ;;
--version|-V) echo "ppf-db $PPF_TOOLS_VERSION"; exit 0 ;;
stats|purge-proxies|vacuum)
[ -n "$COMMAND" ] && die "Multiple commands given"
COMMAND="$1"
;;
-*) die "Unknown option: $1" ;;
*) die "Unknown command: $1" ;;
esac
shift
done
[ -z "$COMMAND" ] && die "No command given. Use: stats, purge-proxies, vacuum"
case "$COMMAND" in
stats) cmd_stats ;;
purge-proxies) cmd_purge_proxies ;;
vacuum) cmd_vacuum ;;
esac
printf "\n"

tools/ppf-deploy  (new executable file, 115 lines)

@@ -0,0 +1,115 @@
#!/bin/bash
# ppf-deploy -- deploy PPF code to nodes
#
# Usage:
# ppf-deploy [options] [targets...]
#
# Targets:
# all odin + all workers (default)
# workers cassius, edge, sentinel
# master odin
# <hostname> specific host(s)
set -eu
# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"
PLAYBOOK_DIR="$SCRIPT_DIR/playbooks"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
cat <<EOF
Usage: ppf-deploy [options] [targets...]
Deploy PPF code to nodes via Ansible playbook.
Targets:
all odin + all workers (default)
workers cassius, edge, sentinel
master odin
<hostname> specific host(s)
Options:
--no-restart sync files only, skip container restart
--check dry run (ansible --check --diff)
-v verbose ansible output
--help show this help
--version show version
Steps performed:
1. Validate Python syntax locally
2. Rsync *.py + servers.txt (role-aware destinations)
3. Copy compose file per role
4. Fix ownership (podman:podman)
5. Restart containers on change (unless --no-restart)
6. Show container status
EOF
exit 0
}
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
DO_RESTART=1
CHECK_MODE=0
VERBOSE=""
TARGETS=""
while [ $# -gt 0 ]; do
case "$1" in
--help|-h) usage ;;
--version|-V) echo "ppf-deploy $PPF_TOOLS_VERSION"; exit 0 ;;
--no-restart) DO_RESTART=0 ;;
--check) CHECK_MODE=1 ;;
-v) VERBOSE="-v" ;;
-*) die "Unknown option: $1" ;;
*) TARGETS="${TARGETS:+$TARGETS }$1" ;;
esac
shift
done
TARGETS="${TARGETS:-all}"
# ---------------------------------------------------------------------------
# Pre-flight: local syntax validation
# ---------------------------------------------------------------------------
validate_syntax
# ---------------------------------------------------------------------------
# Build ansible-playbook arguments
# ---------------------------------------------------------------------------
ARGS=(-i "$PLAYBOOK_DIR/inventory.ini")
ARGS+=(-e "ppf_src=$PPF_DIR")
if [ "$DO_RESTART" -eq 0 ]; then
ARGS+=(-e "ppf_restart=false")
fi
if [ "$CHECK_MODE" -eq 1 ]; then
ARGS+=(--check --diff)
fi
[ -n "$VERBOSE" ] && ARGS+=("$VERBOSE")
# Target resolution: map aliases to ansible --limit
case "$TARGETS" in
all) ;; # no --limit = all hosts in inventory
*)
LIMIT=$(resolve_targets $TARGETS | tr ' ' ',')
ARGS+=(--limit "$LIMIT")
;;
esac
ARGS+=("$PLAYBOOK_DIR/deploy.yml")
# ---------------------------------------------------------------------------
# Run playbook
# ---------------------------------------------------------------------------
section "Deploying to ${TARGETS}"
ansible_playbook_cmd "${ARGS[@]}"

tools/ppf-logs  (new executable file, 80 lines)

@@ -0,0 +1,80 @@
#!/bin/bash
# ppf-logs -- view PPF container logs
#
# Usage:
# ppf-logs [options] [node]
#
# Defaults to odin if no node specified.
set -eu
# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
cat <<EOF
Usage: ppf-logs [options] [node]
View PPF container logs.
Nodes:
odin, cassius, edge, sentinel (default: odin)
Options:
-f follow log output
-n LINES number of lines to show (default: 40)
--help show this help
--version show version
Examples:
ppf-logs last 40 lines from odin
ppf-logs cassius last 40 lines from cassius
ppf-logs -f edge follow edge worker logs
ppf-logs -n 100 sentinel last 100 lines from sentinel
EOF
exit 0
}
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
FOLLOW=0
LINES=40
NODE=""
while [ $# -gt 0 ]; do
case "$1" in
--help|-h) usage ;;
--version|-V) echo "ppf-logs $PPF_TOOLS_VERSION"; exit 0 ;;
-f) FOLLOW=1 ;;
-n) shift; LINES="${1:?'-n' requires a number}" ;;
-*) die "Unknown option: $1" ;;
*) NODE="$1" ;;
esac
shift
done
NODE="${NODE:-$MASTER}"
# Validate node
is_master "$NODE" || is_worker "$NODE" || die "Unknown node: $NODE"
CNAME=$(container_name "$NODE")
# ---------------------------------------------------------------------------
# Build podman logs command
# ---------------------------------------------------------------------------
CMD="podman logs --tail $LINES"
[ "$FOLLOW" -eq 1 ] && CMD="$CMD -f"
CMD="$CMD $CNAME"
section "$NODE ($CNAME)"
# Run with raw output -- logs go straight to terminal
podman_cmd "$NODE" "$CMD"

tools/ppf-service  (new executable file, 186 lines)

@@ -0,0 +1,186 @@
#!/bin/bash
# ppf-service -- manage PPF containers
#
# Usage:
# ppf-service <command> [nodes...]
#
# Commands: status, start, stop, restart
set -eu
# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"
ODIN_URL="http://10.200.1.250:8081"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
cat <<EOF
Usage: ppf-service <command> [nodes...]
Manage PPF containers on remote nodes.
Commands:
status show container state + health (default nodes: all)
start start containers (compose up -d)
stop stop containers (compose stop)
restart restart containers (compose restart)
Nodes:
all odin + all workers (default)
workers cassius, edge, sentinel
master odin
<hostname> specific host(s)
Options:
--help show this help
--version show version
Examples:
ppf-service status
ppf-service restart workers
ppf-service stop cassius edge
ppf-service start odin
EOF
exit 0
}
# ---------------------------------------------------------------------------
# Status helpers
# ---------------------------------------------------------------------------
show_health() {
local result
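  # Probe the manager health endpoint from odin itself; '|| true' keeps 'set -e'
  # from aborting when the curl or the ssh hop fails.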
result=$(ansible_cmd "$MASTER" -m raw -a \
"curl -sf --max-time 5 ${ODIN_URL}/health 2>/dev/null || echo UNREACHABLE" \
2>/dev/null) || true
if echo "$result" | grep -qi "ok\|healthy"; then
log_ok "master health: ok"
elif echo "$result" | grep -qi "UNREACHABLE"; then
log_err "master health: unreachable"
else
log_warn "master health: $result"
fi
}
show_workers_api() {
local result
result=$(ansible_cmd "$MASTER" -m raw -a \
"curl -sf --max-time 5 ${ODIN_URL}/api/workers 2>/dev/null || echo '{}'" \
2>/dev/null) || true
# Just show the raw output, trimmed
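  # grep -v '^[A-Z]' drops ansible's CHANGED/SUCCESS status lines, blank lines are
  # removed, and head keeps the dump short.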
local data
data=$(echo "$result" | grep -v '^\s*$' | grep -v '^[A-Z]' | head -20)
if [ -n "$data" ]; then
log_info "Worker API response:"
echo "$data" | while IFS= read -r line; do
log_dim "$line"
done
fi
}
# ---------------------------------------------------------------------------
# Commands
# ---------------------------------------------------------------------------
cmd_status() {
local hosts="$1"
section "Container status"
for host in $hosts; do
local output
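    # A failed compose call falls through to the 'unknown' branch below instead of
    # aborting the whole status run under 'set -e'.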
output=$(compose_cmd "$host" "ps" 2>/dev/null) || true
if echo "$output" | grep -qi "up\|running"; then
log_ok "$host"
elif echo "$output" | grep -qi "exit"; then
log_err "$host (exited)"
else
log_warn "$host (unknown)"
fi
echo "$output" | grep -v '^\s*$' | while IFS= read -r line; do
log_dim "$line"
done
done
# Show health/worker info if master is in target list
local h
for h in $hosts; do
if is_master "$h"; then
section "Master health"
show_health
show_workers_api
break
fi
done
}
cmd_start() {
local hosts="$1"
section "Starting containers"
for host in $hosts; do
compose_cmd "$host" "up -d" > /dev/null 2>&1 \
&& log_ok "$host started" \
|| log_err "$host start failed"
done
}
cmd_stop() {
local hosts="$1"
section "Stopping containers"
for host in $hosts; do
compose_cmd "$host" "stop" > /dev/null 2>&1 \
&& log_ok "$host stopped" \
|| log_err "$host stop failed"
done
}
cmd_restart() {
local hosts="$1"
section "Restarting containers"
for host in $hosts; do
compose_cmd "$host" "down" > /dev/null 2>&1 \
&& compose_cmd "$host" "up -d" > /dev/null 2>&1 \
&& log_ok "$host restarted" \
|| log_err "$host restart failed"
done
}
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
[ $# -eq 0 ] && usage
COMMAND=""
TARGETS=""
while [ $# -gt 0 ]; do
case "$1" in
--help|-h) usage ;;
--version|-V) echo "ppf-service $PPF_TOOLS_VERSION"; exit 0 ;;
status|start|stop|restart)
[ -n "$COMMAND" ] && die "Multiple commands given"
COMMAND="$1"
;;
-*) die "Unknown option: $1" ;;
*) TARGETS="${TARGETS:+$TARGETS }$1" ;;
esac
shift
done
[ -z "$COMMAND" ] && die "No command given. Use: status, start, stop, restart"
TARGETS="${TARGETS:-all}"
HOSTS=$(resolve_targets $TARGETS)
[ -z "$HOSTS" ] && die "No valid targets"
case "$COMMAND" in
status) cmd_status "$HOSTS" ;;
start) cmd_start "$HOSTS" ;;
stop) cmd_stop "$HOSTS" ;;
restart) cmd_restart "$HOSTS" ;;
esac
printf "\n"

tools/ppf-status Executable file

@@ -0,0 +1,246 @@
#!/bin/bash
# ppf-status -- PPF cluster overview
#
# Usage:
# ppf-status [options]
set -eu
# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"
ODIN_URL="http://127.0.0.1:8081"
PROXY_DB="/home/podman/ppf/data/proxies.sqlite"
URL_DB="/home/podman/ppf/data/websites.sqlite"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
cat <<EOF
Usage: ppf-status [options]
Show PPF cluster overview.
Options:
--json print the raw JSON from the workers API and exit
--help show this help
--version show version
Displays:
- Container health per node
- Worker stats (tested, working, rate, active)
- Odin manager stats (verification, queue)
- Database counts (proxies, URLs)
EOF
exit 0
}
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
RAW_JSON=0
while [ $# -gt 0 ]; do
case "$1" in
--help|-h) usage ;;
--version|-V) echo "ppf-status $PPF_TOOLS_VERSION"; exit 0 ;;
--json) RAW_JSON=1 ;;
-*) die "Unknown option: $1" ;;
*) die "Unknown argument: $1" ;;
esac
shift
done
# ---------------------------------------------------------------------------
# Fetch API data from odin (run on odin via curl to localhost)
# ---------------------------------------------------------------------------
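# The sed strips SSH "Shared connection ... closed" notices, blank lines, and
# host-prefixed ansible CHANGED/SUCCESS chatter so only the JSON body remains.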
api_json=$(ansible_cmd "$MASTER" -m raw -a \
"curl -sf --max-time 5 ${ODIN_URL}/api/workers 2>/dev/null || echo '{}'" \
2>/dev/null | sed 's/Shared connection.*closed\.\?//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d')
if [ "$RAW_JSON" -eq 1 ]; then
echo "$api_json"
exit 0
fi
# Check if we got valid data
if ! echo "$api_json" | python3 -c "import sys,json; json.load(sys.stdin)" 2>/dev/null; then
die "Failed to fetch API data from odin"
fi
# ---------------------------------------------------------------------------
# Container health
# ---------------------------------------------------------------------------
section "Containers"
for host in $ALL_HOSTS; do
output=$(compose_cmd "$host" "ps" 2>/dev/null) || true
if echo "$output" | grep -qi "up\|running"; then
log_ok "$host"
elif echo "$output" | grep -qi "exit"; then
log_err "$host (exited)"
else
log_warn "$host (unknown)"
fi
done
# ---------------------------------------------------------------------------
# Database summary (quick counts from odin)
# ---------------------------------------------------------------------------
section "Database"
proxy_count=$(ansible_cmd "$MASTER" -m raw -a \
"sudo -u podman sqlite3 '$PROXY_DB' 'SELECT COUNT(*) FROM proxylist;'" 2>/dev/null \
| sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?')
working_count=$(ansible_cmd "$MASTER" -m raw -a \
"sudo -u podman sqlite3 '$PROXY_DB' 'SELECT COUNT(*) FROM proxylist WHERE failed=0 AND proto IS NOT NULL;'" 2>/dev/null \
| sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?')
url_count=$(ansible_cmd "$MASTER" -m raw -a \
"sudo -u podman sqlite3 '$URL_DB' 'SELECT COUNT(*) FROM uris;'" 2>/dev/null \
| sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?')
log_info "Proxies: ${proxy_count} total, ${working_count} working"
log_info "URLs: ${url_count}"
# ---------------------------------------------------------------------------
# Parse and display via Python for clean formatting
# ---------------------------------------------------------------------------
echo "$api_json" | python3 -c "
import sys, json
NO_COLOR = __import__('os').environ.get('NO_COLOR', '')
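# Honour the NO_COLOR convention and disable colours when stdout is not a tty.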
# Colors
if not NO_COLOR and sys.stdout.isatty():
RST = '\033[0m'
DIM = '\033[2m'
BOLD = '\033[1m'
RED = '\033[38;5;167m'
GREEN = '\033[38;5;114m'
YELLOW = '\033[38;5;180m'
BLUE = '\033[38;5;110m'
CYAN = '\033[38;5;116m'
else:
RST = DIM = BOLD = RED = GREEN = YELLOW = BLUE = CYAN = ''
def ok(s): return GREEN + s + RST
def err(s): return RED + s + RST
def warn(s): return YELLOW + s + RST
def dim(s): return DIM + s + RST
def bold(s): return BOLD + CYAN + s + RST
# Compact number formatter, shared by the worker table, summary, and manager sections
def fmt(n):
    if n >= 1000000: return '%.1fM' % (n / 1000000)
    if n >= 1000: return '%.1fk' % (n / 1000)
    return str(n)
try:
    data = json.load(sys.stdin)
except ValueError:
    sys.exit(0)
workers = data.get('workers', [])
summary = data.get('summary', {})
queue = data.get('queue', {})
manager = data.get('manager', {})
# Workers table
print()
print(bold(' Workers'))
if workers:
# Header
print(dim(' %-12s %7s %9s %9s %7s %6s %s' % (
'NAME', 'TESTED', 'WORKING', 'FAILED', 'RATE', 'ACT', 'STATUS')))
for w in sorted(workers, key=lambda x: x.get('name', '')):
name = w.get('name', w.get('ip', '?'))
tested = w.get('proxies_tested', 0)
working = w.get('proxies_working', 0)
failed = w.get('proxies_failed', 0)
rate = w.get('success_rate', 0)
active = w.get('active', False)
threads = w.get('threads', 0)
act_str = ok('yes') if active else err('no')
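        # Success-rate colouring: >=30% green, 10-30% yellow, below 10% red.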
if rate >= 30:
rate_str = ok('%.1f%%' % rate)
elif rate >= 10:
rate_str = warn('%.1f%%' % rate)
else:
rate_str = err('%.1f%%' % rate)
age = w.get('age', 0)
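        # age > 300s with no active test is flagged stale (age presumably counts
        # seconds since the worker's last report).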
if age > 300 and not active:
status = err('stale (%dm)' % (age // 60))
elif active:
status = ok('testing')
else:
status = dim('idle')
print(' %-12s %7s %9s %9s %7s %6s %s' % (
name, fmt(tested), fmt(working), fmt(failed),
rate_str, act_str, status))
# Summary line
total_t = summary.get('total_tested', 0)
total_w = summary.get('total_working', 0)
total_f = summary.get('total_failed', 0)
overall = summary.get('overall_success_rate', 0)
active_count = data.get('active', 0)
total_count = data.get('total', 0)
print(dim(' %-12s %7s %9s %9s %7s %6s' % (
'TOTAL',
fmt(total_t) if total_t else '-',
fmt(total_w) if total_w else '-',
fmt(total_f) if total_f else '-',
'%.1f%%' % overall,
'%d/%d' % (active_count, total_count))))
else:
print(err(' no workers connected'))
# Manager (odin verification)
if manager:
print()
print(bold(' Odin Verification'))
m_rate = manager.get('success_rate', 0)
m_tested = manager.get('tested', 0)
m_passed = manager.get('passed', 0)
m_threads = manager.get('threads', 0)
m_speed = manager.get('rate', 0)
m_queue = manager.get('queue_size', 0)
m_uptime = manager.get('uptime', 0)
def fmt_time(s):
if s >= 3600: return '%dh%dm' % (s // 3600, (s % 3600) // 60)
if s >= 60: return '%dm%ds' % (s // 60, s % 60)
return '%ds' % s
if m_rate >= 30:
rate_str = ok('%.1f%%' % m_rate)
elif m_rate >= 10:
rate_str = warn('%.1f%%' % m_rate)
else:
rate_str = err('%.1f%%' % m_rate)
print(' threads: %d rate: %.2f/s uptime: %s' % (m_threads, m_speed, fmt_time(m_uptime)))
print(' tested: %s passed: %s success: %s' % (fmt(m_tested), fmt(m_passed), rate_str))
print(' queue: %d jobs' % m_queue)
# Queue
if queue:
print()
print(bold(' Proxy Queue'))
print(' total: %d due: %d pending: %d claimed: %d' % (
queue.get('total', 0), queue.get('due', 0),
queue.get('pending', 0), queue.get('claimed', 0)))
sess_tested = queue.get('session_tested', 0)
sess_pct = queue.get('session_pct', 0)
if sess_tested:
print(' session: %s tested (%.1f%%)' % (fmt(sess_tested), sess_pct))
print()
"