Compare commits
71 Commits
fab1e1d110
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
361b70ace9 | ||
|
|
9c7b7ba070 | ||
|
|
0669b38782 | ||
|
|
6130b196b1 | ||
|
|
ce2d28ab07 | ||
|
|
93eb395727 | ||
|
|
f9d237fe0d | ||
|
|
0f1fe981ef | ||
|
|
0a53e4457f | ||
|
|
2ea7eb41b7 | ||
|
|
98b232f3d3 | ||
|
|
b300afed6c | ||
|
|
eeadf656f5 | ||
|
|
7ae0ac0c26 | ||
|
|
35285a84bf | ||
|
|
438e956be9 | ||
|
|
5dd9060c2b | ||
|
|
304cdb3b4c | ||
|
|
9f926f4ab5 | ||
|
|
7705ef54f6 | ||
|
|
f5b9037763 | ||
|
|
56accde90d | ||
|
|
e985f52fe6 | ||
|
|
3e5c486e7e | ||
|
|
727ed86692 | ||
|
|
821ade95ef | ||
|
|
01b91836c4 | ||
|
|
04fb362181 | ||
|
|
304830e382 | ||
|
|
752ef359b5 | ||
|
|
af6e27bd77 | ||
|
|
c091216afc | ||
|
|
4cefdf976c | ||
|
|
98c2e74412 | ||
|
|
24d6f345f6 | ||
|
|
1ca096c78a | ||
|
|
15a7f0bb6a | ||
|
|
b6045bd05c | ||
|
|
d7b004f0ac | ||
|
|
00952b7947 | ||
|
|
6800995361 | ||
|
|
7a271896a8 | ||
|
|
8779979780 | ||
|
|
195d25c653 | ||
|
|
9b8be9d302 | ||
|
|
9eff4496d6 | ||
|
|
b1de91a969 | ||
|
|
df2078c7f7 | ||
|
|
782deab95d | ||
|
|
8208670fc1 | ||
|
|
d902ecafff | ||
|
|
fdb761f9f1 | ||
|
|
12f6b1d8eb | ||
|
|
1f14173595 | ||
|
|
2128814a41 | ||
|
|
7f59cae05c | ||
|
|
9b7ca20728 | ||
|
|
82c909d7c0 | ||
|
|
cb52a978e9 | ||
|
|
224d3642f9 | ||
|
|
d184dc2926 | ||
|
|
2782e6d754 | ||
|
|
0c3c7278f5 | ||
|
|
ff21c75a7a | ||
|
|
e0e56935f2 | ||
|
|
9ecf7d89bd | ||
|
|
ba9553f4aa | ||
|
|
dfcd8f0c00 | ||
|
|
4c5f4fa01d | ||
|
|
31bdb76a97 | ||
|
|
672c1bc1f8 |
@@ -8,11 +8,14 @@ on:
|
|||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
syntax-check:
|
validate:
|
||||||
runs-on: dotfiles
|
runs-on: dotfiles
|
||||||
|
container:
|
||||||
|
image: python:3-slim
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
run: |
|
run: |
|
||||||
|
apt-get update -qq && apt-get install -y -qq git >/dev/null
|
||||||
git clone --depth 1 --branch "${GITHUB_REF_NAME}" \
|
git clone --depth 1 --branch "${GITHUB_REF_NAME}" \
|
||||||
"https://oauth2:${{ github.token }}@${GITHUB_SERVER_URL#https://}/${GITHUB_REPOSITORY}.git" .
|
"https://oauth2:${{ github.token }}@${GITHUB_SERVER_URL#https://}/${GITHUB_REPOSITORY}.git" .
|
||||||
|
|
||||||
@@ -30,70 +33,30 @@ jobs:
|
|||||||
done
|
done
|
||||||
exit $failed
|
exit $failed
|
||||||
|
|
||||||
memory-leak-check:
|
- name: Import validation
|
||||||
runs-on: dotfiles
|
|
||||||
container:
|
|
||||||
image: python:3-slim
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
run: |
|
run: |
|
||||||
apt-get update && apt-get install -y git
|
echo "Verifying module imports..."
|
||||||
git clone --depth 1 --branch "${GITHUB_REF_NAME}" \
|
|
||||||
"https://oauth2:${{ github.token }}@${GITHUB_SERVER_URL#https://}/${GITHUB_REPOSITORY}.git" .
|
|
||||||
|
|
||||||
- name: Check for memory leak patterns
|
|
||||||
run: |
|
|
||||||
echo "Scanning for common memory leak patterns..."
|
|
||||||
failed=0
|
failed=0
|
||||||
|
for mod in comboparse config dbs job misc mysqlite network_stats stats translations; do
|
||||||
# Check for unbounded list/dict growth without limits
|
if python3 -c "import sys; sys.path.insert(0,'.'); import $mod; print('OK $mod')"; then
|
||||||
echo "Checking for unbounded collections..."
|
:
|
||||||
for f in ppf.py proxywatchd.py scraper.py httpd.py; do
|
else
|
||||||
if [ -f "$f" ]; then
|
echo "FAIL $mod"
|
||||||
# Look for .append() without corresponding size limits
|
failed=1
|
||||||
if grep -n "\.append(" "$f" | grep -v "# bounded" | grep -v "_max\|max_\|limit\|[:]\|pop(" > /tmp/unbounded 2>/dev/null; then
|
|
||||||
count=$(wc -l < /tmp/unbounded)
|
|
||||||
if [ "$count" -gt 20 ]; then
|
|
||||||
echo "WARN $f: $count potential unbounded appends"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
exit $failed
|
||||||
|
|
||||||
# Check for circular references
|
- name: YAML lint
|
||||||
echo "Checking for potential circular references..."
|
|
||||||
for f in ppf.py proxywatchd.py scraper.py httpd.py connection_pool.py; do
|
|
||||||
if [ -f "$f" ]; then
|
|
||||||
if grep -n "self\.\w* = self" "$f" 2>/dev/null; then
|
|
||||||
echo "WARN $f: potential self-reference"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Check for __del__ methods (often problematic)
|
|
||||||
echo "Checking for __del__ methods..."
|
|
||||||
for f in *.py; do
|
|
||||||
if grep -n "def __del__" "$f" 2>/dev/null; then
|
|
||||||
echo "WARN $f: has __del__ method (may cause leaks)"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Check that gc is imported where needed
|
|
||||||
echo "Checking gc module usage..."
|
|
||||||
for f in proxywatchd.py httpd.py; do
|
|
||||||
if [ -f "$f" ]; then
|
|
||||||
if ! grep -q "^import gc" "$f" && ! grep -q "^from gc" "$f"; then
|
|
||||||
echo "INFO $f: gc module not imported"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Memory leak pattern scan complete"
|
|
||||||
|
|
||||||
- name: Static import check
|
|
||||||
run: |
|
run: |
|
||||||
echo "Verifying imports..."
|
echo "Checking YAML files for tabs..."
|
||||||
python3 -c "import sys; sys.path.insert(0,'.'); import config; print('OK config')" || echo "FAIL config"
|
failed=0
|
||||||
python3 -c "import sys; sys.path.insert(0,'.'); import misc; print('OK misc')" || echo "FAIL misc"
|
for f in compose.master.yml compose.worker.yml .gitea/workflows/ci.yml; do
|
||||||
python3 -c "import sys; sys.path.insert(0,'.'); import mysqlite; print('OK mysqlite')" || echo "FAIL mysqlite"
|
if grep -qP '\t' "$f"; then
|
||||||
|
echo "FAIL $f: contains tabs"
|
||||||
|
failed=1
|
||||||
|
else
|
||||||
|
echo "OK $f"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
exit $failed
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,4 +6,5 @@ __pycache__/
|
|||||||
*.sqlite-shm
|
*.sqlite-shm
|
||||||
*.sqlite-wal
|
*.sqlite-wal
|
||||||
.claude/
|
.claude/
|
||||||
|
.venv/
|
||||||
data/
|
data/
|
||||||
|
|||||||
229
CLAUDE.md
229
CLAUDE.md
@@ -6,7 +6,7 @@
|
|||||||
┌──────────┬─────────────┬────────────────────────────────────────────────────────┐
|
┌──────────┬─────────────┬────────────────────────────────────────────────────────┐
|
||||||
│ Host │ Role │ Notes
|
│ Host │ Role │ Notes
|
||||||
├──────────┼─────────────┼────────────────────────────────────────────────────────┤
|
├──────────┼─────────────┼────────────────────────────────────────────────────────┤
|
||||||
│ odin │ Master │ Scrapes proxy lists, verifies conflicts, port 8081
|
│ odin │ Master │ API server + SSL-only proxy verification, port 8081
|
||||||
│ cassius │ Worker │ Tests proxies, reports to master via WireGuard
|
│ cassius │ Worker │ Tests proxies, reports to master via WireGuard
|
||||||
│ edge │ Worker │ Tests proxies, reports to master via WireGuard
|
│ edge │ Worker │ Tests proxies, reports to master via WireGuard
|
||||||
│ sentinel │ Worker │ Tests proxies, reports to master via WireGuard
|
│ sentinel │ Worker │ Tests proxies, reports to master via WireGuard
|
||||||
@@ -15,8 +15,8 @@
|
|||||||
|
|
||||||
### Role Separation
|
### Role Separation
|
||||||
|
|
||||||
- **Odin (Master)**: Scrapes proxy sources, does verification tests only. No routine testing. Local Tor only.
|
- **Odin (Master)**: API server + SSL-only proxy verification (10 threads). No URL cycling (workers handle it via `/api/claim-urls`). Local Tor only.
|
||||||
- **Workers**: All routine proxy testing. Each uses only local Tor (127.0.0.1:9050).
|
- **Workers**: All URL fetching (via `/api/claim-urls`) and proxy testing. Each uses only local Tor (127.0.0.1:9050).
|
||||||
|
|
||||||
## CRITICAL: Directory Structure Differences
|
## CRITICAL: Directory Structure Differences
|
||||||
|
|
||||||
@@ -25,92 +25,111 @@
|
|||||||
│ Host │ Code Location │ Container Mount
|
│ Host │ Code Location │ Container Mount
|
||||||
├──────────┼─────────────────────────┼──────────────────────────────────────────┤
|
├──────────┼─────────────────────────┼──────────────────────────────────────────┤
|
||||||
│ odin │ /home/podman/ppf/*.py │ Mounts ppf/ directly to /app
|
│ odin │ /home/podman/ppf/*.py │ Mounts ppf/ directly to /app
|
||||||
│ workers │ /home/podman/ppf/src/ │ Mounts ppf/src/ to /app (via systemd)
|
│ workers │ /home/podman/ppf/src/ │ Mounts ppf/src/ to /app (via compose)
|
||||||
└──────────┴─────────────────────────┴──────────────────────────────────────────┘
|
└──────────┴─────────────────────────┴──────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
**ODIN uses root ppf/ directory. WORKERS use ppf/src/ subdirectory.**
|
**ODIN uses root ppf/ directory. WORKERS use ppf/src/ subdirectory.**
|
||||||
|
|
||||||
## Host Access
|
## Operations Toolkit
|
||||||
|
|
||||||
**ALWAYS use Ansible from `/opt/ansible` with venv activated:**
|
All deployment and service management is handled by `tools/`:
|
||||||
|
|
||||||
|
```
|
||||||
|
tools/
|
||||||
|
lib/ppf-common.sh shared library (hosts, wrappers, colors)
|
||||||
|
ppf-deploy deploy wrapper (local validation + playbook)
|
||||||
|
ppf-logs view container logs
|
||||||
|
ppf-service manage containers (status/start/stop/restart)
|
||||||
|
ppf-db database operations (stats/purge-proxies/vacuum)
|
||||||
|
ppf-status cluster overview (containers, workers, queue)
|
||||||
|
playbooks/
|
||||||
|
deploy.yml ansible playbook (sync, compose, restart)
|
||||||
|
inventory.ini hosts with WireGuard IPs + SSH key
|
||||||
|
group_vars/
|
||||||
|
all.yml shared vars (ppf_base, ppf_owner)
|
||||||
|
master.yml odin paths + compose file
|
||||||
|
workers.yml worker paths + compose file
|
||||||
|
```
|
||||||
|
|
||||||
|
Symlinked to `~/.local/bin/` for direct use.
|
||||||
|
|
||||||
|
### Connectivity
|
||||||
|
|
||||||
|
All tools connect over WireGuard (`10.200.1.0/24`) as user `ansible`
|
||||||
|
with the SSH key at `/opt/ansible/secrets/ssh/ansible`.
|
||||||
|
|
||||||
|
### Deployment
|
||||||
|
|
||||||
|
`ppf-deploy` validates syntax locally, then runs the Ansible playbook.
|
||||||
|
Hosts execute in parallel; containers restart only when files change.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ppf-deploy # all nodes: validate, sync, restart
|
||||||
|
ppf-deploy odin # master only
|
||||||
|
ppf-deploy workers # cassius, edge, sentinel
|
||||||
|
ppf-deploy cassius edge # specific hosts
|
||||||
|
ppf-deploy --no-restart # sync only, skip restart
|
||||||
|
ppf-deploy --check # dry run (ansible --check --diff)
|
||||||
|
ppf-deploy -v # verbose ansible output
|
||||||
|
```
|
||||||
|
|
||||||
|
Playbook steps (per host, in parallel):
|
||||||
|
1. Rsync `*.py` + `servers.txt` (role-aware destination via group_vars)
|
||||||
|
2. Copy compose file per role (`compose.master.yml` / `compose.worker.yml`)
|
||||||
|
3. Fix ownership (`podman:podman`, recursive)
|
||||||
|
4. Restart containers via handler (only if files changed)
|
||||||
|
5. Show container status
|
||||||
|
|
||||||
|
### Container Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ppf-logs # last 40 lines from odin
|
||||||
|
ppf-logs cassius # specific worker
|
||||||
|
ppf-logs -f edge # follow mode
|
||||||
|
ppf-logs -n 100 sentinel # last N lines
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ppf-service status # all nodes: compose ps + health
|
||||||
|
ppf-service status workers # workers only
|
||||||
|
ppf-service restart odin # restart master
|
||||||
|
ppf-service stop cassius # stop specific worker
|
||||||
|
ppf-service start workers # start all workers
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ppf-db stats # proxy and URL counts
|
||||||
|
ppf-db purge-proxies # stop odin, delete all proxies, restart
|
||||||
|
ppf-db vacuum # reclaim disk space
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cluster Status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ppf-status # full overview: containers, DB, workers, queue
|
||||||
|
ppf-status --json # raw JSON from odin API
|
||||||
|
```
|
||||||
|
|
||||||
|
### Direct Ansible (for operations not covered by tools)
|
||||||
|
|
||||||
|
Use the toolkit inventory for ad-hoc commands over WireGuard:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd /opt/ansible && source venv/bin/activate
|
cd /opt/ansible && source venv/bin/activate
|
||||||
```
|
INV=/home/user/git/ppf/tools/playbooks/inventory.ini
|
||||||
|
|
||||||
### Quick Reference Commands
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Check worker status
|
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible cassius,edge,sentinel -m shell -a "hostname"
|
|
||||||
|
|
||||||
# Check worker config
|
# Check worker config
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible cassius,edge,sentinel -m shell -a "grep -E 'threads|timeout|ssl' /home/podman/ppf/config.ini"
|
ansible -i $INV workers -m shell \
|
||||||
|
-a "grep -E 'threads|timeout|ssl' /home/podman/ppf/config.ini"
|
||||||
# Check worker logs (dynamic UID)
|
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible cassius -m raw \
|
|
||||||
-a "uid=\$(id -u podman) && sudo -u podman podman logs --tail 20 ppf-worker"
|
|
||||||
|
|
||||||
# Modify config option
|
# Modify config option
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible cassius,edge,sentinel -m lineinfile -a "path=/home/podman/ppf/config.ini line='ssl_only = 1' insertafter='ssl_first'"
|
ansible -i $INV workers -m lineinfile \
|
||||||
|
-a "path=/home/podman/ppf/config.ini line='ssl_only = 1' insertafter='ssl_first'"
|
||||||
# Restart workers (dynamic UID discovery)
|
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible cassius,edge,sentinel -m raw \
|
|
||||||
-a "uid=\$(id -u podman) && sudo -u podman XDG_RUNTIME_DIR=/run/user/\$uid podman restart ppf-worker"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Full Deployment Procedure
|
|
||||||
|
|
||||||
### Step 1: Validate Syntax Locally
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd /home/user/git/ppf
|
|
||||||
for f in *.py; do python3 -m py_compile "$f" && echo "OK: $f"; done
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 2: Deploy to ALL Hosts
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd /opt/ansible && source venv/bin/activate
|
|
||||||
|
|
||||||
# Deploy to ODIN (root ppf/ directory)
|
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible odin -m synchronize \
|
|
||||||
-a "src=/home/user/git/ppf/ dest=/home/podman/ppf/ rsync_opts='--include=*.py,--include=servers.txt,--exclude=*'"
|
|
||||||
|
|
||||||
# Deploy to WORKERS (ppf/src/ subdirectory)
|
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible cassius,edge,sentinel -m synchronize \
|
|
||||||
-a "src=/home/user/git/ppf/ dest=/home/podman/ppf/src/ rsync_opts='--include=*.py,--include=servers.txt,--exclude=*'"
|
|
||||||
|
|
||||||
# CRITICAL: Fix ownership on ALL hosts (rsync uses ansible user, containers need podman)
|
|
||||||
ANSIBLE_REMOTE_TMP=/tmp/.ansible ansible odin,cassius,edge,sentinel -m raw \
|
|
||||||
-a "chown -R podman:podman /home/podman/ppf/"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note:** Ownership must be fixed after every deploy. rsync runs as ansible user, but containers run as podman user. Missing ownership fix causes `ImportError: No module named X` errors.
|
|
||||||
|
|
||||||
### Step 3: Restart Services
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Restart ODIN (UID 1005)
|
|
||||||
ansible odin -m raw \
|
|
||||||
-a "cd /tmp && XDG_RUNTIME_DIR=/run/user/1005 runuser -u podman -- podman restart ppf"
|
|
||||||
|
|
||||||
# Restart WORKERS (dynamic UID discovery)
|
|
||||||
ansible cassius,edge,sentinel -m raw \
|
|
||||||
-a "uid=\$(id -u podman) && sudo -u podman XDG_RUNTIME_DIR=/run/user/\$uid podman restart ppf-worker"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 4: Verify All Running
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Check odin (UID 1005)
|
|
||||||
ansible odin -m raw \
|
|
||||||
-a "cd /tmp && XDG_RUNTIME_DIR=/run/user/1005 runuser -u podman -- podman ps"
|
|
||||||
|
|
||||||
# Check workers (dynamic UID discovery)
|
|
||||||
ansible cassius,edge,sentinel -m raw \
|
|
||||||
-a "uid=\$(id -u podman) && sudo -u podman XDG_RUNTIME_DIR=/run/user/\$uid podman ps --format '{{.Names}} {{.Status}}'"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Podman User IDs
|
## Podman User IDs
|
||||||
@@ -137,11 +156,19 @@ ansible cassius,edge,sentinel -m raw \
|
|||||||
tor_hosts = 127.0.0.1:9050 # Local Tor ONLY
|
tor_hosts = 127.0.0.1:9050 # Local Tor ONLY
|
||||||
|
|
||||||
[watchd]
|
[watchd]
|
||||||
threads = 0 # NO routine testing
|
threads = 10 # SSL-only verification of worker-reported proxies
|
||||||
database = data/ppf.sqlite
|
timeout = 7
|
||||||
|
checktype = none # No secondary check
|
||||||
|
ssl_first = 1
|
||||||
|
ssl_only = 1
|
||||||
|
database = data/proxies.sqlite
|
||||||
|
|
||||||
|
[ppf]
|
||||||
|
threads = 0 # NO URL cycling (workers handle it)
|
||||||
|
database = data/websites.sqlite
|
||||||
|
|
||||||
[scraper]
|
[scraper]
|
||||||
threads = 10
|
enabled = 0 # Disabled on master
|
||||||
```
|
```
|
||||||
|
|
||||||
### Worker config.ini
|
### Worker config.ini
|
||||||
@@ -155,7 +182,7 @@ threads = 35
|
|||||||
timeout = 9
|
timeout = 9
|
||||||
ssl_first = 1 # Try SSL handshake first
|
ssl_first = 1 # Try SSL handshake first
|
||||||
ssl_only = 0 # Set to 1 to skip secondary check on SSL failure
|
ssl_only = 0 # Set to 1 to skip secondary check on SSL failure
|
||||||
checktype = head # Secondary check type: head, irc, judges
|
checktype = head # Secondary check: head, irc, judges, none (SSL-only)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Config Options
|
### Config Options
|
||||||
@@ -166,7 +193,7 @@ checktype = head # Secondary check type: head, irc, judges
|
|||||||
├───────────────┼─────────┼────────────────────────────────────────────────────┤
|
├───────────────┼─────────┼────────────────────────────────────────────────────┤
|
||||||
│ ssl_first │ 1 │ Try SSL handshake first, fallback to checktype
|
│ ssl_first │ 1 │ Try SSL handshake first, fallback to checktype
|
||||||
│ ssl_only │ 0 │ Skip secondary check when SSL fails (faster)
|
│ ssl_only │ 0 │ Skip secondary check when SSL fails (faster)
|
||||||
│ checktype │ head │ Secondary check: head, irc, judges
|
│ checktype │ head │ Secondary check: head, irc, judges, none/false
|
||||||
│ threads │ 20 │ Number of test threads
|
│ threads │ 20 │ Number of test threads
|
||||||
│ timeout │ 15 │ Socket timeout in seconds
|
│ timeout │ 15 │ Socket timeout in seconds
|
||||||
└───────────────┴─────────┴────────────────────────────────────────────────────┘
|
└───────────────┴─────────┴────────────────────────────────────────────────────┘
|
||||||
@@ -185,20 +212,15 @@ batch_size = clamp(fair_share, min=100, max=1000)
|
|||||||
- Workers shuffle their batch locally to avoid testing same proxies simultaneously
|
- Workers shuffle their batch locally to avoid testing same proxies simultaneously
|
||||||
- Claims expire after 5 minutes if not completed
|
- Claims expire after 5 minutes if not completed
|
||||||
|
|
||||||
## Worker Container
|
## Container Management
|
||||||
|
|
||||||
Workers run as podman containers with `--restart=unless-stopped`:
|
All nodes run via `podman-compose` with role-specific compose files:
|
||||||
|
|
||||||
```bash
|
- **Odin**: `compose.master.yml` -> deployed as `compose.yml`
|
||||||
podman run -d --name ppf-worker --network=host --restart=unless-stopped \
|
- **Workers**: `compose.worker.yml` -> deployed as `compose.yml`
|
||||||
-e PYTHONUNBUFFERED=1 \
|
|
||||||
-v /home/podman/ppf/src:/app:ro,Z \
|
Containers are managed exclusively through compose. No systemd user
|
||||||
-v /home/podman/ppf/data:/app/data:Z \
|
services or standalone `podman run` commands.
|
||||||
-v /home/podman/ppf/config.ini:/app/config.ini:ro,Z \
|
|
||||||
-v /home/podman/ppf/servers.txt:/app/servers.txt:ro,Z \
|
|
||||||
localhost/ppf-worker:latest \
|
|
||||||
python -u ppf.py --worker --server http://10.200.1.250:8081
|
|
||||||
```
|
|
||||||
|
|
||||||
## Rebuilding Images
|
## Rebuilding Images
|
||||||
|
|
||||||
@@ -231,10 +253,9 @@ ansible odin -m raw \
|
|||||||
|
|
||||||
### Missing servers.txt
|
### Missing servers.txt
|
||||||
|
|
||||||
Workers need `servers.txt` in src/:
|
Redeploy syncs `servers.txt` automatically:
|
||||||
```bash
|
```bash
|
||||||
ansible cassius,edge,sentinel -m copy \
|
ppf-deploy workers
|
||||||
-a "src=/home/user/git/ppf/servers.txt dest=/home/podman/ppf/src/servers.txt owner=podman group=podman"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Exit Code 126 (Permission/Storage)
|
### Exit Code 126 (Permission/Storage)
|
||||||
@@ -246,21 +267,17 @@ sudo -u podman podman system reset --force
|
|||||||
|
|
||||||
### Dashboard Shows NaN or Missing Data
|
### Dashboard Shows NaN or Missing Data
|
||||||
|
|
||||||
Odin likely running old code. Redeploy to odin:
|
Odin likely running old code:
|
||||||
```bash
|
```bash
|
||||||
ansible odin -m synchronize \
|
ppf-deploy odin
|
||||||
-a "src=/home/user/git/ppf/ dest=/home/podman/ppf/ rsync_opts='--include=*.py,--include=servers.txt,--exclude=*'"
|
|
||||||
ansible odin -m raw -a "chown -R podman:podman /home/podman/ppf/"
|
|
||||||
ansible odin -m raw -a "cd /tmp; sudo -u podman podman restart ppf"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Worker Keeps Crashing
|
### Worker Keeps Crashing
|
||||||
|
|
||||||
1. Check container status: `sudo -u podman podman ps -a`
|
1. Check status: `ppf-service status workers`
|
||||||
2. Check logs: `sudo -u podman podman logs --tail 50 ppf-worker`
|
2. Check logs: `ppf-logs -n 50 cassius`
|
||||||
3. Verify servers.txt exists in src/
|
3. Redeploy (fixes ownership + servers.txt): `ppf-deploy cassius`
|
||||||
4. Check ownership: `ls -la /home/podman/ppf/src/`
|
4. If still failing, run manually on the host to see error:
|
||||||
5. Run manually to see error:
|
|
||||||
```bash
|
```bash
|
||||||
sudo -u podman podman run --rm --network=host \
|
sudo -u podman podman run --rm --network=host \
|
||||||
-v /home/podman/ppf/src:/app:ro,Z \
|
-v /home/podman/ppf/src:/app:ro,Z \
|
||||||
|
|||||||
29
Dockerfile.test
Normal file
29
Dockerfile.test
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM python:2.7-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN sed -i 's/deb.debian.org/archive.debian.org/g' /etc/apt/sources.list && \
|
||||||
|
sed -i 's/security.debian.org/archive.debian.org/g' /etc/apt/sources.list && \
|
||||||
|
sed -i '/buster-updates/d' /etc/apt/sources.list && \
|
||||||
|
echo 'deb http://archive.debian.org/debian-security buster/updates main' >> /etc/apt/sources.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get upgrade -y && \
|
||||||
|
apt-get install -y --no-install-recommends gcc libc-dev && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
RUN pip install --upgrade "pip<21" "setuptools<45" "wheel<0.38"
|
||||||
|
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install -r requirements.txt || true
|
||||||
|
RUN pip install pytest
|
||||||
|
|
||||||
|
RUN mkdir -p /app/data && \
|
||||||
|
python -c "import pyasn" 2>/dev/null && \
|
||||||
|
pyasn_util_download.py --latest && \
|
||||||
|
pyasn_util_convert.py --single rib.*.bz2 /app/data/ipasn.dat && \
|
||||||
|
rm -f rib.*.bz2 || \
|
||||||
|
echo "pyasn database setup skipped"
|
||||||
|
|
||||||
|
RUN apt-get purge -y gcc libc-dev && apt-get autoremove -y || true
|
||||||
|
|
||||||
|
CMD ["python", "-m", "pytest", "tests/", "-v", "--tb=short"]
|
||||||
58
README.md
58
README.md
@@ -197,46 +197,40 @@ stale_count INT -- checks without new proxies
|
|||||||
|
|
||||||
## Deployment
|
## Deployment
|
||||||
|
|
||||||
### Systemd Service
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[Unit]
|
|
||||||
Description=PPF Python Proxy Finder
|
|
||||||
After=network-online.target tor.service
|
|
||||||
Wants=network-online.target
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=simple
|
|
||||||
User=ppf
|
|
||||||
WorkingDirectory=/opt/ppf
|
|
||||||
# ppf.py is the main entry point (runs harvester + validator)
|
|
||||||
ExecStart=/usr/bin/python2 ppf.py
|
|
||||||
Restart=on-failure
|
|
||||||
RestartSec=30
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
```
|
|
||||||
|
|
||||||
### Container Deployment
|
### Container Deployment
|
||||||
|
|
||||||
|
All nodes use `podman-compose` with role-specific compose files
|
||||||
|
(rootless, as `podman` user). `--network=host` required for Tor
|
||||||
|
access at 127.0.0.1:9050.
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
# Build
|
# Build image
|
||||||
podman build -t ppf:latest .
|
podman build -t ppf:latest .
|
||||||
|
|
||||||
# Run with persistent storage
|
# Start via compose
|
||||||
# IMPORTANT: Use ppf.py as entry point (runs both harvester + validator)
|
podman-compose up -d
|
||||||
podman run -d --name ppf \
|
|
||||||
--network=host \
|
|
||||||
-v ./data:/app/data:Z \
|
|
||||||
-v ./config.ini:/app/config.ini:ro \
|
|
||||||
ppf:latest python ppf.py
|
|
||||||
|
|
||||||
# Generate systemd unit
|
# View logs / stop
|
||||||
podman generate systemd --name ppf --files --new
|
podman-compose logs -f
|
||||||
|
podman-compose down
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: `--network=host` required for Tor access at 127.0.0.1:9050.
|
### Operations Toolkit
|
||||||
|
|
||||||
|
The `tools/` directory provides CLI wrappers for multi-node operations.
|
||||||
|
Deployment uses an Ansible playbook over WireGuard for parallel execution
|
||||||
|
and handler-based restarts.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ppf-deploy [targets...] # validate + deploy + restart (playbook)
|
||||||
|
ppf-deploy --check # dry run with diff
|
||||||
|
ppf-logs [node] # view container logs (-f to follow)
|
||||||
|
ppf-service <cmd> [nodes...] # status / start / stop / restart
|
||||||
|
ppf-db <cmd> # stats / purge-proxies / vacuum
|
||||||
|
ppf-status # cluster overview (containers, workers, queue)
|
||||||
|
```
|
||||||
|
|
||||||
|
See `--help` on each tool.
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
|
|||||||
158
ROADMAP.md
158
ROADMAP.md
@@ -1,65 +1,100 @@
|
|||||||
# PPF Project Roadmap
|
# PPF Roadmap
|
||||||
|
|
||||||
## Project Purpose
|
## Architecture
|
||||||
|
|
||||||
PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework designed to:
|
|
||||||
|
|
||||||
1. **Discover** proxy addresses by crawling websites and search engines
|
|
||||||
2. **Validate** proxies through multi-target testing via Tor
|
|
||||||
3. **Maintain** a database of working proxies with protocol detection (SOCKS4/SOCKS5/HTTP)
|
|
||||||
|
|
||||||
## Architecture Overview
|
|
||||||
|
|
||||||
```
|
```
|
||||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
┌──────────────────────────────────────────┐
|
||||||
│ PPF Architecture │
|
│ Odin (Master) │
|
||||||
├─────────────────────────────────────────────────────────────────────────────┤
|
│ httpd.py ─ API + SSL-only verification │
|
||||||
│ │
|
│ proxywatchd.py ─ proxy recheck daemon │
|
||||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
│ SQLite: proxies.db, websites.db │
|
||||||
│ │ scraper.py │ │ ppf.py │ │proxywatchd │ │
|
└──────────┬───────────────────────────────┘
|
||||||
│ │ │ │ │ │ │ │
|
│ WireGuard (10.200.1.0/24)
|
||||||
│ │ Searx query │───>│ URL harvest │───>│ Proxy test │ │
|
┌────────────────┼────────────────┐
|
||||||
│ │ URL finding │ │ Proxy extract│ │ Validation │ │
|
v v v
|
||||||
│ └─────────────┘ └─────────────┘ └─────────────┘ │
|
┌───────────┐ ┌───────────┐ ┌───────────┐
|
||||||
│ │ │ │ │
|
│ cassius │ │ edge │ │ sentinel │
|
||||||
│ v v v │
|
│ Worker │ │ Worker │ │ Worker │
|
||||||
│ ┌─────────────────────────────────────────────────────────────────┐ │
|
│ ppf.py │ │ ppf.py │ │ ppf.py │
|
||||||
│ │ SQLite Databases │ │
|
└───────────┘ └───────────┘ └───────────┘
|
||||||
│ │ uris.db (URLs) proxies.db (proxy list) │ │
|
|
||||||
│ └─────────────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Network Layer │ │
|
|
||||||
│ │ rocksock.py ─── Tor SOCKS ─── Test Proxy ─── Target Server │ │
|
|
||||||
│ └─────────────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
└─────────────────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Workers claim URLs, extract proxies, test them, report back.
|
||||||
|
Master verifies (SSL-only), serves API, coordinates distribution.
|
||||||
|
|
||||||
## Constraints
|
## Constraints
|
||||||
|
|
||||||
- **Python 2.7** compatibility required
|
- Python 2.7 runtime (container-based)
|
||||||
- **Minimal external dependencies** (avoid adding new modules)
|
- Minimal external dependencies
|
||||||
- Current dependencies: beautifulsoup4, pyasn, IP2Location
|
- All traffic via Tor
|
||||||
- Data files: IP2LOCATION-LITE-DB1.BIN (country), ipasn.dat (ASN)
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Open Work
|
## Phase 1: Performance and Quality (current)
|
||||||
|
|
||||||
### Validation
|
Profiling-driven optimizations and source pipeline hardening.
|
||||||
|
|
||||||
| Task | Description | File(s) |
|
| Item | Status | Description |
|
||||||
|------|-------------|---------|
|
|------|--------|-------------|
|
||||||
| Protocol fingerprinting | Better SOCKS4/SOCKS5/HTTP detection | rocksock.py |
|
| Extraction short-circuits | done | Guard clauses in fetch.py extractors |
|
||||||
|
| Skip shutdown on failed sockets | done | Track _connected flag, skip shutdown on dead sockets |
|
||||||
|
| SQLite connection reuse (odin) | done | Per-greenlet cached handles via threading.local |
|
||||||
|
| Lazy-load ASN database | done | Defer ipasn.dat parsing to first lookup |
|
||||||
|
| Add more seed sources (100+) | done | Expanded to 120+ URLs with SOCKS5-specific sources |
|
||||||
|
| Protocol-aware source weighting | done | Dynamic SOCKS boost in claim_urls scoring |
|
||||||
|
| Sharpen error penalty in URL scoring | done | Reduce erroring URL claim frequency |
|
||||||
|
|
||||||
### Target Management
|
## Phase 2: Proxy Diversity and Consumer API
|
||||||
|
|
||||||
| Task | Description | File(s) |
|
Address customer-reported quality gaps.
|
||||||
|------|-------------|---------|
|
|
||||||
| Dynamic target pool | Auto-discover and rotate validation targets | proxywatchd.py |
|
| Item | Status | Description |
|
||||||
| Target health tracking | Remove unresponsive targets from pool | proxywatchd.py |
|
|------|--------|-------------|
|
||||||
| Geographic target spread | Ensure targets span multiple regions | config.py |
|
| ASN diversity scoring | pending | Deprioritize over-represented ASNs in testing |
|
||||||
|
| Graduated recheck intervals | pending | Fresh proxies rechecked more often than stale |
|
||||||
|
| API filters (proto/country/ASN/latency) | pending | Consumer-facing query parameters on /proxies |
|
||||||
|
| Latency-based ranking | pending | Expose latency percentiles per proxy |
|
||||||
|
|
||||||
|
## Phase 3: Self-Expanding Source Pool
|
||||||
|
|
||||||
|
Worker-driven link discovery from productive pages.
|
||||||
|
|
||||||
|
| Item | Status | Description |
|
||||||
|
|------|--------|-------------|
|
||||||
|
| Link extraction from productive pages | pending | Parse HTML for links when page yields proxies |
|
||||||
|
| Report discovered URLs to master | pending | New endpoint for worker URL submissions |
|
||||||
|
| Conditional discovery | pending | Only extract links from confirmed-productive pages |
|
||||||
|
|
||||||
|
## Phase 4: Long-Term
|
||||||
|
|
||||||
|
| Item | Status | Description |
|
||||||
|
|------|--------|-------------|
|
||||||
|
| Python 3 migration | deferred | Unblocks modern deps, security patches, pyasn native |
|
||||||
|
| Worker trust scoring | pending | Activate spot-check verification framework |
|
||||||
|
| Dynamic target pool | pending | Auto-discover and rotate validation targets |
|
||||||
|
| Geographic target spread | pending | Ensure targets span multiple regions |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Completed
|
||||||
|
|
||||||
|
| Item | Date | Description |
|
||||||
|
|------|------|-------------|
|
||||||
|
| Sharpen URL error penalty | 2026-02-22 | error*0.5 cap 4.0 + stale*0.2 cap 1.5 |
|
||||||
|
| SOCKS5 source expansion | 2026-02-22 | Added 10 new SOCKS5-specific sources |
|
||||||
|
| SQLite connection reuse | 2026-02-22 | Per-greenlet cached handles via threading.local |
|
||||||
|
| Lazy-load ASN database | 2026-02-22 | Deferred ipasn.dat to first lookup |
|
||||||
|
| Socket shutdown skip | 2026-02-22 | _connected flag, skip shutdown on dead sockets |
|
||||||
|
| Protocol-aware weighting | 2026-02-22 | Dynamic SOCKS boost in claim_urls scoring |
|
||||||
|
| Seed sources expanded | 2026-02-22 | 37 -> 120+ URLs |
|
||||||
|
| last_seen freshness fix | 2026-02-22 | Watchd updates last_seen on verification |
|
||||||
|
| Periodic re-seeding | 2026-02-22 | Reset errored sources every 6h |
|
||||||
|
| ASN enrichment | 2026-02-22 | Pure-Python ipasn.dat reader + backfill |
|
||||||
|
| URL pipeline stats | 2026-02-22 | /api/stats exposes source health metrics |
|
||||||
|
| Extraction short-circuits | 2026-02-22 | Guard clauses + precompiled table regexes |
|
||||||
|
| Target health tracking | prior | Cooldown-based health for all target pools |
|
||||||
|
| MITM field in proxy list | prior | Expose mitm boolean in JSON endpoints |
|
||||||
|
| V1 worker protocol removal | prior | Cleaned up legacy --worker code path |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -67,25 +102,12 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design
|
|||||||
|
|
||||||
| File | Purpose |
|
| File | Purpose |
|
||||||
|------|---------|
|
|------|---------|
|
||||||
| ppf.py | Main URL harvester daemon |
|
| ppf.py | URL harvester, worker main loop |
|
||||||
| proxywatchd.py | Proxy validation daemon |
|
| proxywatchd.py | Proxy validation daemon |
|
||||||
| scraper.py | Searx search integration |
|
| fetch.py | HTTP fetching, proxy extraction |
|
||||||
| fetch.py | HTTP fetching with proxy support |
|
| httpd.py | API server, worker coordination |
|
||||||
| dbs.py | Database schema and inserts |
|
| dbs.py | Database schema, seed sources |
|
||||||
| mysqlite.py | SQLite wrapper |
|
|
||||||
| rocksock.py | Socket/proxy abstraction (3rd party) |
|
|
||||||
| http2.py | HTTP client implementation |
|
|
||||||
| httpd.py | Web dashboard and REST API server |
|
|
||||||
| config.py | Configuration management |
|
| config.py | Configuration management |
|
||||||
| comboparse.py | Config/arg parser framework |
|
| rocksock.py | Socket/proxy abstraction |
|
||||||
| soup_parser.py | BeautifulSoup wrapper |
|
| http2.py | HTTP client implementation |
|
||||||
| misc.py | Utilities (timestamp, logging) |
|
| tools/ppf-deploy | Deployment wrapper |
|
||||||
| export.py | Proxy export CLI tool |
|
|
||||||
| engines.py | Search engine implementations |
|
|
||||||
| connection_pool.py | Tor connection pooling |
|
|
||||||
| network_stats.py | Network statistics tracking |
|
|
||||||
| dns.py | DNS resolution with caching |
|
|
||||||
| mitm.py | MITM certificate detection |
|
|
||||||
| job.py | Priority job queue |
|
|
||||||
| static/dashboard.js | Dashboard frontend logic |
|
|
||||||
| static/dashboard.html | Dashboard HTML template |
|
|
||||||
|
|||||||
34
TASKLIST.md
Normal file
34
TASKLIST.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# PPF Tasklist
|
||||||
|
|
||||||
|
Active execution queue. Ordered by priority.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## In Progress
|
||||||
|
|
||||||
|
| # | Task | File(s) | Notes |
|
||||||
|
|---|------|---------|-------|
|
||||||
|
|
||||||
|
## Queued
|
||||||
|
|
||||||
|
| # | Task | File(s) | Notes |
|
||||||
|
|---|------|---------|-------|
|
||||||
|
| 12 | API filters on /proxies (proto/country/ASN) | httpd.py | Consumer query params |
|
||||||
|
| 8 | Graduated recheck intervals | proxywatchd.py | Fresh proxies checked more often |
|
||||||
|
|
||||||
|
## Done
|
||||||
|
|
||||||
|
| # | Task | Date |
|
||||||
|
|---|------|------|
|
||||||
|
| - | Sharpen URL error penalty scoring | 2026-02-22 |
|
||||||
|
| - | Add SOCKS5-specific sources (10 new) | 2026-02-22 |
|
||||||
|
| 3 | Lazy-load ASN database | 2026-02-22 |
|
||||||
|
| 2 | SQLite connection reuse on odin | 2026-02-22 |
|
||||||
|
| 1 | Skip socket.shutdown on failed connections | 2026-02-22 |
|
||||||
|
| 4 | Add more seed sources (100+) | 2026-02-22 |
|
||||||
|
| 6 | Protocol-aware source weighting | 2026-02-22 |
|
||||||
|
| - | Extraction short-circuits | 2026-02-22 |
|
||||||
|
| - | last_seen freshness fix | 2026-02-22 |
|
||||||
|
| - | Periodic re-seeding | 2026-02-22 |
|
||||||
|
| - | ASN enrichment | 2026-02-22 |
|
||||||
|
| - | URL pipeline stats | 2026-02-22 |
|
||||||
74
TODO.md
74
TODO.md
@@ -1,73 +1,35 @@
|
|||||||
# PPF TODO
|
# PPF TODO
|
||||||
|
|
||||||
## Optimization
|
Intake buffer. Items refined here move to TASKLIST.md.
|
||||||
|
|
||||||
### [ ] JSON Stats Response Caching
|
|
||||||
|
|
||||||
- Cache serialized JSON response with short TTL (1-2s)
|
|
||||||
- Only regenerate when underlying stats change
|
|
||||||
- Use ETag/If-None-Match for client-side caching
|
|
||||||
- Savings: ~7-9s/hour. Low priority, only matters with frequent dashboard access.
|
|
||||||
|
|
||||||
### [ ] Object Pooling for Test States
|
|
||||||
|
|
||||||
- Pool ProxyTestState and TargetTestJob, reset and reuse
|
|
||||||
- Savings: ~11-15s/hour. **Not recommended** - high effort, medium risk, modest gain.
|
|
||||||
|
|
||||||
### [ ] SQLite Connection Reuse
|
|
||||||
|
|
||||||
- Persistent connection per thread with health checks
|
|
||||||
- Savings: ~0.3s/hour. **Not recommended** - negligible benefit.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Dashboard
|
## Dashboard
|
||||||
|
|
||||||
### [ ] Performance
|
- [ ] Cache expensive DB queries (top countries, protocol breakdown)
|
||||||
|
- [ ] Historical graphs (24h, 7d) using stats_history table
|
||||||
- Cache expensive DB queries (top countries, protocol breakdown)
|
- [ ] Per-ASN performance analysis
|
||||||
- Lazy-load historical data (only when scrolled into view)
|
- [ ] Alert thresholds (success rate < X%, MITM detected)
|
||||||
- WebSocket option for push updates (reduce polling overhead)
|
- [ ] WebSocket push updates (reduce polling overhead)
|
||||||
- Configurable refresh interval via URL param or localStorage
|
- [ ] Mobile-responsive improvements
|
||||||
|
|
||||||
### [ ] Features
|
|
||||||
|
|
||||||
- Historical graphs (24h, 7d) using stats_history table
|
|
||||||
- Per-ASN performance analysis
|
|
||||||
- Alert thresholds (success rate < X%, MITM detected)
|
|
||||||
- Mobile-responsive improvements
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Memory
|
## Memory
|
||||||
|
|
||||||
- [ ] Lock consolidation - reduce per-proxy locks (260k LockType objects)
|
- [ ] Lock consolidation (260k LockType objects at scale)
|
||||||
- [ ] Leaner state objects - reduce dict/list count per job
|
- [ ] Leaner state objects per job
|
||||||
|
|
||||||
Memory scales linearly with queue (~4.5 KB/job). No leaks detected.
|
Memory scales ~4.5 KB/job. No leaks detected. Optimize only if constrained.
|
||||||
Optimize only if memory becomes a constraint.
|
|
||||||
|
|
||||||
---
|
## Source Pipeline
|
||||||
|
|
||||||
|
- [ ] PasteBin/GitHub API scrapers for proxy lists
|
||||||
|
- [ ] Telegram channel scrapers (beyond t.me/s/ HTML)
|
||||||
|
- [ ] Source quality decay tracking (flag sources going stale)
|
||||||
|
- [ ] Deduplication of sources across different URL forms
|
||||||
|
|
||||||
## Known Issues
|
## Known Issues
|
||||||
|
|
||||||
### [!] Podman Container Metadata Disappears
|
### [!] Podman Container Metadata Disappears
|
||||||
|
|
||||||
`podman ps -a` shows empty even though process is running. Service functions
|
`podman ps -a` shows empty even though process is running.
|
||||||
correctly despite missing metadata. Monitor via `ss -tlnp`, `ps aux`, or
|
Monitor via `ss -tlnp`, `ps aux`, or `curl localhost:8081/health`.
|
||||||
`curl localhost:8081/health`. Low impact.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Container Debugging Checklist
|
|
||||||
|
|
||||||
```
|
|
||||||
1. Check for orphans: ps aux | grep -E "[p]rocess_name"
|
|
||||||
2. Check port conflicts: ss -tlnp | grep PORT
|
|
||||||
3. Run foreground: podman run --rm (no -d) to see output
|
|
||||||
4. Check podman state: podman ps -a
|
|
||||||
5. Clean stale: pkill -9 -f "pattern" && podman rm -f -a
|
|
||||||
6. Verify deps: config files, data dirs, volumes exist
|
|
||||||
7. Check logs: podman logs container_name 2>&1 | tail -50
|
|
||||||
8. Health check: curl -sf http://localhost:PORT/health
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -3,9 +3,9 @@
|
|||||||
"""Combined config file and argument parser."""
|
"""Combined config file and argument parser."""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from ConfigParser import SafeConfigParser, NoOptionError
|
from ConfigParser import SafeConfigParser as ConfigParser, NoOptionError
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from configparser import SafeConfigParser, NoOptionError
|
from configparser import ConfigParser, NoOptionError
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@@ -23,7 +23,7 @@ class ComboParser(object):
|
|||||||
|
|
||||||
def __init__(self, ini):
|
def __init__(self, ini):
|
||||||
self.items = []
|
self.items = []
|
||||||
self.cparser = SafeConfigParser()
|
self.cparser = ConfigParser()
|
||||||
self.aparser = ArgumentParser()
|
self.aparser = ArgumentParser()
|
||||||
self.ini = ini
|
self.ini = ini
|
||||||
self.loaded = False
|
self.loaded = False
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ services:
|
|||||||
build: .
|
build: .
|
||||||
network_mode: host
|
network_mode: host
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
logging:
|
||||||
|
driver: k8s-file
|
||||||
stop_signal: SIGTERM
|
stop_signal: SIGTERM
|
||||||
stop_grace_period: 30s
|
stop_grace_period: 30s
|
||||||
environment:
|
environment:
|
||||||
|
|||||||
18
compose.test.yml
Normal file
18
compose.test.yml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# PPF test runner (Python 2.7, production deps + pytest)
|
||||||
|
#
|
||||||
|
# Mounts source and tests as volumes so no rebuild needed between runs.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# podman-compose -f compose.test.yml run --rm test
|
||||||
|
# podman-compose -f compose.test.yml run --rm test python -m pytest tests/test_fetch.py -v
|
||||||
|
|
||||||
|
services:
|
||||||
|
test:
|
||||||
|
container_name: ppf-test
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.test
|
||||||
|
volumes:
|
||||||
|
- .:/app:ro,Z
|
||||||
|
working_dir: /app
|
||||||
|
command: python -m pytest tests/ -v --tb=short
|
||||||
@@ -35,4 +35,4 @@ services:
|
|||||||
- ./data:/app/data:Z
|
- ./data:/app/data:Z
|
||||||
- ./config.ini:/app/config.ini:ro,Z
|
- ./config.ini:/app/config.ini:ro,Z
|
||||||
- ./servers.txt:/app/servers.txt:ro,Z
|
- ./servers.txt:/app/servers.txt:ro,Z
|
||||||
command: python -u ppf.py --worker-v2 --server ${PPF_MASTER_URL:-http://10.200.1.250:8081}
|
command: python -u ppf.py --worker --server ${PPF_MASTER_URL:-http://10.200.1.250:8081}
|
||||||
|
|||||||
19
config.py
19
config.py
@@ -45,10 +45,10 @@ class Config(ComboParser):
|
|||||||
# Validate thread counts (0 allowed for watchd to disable local testing)
|
# Validate thread counts (0 allowed for watchd to disable local testing)
|
||||||
if self.watchd.threads < 0:
|
if self.watchd.threads < 0:
|
||||||
errors.append('watchd.threads must be >= 0')
|
errors.append('watchd.threads must be >= 0')
|
||||||
if self.ppf.threads < 1:
|
if self.ppf.threads < 0:
|
||||||
errors.append('ppf.threads must be >= 1')
|
errors.append('ppf.threads must be >= 0')
|
||||||
if self.scraper.threads < 1:
|
if self.scraper.enabled and self.scraper.threads < 1:
|
||||||
errors.append('scraper.threads must be >= 1')
|
errors.append('scraper.threads must be >= 1 when scraper is enabled')
|
||||||
|
|
||||||
# Validate max_fail
|
# Validate max_fail
|
||||||
if self.watchd.max_fail < 1:
|
if self.watchd.max_fail < 1:
|
||||||
@@ -123,6 +123,7 @@ class Config(ComboParser):
|
|||||||
self.add_item(section, 'checktype', str, 'head', 'secondary check type: head, irc, judges, none/false (none = SSL-only)', False)
|
self.add_item(section, 'checktype', str, 'head', 'secondary check type: head, irc, judges, none/false (none = SSL-only)', False)
|
||||||
self.add_item(section, 'ssl_first', bool, True, 'try SSL handshake first, fallback to checktype on failure (default: True)', False)
|
self.add_item(section, 'ssl_first', bool, True, 'try SSL handshake first, fallback to checktype on failure (default: True)', False)
|
||||||
self.add_item(section, 'ssl_only', bool, False, 'when ssl_first enabled, skip secondary check on SSL failure (default: False)', False)
|
self.add_item(section, 'ssl_only', bool, False, 'when ssl_first enabled, skip secondary check on SSL failure (default: False)', False)
|
||||||
|
self.add_item(section, 'fingerprint', bool, True, 'probe proxy protocol before testing (default: True)', False)
|
||||||
self.add_item(section, 'scale_cooldown', int, 10, 'seconds between thread scaling decisions (default: 10)', False)
|
self.add_item(section, 'scale_cooldown', int, 10, 'seconds between thread scaling decisions (default: 10)', False)
|
||||||
self.add_item(section, 'scale_threshold', float, 10.0, 'min success rate % to scale up threads (default: 10.0)', False)
|
self.add_item(section, 'scale_threshold', float, 10.0, 'min success rate % to scale up threads (default: 10.0)', False)
|
||||||
|
|
||||||
@@ -166,20 +167,18 @@ class Config(ComboParser):
|
|||||||
self.add_item(section, 'spot_check_pct', float, 1.0, 'percent of working proxies to spot-check (default: 1.0)', False)
|
self.add_item(section, 'spot_check_pct', float, 1.0, 'percent of working proxies to spot-check (default: 1.0)', False)
|
||||||
|
|
||||||
section = 'worker'
|
section = 'worker'
|
||||||
self.add_item(section, 'batch_size', int, 100, 'proxies per work batch (default: 100)', False)
|
|
||||||
self.add_item(section, 'heartbeat', int, 60, 'heartbeat interval in seconds (default: 60)', False)
|
self.add_item(section, 'heartbeat', int, 60, 'heartbeat interval in seconds (default: 60)', False)
|
||||||
self.add_item(section, 'claim_timeout', int, 300, 'seconds before unclaimed work is released (default: 300)', False)
|
self.add_item(section, 'url_batch_size', int, 5, 'URLs per claim cycle (default: 5)', False)
|
||||||
self.add_item(section, 'url_batch_size', int, 5, 'URLs per claim cycle for V2 mode (default: 5)', False)
|
self.add_item(section, 'fetch_timeout', int, 30, 'timeout for URL fetching (default: 30)', False)
|
||||||
self.add_item(section, 'fetch_timeout', int, 30, 'timeout for URL fetching in V2 mode (default: 30)', False)
|
self.add_item(section, 'cache_ttl', int, 300, 'local proxy test cache TTL in seconds, 0 to disable (default: 300)', False)
|
||||||
|
|
||||||
self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
|
self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
|
||||||
self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
|
self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
|
||||||
self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
|
self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
|
||||||
self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
|
self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
|
||||||
self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)
|
self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)
|
||||||
self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)
|
|
||||||
self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)", type=str, default='')
|
self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)", type=str, default='')
|
||||||
self.aparser.add_argument("--worker-key", help="worker authentication key", type=str, default='')
|
self.aparser.add_argument("--worker-key", help="worker authentication key", type=str, default='')
|
||||||
self.aparser.add_argument("--register", help="register as worker with master server", action='store_true', default=False)
|
self.aparser.add_argument("--register", help="register as worker with master server", action='store_true', default=False)
|
||||||
self.aparser.add_argument("--worker-name", help="worker name for registration (default: hostname)", type=str, default='')
|
self.aparser.add_argument("--worker-name", help="worker name for registration (default: hostname)", type=str, default='')
|
||||||
self.aparser.add_argument("--worker-v2", help="run as V2 worker (URL-driven fetching)", action='store_true', default=False)
|
self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)
|
||||||
|
|||||||
217
dbs.py
217
dbs.py
@@ -98,6 +98,16 @@ def _migrate_last_seen(sqlite):
|
|||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _migrate_last_check_columns(sqlite):
|
||||||
|
"""Add last_check and last_target columns for test provenance tracking."""
|
||||||
|
for col, typedef in (('last_check', 'TEXT'), ('last_target', 'TEXT')):
|
||||||
|
try:
|
||||||
|
sqlite.execute('SELECT %s FROM proxylist LIMIT 1' % col)
|
||||||
|
except Exception:
|
||||||
|
sqlite.execute('ALTER TABLE proxylist ADD COLUMN %s %s' % (col, typedef))
|
||||||
|
sqlite.commit()
|
||||||
|
|
||||||
|
|
||||||
def _migrate_uri_check_interval(sqlite):
|
def _migrate_uri_check_interval(sqlite):
|
||||||
"""Add adaptive check_interval column to uris table."""
|
"""Add adaptive check_interval column to uris table."""
|
||||||
try:
|
try:
|
||||||
@@ -371,7 +381,9 @@ def create_table_if_not_exists(sqlite, dbname):
|
|||||||
source_proto TEXT,
|
source_proto TEXT,
|
||||||
source_confidence INT DEFAULT 0,
|
source_confidence INT DEFAULT 0,
|
||||||
protos_working TEXT,
|
protos_working TEXT,
|
||||||
last_seen INT DEFAULT 0)""")
|
last_seen INT DEFAULT 0,
|
||||||
|
last_check TEXT,
|
||||||
|
last_target TEXT)""")
|
||||||
# Migration: add columns to existing databases (must run before creating indexes)
|
# Migration: add columns to existing databases (must run before creating indexes)
|
||||||
_migrate_latency_columns(sqlite)
|
_migrate_latency_columns(sqlite)
|
||||||
_migrate_anonymity_columns(sqlite)
|
_migrate_anonymity_columns(sqlite)
|
||||||
@@ -381,6 +393,7 @@ def create_table_if_not_exists(sqlite, dbname):
|
|||||||
_migrate_source_proto(sqlite)
|
_migrate_source_proto(sqlite)
|
||||||
_migrate_protos_working(sqlite)
|
_migrate_protos_working(sqlite)
|
||||||
_migrate_last_seen(sqlite)
|
_migrate_last_seen(sqlite)
|
||||||
|
_migrate_last_check_columns(sqlite)
|
||||||
# Indexes for common query patterns
|
# Indexes for common query patterns
|
||||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
|
||||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
|
||||||
@@ -569,34 +582,107 @@ def insert_urls(urls, search, sqlite):
|
|||||||
|
|
||||||
# Known proxy list sources (GitHub raw lists, APIs)
|
# Known proxy list sources (GitHub raw lists, APIs)
|
||||||
PROXY_SOURCES = [
|
PROXY_SOURCES = [
|
||||||
|
# --- GitHub raw lists (sorted by update frequency) ---
|
||||||
|
|
||||||
# TheSpeedX/PROXY-List - large, hourly updates
|
# TheSpeedX/PROXY-List - large, hourly updates
|
||||||
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
|
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
|
||||||
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
|
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
|
||||||
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
|
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
|
||||||
# clarketm/proxy-list - curated, daily
|
|
||||||
'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
|
|
||||||
# monosans/proxy-list - hourly updates
|
# monosans/proxy-list - hourly updates
|
||||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',
|
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',
|
||||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
|
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
|
||||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
|
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
|
||||||
|
# prxchk/proxy-list - 10 min updates
|
||||||
|
'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
|
||||||
# jetkai/proxy-list - 10 min updates
|
# jetkai/proxy-list - 10 min updates
|
||||||
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',
|
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',
|
||||||
# roosterkid/openproxylist
|
# hookzof/socks5_list - hourly, SOCKS5 focused
|
||||||
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
|
'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
|
||||||
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
|
|
||||||
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
|
|
||||||
# ShiftyTR/Proxy-List
|
|
||||||
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
|
|
||||||
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
|
|
||||||
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
|
|
||||||
# mmpx12/proxy-list
|
# mmpx12/proxy-list
|
||||||
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt',
|
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt',
|
||||||
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
|
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
|
||||||
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
|
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
|
||||||
# proxyscrape API
|
# ShiftyTR/Proxy-List
|
||||||
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all',
|
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
|
||||||
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all',
|
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
|
||||||
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all',
|
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
|
||||||
|
# roosterkid/openproxylist
|
||||||
|
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
|
||||||
|
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
|
||||||
|
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
|
||||||
|
# clarketm/proxy-list - curated, daily
|
||||||
|
'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
|
||||||
|
# officialputuid/KangProxy - 4-6 hour updates
|
||||||
|
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/https/https.txt',
|
||||||
|
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks4/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
|
||||||
|
# iplocate/free-proxy-list - 30 min updates
|
||||||
|
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks5.txt',
|
||||||
|
# ErcinDedeworken/proxy-list - hourly
|
||||||
|
'https://raw.githubusercontent.com/ErcinDedeworken/proxy-list/main/proxy-list/data.txt',
|
||||||
|
# MuRongPIG/Proxy-Master - 10 min updates
|
||||||
|
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
|
||||||
|
# zloi-user/hideip.me - hourly
|
||||||
|
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks5.txt',
|
||||||
|
# FLAVIEN-music/proxy-list - 30 min updates
|
||||||
|
'https://raw.githubusercontent.com/FLAVIEN-music/proxy-list/main/proxies/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/FLAVIEN-music/proxy-list/main/proxies/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/FLAVIEN-music/proxy-list/main/proxies/socks5.txt',
|
||||||
|
# Zaeem20/FREE_PROXIES_LIST - 30 min updates
|
||||||
|
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/https.txt',
|
||||||
|
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt',
|
||||||
|
# r00tee/Proxy-List - hourly
|
||||||
|
'https://raw.githubusercontent.com/r00tee/Proxy-List/main/Https.txt',
|
||||||
|
'https://raw.githubusercontent.com/r00tee/Proxy-List/main/Socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/r00tee/Proxy-List/main/Socks5.txt',
|
||||||
|
# casals-ar/proxy-list
|
||||||
|
'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',
|
||||||
|
'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks4',
|
||||||
|
'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks5',
|
||||||
|
# yemixzy/proxy-list
|
||||||
|
'https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/yemixzy/proxy-list/main/proxies/socks5.txt',
|
||||||
|
# opsxcq/proxy-list
|
||||||
|
'https://raw.githubusercontent.com/opsxcq/proxy-list/master/list.txt',
|
||||||
|
# im-razvan/proxy_list - 10 min updates
|
||||||
|
'https://raw.githubusercontent.com/im-razvan/proxy_list/main/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/im-razvan/proxy_list/main/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/im-razvan/proxy_list/main/socks5.txt',
|
||||||
|
# zevtyardt/proxy-list - daily SOCKS5
|
||||||
|
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt',
|
||||||
|
# UptimerBot/proxy-list - 15 min updates
|
||||||
|
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt',
|
||||||
|
# Anonym0usWork1221/Free-Proxies
|
||||||
|
'https://raw.githubusercontent.com/Anonym0usWork1221/Free-Proxies/main/proxy_files/https_proxies.txt',
|
||||||
|
'https://raw.githubusercontent.com/Anonym0usWork1221/Free-Proxies/main/proxy_files/socks4_proxies.txt',
|
||||||
|
'https://raw.githubusercontent.com/Anonym0usWork1221/Free-Proxies/main/proxy_files/socks5_proxies.txt',
|
||||||
|
# ErcinDedeoglu/proxies - hourly
|
||||||
|
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',
|
||||||
|
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt',
|
||||||
|
'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt',
|
||||||
|
# dinoz0rg/proxy-list - daily, all protocols
|
||||||
|
'https://raw.githubusercontent.com/dinoz0rg/proxy-list/main/all.txt',
|
||||||
|
# elliottophellia/proxylist - SOCKS5
|
||||||
|
'https://raw.githubusercontent.com/elliottophellia/proxylist/master/results/socks5/global/socks5_len.txt',
|
||||||
|
# gfpcom/free-proxy-list - SOCKS5
|
||||||
|
'https://raw.githubusercontent.com/gfpcom/free-proxy-list/main/socks5.txt',
|
||||||
|
# databay-labs/free-proxy-list - SOCKS5
|
||||||
|
'https://raw.githubusercontent.com/databay-labs/free-proxy-list/master/socks5.txt',
|
||||||
|
|
||||||
|
# --- GitHub Pages / CDN hosted ---
|
||||||
|
|
||||||
# proxifly/free-proxy-list - 5 min updates (jsDelivr CDN)
|
# proxifly/free-proxy-list - 5 min updates (jsDelivr CDN)
|
||||||
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/http/data.txt',
|
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/http/data.txt',
|
||||||
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/socks4/data.txt',
|
'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/socks4/data.txt',
|
||||||
@@ -605,31 +691,86 @@ PROXY_SOURCES = [
|
|||||||
'https://vakhov.github.io/fresh-proxy-list/http.txt',
|
'https://vakhov.github.io/fresh-proxy-list/http.txt',
|
||||||
'https://vakhov.github.io/fresh-proxy-list/socks4.txt',
|
'https://vakhov.github.io/fresh-proxy-list/socks4.txt',
|
||||||
'https://vakhov.github.io/fresh-proxy-list/socks5.txt',
|
'https://vakhov.github.io/fresh-proxy-list/socks5.txt',
|
||||||
# prxchk/proxy-list - 10 min updates
|
|
||||||
'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',
|
|
||||||
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
|
|
||||||
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
|
|
||||||
# sunny9577/proxy-scraper - 3 hour updates (GitHub Pages)
|
# sunny9577/proxy-scraper - 3 hour updates (GitHub Pages)
|
||||||
'https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt',
|
'https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt',
|
||||||
'https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt',
|
'https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt',
|
||||||
'https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt',
|
'https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt',
|
||||||
# officialputuid/KangProxy - 4-6 hour updates
|
|
||||||
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt',
|
# --- API endpoints ---
|
||||||
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks4/socks4.txt',
|
|
||||||
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
|
# proxyscrape
|
||||||
# hookzof/socks5_list - hourly updates
|
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all',
|
||||||
'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
|
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all',
|
||||||
# iplocate/free-proxy-list - 30 min updates
|
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all',
|
||||||
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/http.txt',
|
|
||||||
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks4.txt',
|
# proxy-list.download - SOCKS5 API
|
||||||
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks5.txt',
|
'https://www.proxy-list.download/api/v1/get?type=socks5',
|
||||||
|
'https://www.proxy-list.download/api/v1/get?type=socks4',
|
||||||
|
# openproxylist.xyz - plain text
|
||||||
|
'https://api.openproxylist.xyz/http.txt',
|
||||||
|
'https://api.openproxylist.xyz/socks4.txt',
|
||||||
|
'https://api.openproxylist.xyz/socks5.txt',
|
||||||
|
# spys.me - plain text, 30 min updates
|
||||||
|
'http://spys.me/proxy.txt',
|
||||||
|
'http://spys.me/socks.txt',
|
||||||
|
|
||||||
|
# --- Web scrapers (HTML pages) ---
|
||||||
|
|
||||||
|
# spys.one - mixed protocols, requires parsing
|
||||||
|
'https://spys.one/en/free-proxy-list/',
|
||||||
|
'https://spys.one/en/socks-proxy-list/',
|
||||||
|
'https://spys.one/en/https-ssl-proxy/',
|
||||||
|
# free-proxy-list.net
|
||||||
|
'https://free-proxy-list.net/',
|
||||||
|
'https://www.sslproxies.org/',
|
||||||
|
'https://www.socks-proxy.net/',
|
||||||
|
# sockslist.us - SOCKS5 focused
|
||||||
|
'https://sockslist.us/',
|
||||||
|
# mtpro.xyz - SOCKS5, updated every 5 min
|
||||||
|
'https://mtpro.xyz/socks5',
|
||||||
|
# proxy-tools.com - SOCKS5 filtered
|
||||||
|
'https://proxy-tools.com/proxy/socks5',
|
||||||
|
# hidemy.name - all protocols, paginated
|
||||||
|
'https://hide.mn/en/proxy-list/',
|
||||||
|
# advanced.name - SOCKS5 filtered
|
||||||
|
'https://advanced.name/freeproxy?type=socks5',
|
||||||
|
# proxynova.com - by country
|
||||||
|
'https://www.proxynova.com/proxy-server-list/',
|
||||||
|
# freeproxy.world - SOCKS5 filtered
|
||||||
|
'https://www.freeproxy.world/?type=socks5',
|
||||||
|
# proxydb.net - all protocols
|
||||||
|
'http://proxydb.net/',
|
||||||
|
# geonode
|
||||||
|
'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=http',
|
||||||
|
'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=socks4',
|
||||||
|
'https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=socks5',
|
||||||
|
# openproxy.space
|
||||||
|
'https://openproxy.space/list/http',
|
||||||
|
'https://openproxy.space/list/socks4',
|
||||||
|
'https://openproxy.space/list/socks5',
|
||||||
|
|
||||||
|
# --- Telegram channels (public HTML view) ---
|
||||||
|
|
||||||
|
'https://t.me/s/spys_one',
|
||||||
|
'https://t.me/s/proxyfree1',
|
||||||
|
'https://t.me/s/proxylist4free',
|
||||||
|
'https://t.me/s/proxy_lists',
|
||||||
|
'https://t.me/s/Proxies4ForYou',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def seed_proxy_sources(sqlite):
|
def seed_proxy_sources(sqlite, reset_errors=False):
|
||||||
"""Seed known proxy list sources into uris table."""
|
"""Seed known proxy list sources into uris table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sqlite: Database connection
|
||||||
|
reset_errors: If True, reset error/stale counts on existing seed
|
||||||
|
sources that have errored out, allowing them to be
|
||||||
|
retried. Safe to call periodically.
|
||||||
|
"""
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
added = 0
|
added = 0
|
||||||
|
reset = 0
|
||||||
for url in PROXY_SOURCES:
|
for url in PROXY_SOURCES:
|
||||||
try:
|
try:
|
||||||
sqlite.execute(
|
sqlite.execute(
|
||||||
@@ -640,11 +781,21 @@ def seed_proxy_sources(sqlite):
|
|||||||
)
|
)
|
||||||
if sqlite.cursor.rowcount > 0:
|
if sqlite.cursor.rowcount > 0:
|
||||||
added += 1
|
added += 1
|
||||||
|
elif reset_errors:
|
||||||
|
# Reset errored-out seed sources so they get reclaimed
|
||||||
|
sqlite.execute(
|
||||||
|
'UPDATE uris SET error = 0, stale_count = 0, '
|
||||||
|
'check_interval = 3600, check_time = 0 '
|
||||||
|
'WHERE url = ? AND error >= 5',
|
||||||
|
(url,)
|
||||||
|
)
|
||||||
|
if sqlite.cursor.rowcount > 0:
|
||||||
|
reset += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_log('seed_urls insert error for %s: %s' % (url, e), 'warn')
|
_log('seed_urls insert error for %s: %s' % (url, e), 'warn')
|
||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
if added > 0:
|
if added > 0 or reset > 0:
|
||||||
_log('seeded %d proxy source URLs' % added, 'info')
|
_log('seed sources: %d new, %d reset' % (added, reset), 'info')
|
||||||
|
|
||||||
|
|
||||||
def save_session_state(sqlite, stats):
|
def save_session_state(sqlite, stats):
|
||||||
|
|||||||
41
fetch.py
41
fetch.py
@@ -221,6 +221,10 @@ def extract_auth_proxies(content):
|
|||||||
"""
|
"""
|
||||||
proxies = []
|
proxies = []
|
||||||
|
|
||||||
|
# Short-circuit: auth proxies always contain @
|
||||||
|
if '@' not in content:
|
||||||
|
return proxies
|
||||||
|
|
||||||
# IPv4 auth proxies
|
# IPv4 auth proxies
|
||||||
for match in AUTH_PROXY_PATTERN.finditer(content):
|
for match in AUTH_PROXY_PATTERN.finditer(content):
|
||||||
proto_str, user, passwd, ip, port = match.groups()
|
proto_str, user, passwd, ip, port = match.groups()
|
||||||
@@ -256,6 +260,12 @@ TABLE_PORT_HEADERS = ('port',)
|
|||||||
TABLE_PROTO_HEADERS = ('type', 'protocol', 'proto', 'scheme')
|
TABLE_PROTO_HEADERS = ('type', 'protocol', 'proto', 'scheme')
|
||||||
|
|
||||||
|
|
||||||
|
_TABLE_PATTERN = re.compile(r'<table[^>]*>(.*?)</table>', re.IGNORECASE | re.DOTALL)
|
||||||
|
_ROW_PATTERN = re.compile(r'<tr[^>]*>(.*?)</tr>', re.IGNORECASE | re.DOTALL)
|
||||||
|
_CELL_PATTERN = re.compile(r'<t[hd][^>]*>(.*?)</t[hd]>', re.IGNORECASE | re.DOTALL)
|
||||||
|
_TAG_STRIP = re.compile(r'<[^>]+>')
|
||||||
|
|
||||||
|
|
||||||
def extract_proxies_from_table(content):
|
def extract_proxies_from_table(content):
|
||||||
"""Extract proxies from HTML tables with IP/Port/Protocol columns.
|
"""Extract proxies from HTML tables with IP/Port/Protocol columns.
|
||||||
|
|
||||||
@@ -269,26 +279,23 @@ def extract_proxies_from_table(content):
|
|||||||
"""
|
"""
|
||||||
proxies = []
|
proxies = []
|
||||||
|
|
||||||
# Simple regex-based table parsing (works without BeautifulSoup)
|
# Short-circuit: no HTML tables in plain text content
|
||||||
# Find all tables
|
if '<table' not in content and '<TABLE' not in content:
|
||||||
table_pattern = re.compile(r'<table[^>]*>(.*?)</table>', re.IGNORECASE | re.DOTALL)
|
return proxies
|
||||||
row_pattern = re.compile(r'<tr[^>]*>(.*?)</tr>', re.IGNORECASE | re.DOTALL)
|
|
||||||
cell_pattern = re.compile(r'<t[hd][^>]*>(.*?)</t[hd]>', re.IGNORECASE | re.DOTALL)
|
|
||||||
tag_strip = re.compile(r'<[^>]+>')
|
|
||||||
|
|
||||||
for table_match in table_pattern.finditer(content):
|
for table_match in _TABLE_PATTERN.finditer(content):
|
||||||
table_html = table_match.group(1)
|
table_html = table_match.group(1)
|
||||||
rows = row_pattern.findall(table_html)
|
rows = _ROW_PATTERN.findall(table_html)
|
||||||
if not rows:
|
if not rows:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Parse header row to find column indices
|
# Parse header row to find column indices
|
||||||
ip_col = port_col = proto_col = -1
|
ip_col = port_col = proto_col = -1
|
||||||
header_row = rows[0]
|
header_row = rows[0]
|
||||||
headers = cell_pattern.findall(header_row)
|
headers = _CELL_PATTERN.findall(header_row)
|
||||||
|
|
||||||
for i, cell in enumerate(headers):
|
for i, cell in enumerate(headers):
|
||||||
cell_text = tag_strip.sub('', cell).strip().lower()
|
cell_text = _TAG_STRIP.sub('', cell).strip().lower()
|
||||||
if ip_col < 0 and any(h in cell_text for h in TABLE_IP_HEADERS):
|
if ip_col < 0 and any(h in cell_text for h in TABLE_IP_HEADERS):
|
||||||
ip_col = i
|
ip_col = i
|
||||||
elif port_col < 0 and any(h in cell_text for h in TABLE_PORT_HEADERS):
|
elif port_col < 0 and any(h in cell_text for h in TABLE_PORT_HEADERS):
|
||||||
@@ -302,11 +309,11 @@ def extract_proxies_from_table(content):
|
|||||||
|
|
||||||
# Parse data rows
|
# Parse data rows
|
||||||
for row in rows[1:]:
|
for row in rows[1:]:
|
||||||
cells = cell_pattern.findall(row)
|
cells = _CELL_PATTERN.findall(row)
|
||||||
if len(cells) <= ip_col:
|
if len(cells) <= ip_col:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ip_cell = tag_strip.sub('', cells[ip_col]).strip()
|
ip_cell = _TAG_STRIP.sub('', cells[ip_col]).strip()
|
||||||
|
|
||||||
# Check if IP cell contains port (ip:port format)
|
# Check if IP cell contains port (ip:port format)
|
||||||
if ':' in ip_cell and port_col < 0:
|
if ':' in ip_cell and port_col < 0:
|
||||||
@@ -315,7 +322,7 @@ def extract_proxies_from_table(content):
|
|||||||
ip, port = match.groups()
|
ip, port = match.groups()
|
||||||
proto = None
|
proto = None
|
||||||
if proto_col >= 0 and len(cells) > proto_col:
|
if proto_col >= 0 and len(cells) > proto_col:
|
||||||
proto = _normalize_proto(tag_strip.sub('', cells[proto_col]).strip())
|
proto = _normalize_proto(_TAG_STRIP.sub('', cells[proto_col]).strip())
|
||||||
addr = '%s:%s' % (ip, port)
|
addr = '%s:%s' % (ip, port)
|
||||||
if is_usable_proxy(addr):
|
if is_usable_proxy(addr):
|
||||||
proxies.append((addr, proto))
|
proxies.append((addr, proto))
|
||||||
@@ -323,7 +330,7 @@ def extract_proxies_from_table(content):
|
|||||||
|
|
||||||
# Separate IP and Port columns
|
# Separate IP and Port columns
|
||||||
if port_col >= 0 and len(cells) > port_col:
|
if port_col >= 0 and len(cells) > port_col:
|
||||||
port_cell = tag_strip.sub('', cells[port_col]).strip()
|
port_cell = _TAG_STRIP.sub('', cells[port_col]).strip()
|
||||||
try:
|
try:
|
||||||
port = int(port_cell)
|
port = int(port_cell)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -335,7 +342,7 @@ def extract_proxies_from_table(content):
|
|||||||
|
|
||||||
proto = None
|
proto = None
|
||||||
if proto_col >= 0 and len(cells) > proto_col:
|
if proto_col >= 0 and len(cells) > proto_col:
|
||||||
proto = _normalize_proto(tag_strip.sub('', cells[proto_col]).strip())
|
proto = _normalize_proto(_TAG_STRIP.sub('', cells[proto_col]).strip())
|
||||||
|
|
||||||
addr = '%s:%d' % (ip_cell, port)
|
addr = '%s:%d' % (ip_cell, port)
|
||||||
if is_usable_proxy(addr):
|
if is_usable_proxy(addr):
|
||||||
@@ -358,6 +365,10 @@ def extract_proxies_from_json(content):
|
|||||||
"""
|
"""
|
||||||
proxies = []
|
proxies = []
|
||||||
|
|
||||||
|
# Short-circuit: content must contain JSON delimiters
|
||||||
|
if '{' not in content and '[' not in content:
|
||||||
|
return proxies
|
||||||
|
|
||||||
# Try to find JSON in content (may be embedded in HTML)
|
# Try to find JSON in content (may be embedded in HTML)
|
||||||
json_matches = []
|
json_matches = []
|
||||||
|
|
||||||
|
|||||||
472
ppf.py
472
ppf.py
@@ -303,48 +303,6 @@ class NeedReregister(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def worker_get_work(server_url, worker_key, count=100):
|
|
||||||
"""Fetch batch of proxies from master."""
|
|
||||||
url = '%s/api/work?key=%s&count=%d' % (server_url.rstrip('/'), worker_key, count)
|
|
||||||
|
|
||||||
try:
|
|
||||||
resp = urllib2.urlopen(url, timeout=30)
|
|
||||||
result = json.loads(resp.read())
|
|
||||||
return result.get('proxies', [])
|
|
||||||
except urllib2.HTTPError as e:
|
|
||||||
if e.code == 403:
|
|
||||||
_log('worker key rejected (403), need to re-register', 'warn')
|
|
||||||
raise NeedReregister()
|
|
||||||
_log('failed to get work: %s' % e, 'error')
|
|
||||||
return []
|
|
||||||
except Exception as e:
|
|
||||||
_log('failed to get work: %s' % e, 'error')
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
def worker_submit_results(server_url, worker_key, results):
|
|
||||||
"""Submit test results to master."""
|
|
||||||
url = '%s/api/results?key=%s' % (server_url.rstrip('/'), worker_key)
|
|
||||||
data = json.dumps({'results': results})
|
|
||||||
|
|
||||||
req = urllib2.Request(url, data)
|
|
||||||
req.add_header('Content-Type', 'application/json')
|
|
||||||
|
|
||||||
try:
|
|
||||||
resp = urllib2.urlopen(req, timeout=30)
|
|
||||||
result = json.loads(resp.read())
|
|
||||||
return result.get('processed', 0)
|
|
||||||
except urllib2.HTTPError as e:
|
|
||||||
if e.code == 403:
|
|
||||||
_log('worker key rejected (403), need to re-register', 'warn')
|
|
||||||
raise NeedReregister()
|
|
||||||
_log('failed to submit results: %s' % e, 'error')
|
|
||||||
return 0
|
|
||||||
except Exception as e:
|
|
||||||
_log('failed to submit results: %s' % e, 'error')
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def worker_send_heartbeat(server_url, worker_key, tor_ok, tor_ip=None, profiling=False, threads=0):
|
def worker_send_heartbeat(server_url, worker_key, tor_ok, tor_ip=None, profiling=False, threads=0):
|
||||||
"""Send heartbeat with Tor status to master."""
|
"""Send heartbeat with Tor status to master."""
|
||||||
url = '%s/api/heartbeat?key=%s' % (server_url.rstrip('/'), worker_key)
|
url = '%s/api/heartbeat?key=%s' % (server_url.rstrip('/'), worker_key)
|
||||||
@@ -370,7 +328,7 @@ def worker_send_heartbeat(server_url, worker_key, tor_ok, tor_ip=None, profiling
|
|||||||
|
|
||||||
|
|
||||||
def worker_claim_urls(server_url, worker_key, count=5):
|
def worker_claim_urls(server_url, worker_key, count=5):
|
||||||
"""Claim batch of URLs for V2 worker mode."""
|
"""Claim batch of URLs for worker mode."""
|
||||||
url = '%s/api/claim-urls?key=%s&count=%d' % (server_url.rstrip('/'), worker_key, count)
|
url = '%s/api/claim-urls?key=%s&count=%d' % (server_url.rstrip('/'), worker_key, count)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -471,310 +429,7 @@ def check_tor_connectivity(tor_hosts):
|
|||||||
|
|
||||||
|
|
||||||
def worker_main(config):
|
def worker_main(config):
|
||||||
"""Worker mode main loop - uses proxywatchd multi-threaded testing."""
|
"""Worker mode -- URL-driven discovery.
|
||||||
import json
|
|
||||||
global urllib2
|
|
||||||
|
|
||||||
try:
|
|
||||||
import Queue
|
|
||||||
except ImportError:
|
|
||||||
import queue as Queue
|
|
||||||
|
|
||||||
# Import proxywatchd for multi-threaded testing (gevent already patched at top)
|
|
||||||
import proxywatchd
|
|
||||||
proxywatchd.set_config(config)
|
|
||||||
|
|
||||||
server_url = config.args.server
|
|
||||||
if not server_url:
|
|
||||||
_log('--server URL required for worker mode', 'error')
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
worker_key = config.args.worker_key
|
|
||||||
worker_name = config.args.worker_name or os.uname()[1]
|
|
||||||
batch_size = config.worker.batch_size
|
|
||||||
num_threads = config.watchd.threads
|
|
||||||
worker_id = None
|
|
||||||
|
|
||||||
# Register if --register flag or no key provided
|
|
||||||
if config.args.register or not worker_key:
|
|
||||||
_log('registering with master: %s' % server_url, 'info')
|
|
||||||
worker_id, worker_key = worker_register(server_url, worker_name)
|
|
||||||
if not worker_key:
|
|
||||||
_log('registration failed, exiting', 'error')
|
|
||||||
sys.exit(1)
|
|
||||||
_log('registered as %s (id: %s)' % (worker_name, worker_id), 'info')
|
|
||||||
_log('worker key: %s' % worker_key, 'info')
|
|
||||||
_log('save this key with --worker-key for future runs', 'info')
|
|
||||||
|
|
||||||
if config.args.register:
|
|
||||||
# Just register and exit
|
|
||||||
return
|
|
||||||
|
|
||||||
_log('starting worker mode', 'info')
|
|
||||||
_log(' server: %s' % server_url, 'info')
|
|
||||||
_log(' threads: %d' % num_threads, 'info')
|
|
||||||
_log(' batch size: %d' % batch_size, 'info')
|
|
||||||
_log(' tor hosts: %s' % config.common.tor_hosts, 'info')
|
|
||||||
|
|
||||||
# Verify Tor connectivity before claiming work
|
|
||||||
import socket
|
|
||||||
import socks
|
|
||||||
working_tor_hosts = []
|
|
||||||
for tor_host in config.torhosts:
|
|
||||||
host, port = tor_host.split(':')
|
|
||||||
port = int(port)
|
|
||||||
try:
|
|
||||||
# Test SOCKS connection
|
|
||||||
test_sock = socks.socksocket()
|
|
||||||
test_sock.set_proxy(socks.SOCKS5, host, port)
|
|
||||||
test_sock.settimeout(10)
|
|
||||||
test_sock.connect(('check.torproject.org', 80))
|
|
||||||
test_sock.send(b'GET / HTTP/1.0\r\nHost: check.torproject.org\r\n\r\n')
|
|
||||||
resp = test_sock.recv(512)
|
|
||||||
test_sock.close()
|
|
||||||
# Accept any HTTP response (200, 301, 302, etc.)
|
|
||||||
if resp and (b'HTTP/' in resp or len(resp) > 0):
|
|
||||||
status = resp.split(b'\r\n')[0] if b'\r\n' in resp else resp[:50]
|
|
||||||
_log('tor host %s:%d OK (%s)' % (host, port, status), 'info')
|
|
||||||
working_tor_hosts.append(tor_host)
|
|
||||||
else:
|
|
||||||
_log('tor host %s:%d no response (recv=%d bytes)' % (host, port, len(resp) if resp else 0), 'warn')
|
|
||||||
except Exception as e:
|
|
||||||
_log('tor host %s:%d failed: %s' % (host, port, e), 'warn')
|
|
||||||
|
|
||||||
if not working_tor_hosts:
|
|
||||||
_log('no working Tor hosts, cannot start worker', 'error')
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
_log('%d/%d Tor hosts verified' % (len(working_tor_hosts), len(config.torhosts)), 'info')
|
|
||||||
|
|
||||||
# Create shared queues for worker threads
|
|
||||||
job_queue = proxywatchd.PriorityJobQueue()
|
|
||||||
completion_queue = Queue.Queue()
|
|
||||||
|
|
||||||
# Spawn worker threads with stagger to avoid overwhelming Tor
|
|
||||||
threads = []
|
|
||||||
for i in range(num_threads):
|
|
||||||
wt = proxywatchd.WorkerThread('w%d' % i, job_queue)
|
|
||||||
wt.start_thread()
|
|
||||||
threads.append(wt)
|
|
||||||
time.sleep(random.random() / 10) # 0-100ms stagger per thread
|
|
||||||
|
|
||||||
_log('spawned %d worker threads' % len(threads), 'info')
|
|
||||||
|
|
||||||
jobs_completed = 0
|
|
||||||
proxies_tested = 0
|
|
||||||
start_time = time.time()
|
|
||||||
current_tor_ip = None
|
|
||||||
consecutive_tor_failures = 0
|
|
||||||
worker_profiling = config.args.profile or config.common.profiling
|
|
||||||
# Use dict to allow mutation in nested function (Python 2 compatible)
|
|
||||||
wstate = {'worker_key': worker_key, 'worker_id': worker_id, 'backoff': 10}
|
|
||||||
|
|
||||||
def do_register():
|
|
||||||
"""Register with master, with exponential backoff on failure."""
|
|
||||||
while True:
|
|
||||||
_log('registering with master: %s' % server_url, 'info')
|
|
||||||
new_id, new_key = worker_register(server_url, worker_name)
|
|
||||||
if new_key:
|
|
||||||
wstate['worker_id'] = new_id
|
|
||||||
wstate['worker_key'] = new_key
|
|
||||||
wstate['backoff'] = 10 # Reset backoff on success
|
|
||||||
_log('registered as %s (id: %s)' % (worker_name, new_id), 'info')
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
_log('registration failed, retrying in %ds' % wstate['backoff'], 'warn')
|
|
||||||
time.sleep(wstate['backoff'])
|
|
||||||
wstate['backoff'] = min(wstate['backoff'] * 2, 300) # Max 5 min backoff
|
|
||||||
|
|
||||||
def wait_for_tor():
|
|
||||||
"""Wait for Tor to become available, checking every 30 seconds."""
|
|
||||||
check_interval = 30
|
|
||||||
while True:
|
|
||||||
working, tor_ip = check_tor_connectivity(config.torhosts)
|
|
||||||
if working:
|
|
||||||
_log('tor recovered: %s (%s)' % (working[0], tor_ip or 'unknown'), 'info')
|
|
||||||
# Send heartbeat to manager
|
|
||||||
try:
|
|
||||||
worker_send_heartbeat(server_url, wstate['worker_key'], True, tor_ip, worker_profiling, num_threads)
|
|
||||||
except NeedReregister:
|
|
||||||
do_register()
|
|
||||||
return working, tor_ip
|
|
||||||
_log('tor still down, retrying in %ds' % check_interval, 'warn')
|
|
||||||
# Send heartbeat with tor_ok=False
|
|
||||||
try:
|
|
||||||
worker_send_heartbeat(server_url, wstate['worker_key'], False, None, worker_profiling, num_threads)
|
|
||||||
except NeedReregister:
|
|
||||||
do_register()
|
|
||||||
time.sleep(check_interval)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
# Tor check before claiming work - don't claim if Tor is down
|
|
||||||
working, tor_ip = check_tor_connectivity(config.torhosts)
|
|
||||||
if not working:
|
|
||||||
consecutive_tor_failures += 1
|
|
||||||
_log('tor down before claiming work (consecutive: %d)' % consecutive_tor_failures, 'warn')
|
|
||||||
try:
|
|
||||||
worker_send_heartbeat(server_url, wstate['worker_key'], False, None, worker_profiling, num_threads)
|
|
||||||
except NeedReregister:
|
|
||||||
do_register()
|
|
||||||
if consecutive_tor_failures >= 2:
|
|
||||||
_log('tor appears down, waiting before claiming work', 'error')
|
|
||||||
working, current_tor_ip = wait_for_tor()
|
|
||||||
consecutive_tor_failures = 0
|
|
||||||
else:
|
|
||||||
time.sleep(10)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
consecutive_tor_failures = 0
|
|
||||||
if tor_ip != current_tor_ip:
|
|
||||||
if current_tor_ip:
|
|
||||||
_log('tor circuit rotated: %s' % tor_ip, 'info')
|
|
||||||
current_tor_ip = tor_ip
|
|
||||||
# Send heartbeat to manager
|
|
||||||
try:
|
|
||||||
worker_send_heartbeat(server_url, wstate['worker_key'], True, tor_ip, worker_profiling, num_threads)
|
|
||||||
except NeedReregister:
|
|
||||||
do_register()
|
|
||||||
|
|
||||||
# Get work from master
|
|
||||||
try:
|
|
||||||
proxies = worker_get_work(server_url, wstate['worker_key'], batch_size)
|
|
||||||
except NeedReregister:
|
|
||||||
do_register()
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not proxies:
|
|
||||||
_log('no work available, sleeping 30s', 'info')
|
|
||||||
time.sleep(30)
|
|
||||||
continue
|
|
||||||
|
|
||||||
_log('received %d proxies to test' % len(proxies), 'info')
|
|
||||||
|
|
||||||
# Create ProxyTestState and jobs for each proxy
|
|
||||||
pending_states = {}
|
|
||||||
all_jobs = []
|
|
||||||
|
|
||||||
# Get checktype(s) from config
|
|
||||||
checktypes = config.watchd.checktypes
|
|
||||||
|
|
||||||
for proxy_info in proxies:
|
|
||||||
ip = proxy_info['ip']
|
|
||||||
port = proxy_info['port']
|
|
||||||
proto = proxy_info.get('proto', 'http')
|
|
||||||
failed = proxy_info.get('failed', 0)
|
|
||||||
source_proto = proxy_info.get('source_proto')
|
|
||||||
proxy_str = '%s:%d' % (ip, port)
|
|
||||||
|
|
||||||
# Create state for this proxy
|
|
||||||
state = proxywatchd.ProxyTestState(
|
|
||||||
ip, port, proto, failed,
|
|
||||||
success_count=0, total_duration=0.0,
|
|
||||||
country=None, mitm=0, consecutive_success=0,
|
|
||||||
asn=None, oldies=False,
|
|
||||||
completion_queue=completion_queue,
|
|
||||||
proxy_full=proxy_str, source_proto=source_proto
|
|
||||||
)
|
|
||||||
pending_states[proxy_str] = state
|
|
||||||
|
|
||||||
# Select random checktype
|
|
||||||
checktype = random.choice(checktypes)
|
|
||||||
|
|
||||||
# Get target for this checktype
|
|
||||||
if checktype == 'judges':
|
|
||||||
available = proxywatchd.judge_stats.get_available_judges(
|
|
||||||
list(proxywatchd.judges.keys()))
|
|
||||||
target = random.choice(available) if available else random.choice(
|
|
||||||
list(proxywatchd.judges.keys()))
|
|
||||||
elif checktype == 'ssl':
|
|
||||||
target = random.choice(proxywatchd.ssl_targets)
|
|
||||||
elif checktype == 'irc':
|
|
||||||
target = random.choice(config.servers) if config.servers else 'irc.libera.chat:6667'
|
|
||||||
else: # head
|
|
||||||
target = random.choice(list(proxywatchd.regexes.keys()))
|
|
||||||
|
|
||||||
job = proxywatchd.TargetTestJob(state, target, checktype)
|
|
||||||
all_jobs.append(job)
|
|
||||||
|
|
||||||
# Shuffle and queue jobs
|
|
||||||
random.shuffle(all_jobs)
|
|
||||||
for job in all_jobs:
|
|
||||||
job_queue.put(job, priority=0)
|
|
||||||
|
|
||||||
# Wait for all jobs to complete
|
|
||||||
completed = 0
|
|
||||||
results = []
|
|
||||||
timeout_start = time.time()
|
|
||||||
timeout_seconds = config.watchd.timeout * 2 + 30 # generous timeout
|
|
||||||
|
|
||||||
while completed < len(proxies):
|
|
||||||
try:
|
|
||||||
state = completion_queue.get(timeout=1)
|
|
||||||
completed += 1
|
|
||||||
|
|
||||||
# Build result from state (failcount == 0 means success)
|
|
||||||
is_working = state.failcount == 0
|
|
||||||
latency_sec = (state.last_latency_ms / 1000.0) if state.last_latency_ms else 0
|
|
||||||
result = {
|
|
||||||
'ip': state.ip,
|
|
||||||
'port': state.port,
|
|
||||||
'proto': state.proto,
|
|
||||||
'working': is_working,
|
|
||||||
'latency': round(latency_sec, 3) if is_working else 0,
|
|
||||||
'error': None if is_working else 'failed',
|
|
||||||
}
|
|
||||||
results.append(result)
|
|
||||||
|
|
||||||
# Progress logging
|
|
||||||
if completed % 20 == 0 or completed == len(proxies):
|
|
||||||
working = sum(1 for r in results if r.get('working'))
|
|
||||||
_log('tested %d/%d proxies (%d working)' % (
|
|
||||||
completed, len(proxies), working), 'info')
|
|
||||||
|
|
||||||
except Queue.Empty:
|
|
||||||
if time.time() - timeout_start > timeout_seconds:
|
|
||||||
_log('batch timeout, %d/%d completed' % (completed, len(proxies)), 'warn')
|
|
||||||
break
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Submit results
|
|
||||||
try:
|
|
||||||
processed = worker_submit_results(server_url, wstate['worker_key'], results)
|
|
||||||
except NeedReregister:
|
|
||||||
do_register()
|
|
||||||
# Retry submission with new key
|
|
||||||
try:
|
|
||||||
processed = worker_submit_results(server_url, wstate['worker_key'], results)
|
|
||||||
except NeedReregister:
|
|
||||||
_log('still rejected after re-register, discarding batch', 'error')
|
|
||||||
processed = 0
|
|
||||||
|
|
||||||
jobs_completed += 1
|
|
||||||
proxies_tested += len(results)
|
|
||||||
|
|
||||||
working = sum(1 for r in results if r.get('working'))
|
|
||||||
_log('batch %d: %d/%d working, submitted %d' % (
|
|
||||||
jobs_completed, working, len(results), processed), 'info')
|
|
||||||
|
|
||||||
# Brief pause between batches
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
elapsed = time.time() - start_time
|
|
||||||
_log('worker stopping...', 'info')
|
|
||||||
# Stop threads
|
|
||||||
for wt in threads:
|
|
||||||
wt.stop()
|
|
||||||
for wt in threads:
|
|
||||||
wt.term()
|
|
||||||
_log('worker stopped after %s' % format_duration(int(elapsed)), 'info')
|
|
||||||
_log(' jobs completed: %d' % jobs_completed, 'info')
|
|
||||||
_log(' proxies tested: %d' % proxies_tested, 'info')
|
|
||||||
|
|
||||||
|
|
||||||
def worker_v2_main(config):
|
|
||||||
"""V2 worker mode -- URL-driven discovery.
|
|
||||||
|
|
||||||
Claims URLs from master, fetches through Tor, extracts and tests proxies,
|
Claims URLs from master, fetches through Tor, extracts and tests proxies,
|
||||||
reports working proxies back to master.
|
reports working proxies back to master.
|
||||||
@@ -815,10 +470,11 @@ def worker_v2_main(config):
|
|||||||
if config.args.register:
|
if config.args.register:
|
||||||
return
|
return
|
||||||
|
|
||||||
_log('starting worker V2 mode (URL-driven)', 'info')
|
_log('starting worker mode (URL-driven)', 'info')
|
||||||
_log(' server: %s' % server_url, 'info')
|
_log(' server: %s' % server_url, 'info')
|
||||||
_log(' threads: %d' % num_threads, 'info')
|
_log(' threads: %d' % num_threads, 'info')
|
||||||
_log(' url batch: %d' % url_batch_size, 'info')
|
_log(' url batch: %d' % url_batch_size, 'info')
|
||||||
|
_log(' cache ttl: %s' % ('%ds' % config.worker.cache_ttl if config.worker.cache_ttl > 0 else 'disabled'), 'info')
|
||||||
_log(' tor hosts: %s' % config.common.tor_hosts, 'info')
|
_log(' tor hosts: %s' % config.common.tor_hosts, 'info')
|
||||||
|
|
||||||
# Verify Tor connectivity before starting
|
# Verify Tor connectivity before starting
|
||||||
@@ -877,6 +533,10 @@ def worker_v2_main(config):
|
|||||||
worker_profiling = config.args.profile or config.common.profiling
|
worker_profiling = config.args.profile or config.common.profiling
|
||||||
wstate = {'worker_key': worker_key, 'worker_id': worker_id, 'backoff': 10}
|
wstate = {'worker_key': worker_key, 'worker_id': worker_id, 'backoff': 10}
|
||||||
|
|
||||||
|
# Local proxy test cache: addr -> (timestamp, success, result_dict_or_None)
|
||||||
|
cache_ttl = config.worker.cache_ttl
|
||||||
|
proxy_cache = {} if cache_ttl > 0 else None
|
||||||
|
|
||||||
def do_register():
|
def do_register():
|
||||||
"""Register with master, with exponential backoff on failure."""
|
"""Register with master, with exponential backoff on failure."""
|
||||||
while True:
|
while True:
|
||||||
@@ -1055,6 +715,33 @@ def worker_v2_main(config):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Filter against local test cache
|
||||||
|
cached_working = []
|
||||||
|
if proxy_cache is not None:
|
||||||
|
now = time.time()
|
||||||
|
uncached = []
|
||||||
|
cache_hits = 0
|
||||||
|
for addr, pr, conf in unique_proxies:
|
||||||
|
# Normalize to ip:port for cache lookup (strip auth prefix)
|
||||||
|
cache_key = addr.split('@')[-1] if '@' in addr else addr
|
||||||
|
entry = proxy_cache.get(cache_key)
|
||||||
|
if entry and (now - entry[0]) < cache_ttl:
|
||||||
|
cache_hits += 1
|
||||||
|
if entry[1]: # cached success
|
||||||
|
cached_working.append(entry[2])
|
||||||
|
else:
|
||||||
|
uncached.append((addr, pr, conf))
|
||||||
|
if cache_hits:
|
||||||
|
_log('%d cached (%d working), %d to test' % (
|
||||||
|
cache_hits, len(cached_working), len(uncached)), 'info')
|
||||||
|
unique_proxies = uncached
|
||||||
|
|
||||||
|
if not unique_proxies:
|
||||||
|
# All proxies were cached, nothing to test
|
||||||
|
cycles += 1
|
||||||
|
time.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
_log('testing %d unique proxies' % len(unique_proxies), 'info')
|
_log('testing %d unique proxies' % len(unique_proxies), 'info')
|
||||||
|
|
||||||
# Phase 2: Test extracted proxies using worker thread pool
|
# Phase 2: Test extracted proxies using worker thread pool
|
||||||
@@ -1114,13 +801,16 @@ def worker_v2_main(config):
|
|||||||
timeout_start = time.time()
|
timeout_start = time.time()
|
||||||
timeout_seconds = max(config.watchd.timeout * 2 + 30, len(all_jobs) * 0.5)
|
timeout_seconds = max(config.watchd.timeout * 2 + 30, len(all_jobs) * 0.5)
|
||||||
working_results = []
|
working_results = []
|
||||||
|
last_heartbeat = time.time()
|
||||||
|
last_report = time.time()
|
||||||
|
|
||||||
while completed < len(all_jobs):
|
while completed < len(all_jobs):
|
||||||
try:
|
try:
|
||||||
state = completion_queue.get(timeout=1)
|
state = completion_queue.get(timeout=1)
|
||||||
completed += 1
|
completed += 1
|
||||||
|
|
||||||
if state.failcount == 0:
|
success, _ = state.evaluate()
|
||||||
|
if success:
|
||||||
latency_sec = (state.last_latency_ms / 1000.0) if state.last_latency_ms else 0
|
latency_sec = (state.last_latency_ms / 1000.0) if state.last_latency_ms else 0
|
||||||
proxy_addr = state.proxy
|
proxy_addr = state.proxy
|
||||||
if state.auth:
|
if state.auth:
|
||||||
@@ -1134,6 +824,8 @@ def worker_v2_main(config):
|
|||||||
'latency': round(latency_sec, 3),
|
'latency': round(latency_sec, 3),
|
||||||
'exit_ip': state.exit_ip,
|
'exit_ip': state.exit_ip,
|
||||||
'source_url': source_map.get(proxy_addr) or source_map.get(state.proxy, ''),
|
'source_url': source_map.get(proxy_addr) or source_map.get(state.proxy, ''),
|
||||||
|
'checktype': state.last_check or '',
|
||||||
|
'target': state.last_target or '',
|
||||||
})
|
})
|
||||||
|
|
||||||
if completed % 50 == 0 or completed == len(all_jobs):
|
if completed % 50 == 0 or completed == len(all_jobs):
|
||||||
@@ -1144,8 +836,55 @@ def worker_v2_main(config):
|
|||||||
if time.time() - timeout_start > timeout_seconds:
|
if time.time() - timeout_start > timeout_seconds:
|
||||||
_log('test timeout, %d/%d completed' % (completed, len(all_jobs)), 'warn')
|
_log('test timeout, %d/%d completed' % (completed, len(all_jobs)), 'warn')
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Periodic heartbeat to prevent stale detection
|
||||||
|
now = time.time()
|
||||||
|
if now - last_heartbeat >= 60:
|
||||||
|
try:
|
||||||
|
worker_send_heartbeat(server_url, wstate['worker_key'],
|
||||||
|
True, current_tor_ip, worker_profiling, num_threads)
|
||||||
|
except NeedReregister:
|
||||||
|
do_register()
|
||||||
|
last_heartbeat = now
|
||||||
|
|
||||||
|
# Periodic proxy report (flush working results every 5 minutes)
|
||||||
|
if working_results and now - last_report >= 300:
|
||||||
|
reported = False
|
||||||
|
try:
|
||||||
|
processed = worker_report_proxies(server_url, wstate['worker_key'],
|
||||||
|
working_results)
|
||||||
|
if processed > 0:
|
||||||
|
_log('interim report: %d proxies (%d submitted)' % (
|
||||||
|
len(working_results), processed), 'info')
|
||||||
|
reported = True
|
||||||
|
except NeedReregister:
|
||||||
|
do_register()
|
||||||
|
try:
|
||||||
|
processed = worker_report_proxies(server_url, wstate['worker_key'],
|
||||||
|
working_results)
|
||||||
|
if processed > 0:
|
||||||
|
reported = True
|
||||||
|
except NeedReregister:
|
||||||
|
pass
|
||||||
|
if reported:
|
||||||
|
working_results = []
|
||||||
|
last_report = now
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Populate proxy test cache from results
|
||||||
|
if proxy_cache is not None:
|
||||||
|
now = time.time()
|
||||||
|
working_addrs = set()
|
||||||
|
for r in working_results:
|
||||||
|
addr = '%s:%d' % (r['ip'], r['port'])
|
||||||
|
proxy_cache[addr] = (now, True, r)
|
||||||
|
working_addrs.add(addr)
|
||||||
|
# Cache failures for tested proxies that didn't succeed
|
||||||
|
for proxy_str in pending_states:
|
||||||
|
if proxy_str not in working_addrs:
|
||||||
|
proxy_cache[proxy_str] = (now, False, None)
|
||||||
|
|
||||||
proxies_working += len(working_results)
|
proxies_working += len(working_results)
|
||||||
|
|
||||||
# Report working proxies to master
|
# Report working proxies to master
|
||||||
@@ -1162,21 +901,33 @@ def worker_v2_main(config):
|
|||||||
_log('reported %d working proxies (submitted %d)' % (len(working_results), processed), 'info')
|
_log('reported %d working proxies (submitted %d)' % (len(working_results), processed), 'info')
|
||||||
|
|
||||||
cycles += 1
|
cycles += 1
|
||||||
|
|
||||||
|
# Periodic cache cleanup: evict expired entries every 10 cycles
|
||||||
|
if proxy_cache is not None and cycles % 10 == 0:
|
||||||
|
now = time.time()
|
||||||
|
expired = [k for k, v in proxy_cache.items() if (now - v[0]) >= cache_ttl]
|
||||||
|
if expired:
|
||||||
|
for k in expired:
|
||||||
|
del proxy_cache[k]
|
||||||
|
_log('cache cleanup: evicted %d expired, %d remaining' % (len(expired), len(proxy_cache)), 'info')
|
||||||
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
elapsed = time.time() - start_time
|
elapsed = time.time() - start_time
|
||||||
_log('worker V2 stopping...', 'info')
|
_log('worker stopping...', 'info')
|
||||||
session.close()
|
session.close()
|
||||||
for wt in threads:
|
for wt in threads:
|
||||||
wt.stop()
|
wt.stop()
|
||||||
for wt in threads:
|
for wt in threads:
|
||||||
wt.term()
|
wt.term()
|
||||||
_log('worker V2 stopped after %s' % format_duration(int(elapsed)), 'info')
|
_log('worker stopped after %s' % format_duration(int(elapsed)), 'info')
|
||||||
_log(' cycles: %d' % cycles, 'info')
|
_log(' cycles: %d' % cycles, 'info')
|
||||||
_log(' urls fetched: %d' % urls_fetched, 'info')
|
_log(' urls fetched: %d' % urls_fetched, 'info')
|
||||||
_log(' proxies found: %d' % proxies_found, 'info')
|
_log(' proxies found: %d' % proxies_found, 'info')
|
||||||
_log(' proxies working: %d' % proxies_working, 'info')
|
_log(' proxies working: %d' % proxies_working, 'info')
|
||||||
|
if proxy_cache is not None:
|
||||||
|
_log(' cache entries: %d' % len(proxy_cache), 'info')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -1191,12 +942,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# V2 worker mode: URL-driven discovery
|
# Worker mode: URL-driven discovery
|
||||||
if config.args.worker_v2:
|
|
||||||
worker_v2_main(config)
|
|
||||||
return
|
|
||||||
|
|
||||||
# V1 worker mode: connect to master server instead of running locally
|
|
||||||
if config.args.worker or config.args.register:
|
if config.args.worker or config.args.register:
|
||||||
worker_main(config)
|
worker_main(config)
|
||||||
return
|
return
|
||||||
@@ -1299,8 +1045,20 @@ def main():
|
|||||||
statusmsg = time.time()
|
statusmsg = time.time()
|
||||||
list_max_age_seconds = config.ppf.list_max_age_days * 86400
|
list_max_age_seconds = config.ppf.list_max_age_days * 86400
|
||||||
last_skip_log = 0
|
last_skip_log = 0
|
||||||
|
last_reseed = time.time()
|
||||||
|
reseed_interval = 6 * 3600 # re-seed sources every 6 hours
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
# Periodic re-seeding: reset errored-out seed sources
|
||||||
|
if time.time() - last_reseed >= reseed_interval:
|
||||||
|
dbs.seed_proxy_sources(urldb, reset_errors=True)
|
||||||
|
last_reseed = time.time()
|
||||||
|
|
||||||
|
# When ppf threads = 0, skip URL fetching (workers handle it via /api/claim-urls)
|
||||||
|
if config.ppf.threads == 0:
|
||||||
|
time.sleep(60)
|
||||||
|
continue
|
||||||
|
|
||||||
time.sleep(random.random()/10)
|
time.sleep(random.random()/10)
|
||||||
if (time.time() - statusmsg) > 180:
|
if (time.time() - statusmsg) > 180:
|
||||||
_log('running %d thread(s) over %d' % (len(threads), config.ppf.threads), 'ppf')
|
_log('running %d thread(s) over %d' % (len(threads), config.ppf.threads), 'ppf')
|
||||||
|
|||||||
300
proxywatchd.py
300
proxywatchd.py
@@ -44,7 +44,7 @@ import dns
|
|||||||
from misc import _log, categorize_error, tor_proxy_url, is_ssl_protocol_error
|
from misc import _log, categorize_error, tor_proxy_url, is_ssl_protocol_error
|
||||||
import rocksock
|
import rocksock
|
||||||
import connection_pool
|
import connection_pool
|
||||||
from stats import JudgeStats, Stats, regexes, ssl_targets, try_div
|
from stats import TargetStats, JudgeStats, Stats, regexes, ssl_targets, try_div
|
||||||
from mitm import MITMCertStats, extract_cert_info, get_mitm_certificate
|
from mitm import MITMCertStats, extract_cert_info, get_mitm_certificate
|
||||||
from dns import socks4_resolve
|
from dns import socks4_resolve
|
||||||
from job import PriorityJobQueue, calculate_priority
|
from job import PriorityJobQueue, calculate_priority
|
||||||
@@ -164,9 +164,9 @@ DEAD_PROXY = -1
|
|||||||
# Error categories that indicate proxy is definitely dead (not temporary failure)
|
# Error categories that indicate proxy is definitely dead (not temporary failure)
|
||||||
FATAL_ERROR_CATEGORIES = ('refused', 'unreachable', 'auth')
|
FATAL_ERROR_CATEGORIES = ('refused', 'unreachable', 'auth')
|
||||||
|
|
||||||
# Patterns indicating judge is blocking the proxy (not a proxy failure)
|
# Patterns indicating HTTP target is blocking the proxy (not a proxy failure)
|
||||||
# These should NOT count as proxy failures - retry with different judge
|
# These should NOT count as proxy failures - applies to judges and head targets
|
||||||
JUDGE_BLOCK_PATTERNS = [
|
HTTP_BLOCK_PATTERNS = [
|
||||||
r'HTTP/1\.[01] 403', # Forbidden
|
r'HTTP/1\.[01] 403', # Forbidden
|
||||||
r'HTTP/1\.[01] 429', # Too Many Requests
|
r'HTTP/1\.[01] 429', # Too Many Requests
|
||||||
r'HTTP/1\.[01] 503', # Service Unavailable
|
r'HTTP/1\.[01] 503', # Service Unavailable
|
||||||
@@ -179,7 +179,7 @@ JUDGE_BLOCK_PATTERNS = [
|
|||||||
r'blocked', # Explicit block
|
r'blocked', # Explicit block
|
||||||
r'Checking your browser', # Cloudflare JS challenge
|
r'Checking your browser', # Cloudflare JS challenge
|
||||||
]
|
]
|
||||||
JUDGE_BLOCK_RE = re.compile('|'.join(JUDGE_BLOCK_PATTERNS), re.IGNORECASE)
|
HTTP_BLOCK_RE = re.compile('|'.join(HTTP_BLOCK_PATTERNS), re.IGNORECASE)
|
||||||
|
|
||||||
# Check types: irc, http (header match), judges (body match), ssl (TLS handshake)
|
# Check types: irc, http (header match), judges (body match), ssl (TLS handshake)
|
||||||
# Judge services - return IP in body (plain text, JSON, or HTML)
|
# Judge services - return IP in body (plain text, JSON, or HTML)
|
||||||
@@ -213,6 +213,9 @@ judges = {
|
|||||||
|
|
||||||
# Global instances
|
# Global instances
|
||||||
judge_stats = JudgeStats()
|
judge_stats = JudgeStats()
|
||||||
|
head_target_stats = TargetStats(cooldown_seconds=300, block_threshold=3)
|
||||||
|
ssl_target_stats = TargetStats(cooldown_seconds=300, block_threshold=3)
|
||||||
|
irc_target_stats = TargetStats(cooldown_seconds=300, block_threshold=3)
|
||||||
mitm_cert_stats = MITMCertStats()
|
mitm_cert_stats = MITMCertStats()
|
||||||
|
|
||||||
|
|
||||||
@@ -299,7 +302,8 @@ class ProxyTestState(object):
|
|||||||
'asn', 'isoldies', 'completion_queue', 'lock', 'results', 'completed',
|
'asn', 'isoldies', 'completion_queue', 'lock', 'results', 'completed',
|
||||||
'evaluated', 'last_latency_ms', 'exit_ip', 'reveals_headers',
|
'evaluated', 'last_latency_ms', 'exit_ip', 'reveals_headers',
|
||||||
'last_fail_category', 'original_failcount', 'had_ssl_test', 'ssl_success',
|
'last_fail_category', 'original_failcount', 'had_ssl_test', 'ssl_success',
|
||||||
'cert_error', 'source_proto', 'protos_working'
|
'cert_error', 'source_proto', 'protos_working',
|
||||||
|
'last_check', 'last_target'
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, ip, port, proto, failcount, success_count, total_duration,
|
def __init__(self, ip, port, proto, failcount, success_count, total_duration,
|
||||||
@@ -343,6 +347,9 @@ class ProxyTestState(object):
|
|||||||
# Protocol fingerprinting
|
# Protocol fingerprinting
|
||||||
self.source_proto = source_proto
|
self.source_proto = source_proto
|
||||||
self.protos_working = None
|
self.protos_working = None
|
||||||
|
# Test provenance
|
||||||
|
self.last_check = None
|
||||||
|
self.last_target = None
|
||||||
|
|
||||||
def record_result(self, success, proto=None, duration=0, srv=None, tor=None, ssl=None, category=None, exit_ip=None, reveals_headers=None):
|
def record_result(self, success, proto=None, duration=0, srv=None, tor=None, ssl=None, category=None, exit_ip=None, reveals_headers=None):
|
||||||
"""Record a single target test result. Thread-safe.
|
"""Record a single target test result. Thread-safe.
|
||||||
@@ -407,18 +414,19 @@ class ProxyTestState(object):
|
|||||||
self.evaluated = True
|
self.evaluated = True
|
||||||
self.checktime = int(time.time())
|
self.checktime = int(time.time())
|
||||||
|
|
||||||
# Filter out judge_block results (inconclusive, neither pass nor fail)
|
# Filter out target_block results (inconclusive, neither pass nor fail)
|
||||||
real_results = [r for r in self.results if r.get('category') != 'judge_block']
|
block_cats = ('judge_block', 'target_block')
|
||||||
|
real_results = [r for r in self.results if r.get('category') not in block_cats]
|
||||||
successes = [r for r in real_results if r['success']]
|
successes = [r for r in real_results if r['success']]
|
||||||
failures = [r for r in real_results if not r['success']]
|
failures = [r for r in real_results if not r['success']]
|
||||||
num_success = len(successes)
|
num_success = len(successes)
|
||||||
judge_blocks = len(self.results) - len(real_results)
|
target_blocks = len(self.results) - len(real_results)
|
||||||
_dbg('evaluate: %d success, %d fail, %d judge_block, results=%d' % (
|
_dbg('evaluate: %d success, %d fail, %d target_block, results=%d' % (
|
||||||
num_success, len(failures), judge_blocks, len(self.results)), self.proxy)
|
num_success, len(failures), target_blocks, len(self.results)), self.proxy)
|
||||||
|
|
||||||
# All results were judge blocks: inconclusive, preserve current state
|
# All results were target blocks: inconclusive, preserve current state
|
||||||
if not real_results and self.results:
|
if not real_results and self.results:
|
||||||
_dbg('all results inconclusive (judge_block), no state change', self.proxy)
|
_dbg('all results inconclusive (target_block), no state change', self.proxy)
|
||||||
self.failcount = self.original_failcount
|
self.failcount = self.original_failcount
|
||||||
return (self.original_failcount == 0, None)
|
return (self.original_failcount == 0, None)
|
||||||
|
|
||||||
@@ -480,9 +488,9 @@ class ProxyTestState(object):
|
|||||||
# and only if this test didn't detect MITM
|
# and only if this test didn't detect MITM
|
||||||
if self.consecutive_success > 0 and (self.consecutive_success % 3) == 0 and not self.cert_error:
|
if self.consecutive_success > 0 and (self.consecutive_success % 3) == 0 and not self.cert_error:
|
||||||
self.mitm = 0
|
self.mitm = 0
|
||||||
self.consecutive_success += 1
|
self.consecutive_success = (self.consecutive_success or 0) + 1
|
||||||
self.success_count += 1
|
self.success_count = (self.success_count or 0) + 1
|
||||||
self.total_duration += int(last_good['duration'] * 1000)
|
self.total_duration = (self.total_duration or 0) + int(last_good['duration'] * 1000)
|
||||||
|
|
||||||
# Calculate average latency from successful tests (in ms)
|
# Calculate average latency from successful tests (in ms)
|
||||||
durations = [s['duration'] for s in successes if s['duration']]
|
durations = [s['duration'] for s in successes if s['duration']]
|
||||||
@@ -543,6 +551,9 @@ class TargetTestJob(object):
|
|||||||
_log('JOB RUN #%d: %s -> %s (%s)' % (_sample_debug_counter,
|
_log('JOB RUN #%d: %s -> %s (%s)' % (_sample_debug_counter,
|
||||||
self.proxy_state.proxy, self.target_srv, self.checktype), 'info')
|
self.proxy_state.proxy, self.target_srv, self.checktype), 'info')
|
||||||
network_stats.set_category('proxy')
|
network_stats.set_category('proxy')
|
||||||
|
# Track test provenance (overwritten on each attempt, last success wins)
|
||||||
|
self.proxy_state.last_check = self.checktype
|
||||||
|
self.proxy_state.last_target = self.target_srv
|
||||||
_dbg('test start: %s via %s' % (self.target_srv, self.checktype), self.proxy_state.proxy)
|
_dbg('test start: %s via %s' % (self.target_srv, self.checktype), self.proxy_state.proxy)
|
||||||
sock, proto, duration, tor, srv, failinc, is_ssl, err_cat = self._connect_and_test()
|
sock, proto, duration, tor, srv, failinc, is_ssl, err_cat = self._connect_and_test()
|
||||||
_dbg('connect result: sock=%s proto=%s err=%s' % (bool(sock), proto, err_cat), self.proxy_state.proxy)
|
_dbg('connect result: sock=%s proto=%s err=%s' % (bool(sock), proto, err_cat), self.proxy_state.proxy)
|
||||||
@@ -610,6 +621,10 @@ class TargetTestJob(object):
|
|||||||
reveals_headers = bool(re.search(HEADER_REVEAL_PATTERN, recv, re.IGNORECASE))
|
reveals_headers = bool(re.search(HEADER_REVEAL_PATTERN, recv, re.IGNORECASE))
|
||||||
# Record successful judge
|
# Record successful judge
|
||||||
judge_stats.record_success(srv)
|
judge_stats.record_success(srv)
|
||||||
|
elif self.checktype == 'head':
|
||||||
|
head_target_stats.record_success(srv)
|
||||||
|
elif self.checktype == 'irc':
|
||||||
|
irc_target_stats.record_success(srv)
|
||||||
self.proxy_state.record_result(
|
self.proxy_state.record_result(
|
||||||
True, proto=proto, duration=elapsed,
|
True, proto=proto, duration=elapsed,
|
||||||
srv=srv, tor=tor, ssl=is_ssl, exit_ip=exit_ip,
|
srv=srv, tor=tor, ssl=is_ssl, exit_ip=exit_ip,
|
||||||
@@ -617,22 +632,28 @@ class TargetTestJob(object):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
_dbg('regex NO MATCH, recv[:100]=%r' % recv[:100], self.proxy_state.proxy)
|
_dbg('regex NO MATCH, recv[:100]=%r' % recv[:100], self.proxy_state.proxy)
|
||||||
# Check if judge is blocking us (not a proxy failure)
|
# Check if HTTP target is blocking us (not a proxy failure)
|
||||||
if self.checktype == 'judges' and JUDGE_BLOCK_RE.search(recv):
|
if self.checktype in ('judges', 'head') and HTTP_BLOCK_RE.search(recv):
|
||||||
judge_stats.record_block(srv)
|
if self.checktype == 'judges':
|
||||||
# Judge block = inconclusive, not a pass or fail
|
judge_stats.record_block(srv)
|
||||||
_dbg('judge BLOCK detected, skipping (neutral)', self.proxy_state.proxy)
|
else:
|
||||||
|
head_target_stats.record_block(srv)
|
||||||
|
_dbg('target BLOCK detected, skipping (neutral)', self.proxy_state.proxy)
|
||||||
self.proxy_state.record_result(
|
self.proxy_state.record_result(
|
||||||
False, category='judge_block', proto=proto,
|
False, category='target_block', proto=proto,
|
||||||
srv=srv, tor=tor, ssl=is_ssl
|
srv=srv, tor=tor, ssl=is_ssl
|
||||||
)
|
)
|
||||||
if config.watchd.debug:
|
if config.watchd.debug:
|
||||||
_log('judge %s challenged proxy %s (neutral, skipped)' % (
|
_log('%s %s challenged proxy %s (neutral, skipped)' % (
|
||||||
srv, self.proxy_state.proxy), 'debug')
|
self.checktype, srv, self.proxy_state.proxy), 'debug')
|
||||||
else:
|
else:
|
||||||
_dbg('FAIL: no match, no block', self.proxy_state.proxy)
|
_dbg('FAIL: no match, no block', self.proxy_state.proxy)
|
||||||
if self.checktype == 'judges':
|
if self.checktype == 'judges':
|
||||||
judge_stats.record_failure(srv)
|
judge_stats.record_failure(srv)
|
||||||
|
elif self.checktype == 'head':
|
||||||
|
head_target_stats.record_failure(srv)
|
||||||
|
elif self.checktype == 'irc':
|
||||||
|
irc_target_stats.record_failure(srv)
|
||||||
self.proxy_state.record_result(False, category='other')
|
self.proxy_state.record_result(False, category='other')
|
||||||
|
|
||||||
except KeyboardInterrupt as e:
|
except KeyboardInterrupt as e:
|
||||||
@@ -682,6 +703,90 @@ class TargetTestJob(object):
|
|||||||
protos.append(p)
|
protos.append(p)
|
||||||
return protos
|
return protos
|
||||||
|
|
||||||
|
def _fingerprint_protocol(self, pool):
|
||||||
|
"""Identify proxy protocol via lightweight handshake probes.
|
||||||
|
|
||||||
|
Sends protocol-specific greeting bytes directly to the proxy
|
||||||
|
and identifies the protocol from the response pattern.
|
||||||
|
|
||||||
|
Returns: 'socks5', 'socks4', 'http', or None
|
||||||
|
"""
|
||||||
|
ps = self.proxy_state
|
||||||
|
fp_timeout = min(config.watchd.timeout, 5)
|
||||||
|
torhost = pool.get_tor_host(self.worker_id) if pool else random.choice(config.torhosts)
|
||||||
|
|
||||||
|
for probe_fn, name in (
|
||||||
|
(self._probe_socks5, 'socks5'),
|
||||||
|
(self._probe_socks4, 'socks4'),
|
||||||
|
(self._probe_http, 'http'),
|
||||||
|
):
|
||||||
|
result = probe_fn(ps, torhost, fp_timeout)
|
||||||
|
if result:
|
||||||
|
_sample_dbg('fingerprint: %s detected' % result, ps.proxy)
|
||||||
|
return result
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _probe_socks5(self, ps, torhost, timeout):
|
||||||
|
"""Probe for SOCKS5 protocol. Returns 'socks5' or None."""
|
||||||
|
try:
|
||||||
|
sock = rocksock.Rocksock(
|
||||||
|
host=ps.ip, port=int(ps.port),
|
||||||
|
proxies=[rocksock.RocksockProxyFromURL(tor_proxy_url(torhost))],
|
||||||
|
timeout=timeout
|
||||||
|
)
|
||||||
|
sock.connect()
|
||||||
|
sock.send('\x05\x01\x00')
|
||||||
|
res = sock.recv(2)
|
||||||
|
sock.disconnect()
|
||||||
|
if len(res) >= 1 and res[0] == '\x05':
|
||||||
|
return 'socks5'
|
||||||
|
except rocksock.RocksockException:
|
||||||
|
pass
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _probe_socks4(self, ps, torhost, timeout):
|
||||||
|
"""Probe for SOCKS4 protocol. Returns 'socks4' or None."""
|
||||||
|
try:
|
||||||
|
sock = rocksock.Rocksock(
|
||||||
|
host=ps.ip, port=int(ps.port),
|
||||||
|
proxies=[rocksock.RocksockProxyFromURL(tor_proxy_url(torhost))],
|
||||||
|
timeout=timeout
|
||||||
|
)
|
||||||
|
sock.connect()
|
||||||
|
# CONNECT 1.1.1.1:80
|
||||||
|
sock.send('\x04\x01\x00\x50\x01\x01\x01\x01\x00')
|
||||||
|
res = sock.recv(2)
|
||||||
|
sock.disconnect()
|
||||||
|
if len(res) >= 2 and ord(res[0]) == 0 and ord(res[1]) in (0x5a, 0x5b, 0x5c, 0x5d):
|
||||||
|
return 'socks4'
|
||||||
|
except rocksock.RocksockException:
|
||||||
|
pass
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _probe_http(self, ps, torhost, timeout):
|
||||||
|
"""Probe for HTTP CONNECT protocol. Returns 'http' or None."""
|
||||||
|
try:
|
||||||
|
sock = rocksock.Rocksock(
|
||||||
|
host=ps.ip, port=int(ps.port),
|
||||||
|
proxies=[rocksock.RocksockProxyFromURL(tor_proxy_url(torhost))],
|
||||||
|
timeout=timeout
|
||||||
|
)
|
||||||
|
sock.connect()
|
||||||
|
sock.send('CONNECT 1.1.1.1:80 HTTP/1.1\r\nHost: 1.1.1.1:80\r\n\r\n')
|
||||||
|
res = sock.recv(13)
|
||||||
|
sock.disconnect()
|
||||||
|
if res.startswith('HTTP/'):
|
||||||
|
return 'http'
|
||||||
|
except rocksock.RocksockException:
|
||||||
|
pass
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
return None
|
||||||
|
|
||||||
def _connect_and_test(self):
|
def _connect_and_test(self):
|
||||||
"""Connect to target through the proxy and send test packet.
|
"""Connect to target through the proxy and send test packet.
|
||||||
|
|
||||||
@@ -702,9 +807,16 @@ class TargetTestJob(object):
|
|||||||
protos = self._build_proto_order()
|
protos = self._build_proto_order()
|
||||||
pool = connection_pool.get_pool()
|
pool = connection_pool.get_pool()
|
||||||
|
|
||||||
|
# Fingerprint unknown proxies to avoid brute-force protocol guessing
|
||||||
|
if ps.proto is None and config.watchd.fingerprint:
|
||||||
|
detected = self._fingerprint_protocol(pool)
|
||||||
|
if detected:
|
||||||
|
protos = [detected] + [p for p in protos if p != detected]
|
||||||
|
|
||||||
# Phase 1: SSL handshake (if ssl_first enabled or SSL-only mode)
|
# Phase 1: SSL handshake (if ssl_first enabled or SSL-only mode)
|
||||||
|
ssl_reason = None
|
||||||
if config.watchd.ssl_first or self.checktype == 'none':
|
if config.watchd.ssl_first or self.checktype == 'none':
|
||||||
result = self._try_ssl_handshake(protos, pool)
|
result, ssl_reason = self._try_ssl_handshake(protos, pool)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
return result # SSL succeeded or MITM detected
|
return result # SSL succeeded or MITM detected
|
||||||
# SSL failed for all protocols
|
# SSL failed for all protocols
|
||||||
@@ -714,17 +826,21 @@ class TargetTestJob(object):
|
|||||||
_dbg('SSL failed, trying secondary check: %s' % self.checktype, ps.proxy)
|
_dbg('SSL failed, trying secondary check: %s' % self.checktype, ps.proxy)
|
||||||
|
|
||||||
# Phase 2: Secondary check (configured checktype)
|
# Phase 2: Secondary check (configured checktype)
|
||||||
return self._try_secondary_check(protos, pool)
|
return self._try_secondary_check(protos, pool, ssl_reason)
|
||||||
|
|
||||||
def _try_ssl_handshake(self, protos, pool):
|
def _try_ssl_handshake(self, protos, pool):
|
||||||
"""Attempt SSL handshake to verify proxy works with TLS.
|
"""Attempt SSL handshake to verify proxy works with TLS.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple on success/MITM, None on failure (should try secondary check)
|
(result, ssl_reason) where result is a tuple on success/MITM
|
||||||
|
or None on failure, and ssl_reason is the last SSL error reason
|
||||||
|
string (for secondary check SSL/plain decision).
|
||||||
"""
|
"""
|
||||||
ps = self.proxy_state
|
ps = self.proxy_state
|
||||||
ssl_target = random.choice(ssl_targets)
|
available_ssl = ssl_target_stats.get_available(ssl_targets) or ssl_targets
|
||||||
|
ssl_target = random.choice(available_ssl)
|
||||||
last_error_category = None
|
last_error_category = None
|
||||||
|
last_ssl_reason = None
|
||||||
|
|
||||||
for proto in protos:
|
for proto in protos:
|
||||||
if pool:
|
if pool:
|
||||||
@@ -765,15 +881,22 @@ class TargetTestJob(object):
|
|||||||
elapsed = time.time() - duration
|
elapsed = time.time() - duration
|
||||||
if pool:
|
if pool:
|
||||||
pool.record_success(torhost, elapsed)
|
pool.record_success(torhost, elapsed)
|
||||||
|
ssl_target_stats.record_success(ssl_target)
|
||||||
sock.disconnect()
|
sock.disconnect()
|
||||||
_dbg('SSL handshake OK', ps.proxy)
|
_dbg('SSL handshake OK', ps.proxy)
|
||||||
return None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_ok'
|
return (None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_ok'), None
|
||||||
|
|
||||||
except rocksock.RocksockException as e:
|
except rocksock.RocksockException as e:
|
||||||
last_error_category = categorize_error(e)
|
last_error_category = categorize_error(e)
|
||||||
et = e.get_errortype()
|
et = e.get_errortype()
|
||||||
err = e.get_error()
|
err = e.get_error()
|
||||||
|
|
||||||
|
# Track SSL reason for secondary check decision
|
||||||
|
if et == rocksock.RS_ET_SSL:
|
||||||
|
reason = e.get_failedproxy()
|
||||||
|
if isinstance(reason, str):
|
||||||
|
last_ssl_reason = reason
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sock.disconnect()
|
sock.disconnect()
|
||||||
except:
|
except:
|
||||||
@@ -786,7 +909,7 @@ class TargetTestJob(object):
|
|||||||
if pool:
|
if pool:
|
||||||
pool.record_success(torhost, elapsed)
|
pool.record_success(torhost, elapsed)
|
||||||
_dbg('SSL MITM detected', ps.proxy)
|
_dbg('SSL MITM detected', ps.proxy)
|
||||||
return None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_mitm'
|
return (None, proto, duration, torhost, ssl_target, 0, 1, 'ssl_mitm'), None
|
||||||
|
|
||||||
if config.watchd.debug:
|
if config.watchd.debug:
|
||||||
_log('SSL handshake failed: %s://%s:%d: %s' % (
|
_log('SSL handshake failed: %s://%s:%d: %s' % (
|
||||||
@@ -794,18 +917,32 @@ class TargetTestJob(object):
|
|||||||
|
|
||||||
# Check for Tor connection issues
|
# Check for Tor connection issues
|
||||||
if et == rocksock.RS_ET_OWN:
|
if et == rocksock.RS_ET_OWN:
|
||||||
if e.get_failedproxy() == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
|
fp = e.get_failedproxy()
|
||||||
|
if fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
|
||||||
if pool:
|
if pool:
|
||||||
pool.record_failure(torhost)
|
pool.record_failure(torhost)
|
||||||
|
elif fp == 1 and (err == rocksock.RS_E_REMOTE_DISCONNECTED or
|
||||||
|
err == rocksock.RS_E_HIT_TIMEOUT):
|
||||||
|
# Target-side failure
|
||||||
|
ssl_target_stats.record_failure(ssl_target)
|
||||||
|
elif et == rocksock.RS_ET_GAI:
|
||||||
|
# DNS failure -- target unresolvable
|
||||||
|
ssl_target_stats.record_block(ssl_target)
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# All protocols failed SSL
|
# All protocols failed SSL
|
||||||
return None
|
return None, last_ssl_reason
|
||||||
|
|
||||||
def _try_secondary_check(self, protos, pool):
|
def _try_secondary_check(self, protos, pool, ssl_reason=None):
|
||||||
"""Try the configured secondary checktype (head, judges, irc)."""
|
"""Try the configured secondary checktype (head, judges, irc).
|
||||||
|
|
||||||
|
ssl_reason: last SSL error reason from _try_ssl_handshake, used to
|
||||||
|
decide whether to use SSL or plain HTTP for the secondary check.
|
||||||
|
Protocol errors (proxy doesn't speak TLS) -> plain HTTP.
|
||||||
|
Other errors (cert, timeout, etc.) -> SSL without cert verification.
|
||||||
|
"""
|
||||||
ps = self.proxy_state
|
ps = self.proxy_state
|
||||||
_sample_dbg('TEST START: proxy=%s target=%s check=%s' % (
|
_sample_dbg('TEST START: proxy=%s target=%s check=%s' % (
|
||||||
ps.proxy, self.target_srv, self.checktype), ps.proxy)
|
ps.proxy, self.target_srv, self.checktype), ps.proxy)
|
||||||
@@ -817,13 +954,26 @@ class TargetTestJob(object):
|
|||||||
else:
|
else:
|
||||||
connect_host = srvname
|
connect_host = srvname
|
||||||
|
|
||||||
# Secondary checks: always use plain HTTP
|
# Decide SSL based on why the primary handshake failed:
|
||||||
use_ssl = 0
|
# - protocol error (proxy can't TLS) -> plain HTTP
|
||||||
|
# - other error (cert, timeout) -> SSL without cert verification
|
||||||
|
# - no ssl_reason (ssl_first off) -> plain HTTP (no prior info)
|
||||||
|
protocol_error = is_ssl_protocol_error(ssl_reason) if ssl_reason else True
|
||||||
verifycert = False
|
verifycert = False
|
||||||
if self.checktype == 'irc':
|
if protocol_error:
|
||||||
server_port = 6667
|
use_ssl = 0
|
||||||
|
if self.checktype == 'irc':
|
||||||
|
server_port = 6667
|
||||||
|
else:
|
||||||
|
server_port = 80
|
||||||
|
_dbg('secondary: plain (ssl protocol error)', ps.proxy)
|
||||||
else:
|
else:
|
||||||
server_port = 80
|
use_ssl = 1
|
||||||
|
if self.checktype == 'irc':
|
||||||
|
server_port = 6697
|
||||||
|
else:
|
||||||
|
server_port = 443
|
||||||
|
_dbg('secondary: ssl/no-verify (non-protocol ssl error)', ps.proxy)
|
||||||
|
|
||||||
last_error_category = None
|
last_error_category = None
|
||||||
|
|
||||||
@@ -899,6 +1049,11 @@ class TargetTestJob(object):
|
|||||||
if et == rocksock.RS_ET_OWN:
|
if et == rocksock.RS_ET_OWN:
|
||||||
if fp == 1 and (err == rocksock.RS_E_REMOTE_DISCONNECTED or
|
if fp == 1 and (err == rocksock.RS_E_REMOTE_DISCONNECTED or
|
||||||
err == rocksock.RS_E_HIT_TIMEOUT):
|
err == rocksock.RS_E_HIT_TIMEOUT):
|
||||||
|
# Target-side failure -- proxy reached target but it's down
|
||||||
|
if self.checktype == 'head':
|
||||||
|
head_target_stats.record_failure(srvname)
|
||||||
|
elif self.checktype == 'irc':
|
||||||
|
irc_target_stats.record_failure(srvname)
|
||||||
break
|
break
|
||||||
elif fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
|
elif fp == 0 and err == rocksock.RS_E_TARGET_CONN_REFUSED:
|
||||||
# Tor connection failed - record in pool
|
# Tor connection failed - record in pool
|
||||||
@@ -908,6 +1063,11 @@ class TargetTestJob(object):
|
|||||||
_log("could not connect to tor, sleep 5s", "ERROR")
|
_log("could not connect to tor, sleep 5s", "ERROR")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
elif et == rocksock.RS_ET_GAI:
|
elif et == rocksock.RS_ET_GAI:
|
||||||
|
# DNS failure -- target hostname unresolvable (hard failure)
|
||||||
|
if self.checktype == 'head':
|
||||||
|
head_target_stats.record_block(connect_host)
|
||||||
|
elif self.checktype == 'irc':
|
||||||
|
irc_target_stats.record_block(srvname)
|
||||||
_log("could not resolve connection target %s" % connect_host, "ERROR")
|
_log("could not resolve connection target %s" % connect_host, "ERROR")
|
||||||
break
|
break
|
||||||
elif et == rocksock.RS_ET_SSL and err == rocksock.RS_E_SSL_CERTIFICATE_ERROR:
|
elif et == rocksock.RS_ET_SSL and err == rocksock.RS_E_SSL_CERTIFICATE_ERROR:
|
||||||
@@ -1132,16 +1292,17 @@ class VerificationThread(threading.Thread):
|
|||||||
dbs.update_worker_trust(db, worker_a, was_correct)
|
dbs.update_worker_trust(db, worker_a, was_correct)
|
||||||
|
|
||||||
# Update proxy status with authoritative result
|
# Update proxy status with authoritative result
|
||||||
|
now_int = int(time.time())
|
||||||
if result:
|
if result:
|
||||||
db.execute('''
|
db.execute('''
|
||||||
UPDATE proxylist SET failed = 0, tested = ?
|
UPDATE proxylist SET failed = 0, tested = ?, last_seen = ?
|
||||||
WHERE proxy = ?
|
WHERE proxy = ?
|
||||||
''', (int(time.time()), proxy))
|
''', (now_int, now_int, proxy))
|
||||||
else:
|
else:
|
||||||
db.execute('''
|
db.execute('''
|
||||||
UPDATE proxylist SET failed = failed + 1, tested = ?
|
UPDATE proxylist SET failed = failed + 1, tested = ?
|
||||||
WHERE proxy = ?
|
WHERE proxy = ?
|
||||||
''', (int(time.time()), proxy))
|
''', (now_int, proxy))
|
||||||
|
|
||||||
# Remove from verification queue
|
# Remove from verification queue
|
||||||
dbs.remove_from_verification_queue(db, proxy)
|
dbs.remove_from_verification_queue(db, proxy)
|
||||||
@@ -1380,7 +1541,7 @@ class Proxywatchd():
|
|||||||
_dbg('prepare_jobs: %d rows, checktypes=%s' % (len(rows), config.watchd.checktypes))
|
_dbg('prepare_jobs: %d rows, checktypes=%s' % (len(rows), config.watchd.checktypes))
|
||||||
checktypes = config.watchd.checktypes
|
checktypes = config.watchd.checktypes
|
||||||
|
|
||||||
# Build target pools for each checktype
|
# Build target pools for each checktype (filter out targets in cooldown)
|
||||||
target_pools = {}
|
target_pools = {}
|
||||||
for ct in checktypes:
|
for ct in checktypes:
|
||||||
if ct == 'none':
|
if ct == 'none':
|
||||||
@@ -1388,19 +1549,23 @@ class Proxywatchd():
|
|||||||
target_pools[ct] = ssl_targets
|
target_pools[ct] = ssl_targets
|
||||||
_dbg('target_pool[none]: SSL-only mode, %d ssl targets' % len(ssl_targets))
|
_dbg('target_pool[none]: SSL-only mode, %d ssl targets' % len(ssl_targets))
|
||||||
elif ct == 'irc':
|
elif ct == 'irc':
|
||||||
target_pools[ct] = config.servers
|
all_servers = config.servers
|
||||||
_dbg('target_pool[irc]: %d servers' % len(config.servers))
|
available = irc_target_stats.get_available(all_servers)
|
||||||
|
target_pools[ct] = available if available else all_servers
|
||||||
|
_dbg('target_pool[irc]: %d/%d servers available' % (len(target_pools[ct]), len(all_servers)))
|
||||||
elif ct == 'judges':
|
elif ct == 'judges':
|
||||||
# Filter out judges in cooldown (blocked/rate-limited)
|
|
||||||
all_judges = list(judges.keys())
|
all_judges = list(judges.keys())
|
||||||
available = judge_stats.get_available_judges(all_judges)
|
available = judge_stats.get_available(all_judges)
|
||||||
target_pools[ct] = available if available else all_judges
|
target_pools[ct] = available if available else all_judges
|
||||||
elif ct == 'ssl':
|
elif ct == 'ssl':
|
||||||
target_pools[ct] = ssl_targets
|
available = ssl_target_stats.get_available(ssl_targets)
|
||||||
_dbg('target_pool[ssl]: %d targets' % len(ssl_targets))
|
target_pools[ct] = available if available else ssl_targets
|
||||||
|
_dbg('target_pool[ssl]: %d/%d targets available' % (len(target_pools[ct]), len(ssl_targets)))
|
||||||
else: # head
|
else: # head
|
||||||
target_pools[ct] = list(regexes.keys())
|
all_targets = list(regexes.keys())
|
||||||
_dbg('target_pool[%s]: %d targets' % (ct, len(regexes)))
|
available = head_target_stats.get_available(all_targets)
|
||||||
|
target_pools[ct] = available if available else all_targets
|
||||||
|
_dbg('target_pool[%s]: %d/%d targets available' % (ct, len(target_pools[ct]), len(all_targets)))
|
||||||
|
|
||||||
# create all jobs first, then shuffle for interleaving
|
# create all jobs first, then shuffle for interleaving
|
||||||
all_jobs = []
|
all_jobs = []
|
||||||
@@ -1519,7 +1684,8 @@ class Proxywatchd():
|
|||||||
dead_count += 1
|
dead_count += 1
|
||||||
args.append((effective_failcount, job.checktime, 1, job.country, job.proto,
|
args.append((effective_failcount, job.checktime, 1, job.country, job.proto,
|
||||||
job.success_count, job.total_duration, job.mitm,
|
job.success_count, job.total_duration, job.mitm,
|
||||||
job.consecutive_success, job.asn, job.protos_working, job.proxy))
|
job.consecutive_success, job.asn, job.protos_working,
|
||||||
|
job.last_check, job.last_target, effective_failcount, job.proxy))
|
||||||
|
|
||||||
success_rate = (float(sc) / len(self.collected)) * 100
|
success_rate = (float(sc) / len(self.collected)) * 100
|
||||||
ret = True
|
ret = True
|
||||||
@@ -1533,7 +1699,8 @@ class Proxywatchd():
|
|||||||
if job.failcount == 0:
|
if job.failcount == 0:
|
||||||
args.append((job.failcount, job.checktime, 1, job.country, job.proto,
|
args.append((job.failcount, job.checktime, 1, job.country, job.proto,
|
||||||
job.success_count, job.total_duration, job.mitm,
|
job.success_count, job.total_duration, job.mitm,
|
||||||
job.consecutive_success, job.asn, job.protos_working, job.proxy))
|
job.consecutive_success, job.asn, job.protos_working,
|
||||||
|
job.last_check, job.last_target, job.failcount, job.proxy))
|
||||||
if job.last_latency_ms is not None:
|
if job.last_latency_ms is not None:
|
||||||
latency_updates.append((job.proxy, job.last_latency_ms))
|
latency_updates.append((job.proxy, job.last_latency_ms))
|
||||||
ret = False
|
ret = False
|
||||||
@@ -1549,9 +1716,20 @@ class Proxywatchd():
|
|||||||
for job in self.collected
|
for job in self.collected
|
||||||
if job.failcount == 0 and job.exit_ip]
|
if job.failcount == 0 and job.exit_ip]
|
||||||
|
|
||||||
|
# Separate dead proxies for deletion
|
||||||
|
dead_proxies = [a[-1] for a in args if a[0] == DEAD_PROXY or a[0] >= max_fail]
|
||||||
|
live_args = [a for a in args if a[0] != DEAD_PROXY and a[0] < max_fail]
|
||||||
|
|
||||||
with self._db_context() as db:
|
with self._db_context() as db:
|
||||||
query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,success_count=?,total_duration=?,mitm=?,consecutive_success=?,asn=?,protos_working=? WHERE proxy=?'
|
query = 'UPDATE proxylist SET failed=?,tested=?,dronebl=?,country=?,proto=?,success_count=?,total_duration=?,mitm=?,consecutive_success=?,asn=?,protos_working=?,last_check=?,last_target=?,last_seen=CASE WHEN ?=0 THEN strftime("%s","now") ELSE last_seen END WHERE proxy=?'
|
||||||
db.executemany(query, args)
|
if live_args:
|
||||||
|
db.executemany(query, live_args)
|
||||||
|
|
||||||
|
# Delete proxies that reached max_fail
|
||||||
|
if dead_proxies:
|
||||||
|
db.executemany('DELETE FROM proxylist WHERE proxy=?',
|
||||||
|
[(p,) for p in dead_proxies])
|
||||||
|
_log('deleted %d dead proxies' % len(dead_proxies), 'watchd')
|
||||||
|
|
||||||
# Batch update latency metrics for successful proxies
|
# Batch update latency metrics for successful proxies
|
||||||
if latency_updates:
|
if latency_updates:
|
||||||
@@ -1713,15 +1891,25 @@ class Proxywatchd():
|
|||||||
# Judge stats (when using judges checktype)
|
# Judge stats (when using judges checktype)
|
||||||
if 'judges' in config.watchd.checktypes:
|
if 'judges' in config.watchd.checktypes:
|
||||||
js = judge_stats.get_stats()
|
js = judge_stats.get_stats()
|
||||||
|
# Remap 'target' -> 'judge' for dashboard compatibility
|
||||||
|
top = [dict(j, judge=j['target']) for j in js.get('top', [])[:5]]
|
||||||
stats_data['judges'] = {
|
stats_data['judges'] = {
|
||||||
'total': js.get('total', 0),
|
'total': js.get('total', 0),
|
||||||
'available': js.get('available', 0),
|
'available': js.get('available', 0),
|
||||||
'in_cooldown': js.get('in_cooldown', 0),
|
'in_cooldown': js.get('in_cooldown', 0),
|
||||||
'top_judges': js.get('top', [])[:5] # top 5 most successful
|
'top_judges': top,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
stats_data['judges'] = None
|
stats_data['judges'] = None
|
||||||
|
|
||||||
|
# Target health stats (all target pools)
|
||||||
|
stats_data['target_health'] = {
|
||||||
|
'head': head_target_stats.get_stats(),
|
||||||
|
'ssl': ssl_target_stats.get_stats(),
|
||||||
|
'irc': irc_target_stats.get_stats(),
|
||||||
|
'judges': judge_stats.get_stats(),
|
||||||
|
}
|
||||||
|
|
||||||
# Scraper/engine stats
|
# Scraper/engine stats
|
||||||
if scraper_available:
|
if scraper_available:
|
||||||
scraper_stats = scraper_module.get_scraper_stats()
|
scraper_stats = scraper_module.get_scraper_stats()
|
||||||
|
|||||||
38
rocksock.py
38
rocksock.py
@@ -21,6 +21,25 @@
|
|||||||
import socket, ssl, select, copy, errno
|
import socket, ssl, select, copy, errno
|
||||||
import network_stats
|
import network_stats
|
||||||
|
|
||||||
|
# Cached SSL contexts -- avoids reloading CA store from disk on every connection
|
||||||
|
_ssl_ctx_noverify = None
|
||||||
|
_ssl_ctx_verify = None
|
||||||
|
|
||||||
|
def _get_ssl_context(verifycert=False):
|
||||||
|
global _ssl_ctx_noverify, _ssl_ctx_verify
|
||||||
|
if verifycert:
|
||||||
|
if _ssl_ctx_verify is None:
|
||||||
|
_ssl_ctx_verify = ssl.create_default_context()
|
||||||
|
_ssl_ctx_verify.check_hostname = True
|
||||||
|
_ssl_ctx_verify.verify_mode = ssl.CERT_OPTIONAL
|
||||||
|
return _ssl_ctx_verify
|
||||||
|
else:
|
||||||
|
if _ssl_ctx_noverify is None:
|
||||||
|
_ssl_ctx_noverify = ssl.create_default_context()
|
||||||
|
_ssl_ctx_noverify.check_hostname = False
|
||||||
|
_ssl_ctx_noverify.verify_mode = ssl.CERT_NONE
|
||||||
|
return _ssl_ctx_noverify
|
||||||
|
|
||||||
# rs_proxyType
|
# rs_proxyType
|
||||||
RS_PT_NONE = 0
|
RS_PT_NONE = 0
|
||||||
RS_PT_SOCKS4 = 1
|
RS_PT_SOCKS4 = 1
|
||||||
@@ -210,12 +229,7 @@ def RocksockProxyFromURL(url):
|
|||||||
class Rocksock():
|
class Rocksock():
|
||||||
def __init__(self, host=None, port=0, verifycert=False, timeout=0, proxies=None, **kwargs):
|
def __init__(self, host=None, port=0, verifycert=False, timeout=0, proxies=None, **kwargs):
|
||||||
if 'ssl' in kwargs and kwargs['ssl'] == True:
|
if 'ssl' in kwargs and kwargs['ssl'] == True:
|
||||||
self.sslcontext = ssl.create_default_context()
|
self.sslcontext = _get_ssl_context(verifycert)
|
||||||
self.sslcontext.check_hostname = False
|
|
||||||
self.sslcontext.verify_mode = ssl.CERT_NONE
|
|
||||||
if verifycert:
|
|
||||||
self.sslcontext.verify_mode = ssl.CERT_OPTIONAL
|
|
||||||
self.sslcontext.check_hostname = True
|
|
||||||
else:
|
else:
|
||||||
self.sslcontext = None
|
self.sslcontext = None
|
||||||
self.proxychain = []
|
self.proxychain = []
|
||||||
@@ -228,6 +242,7 @@ class Rocksock():
|
|||||||
target = RocksockProxy(host, port, RS_PT_NONE)
|
target = RocksockProxy(host, port, RS_PT_NONE)
|
||||||
self.proxychain.append(target)
|
self.proxychain.append(target)
|
||||||
self.sock = None
|
self.sock = None
|
||||||
|
self._connected = False
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
|
||||||
def _translate_socket_error(self, e, pnum):
|
def _translate_socket_error(self, e, pnum):
|
||||||
@@ -288,15 +303,18 @@ class Rocksock():
|
|||||||
select.select([], [self.sock], [])
|
select.select([], [self.sock], [])
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
self._connected = True
|
||||||
|
|
||||||
def disconnect(self):
|
def disconnect(self):
|
||||||
if self.sock is None: return
|
if self.sock is None: return
|
||||||
try:
|
if self._connected:
|
||||||
self.sock.shutdown(socket.SHUT_RDWR)
|
try:
|
||||||
except socket.error:
|
self.sock.shutdown(socket.SHUT_RDWR)
|
||||||
pass
|
except socket.error:
|
||||||
|
pass
|
||||||
self.sock.close()
|
self.sock.close()
|
||||||
self.sock = None
|
self.sock = None
|
||||||
|
self._connected = False
|
||||||
|
|
||||||
def canread(self):
|
def canread(self):
|
||||||
return select.select([self.sock], [], [], 0)[0]
|
return select.select([self.sock], [], [], 0)[0]
|
||||||
|
|||||||
123
servers.txt
123
servers.txt
@@ -1,69 +1,132 @@
|
|||||||
irc.2600.net
|
irc.abjects.net
|
||||||
irc.Undernet.Org
|
irc.afternet.org
|
||||||
|
irc.allnetwork.org
|
||||||
|
irc.alphachat.net
|
||||||
|
irc.atrum.org
|
||||||
|
irc.austnet.org
|
||||||
|
irc.axon.pw
|
||||||
|
irc.ayochat.or.id
|
||||||
|
irc.azzurra.chat
|
||||||
|
irc.beyondirc.net
|
||||||
|
irc.bolchat.com
|
||||||
|
irc.brasirc.com.br
|
||||||
|
irc.canternet.org
|
||||||
irc.chat4all.org
|
irc.chat4all.org
|
||||||
irc.chatspike.net
|
irc.chatspike.net
|
||||||
irc.choopa.net
|
irc.chatzona.org
|
||||||
irc.coldfront.net
|
irc.cncirc.net
|
||||||
irc.cyberarmy.net
|
irc.coolsmile.net
|
||||||
|
irc.d-t-net.de
|
||||||
irc.dal.net
|
irc.dal.net
|
||||||
|
irc.darenet.org
|
||||||
|
irc.darkfasel.net
|
||||||
irc.darkmyst.org
|
irc.darkmyst.org
|
||||||
|
irc.darkscience.net
|
||||||
|
irc.darkworld.network
|
||||||
irc.data.lt
|
irc.data.lt
|
||||||
irc.drlnet.com
|
irc.dejatoons.net
|
||||||
irc.dynastynet.net
|
irc.desirenet.org
|
||||||
irc.ecnet.org
|
irc.ecnet.org
|
||||||
irc.efnet.org
|
irc.efnet.org
|
||||||
irc.efnet.pl
|
irc.epiknet.org
|
||||||
irc.enterthegame.com
|
|
||||||
irc.esper.net
|
irc.esper.net
|
||||||
irc.eu.dal.net
|
|
||||||
irc.eu.gamesurge.net
|
|
||||||
irc.euirc.net
|
irc.euirc.net
|
||||||
irc.europnet.org
|
irc.europnet.org
|
||||||
irc.eversible.com
|
irc.evolu.net
|
||||||
|
irc.explosionirc.net
|
||||||
irc.fdfnet.net
|
irc.fdfnet.net
|
||||||
irc.fef.net
|
irc.fef.net
|
||||||
|
irc.financialchat.com
|
||||||
|
irc.forestnet.org
|
||||||
|
irc.freeunibg.eu
|
||||||
irc.gamesurge.net
|
irc.gamesurge.net
|
||||||
|
irc.geeknode.org
|
||||||
irc.geekshed.net
|
irc.geekshed.net
|
||||||
irc.german-freakz.net
|
irc.german-elite.net
|
||||||
|
irc.gigairc.net
|
||||||
|
irc.gimp.org
|
||||||
irc.globalgamers.net
|
irc.globalgamers.net
|
||||||
irc.greekirc.net
|
irc.goodchatting.com
|
||||||
|
irc.hackint.org
|
||||||
|
irc.hybridirc.com
|
||||||
|
irc.icq-chat.com
|
||||||
|
irc.immortal-anime.net
|
||||||
|
irc.indymedia.org
|
||||||
|
irc.irc-hispano.org
|
||||||
|
irc.irc2.hu
|
||||||
|
irc.irc4fun.net
|
||||||
|
irc.ircgate.it
|
||||||
|
irc.irchighway.net
|
||||||
|
irc.ircsource.net
|
||||||
irc.irctoo.net
|
irc.irctoo.net
|
||||||
irc.ircube.org
|
irc.ircube.org
|
||||||
|
irc.ircworld.org
|
||||||
|
irc.irdsi.net
|
||||||
|
irc.kampungchat.org
|
||||||
irc.knightirc.net
|
irc.knightirc.net
|
||||||
|
irc.krey.net
|
||||||
irc.krono.net
|
irc.krono.net
|
||||||
irc.langochat.net
|
irc.krstarica.com
|
||||||
|
irc.libera.chat
|
||||||
|
irc.librairc.net
|
||||||
irc.lichtsnel.nl
|
irc.lichtsnel.nl
|
||||||
|
irc.link-net.be
|
||||||
|
irc.lt-tech.org
|
||||||
|
irc.luatic.net
|
||||||
irc.maddshark.net
|
irc.maddshark.net
|
||||||
irc.newnet.net
|
irc.magicstar.net
|
||||||
|
irc.mibbit.net
|
||||||
|
irc.mindforge.org
|
||||||
|
irc.nationchat.org
|
||||||
irc.nightstar.net
|
irc.nightstar.net
|
||||||
|
irc.nullirc.net
|
||||||
irc.oftc.net
|
irc.oftc.net
|
||||||
irc.onlinegamesnet.net
|
irc.oltreirc.net
|
||||||
irc.othernet.org
|
irc.openjoke.org
|
||||||
irc.otherworlders.org
|
irc.orixon.org
|
||||||
irc.oz.org
|
irc.oz.org
|
||||||
irc.p2pchat.net
|
irc.p2p-network.net
|
||||||
|
irc.perl.org
|
||||||
irc.phat-net.de
|
irc.phat-net.de
|
||||||
|
irc.pirc.pl
|
||||||
irc.ptnet.org
|
irc.ptnet.org
|
||||||
|
irc.quakenet.org
|
||||||
|
irc.recycled-irc.net
|
||||||
|
irc.retroit.org
|
||||||
irc.rezosup.org
|
irc.rezosup.org
|
||||||
irc.rizon.net
|
irc.rizon.net
|
||||||
|
irc.rusnet.org.ru
|
||||||
irc.scarynet.org
|
irc.scarynet.org
|
||||||
irc.serenia.net
|
irc.scuttled.net
|
||||||
irc.serenity-irc.net
|
irc.serenity-irc.net
|
||||||
irc.servercentral.net
|
|
||||||
irc.shadowfire.org
|
irc.shadowfire.org
|
||||||
irc.shadowworld.net
|
irc.shadowworld.net
|
||||||
|
irc.simosnap.com
|
||||||
|
irc.skychatz.org
|
||||||
|
irc.skyrock.net
|
||||||
|
irc.slacknet.org
|
||||||
irc.slashnet.org
|
irc.slashnet.org
|
||||||
irc.snt.utwente.nl
|
irc.smurfnet.ch
|
||||||
|
irc.snoonet.org
|
||||||
irc.sorcery.net
|
irc.sorcery.net
|
||||||
irc.spacetronix.net
|
irc.spotchat.org
|
||||||
irc.st-city.net
|
irc.st-city.net
|
||||||
irc.starlink-irc.org
|
irc.starlink-irc.org
|
||||||
irc.starlink.org
|
irc.starlink.org
|
||||||
|
irc.staynet.org
|
||||||
|
irc.stormbit.net
|
||||||
irc.swiftirc.net
|
irc.swiftirc.net
|
||||||
irc.teranova.net
|
irc.synirc.net
|
||||||
irc.us.dal.net
|
irc.technet.chat
|
||||||
irc.us.gamesurge.net
|
irc.tilde.chat
|
||||||
|
irc.tweakers.net
|
||||||
|
irc.undernet.org
|
||||||
|
irc.undermind.net
|
||||||
|
irc.wenet.ru
|
||||||
|
irc.whatnet.org
|
||||||
|
irc.wixchat.org
|
||||||
|
irc.worldirc.org
|
||||||
|
irc.xertion.org
|
||||||
irc.xevion.net
|
irc.xevion.net
|
||||||
irc.zerofuzion.net
|
open.ircnet.net
|
||||||
uk.quakenet.org
|
ssl.bongster.de
|
||||||
us.quakenet.org
|
|
||||||
|
|||||||
83
stats.py
83
stats.py
@@ -14,60 +14,64 @@ def try_div(a, b):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
class JudgeStats():
|
class TargetStats():
|
||||||
"""Track per-judge success/failure rates for reliability scoring.
|
"""Track per-target success/failure rates with cooldown.
|
||||||
|
|
||||||
Judges that frequently block or rate-limit are temporarily avoided.
|
Targets that frequently block or fail are temporarily avoided.
|
||||||
Stats decay over time to allow recovery.
|
Block counters reset on success or cooldown expiry.
|
||||||
|
|
||||||
|
Used for all target pools: judges, head targets, SSL targets, IRC servers.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, cooldown_seconds=300, block_threshold=3):
|
def __init__(self, cooldown_seconds=300, block_threshold=3):
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
self.stats = {} # judge -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
|
self.stats = {} # target -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
|
||||||
self.cooldown_seconds = cooldown_seconds # seconds to avoid blocked judges
|
self.cooldown_seconds = cooldown_seconds
|
||||||
self.block_threshold = block_threshold # consecutive blocks before cooldown
|
self.block_threshold = block_threshold
|
||||||
|
|
||||||
def record_success(self, judge):
|
def _ensure(self, target):
|
||||||
"""Record successful judge response."""
|
if target not in self.stats:
|
||||||
with self.lock:
|
self.stats[target] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
||||||
if judge not in self.stats:
|
|
||||||
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
|
||||||
self.stats[judge]['success'] += 1
|
|
||||||
# Reset block count on success
|
|
||||||
self.stats[judge]['block'] = 0
|
|
||||||
|
|
||||||
def record_failure(self, judge):
|
def record_success(self, target):
|
||||||
"""Record judge failure (proxy failed, not judge block)."""
|
"""Record successful target response."""
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if judge not in self.stats:
|
self._ensure(target)
|
||||||
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
self.stats[target]['success'] += 1
|
||||||
self.stats[judge]['fail'] += 1
|
self.stats[target]['block'] = 0
|
||||||
|
|
||||||
def record_block(self, judge):
|
def record_failure(self, target):
|
||||||
"""Record judge blocking the proxy (403, captcha, rate-limit)."""
|
"""Record target failure (soft -- doesn't trigger cooldown)."""
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if judge not in self.stats:
|
self._ensure(target)
|
||||||
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
self.stats[target]['fail'] += 1
|
||||||
self.stats[judge]['block'] += 1
|
|
||||||
self.stats[judge]['last_block'] = time.time()
|
|
||||||
|
|
||||||
def is_available(self, judge):
|
def record_block(self, target):
|
||||||
"""Check if judge is available (not in cooldown)."""
|
"""Record target block (403, captcha, DNS failure, rate-limit)."""
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if judge not in self.stats:
|
self._ensure(target)
|
||||||
|
self.stats[target]['block'] += 1
|
||||||
|
self.stats[target]['last_block'] = time.time()
|
||||||
|
|
||||||
|
def is_available(self, target):
|
||||||
|
"""Check if target is available (not in cooldown)."""
|
||||||
|
with self.lock:
|
||||||
|
if target not in self.stats:
|
||||||
return True
|
return True
|
||||||
s = self.stats[judge]
|
s = self.stats[target]
|
||||||
# Check if in cooldown period
|
|
||||||
if s['block'] >= self.block_threshold:
|
if s['block'] >= self.block_threshold:
|
||||||
if (time.time() - s['last_block']) < self.cooldown_seconds:
|
if (time.time() - s['last_block']) < self.cooldown_seconds:
|
||||||
return False
|
return False
|
||||||
# Cooldown expired, reset block count
|
|
||||||
s['block'] = 0
|
s['block'] = 0
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_available(self, target_list):
|
||||||
|
"""Return targets not in cooldown."""
|
||||||
|
return [t for t in target_list if self.is_available(t)]
|
||||||
|
|
||||||
def get_available_judges(self, judge_list):
|
def get_available_judges(self, judge_list):
|
||||||
"""Return list of judges not in cooldown."""
|
"""Compat alias for get_available()."""
|
||||||
return [j for j in judge_list if self.is_available(j)]
|
return self.get_available(judge_list)
|
||||||
|
|
||||||
def status_line(self):
|
def status_line(self):
|
||||||
"""Return status summary for logging."""
|
"""Return status summary for logging."""
|
||||||
@@ -76,7 +80,7 @@ class JudgeStats():
|
|||||||
blocked = sum(1 for s in self.stats.values()
|
blocked = sum(1 for s in self.stats.values()
|
||||||
if s['block'] >= self.block_threshold and
|
if s['block'] >= self.block_threshold and
|
||||||
(time.time() - s['last_block']) < self.cooldown_seconds)
|
(time.time() - s['last_block']) < self.cooldown_seconds)
|
||||||
return 'judges: %d total, %d in cooldown' % (total, blocked)
|
return '%d total, %d in cooldown' % (total, blocked)
|
||||||
|
|
||||||
def get_stats(self):
|
def get_stats(self):
|
||||||
"""Return statistics dict for API/dashboard."""
|
"""Return statistics dict for API/dashboard."""
|
||||||
@@ -87,18 +91,21 @@ class JudgeStats():
|
|||||||
if s['block'] >= self.block_threshold and
|
if s['block'] >= self.block_threshold and
|
||||||
(now - s['last_block']) < self.cooldown_seconds)
|
(now - s['last_block']) < self.cooldown_seconds)
|
||||||
available = total - in_cooldown
|
available = total - in_cooldown
|
||||||
# Get top judges by success count
|
|
||||||
top = []
|
top = []
|
||||||
for judge, s in self.stats.items():
|
for target, s in self.stats.items():
|
||||||
total_tests = s['success'] + s['fail']
|
total_tests = s['success'] + s['fail']
|
||||||
if total_tests > 0:
|
if total_tests > 0:
|
||||||
success_pct = (s['success'] * 100.0) / total_tests
|
success_pct = (s['success'] * 100.0) / total_tests
|
||||||
top.append({'judge': judge, 'success': s['success'],
|
top.append({'target': target, 'success': s['success'],
|
||||||
'tests': total_tests, 'rate': round(success_pct, 1)})
|
'tests': total_tests, 'rate': round(success_pct, 1)})
|
||||||
top.sort(key=lambda x: x['success'], reverse=True)
|
top.sort(key=lambda x: x['success'], reverse=True)
|
||||||
return {'total': total, 'available': available, 'in_cooldown': in_cooldown, 'top': top}
|
return {'total': total, 'available': available, 'in_cooldown': in_cooldown, 'top': top}
|
||||||
|
|
||||||
|
|
||||||
|
# Backwards-compatible alias
|
||||||
|
JudgeStats = TargetStats
|
||||||
|
|
||||||
|
|
||||||
# HTTP targets - check for specific headers
|
# HTTP targets - check for specific headers
|
||||||
regexes = {
|
regexes = {
|
||||||
'www.facebook.com': 'X-FB-Debug',
|
'www.facebook.com': 'X-FB-Debug',
|
||||||
|
|||||||
@@ -359,6 +359,198 @@ class TestExtractAuthProxies:
|
|||||||
assert fetch.extract_auth_proxies('just some text') == []
|
assert fetch.extract_auth_proxies('just some text') == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractAuthProxiesShortCircuit:
|
||||||
|
"""Tests for extract_auth_proxies() short-circuit on missing @."""
|
||||||
|
|
||||||
|
def test_no_at_sign_returns_empty(self):
|
||||||
|
"""Content without @ skips regex entirely."""
|
||||||
|
content = '1.2.3.4:8080 socks5://5.6.7.8:1080 plain text'
|
||||||
|
assert fetch.extract_auth_proxies(content) == []
|
||||||
|
|
||||||
|
def test_at_sign_still_extracts(self):
|
||||||
|
"""Content with @ still finds auth proxies."""
|
||||||
|
content = 'user:pass@1.2.3.4:8080'
|
||||||
|
result = fetch.extract_auth_proxies(content)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0][0] == 'user:pass@1.2.3.4:8080'
|
||||||
|
|
||||||
|
def test_at_sign_no_match_returns_empty(self):
|
||||||
|
"""Content with @ but no auth proxy pattern returns empty."""
|
||||||
|
content = 'email@example.com has no proxy'
|
||||||
|
assert fetch.extract_auth_proxies(content) == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractProxiesFromTable:
|
||||||
|
"""Tests for extract_proxies_from_table() with precompiled regexes."""
|
||||||
|
|
||||||
|
def test_no_table_returns_empty(self):
|
||||||
|
"""Plain text without <table> returns empty."""
|
||||||
|
content = '1.2.3.4:8080\n5.6.7.8:3128\n'
|
||||||
|
assert fetch.extract_proxies_from_table(content) == []
|
||||||
|
|
||||||
|
def test_simple_table(self):
|
||||||
|
"""Basic HTML table with IP/Port columns is parsed."""
|
||||||
|
content = '''
|
||||||
|
<table>
|
||||||
|
<tr><th>IP</th><th>Port</th><th>Type</th></tr>
|
||||||
|
<tr><td>1.2.3.4</td><td>8080</td><td>HTTP</td></tr>
|
||||||
|
<tr><td>5.6.7.8</td><td>1080</td><td>SOCKS5</td></tr>
|
||||||
|
</table>
|
||||||
|
'''
|
||||||
|
result = fetch.extract_proxies_from_table(content)
|
||||||
|
assert len(result) == 2
|
||||||
|
addrs = [r[0] for r in result]
|
||||||
|
assert '1.2.3.4:8080' in addrs
|
||||||
|
assert '5.6.7.8:1080' in addrs
|
||||||
|
|
||||||
|
def test_uppercase_table_tag(self):
|
||||||
|
"""<TABLE> (uppercase) is also detected."""
|
||||||
|
content = '''
|
||||||
|
<TABLE>
|
||||||
|
<TR><TH>IP</TH><TH>Port</TH></TR>
|
||||||
|
<TR><TD>1.2.3.4</TD><TD>8080</TD></TR>
|
||||||
|
</TABLE>
|
||||||
|
'''
|
||||||
|
result = fetch.extract_proxies_from_table(content)
|
||||||
|
assert len(result) == 1
|
||||||
|
|
||||||
|
def test_empty_table(self):
|
||||||
|
"""Table with headers but no data rows returns empty."""
|
||||||
|
content = '''
|
||||||
|
<table>
|
||||||
|
<tr><th>IP</th><th>Port</th></tr>
|
||||||
|
</table>
|
||||||
|
'''
|
||||||
|
result = fetch.extract_proxies_from_table(content)
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractProxiesFromJson:
|
||||||
|
"""Tests for extract_proxies_from_json() short-circuit."""
|
||||||
|
|
||||||
|
def test_no_braces_returns_empty(self):
|
||||||
|
"""Content without { or [ skips JSON parsing."""
|
||||||
|
content = '1.2.3.4:8080\n5.6.7.8:3128\n'
|
||||||
|
assert fetch.extract_proxies_from_json(content) == []
|
||||||
|
|
||||||
|
def test_json_array_of_objects(self):
|
||||||
|
"""JSON array with ip/port objects is parsed."""
|
||||||
|
content = '[{"ip": "1.2.3.4", "port": 8080}]'
|
||||||
|
result = fetch.extract_proxies_from_json(content)
|
||||||
|
assert len(result) >= 1
|
||||||
|
addrs = [r[0] for r in result]
|
||||||
|
assert '1.2.3.4:8080' in addrs
|
||||||
|
|
||||||
|
def test_json_array_of_strings(self):
|
||||||
|
"""JSON array of ip:port strings is parsed."""
|
||||||
|
content = '["1.2.3.4:8080", "5.6.7.8:3128"]'
|
||||||
|
result = fetch.extract_proxies_from_json(content)
|
||||||
|
addrs = [r[0] for r in result]
|
||||||
|
assert '1.2.3.4:8080' in addrs
|
||||||
|
assert '5.6.7.8:3128' in addrs
|
||||||
|
|
||||||
|
def test_plain_html_skips_json(self):
|
||||||
|
"""HTML without JSON delimiters returns empty."""
|
||||||
|
content = '<html><body>1.2.3.4:8080</body></html>'
|
||||||
|
# HTML has < and > but this function checks for { and [
|
||||||
|
# The < > chars won't trigger JSON parsing
|
||||||
|
result = fetch.extract_proxies_from_json(content)
|
||||||
|
# May or may not find anything depending on HTML structure
|
||||||
|
# but should not crash
|
||||||
|
assert isinstance(result, list)
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractProxiesWithHints:
|
||||||
|
"""Tests for extract_proxies_with_hints()."""
|
||||||
|
|
||||||
|
def test_proto_before_ip(self):
|
||||||
|
"""Protocol keyword before IP:PORT is detected."""
|
||||||
|
content = 'socks5 1.2.3.4:8080'
|
||||||
|
result = fetch.extract_proxies_with_hints(content)
|
||||||
|
assert '1.2.3.4:8080' in result
|
||||||
|
assert result['1.2.3.4:8080'] == 'socks5'
|
||||||
|
|
||||||
|
def test_proto_after_ip(self):
|
||||||
|
"""Protocol keyword after IP:PORT is detected."""
|
||||||
|
content = '1.2.3.4:8080 socks5'
|
||||||
|
result = fetch.extract_proxies_with_hints(content)
|
||||||
|
assert '1.2.3.4:8080' in result
|
||||||
|
|
||||||
|
def test_no_hints_returns_empty(self):
|
||||||
|
"""Plain IP:PORT without protocol hints returns empty."""
|
||||||
|
content = '1.2.3.4:8080'
|
||||||
|
result = fetch.extract_proxies_with_hints(content)
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractProxiesIntegration:
    """Integration tests for extract_proxies() combining all extractors."""

    @staticmethod
    def _addresses(rows):
        # Result rows carry the proxy address in column 0.
        return [row[0] for row in rows]

    @staticmethod
    def _protocols(rows):
        # Map address -> detected protocol for protocol assertions.
        return {row[0]: row[1] for row in rows}

    def test_plain_text_proxy_list(self):
        """Plain text IP:PORT list extracts correctly."""
        text = '1.2.3.4:8080\n5.6.7.8:3128\n9.10.11.12:1080\n'
        found = self._addresses(fetch.extract_proxies(text, filter_known=False))
        assert '1.2.3.4:8080' in found
        assert '5.6.7.8:3128' in found
        assert '9.10.11.12:1080' in found

    def test_auth_proxies_extracted(self):
        """Auth proxies found in mixed content."""
        text = 'user:pass@1.2.3.4:8080\n5.6.7.8:3128\n'
        found = self._addresses(fetch.extract_proxies(text, filter_known=False))
        assert 'user:pass@1.2.3.4:8080' in found
        assert '5.6.7.8:3128' in found

    def test_html_table_extraction(self):
        """Proxies extracted from HTML table."""
        text = '''
        <table>
        <tr><th>IP</th><th>Port</th></tr>
        <tr><td>1.2.3.4</td><td>8080</td></tr>
        </table>
        '''
        found = self._addresses(fetch.extract_proxies(text, filter_known=False))
        assert '1.2.3.4:8080' in found

    def test_json_extraction(self):
        """Proxies extracted from JSON content."""
        text = '[{"ip": "1.2.3.4", "port": 8080}]'
        found = self._addresses(fetch.extract_proxies(text, filter_known=False))
        assert '1.2.3.4:8080' in found

    def test_empty_content(self):
        """Empty content returns no proxies."""
        assert fetch.extract_proxies('', filter_known=False) == []

    def test_private_ips_filtered(self):
        """Private IPs are not returned."""
        text = '10.0.0.1:8080\n192.168.1.1:3128\n1.2.3.4:8080\n'
        found = self._addresses(fetch.extract_proxies(text, filter_known=False))
        assert '10.0.0.1:8080' not in found
        assert '192.168.1.1:3128' not in found
        assert '1.2.3.4:8080' in found

    def test_proto_from_hints(self):
        """Protocol hints are picked up."""
        rows = fetch.extract_proxies('socks5 1.2.3.4:8080\n', filter_known=False)
        assert self._protocols(rows).get('1.2.3.4:8080') == 'socks5'

    def test_proto_from_arg(self):
        """Fallback proto from argument is used."""
        rows = fetch.extract_proxies('1.2.3.4:8080\n', filter_known=False, proto='socks4')
        assert self._protocols(rows).get('1.2.3.4:8080') == 'socks4'
|
||||||
class TestConfidenceScoring:
|
class TestConfidenceScoring:
|
||||||
"""Tests for confidence score constants."""
|
"""Tests for confidence score constants."""
|
||||||
|
|
||||||
|
|||||||
170
tools/lib/ppf-common.sh
Normal file
170
tools/lib/ppf-common.sh
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
#!/bin/bash
# ppf-common.sh -- shared library for PPF operations toolkit
# Source this file; do not execute directly.

# Strict mode for every sourcing tool: abort on command failure (-e) and
# on use of an unset variable (-u).
set -eu

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Local checkout of the PPF repo; overridable via PPF_DIR in the environment.
PPF_DIR="${PPF_DIR:-$HOME/git/ppf}"
# Ansible runs out of its own virtualenv under /opt/ansible.
ANSIBLE_DIR="/opt/ansible"
ANSIBLE_VENV="${ANSIBLE_DIR}/venv/bin/activate"
# Inventory shipped with this toolkit (used by ansible_cmd below).
PPF_INVENTORY="${PPF_DIR}/tools/playbooks/inventory.ini"

# ---------------------------------------------------------------------------
# Host topology
# ---------------------------------------------------------------------------
# Space-separated host lists; consumers word-split them deliberately.
MASTER="odin"
WORKERS="cassius edge sentinel"
ALL_HOSTS="odin cassius edge sentinel"

# Container names per role
MASTER_CONTAINER="ppf"
WORKER_CONTAINER="ppf-worker"

# ---------------------------------------------------------------------------
# Colors (respects NO_COLOR -- https://no-color.org)
# ---------------------------------------------------------------------------
# Colorize only when NO_COLOR is unset AND stdout is a terminal; otherwise
# every color variable is an empty string so printf formats degrade cleanly.
if [ -z "${NO_COLOR:-}" ] && [ -t 1 ]; then
  C_RST='\033[0m'
  C_DIM='\033[2m'
  C_BOLD='\033[1m'
  C_RED='\033[38;5;167m'
  C_GREEN='\033[38;5;114m'
  C_YELLOW='\033[38;5;180m'
  C_BLUE='\033[38;5;110m'
  C_CYAN='\033[38;5;116m'
else
  C_RST='' C_DIM='' C_BOLD='' C_RED='' C_GREEN=''
  C_YELLOW='' C_BLUE='' C_CYAN=''
fi
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Output helpers
# ---------------------------------------------------------------------------
# Each helper prints a status glyph followed by all of its arguments.
# Color variables expand to empty strings when colors are disabled.

log_ok() {
  printf "${C_GREEN} ✓${C_RST} %s\n" "$*"
}

# Errors go to stderr so they survive stdout redirection.
log_err() {
  printf "${C_RED} ✗${C_RST} %s\n" "$*" >&2
}

log_warn() {
  printf "${C_YELLOW} ⚠${C_RST} %s\n" "$*"
}

log_info() {
  printf "${C_BLUE} ●${C_RST} %s\n" "$*"
}

log_dim() {
  printf "${C_DIM}   %s${C_RST}\n" "$*"
}

# Print an error and terminate the calling script with status 1.
die() {
  log_err "$@"
  exit 1
}

# Section header
section() {
  printf "\n${C_BOLD}${C_CYAN} %s${C_RST}\n" "$*"
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Host resolution helpers
# ---------------------------------------------------------------------------

# True when $1 is the master node.
is_master() {
  [ "$1" = "$MASTER" ]
}

# True when $1 appears in the worker list.
is_worker() {
  local h
  for h in $WORKERS; do
    if [ "$h" = "$1" ]; then
      return 0
    fi
  done
  return 1
}

# Print the container name used on host $1 (master vs worker role).
container_name() {
  if is_master "$1"; then
    echo "$MASTER_CONTAINER"
  else
    echo "$WORKER_CONTAINER"
  fi
}
|
||||||
|
|
||||||
|
# Expand target aliases into host list
#   "all"     -> all hosts
#   "workers" -> worker hosts
#   "odin"    -> just odin
# Multiple args are concatenated; duplicates removed, order preserved.
resolve_targets() {
  local name
  local expanded=""
  for name in "$@"; do
    case "$name" in
      all)     expanded="${expanded:+$expanded }$ALL_HOSTS" ;;
      workers) expanded="${expanded:+$expanded }$WORKERS" ;;
      master)  expanded="${expanded:+$expanded }$MASTER" ;;
      *)       expanded="${expanded:+$expanded }$name" ;;
    esac
  done
  # Deduplicate (first occurrence wins) while preserving order; trailing
  # space from the final tr is stripped by sed.
  echo "$expanded" | tr ' ' '\n' | awk '!seen[$0]++' | tr '\n' ' ' | sed 's/ $//'
}
|
||||||
|
|
||||||
|
# Convert space-separated host list to comma-separated for ansible.
hosts_csv() {
  local joined="$*"
  echo "$joined" | tr ' ' ','
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Ansible wrapper
# ---------------------------------------------------------------------------
# Runs ansible with toolkit inventory via venv.
# Usage: ansible_cmd <ansible args...>
# The subshell keeps the venv activation and directory change from leaking
# into the caller's shell.
ansible_cmd() {
  (
    # shellcheck disable=SC1090
    . "$ANSIBLE_VENV"
    cd "$ANSIBLE_DIR"
    ansible -i "$PPF_INVENTORY" --become "$@"
  )
}

# Runs ansible-playbook with toolkit inventory via venv.
# Usage: ansible_playbook_cmd <ansible-playbook args...>
# NOTE(review): unlike ansible_cmd, no -i is injected here -- callers
# (e.g. ppf-deploy) pass their own inventory argument.
ansible_playbook_cmd() {
  (
    # shellcheck disable=SC1090
    . "$ANSIBLE_VENV"
    cd "$ANSIBLE_DIR"
    ansible-playbook "$@"
  )
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Remote podman/compose wrappers
# ---------------------------------------------------------------------------
# Run a podman command on a remote host as the podman user.
# Uses dynamic UID discovery.
# Usage: podman_cmd HOST "podman subcommand..."
# XDG_RUNTIME_DIR must point at the podman user's runtime dir so rootless
# podman can find its socket; \$uid is escaped so it expands remotely.
podman_cmd() {
  local host="$1"; shift
  local cmd="$*"
  ansible_cmd "$host" -m raw -a \
    "uid=\$(id -u podman) && cd /tmp && sudo -u podman XDG_RUNTIME_DIR=/run/user/\$uid $cmd"
}

# Run a podman-compose subcommand on a remote host.
# Usage: compose_cmd HOST "subcommand [args]"
# Quoting is deliberate: the single-quoted bash -c string is broken open
# around \$uid so the remote outer shell splices in the resolved UID before
# the inner bash runs.  Do not "simplify" the quoting.
compose_cmd() {
  local host="$1"; shift
  local cmd="$*"
  ansible_cmd "$host" -m raw -a \
    "uid=\$(id -u podman) && sudo -u podman bash -c 'export XDG_RUNTIME_DIR=/run/user/'\$uid' && cd /home/podman/ppf && podman-compose $cmd'"
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------
# Byte-compile every *.py under PPF_DIR to catch syntax errors before a
# deploy.  Dies (via die) with a failure count if any file is bad.
# Fix over previous version: compiler diagnostics were thrown away with
# 2>/dev/null, so a failing file gave no clue WHY it failed; the first
# diagnostic line is now shown next to the failing file name.
validate_syntax() {
  local errors=0
  local f err
  section "Validating Python syntax"
  for f in "$PPF_DIR"/*.py; do
    # No *.py files: the glob stays literal, so skip non-files.
    [ -f "$f" ] || continue
    if err=$(python3 -m py_compile "$f" 2>&1); then
      log_dim "$(basename "$f")"
    else
      log_err "$(basename "$f")"
      # Surface the first line of the compiler diagnostic.
      [ -n "$err" ] && log_dim "$(printf '%s\n' "$err" | head -1)"
      errors=$((errors + 1))
    fi
  done
  if [ "$errors" -gt 0 ]; then
    die "$errors file(s) failed syntax check"
  fi
  log_ok "All files valid"
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Version
# ---------------------------------------------------------------------------
# Toolkit version string reported by each tool's --version flag.
PPF_TOOLS_VERSION="1.0.0"
|
||||||
58
tools/playbooks/deploy.yml
Normal file
58
tools/playbooks/deploy.yml
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
---
# Deploy PPF code to every host in the "ppf" inventory group.
# Variables (ppf_src, ppf_code_dest, ppf_base, ppf_owner, ppf_restart,
# ppf_compose_src) come from group_vars and -e overrides (see ppf-deploy).
- name: Deploy PPF code
  hosts: ppf
  gather_facts: false
  become: true

  tasks:
    # Only *.py, servers.txt and Dockerfile are synced; the trailing
    # --exclude=* drops everything else.
    - name: Sync Python code and support files
      ansible.posix.synchronize:
        src: "{{ ppf_src }}/"
        dest: "{{ ppf_code_dest }}"
        rsync_opts:
          - "--include=*.py"
          - "--include=servers.txt"
          - "--include=Dockerfile"
          - "--exclude=*"
      register: sync_result
      notify: restart containers

    # Role-specific compose file (ppf_compose_src differs per group)
    # installed under a fixed name.
    - name: Deploy compose file
      ansible.builtin.copy:
        src: "{{ ppf_src }}/{{ ppf_compose_src }}"
        dest: "{{ ppf_base }}/compose.yml"
        owner: "{{ ppf_owner }}"
        group: "{{ ppf_owner }}"
      register: compose_result
      notify: restart containers

    - name: Fix file ownership
      ansible.builtin.file:
        path: "{{ ppf_base }}"
        owner: "{{ ppf_owner }}"
        group: "{{ ppf_owner }}"
        recurse: true

    # Fire the restart handler now (not at end of play) so the status
    # check below observes the restarted containers.
    - name: Flush handlers before status check
      ansible.builtin.meta: flush_handlers

    - name: Wait for containers to settle
      ansible.builtin.pause:
        seconds: 2
      when: >-
        ppf_restart | bool and
        (sync_result is changed or compose_result is changed)

    # raw + $uid dance: run podman-compose as the rootless podman user with
    # its runtime dir set (same pattern as compose_cmd in ppf-common.sh).
    - name: Check container status
      ansible.builtin.raw: "uid=$(id -u {{ ppf_owner }}) && sudo -u {{ ppf_owner }} bash -c 'export XDG_RUNTIME_DIR=/run/user/'$uid' && cd {{ ppf_base }} && podman-compose ps'"
      register: status_result
      changed_when: false

    - name: Show container status
      ansible.builtin.debug:
        msg: "{{ status_result.stdout_lines | default([]) }}"

  handlers:
    # Full bounce (down + up); skipped entirely when ppf_restart is false.
    - name: restart containers
      ansible.builtin.raw: "uid=$(id -u {{ ppf_owner }}) && sudo -u {{ ppf_owner }} bash -c 'export XDG_RUNTIME_DIR=/run/user/'$uid' && cd {{ ppf_base }} && podman-compose down && podman-compose up -d'"
      when: ppf_restart | bool
||||||
3
tools/playbooks/group_vars/all.yml
Normal file
3
tools/playbooks/group_vars/all.yml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Settings shared by every PPF node.
ppf_base: /home/podman/ppf    # compose + data root on the target
ppf_owner: podman             # account that owns the tree and runs podman
ppf_restart: true             # restart containers when a deploy changes files
|
||||||
2
tools/playbooks/group_vars/master.yml
Normal file
2
tools/playbooks/group_vars/master.yml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# Master role: code lands at the tree root; master compose file.
ppf_code_dest: /home/podman/ppf/
ppf_compose_src: compose.master.yml
|
||||||
2
tools/playbooks/group_vars/workers.yml
Normal file
2
tools/playbooks/group_vars/workers.yml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# Worker role: code lands under src/; worker compose file.
ppf_code_dest: /home/podman/ppf/src/
ppf_compose_src: compose.worker.yml
|
||||||
16
tools/playbooks/inventory.ini
Normal file
16
tools/playbooks/inventory.ini
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# PPF cluster inventory.

[master]
odin ansible_host=10.200.1.250

[workers]
cassius ansible_host=10.200.1.13
edge ansible_host=10.200.1.254
sentinel ansible_host=10.200.1.1

# "ppf" = master + workers; plays and ad-hoc commands target this group.
[ppf:children]
master
workers

[ppf:vars]
ansible_user=ansible
ansible_ssh_private_key_file=/opt/ansible/secrets/ssh/ansible
ansible_remote_tmp=~/.ansible/tmp
|
||||||
154
tools/ppf-db
Executable file
154
tools/ppf-db
Executable file
@@ -0,0 +1,154 @@
|
|||||||
|
#!/bin/bash
# ppf-db -- manage PPF databases
#
# Usage:
#   ppf-db <command> [options]
#
# Commands: stats, purge-proxies, vacuum

set -eu

# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"

# DB paths on the master host; accessed remotely via ansible, never locally.
PROXY_DB="/home/podman/ppf/data/proxies.sqlite"
URL_DB="/home/podman/ppf/data/websites.sqlite"

# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
  cat <<EOF
Usage: ppf-db <command> [options]

Manage PPF databases on odin (master).

Commands:
  stats           show proxy and URL counts
  purge-proxies   delete all proxies (keeps URLs)
  vacuum          reclaim disk space after purge

Options:
  --help          show this help
  --version       show version

Examples:
  ppf-db stats
  ppf-db purge-proxies
  ppf-db vacuum
EOF
  exit 0
}

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Run one SQL statement against a DB on the master via sqlite3, printing
# the query result with ansible raw noise (host banner / status words /
# blank lines) stripped by the sed filter.
# Usage: run_sql DB_PATH "SQL;"
# NOTE(review): the SQL is interpolated into a remote double-quoted string;
# callers must pass trusted literals only -- no user-supplied SQL.
run_sql() {
  local db="$1" sql="$2"
  ansible_cmd "$MASTER" -m raw -a \
    "sudo -u podman sqlite3 '$db' \"$sql\"" 2>/dev/null \
    | sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d'
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Commands
# ---------------------------------------------------------------------------
# Show row counts for both databases.
cmd_stats() {
  section "Database stats (odin)"

  local proxies total_urls active_urls working
  proxies=$(run_sql "$PROXY_DB" "SELECT COUNT(*) FROM proxylist;")
  # "working" = not failed and a protocol has been detected.
  working=$(run_sql "$PROXY_DB" "SELECT COUNT(*) FROM proxylist WHERE failed=0 AND proto IS NOT NULL;")
  total_urls=$(run_sql "$URL_DB" "SELECT COUNT(*) FROM uris;")
  active_urls=$(run_sql "$URL_DB" "SELECT COUNT(*) FROM uris WHERE error=0;")

  log_info "Proxies: ${proxies} total, ${working} working"
  log_info "URLs:    ${total_urls} total, ${active_urls} active"
}

# Delete every proxylist row with the container stopped (so nothing writes
# to the DB mid-purge), vacuum, verify the separate URL DB is untouched,
# then bring the container back up.
cmd_purge_proxies() {
  section "Purging proxies from odin"

  # Get counts before
  local before
  before=$(run_sql "$PROXY_DB" "SELECT COUNT(*) FROM proxylist;")
  log_info "Proxies before: $before"

  # Stop container
  log_info "Stopping container..."
  compose_cmd "$MASTER" "down" > /dev/null 2>&1 \
    && log_ok "Container stopped" \
    || die "Failed to stop container"

  # Delete proxies
  log_info "Deleting proxylist rows..."
  run_sql "$PROXY_DB" "DELETE FROM proxylist;" > /dev/null 2>&1
  log_ok "Proxylist purged"

  # Vacuum to reclaim space
  log_info "Vacuuming database..."
  run_sql "$PROXY_DB" "VACUUM;" > /dev/null 2>&1
  log_ok "Database vacuumed"

  # Verify URLs intact
  local urls_after
  urls_after=$(run_sql "$URL_DB" "SELECT COUNT(*) FROM uris;")
  log_ok "URLs preserved: $urls_after"

  # Start container
  log_info "Starting container..."
  compose_cmd "$MASTER" "up -d" > /dev/null 2>&1 \
    && log_ok "Container started" \
    || die "Failed to start container"
}
|
||||||
|
|
||||||
|
# Reclaim disk space in the proxy DB and report the human-readable size
# before and after.
# Fix over previous version: the size was extracted with
#   grep -oE '[0-9]+[KMG]?' | head -1
# over the whole ansible raw output, which matches the FIRST number anywhere
# -- e.g. the "rc=0" banner or the link count in `ls -l` -- so the reported
# "size" was bogus.  We now take field 5 (the size column of `ls -l`) from
# the line that actually names the DB file.
cmd_vacuum() {
  section "Vacuuming database (odin)"

  local before after

  # Helper: print the size column of the remote `ls -lh` line for PROXY_DB.
  _db_size() {
    ansible_cmd "$MASTER" -m raw -a \
      "sudo -u podman ls -lh '$PROXY_DB'" 2>/dev/null \
      | awk -v db="$PROXY_DB" 'index($0, db) && NF >= 5 { print $5; exit }'
  }

  before=$(_db_size)

  run_sql "$PROXY_DB" "VACUUM;" > /dev/null 2>&1

  after=$(_db_size)

  # :-? keeps the line readable if either lookup produced nothing.
  log_ok "Vacuumed: ${before:-?} -> ${after:-?}"
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
# No arguments: show help (usage exits 0).
[ $# -eq 0 ] && usage

COMMAND=""

while [ $# -gt 0 ]; do
  case "$1" in
    --help|-h) usage ;;
    --version|-V) echo "ppf-db $PPF_TOOLS_VERSION"; exit 0 ;;
    stats|purge-proxies|vacuum)
      # Exactly one command per invocation.
      [ -n "$COMMAND" ] && die "Multiple commands given"
      COMMAND="$1"
      ;;
    -*) die "Unknown option: $1" ;;
    *) die "Unknown command: $1" ;;
  esac
  shift
done

[ -z "$COMMAND" ] && die "No command given. Use: stats, purge-proxies, vacuum"

# Dispatch to the selected command.
case "$COMMAND" in
  stats) cmd_stats ;;
  purge-proxies) cmd_purge_proxies ;;
  vacuum) cmd_vacuum ;;
esac

printf "\n"
|
||||||
115
tools/ppf-deploy
Executable file
115
tools/ppf-deploy
Executable file
@@ -0,0 +1,115 @@
|
|||||||
|
#!/bin/bash
# ppf-deploy -- deploy PPF code to nodes
#
# Usage:
#   ppf-deploy [options] [targets...]
#
# Targets:
#   all         odin + all workers (default)
#   workers     cassius, edge, sentinel
#   master      odin
#   <hostname>  specific host(s)

set -eu

# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"

# Playbooks ship next to this script.
PLAYBOOK_DIR="$SCRIPT_DIR/playbooks"

# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
  cat <<EOF
Usage: ppf-deploy [options] [targets...]

Deploy PPF code to nodes via Ansible playbook.

Targets:
  all          odin + all workers (default)
  workers      cassius, edge, sentinel
  master       odin
  <hostname>   specific host(s)

Options:
  --no-restart   sync files only, skip container restart
  --check        dry run (ansible --check --diff)
  -v             verbose ansible output
  --help         show this help
  --version      show version

Steps performed:
  1. Validate Python syntax locally
  2. Rsync *.py + servers.txt (role-aware destinations)
  3. Copy compose file per role
  4. Fix ownership (podman:podman)
  5. Restart containers on change (unless --no-restart)
  6. Show container status
EOF
  exit 0
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
DO_RESTART=1
CHECK_MODE=0
VERBOSE=""
TARGETS=""

while [ $# -gt 0 ]; do
  case "$1" in
    --help|-h) usage ;;
    --version|-V) echo "ppf-deploy $PPF_TOOLS_VERSION"; exit 0 ;;
    --no-restart) DO_RESTART=0 ;;
    --check) CHECK_MODE=1 ;;
    -v) VERBOSE="-v" ;;
    -*) die "Unknown option: $1" ;;
    # Anything else accumulates as a target token.
    *) TARGETS="${TARGETS:+$TARGETS }$1" ;;
  esac
  shift
done

# Default: deploy everywhere.
TARGETS="${TARGETS:-all}"

# ---------------------------------------------------------------------------
# Pre-flight: local syntax validation
# ---------------------------------------------------------------------------
# Dies before touching any node if a local *.py fails to compile.
validate_syntax

# ---------------------------------------------------------------------------
# Build ansible-playbook arguments
# ---------------------------------------------------------------------------
ARGS=(-i "$PLAYBOOK_DIR/inventory.ini")
ARGS+=(-e "ppf_src=$PPF_DIR")

if [ "$DO_RESTART" -eq 0 ]; then
  ARGS+=(-e "ppf_restart=false")
fi

if [ "$CHECK_MODE" -eq 1 ]; then
  ARGS+=(--check --diff)
fi

[ -n "$VERBOSE" ] && ARGS+=("$VERBOSE")

# Target resolution: map aliases to ansible --limit
case "$TARGETS" in
  all) ;; # no --limit = all hosts in inventory
  *)
    # Word-splitting of $TARGETS is intentional (space-separated tokens).
    LIMIT=$(resolve_targets $TARGETS | tr ' ' ',')
    ARGS+=(--limit "$LIMIT")
    ;;
esac

ARGS+=("$PLAYBOOK_DIR/deploy.yml")

# ---------------------------------------------------------------------------
# Run playbook
# ---------------------------------------------------------------------------
section "Deploying to ${TARGETS}"
ansible_playbook_cmd "${ARGS[@]}"
|
||||||
80
tools/ppf-logs
Executable file
80
tools/ppf-logs
Executable file
@@ -0,0 +1,80 @@
|
|||||||
|
#!/bin/bash
# ppf-logs -- view PPF container logs
#
# Usage:
#   ppf-logs [options] [node]
#
# Defaults to odin if no node specified.

set -eu

# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"

# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
  cat <<EOF
Usage: ppf-logs [options] [node]

View PPF container logs.

Nodes:
  odin, cassius, edge, sentinel (default: odin)

Options:
  -f          follow log output
  -n LINES    number of lines to show (default: 40)
  --help      show this help
  --version   show version

Examples:
  ppf-logs                   last 40 lines from odin
  ppf-logs cassius           last 40 lines from cassius
  ppf-logs -f edge           follow edge worker logs
  ppf-logs -n 100 sentinel   last 100 lines from sentinel
EOF
  exit 0
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
FOLLOW=0
LINES=40
NODE=""

while [ $# -gt 0 ]; do
  case "$1" in
    --help|-h) usage ;;
    --version|-V) echo "ppf-logs $PPF_TOOLS_VERSION"; exit 0 ;;
    -f) FOLLOW=1 ;;
    -n) shift; LINES="${1:?'-n' requires a number}" ;;
    -*) die "Unknown option: $1" ;;
    *) NODE="$1" ;;
  esac
  shift
done

# Reject non-numeric -n values early.  LINES is interpolated into a remote
# shell command below; previously anything the user typed went straight
# into that command line -- a confusing remote failure at best, shell
# injection at worst.
case "$LINES" in
  ''|*[!0-9]*) die "-n requires a positive integer, got: $LINES" ;;
esac

NODE="${NODE:-$MASTER}"

# Validate node
is_master "$NODE" || is_worker "$NODE" || die "Unknown node: $NODE"

CNAME=$(container_name "$NODE")

# ---------------------------------------------------------------------------
# Build podman logs command
# ---------------------------------------------------------------------------
CMD="podman logs --tail $LINES"
[ "$FOLLOW" -eq 1 ] && CMD="$CMD -f"
CMD="$CMD $CNAME"

section "$NODE ($CNAME)"

# Run with raw output -- logs go straight to terminal
podman_cmd "$NODE" "$CMD"
|
||||||
186
tools/ppf-service
Executable file
186
tools/ppf-service
Executable file
@@ -0,0 +1,186 @@
|
|||||||
|
#!/bin/bash
# ppf-service -- manage PPF containers
#
# Usage:
#   ppf-service <command> [nodes...]
#
# Commands: status, start, stop, restart

set -eu

# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
. "$SCRIPT_DIR/lib/ppf-common.sh"

# Master's HTTP endpoint, reached FROM the master itself over the LAN
# address (the curl below runs remotely via ansible).
ODIN_URL="http://10.200.1.250:8081"

# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
usage() {
  cat <<EOF
Usage: ppf-service <command> [nodes...]

Manage PPF containers on remote nodes.

Commands:
  status    show container state + health (default nodes: all)
  start     start containers (compose up -d)
  stop      stop containers (compose stop)
  restart   restart containers (compose restart)

Nodes:
  all          odin + all workers (default)
  workers      cassius, edge, sentinel
  master       odin
  <hostname>   specific host(s)

Options:
  --help      show this help
  --version   show version

Examples:
  ppf-service status
  ppf-service restart workers
  ppf-service stop cassius edge
  ppf-service start odin
EOF
  exit 0
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Status helpers
# ---------------------------------------------------------------------------
# Probe the master's /health endpoint (curl runs remotely on the master via
# ansible raw) and print a one-line verdict.
show_health() {
  local result
  # || true: a probe failure must not abort the status report under set -e.
  result=$(ansible_cmd "$MASTER" -m raw -a \
    "curl -sf --max-time 5 ${ODIN_URL}/health 2>/dev/null || echo UNREACHABLE" \
    2>/dev/null) || true
  if echo "$result" | grep -qi "ok\|healthy"; then
    log_ok "master health: ok"
  elif echo "$result" | grep -qi "UNREACHABLE"; then
    log_err "master health: unreachable"
  else
    log_warn "master health: $result"
  fi
}

# Dump (at most 20 lines of) the master's /api/workers response.
show_workers_api() {
  local result
  result=$(ansible_cmd "$MASTER" -m raw -a \
    "curl -sf --max-time 5 ${ODIN_URL}/api/workers 2>/dev/null || echo '{}'" \
    2>/dev/null) || true
  # Just show the raw output, trimmed
  local data
  # NOTE(review): '^[A-Z]' presumably drops uppercase status words (e.g.
  # the UNREACHABLE fallback) from the raw output -- verify against the
  # actual ansible banner format.
  data=$(echo "$result" | grep -v '^\s*$' | grep -v '^[A-Z]' | head -20)
  if [ -n "$data" ]; then
    log_info "Worker API response:"
    echo "$data" | while IFS= read -r line; do
      log_dim "$line"
    done
  fi
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Commands
# ---------------------------------------------------------------------------
# Per-host compose-ps summary; adds master health + worker API info when
# odin is among the targets.
# Arguments: $1 - space-separated host list (word-split deliberately).
cmd_status() {
  local hosts="$1"
  section "Container status"

  for host in $hosts; do
    local output
    # || true: an unreachable host must not abort the loop under set -e.
    output=$(compose_cmd "$host" "ps" 2>/dev/null) || true
    # Classify the host by scanning the ps output text.
    if echo "$output" | grep -qi "up\|running"; then
      log_ok "$host"
    elif echo "$output" | grep -qi "exit"; then
      log_err "$host (exited)"
    else
      log_warn "$host (unknown)"
    fi
    # Echo the non-blank ps lines, dimmed, under the verdict.
    echo "$output" | grep -v '^\s*$' | while IFS= read -r line; do
      log_dim "$line"
    done
  done

  # Show health/worker info if master is in target list
  local h
  for h in $hosts; do
    if is_master "$h"; then
      section "Master health"
      show_health
      show_workers_api
      break
    fi
  done
}
|
||||||
|
|
||||||
|
# Bring containers up (compose up -d) on each host in the list.
# Arguments: $1 - space-separated host list.
cmd_start() {
  local hosts="$1"
  section "Starting containers"
  for host in $hosts; do
    if compose_cmd "$host" "up -d" > /dev/null 2>&1; then
      log_ok "$host started"
    else
      log_err "$host start failed"
    fi
  done
}
|
||||||
|
|
||||||
|
# Stop containers (compose stop) on each host in the list.
# Arguments: $1 - space-separated host list.
cmd_stop() {
  local hosts="$1"
  section "Stopping containers"
  for host in $hosts; do
    if compose_cmd "$host" "stop" > /dev/null 2>&1; then
      log_ok "$host stopped"
    else
      log_err "$host stop failed"
    fi
  done
}
|
||||||
|
|
||||||
|
# Full bounce per host: compose down, then compose up -d.
# Arguments: $1 - space-separated host list.
cmd_restart() {
  local hosts="$1"
  section "Restarting containers"
  for host in $hosts; do
    if compose_cmd "$host" "down" > /dev/null 2>&1 \
        && compose_cmd "$host" "up -d" > /dev/null 2>&1; then
      log_ok "$host restarted"
    else
      log_err "$host restart failed"
    fi
  done
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse args
# ---------------------------------------------------------------------------
# No arguments: show help (usage exits 0).
[ $# -eq 0 ] && usage

COMMAND=""
TARGETS=""

while [ $# -gt 0 ]; do
  case "$1" in
    --help|-h) usage ;;
    --version|-V) echo "ppf-service $PPF_TOOLS_VERSION"; exit 0 ;;
    status|start|stop|restart)
      # Exactly one command per invocation.
      [ -n "$COMMAND" ] && die "Multiple commands given"
      COMMAND="$1"
      ;;
    -*) die "Unknown option: $1" ;;
    # Anything else accumulates as a target token.
    *) TARGETS="${TARGETS:+$TARGETS }$1" ;;
  esac
  shift
done

[ -z "$COMMAND" ] && die "No command given. Use: status, start, stop, restart"

# Expand aliases (all/workers/master) into a concrete host list.
TARGETS="${TARGETS:-all}"
HOSTS=$(resolve_targets $TARGETS)
[ -z "$HOSTS" ] && die "No valid targets"

case "$COMMAND" in
  status) cmd_status "$HOSTS" ;;
  start) cmd_start "$HOSTS" ;;
  stop) cmd_stop "$HOSTS" ;;
  restart) cmd_restart "$HOSTS" ;;
esac

printf "\n"
|
||||||
246
tools/ppf-status
Executable file
246
tools/ppf-status
Executable file
@@ -0,0 +1,246 @@
|
|||||||
|
#!/bin/bash
# ppf-status -- PPF cluster overview
#
# Usage:
#   ppf-status [options]
#
# Queries the odin manager API and the cluster databases (over Ansible)
# and prints a formatted overview: container health, worker stats,
# verification/queue stats, and database counts.

set -eu   # abort on unhandled command failure and unset variables (no pipefail)

# Resolve to real path (handles symlinks from ~/.local/bin/)
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd)/$(basename "$0")"
SCRIPT_DIR="$(dirname "$(readlink -f "$SCRIPT_PATH")")"
# shellcheck disable=SC1091
# Shared helpers — expected to provide die, section, log_*, compose_cmd,
# ansible_cmd plus MASTER, ALL_HOSTS and PPF_TOOLS_VERSION (used below).
. "$SCRIPT_DIR/lib/ppf-common.sh"

# Odin manager API as seen from odin itself (curl is executed remotely).
ODIN_URL="http://127.0.0.1:8081"
# SQLite database paths on odin — remote paths, not local to this machine.
PROXY_DB="/home/podman/ppf/data/proxies.sqlite"
URL_DB="/home/podman/ppf/data/websites.sqlite"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Usage
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Print the help text to stdout and exit 0 (used by -h/--help and bad usage).
usage() {
    cat <<EOF
Usage: ppf-status [options]

Show PPF cluster overview.

Options:
  --json        raw JSON from API
  --help        show this help
  --version     show version

Displays:
  - Container health per node
  - Worker stats (tested, working, rate, active)
  - Odin manager stats (verification, queue)
  - Database counts (proxies, URLs)
EOF
    exit 0
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Command-line parsing
# ---------------------------------------------------------------------------
RAW_JSON=0

for arg in "$@"; do
    case "$arg" in
        -h|--help)
            usage
            ;;
        -V|--version)
            printf 'ppf-status %s\n' "$PPF_TOOLS_VERSION"
            exit 0
            ;;
        --json)
            RAW_JSON=1
            ;;
        -*)
            die "Unknown option: $arg"
            ;;
        *)
            die "Unknown argument: $arg"
            ;;
    esac
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Fetch API data from odin (run on odin via curl to localhost)
#
# Ansible's raw module wraps the payload in noise (connection messages,
# "odin | CHANGED/SUCCESS" headers); the sed pass strips those so only the
# JSON body survives.  If curl fails, the remote command prints '{}'.
# ---------------------------------------------------------------------------
api_json=$(ansible_cmd "$MASTER" -m raw -a \
    "curl -sf --max-time 5 ${ODIN_URL}/api/workers 2>/dev/null || echo '{}'" \
    2>/dev/null | sed 's/Shared connection.*closed\.\?//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d')

# --json: emit the raw payload unformatted and stop.
if [ "$RAW_JSON" -eq 1 ]; then
    echo "$api_json"
    exit 0
fi

# Check if we got valid data (must parse as JSON or nothing below can work).
if ! echo "$api_json" | python3 -c "import sys,json; json.load(sys.stdin)" 2>/dev/null; then
    die "Failed to fetch API data from odin"
fi
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Container health
# ---------------------------------------------------------------------------
section "Containers"

for node in $ALL_HOSTS; do
    state=$(compose_cmd "$node" "ps" 2>/dev/null) || true
    # Classify by case-insensitive substrings of the `compose ps` output
    # (lowercase once, then glob-match instead of repeated grep calls).
    case "${state,,}" in
        *up*|*running*) log_ok   "$node" ;;
        *exit*)         log_err  "$node (exited)" ;;
        *)              log_warn "$node (unknown)" ;;
    esac
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Database summary (quick counts from odin)
# ---------------------------------------------------------------------------

# odin_db_count DB_PATH SQL
#   Run a sqlite3 query on odin (as the podman user) and print the scalar
#   result, stripping Ansible raw-mode noise.  Prints '?' if the pipeline
#   itself fails (note: without pipefail, a failed ansible_cmd with a
#   successful sed still yields an empty result, not '?', as before).
odin_db_count() {
    ansible_cmd "$MASTER" -m raw -a \
        "sudo -u podman sqlite3 '$1' '$2'" 2>/dev/null \
        | sed 's/Shared connection.*//; /^\s*$/d; /^odin/d; /CHANGED/d; /SUCCESS/d' || echo '?'
}

section "Database"

proxy_count=$(odin_db_count "$PROXY_DB" 'SELECT COUNT(*) FROM proxylist;')
working_count=$(odin_db_count "$PROXY_DB" 'SELECT COUNT(*) FROM proxylist WHERE failed=0 AND proto IS NOT NULL;')
url_count=$(odin_db_count "$URL_DB" 'SELECT COUNT(*) FROM uris;')

log_info "Proxies: ${proxy_count} total, ${working_count} working"
log_info "URLs: ${url_count}"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Parse and display via Python for clean formatting
# ---------------------------------------------------------------------------
echo "$api_json" | python3 -c "
import sys, json

NO_COLOR = __import__('os').environ.get('NO_COLOR', '')

# Colors: only when stdout is a TTY and NO_COLOR is not set.
if not NO_COLOR and sys.stdout.isatty():
    RST = '\033[0m'
    DIM = '\033[2m'
    BOLD = '\033[1m'
    RED = '\033[38;5;167m'
    GREEN = '\033[38;5;114m'
    YELLOW = '\033[38;5;180m'
    BLUE = '\033[38;5;110m'
    CYAN = '\033[38;5;116m'
else:
    RST = DIM = BOLD = RED = GREEN = YELLOW = BLUE = CYAN = ''

def ok(s): return GREEN + s + RST
def err(s): return RED + s + RST
def warn(s): return YELLOW + s + RST
def dim(s): return DIM + s + RST
def bold(s): return BOLD + CYAN + s + RST

# Compact human-readable number (e.g. 1.2k, 3.4M).
# BUGFIX: defined at module level -- it was previously defined inside the
# workers loop, so the manager section crashed with NameError whenever no
# workers were connected.
def fmt(n):
    if n >= 1000000: return '%.1fM' % (n / 1000000)
    if n >= 1000: return '%.1fk' % (n / 1000)
    return str(n)

try:
    data = json.load(sys.stdin)
except ValueError:
    # Invalid/empty payload: exit silently (already validated upstream).
    sys.exit(0)

workers = data.get('workers', [])
summary = data.get('summary', {})
queue = data.get('queue', {})
manager = data.get('manager', {})

# Workers table
print()
print(bold(' Workers'))
if workers:
    # Header
    print(dim(' %-12s %7s %9s %9s %7s %6s %s' % (
        'NAME', 'TESTED', 'WORKING', 'FAILED', 'RATE', 'ACT', 'STATUS')))
    for w in sorted(workers, key=lambda x: x.get('name', '')):
        name = w.get('name', w.get('ip', '?'))
        tested = w.get('proxies_tested', 0)
        working = w.get('proxies_working', 0)
        failed = w.get('proxies_failed', 0)
        rate = w.get('success_rate', 0)
        active = w.get('active', False)

        act_str = ok('yes') if active else err('no')
        if rate >= 30:
            rate_str = ok('%.1f%%' % rate)
        elif rate >= 10:
            rate_str = warn('%.1f%%' % rate)
        else:
            rate_str = err('%.1f%%' % rate)

        # Stale = no report for >5 min while not actively testing.
        age = w.get('age', 0)
        if age > 300 and not active:
            status = err('stale (%dm)' % (age // 60))
        elif active:
            status = ok('testing')
        else:
            status = dim('idle')

        print(' %-12s %7s %9s %9s %7s %6s %s' % (
            name, fmt(tested), fmt(working), fmt(failed),
            rate_str, act_str, status))

    # Summary line
    total_t = summary.get('total_tested', 0)
    total_w = summary.get('total_working', 0)
    total_f = summary.get('total_failed', 0)
    overall = summary.get('overall_success_rate', 0)
    active_count = data.get('active', 0)
    total_count = data.get('total', 0)
    print(dim(' %-12s %7s %9s %9s %7s %6s' % (
        'TOTAL',
        fmt(total_t) if total_t else '-',
        fmt(total_w) if total_w else '-',
        fmt(total_f) if total_f else '-',
        '%.1f%%' % overall,
        '%d/%d' % (active_count, total_count))))
else:
    print(err(' no workers connected'))

# Manager (odin verification)
if manager:
    print()
    print(bold(' Odin Verification'))
    m_rate = manager.get('success_rate', 0)
    m_tested = manager.get('tested', 0)
    m_passed = manager.get('passed', 0)
    m_threads = manager.get('threads', 0)
    m_speed = manager.get('rate', 0)
    m_queue = manager.get('queue_size', 0)
    m_uptime = manager.get('uptime', 0)

    # Seconds -> short human duration (1h2m / 3m4s / 5s).
    def fmt_time(s):
        if s >= 3600: return '%dh%dm' % (s // 3600, (s % 3600) // 60)
        if s >= 60: return '%dm%ds' % (s // 60, s % 60)
        return '%ds' % s

    if m_rate >= 30:
        rate_str = ok('%.1f%%' % m_rate)
    elif m_rate >= 10:
        rate_str = warn('%.1f%%' % m_rate)
    else:
        rate_str = err('%.1f%%' % m_rate)

    print(' threads: %d rate: %.2f/s uptime: %s' % (m_threads, m_speed, fmt_time(m_uptime)))
    print(' tested: %s passed: %s success: %s' % (fmt(m_tested), fmt(m_passed), rate_str))
    print(' queue: %d jobs' % m_queue)

# Queue
if queue:
    print()
    print(bold(' Proxy Queue'))
    print(' total: %d due: %d pending: %d claimed: %d' % (
        queue.get('total', 0), queue.get('due', 0),
        queue.get('pending', 0), queue.get('claimed', 0)))
    sess_tested = queue.get('session_tested', 0)
    sess_pct = queue.get('session_pct', 0)
    if sess_tested:
        print(' session: %s tested (%.1f%%)' % (fmt(sess_tested), sess_pct))

print()
"
|
||||||
Reference in New Issue
Block a user