1444 lines
56 KiB
Python
1444 lines
56 KiB
Python
#!/usr/bin/env python2
|
|
# -*- coding: utf-8 -*-
|
|
from __future__ import division
|
|
"""HTTP API server with advanced web dashboard for PPF."""
|
|
|
|
try:
|
|
from ppf import __version__
|
|
except ImportError:
|
|
__version__ = 'unknown'
|
|
|
|
import BaseHTTPServer
|
|
import json
|
|
import threading
|
|
import time
|
|
import os
|
|
import gc
|
|
import sys
|
|
from collections import defaultdict
|
|
import mysqlite
|
|
from misc import _log
|
|
|
|
# --- Rate limiting state -----------------------------------------------------
# Sliding-window limiter: per-IP list of request timestamps, pruned on each
# check (see check_rate_limit()).  All access is guarded by _rate_lock.
_rate_limits = defaultdict(list)
_rate_lock = threading.Lock()
_rate_limit_requests = 300  # max requests per window (raised to accommodate workers)
_rate_limit_window = 60  # window length in seconds

# Static directories (relative to this file)
_STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
_STATIC_LIB_DIR = os.path.join(_STATIC_DIR, 'lib')

# Content-Type served for static files, keyed by file extension.
_CONTENT_TYPES = {
    '.js': 'application/javascript; charset=utf-8',
    '.css': 'text/css; charset=utf-8',
    '.json': 'application/json; charset=utf-8',
    '.png': 'image/png',
    '.svg': 'image/svg+xml',
    '.woff': 'font/woff',
    '.woff2': 'font/woff2',
}

# Cache for static library files (filename -> bytes), populated once at
# startup by load_static_libs().
_LIB_CACHE = {}

# Cache for dashboard static files (HTML, CSS, JS), populated by
# load_static_files(); the CSS gets theme-color substitution applied.
_STATIC_CACHE = {}

# Optional memory profiling (installed via requirements.txt); absence is
# tolerated - profiling features simply report less detail.
try:
    import objgraph
    _has_objgraph = True
except ImportError:
    _has_objgraph = False

try:
    from pympler import muppy, summary
    _has_pympler = True
except ImportError:
    _has_pympler = False

# Memory tracking for leak detection (maintained by get_system_stats()).
_memory_samples = []  # list of (timestamp, rss_bytes) samples
_memory_sample_max = 60  # Keep last 60 samples (5 min at 5s intervals)
_peak_rss = 0  # highest RSS observed since start
_start_rss = 0  # RSS at first sample; baseline for growth reporting

# Cache for expensive operations
_gc_objects_cache = {'value': 0, 'time': 0}  # cached len(gc.get_objects())
_gc_objects_ttl = 30  # seconds

_db_health_cache = {'value': {}, 'time': 0}  # cached get_db_health() result
_db_health_ttl = 10  # seconds

# Worker registry for distributed testing
import hashlib
import random
import string

# worker_id -> {name, ip, key, last_seen, jobs_completed, proxies_tested, ...};
# guarded by _workers_lock.
_workers = {}
_workers_lock = threading.Lock()
# "ip:port" -> {worker_id, claimed_at}; guarded by _work_claims_lock.
_work_claims = {}
_work_claims_lock = threading.Lock()
_worker_keys = set()  # valid worker API keys
_master_key = None  # master key for worker registration (None = unset)
_claim_timeout = 300  # seconds before unclaimed work is released
_workers_file = 'data/workers.json'  # persistent storage for the registry
|
|
|
|
def load_workers():
    """Restore the worker registry and API-key set from the JSON state file.

    Missing file is a no-op; any other failure is logged and ignored so
    startup never aborts on a corrupt registry.
    """
    global _workers, _worker_keys
    try:
        if not os.path.exists(_workers_file):
            return
        with open(_workers_file, 'r') as fh:
            state = json.load(fh)
        _workers = state.get('workers', {})
        _worker_keys = set(state.get('keys', []))
        _log('loaded %d workers from %s' % (len(_workers), _workers_file), 'info')
    except Exception as exc:
        _log('failed to load workers: %s' % exc, 'warn')
|
|
|
|
def save_workers():
    """Persist the worker registry and API-key set to the JSON state file.

    Best-effort: write failures are logged, never raised.
    """
    try:
        snapshot = {
            'workers': _workers,
            'keys': list(_worker_keys),
            'saved': time.time(),
        }
        with open(_workers_file, 'w') as fh:
            json.dump(snapshot, fh, indent=2)
    except Exception as exc:
        _log('failed to save workers: %s' % exc, 'warn')
|
|
|
|
def generate_worker_key():
    """Generate a random 32-character alphanumeric worker API key.

    Uses random.SystemRandom (OS entropy) instead of the module-level
    Mersenne Twister: these keys gate worker API access, so they must not
    be predictable from observable PRNG state.

    Returns:
        str of 32 characters drawn from [a-zA-Z0-9].
    """
    rng = random.SystemRandom()
    chars = string.ascii_letters + string.digits
    return ''.join(rng.choice(chars) for _ in range(32))
|
|
|
|
def generate_worker_id(name, ip):
    """Derive a unique 16-hex-char worker ID from name, IP and current time.

    The timestamp makes repeated registrations yield distinct IDs.
    """
    seed = '%s:%s:%s' % (name, ip, time.time())
    digest = hashlib.sha256(seed.encode()).hexdigest()
    return digest[:16]
|
|
|
|
def register_worker(name, ip, key=None):
    """Register a new worker and return its credentials.

    If a worker with same name exists, update its IP and reset key.
    This allows workers to re-register after restarts.

    Args:
        name: worker name; treated as the unique identity for re-registration.
        ip: remote address the worker registered from.
        key: optional caller-supplied API key; generated when falsy.

    Returns:
        (worker_id, worker_key) tuple.
    """
    # Check for existing worker with same name
    with _workers_lock:
        for wid, info in _workers.items():
            if info.get('name') == name:
                # Re-registration: update IP, generate new key, preserve stats
                new_key = key or generate_worker_key()
                _worker_keys.discard(info.get('key'))  # Remove old key
                info['ip'] = ip
                info['key'] = new_key
                info['last_seen'] = time.time()
                _worker_keys.add(new_key)
                # Persist while still holding the lock so the saved snapshot
                # matches the in-memory state.
                save_workers()
                return wid, new_key

    # New registration: fresh ID, zeroed statistics counters.
    worker_id = generate_worker_id(name, ip)
    worker_key = key or generate_worker_key()
    with _workers_lock:
        _workers[worker_id] = {
            'name': name,
            'ip': ip,
            'key': worker_key,
            'registered': time.time(),
            'last_seen': time.time(),
            'jobs_completed': 0,
            'proxies_tested': 0,
            'proxies_working': 0,
            'proxies_failed': 0,
            'total_latency': 0,
            'last_batch_size': 0,
            'last_batch_working': 0,
        }
        _worker_keys.add(worker_key)
        save_workers()
    return worker_id, worker_key
|
|
|
|
def validate_worker_key(key):
    """Return True when `key` authorizes worker API access.

    The master key (when configured) is accepted for every operation;
    otherwise the key must belong to a registered worker.
    """
    if not key:
        return False
    is_master = bool(_master_key) and key == _master_key
    return is_master or key in _worker_keys
|
|
|
|
def get_worker_by_key(key):
    """Find the worker owning an API key.

    Returns:
        (worker_id, info_dict) for the first match, else (None, None).
    """
    with _workers_lock:
        matches = [(wid, info) for wid, info in _workers.items()
                   if info.get('key') == key]
    if matches:
        return matches[0]
    return None, None
|
|
|
|
def update_worker_heartbeat(worker_id):
    """Refresh a worker's last_seen timestamp; unknown IDs are a no-op."""
    with _workers_lock:
        entry = _workers.get(worker_id)
        if entry is not None:
            entry['last_seen'] = time.time()
|
|
|
|
def claim_work(db, worker_id, count=100):
    """Claim a batch of proxies for testing. Returns list of proxy dicts.

    Args:
        db: database handle exposing .execute() (sqlite-style placeholders).
        worker_id: id of the claiming worker, recorded on each claim.
        count: advisory batch size; currently ignored in favour of a
            randomized 100-250 target (kept for interface compatibility).

    Returns:
        List of {'ip', 'port', 'proto', 'failed'} dicts, possibly empty.
    """
    now = time.time()
    now_int = int(now)

    # Randomize batch size (100-250) to stagger worker completion times.
    # This avoids a thundering herd when all workers finish and request
    # new work simultaneously.
    target_count = random.randint(100, 250)

    # Clean up stale claims, then snapshot the currently-claimed set.
    with _work_claims_lock:
        stale = [k for k, v in _work_claims.items() if now - v['claimed_at'] > _claim_timeout]
        for k in stale:
            del _work_claims[k]
        claimed_keys = set(_work_claims.keys())

    # Cooldown model: a proxy is due when
    #   tested + checktime + failed * perfail_checktime < now
    checktime = 1800  # 30 min base cooldown
    perfail_checktime = 3600  # +1 hour per failure
    max_fail = 5

    try:
        # Exclude already-claimed proxies in SQL only while the claim set
        # fits in one statement: SQLite's default host-parameter limit is
        # 999 (SQLITE_MAX_VARIABLE_NUMBER).  Previously an oversized claim
        # pool made the query fail outright.  With a large pool we skip the
        # SQL exclusion, over-fetch, and rely on the Python-side claim
        # re-check below to filter duplicates.
        if claimed_keys and len(claimed_keys) <= 900:
            placeholders = ','.join('?' for _ in claimed_keys)
            exclude_clause = "AND (ip || ':' || port) NOT IN (%s)" % placeholders
            exclude_params = list(claimed_keys)
            fetch_count = target_count
        else:
            exclude_clause = ""
            exclude_params = []
            # Over-fetch so enough unclaimed rows survive the filter.
            fetch_count = target_count + len(claimed_keys)

        # Priority tiers: 0=untested, 1=due >1hr ago, 2=due <1hr ago;
        # random order within each tier for fair distribution.
        query = '''
            SELECT ip, port, proto, failed,
                CASE
                    WHEN tested IS NULL THEN 0
                    WHEN (? - (tested + ? + (failed * ?))) > 3600 THEN 1
                    ELSE 2
                END as priority
            FROM proxylist
            WHERE failed >= 0 AND failed < ?
                AND (tested IS NULL OR (tested + ? + (failed * ?)) < ?)
                %s
            ORDER BY priority, RANDOM()
            LIMIT ?
        ''' % exclude_clause

        params = [now_int, checktime, perfail_checktime,
                  max_fail, checktime, perfail_checktime, now_int] + exclude_params + [fetch_count]
        rows = db.execute(query, params).fetchall()
    except Exception as e:
        _log('claim_work query error: %s' % e, 'error')
        return []

    # Claim the fetched proxies.  The re-check under the lock protects
    # against concurrent claimers AND filters rows the SQL exclusion may
    # have skipped (large-pool path above).
    claimed = []
    with _work_claims_lock:
        for row in rows:
            if len(claimed) >= target_count:
                break
            proxy_key = '%s:%s' % (row[0], row[1])
            if proxy_key not in _work_claims:
                _work_claims[proxy_key] = {'worker_id': worker_id, 'claimed_at': now}
                claimed.append({
                    'ip': row[0],
                    'port': row[1],
                    'proto': row[2],
                    'failed': row[3],
                })

    if claimed:
        _log('claim_work: %d proxies to %s (pool: %d claimed)' % (
            len(claimed), worker_id[:8], len(_work_claims)), 'info')

    return claimed
|
|
|
|
# Timestamp of the last periodic save_workers() call made by
# submit_results(); throttles registry persistence to once per minute.
_last_workers_save = 0
|
|
|
|
def submit_results(db, worker_id, results):
    """Process test results from a worker. Returns count of processed results.

    Each result dict must carry 'ip' and 'port', plus 'working' (truthy for
    a passing test) and optionally 'latency'.  Working proxies get their
    failure counter reset and latency recorded; failing ones get the counter
    bumped.  Claims are released and worker statistics updated either way.

    Args:
        db: database handle exposing .execute().
        worker_id: id of the submitting worker.
        results: iterable of result dicts.

    Returns:
        Number of results successfully written to the database.
    """
    global _last_workers_save
    processed = 0
    working_count = 0
    total_latency = 0
    now = time.time()

    # Touch the heartbeat up front so the worker is not considered stale
    # while a large batch is being written.
    with _workers_lock:
        if worker_id in _workers:
            _workers[worker_id]['last_seen'] = now

    for r in results:
        proxy_key = '%s:%s' % (r.get('ip', ''), r.get('port', ''))

        # Release claim
        with _work_claims_lock:
            if proxy_key in _work_claims:
                del _work_claims[proxy_key]

        # Update database
        try:
            if r.get('working'):
                db.execute('''
                    UPDATE proxylist SET
                        failed = 0,
                        tested = ?,
                        latency = ?
                    WHERE ip = ? AND port = ?
                ''', (int(now), r.get('latency', 0), r['ip'], r['port']))
                working_count += 1
                total_latency += r.get('latency', 0)
            else:
                db.execute('''
                    UPDATE proxylist SET
                        failed = failed + 1,
                        tested = ?
                    WHERE ip = ? AND port = ?
                ''', (int(now), r['ip'], r['port']))
            processed += 1
        except Exception as e:
            # Best-effort per-result processing: skip bad entries, but log
            # instead of the previous silent `pass` so malformed batches and
            # DB errors are visible.
            _log('submit_results: skipped result %s: %s' % (proxy_key, e), 'warn')

    # Update aggregate worker statistics.
    with _workers_lock:
        if worker_id in _workers:
            w = _workers[worker_id]
            w['jobs_completed'] += 1
            w['proxies_tested'] += processed
            w['proxies_working'] = w.get('proxies_working', 0) + working_count
            w['proxies_failed'] = w.get('proxies_failed', 0) + (processed - working_count)
            w['total_latency'] = w.get('total_latency', 0) + total_latency
            w['last_batch_size'] = len(results)
            w['last_batch_working'] = working_count

    # Persist the registry at most once per minute.
    if now - _last_workers_save > 60:
        save_workers()
        _last_workers_save = now

    return processed
|
|
|
|
|
|
def is_localhost(ip):
    """Return True for loopback addresses (127.0.0.0/8 or IPv6 ::1)."""
    if not ip:
        return False
    # '::1' is the IPv6 loopback; any 127.* address is IPv4 loopback.
    return ip == '::1' or ip.startswith('127.')
|
|
|
|
|
|
def check_rate_limit(ip):
    """Sliding-window rate limiter; True when the request may proceed.

    Localhost clients (dashboard polling, health checks) are never
    throttled.  For everyone else the per-IP timestamp list is pruned to
    the current window and checked against the request budget.
    """
    if is_localhost(ip):
        return True
    now = time.time()
    with _rate_lock:
        # Drop timestamps that have aged out of the window.
        recent = [t for t in _rate_limits[ip] if now - t < _rate_limit_window]
        _rate_limits[ip] = recent
        if len(recent) >= _rate_limit_requests:
            return False
        # `recent` is the stored list, so this records the request.
        recent.append(now)
        return True
|
|
|
|
|
|
def get_security_headers(content_type):
    """Build the standard security headers for a response.

    Args:
        content_type: the response's Content-Type value.

    Returns:
        List of (header, value) tuples; HTML responses additionally get a
        Content-Security-Policy.
    """
    headers = [
        ('X-Content-Type-Options', 'nosniff'),
        ('X-Frame-Options', 'DENY'),
        ('Referrer-Policy', 'strict-origin-when-cross-origin'),
    ]
    if 'text/html' in content_type:
        csp = ("default-src 'self'; "
               "script-src 'self' 'unsafe-inline'; "
               "style-src 'self' 'unsafe-inline'; "
               "img-src 'self' data: https:; "
               "connect-src 'self'")
        headers.append(('Content-Security-Policy', csp))
    return headers
|
|
|
|
|
|
def get_system_stats():
    """Collect system resource statistics.

    Returns a dict with load averages, CPU count, memory/disk usage,
    process RSS/thread counts, leak-tracking counters and GC stats.
    Linux-specific sources (/proc) degrade to zeros on other platforms.
    Also updates the module-level RSS sample history used for leak
    detection, so this is expected to be called periodically.
    """
    stats = {}

    # Load average (1, 5, 15 min); os.getloadavg is POSIX-only.
    try:
        load = os.getloadavg()
        stats['load_1m'] = round(load[0], 2)
        stats['load_5m'] = round(load[1], 2)
        stats['load_15m'] = round(load[2], 2)
    except (OSError, AttributeError):
        stats['load_1m'] = stats['load_5m'] = stats['load_15m'] = 0

    # CPU count
    try:
        stats['cpu_count'] = os.sysconf('SC_NPROCESSORS_ONLN')
    except (ValueError, OSError, AttributeError):
        stats['cpu_count'] = 1

    # Memory from /proc/meminfo (Linux)
    try:
        with open('/proc/meminfo', 'r') as f:
            meminfo = {}
            for line in f:
                parts = line.split()
                if len(parts) >= 2:
                    meminfo[parts[0].rstrip(':')] = int(parts[1]) * 1024  # KB to bytes
        total = meminfo.get('MemTotal', 0)
        # MemAvailable accounts for reclaimable cache; older kernels only
        # expose MemFree, so fall back to that.
        available = meminfo.get('MemAvailable', meminfo.get('MemFree', 0))
        stats['mem_total'] = total
        stats['mem_available'] = available
        stats['mem_used'] = total - available
        stats['mem_pct'] = round((total - available) / total * 100, 1) if total > 0 else 0
    except (IOError, KeyError, ZeroDivisionError):
        stats['mem_total'] = stats['mem_available'] = stats['mem_used'] = 0
        stats['mem_pct'] = 0

    # Disk usage for the data directory (falls back to CWD when absent).
    try:
        st = os.statvfs('data' if os.path.exists('data') else '.')
        total = st.f_blocks * st.f_frsize
        # f_bavail = blocks available to unprivileged users.
        free = st.f_bavail * st.f_frsize
        used = total - free
        stats['disk_total'] = total
        stats['disk_free'] = free
        stats['disk_used'] = used
        stats['disk_pct'] = round(used / total * 100, 1) if total > 0 else 0
    except (OSError, ZeroDivisionError):
        stats['disk_total'] = stats['disk_free'] = stats['disk_used'] = 0
        stats['disk_pct'] = 0

    # Process stats from /proc/self/status
    try:
        with open('/proc/self/status', 'r') as f:
            for line in f:
                if line.startswith('VmRSS:'):
                    stats['proc_rss'] = int(line.split()[1]) * 1024  # KB to bytes
                elif line.startswith('Threads:'):
                    stats['proc_threads'] = int(line.split()[1])
    except (IOError, ValueError, IndexError):
        stats['proc_rss'] = 0
        stats['proc_threads'] = 0

    # Memory leak detection: track start/peak RSS and keep a bounded
    # history of (timestamp, rss) samples.
    global _memory_samples, _peak_rss, _start_rss
    rss = stats.get('proc_rss', 0)
    if rss > 0:
        if _start_rss == 0:
            _start_rss = rss
        if rss > _peak_rss:
            _peak_rss = rss
        _memory_samples.append((time.time(), rss))
        # Bound the sample history (oldest-first eviction).
        if len(_memory_samples) > _memory_sample_max:
            _memory_samples.pop(0)

    stats['proc_rss_peak'] = _peak_rss
    stats['proc_rss_start'] = _start_rss
    stats['proc_rss_growth'] = rss - _start_rss if _start_rss > 0 else 0

    # GC stats for leak detection
    try:
        gc_counts = gc.get_count()
        stats['gc_count_gen0'] = gc_counts[0]
        stats['gc_count_gen1'] = gc_counts[1]
        stats['gc_count_gen2'] = gc_counts[2]
        # Cache gc.get_objects() - expensive call (~23ms per the original
        # author's measurement), refreshed every _gc_objects_ttl seconds.
        global _gc_objects_cache
        now = time.time()
        if now - _gc_objects_cache['time'] > _gc_objects_ttl:
            _gc_objects_cache['value'] = len(gc.get_objects())
            _gc_objects_cache['time'] = now
        stats['gc_objects'] = _gc_objects_cache['value']
    except Exception:
        stats['gc_count_gen0'] = stats['gc_count_gen1'] = stats['gc_count_gen2'] = 0
        stats['gc_objects'] = 0

    return stats
|
|
|
|
|
|
def get_db_health(db):
    """Get database health and statistics (cached for _db_health_ttl seconds).

    Returns a dict with file/page sizes, anonymity and latency breakdowns,
    recent-activity counts and dead/failing totals.  Query failures leave a
    partial (possibly empty) dict, which is still cached to avoid
    re-hitting a broken database every poll.
    """
    global _db_health_cache
    now = time.time()
    # Serve from cache while fresh.
    if now - _db_health_cache['time'] < _db_health_ttl:
        return _db_health_cache['value']

    stats = {}
    try:
        # Database file size; fall back to the default path when the handle
        # does not expose one.
        db_path = db.path if hasattr(db, 'path') else 'data/proxies.sqlite'
        if os.path.exists(db_path):
            stats['db_size'] = os.path.getsize(db_path)
        else:
            stats['db_size'] = 0

        # Page stats from pragma (freelist_count = reclaimable free pages).
        row = db.execute('PRAGMA page_count').fetchone()
        stats['page_count'] = row[0] if row else 0
        row = db.execute('PRAGMA page_size').fetchone()
        stats['page_size'] = row[0] if row else 0
        row = db.execute('PRAGMA freelist_count').fetchone()
        stats['freelist_count'] = row[0] if row else 0

        # Anonymity breakdown for working proxies.
        rows = db.execute(
            'SELECT anonymity, COUNT(*) FROM proxylist WHERE failed=0 GROUP BY anonymity'
        ).fetchall()
        stats['anonymity'] = {r[0] or 'unknown': r[1] for r in rows}

        # Latency stats.
        # NOTE(review): reads avg_latency while submit_results() writes a
        # `latency` column - presumably both exist in the schema; confirm.
        row = db.execute(
            'SELECT AVG(avg_latency), MIN(avg_latency), MAX(avg_latency) '
            'FROM proxylist WHERE failed=0 AND avg_latency > 0'
        ).fetchone()
        if row and row[0]:
            stats['db_avg_latency'] = round(row[0], 1)
            stats['db_min_latency'] = round(row[1], 1)
            stats['db_max_latency'] = round(row[2], 1)
        else:
            stats['db_avg_latency'] = stats['db_min_latency'] = stats['db_max_latency'] = 0

        # Recent activity (note: rebinds `now` to an int for the queries).
        now = int(time.time())
        row = db.execute(
            'SELECT COUNT(*) FROM proxylist WHERE tested > ?', (now - 3600,)
        ).fetchone()
        stats['tested_last_hour'] = row[0] if row else 0

        row = db.execute(
            'SELECT COUNT(*) FROM proxylist WHERE added > ?', (now - 86400,)
        ).fetchone()
        stats['added_last_day'] = row[0] if row else 0

        # Dead proxies count (permanently dead = -1, failing = positive)
        row = db.execute(
            'SELECT COUNT(*) FROM proxylist WHERE failed = -1'
        ).fetchone()
        stats['dead_count'] = row[0] if row else 0

        # Failing proxies count (positive fail count but not permanently dead)
        row = db.execute(
            'SELECT COUNT(*) FROM proxylist WHERE failed > 0'
        ).fetchone()
        stats['failing_count'] = row[0] if row else 0

    except Exception:
        # Best-effort: keep whatever stats were gathered before the failure.
        pass

    # Update cache
    _db_health_cache['value'] = stats
    _db_health_cache['time'] = time.time()
    return stats
|
|
|
|
# Detect if gevent has monkey-patched the environment
|
|
try:
|
|
from gevent import monkey
|
|
GEVENT_PATCHED = monkey.is_module_patched('socket')
|
|
except ImportError:
|
|
GEVENT_PATCHED = False
|
|
|
|
if GEVENT_PATCHED:
|
|
from gevent.pywsgi import WSGIServer
|
|
|
|
|
|
def load_static_libs():
    """Read every file under static/lib into the in-memory cache.

    Called once at startup; individual read failures are logged and the
    remaining files are still loaded.
    """
    global _LIB_CACHE
    if not os.path.isdir(_STATIC_LIB_DIR):
        _log('static/lib directory not found: %s' % _STATIC_LIB_DIR, 'warn')
        return
    for fname in os.listdir(_STATIC_LIB_DIR):
        fpath = os.path.join(_STATIC_LIB_DIR, fname)
        if not os.path.isfile(fpath):
            continue
        try:
            with open(fpath, 'rb') as fh:
                _LIB_CACHE[fname] = fh.read()
        except IOError as err:
            _log('failed to load %s: %s' % (fname, err), 'warn')
            continue
        _log('loaded static lib: %s (%d bytes)' % (fname, len(_LIB_CACHE[fname])), 'debug')
    _log('loaded %d static library files' % len(_LIB_CACHE), 'info')
|
|
|
|
|
|
def get_static_lib(filename):
    """Return the cached bytes for a static/lib file, or None when absent."""
    try:
        return _LIB_CACHE[filename]
    except KeyError:
        return None
|
|
|
|
|
|
def load_static_files(theme):
    """Load dashboard static files into cache at startup.

    Args:
        theme: dict of color name -> color value for CSS variable substitution
    """
    global _STATIC_CACHE
    base_dir = os.path.dirname(os.path.abspath(__file__))
    files = {
        'dashboard.html': 'static/dashboard.html',
        'map.html': 'static/map.html',
        'mitm.html': 'static/mitm.html',
        'workers.html': 'static/workers.html',
        'style.css': 'static/style.css',
        'dashboard.js': 'static/dashboard.js',
        'map.js': 'static/map.js',
        'mitm.js': 'static/mitm.js',
    }
    for key, relpath in files.items():
        fpath = os.path.join(base_dir, relpath)
        if not os.path.isfile(fpath):
            _log('static file not found: %s' % fpath, 'warn')
            continue
        try:
            with open(fpath, 'rb') as fh:
                content = fh.read()
        except IOError as err:
            _log('failed to load %s: %s' % (fpath, err), 'warn')
            continue
        # Substitute '{name}' placeholders in the stylesheet with theme colors.
        if key == 'style.css' and theme:
            for name, val in theme.items():
                content = content.replace('{' + name + '}', val)
        _STATIC_CACHE[key] = content
        _log('loaded static file: %s (%d bytes)' % (key, len(content)), 'debug')
    _log('loaded %d dashboard static files' % len(_STATIC_CACHE), 'info')
|
|
|
|
|
|
def get_static_file(filename):
    """Return a cached dashboard asset by key, or None when not loaded."""
    cached = _STATIC_CACHE.get(filename)
    return cached
|
|
|
|
|
|
# Theme colors - dark tiles on a lighter background.  Values are substituted
# into style.css by load_static_files() via '{name}' placeholders.
THEME = {
    'bg': '#1e2738',        # page background
    'card': '#181f2a',      # primary tile background
    'card_alt': '#212a36',  # alternate tile background
    'border': '#3a4858',
    'text': '#e8eef5',      # primary text
    'dim': '#8b929b',       # secondary/dim text
    'green': '#3fb950',
    'red': '#f85149',
    'yellow': '#d29922',
    'blue': '#58a6ff',
    'purple': '#a371f7',
    'cyan': '#39c5cf',
    'orange': '#db6d28',
    'pink': '#db61a2',
    'map_bg': '#1e2738',  # Match dashboard background
}
|
|
|
|
|
|
class ProxyAPIHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """HTTP request handler for the proxy API (plain BaseHTTPServer path).

    Used when gevent has not monkey-patched the environment; the gevent
    path serves equivalent routes via ProxyAPIServer._wsgi_app.  The
    `database` and `stats_provider` class attributes are injected by
    ProxyAPIServer before the server starts.
    """

    # Path to the sqlite database file; set by ProxyAPIServer.
    database = None
    # Optional zero-arg callable returning a dict of runtime stats.
    stats_provider = None

    def log_message(self, format, *args):
        """Suppress BaseHTTPServer's default per-request stderr logging."""
        pass

    def send_response_body(self, body, content_type, status=200):
        """Write status line, standard + security headers, and the body."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', len(body))
        self.send_header('Cache-Control', 'no-cache')
        for header, value in get_security_headers(content_type):
            self.send_header(header, value)
        self.end_headers()
        self.wfile.write(body)

    def send_json(self, data, status=200):
        """Serialize `data` as pretty-printed JSON and send it."""
        self.send_response_body(json.dumps(data, indent=2), 'application/json', status)

    def send_text(self, text, status=200):
        self.send_response_body(text, 'text/plain', status)

    def send_html(self, html, status=200):
        self.send_response_body(html, 'text/html; charset=utf-8', status)

    def send_css(self, css, status=200):
        self.send_response_body(css, 'text/css; charset=utf-8', status)

    def send_js(self, js, status=200):
        self.send_response_body(js, 'application/javascript; charset=utf-8', status)

    def do_GET(self):
        """Dispatch GET requests through the route table, rate-limited."""
        client_ip = self.client_address[0] if self.client_address else ''
        if not check_rate_limit(client_ip):
            # Built by hand (not send_response_body) to attach Retry-After.
            self.send_response(429)
            self.send_header('Content-Type', 'application/json')
            self.send_header('Retry-After', str(_rate_limit_window))
            for header, value in get_security_headers('application/json'):
                self.send_header(header, value)
            self.end_headers()
            self.wfile.write(json.dumps({'error': 'rate limited', 'retry_after': _rate_limit_window}))
            return

        path = self.path.split('?')[0]
        routes = {
            '/': self.handle_index,
            '/dashboard': self.handle_dashboard,
            '/map': self.handle_map,
            '/static/style.css': self.handle_css,
            '/static/dashboard.js': self.handle_js,
            '/api/stats': self.handle_stats,
            '/api/countries': self.handle_countries,
            '/proxies': self.handle_proxies,
            '/proxies/count': self.handle_count,
            '/health': self.handle_health,
        }
        handler = routes.get(path)
        if handler:
            handler()
        else:
            self.send_json({'error': 'not found'}, 404)

    def handle_index(self):
        """Describe the available endpoints."""
        self.send_json({
            'endpoints': {
                '/dashboard': 'web dashboard (HTML)',
                '/api/stats': 'runtime statistics (JSON)',
                '/proxies': 'list working proxies (params: limit, proto, country, asn)',
                '/proxies/count': 'count working proxies',
                '/health': 'health check',
            }
        })

    def _send_static(self, key, sender):
        """Serve a cached static asset via `sender`, else a 500 JSON error."""
        content = get_static_file(key)
        if content is not None:
            sender(content)
        else:
            self.send_json({'error': '%s not loaded' % key}, 500)

    def handle_dashboard(self):
        # Fixed: previously referenced DASHBOARD_HTML, a global defined
        # nowhere in this module; serve from the startup static cache as
        # the WSGI route handler does.
        self._send_static('dashboard.html', self.send_html)

    def handle_map(self):
        # Fixed: previously referenced an undefined MAP_HTML global.
        self._send_static('map.html', self.send_html)

    def handle_css(self):
        # Fixed: previously referenced an undefined DASHBOARD_CSS global.
        self._send_static('style.css', self.send_css)

    def handle_js(self):
        # Fixed: previously referenced an undefined DASHBOARD_JS global.
        self._send_static('dashboard.js', self.send_js)

    def handle_countries(self):
        """Return all countries with working-proxy counts."""
        try:
            db = mysqlite.mysqlite(self.database, str)
            rows = db.execute(
                'SELECT country, COUNT(*) as c FROM proxylist WHERE failed=0 AND country IS NOT NULL '
                'GROUP BY country ORDER BY c DESC'
            ).fetchall()
            countries = {r[0]: r[1] for r in rows}
            self.send_json({'countries': countries})
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

    def get_db_stats(self):
        """Collect proxy counts by status, protocol, country and ASN.

        Returns a stats dict, or {'error': ...} when the database is
        unavailable.
        """
        try:
            db = mysqlite.mysqlite(self.database, str)
            stats = {}

            # Total counts
            row = db.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone()
            stats['working'] = row[0] if row else 0
            row = db.execute('SELECT COUNT(*) FROM proxylist').fetchone()
            stats['total'] = row[0] if row else 0

            # By protocol
            rows = db.execute(
                'SELECT proto, COUNT(*) FROM proxylist WHERE failed=0 GROUP BY proto'
            ).fetchall()
            stats['by_proto'] = {r[0] or 'unknown': r[1] for r in rows}

            # Top countries
            rows = db.execute(
                'SELECT country, COUNT(*) as c FROM proxylist WHERE failed=0 AND country IS NOT NULL '
                'GROUP BY country ORDER BY c DESC LIMIT 10'
            ).fetchall()
            stats['top_countries'] = [{'code': r[0], 'count': r[1]} for r in rows]

            # Top ASNs
            rows = db.execute(
                'SELECT asn, COUNT(*) as c FROM proxylist WHERE failed=0 AND asn IS NOT NULL '
                'GROUP BY asn ORDER BY c DESC LIMIT 10'
            ).fetchall()
            stats['top_asns'] = [(r[0], r[1]) for r in rows]

            return stats
        except Exception as e:
            return {'error': str(e)}

    def handle_stats(self):
        """Aggregate runtime, system and database statistics as JSON."""
        stats = {}

        # Runtime stats from the injected provider, when present.
        if self.stats_provider:
            try:
                stats.update(self.stats_provider())
            except Exception as e:
                _log('stats_provider error: %s' % str(e), 'error')

        stats['system'] = get_system_stats()

        # Database stats are best-effort; omit on failure.
        try:
            db = mysqlite.mysqlite(self.database, str)
            stats['db'] = self.get_db_stats()
            stats['db_health'] = get_db_health(db)
        except Exception:
            pass

        self.send_json(stats)

    def handle_proxies(self):
        """List working proxies (params: limit, proto, country, asn, format)."""
        params = {}
        if '?' in self.path:
            for pair in self.path.split('?')[1].split('&'):
                if '=' in pair:
                    k, v = pair.split('=', 1)
                    params[k] = v

        # Fixed: non-numeric limit/asn previously raised an unhandled
        # ValueError here; respond with 400 instead.
        try:
            limit = min(int(params.get('limit', 100)), 1000)
            asn = int(params['asn']) if params.get('asn') else None
        except ValueError:
            self.send_json({'error': 'limit and asn must be integers'}, 400)
            return
        proto = params.get('proto', '')
        country = params.get('country', '')
        fmt = params.get('format', 'json')

        sql = 'SELECT ip, port, proto, country, asn, avg_latency FROM proxylist WHERE failed=0'
        args = []
        if proto:
            sql += ' AND proto=?'
            args.append(proto)
        if country:
            sql += ' AND country=?'
            args.append(country.upper())
        if asn is not None:
            sql += ' AND asn=?'
            args.append(asn)
        sql += ' ORDER BY avg_latency ASC, tested DESC LIMIT ?'
        args.append(limit)

        try:
            db = mysqlite.mysqlite(self.database, str)
            rows = db.execute(sql, args).fetchall()
            if fmt == 'plain':
                # One "ip:port" per line for curl-style consumers.
                self.send_text('\n'.join('%s:%s' % (r[0], r[1]) for r in rows))
            else:
                proxies = [{
                    'ip': r[0], 'port': r[1], 'proto': r[2],
                    'country': r[3], 'asn': r[4], 'latency': r[5]
                } for r in rows]
                self.send_json({'count': len(proxies), 'proxies': proxies})
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

    def handle_count(self):
        """Return the number of currently-working proxies."""
        try:
            db = mysqlite.mysqlite(self.database, str)
            row = db.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone()
            self.send_json({'count': row[0] if row else 0})
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

    def handle_health(self):
        """Liveness probe: status, version and server timestamp."""
        self.send_json({'status': 'ok', 'version': __version__, 'timestamp': int(time.time())})
|
|
|
|
|
|
class ProxyAPIServer(threading.Thread):
|
|
"""Threaded HTTP API server.
|
|
|
|
Uses gevent's WSGIServer when running in a gevent-patched environment,
|
|
otherwise falls back to standard BaseHTTPServer.
|
|
"""
|
|
|
|
def __init__(self, host, port, database, stats_provider=None, profiling=False):
|
|
threading.Thread.__init__(self)
|
|
self.host = host
|
|
self.port = port
|
|
self.database = database
|
|
self.stats_provider = stats_provider
|
|
self.profiling = profiling
|
|
self.daemon = True
|
|
self.server = None
|
|
self._stop_event = threading.Event() if not GEVENT_PATCHED else None
|
|
# Load static library files into cache
|
|
load_static_libs()
|
|
# Load dashboard static files (HTML, CSS, JS) with theme substitution
|
|
load_static_files(THEME)
|
|
# Load worker registry from disk
|
|
load_workers()
|
|
|
|
    def _wsgi_app(self, environ, start_response):
        """WSGI application wrapper for gevent.

        Performs request parsing, method filtering, JSON body parsing and
        rate limiting, then delegates routing to self._handle_route().
        Returns the response as a single-element body list per WSGI.
        """
        path = environ.get('PATH_INFO', '/').split('?')[0]
        query_string = environ.get('QUERY_STRING', '')
        method = environ.get('REQUEST_METHOD', 'GET')
        remote_addr = environ.get('REMOTE_ADDR', '')

        # Parse query parameters (values are used verbatim - no urldecode).
        query_params = {}
        if query_string:
            for param in query_string.split('&'):
                if '=' in param:
                    k, v = param.split('=', 1)
                    query_params[k] = v

        # Only allow GET and POST
        if method not in ('GET', 'POST'):
            start_response('405 Method Not Allowed', [('Content-Type', 'text/plain')])
            return [b'Method not allowed']

        # POST only allowed for worker API endpoints
        post_endpoints = ('/api/register', '/api/results')
        if method == 'POST' and path not in post_endpoints:
            start_response('405 Method Not Allowed', [('Content-Type', 'text/plain')])
            return [b'POST not allowed for this endpoint']

        # Parse the POST body as JSON; an unparseable body is a 400.
        post_data = None
        if method == 'POST':
            try:
                content_length = int(environ.get('CONTENT_LENGTH', 0))
                if content_length > 0:
                    body = environ['wsgi.input'].read(content_length)
                    post_data = json.loads(body)
            except Exception:
                error_body = json.dumps({'error': 'invalid JSON body'})
                headers = [('Content-Type', 'application/json')]
                start_response('400 Bad Request', headers)
                return [error_body.encode('utf-8')]

        # Rate limiting check
        # NOTE(review): runs after the POST body has already been read, so a
        # throttled client still costs us the body parse - consider moving
        # this check earlier.
        if not check_rate_limit(remote_addr):
            error_body = json.dumps({'error': 'rate limited', 'retry_after': _rate_limit_window})
            headers = [
                ('Content-Type', 'application/json'),
                ('Content-Length', str(len(error_body))),
                ('Retry-After', str(_rate_limit_window)),
            ]
            headers.extend(get_security_headers('application/json'))
            start_response('429 Too Many Requests', headers)
            # NOTE(review): returned without .encode(); fine on Python 2
            # where str is bytes, would need encoding under Python 3.
            return [error_body]

        # Route handling
        try:
            response_body, content_type, status = self._handle_route(path, remote_addr, query_params, post_data)
            status_line = '%d %s' % (status, 'OK' if status == 200 else 'Error')
            headers = [
                ('Content-Type', content_type),
                ('Content-Length', str(len(response_body))),
                ('Cache-Control', 'no-cache'),
            ]
            headers.extend(get_security_headers(content_type))
            start_response(status_line, headers)
            # Encode unicode bodies; cached static assets are already bytes.
            return [response_body.encode('utf-8') if isinstance(response_body, unicode) else response_body]
        except Exception as e:
            # Last-resort handler: report any routing error as JSON 500.
            error_body = json.dumps({'error': str(e)})
            headers = [
                ('Content-Type', 'application/json'),
                ('Content-Length', str(len(error_body))),
            ]
            headers.extend(get_security_headers('application/json'))
            start_response('500 Internal Server Error', headers)
            return [error_body]
|
|
|
|
def _handle_route(self, path, remote_addr='', query_params=None, post_data=None):
|
|
"""Handle route and return (body, content_type, status)."""
|
|
if query_params is None:
|
|
query_params = {}
|
|
if path == '/':
|
|
body = json.dumps({
|
|
'endpoints': {
|
|
'/dashboard': 'web dashboard (HTML)',
|
|
'/map': 'proxy distribution by country (HTML)',
|
|
'/mitm': 'MITM certificate search (HTML)',
|
|
'/api/stats': 'runtime statistics (JSON)',
|
|
'/api/mitm': 'MITM certificate statistics (JSON)',
|
|
'/api/countries': 'proxy counts by country (JSON)',
|
|
'/api/work': 'get work batch for worker (params: key, count)',
|
|
'/api/results': 'submit test results (POST, params: key)',
|
|
'/api/register': 'register as worker (POST)',
|
|
'/api/workers': 'list connected workers',
|
|
'/proxies': 'list working proxies (params: limit, proto, country, asn)',
|
|
'/proxies/count': 'count working proxies',
|
|
'/health': 'health check',
|
|
}
|
|
}, indent=2)
|
|
return body, 'application/json', 200
|
|
elif path == '/dashboard':
|
|
content = get_static_file('dashboard.html')
|
|
if content:
|
|
return content, 'text/html; charset=utf-8', 200
|
|
return '{"error": "dashboard.html not loaded"}', 'application/json', 500
|
|
elif path == '/map':
|
|
content = get_static_file('map.html')
|
|
if content:
|
|
return content, 'text/html; charset=utf-8', 200
|
|
return '{"error": "map.html not loaded"}', 'application/json', 500
|
|
elif path == '/mitm':
|
|
content = get_static_file('mitm.html')
|
|
if content:
|
|
return content, 'text/html; charset=utf-8', 200
|
|
return '{"error": "mitm.html not loaded"}', 'application/json', 500
|
|
elif path == '/workers':
|
|
content = get_static_file('workers.html')
|
|
if content:
|
|
return content, 'text/html; charset=utf-8', 200
|
|
return '{"error": "workers.html not loaded"}', 'application/json', 500
|
|
elif path == '/static/style.css':
|
|
content = get_static_file('style.css')
|
|
if content:
|
|
return content, 'text/css; charset=utf-8', 200
|
|
return '{"error": "style.css not loaded"}', 'application/json', 500
|
|
elif path == '/static/dashboard.js':
|
|
content = get_static_file('dashboard.js')
|
|
if content:
|
|
return content, 'application/javascript; charset=utf-8', 200
|
|
return '{"error": "dashboard.js not loaded"}', 'application/json', 500
|
|
elif path == '/static/map.js':
|
|
content = get_static_file('map.js')
|
|
if content:
|
|
return content, 'application/javascript; charset=utf-8', 200
|
|
return '{"error": "map.js not loaded"}', 'application/json', 500
|
|
elif path == '/static/mitm.js':
|
|
content = get_static_file('mitm.js')
|
|
if content:
|
|
return content, 'application/javascript; charset=utf-8', 200
|
|
return '{"error": "mitm.js not loaded"}', 'application/json', 500
|
|
elif path.startswith('/static/lib/'):
|
|
# Serve static library files from cache
|
|
filename = path.split('/')[-1]
|
|
content = get_static_lib(filename)
|
|
if content:
|
|
ext = os.path.splitext(filename)[1]
|
|
content_type = _CONTENT_TYPES.get(ext, 'application/octet-stream')
|
|
return content, content_type, 200
|
|
return '{"error": "not found"}', 'application/json', 404
|
|
elif path == '/api/stats':
|
|
stats = {}
|
|
if self.stats_provider:
|
|
stats = self.stats_provider()
|
|
# Add system stats
|
|
stats['system'] = get_system_stats()
|
|
# Add database stats
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
stats['db'] = self._get_db_stats(db)
|
|
stats['db_health'] = get_db_health(db)
|
|
except Exception:
|
|
pass
|
|
# Add profiling flag (from constructor or stats_provider)
|
|
if 'profiling' not in stats:
|
|
stats['profiling'] = self.profiling
|
|
return json.dumps(stats, indent=2), 'application/json', 200
|
|
elif path == '/api/mitm':
|
|
# MITM certificate statistics
|
|
if self.stats_provider:
|
|
try:
|
|
stats = self.stats_provider()
|
|
mitm = stats.get('mitm', {})
|
|
return json.dumps(mitm, indent=2), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
return json.dumps({'error': 'stats not available'}), 'application/json', 500
|
|
elif path == '/api/countries':
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
rows = db.execute(
|
|
'SELECT country, COUNT(*) as c FROM proxylist WHERE failed=0 AND country IS NOT NULL '
|
|
'GROUP BY country ORDER BY c DESC'
|
|
).fetchall()
|
|
countries = {r[0]: r[1] for r in rows}
|
|
return json.dumps({'countries': countries}, indent=2), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
elif path == '/api/locations':
|
|
# Return proxy locations aggregated by lat/lon grid (0.5 degree cells)
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
rows = db.execute(
|
|
'SELECT ROUND(latitude, 1) as lat, ROUND(longitude, 1) as lon, '
|
|
'country, anonymity, COUNT(*) as c FROM proxylist '
|
|
'WHERE failed=0 AND latitude IS NOT NULL AND longitude IS NOT NULL '
|
|
'GROUP BY lat, lon, country, anonymity ORDER BY c DESC'
|
|
).fetchall()
|
|
locations = [{'lat': r[0], 'lon': r[1], 'country': r[2], 'anon': r[3] or 'unknown', 'count': r[4]} for r in rows]
|
|
return json.dumps({'locations': locations}, indent=2), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
elif path == '/proxies':
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
rows = db.execute(
|
|
'SELECT proxy, proto, country, asn FROM proxylist WHERE failed=0 LIMIT 100'
|
|
).fetchall()
|
|
proxies = [{'proxy': r[0], 'proto': r[1], 'country': r[2], 'asn': r[3]} for r in rows]
|
|
return json.dumps({'proxies': proxies}, indent=2), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
elif path == '/proxies/count':
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
row = db.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone()
|
|
return json.dumps({'count': row[0] if row else 0}), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
elif path == '/api/memory':
|
|
# Memory profiling endpoint (localhost only)
|
|
if not is_localhost(remote_addr):
|
|
return json.dumps({'error': 'not available'}), 'application/json', 404
|
|
try:
|
|
mem = {}
|
|
|
|
# Process memory from /proc/self/status
|
|
try:
|
|
with open('/proc/self/status', 'r') as f:
|
|
for line in f:
|
|
if line.startswith('Vm'):
|
|
parts = line.split()
|
|
key = parts[0].rstrip(':')
|
|
mem[key] = int(parts[1]) * 1024 # Convert to bytes
|
|
except IOError:
|
|
pass
|
|
|
|
# GC stats
|
|
gc_stats = {
|
|
'collections': gc.get_count(),
|
|
'threshold': gc.get_threshold(),
|
|
'objects': len(gc.get_objects()),
|
|
}
|
|
|
|
# Object type counts (top 20)
|
|
type_counts = {}
|
|
for obj in gc.get_objects():
|
|
t = type(obj).__name__
|
|
type_counts[t] = type_counts.get(t, 0) + 1
|
|
top_types = sorted(type_counts.items(), key=lambda x: -x[1])[:20]
|
|
|
|
# Memory samples history
|
|
samples = []
|
|
for ts, rss in _memory_samples[-30:]:
|
|
samples.append({'time': int(ts), 'rss': rss})
|
|
|
|
result = {
|
|
'process': mem,
|
|
'gc': gc_stats,
|
|
'top_types': [{'type': t, 'count': c} for t, c in top_types],
|
|
'samples': samples,
|
|
'peak_rss': _peak_rss,
|
|
'start_rss': _start_rss,
|
|
'has_objgraph': _has_objgraph,
|
|
'has_pympler': _has_pympler,
|
|
}
|
|
|
|
# Objgraph most common types (if available)
|
|
if _has_objgraph:
|
|
try:
|
|
result['objgraph_common'] = objgraph.most_common_types(limit=15)
|
|
except Exception:
|
|
pass
|
|
|
|
# Pympler summary (if available)
|
|
if _has_pympler:
|
|
try:
|
|
all_objects = muppy.get_objects()
|
|
sum_table = summary.summarize(all_objects)
|
|
result['pympler_summary'] = [
|
|
{'type': row[0], 'count': row[1], 'size': row[2]}
|
|
for row in sum_table[:20]
|
|
]
|
|
except Exception as e:
|
|
result['pympler_error'] = str(e)
|
|
|
|
return json.dumps(result, indent=2), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
elif path == '/health':
|
|
return json.dumps({'status': 'ok', 'version': __version__, 'timestamp': int(time.time())}), 'application/json', 200
|
|
|
|
# Worker API endpoints
|
|
elif path == '/api/register':
|
|
# Register a new worker (POST)
|
|
if not post_data:
|
|
return json.dumps({'error': 'POST body required'}), 'application/json', 400
|
|
name = post_data.get('name', 'worker-%s' % remote_addr)
|
|
master_key = post_data.get('master_key', '')
|
|
# Require master key for registration (if set)
|
|
if _master_key and master_key != _master_key:
|
|
return json.dumps({'error': 'invalid master key'}), 'application/json', 403
|
|
worker_id, worker_key = register_worker(name, remote_addr)
|
|
_log('worker registered: %s (%s) from %s' % (name, worker_id, remote_addr), 'info')
|
|
return json.dumps({
|
|
'worker_id': worker_id,
|
|
'worker_key': worker_key,
|
|
'message': 'registered successfully',
|
|
}), 'application/json', 200
|
|
|
|
elif path == '/api/work':
|
|
# Get batch of proxies to test (GET)
|
|
key = query_params.get('key', '')
|
|
if not validate_worker_key(key):
|
|
return json.dumps({'error': 'invalid worker key'}), 'application/json', 403
|
|
worker_id, _ = get_worker_by_key(key)
|
|
if not worker_id:
|
|
return json.dumps({'error': 'worker not found'}), 'application/json', 404
|
|
count = int(query_params.get('count', 100))
|
|
count = min(count, 500) # Cap at 500
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
proxies = claim_work(db, worker_id, count)
|
|
update_worker_heartbeat(worker_id)
|
|
return json.dumps({
|
|
'worker_id': worker_id,
|
|
'count': len(proxies),
|
|
'proxies': proxies,
|
|
}), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
|
|
elif path == '/api/results':
|
|
# Submit test results (POST)
|
|
key = query_params.get('key', '')
|
|
if not validate_worker_key(key):
|
|
return json.dumps({'error': 'invalid worker key'}), 'application/json', 403
|
|
worker_id, _ = get_worker_by_key(key)
|
|
if not worker_id:
|
|
return json.dumps({'error': 'worker not found'}), 'application/json', 404
|
|
if not post_data:
|
|
return json.dumps({'error': 'POST body required'}), 'application/json', 400
|
|
results = post_data.get('results', [])
|
|
if not results:
|
|
return json.dumps({'error': 'no results provided'}), 'application/json', 400
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
processed = submit_results(db, worker_id, results)
|
|
return json.dumps({
|
|
'worker_id': worker_id,
|
|
'processed': processed,
|
|
'message': 'results submitted',
|
|
}), 'application/json', 200
|
|
except Exception as e:
|
|
return json.dumps({'error': str(e)}), 'application/json', 500
|
|
|
|
elif path == '/api/heartbeat':
|
|
# Worker heartbeat with Tor status (POST)
|
|
key = query_params.get('key', '')
|
|
if not validate_worker_key(key):
|
|
return json.dumps({'error': 'invalid worker key'}), 'application/json', 403
|
|
worker_id, _ = get_worker_by_key(key)
|
|
if not worker_id:
|
|
return json.dumps({'error': 'worker not found'}), 'application/json', 404
|
|
# Update worker Tor and profiling status
|
|
tor_ok = post_data.get('tor_ok', True) if post_data else True
|
|
tor_ip = post_data.get('tor_ip') if post_data else None
|
|
profiling = post_data.get('profiling', False) if post_data else False
|
|
now = time.time()
|
|
with _workers_lock:
|
|
if worker_id in _workers:
|
|
_workers[worker_id]['last_seen'] = now
|
|
_workers[worker_id]['tor_ok'] = tor_ok
|
|
_workers[worker_id]['tor_ip'] = tor_ip
|
|
_workers[worker_id]['tor_last_check'] = now
|
|
_workers[worker_id]['profiling'] = profiling
|
|
return json.dumps({
|
|
'worker_id': worker_id,
|
|
'message': 'heartbeat received',
|
|
}), 'application/json', 200
|
|
|
|
elif path == '/api/workers':
|
|
# List connected workers
|
|
now = time.time()
|
|
with _workers_lock:
|
|
workers = []
|
|
total_tested = 0
|
|
total_working = 0
|
|
total_failed = 0
|
|
for wid, info in _workers.items():
|
|
tested = info.get('proxies_tested', 0)
|
|
working = info.get('proxies_working', 0)
|
|
failed = info.get('proxies_failed', 0)
|
|
total_latency = info.get('total_latency', 0)
|
|
avg_latency = round(total_latency / working, 2) if working > 0 else 0
|
|
success_rate = round(100 * working / tested, 1) if tested > 0 else 0
|
|
total_tested += tested
|
|
total_working += working
|
|
total_failed += failed
|
|
# Tor status with age check
|
|
tor_ok = info.get('tor_ok', True)
|
|
tor_ip = info.get('tor_ip')
|
|
tor_last_check = info.get('tor_last_check', 0)
|
|
tor_age = int(now - tor_last_check) if tor_last_check else None
|
|
worker_profiling = info.get('profiling', False)
|
|
workers.append({
|
|
'id': wid,
|
|
'name': info['name'],
|
|
'ip': info['ip'],
|
|
'registered': int(info['registered']),
|
|
'last_seen': int(info['last_seen']),
|
|
'age': int(now - info['last_seen']),
|
|
'jobs_completed': info.get('jobs_completed', 0),
|
|
'proxies_tested': tested,
|
|
'proxies_working': working,
|
|
'proxies_failed': failed,
|
|
'success_rate': success_rate,
|
|
'avg_latency': avg_latency,
|
|
'last_batch_size': info.get('last_batch_size', 0),
|
|
'last_batch_working': info.get('last_batch_working', 0),
|
|
'active': (now - info['last_seen']) < 120,
|
|
'tor_ok': tor_ok,
|
|
'tor_ip': tor_ip,
|
|
'tor_age': tor_age,
|
|
'profiling': worker_profiling,
|
|
})
|
|
# Sort by name for consistent display
|
|
workers.sort(key=lambda w: w['name'])
|
|
# Get queue status from database
|
|
queue_stats = {'pending': 0, 'claimed': 0, 'due': 0}
|
|
try:
|
|
db = mysqlite.mysqlite(self.database, str)
|
|
# Pending = total eligible (not dead)
|
|
row = db.execute('SELECT COUNT(*) FROM proxylist WHERE failed >= 0 AND failed < 5').fetchone()
|
|
queue_stats['pending'] = row[0] if row else 0
|
|
# Claimed = currently being tested by workers
|
|
with _work_claims_lock:
|
|
queue_stats['claimed'] = len(_work_claims)
|
|
# Due = ready for testing (respecting cooldown)
|
|
now_int = int(time.time())
|
|
checktime = 1800 # 30 min base
|
|
perfail_checktime = 3600 # +1 hour per failure
|
|
max_fail = 5
|
|
row = db.execute('''
|
|
SELECT COUNT(*) FROM proxylist
|
|
WHERE failed >= 0 AND failed < ?
|
|
AND (tested IS NULL OR (tested + ? + (failed * ?)) < ?)
|
|
''', (max_fail, checktime, perfail_checktime, now_int)).fetchone()
|
|
due_total = row[0] if row else 0
|
|
queue_stats['due'] = max(0, due_total - queue_stats['claimed'])
|
|
db.close()
|
|
except Exception:
|
|
pass
|
|
return json.dumps({
|
|
'workers': workers,
|
|
'total': len(workers),
|
|
'active': sum(1 for w in workers if w['active']),
|
|
'summary': {
|
|
'total_tested': total_tested,
|
|
'total_working': total_working,
|
|
'total_failed': total_failed,
|
|
'overall_success_rate': round(100 * total_working / total_tested, 1) if total_tested > 0 else 0,
|
|
},
|
|
'queue': queue_stats,
|
|
}, indent=2), 'application/json', 200
|
|
|
|
else:
|
|
return json.dumps({'error': 'not found'}), 'application/json', 404
|
|
|
|
def _get_db_stats(self, db):
|
|
"""Get database statistics."""
|
|
stats = {}
|
|
try:
|
|
# By protocol
|
|
rows = db.execute(
|
|
'SELECT proto, COUNT(*) FROM proxylist WHERE failed=0 GROUP BY proto'
|
|
).fetchall()
|
|
stats['by_proto'] = {r[0]: r[1] for r in rows if r[0]}
|
|
|
|
# Top countries
|
|
rows = db.execute(
|
|
'SELECT country, COUNT(*) as cnt FROM proxylist WHERE failed=0 AND country IS NOT NULL '
|
|
'GROUP BY country ORDER BY cnt DESC LIMIT 10'
|
|
).fetchall()
|
|
stats['top_countries'] = [{'code': r[0], 'count': r[1]} for r in rows]
|
|
|
|
# Total counts
|
|
row = db.execute('SELECT COUNT(*) FROM proxylist WHERE failed=0').fetchone()
|
|
stats['working'] = row[0] if row else 0
|
|
row = db.execute('SELECT COUNT(*) FROM proxylist').fetchone()
|
|
stats['total'] = row[0] if row else 0
|
|
|
|
# Proxies due for testing (eligible for worker claims)
|
|
now_int = int(time.time())
|
|
checktime = 1800 # 30 min base cooldown
|
|
perfail_checktime = 3600 # +1 hour per failure
|
|
max_fail = 5
|
|
row = db.execute('''
|
|
SELECT COUNT(*) FROM proxylist
|
|
WHERE failed >= 0 AND failed < ?
|
|
AND (tested IS NULL OR (tested + ? + (failed * ?)) < ?)
|
|
''', (max_fail, checktime, perfail_checktime, now_int)).fetchone()
|
|
due_total = row[0] if row else 0
|
|
# Subtract currently claimed
|
|
with _work_claims_lock:
|
|
claimed_count = len(_work_claims)
|
|
stats['due'] = max(0, due_total - claimed_count)
|
|
stats['claimed'] = claimed_count
|
|
except Exception:
|
|
pass
|
|
return stats
|
|
|
|
def run(self):
|
|
ProxyAPIHandler.database = self.database
|
|
ProxyAPIHandler.stats_provider = self.stats_provider
|
|
|
|
if GEVENT_PATCHED:
|
|
# Use gevent's WSGIServer for proper async handling
|
|
self.server = WSGIServer((self.host, self.port), self._wsgi_app, log=None)
|
|
_log('httpd listening on %s:%d (gevent)' % (self.host, self.port), 'info')
|
|
self.server.serve_forever()
|
|
else:
|
|
# Standard BaseHTTPServer for non-gevent environments
|
|
self.server = BaseHTTPServer.HTTPServer((self.host, self.port), ProxyAPIHandler)
|
|
_log('httpd listening on %s:%d' % (self.host, self.port), 'info')
|
|
self.server.serve_forever()
|
|
|
|
def stop(self):
|
|
if self.server:
|
|
if GEVENT_PATCHED:
|
|
self.server.stop()
|
|
else:
|
|
self.server.shutdown()
|
|
|
|
|
|
if __name__ == '__main__':
    # Standalone test entry point: httpd.py [database] [port]
    # NOTE: the redundant function-local `import sys` was removed -- sys
    # is already imported at module level.
    host = '127.0.0.1'
    port = 8081
    database = 'data/proxies.sqlite'

    if len(sys.argv) > 1:
        database = sys.argv[1]
    if len(sys.argv) > 2:
        # Optional port override, handy for running several test instances.
        port = int(sys.argv[2])

    _log('starting test server on %s:%d (db: %s)' % (host, port, database), 'info')
    server = ProxyAPIServer(host, port, database)
    server.start()

    # Idle loop so Ctrl-C can trigger a clean shutdown.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        server.stop()
        _log('server stopped', 'info')