#!/usr/bin/env python2
"""Statistics tracking for PPF proxy validation."""

from __future__ import division

import threading
import time

from misc import _log


def try_div(a, b):
    """Return ``a / b`` computed as a float, or 0 when ``b`` is zero."""
    if b != 0:
        return a / float(b)
    return 0


def _append_bounded(series, value, limit):
    """Append ``value`` to the list ``series``, dropping the oldest entry
    when the list grows beyond ``limit`` items."""
    series.append(value)
    if len(series) > limit:
        series.pop(0)


def _restore_asn_key(key):
    """Convert a digit-only string key back to int; return anything else as-is.

    Keys that are not strings (e.g. already ints) have no ``isdigit`` and are
    passed through. ``isdigit()`` accepts some unicode digits that ``int()``
    rejects, so a failed conversion also falls back to the original key.
    """
    if hasattr(key, 'isdigit') and key.isdigit():
        try:
            return int(key)
        except ValueError:
            return key
    return key


class TargetStats(object):
    """Track per-target success/failure rates with cooldown.

    Targets that frequently block or fail are temporarily avoided.
    Block counters reset on success or cooldown expiry.
    Used for all target pools: judges, head targets, SSL targets, IRC servers.
    """

    def __init__(self, cooldown_seconds=300, block_threshold=3):
        """Create an empty tracker.

        Args:
            cooldown_seconds: how long a target stays unavailable after its
                most recent block once the threshold has been reached.
            block_threshold: number of blocks (without an intervening
                success) at which a target enters cooldown.
        """
        self.lock = threading.Lock()
        # target -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
        self.stats = {}
        self.cooldown_seconds = cooldown_seconds
        self.block_threshold = block_threshold

    def _ensure(self, target):
        """Create a zeroed entry for ``target`` if none exists.

        Does no locking itself; every caller in this class holds self.lock.
        """
        if target not in self.stats:
            self.stats[target] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}

    def _in_cooldown(self, entry, now):
        """Return True if ``entry`` reached the block threshold and its last
        block is more recent than ``cooldown_seconds`` before ``now``."""
        return (entry['block'] >= self.block_threshold
                and (now - entry['last_block']) < self.cooldown_seconds)

    def record_success(self, target):
        """Record successful target response."""
        with self.lock:
            self._ensure(target)
            entry = self.stats[target]
            entry['success'] += 1
            # A success clears the block streak.
            entry['block'] = 0

    def record_failure(self, target):
        """Record target failure (soft -- doesn't trigger cooldown)."""
        with self.lock:
            self._ensure(target)
            self.stats[target]['fail'] += 1

    def record_block(self, target):
        """Record target block (403, captcha, DNS failure, rate-limit)."""
        with self.lock:
            self._ensure(target)
            entry = self.stats[target]
            entry['block'] += 1
            entry['last_block'] = time.time()

    def is_available(self, target):
        """Check if target is available (not in cooldown).

        Unknown targets are available. When a target has reached the block
        threshold but its cooldown has expired, its block counter is reset
        as a side effect.
        """
        with self.lock:
            entry = self.stats.get(target)
            if entry is None:
                return True
            if entry['block'] >= self.block_threshold:
                if (time.time() - entry['last_block']) < self.cooldown_seconds:
                    return False
                # Cooldown expired: start counting blocks from zero again.
                entry['block'] = 0
            return True

    def get_available(self, target_list):
        """Return targets not in cooldown."""
        return [t for t in target_list if self.is_available(t)]

    def get_available_judges(self, judge_list):
        """Compat alias for get_available()."""
        return self.get_available(judge_list)

    def status_line(self):
        """Return status summary for logging."""
        with self.lock:
            now = time.time()
            total = len(self.stats)
            blocked = sum(1 for s in self.stats.values()
                          if self._in_cooldown(s, now))
            return '%d total, %d in cooldown' % (total, blocked)

    def get_stats(self):
        """Return statistics dict for API/dashboard.

        Returns:
            dict with 'total', 'available', 'in_cooldown' counts and 'top',
            a list of per-target dicts ('target', 'success', 'tests', 'rate')
            for every target with at least one success or failure, sorted by
            success count, highest first.
        """
        with self.lock:
            now = time.time()
            total = len(self.stats)
            in_cooldown = sum(1 for s in self.stats.values()
                              if self._in_cooldown(s, now))
            available = total - in_cooldown
            top = []
            for target, s in self.stats.items():
                # Blocks are not counted as tests here.
                total_tests = s['success'] + s['fail']
                if total_tests > 0:
                    success_pct = (s['success'] * 100.0) / total_tests
                    top.append({
                        'target': target,
                        'success': s['success'],
                        'tests': total_tests,
                        'rate': round(success_pct, 1),
                    })
            top.sort(key=lambda x: x['success'], reverse=True)
            return {
                'total': total,
                'available': available,
                'in_cooldown': in_cooldown,
                'top': top,
            }


# Backwards-compatible alias
JudgeStats = TargetStats

# HTTP targets - check for specific headers
regexes = {
    'www.facebook.com': 'X-FB-Debug',
    'www.fbcdn.net': 'X-FB-Debug',
    'www.reddit.com': 'x-clacks-overhead',
    'www.twitter.com': 'x-connection-hash',
    't.co': 'x-connection-hash',
    'www.msn.com': 'x-aspnetmvc-version',
    'www.ask.com': 'x-served-by',
    'www.hotmail.com': 'x-msedge-ref',
    'www.bbc.co.uk': 'x-bbc-edge-cache-status',
    'www.alibaba.com': 'object-status',
    'www.mozilla.org': 'cf-ray',
    'www.cloudflare.com': 'cf-ray',
    'www.wikimedia.org': 'x-client-ip',
    'www.vk.com': 'x-frontend',
    'www.tinypic.com': 'x-amz-cf-pop',
    'www.netflix.com': 'X-Netflix.proxy.execution-time',
    'www.amazon.de': 'x-amz-cf-id',
    'www.reuters.com': 'x-amz-cf-id',
    'www.twitpic.com': 'timing-allow-origin',
    'www.digg.com': 'cf-request-id',
    'www.wikia.com': 'x-served-by',
    'www.wp.com': 'x-ac',
    'www.last.fm': 'x-timer',
    'www.usps.com': 'x-ruleset-version',
    'www.linkedin.com': 'x-li-uuid',
    'www.vimeo.com': 'x-timer',
    'www.yelp.com': 'x-timer',
    'www.ebay.com': 'x-envoy-upstream-service-time',
    'www.wikihow.com': 'x-c',
    'www.w3.org': 'x-backend',
    'www.time.com': 'x-amz-cf-pop'
}

# SSL targets - verify TLS handshake only (MITM detection)
ssl_targets = [
    'www.google.com',
    'www.microsoft.com',
    'www.apple.com',
    'www.amazon.com',
    'www.cloudflare.com',
    'www.github.com',
    'www.mozilla.org',
    'www.wikipedia.org',
    'www.reddit.com',
    'www.twitter.com',
    'x.com',
    'www.facebook.com',
    'www.linkedin.com',
    'www.paypal.com',
    'www.stripe.com',
    'www.digicert.com',
    'www.letsencrypt.org',
]


class Stats(object):
    """Track and report comprehensive runtime statistics."""

    HISTORY_SIZE = 120  # 10 min at 5s intervals
    LATENCY_BUCKETS = [100, 250, 500, 1000, 2000, 5000, 10000]  # ms thresholds
    PROTOCOLS = ('http', 'socks4', 'socks5')
    HISTORY_INTERVAL = 5  # seconds between time-series samples
    RECENT_WINDOW = 60  # seconds covered by the "recent" counters
    HOUR_SECONDS = 3600
    MAX_HOURS = 24  # hourly aggregates retained
    MAX_LATENCY_SAMPLES = 1000  # samples kept for percentile computation
    MAX_SANE_RATE = 100  # tests/s; higher interval rates are discarded

    def __init__(self):
        # RLock for reentrant access: get_full_stats() and update_history()
        # call other locking getters while already holding the lock.
        self.lock = threading.RLock()
        self.tested = 0
        self.passed = 0
        self.failed = 0
        self.start_time = time.time()
        self.last_report = time.time()

        # Failure category tracking
        self.fail_categories = {}

        # Protocol tracking (tested, passed, and failed separately)
        self.proto_tested = {'http': 0, 'socks4': 0, 'socks5': 0}
        self.proto_passed = {'http': 0, 'socks4': 0, 'socks5': 0}
        # Failures by category per proto
        self.proto_failed = {'http': {}, 'socks4': {}, 'socks5': {}}
        # Alias for dashboard API (same dict object as proto_passed)
        self.by_proto = self.proto_passed

        # Time series history (5s intervals)
        self.rate_history = []
        self.success_rate_history = []
        self.latency_history = []
        self.last_history_time = time.time()
        self.last_history_tested = 0
        self.last_history_passed = 0

        # Peak values (delayed measurement to avoid startup anomalies)
        self.peak_rate = 0.0
        self.peak_success_rate = 0.0
        self.peak_grace_period = 30  # seconds before recording peaks
        self.min_latency = float('inf')
        self.max_latency = 0.0

        # Latency tracking with percentiles
        self.latency_sum = 0.0
        self.latency_count = 0
        self.latency_samples = []  # Recent samples for percentiles
        self.latency_buckets = {b: 0 for b in self.LATENCY_BUCKETS + [float('inf')]}

        # Recent window (last 60s)
        self.recent_tested = 0
        self.recent_passed = 0
        self.recent_start = time.time()

        # Country/ASN tracking (top N)
        self.country_passed = {}
        self.asn_passed = {}

        # Hourly aggregates
        self.hourly_tested = 0
        self.hourly_passed = 0
        self.hourly_start = time.time()
        self.hours_data = []  # Last 24 hours

        # SSL/TLS tracking
        self.ssl_tested = 0
        self.ssl_passed = 0
        self.ssl_failed = 0
        self.ssl_fail_categories = {}  # Track SSL failures by category
        self.mitm_detected = 0
        self.cert_errors = 0

    def record(self, success, category=None, proto=None, latency_ms=None,
               country=None, asn=None, ssl_test=False, mitm=False,
               cert_error=False):
        """Record the outcome of one proxy test.

        Args:
            success: truthy if the test passed.
            category: failure category label; only used when the test failed.
            proto: 'http', 'socks4' or 'socks5'; other values are not
                counted per protocol.
            latency_ms: latency of a passed test in ms; ignored unless > 0.
            country: country of a passed proxy, counted in country_passed.
            asn: ASN of a passed proxy, counted in asn_passed.
            ssl_test: truthy if this was an SSL/TLS test.
            mitm: truthy to count a MITM detection.
            cert_error: truthy to count a certificate error.
        """
        with self.lock:
            self.tested += 1
            self.recent_tested += 1
            self.hourly_tested += 1

            # Track protocol tests
            if proto and proto in self.proto_tested:
                self.proto_tested[proto] += 1

            if success:
                self._record_pass(proto, latency_ms, country, asn)
            else:
                self._record_fail(category, proto)

            # SSL/TLS tracking
            if ssl_test:
                self.ssl_tested += 1
                if success:
                    self.ssl_passed += 1
                else:
                    self.ssl_failed += 1
                    # Track which error caused the SSL failure
                    if category:
                        self.ssl_fail_categories[category] = \
                            self.ssl_fail_categories.get(category, 0) + 1
            if mitm:
                self.mitm_detected += 1
            if cert_error:
                self.cert_errors += 1

    def _record_pass(self, proto, latency_ms, country, asn):
        """Update pass counters, latency stats and geo counters.

        Caller must hold self.lock.
        """
        self.passed += 1
        self.recent_passed += 1
        self.hourly_passed += 1
        if proto and proto in self.proto_passed:
            self.proto_passed[proto] += 1

        if latency_ms and latency_ms > 0:
            self.latency_sum += latency_ms
            self.latency_count += 1
            # Track min/max
            if latency_ms < self.min_latency:
                self.min_latency = latency_ms
            if latency_ms > self.max_latency:
                self.max_latency = latency_ms
            # Keep recent samples for percentiles
            _append_bounded(self.latency_samples, latency_ms,
                            self.MAX_LATENCY_SAMPLES)
            # Bucket for histogram: first threshold the sample fits under,
            # or the overflow bucket when it exceeds every threshold.
            for bucket in self.LATENCY_BUCKETS:
                if latency_ms <= bucket:
                    self.latency_buckets[bucket] += 1
                    break
            else:
                self.latency_buckets[float('inf')] += 1

        # Track country/ASN
        if country:
            self.country_passed[country] = self.country_passed.get(country, 0) + 1
        if asn:
            self.asn_passed[asn] = self.asn_passed.get(asn, 0) + 1

    def _record_fail(self, category, proto):
        """Update failure counters and periodically log the breakdown.

        Caller must hold self.lock.
        """
        self.failed += 1
        if category:
            self.fail_categories[category] = self.fail_categories.get(category, 0) + 1
            # Track failures by protocol
            if proto and proto in self.proto_failed:
                per_proto = self.proto_failed[proto]
                per_proto[category] = per_proto.get(category, 0) + 1
        # Log failure category breakdown every 1000 failures
        if self.failed % 1000 == 0:
            top_cats = sorted(self.fail_categories.items(), key=lambda x: -x[1])[:5]
            cats_str = ', '.join(['%s:%d' % (c, n) for c, n in top_cats])
            _log('fail breakdown (%d total): %s' % (self.failed, cats_str), 'diag')

    def update_history(self):
        """Update time series history (call periodically).

        Appends one sample to each history list when at least 5 seconds
        have passed since the previous sample, resets the recent window
        every 60 seconds and rolls the hourly aggregate every hour.
        """
        now = time.time()
        with self.lock:
            elapsed = now - self.last_history_time
            if elapsed >= self.HISTORY_INTERVAL:
                self._sample_history(now, elapsed)

            # Reset recent window every 60s
            if now - self.recent_start >= self.RECENT_WINDOW:
                self.recent_tested = 0
                self.recent_passed = 0
                self.recent_start = now

            # Hourly aggregation
            if now - self.hourly_start >= self.HOUR_SECONDS:
                has_tests = self.hourly_tested > 0
                self.hours_data.append({
                    'tested': self.hourly_tested,
                    'passed': self.hourly_passed,
                    'rate': self.hourly_passed / 3600.0 if has_tests else 0,
                    'success_rate': (self.hourly_passed / self.hourly_tested * 100)
                                    if has_tests else 0,
                })
                if len(self.hours_data) > self.MAX_HOURS:
                    self.hours_data.pop(0)
                self.hourly_tested = 0
                self.hourly_passed = 0
                self.hourly_start = now

    def _sample_history(self, now, elapsed):
        """Append one rate / success-rate / latency sample and update peaks.

        Caller must hold self.lock; ``elapsed`` is the (positive) number of
        seconds since the previous sample.
        """
        # A negative delta means the counter went backwards relative to the
        # baseline; treat it as no progress (baseline is re-set below).
        tests_delta = max(self.tested - self.last_history_tested, 0)
        passed_delta = max(self.passed - self.last_history_passed, 0)
        uptime = now - self.start_time
        past_grace = uptime >= self.peak_grace_period

        # Rate - cap at reasonable max (100/s is generous for proxy testing)
        rate = tests_delta / elapsed
        if rate > self.MAX_SANE_RATE:
            rate = 0  # Discard bogus value
        _append_bounded(self.rate_history, round(rate, 2), self.HISTORY_SIZE)
        # Only record peaks after grace period (avoid startup anomalies)
        if past_grace and rate > self.peak_rate:
            self.peak_rate = rate

        # Success rate for the interval, capped at 100%
        sr = (passed_delta / tests_delta * 100) if tests_delta > 0 else 0
        sr = min(sr, 100.0)
        _append_bounded(self.success_rate_history, round(sr, 1), self.HISTORY_SIZE)
        if past_grace and sr > self.peak_success_rate:
            self.peak_success_rate = sr

        # Cumulative average latency at sample time (get_avg_latency() covers
        # every recorded sample, not only this interval).
        avg_lat = self.get_avg_latency()
        _append_bounded(self.latency_history, round(avg_lat, 0), self.HISTORY_SIZE)

        self.last_history_time = now
        self.last_history_tested = self.tested
        self.last_history_passed = self.passed

    def get_recent_rate(self):
        """Get rate for last 60 seconds."""
        with self.lock:
            elapsed = time.time() - self.recent_start
            if elapsed > 0:
                return self.recent_tested / elapsed
            return 0.0

    def get_recent_success_rate(self):
        """Get success rate for last 60 seconds."""
        with self.lock:
            if self.recent_tested > 0:
                return (self.recent_passed / self.recent_tested) * 100
            return 0.0

    def get_avg_latency(self):
        """Get average latency in ms."""
        with self.lock:
            if self.latency_count > 0:
                return self.latency_sum / self.latency_count
            return 0.0

    def get_latency_percentiles(self):
        """Get latency percentiles (p50, p90, p99).

        Computed over the retained recent samples; all zeros when there
        are none.
        """
        with self.lock:
            if not self.latency_samples:
                return {'p50': 0, 'p90': 0, 'p99': 0}
            ordered = sorted(self.latency_samples)
            last = len(ordered) - 1

            def pick(fraction):
                # Clamp so float rounding can never index past the end.
                return ordered[min(int(len(ordered) * fraction), last)]

            return {'p50': pick(0.50), 'p90': pick(0.90), 'p99': pick(0.99)}

    def get_latency_histogram(self):
        """Get latency distribution histogram.

        Returns:
            list of {'range', 'count', 'pct'} dicts, one per bucket in
            LATENCY_BUCKETS plus a '>max' entry when any sample exceeded the
            largest bucket; an empty list when nothing has been recorded.
        """
        with self.lock:
            total = sum(self.latency_buckets.values())
            if total == 0:
                return []
            result = []
            prev = 0
            for bucket in self.LATENCY_BUCKETS:
                count = self.latency_buckets[bucket]
                result.append({
                    'range': '%d-%d' % (prev, bucket),
                    'count': count,
                    'pct': round(count / total * 100, 1),
                })
                prev = bucket
            # Over max bucket
            over = self.latency_buckets[float('inf')]
            if over > 0:
                result.append({
                    'range': '>%d' % self.LATENCY_BUCKETS[-1],
                    'count': over,
                    'pct': round(over / total * 100, 1),
                })
            return result

    def get_proto_stats(self):
        """Get protocol-specific success rates and failure breakdown."""
        with self.lock:
            result = {}
            for proto in self.PROTOCOLS:
                tested = self.proto_tested[proto]
                passed = self.proto_passed[proto]
                reasons = self.proto_failed[proto]
                result[proto] = {
                    'tested': tested,
                    'passed': passed,
                    'failed': sum(reasons.values()),
                    'success_rate': round(passed / tested * 100, 1) if tested > 0 else 0,
                    'fail_reasons': dict(reasons),
                }
            return result

    def get_top_countries(self, limit=10):
        """Get top countries by working proxy count."""
        with self.lock:
            sorted_countries = sorted(self.country_passed.items(), key=lambda x: -x[1])
            return sorted_countries[:limit]

    def get_top_asns(self, limit=10):
        """Get top ASNs by working proxy count."""
        with self.lock:
            sorted_asns = sorted(self.asn_passed.items(), key=lambda x: -x[1])
            return sorted_asns[:limit]

    def get_hourly_data(self):
        """Get last 24 hours of hourly data."""
        with self.lock:
            return list(self.hours_data)

    def load_state(self, state):
        """Load persisted state from a dict (from database).

        Args:
            state: dict from dbs.load_session_state(); a falsy value is
                ignored.
        """
        if not state:
            return
        with self.lock:
            self.tested = state.get('tested', 0)
            self.passed = state.get('passed', 0)
            self.failed = state.get('failed', 0)
            self.ssl_tested = state.get('ssl_tested', 0)
            self.ssl_passed = state.get('ssl_passed', 0)
            self.ssl_failed = state.get('ssl_failed', 0)
            self.mitm_detected = state.get('mitm_detected', 0)
            self.cert_errors = state.get('cert_errors', 0)
            # Assign in place so the by_proto alias keeps pointing at the
            # same dict as proto_passed.
            for proto in self.PROTOCOLS:
                self.proto_tested[proto] = state.get('proto_%s_tested' % proto, 0)
                self.proto_passed[proto] = state.get('proto_%s_passed' % proto, 0)
            # Note: peak_rate is per-session, not restored (avoids stale/corrupt values)
            # Note: start_time is NOT restored - uptime reflects current session

            # Re-base the time-series baselines on the restored totals so the
            # next update_history() sample covers only tests run after the
            # restore, not the whole persisted history.
            self.last_history_tested = self.tested
            self.last_history_passed = self.passed

            # Restore failure categories
            if state.get('fail_categories'):
                self.fail_categories = dict(state['fail_categories'])
            # Restore SSL failure categories
            if state.get('ssl_fail_categories'):
                self.ssl_fail_categories = dict(state['ssl_fail_categories'])
            # Restore protocol failure categories
            if state.get('proto_failed'):
                for proto in self.PROTOCOLS:
                    if proto in state['proto_failed']:
                        self.proto_failed[proto] = dict(state['proto_failed'][proto])
            # Restore geo tracking
            if state.get('country_passed'):
                self.country_passed = dict(state['country_passed'])
            if state.get('asn_passed'):
                # Convert string keys back to int for ASN
                self.asn_passed = {_restore_asn_key(k): v
                                   for k, v in state['asn_passed'].items()}
            _log('restored session: %d tested, %d passed' % (self.tested, self.passed), 'info')

    def should_report(self, interval):
        """Return True when at least ``interval`` seconds have passed since
        the last report() call (or since construction)."""
        return (time.time() - self.last_report) >= interval

    def report(self):
        """Return a one-line summary string and mark the report time.

        The failure category breakdown is appended in brackets when any
        failure category has been recorded.
        """
        with self.lock:
            self.last_report = time.time()
            elapsed = time.time() - self.start_time
            rate = try_div(self.tested, elapsed)
            pct = try_div(self.passed * 100.0, self.tested)
            base = 'tested=%d passed=%d (%.1f%%) rate=%.2f/s uptime=%dm' % (
                self.tested, self.passed, pct, rate, int(elapsed / 60))
            # Add failure breakdown if there are failures
            if self.fail_categories:
                cats = ' '.join('%s=%d' % (k, v)
                                for k, v in sorted(self.fail_categories.items()))
                return '%s [%s]' % (base, cats)
            return base

    def get_full_stats(self):
        """Get comprehensive stats dict for API."""
        with self.lock:
            elapsed = time.time() - self.start_time
            return {
                'tested': self.tested,
                'passed': self.passed,
                'failed': self.failed,
                'success_rate': round(self.passed / self.tested * 100, 1) if self.tested > 0 else 0,
                'rate': round(self.tested / elapsed, 2) if elapsed > 0 else 0,
                'pass_rate': round(self.passed / elapsed, 2) if elapsed > 0 else 0,
                'recent_rate': self.get_recent_rate(),
                'recent_success_rate': self.get_recent_success_rate(),
                'peak_rate': self.peak_rate,
                'peak_success_rate': self.peak_success_rate,
                'uptime_seconds': int(elapsed),
                'rate_history': list(self.rate_history),
                'success_rate_history': list(self.success_rate_history),
                'latency_history': list(self.latency_history),
                'avg_latency': self.get_avg_latency(),
                # min_latency starts at +inf; report 0 until a sample exists.
                'min_latency': self.min_latency if self.min_latency != float('inf') else 0,
                'max_latency': self.max_latency,
                'latency_percentiles': self.get_latency_percentiles(),
                'latency_histogram': self.get_latency_histogram(),
                'by_proto': dict(self.proto_passed),
                'proto_stats': self.get_proto_stats(),
                'failures': dict(self.fail_categories),
                'top_countries': self.get_top_countries(),
                'top_asns': self.get_top_asns(),
                'hourly_data': self.get_hourly_data(),
            }