watchd: add target health tracking for all target pools
Generalizes JudgeStats into TargetStats, extending the cooldown-based filtering already used for judges to head targets, SSL targets, and IRC servers. Targets that repeatedly block or fail are temporarily avoided, so that proxies are not unfairly marked as failed when the target itself goes down. Exposes per-pool health via /api/stats.
This commit is contained in:
83
stats.py
83
stats.py
@@ -14,60 +14,64 @@ def try_div(a, b):
|
||||
return 0
|
||||
|
||||
|
||||
class JudgeStats():
|
||||
"""Track per-judge success/failure rates for reliability scoring.
|
||||
class TargetStats():
|
||||
"""Track per-target success/failure rates with cooldown.
|
||||
|
||||
Judges that frequently block or rate-limit are temporarily avoided.
|
||||
Stats decay over time to allow recovery.
|
||||
Targets that frequently block or fail are temporarily avoided.
|
||||
Block counters reset on success or cooldown expiry.
|
||||
|
||||
Used for all target pools: judges, head targets, SSL targets, IRC servers.
|
||||
"""
|
||||
|
||||
def __init__(self, cooldown_seconds=300, block_threshold=3):
|
||||
self.lock = threading.Lock()
|
||||
self.stats = {} # judge -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
|
||||
self.cooldown_seconds = cooldown_seconds # seconds to avoid blocked judges
|
||||
self.block_threshold = block_threshold # consecutive blocks before cooldown
|
||||
self.stats = {} # target -> {'success': n, 'fail': n, 'block': n, 'last_block': timestamp}
|
||||
self.cooldown_seconds = cooldown_seconds
|
||||
self.block_threshold = block_threshold
|
||||
|
||||
def record_success(self, judge):
|
||||
"""Record successful judge response."""
|
||||
with self.lock:
|
||||
if judge not in self.stats:
|
||||
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
||||
self.stats[judge]['success'] += 1
|
||||
# Reset block count on success
|
||||
self.stats[judge]['block'] = 0
|
||||
def _ensure(self, target):
|
||||
if target not in self.stats:
|
||||
self.stats[target] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
||||
|
||||
def record_failure(self, judge):
|
||||
"""Record judge failure (proxy failed, not judge block)."""
|
||||
def record_success(self, target):
|
||||
"""Record successful target response."""
|
||||
with self.lock:
|
||||
if judge not in self.stats:
|
||||
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
||||
self.stats[judge]['fail'] += 1
|
||||
self._ensure(target)
|
||||
self.stats[target]['success'] += 1
|
||||
self.stats[target]['block'] = 0
|
||||
|
||||
def record_block(self, judge):
|
||||
"""Record judge blocking the proxy (403, captcha, rate-limit)."""
|
||||
def record_failure(self, target):
|
||||
"""Record target failure (soft -- doesn't trigger cooldown)."""
|
||||
with self.lock:
|
||||
if judge not in self.stats:
|
||||
self.stats[judge] = {'success': 0, 'fail': 0, 'block': 0, 'last_block': 0}
|
||||
self.stats[judge]['block'] += 1
|
||||
self.stats[judge]['last_block'] = time.time()
|
||||
self._ensure(target)
|
||||
self.stats[target]['fail'] += 1
|
||||
|
||||
def is_available(self, judge):
|
||||
"""Check if judge is available (not in cooldown)."""
|
||||
def record_block(self, target):
|
||||
"""Record target block (403, captcha, DNS failure, rate-limit)."""
|
||||
with self.lock:
|
||||
if judge not in self.stats:
|
||||
self._ensure(target)
|
||||
self.stats[target]['block'] += 1
|
||||
self.stats[target]['last_block'] = time.time()
|
||||
|
||||
def is_available(self, target):
|
||||
"""Check if target is available (not in cooldown)."""
|
||||
with self.lock:
|
||||
if target not in self.stats:
|
||||
return True
|
||||
s = self.stats[judge]
|
||||
# Check if in cooldown period
|
||||
s = self.stats[target]
|
||||
if s['block'] >= self.block_threshold:
|
||||
if (time.time() - s['last_block']) < self.cooldown_seconds:
|
||||
return False
|
||||
# Cooldown expired, reset block count
|
||||
s['block'] = 0
|
||||
return True
|
||||
|
||||
def get_available(self, target_list):
|
||||
"""Return targets not in cooldown."""
|
||||
return [t for t in target_list if self.is_available(t)]
|
||||
|
||||
def get_available_judges(self, judge_list):
|
||||
"""Return list of judges not in cooldown."""
|
||||
return [j for j in judge_list if self.is_available(j)]
|
||||
"""Compat alias for get_available()."""
|
||||
return self.get_available(judge_list)
|
||||
|
||||
def status_line(self):
|
||||
"""Return status summary for logging."""
|
||||
@@ -76,7 +80,7 @@ class JudgeStats():
|
||||
blocked = sum(1 for s in self.stats.values()
|
||||
if s['block'] >= self.block_threshold and
|
||||
(time.time() - s['last_block']) < self.cooldown_seconds)
|
||||
return 'judges: %d total, %d in cooldown' % (total, blocked)
|
||||
return '%d total, %d in cooldown' % (total, blocked)
|
||||
|
||||
def get_stats(self):
|
||||
"""Return statistics dict for API/dashboard."""
|
||||
@@ -87,18 +91,21 @@ class JudgeStats():
|
||||
if s['block'] >= self.block_threshold and
|
||||
(now - s['last_block']) < self.cooldown_seconds)
|
||||
available = total - in_cooldown
|
||||
# Get top judges by success count
|
||||
top = []
|
||||
for judge, s in self.stats.items():
|
||||
for target, s in self.stats.items():
|
||||
total_tests = s['success'] + s['fail']
|
||||
if total_tests > 0:
|
||||
success_pct = (s['success'] * 100.0) / total_tests
|
||||
top.append({'judge': judge, 'success': s['success'],
|
||||
top.append({'target': target, 'success': s['success'],
|
||||
'tests': total_tests, 'rate': round(success_pct, 1)})
|
||||
top.sort(key=lambda x: x['success'], reverse=True)
|
||||
return {'total': total, 'available': available, 'in_cooldown': in_cooldown, 'top': top}
|
||||
|
||||
|
||||
# Backwards-compatible alias
|
||||
JudgeStats = TargetStats
|
||||
|
||||
|
||||
# HTTP targets - check for specific headers
|
||||
regexes = {
|
||||
'www.facebook.com': 'X-FB-Debug',
|
||||
|
||||
Reference in New Issue
Block a user