httpd: expose URL pipeline stats in /api/stats
All checks were successful
CI / validate (push) Successful in 19s
All checks were successful
CI / validate (push) Successful in 19s
Add urls section with total/healthy/dead/erroring counts, fetch activity, productive source count, aggregate yield, and top sources ranked by working_ratio.
This commit is contained in:
63
httpd.py
63
httpd.py
@@ -1564,6 +1564,10 @@ class ProxyAPIServer(threading.Thread):
|
||||
stats['db_health'] = get_db_health(db)
|
||||
except Exception as e:
|
||||
_log('api/stats db error: %s' % e, 'warn')
|
||||
# Add URL pipeline stats
|
||||
url_stats = self._get_url_stats()
|
||||
if url_stats is not None:
|
||||
stats['urls'] = url_stats
|
||||
# Add profiling flag (from constructor or stats_provider)
|
||||
if 'profiling' not in stats:
|
||||
stats['profiling'] = self.profiling
|
||||
@@ -1908,6 +1912,65 @@ class ProxyAPIServer(threading.Thread):
|
||||
_log('_get_db_stats error: %s' % e, 'warn')
|
||||
return stats
|
||||
|
||||
def _get_url_stats(self):
    """Collect URL pipeline statistics from the websites database.

    Returns:
        dict with health counts (total/healthy/erroring/dead), recent
        fetch activity, productive-source count, aggregate yield,
        current claim count, and the top 10 sources by working_ratio;
        or None when no URL database is configured or any query fails.
    """
    if not self.url_database:
        return None
    try:
        db = mysqlite.mysqlite(self.url_database, str)
        now = int(time.time())

        def scalar(query, params=None):
            # Run a single-value query; return 0 when no row comes back.
            # Only pass params when given, to keep the original call
            # shapes against the mysqlite wrapper unchanged.
            cur = db.execute(query, params) if params else db.execute(query)
            row = cur.fetchone()
            return row[0] if row else 0

        stats = {}

        # Health breakdown: error counter >= 10 marks a source dead,
        # 1..9 is transiently erroring, 0 is healthy.
        stats['total'] = scalar('SELECT COUNT(*) FROM uris')
        stats['dead'] = scalar(
            'SELECT COUNT(*) FROM uris WHERE error >= 10')
        stats['erroring'] = scalar(
            'SELECT COUNT(*) FROM uris WHERE error > 0 AND error < 10')
        stats['healthy'] = scalar(
            'SELECT COUNT(*) FROM uris WHERE error = 0')

        # Recently active (fetched in the last hour).
        stats['fetched_last_hour'] = scalar(
            'SELECT COUNT(*) FROM uris WHERE check_time >= ?',
            (now - 3600,))

        # Productive sources (have produced working proxies).
        stats['productive'] = scalar(
            'SELECT COUNT(*) FROM uris WHERE working_ratio > 0')

        # Aggregate yield. SUM() yields NULL on an empty table, so
        # coalesce each column to 0 explicitly. (The previous
        # `row[0] or 0 if row else 0` happened to parse correctly but
        # relied on fragile conditional-expression precedence.)
        row = db.execute(
            'SELECT SUM(proxies_added), SUM(retrievals) FROM uris'
        ).fetchone()
        stats['total_proxies_extracted'] = (row[0] or 0) if row else 0
        stats['total_fetches'] = (row[1] or 0) if row else 0

        # URLs currently claimed by fetch workers.
        with _url_claims_lock:
            stats['claimed'] = len(_url_claims)

        # Top sources ranked by working_ratio (productive URLs only).
        rows = db.execute(
            'SELECT url, working_ratio, yield_rate, proxies_added, retrievals '
            'FROM uris WHERE working_ratio > 0 AND retrievals > 0 '
            'ORDER BY working_ratio DESC LIMIT 10'
        ).fetchall()
        stats['top_sources'] = [{
            'url': r[0], 'working_ratio': round(r[1], 3),
            'yield_rate': round(r[2], 1), 'proxies_added': r[3],
            'fetches': r[4],
        } for r in rows]

        return stats
    except Exception as e:
        # Best-effort: /api/stats must not fail just because the URL
        # database is missing or locked — log and degrade gracefully.
        _log('_get_url_stats error: %s' % e, 'warn')
        return None
|
||||
|
||||
def _get_workers_data(self, db):
|
||||
"""Get worker status data. Used by /api/workers and /api/dashboard."""
|
||||
now = time.time()
|
||||
|
||||
Reference in New Issue
Block a user