httpd: expose URL pipeline stats in /api/stats
All checks were successful
CI / validate (push) Successful in 19s

Add urls section with total/healthy/dead/erroring counts, fetch
activity, productive source count, aggregate yield, and top sources
ranked by working_ratio.
This commit is contained in:
Username
2026-02-22 11:53:57 +01:00
parent eeadf656f5
commit b300afed6c

View File

@@ -1564,6 +1564,10 @@ class ProxyAPIServer(threading.Thread):
stats['db_health'] = get_db_health(db)
except Exception as e:
_log('api/stats db error: %s' % e, 'warn')
# Add URL pipeline stats
url_stats = self._get_url_stats()
if url_stats is not None:
stats['urls'] = url_stats
# Add profiling flag (from constructor or stats_provider)
if 'profiling' not in stats:
stats['profiling'] = self.profiling
@@ -1908,6 +1912,65 @@ class ProxyAPIServer(threading.Thread):
_log('_get_db_stats error: %s' % e, 'warn')
return stats
def _get_url_stats(self):
    """Collect URL pipeline statistics from the websites database.

    Returns:
        dict with health breakdown (total/healthy/erroring/dead), recent
        fetch activity, productive-source count, aggregate yield, current
        claim count, and the top 10 sources ranked by working_ratio —
        or None when no URL database is configured or any query fails
        (errors are logged at 'warn' and swallowed; stats are best-effort).
    """
    if not self.url_database:
        return None
    try:
        db = mysqlite.mysqlite(self.url_database, str)
        stats = {}
        now = int(time.time())

        def _count(sql, params=()):
            # Single-value COUNT query; 0 when no row comes back.
            row = db.execute(sql, params).fetchone()
            return row[0] if row else 0

        # Health breakdown: error >= 10 is treated as dead, 1-9 as erroring.
        stats['total'] = _count('SELECT COUNT(*) FROM uris')
        stats['dead'] = _count('SELECT COUNT(*) FROM uris WHERE error >= 10')
        stats['erroring'] = _count(
            'SELECT COUNT(*) FROM uris WHERE error > 0 AND error < 10')
        stats['healthy'] = _count('SELECT COUNT(*) FROM uris WHERE error = 0')
        # Recently active (fetched in the last hour).
        stats['fetched_last_hour'] = _count(
            'SELECT COUNT(*) FROM uris WHERE check_time >= ?',
            (now - 3600,))
        # Productive sources (have produced at least one working proxy).
        stats['productive'] = _count(
            'SELECT COUNT(*) FROM uris WHERE working_ratio > 0')
        # Aggregate yield. SQL SUM() yields NULL on an empty table, so
        # coalesce each column with `or 0`; parentheses make the
        # precedence explicit (the original unparenthesized form parsed
        # the same way but read ambiguously).
        row = db.execute(
            'SELECT SUM(proxies_added), SUM(retrievals) FROM uris'
        ).fetchone()
        stats['total_proxies_extracted'] = (row[0] or 0) if row else 0
        stats['total_fetches'] = (row[1] or 0) if row else 0
        # Currently claimed by fetch workers.
        with _url_claims_lock:
            stats['claimed'] = len(_url_claims)
        # Top sources by working_ratio (productive URLs only). yield_rate
        # is not constrained by the WHERE clause and may be NULL in the
        # DB; round(None, 1) raises TypeError, so coalesce numeric
        # columns to 0 before formatting.
        rows = db.execute(
            'SELECT url, working_ratio, yield_rate, proxies_added, retrievals '
            'FROM uris WHERE working_ratio > 0 AND retrievals > 0 '
            'ORDER BY working_ratio DESC LIMIT 10'
        ).fetchall()
        stats['top_sources'] = [{
            'url': r[0],
            'working_ratio': round(r[1] or 0, 3),
            'yield_rate': round(r[2] or 0, 1),
            'proxies_added': r[3] or 0,
            'fetches': r[4] or 0,
        } for r in rows]
        return stats
    except Exception as e:
        _log('_get_url_stats error: %s' % e, 'warn')
        return None
def _get_workers_data(self, db):
"""Get worker status data. Used by /api/workers and /api/dashboard."""
now = time.time()