scraper: add engine stats API for dashboard

- EngineTracker.get_stats() returns detailed per-engine metrics
- get_scraper_stats() module function for external access
- includes success and failure counts, backoff status, availability
This commit is contained in:
Username
2025-12-23 17:23:28 +01:00
parent 68a34f2638
commit e7478de79e

View File

@@ -104,6 +104,43 @@ class EngineTracker(object):
in_backoff = len(self.engines) - available
return available, in_backoff, len(self.engines)
def get_stats(self):
    """Return detailed per-engine stats for the API/dashboard.

    Returns:
        dict with the keys:
          'available'       - number of entries returned by get_available()
          'in_backoff'      - len(self.engines) minus the available count
          'total'           - len(self.engines)
          'total_successes' - sum of all values in self.success_count
          'engines'         - at most 20 per-engine dicts with the keys
                              'name', 'available', 'successes', 'failures'
                              and 'backoff_remaining' (whole seconds, never
                              negative), ordered by 'successes' descending
    """
    now = time.time()
    available = self.get_available()
    available_ids = set(ident for _, ident in available)
    engines_list = []
    for _, ident in self.engines:
        # Shorten URL-style identifiers ("scheme://domain/...") to the domain
        # for display. An identifier with a slash but no third segment
        # (e.g. "host/path") would raise IndexError on a blind [2] lookup,
        # so fall back to the full identifier when the domain slot is
        # missing or empty.
        parts = ident.split('/')
        if len(parts) > 2 and parts[2]:
            name = parts[2]
        else:
            name = ident
        backoff_remaining = 0
        if ident in self.backoff_until:
            # Clamp at zero: an expired backoff entry may still be recorded.
            backoff_remaining = max(0, int(self.backoff_until[ident] - now))
        engines_list.append({
            'name': name,
            'available': ident in available_ids,
            'successes': self.success_count.get(ident, 0),
            'failures': self.failures.get(ident, 0),
            'backoff_remaining': backoff_remaining,
        })
    # Most successful engines first; the sort is stable, so ties keep the
    # order they have in self.engines.
    engines_list.sort(key=lambda entry: -entry['successes'])
    return {
        'available': len(available),
        'in_backoff': len(self.engines) - len(available),
        'total': len(self.engines),
        'total_successes': sum(self.success_count.values()),
        'engines': engines_list[:20],  # Top 20 engines only
    }
def load_state(self):
"""Load persisted backoff state from JSON file."""
if not os.path.exists(self.state_file):
@@ -174,6 +211,13 @@ class EngineTracker(object):
# Module-level tracker handle, initialised to None here. get_scraper_stats()
# returns None while this is still None; presumably an EngineTracker instance
# is assigned elsewhere in the module -- TODO confirm.
engine_tracker = None
def get_scraper_stats():
    """Expose the module-level tracker's stats to the API/dashboard.

    Returns:
        The result of engine_tracker.get_stats(), or None when the
        module-level engine_tracker has not been set.
    """
    tracker = engine_tracker
    return tracker.get_stats() if tracker is not None else None
def build_search_query(sqlite=None):
"""Build a search query using configured sources."""
search = ''