scraper: add engine stats API for dashboard
- EngineTracker.get_stats() returns detailed per-engine metrics
- get_scraper_stats() module function for external access
- includes success counts, backoff status, availability
This commit is contained in:
44
scraper.py
44
scraper.py
@@ -104,6 +104,43 @@ class EngineTracker(object):
|
||||
in_backoff = len(self.engines) - available
|
||||
return available, in_backoff, len(self.engines)
|
||||
|
||||
def get_stats(self):
    """Return detailed stats for API/dashboard.

    Returns a dict with the keys:

    - 'available': number of entries returned by ``get_available()``
    - 'in_backoff': ``len(self.engines)`` minus the available count
    - 'total': ``len(self.engines)``
    - 'total_successes': sum of every value in ``self.success_count``
    - 'engines': at most 20 per-engine dicts, highest success count
      first, each holding 'name', 'available', 'successes', 'failures'
      and 'backoff_remaining' (whole seconds, never negative).
    """
    now = time.time()
    available = self.get_available()
    available_ids = set(ident for _, ident in available)

    engines_list = []
    for _, ident in self.engines:
        # Shorten identifier for display. For a URL such as
        # "scheme://host/path" the third '/'-separated piece is the host.
        # Identifiers with fewer pieces (e.g. "a/b") or an empty host
        # piece cannot be shortened that way, so show them unchanged
        # instead of raising IndexError or reporting an empty name.
        parts = ident.split('/')
        name = parts[2] if len(parts) > 2 and parts[2] else ident

        backoff_remaining = 0
        if ident in self.backoff_until:
            # Clamp at zero: an expired backoff has nothing remaining.
            backoff_remaining = max(0, int(self.backoff_until[ident] - now))

        engines_list.append({
            'name': name,
            'available': ident in available_ids,
            'successes': self.success_count.get(ident, 0),
            'failures': self.failures.get(ident, 0),
            'backoff_remaining': backoff_remaining,
        })

    # Sort by success count descending; the sort is stable, so engines
    # with equal counts keep their order from self.engines.
    engines_list.sort(key=lambda entry: entry['successes'], reverse=True)

    return {
        'available': len(available),
        'in_backoff': len(self.engines) - len(available),
        'total': len(self.engines),
        'total_successes': sum(self.success_count.values()),
        'engines': engines_list[:20],  # Top 20 engines
    }
|
||||
|
||||
def load_state(self):
|
||||
"""Load persisted backoff state from JSON file."""
|
||||
if not os.path.exists(self.state_file):
|
||||
@@ -174,6 +211,13 @@ class EngineTracker(object):
|
||||
# Shared tracker instance; stays None until something assigns it
# (the assignment is not visible in this hunk -- confirm where it happens).
engine_tracker = None
|
||||
|
||||
|
||||
def get_scraper_stats():
    """Expose the engine tracker's stats to API/dashboard callers.

    Returns the dict produced by the module-level tracker's
    ``get_stats()``, or None while no tracker has been set.
    """
    tracker = engine_tracker
    return tracker.get_stats() if tracker is not None else None
|
||||
|
||||
|
||||
def build_search_query(sqlite=None):
|
||||
"""Build a search query using configured sources."""
|
||||
search = ''
|
||||
|
||||
Reference in New Issue
Block a user