From e7478de79eebfbac75ba2cec0cce4d8d6300d03f Mon Sep 17 00:00:00 2001
From: Username
Date: Tue, 23 Dec 2025 17:23:28 +0100
Subject: [PATCH] scraper: add engine stats API for dashboard

- EngineTracker.get_stats() returns detailed per-engine metrics
- get_scraper_stats() module function for external access
- includes success counts, backoff status, availability
- display names fall back to the raw identifier when it is not a full URL
---
 scraper.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/scraper.py b/scraper.py
index f6ddedf..bca9ced 100755
--- a/scraper.py
+++ b/scraper.py
@@ -104,6 +104,54 @@ class EngineTracker(object):
         in_backoff = len(self.engines) - available
         return available, in_backoff, len(self.engines)
 
+    def get_stats(self):
+        """Return detailed stats for API/dashboard.
+
+        Returns a dict with the aggregate counters 'available',
+        'in_backoff', 'total' and 'total_successes', plus 'engines': up
+        to 20 per-engine dicts ('name', 'available', 'successes',
+        'failures', 'backoff_remaining' as a non-negative int) ordered
+        by success count, highest first.
+        """
+        now = time.time()
+        available = self.get_available()
+        available_ids = set(ident for _, ident in available)
+
+        engines_list = []
+        for eng, ident in self.engines:
+            # Shorten identifier for display: URL-style identifiers
+            # ("scheme://host/...") carry the host in the third '/'-field.
+            # Without a non-empty third field (bare name, "host/path") show
+            # the identifier unchanged rather than raise IndexError.
+            parts = ident.split('/')
+            if len(parts) > 2 and parts[2]:
+                name = parts[2]
+            else:
+                name = ident
+
+            backoff_remaining = 0
+            if ident in self.backoff_until:
+                backoff_remaining = max(0, int(self.backoff_until[ident] - now))
+
+            engines_list.append({
+                'name': name,
+                'available': ident in available_ids,
+                'successes': self.success_count.get(ident, 0),
+                'failures': self.failures.get(ident, 0),
+                'backoff_remaining': backoff_remaining
+            })
+
+        # Sort by success count descending
+        engines_list.sort(key=lambda x: -x['successes'])
+
+        return {
+            'available': len(available),
+            'in_backoff': len(self.engines) - len(available),
+            'total': len(self.engines),
+            'total_successes': sum(self.success_count.values()),
+            'engines': engines_list[:20]  # Top 20 engines
+        }
+
     def load_state(self):
         """Load persisted backoff state from JSON file."""
         if not os.path.exists(self.state_file):
@@ -174,6 +222,13 @@ class EngineTracker(object):
 engine_tracker = None
 
 
+def get_scraper_stats():
+    """Return engine_tracker.get_stats(), or None if no tracker is set."""
+    if engine_tracker is None:
+        return None
+    return engine_tracker.get_stats()
+
+
 def build_search_query(sqlite=None):
     """Build a search query using configured sources."""
     search = ''