diff --git a/dbs.py b/dbs.py
index 94332f4..a64bde2 100644
--- a/dbs.py
+++ b/dbs.py
@@ -639,10 +639,18 @@ PROXY_SOURCES = [
 ]
 
 
-def seed_proxy_sources(sqlite):
-    """Seed known proxy list sources into uris table."""
+def seed_proxy_sources(sqlite, reset_errors=False):
+    """Seed known proxy list sources into uris table.
+
+    Args:
+        sqlite: Database connection
+        reset_errors: If True, reset error/stale counts on existing seed
+            sources that have errored out, allowing them to be
+            retried. Safe to call periodically.
+    """
     timestamp = int(time.time())
     added = 0
+    reset = 0
     for url in PROXY_SOURCES:
         try:
             sqlite.execute(
@@ -653,11 +661,21 @@ def seed_proxy_sources(sqlite):
             )
             if sqlite.cursor.rowcount > 0:
                 added += 1
+            elif reset_errors:
+                # Reset errored-out seed sources so they get reclaimed
+                sqlite.execute(
+                    'UPDATE uris SET error = 0, stale_count = 0, '
+                    'check_interval = 3600, check_time = 0 '
+                    'WHERE url = ? AND error >= 5',
+                    (url,)
+                )
+                if sqlite.cursor.rowcount > 0:
+                    reset += 1
         except Exception as e:
             _log('seed_urls insert error for %s: %s' % (url, e), 'warn')
     sqlite.commit()
-    if added > 0:
-        _log('seeded %d proxy source URLs' % added, 'info')
+    if added > 0 or reset > 0:
+        _log('seed sources: %d new, %d reset' % (added, reset), 'info')
 
 
 def save_session_state(sqlite, stats):
diff --git a/ppf.py b/ppf.py
index f38a26f..695ec0a 100644
--- a/ppf.py
+++ b/ppf.py
@@ -1045,8 +1045,15 @@ def main():
     statusmsg = time.time()
     list_max_age_seconds = config.ppf.list_max_age_days * 86400
     last_skip_log = 0
+    last_reseed = time.time()
+    reseed_interval = 6 * 3600  # re-seed sources every 6 hours
     while True:
         try:
+            # Periodic re-seeding: reset errored-out seed sources
+            if time.time() - last_reseed >= reseed_interval:
+                dbs.seed_proxy_sources(urldb, reset_errors=True)
+                last_reseed = time.time()
+
             # When ppf threads = 0, skip URL fetching (workers handle it via /api/claim-urls)
             if config.ppf.threads == 0:
                 time.sleep(60)