ppf: add periodic re-seeding of proxy source URLs

Seed sources that error out are permanently excluded from claiming.
Over time this starves the pipeline. Re-seed every 6 hours, resetting
the error counts of exhausted seed sources so they can be claimed and
retried; this prevents the starvation loop that caused the previous outage.
This commit is contained in:
Username
2026-02-22 11:18:45 +01:00
parent 35285a84bf
commit 7ae0ac0c26
2 changed files with 29 additions and 4 deletions

26
dbs.py
View File

@@ -639,10 +639,18 @@ PROXY_SOURCES = [
]
def seed_proxy_sources(sqlite):
"""Seed known proxy list sources into uris table."""
def seed_proxy_sources(sqlite, reset_errors=False):
"""Seed known proxy list sources into uris table.
Args:
sqlite: Database connection
reset_errors: If True, reset error/stale counts on existing seed
sources that have errored out, allowing them to be
retried. Safe to call periodically.
"""
timestamp = int(time.time())
added = 0
reset = 0
for url in PROXY_SOURCES:
try:
sqlite.execute(
@@ -653,11 +661,21 @@ def seed_proxy_sources(sqlite):
)
if sqlite.cursor.rowcount > 0:
added += 1
elif reset_errors:
# Reset errored-out seed sources so they get reclaimed
sqlite.execute(
'UPDATE uris SET error = 0, stale_count = 0, '
'check_interval = 3600, check_time = 0 '
'WHERE url = ? AND error >= 5',
(url,)
)
if sqlite.cursor.rowcount > 0:
reset += 1
except Exception as e:
_log('seed_urls insert error for %s: %s' % (url, e), 'warn')
sqlite.commit()
if added > 0:
_log('seeded %d proxy source URLs' % added, 'info')
if added > 0 or reset > 0:
_log('seed sources: %d new, %d reset' % (added, reset), 'info')
def save_session_state(sqlite, stats):

7
ppf.py
View File

@@ -1045,8 +1045,15 @@ def main():
statusmsg = time.time()
list_max_age_seconds = config.ppf.list_max_age_days * 86400
last_skip_log = 0
last_reseed = time.time()
reseed_interval = 6 * 3600 # re-seed sources every 6 hours
while True:
try:
# Periodic re-seeding: reset errored-out seed sources
if time.time() - last_reseed >= reseed_interval:
dbs.seed_proxy_sources(urldb, reset_errors=True)
last_reseed = time.time()
# When ppf threads = 0, skip URL fetching (workers handle it via /api/claim-urls)
if config.ppf.threads == 0:
time.sleep(60)