ppf: add periodic re-seeding of proxy source URLs
Seed sources that error out are permanently excluded from claiming, which over time starves the pipeline. Re-seed every 6 hours, resetting error counts on exhausted sources, to prevent the starvation loop that caused the previous outage.
This commit is contained in:
dbs.py (26 changed lines)
@@ -639,10 +639,18 @@ PROXY_SOURCES = [
 ]
 
 
-def seed_proxy_sources(sqlite):
-    """Seed known proxy list sources into uris table."""
+def seed_proxy_sources(sqlite, reset_errors=False):
+    """Seed known proxy list sources into uris table.
+
+    Args:
+        sqlite: Database connection
+        reset_errors: If True, reset error/stale counts on existing seed
+            sources that have errored out, allowing them to be
+            retried. Safe to call periodically.
+    """
     timestamp = int(time.time())
     added = 0
+    reset = 0
     for url in PROXY_SOURCES:
         try:
             sqlite.execute(
@@ -653,11 +661,21 @@ def seed_proxy_sources(sqlite):
             )
             if sqlite.cursor.rowcount > 0:
                 added += 1
+            elif reset_errors:
+                # Reset errored-out seed sources so they get reclaimed
+                sqlite.execute(
+                    'UPDATE uris SET error = 0, stale_count = 0, '
+                    'check_interval = 3600, check_time = 0 '
+                    'WHERE url = ? AND error >= 5',
+                    (url,)
+                )
+                if sqlite.cursor.rowcount > 0:
+                    reset += 1
         except Exception as e:
             _log('seed_urls insert error for %s: %s' % (url, e), 'warn')
     sqlite.commit()
-    if added > 0:
-        _log('seeded %d proxy source URLs' % added, 'info')
+    if added > 0 or reset > 0:
+        _log('seed sources: %d new, %d reset' % (added, reset), 'info')
 
 
 def save_session_state(sqlite, stats):
ppf.py (7 changed lines)
@@ -1045,8 +1045,15 @@ def main():
     statusmsg = time.time()
     list_max_age_seconds = config.ppf.list_max_age_days * 86400
     last_skip_log = 0
+    last_reseed = time.time()
+    reseed_interval = 6 * 3600  # re-seed sources every 6 hours
     while True:
         try:
+            # Periodic re-seeding: reset errored-out seed sources
+            if time.time() - last_reseed >= reseed_interval:
+                dbs.seed_proxy_sources(urldb, reset_errors=True)
+                last_reseed = time.time()
+
             # When ppf threads = 0, skip URL fetching (workers handle it via /api/claim-urls)
             if config.ppf.threads == 0:
                 time.sleep(60)
Reference in New Issue
Block a user