Compare commits
2 Commits
35285a84bf
...
d1e22a388c
| Author | SHA1 | Date | |
|---|---|---|---|
| | d1e22a388c | | |
| | 7ae0ac0c26 | | |
26
dbs.py
26
dbs.py
@@ -639,10 +639,18 @@ PROXY_SOURCES = [
|
||||
]
|
||||
|
||||
|
||||
def seed_proxy_sources(sqlite):
|
||||
"""Seed known proxy list sources into uris table."""
|
||||
def seed_proxy_sources(sqlite, reset_errors=False):
|
||||
"""Seed known proxy list sources into uris table.
|
||||
|
||||
Args:
|
||||
sqlite: Database connection
|
||||
reset_errors: If True, reset error/stale counts on existing seed
|
||||
sources that have errored out, allowing them to be
|
||||
retried. Safe to call periodically.
|
||||
"""
|
||||
timestamp = int(time.time())
|
||||
added = 0
|
||||
reset = 0
|
||||
for url in PROXY_SOURCES:
|
||||
try:
|
||||
sqlite.execute(
|
||||
@@ -653,11 +661,21 @@ def seed_proxy_sources(sqlite):
|
||||
)
|
||||
if sqlite.cursor.rowcount > 0:
|
||||
added += 1
|
||||
elif reset_errors:
|
||||
# Reset errored-out seed sources so they get reclaimed
|
||||
sqlite.execute(
|
||||
'UPDATE uris SET error = 0, stale_count = 0, '
|
||||
'check_interval = 3600, check_time = 0 '
|
||||
'WHERE url = ? AND error >= 5',
|
||||
(url,)
|
||||
)
|
||||
if sqlite.cursor.rowcount > 0:
|
||||
reset += 1
|
||||
except Exception as e:
|
||||
_log('seed_urls insert error for %s: %s' % (url, e), 'warn')
|
||||
sqlite.commit()
|
||||
if added > 0:
|
||||
_log('seeded %d proxy source URLs' % added, 'info')
|
||||
if added > 0 or reset > 0:
|
||||
_log('seed sources: %d new, %d reset' % (added, reset), 'info')
|
||||
|
||||
|
||||
def save_session_state(sqlite, stats):
|
||||
|
||||
18
httpd.py
18
httpd.py
@@ -31,6 +31,13 @@ except (ImportError, IOError, ValueError):
|
||||
_geodb = None
|
||||
_geolite = False
|
||||
|
||||
# ASN lookup (optional)
|
||||
try:
|
||||
import pyasn
|
||||
_asndb = pyasn.pyasn(os.path.join("data", "ipasn.dat"))
|
||||
except (ImportError, IOError):
|
||||
_asndb = None
|
||||
|
||||
# Rate limiting configuration
|
||||
_rate_limits = defaultdict(list)
|
||||
_rate_lock = threading.Lock()
|
||||
@@ -604,7 +611,7 @@ def submit_proxy_reports(db, worker_id, proxies):
|
||||
''', (proxy_key, ip, port, proto, now_int, now_int, latency, now_int,
|
||||
checktype, target))
|
||||
|
||||
# Geolocate if IP2Location available
|
||||
# Geolocate and ASN lookup
|
||||
if _geolite and _geodb:
|
||||
try:
|
||||
rec = _geodb.get_all(ip)
|
||||
@@ -614,6 +621,15 @@ def submit_proxy_reports(db, worker_id, proxies):
|
||||
(rec.country_short, proxy_key))
|
||||
except Exception:
|
||||
pass
|
||||
if _asndb:
|
||||
try:
|
||||
asn_result = _asndb.lookup(ip)
|
||||
if asn_result and asn_result[0]:
|
||||
db.execute(
|
||||
'UPDATE proxylist SET asn=? WHERE proxy=?',
|
||||
(asn_result[0], proxy_key))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Track per-URL working count for working_ratio
|
||||
if source_url:
|
||||
|
||||
7
ppf.py
7
ppf.py
@@ -1045,8 +1045,15 @@ def main():
|
||||
statusmsg = time.time()
|
||||
list_max_age_seconds = config.ppf.list_max_age_days * 86400
|
||||
last_skip_log = 0
|
||||
last_reseed = time.time()
|
||||
reseed_interval = 6 * 3600 # re-seed sources every 6 hours
|
||||
while True:
|
||||
try:
|
||||
# Periodic re-seeding: reset errored-out seed sources
|
||||
if time.time() - last_reseed >= reseed_interval:
|
||||
dbs.seed_proxy_sources(urldb, reset_errors=True)
|
||||
last_reseed = time.time()
|
||||
|
||||
# When ppf threads = 0, skip URL fetching (workers handle it via /api/claim-urls)
|
||||
if config.ppf.threads == 0:
|
||||
time.sleep(60)
|
||||
|
||||
Reference in New Issue
Block a user