dbs: add batch operations and session state persistence
- Add batch_update_proxy_latency for bulk updates
- Add save/load_session_state for stats persistence
- Improve insert_urls to return the count of newly inserted URLs
This commit is contained in:
109
dbs.py
109
dbs.py
@@ -94,6 +94,55 @@ def update_proxy_latency(sqlite, proxy, latency_ms):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def batch_update_proxy_latency(sqlite, latency_updates, chunk_size=500):
    """Batch update latency for multiple proxies.

    Fetches the current averages with SELECT ... IN (...) queries, computes
    the new exponential moving averages in Python, then writes everything
    back with a single executemany UPDATE.

    The SELECT is issued in chunks of at most ``chunk_size`` proxies because
    SQLite limits the number of bound parameters per statement (999 by
    default before SQLite 3.32.0); an unbounded IN list fails on large
    batches.

    When the same proxy appears more than once in ``latency_updates``, every
    sample is folded into the average in the order given, so the result
    matches applying the samples one at a time. Samples whose latency is
    None are ignored. Proxies that are not present in ``proxylist`` are
    ignored. This function does not commit; that is left to the caller.

    Args:
        sqlite: Database connection
        latency_updates: List of (proxy, latency_ms) tuples
        chunk_size: Maximum number of proxies bound per SELECT (minimum 1)
    """
    if not latency_updates:
        return

    # Group samples per proxy, keeping first-seen order so each proxy is
    # bound only once in the IN clause.
    pending = {}
    ordered = []
    for proxy, latency_ms in latency_updates:
        if latency_ms is None:
            continue
        if proxy not in pending:
            pending[proxy] = []
            ordered.append(proxy)
        pending[proxy].append(latency_ms)
    if not ordered:
        return

    step = max(1, chunk_size)
    updates = []
    for start in range(0, len(ordered), step):
        chunk = ordered[start:start + step]

        # Fetch current values for this chunk in a single query
        placeholders = ','.join('?' * len(chunk))
        rows = sqlite.execute(
            'SELECT proxy, avg_latency, latency_samples FROM proxylist WHERE proxy IN (%s)' % placeholders,
            chunk
        ).fetchall()

        # Compute new averages
        for row in rows:
            proxy, avg, samples = row[0], row[1] or 0, row[2] or 0
            measurements = pending.get(proxy)
            if not measurements:
                continue

            for latency_ms in measurements:
                # Sample count is capped at 100 so alpha never drops below
                # 2/101 and recent measurements keep some weight.
                new_samples = min(samples + 1, 100)
                if samples == 0:
                    # First measurement seeds the average directly
                    avg = latency_ms
                else:
                    alpha = 2.0 / (new_samples + 1)
                    avg = alpha * latency_ms + (1 - alpha) * avg
                samples = new_samples

            updates.append((avg, samples, proxy))

    # Batch update
    if updates:
        sqlite.executemany(
            'UPDATE proxylist SET avg_latency=?, latency_samples=? WHERE proxy=?',
            updates
        )
|
||||||
|
|
||||||
|
|
||||||
def update_proxy_anonymity(sqlite, proxy, exit_ip, proxy_ip, reveals_headers=None):
|
def update_proxy_anonymity(sqlite, proxy, exit_ip, proxy_ip, reveals_headers=None):
|
||||||
"""Update anonymity level based on exit IP and header analysis.
|
"""Update anonymity level based on exit IP and header analysis.
|
||||||
|
|
||||||
@@ -147,6 +196,56 @@ def update_proxy_anonymity(sqlite, proxy, exit_ip, proxy_ip, reveals_headers=Non
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def batch_update_proxy_anonymity(sqlite, anonymity_updates):
    """Batch update anonymity for multiple proxies.

    For each entry the exit IP and proxy IP are normalized to canonical
    dotted-quad form (leading zeros stripped). Entries whose exit IP is
    missing or not a valid IPv4 address are skipped. The anonymity level is:

        'transparent' - exit IP equals the proxy IP
        'elite'       - IPs differ and reveals_headers is False
        'anonymous'   - IPs differ and reveals_headers is True or unknown

    All resulting rows are written with a single executemany UPDATE. This
    function does not commit; that is left to the caller.

    Args:
        sqlite: Database connection
        anonymity_updates: List of (proxy, exit_ip, proxy_ip, reveals_headers) tuples
    """
    if not anonymity_updates:
        return

    def normalize_ip(ip):
        """Return canonical dotted-quad form of ip, or None if invalid."""
        if not ip:
            return None
        parts = ip.strip().split('.')
        if len(parts) != 4:
            return None
        octets = []
        for part in parts:
            # int() alone would accept signs, inner whitespace and (on
            # Python 3) underscores, so require plain digits first.
            if not part.isdigit():
                return None
            try:
                value = int(part)
            except ValueError:
                return None
            if value > 255:
                return None
            octets.append(str(value))
        return '.'.join(octets)

    updates = []
    for proxy, exit_ip, proxy_ip, reveals_headers in anonymity_updates:
        exit_ip = normalize_ip(exit_ip)
        proxy_ip = normalize_ip(proxy_ip)

        if not exit_ip:
            continue

        # Determine anonymity level
        if exit_ip == proxy_ip:
            anonymity = 'transparent'
        elif reveals_headers is False:
            anonymity = 'elite'
        else:
            # Headers revealed, or header analysis unavailable (None)
            anonymity = 'anonymous'

        updates.append((anonymity, exit_ip, proxy))

    # Batch update
    if updates:
        sqlite.executemany(
            'UPDATE proxylist SET anonymity=?, exit_ip=? WHERE proxy=?',
            updates
        )
|
||||||
|
|
||||||
|
|
||||||
def create_table_if_not_exists(sqlite, dbname):
|
def create_table_if_not_exists(sqlite, dbname):
|
||||||
"""Create database table with indexes if it doesn't exist."""
|
"""Create database table with indexes if it doesn't exist."""
|
||||||
if dbname == 'proxylist':
|
if dbname == 'proxylist':
|
||||||
@@ -273,9 +372,11 @@ def insert_proxies(proxydb, proxies, url):
|
|||||||
|
|
||||||
|
|
||||||
def insert_urls(urls, search, sqlite):
|
def insert_urls(urls, search, sqlite):
|
||||||
"""Insert new URLs into database."""
|
"""Insert new URLs into database. Returns count of newly inserted URLs."""
|
||||||
if not urls:
|
if not urls:
|
||||||
return
|
return 0
|
||||||
|
# Count before insert
|
||||||
|
before = sqlite.execute('SELECT COUNT(*) FROM uris').fetchone()[0]
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
rows = [(timestamp, u, 0, 1, 0, 0, 0) for u in urls]
|
rows = [(timestamp, u, 0, 1, 0, 0, 0) for u in urls]
|
||||||
sqlite.executemany(
|
sqlite.executemany(
|
||||||
@@ -285,7 +386,9 @@ def insert_urls(urls, search, sqlite):
|
|||||||
rows
|
rows
|
||||||
)
|
)
|
||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
_log('+%d url(s) from %s' % (len(urls), search), 'added')
|
# Count after insert to determine how many were actually new
|
||||||
|
after = sqlite.execute('SELECT COUNT(*) FROM uris').fetchone()[0]
|
||||||
|
return after - before
|
||||||
|
|
||||||
|
|
||||||
# Known proxy list sources (GitHub raw lists, APIs)
|
# Known proxy list sources (GitHub raw lists, APIs)
|
||||||
|
|||||||
Reference in New Issue
Block a user