def batch_update_proxy_latency(sqlite, latency_updates):
    """Batch update latency EMA for multiple proxies.

    Fetches current values with one SELECT per chunk, computes new
    exponential-moving-average latencies in Python, then applies them with a
    single executemany UPDATE -- much faster than one round trip per proxy.

    If the same proxy appears more than once in latency_updates, the last
    sample wins. Proxies not present in the proxylist table, and entries
    whose latency is None, are skipped silently.

    Args:
        sqlite: Database connection.
        latency_updates: List of (proxy, latency_ms) tuples.
    """
    if not latency_updates:
        return

    # Deduplicate: last sample wins, and the IN clause never repeats a proxy.
    latency_map = {p: lat for p, lat in latency_updates}
    proxies = list(latency_map)

    updates = []
    # Chunk the IN clause so we never exceed SQLite's bound-parameter limit
    # (SQLITE_MAX_VARIABLE_NUMBER defaults to 999 in builds before 3.32).
    chunk_size = 500
    for start in range(0, len(proxies), chunk_size):
        chunk = proxies[start:start + chunk_size]
        placeholders = ','.join('?' * len(chunk))
        rows = sqlite.execute(
            'SELECT proxy, avg_latency, latency_samples FROM proxylist '
            'WHERE proxy IN (%s)' % placeholders,
            chunk
        ).fetchall()

        for proxy, old_avg, samples in rows:
            old_avg = old_avg or 0
            samples = samples or 0
            latency_ms = latency_map.get(proxy)
            if latency_ms is None:
                # No usable sample for this proxy (None latency supplied).
                continue

            # Cap the sample count so alpha never shrinks below ~2/101:
            # old measurements age out instead of dominating forever.
            new_samples = min(samples + 1, 100)
            if samples == 0:
                new_avg = latency_ms
            else:
                alpha = 2.0 / (new_samples + 1)
                new_avg = alpha * latency_ms + (1 - alpha) * old_avg

            updates.append((new_avg, new_samples, proxy))

    # Batch update in one executemany call.
    if updates:
        sqlite.executemany(
            'UPDATE proxylist SET avg_latency=?, latency_samples=? WHERE proxy=?',
            updates
        )
def batch_update_proxy_anonymity(sqlite, anonymity_updates):
    """Batch update anonymity level for multiple proxies.

    Normalizes the observed exit IP and the proxy's own IP, classifies each
    proxy, then applies all changes with a single executemany UPDATE.

    Entries whose exit_ip is missing or fails to normalize are skipped.

    Args:
        sqlite: Database connection.
        anonymity_updates: List of (proxy, exit_ip, proxy_ip, reveals_headers)
            tuples. reveals_headers may be True, False, or None (unknown).
    """
    if not anonymity_updates:
        return

    def _normalize_ip(ip):
        # Canonical dotted-quad (leading zeros stripped via int()), or None
        # when the string is not four integer octets. No 0-255 range check,
        # matching the rest of the file's lenient handling.
        if not ip:
            return None
        parts = ip.strip().split('.')
        if len(parts) != 4:
            return None
        try:
            return '.'.join(str(int(p)) for p in parts)
        except ValueError:
            return None

    updates = []
    for proxy, exit_ip, proxy_ip, reveals_headers in anonymity_updates:
        exit_ip = _normalize_ip(exit_ip)
        proxy_ip = _normalize_ip(proxy_ip)

        if not exit_ip:
            continue

        # Classification: an exit IP equal to the proxy's own IP means the
        # origin address leaks through (transparent). A proxy known NOT to
        # reveal identifying headers is elite. Everything else -- headers
        # revealed (True) or unknown (None) -- counts as anonymous. The
        # original code had two identical branches for the last case;
        # they are collapsed into one `else` with identical behavior.
        if exit_ip == proxy_ip:
            anonymity = 'transparent'
        elif reveals_headers is False:
            anonymity = 'elite'
        else:
            anonymity = 'anonymous'

        updates.append((anonymity, exit_ip, proxy))

    # Batch update in one executemany call.
    if updates:
        sqlite.executemany(
            'UPDATE proxylist SET anonymity=?, exit_ip=? WHERE proxy=?',
            updates
        )