#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Database table creation and insertion utilities."""

import time

from misc import _log


def _migrate_latency_columns(sqlite):
    """Add latency columns to databases created before they existed.

    Probes with a SELECT; any failure is taken to mean the column is
    missing and triggers the ALTER TABLEs.
    """
    try:
        sqlite.execute('SELECT avg_latency FROM proxylist LIMIT 1')
    except Exception:
        sqlite.execute('ALTER TABLE proxylist ADD COLUMN avg_latency REAL DEFAULT 0')
        sqlite.execute('ALTER TABLE proxylist ADD COLUMN latency_samples INT DEFAULT 0')
        sqlite.commit()


def _migrate_anonymity_columns(sqlite):
    """Add anonymity detection columns to existing databases."""
    try:
        sqlite.execute('SELECT anonymity FROM proxylist LIMIT 1')
    except Exception:
        # anonymity: transparent, anonymous, elite, or NULL (unknown)
        sqlite.execute('ALTER TABLE proxylist ADD COLUMN anonymity TEXT')
        # exit_ip: the IP seen by the target server
        sqlite.execute('ALTER TABLE proxylist ADD COLUMN exit_ip TEXT')
        sqlite.commit()


def _migrate_asn_column(sqlite):
    """Add ASN column to existing databases."""
    try:
        sqlite.execute('SELECT asn FROM proxylist LIMIT 1')
    except Exception:
        sqlite.execute('ALTER TABLE proxylist ADD COLUMN asn INT')
        sqlite.commit()


def update_proxy_latency(sqlite, proxy, latency_ms):
    """Update rolling average latency for a proxy.

    Uses an exponential moving average whose weight derives from the
    (capped) sample count: early samples converge quickly, long-lived
    proxies change slowly but still react to shifting conditions.

    NOTE(review): does not commit — presumably the caller batches
    commits; confirm against call sites.

    Args:
        sqlite: Database connection
        proxy: Proxy address (ip:port)
        latency_ms: Response latency in milliseconds
    """
    row = sqlite.execute(
        'SELECT avg_latency, latency_samples FROM proxylist WHERE proxy=?',
        (proxy,)
    ).fetchone()
    if not row:
        return
    old_avg, samples = row[0] or 0, row[1] or 0
    # Exponential moving average, capped at 100 samples
    new_samples = min(samples + 1, 100)
    if samples == 0:
        new_avg = latency_ms
    else:
        # Weight recent samples more heavily
        alpha = 2.0 / (new_samples + 1)
        new_avg = alpha * latency_ms + (1 - alpha) * old_avg
    sqlite.execute(
        'UPDATE proxylist SET avg_latency=?, latency_samples=? WHERE proxy=?',
        (new_avg, new_samples, proxy)
    )


def _normalize_ip(ip):
    """Return *ip* as a canonical dotted quad (leading zeros stripped).

    Returns None for anything that is not four dot-separated integer
    octets (wrong part count, non-numeric part, empty input).
    """
    if not ip:
        return None
    parts = ip.strip().split('.')
    if len(parts) != 4:
        return None
    try:
        return '.'.join(str(int(p)) for p in parts)
    except ValueError:
        # Non-numeric octet (e.g. a hostname slipped through): treat as
        # invalid rather than crashing the caller.
        return None


def update_proxy_anonymity(sqlite, proxy, exit_ip, proxy_ip, reveals_headers=None):
    """Update anonymity level based on exit IP and header analysis.

    Anonymity levels:
        transparent: exit_ip == proxy_ip (proxy reveals itself)
        anonymous:   exit_ip != proxy_ip, adds X-Forwarded-For/Via headers
        elite:       exit_ip != proxy_ip, no revealing headers

    NOTE(review): does not commit — presumably the caller batches
    commits; confirm against call sites.

    Args:
        sqlite: Database connection
        proxy: Proxy address (ip:port)
        exit_ip: IP address seen by target server
        proxy_ip: Proxy's IP address
        reveals_headers: True if proxy adds revealing headers,
                         False if not, None if unknown
    """
    if not exit_ip:
        return

    # Normalize both sides so e.g. '010.0.0.1' and '10.0.0.1' compare equal.
    exit_ip = _normalize_ip(exit_ip)
    proxy_ip = _normalize_ip(proxy_ip)
    if not exit_ip:
        return

    # Determine anonymity level
    if exit_ip == proxy_ip:
        anonymity = 'transparent'
    elif reveals_headers is False:
        anonymity = 'elite'
    elif reveals_headers is True:
        anonymity = 'anonymous'
    else:
        # No header check performed, conservative default
        anonymity = 'anonymous'

    sqlite.execute(
        'UPDATE proxylist SET anonymity=?, exit_ip=? WHERE proxy=?',
        (anonymity, exit_ip, proxy)
    )


def create_table_if_not_exists(sqlite, dbname):
    """Create database table with indexes if it doesn't exist.

    Supports two tables: 'proxylist' (proxy records) and 'uris'
    (source URL records). Unknown names create nothing but still commit.
    """
    if dbname == 'proxylist':
        sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist (
            proxy BLOB UNIQUE, country BLOB, added INT, failed INT,
            tested INT, dronebl INT, proto TEXT, mitm INT,
            success_count INT, ip TEXT, port INT, consecutive_success INT,
            total_duration INT, avg_latency REAL DEFAULT 0,
            latency_samples INT DEFAULT 0, anonymity TEXT, exit_ip TEXT,
            asn INT)""")
        # Migration: add columns to existing databases (must run before
        # creating indexes, which may reference the new columns)
        _migrate_latency_columns(sqlite)
        _migrate_anonymity_columns(sqlite)
        _migrate_asn_column(sqlite)
        # Indexes for common query patterns
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_anonymity ON proxylist(anonymity)')
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_asn ON proxylist(asn)')
    elif dbname == 'uris':
        sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
            url TEXT UNIQUE, content_type TEXT, check_time INT, error INT,
            stale_count INT, retrievals INT, proxies_added INT, added INT)""")
        # Indexes for common query patterns
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
    sqlite.commit()


def insert_proxies(proxydb, proxies, url):
    """Insert new proxies into database.

    Malformed entries (no ':' separator or empty host) are skipped so a
    single bad line cannot abort the whole batch.

    Args:
        proxydb: Database connection holding the proxylist table.
        proxies: Iterable of 'ip:port' strings.
        url: Source URL, used only for the log message.
    """
    if not proxies:
        return
    timestamp = int(time.time())
    rows = []
    for p in proxies:
        # rpartition tolerates extra ':' in the host part and lets us
        # detect entries with no port at all.
        ip, sep, port = p.rpartition(':')
        if not sep or not ip:
            continue
        rows.append((timestamp, p, ip, port, 3, 0, 0, 0, 0, 0))
    if not rows:
        return
    proxydb.executemany(
        'INSERT OR IGNORE INTO proxylist '
        '(added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) '
        'VALUES (?,?,?,?,?,?,?,?,?,?)',
        rows
    )
    proxydb.commit()
    # Log the number actually queued (malformed entries excluded).
    _log('+%d proxy/ies from %s' % (len(rows), url), 'added')


def insert_urls(urls, search, sqlite):
    """Insert new URLs into database.

    Args:
        urls: Iterable of URL strings.
        search: Origin of the URLs, used only for the log message.
        sqlite: Database connection holding the uris table.
    """
    if not urls:
        return
    timestamp = int(time.time())
    # added, url, check_time=0 (never checked), error=1, stale_count,
    # retrievals, proxies_added
    rows = [(timestamp, u, 0, 1, 0, 0, 0) for u in urls]
    sqlite.executemany(
        'INSERT OR IGNORE INTO uris '
        '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
        'VALUES (?,?,?,?,?,?,?)',
        rows
    )
    sqlite.commit()
    _log('+%d url(s) from %s' % (len(urls), search), 'added')


# Known proxy list sources (GitHub raw lists, APIs)
PROXY_SOURCES = [
    # TheSpeedX/PROXY-List - large, hourly updates
    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
    # clarketm/proxy-list - curated, daily
    'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
    # monosans/proxy-list - hourly updates
    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',
    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
    # jetkai/proxy-list - 10 min updates
    'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',
    # roosterkid/openproxylist
    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
    # ShiftyTR/Proxy-List
    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
    # mmpx12/proxy-list
    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt',
    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
    # proxyscrape API
    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all',
    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all',
    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all',
]


def seed_proxy_sources(sqlite):
    """Seed known proxy list sources into uris table.

    Already-present URLs are ignored (INSERT OR IGNORE) and not counted.
    Seeding is best-effort: a failure on one URL does not abort the rest.
    """
    timestamp = int(time.time())
    added = 0
    for url in PROXY_SOURCES:
        try:
            # BUGFIX: the connection's execute() returns the cursor whose
            # rowcount we need; 'sqlite.cursor' is a method object, so the
            # old 'sqlite.cursor.rowcount' raised AttributeError (silently
            # swallowed below) and 'added' never incremented.
            cur = sqlite.execute(
                'INSERT OR IGNORE INTO uris '
                '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
                'VALUES (?,?,?,?,?,?,?)',
                (timestamp, url, 0, 0, 0, 0, 0)
            )
            # rowcount is 0 when the row already existed.
            if cur.rowcount > 0:
                added += 1
        except Exception:
            pass
    sqlite.commit()
    if added > 0:
        _log('seeded %d proxy source URLs' % added, 'info')