From 77867d0b2db0a72dd120d02162fefe455006ac94 Mon Sep 17 00:00:00 2001
From: Username
Date: Sun, 21 Dec 2025 23:37:38 +0100
Subject: [PATCH] dbs: add latency columns and migration

Add anonymity, exit_ip and asn columns to proxylist together with their
migrations and indexes, add update_proxy_anonymity(), and seed a list of
known proxy sources into the uris table.

---
 dbs.py | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 160 insertions(+), 3 deletions(-)

diff --git a/dbs.py b/dbs.py
index b401d2a..2cc9e1f 100644
--- a/dbs.py
+++ b/dbs.py
@@ -16,6 +16,38 @@ def _migrate_latency_columns(sqlite):
         sqlite.commit()
 
 
+def _migrate_anonymity_columns(sqlite):
+    """Add anonymity detection columns to existing databases.
+
+    Each column is probed on its own, so a database that already has only
+    one of them (e.g. after an interrupted migration) is still completed.
+    """
+    changed = False
+    try:
+        sqlite.execute('SELECT anonymity FROM proxylist LIMIT 1')
+    except Exception:
+        # anonymity: transparent, anonymous, elite, or NULL (unknown)
+        sqlite.execute('ALTER TABLE proxylist ADD COLUMN anonymity TEXT')
+        changed = True
+    try:
+        sqlite.execute('SELECT exit_ip FROM proxylist LIMIT 1')
+    except Exception:
+        # exit_ip: the IP seen by the target server
+        sqlite.execute('ALTER TABLE proxylist ADD COLUMN exit_ip TEXT')
+        changed = True
+    if changed:
+        sqlite.commit()
+
+
+def _migrate_asn_column(sqlite):
+    """Add ASN column to existing databases."""
+    try:
+        sqlite.execute('SELECT asn FROM proxylist LIMIT 1')
+    except Exception:
+        sqlite.execute('ALTER TABLE proxylist ADD COLUMN asn INT')
+        sqlite.commit()
+
+
 def update_proxy_latency(sqlite, proxy, latency_ms):
     """Update rolling average latency for a proxy.
 
@@ -46,6 +78,65 @@ def update_proxy_latency(sqlite, proxy, latency_ms):
     )
 
 
+def update_proxy_anonymity(sqlite, proxy, exit_ip, proxy_ip, reveals_headers=None):
+    """Update anonymity level based on exit IP and header analysis.
+
+    Anonymity levels:
+        transparent: exit_ip == proxy_ip (proxy reveals itself)
+        anonymous: exit_ip != proxy_ip, adds X-Forwarded-For/Via headers
+        elite: exit_ip != proxy_ip, no revealing headers
+
+    Nothing is written when exit_ip is empty or is not a valid dotted-quad
+    IPv4 address. The change is not committed here.
+
+    Args:
+        sqlite: Database connection
+        proxy: Proxy address (ip:port)
+        exit_ip: IP address seen by target server
+        proxy_ip: Proxy's IP address
+        reveals_headers: True if proxy adds revealing headers, False if not, None if unknown
+    """
+    if not exit_ip:
+        return
+
+    # Normalize IPs (remove leading zeros); None for anything that is not
+    # four decimal octets in 0-255, so junk input never raises here.
+    def normalize_ip(ip):
+        if not ip:
+            return None
+        parts = ip.strip().split('.')
+        if len(parts) != 4:
+            return None
+        try:
+            octets = [int(p) for p in parts]
+        except ValueError:
+            return None
+        if min(octets) < 0 or max(octets) > 255:
+            return None
+        return '.'.join(str(o) for o in octets)
+
+    exit_ip = normalize_ip(exit_ip)
+    proxy_ip = normalize_ip(proxy_ip)
+
+    if not exit_ip:
+        return
+
+    # Determine anonymity level
+    if exit_ip == proxy_ip:
+        anonymity = 'transparent'
+    elif reveals_headers is False:
+        anonymity = 'elite'
+    else:
+        # Headers revealed (True) or no header check performed (None):
+        # conservative default
+        anonymity = 'anonymous'
+
+    sqlite.execute(
+        'UPDATE proxylist SET anonymity=?, exit_ip=? WHERE proxy=?',
+        (anonymity, exit_ip, proxy)
+    )
+
+
 def create_table_if_not_exists(sqlite, dbname):
     """Create database table with indexes if it doesn't exist."""
     if dbname == 'proxylist':
@@ -64,13 +155,20 @@ def create_table_if_not_exists(sqlite, dbname):
             consecutive_success INT,
             total_duration INT,
             avg_latency REAL DEFAULT 0,
-            latency_samples INT DEFAULT 0)""")
+            latency_samples INT DEFAULT 0,
+            anonymity TEXT,
+            exit_ip TEXT,
+            asn INT)""")
+        # Migration: add columns to existing databases (must run before creating indexes)
+        _migrate_latency_columns(sqlite)
+        _migrate_anonymity_columns(sqlite)
+        _migrate_asn_column(sqlite)
         # Indexes for common query patterns
         sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
         sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
         sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
-        # Migration: add latency columns if missing
-        _migrate_latency_columns(sqlite)
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_anonymity ON proxylist(anonymity)')
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_asn ON proxylist(asn)')
 
     elif dbname == 'uris':
         sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
@@ -122,3 +220,62 @@ def insert_urls(urls, search, sqlite):
     )
     sqlite.commit()
     _log('+%d url(s) from %s' % (len(urls), search), 'added')
+
+
+# Known proxy list sources (GitHub raw lists, APIs)
+PROXY_SOURCES = [
+    # TheSpeedX/PROXY-List - large, hourly updates
+    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
+    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
+    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
+    # clarketm/proxy-list - curated, daily
+    'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',
+    # monosans/proxy-list - hourly updates
+    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',
+    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
+    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
+    # jetkai/proxy-list - 10 min updates
+    'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',
+    # roosterkid/openproxylist
+    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
+    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
+    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
+    # ShiftyTR/Proxy-List
+    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
+    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
+    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
+    # mmpx12/proxy-list
+    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt',
+    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
+    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
+    # proxyscrape API
+    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all',
+    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all',
+    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all',
+]
+
+
+def seed_proxy_sources(sqlite):
+    """Seed known proxy list sources into uris table.
+
+    INSERT OR IGNORE is used, so a URL that conflicts with an existing row
+    is skipped. A failing insert is logged and the remaining URLs are tried.
+    """
+    timestamp = int(time.time())
+    added = 0
+    for url in PROXY_SOURCES:
+        try:
+            sqlite.execute(
+                'INSERT OR IGNORE INTO uris '
+                '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
+                'VALUES (?,?,?,?,?,?,?)',
+                (timestamp, url, 0, 0, 0, 0, 0)
+            )
+            if sqlite.cursor.rowcount > 0:
+                added += 1
+        except Exception as e:
+            # Best effort: report the failure instead of hiding it
+            _log('failed to seed %s: %s' % (url, e), 'error')
+    sqlite.commit()
+    if added > 0:
+        _log('seeded %d proxy source URLs' % added, 'info')