dbs: add latency columns and migration
This commit is contained in:
138
dbs.py
138
dbs.py
@@ -16,6 +16,27 @@ def _migrate_latency_columns(sqlite):
|
|||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _migrate_anonymity_columns(sqlite):
|
||||||
|
"""Add anonymity detection columns to existing databases."""
|
||||||
|
try:
|
||||||
|
sqlite.execute('SELECT anonymity FROM proxylist LIMIT 1')
|
||||||
|
except Exception:
|
||||||
|
# anonymity: transparent, anonymous, elite, or NULL (unknown)
|
||||||
|
sqlite.execute('ALTER TABLE proxylist ADD COLUMN anonymity TEXT')
|
||||||
|
# exit_ip: the IP seen by the target server
|
||||||
|
sqlite.execute('ALTER TABLE proxylist ADD COLUMN exit_ip TEXT')
|
||||||
|
sqlite.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _migrate_asn_column(sqlite):
|
||||||
|
"""Add ASN column to existing databases."""
|
||||||
|
try:
|
||||||
|
sqlite.execute('SELECT asn FROM proxylist LIMIT 1')
|
||||||
|
except Exception:
|
||||||
|
sqlite.execute('ALTER TABLE proxylist ADD COLUMN asn INT')
|
||||||
|
sqlite.commit()
|
||||||
|
|
||||||
|
|
||||||
def update_proxy_latency(sqlite, proxy, latency_ms):
|
def update_proxy_latency(sqlite, proxy, latency_ms):
|
||||||
"""Update rolling average latency for a proxy.
|
"""Update rolling average latency for a proxy.
|
||||||
|
|
||||||
@@ -46,6 +67,56 @@ def update_proxy_latency(sqlite, proxy, latency_ms):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_ipv4(ip):
    """Return *ip* as a canonical dotted quad, or None if it is not valid IPv4.

    Surrounding whitespace and leading zeros in each octet are removed, so
    ``'001.002.003.004'`` becomes ``'1.2.3.4'``. Anything that is empty, does
    not have exactly four dot-separated parts, contains a non-numeric part,
    or has an octet outside 0-255 yields None instead of raising.
    """
    if not ip:
        return None
    parts = ip.strip().split('.')
    if len(parts) != 4:
        return None
    try:
        octets = [int(p) for p in parts]
    except ValueError:
        # Non-numeric part (hostname, garbage in a response body, ...).
        return None
    if any(o < 0 or o > 255 for o in octets):
        return None
    return '.'.join(str(o) for o in octets)


def update_proxy_anonymity(sqlite, proxy, exit_ip, proxy_ip, reveals_headers=None):
    """Update anonymity level based on exit IP and header analysis.

    Anonymity levels:
        transparent: exit_ip == proxy_ip (proxy reveals itself)
        anonymous: exit_ip != proxy_ip, adds X-Forwarded-For/Via headers
        elite: exit_ip != proxy_ip, no revealing headers

    The row is left untouched when ``exit_ip`` is empty or is not a valid
    IPv4 address. This function only issues the UPDATE; it does not commit.

    Args:
        sqlite: Database connection
        proxy: Proxy address (ip:port)
        exit_ip: IP address seen by target server
        proxy_ip: Proxy's IP address
        reveals_headers: True if proxy adds revealing headers, False if not, None if unknown
    """
    exit_ip = _normalize_ipv4(exit_ip)
    if not exit_ip:
        return

    # An unparsable proxy_ip normalizes to None and therefore never matches
    # the exit IP, so the proxy is not classified as transparent.
    proxy_ip = _normalize_ipv4(proxy_ip)

    # Determine anonymity level
    if exit_ip == proxy_ip:
        anonymity = 'transparent'
    elif reveals_headers is False:
        anonymity = 'elite'
    else:
        # Either revealing headers were seen (True) or no header check was
        # performed (None); the latter is a conservative default.
        anonymity = 'anonymous'

    sqlite.execute(
        'UPDATE proxylist SET anonymity=?, exit_ip=? WHERE proxy=?',
        (anonymity, exit_ip, proxy)
    )
|
||||||
|
|
||||||
|
|
||||||
def create_table_if_not_exists(sqlite, dbname):
|
def create_table_if_not_exists(sqlite, dbname):
|
||||||
"""Create database table with indexes if it doesn't exist."""
|
"""Create database table with indexes if it doesn't exist."""
|
||||||
if dbname == 'proxylist':
|
if dbname == 'proxylist':
|
||||||
@@ -64,13 +135,20 @@ def create_table_if_not_exists(sqlite, dbname):
|
|||||||
consecutive_success INT,
|
consecutive_success INT,
|
||||||
total_duration INT,
|
total_duration INT,
|
||||||
avg_latency REAL DEFAULT 0,
|
avg_latency REAL DEFAULT 0,
|
||||||
latency_samples INT DEFAULT 0)""")
|
latency_samples INT DEFAULT 0,
|
||||||
|
anonymity TEXT,
|
||||||
|
exit_ip TEXT,
|
||||||
|
asn INT)""")
|
||||||
|
# Migration: add columns to existing databases (must run before creating indexes)
|
||||||
|
_migrate_latency_columns(sqlite)
|
||||||
|
_migrate_anonymity_columns(sqlite)
|
||||||
|
_migrate_asn_column(sqlite)
|
||||||
# Indexes for common query patterns
|
# Indexes for common query patterns
|
||||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
|
||||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
|
||||||
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
|
||||||
# Migration: add latency columns if missing
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_anonymity ON proxylist(anonymity)')
|
||||||
_migrate_latency_columns(sqlite)
|
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_asn ON proxylist(asn)')
|
||||||
|
|
||||||
elif dbname == 'uris':
|
elif dbname == 'uris':
|
||||||
sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
|
sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
|
||||||
@@ -122,3 +200,57 @@ def insert_urls(urls, search, sqlite):
|
|||||||
)
|
)
|
||||||
sqlite.commit()
|
sqlite.commit()
|
||||||
_log('+%d url(s) from %s' % (len(urls), search), 'added')
|
_log('+%d url(s) from %s' % (len(urls), search), 'added')
|
||||||
|
|
||||||
|
|
||||||
|
# Built-in seed URLs: plain-text proxy lists published as raw GitHub files,
# plus the proxyscrape HTTP API. Grouped by upstream project.
PROXY_SOURCES = [
    # --- TheSpeedX/PROXY-List (large, hourly updates) ---
    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
    'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',

    # --- clarketm/proxy-list (curated, daily) ---
    'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt',

    # --- monosans/proxy-list (hourly updates) ---
    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',
    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
    'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',

    # --- jetkai/proxy-list (10 min updates) ---
    'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies.txt',

    # --- roosterkid/openproxylist ---
    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt',
    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
    'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',

    # --- ShiftyTR/Proxy-List ---
    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt',
    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
    'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',

    # --- mmpx12/proxy-list ---
    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt',
    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
    'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',

    # --- proxyscrape API ---
    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all',
    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all',
    'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all',
]
|
||||||
|
|
||||||
|
|
||||||
|
def seed_proxy_sources(sqlite):
    """Seed known proxy list sources into uris table.

    Every URL in ``PROXY_SOURCES`` is inserted with ``INSERT OR IGNORE`` and
    zeroed counters, stamped with the current time. Seeding is best-effort:
    a failing insert does not abort the loop, but failures are counted and
    reported through ``_log`` rather than being silently discarded.

    Args:
        sqlite: Database connection exposing ``execute()``, ``commit()`` and a
            ``cursor`` attribute whose ``rowcount`` reflects the last
            statement.
            NOTE(review): ``cursor`` must be an attribute, not a method as on
            a raw ``sqlite3.Connection`` -- confirm against the wrapper.
    """
    timestamp = int(time.time())
    added = 0
    failed = 0
    last_error = None
    for url in PROXY_SOURCES:
        try:
            sqlite.execute(
                'INSERT OR IGNORE INTO uris '
                '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
                'VALUES (?,?,?,?,?,?,?)',
                (timestamp, url, 0, 0, 0, 0, 0)
            )
            # rowcount is consulted so that only rows actually inserted
            # (not ignored ones) are counted.
            if sqlite.cursor.rowcount > 0:
                added += 1
        except Exception as e:
            # Keep going: one bad insert must not block the other sources.
            failed += 1
            last_error = e
    sqlite.commit()
    if added > 0:
        _log('seeded %d proxy source URLs' % added, 'info')
    if failed > 0:
        _log(
            'failed to seed %d proxy source URLs (last error: %s)'
            % (failed, last_error),
            'error'
        )
|
||||||
|
|||||||
Reference in New Issue
Block a user