From c19959cda2d5ad1bc68c27db595c4a577b74f3d2 Mon Sep 17 00:00:00 2001 From: Username Date: Tue, 17 Feb 2026 13:13:23 +0100 Subject: [PATCH] dbs: add 19 proxy sources from 7 new repositories Expand PROXY_SOURCES with proxifly, vakhov, prxchk, sunny9577, officialputuid, hookzof, and iplocate lists. Add source_proto, source_confidence, and protos_working schema columns for protocol intelligence. Remove the completed proxy source expansion task and the Technical Debt section from the roadmap. --- ROADMAP.md | 14 ------------- dbs.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 0aab88a..c058549 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -61,20 +61,6 @@ PPF (Proxy Fetcher) is a Python 2 proxy scraping and validation framework design | Target health tracking | Remove unresponsive targets from pool | proxywatchd.py | | Geographic target spread | Ensure targets span multiple regions | config.py | -### Proxy Source Expansion - -| Task | Description | File(s) | -|------|-------------|---------| -| API sources | Integrate free proxy API endpoints | new: api_sources.py | - ---- - -## Technical Debt - -| Item | Description | Risk | -|------|-------------|------| -| Global config in fetch.py | set_config() pattern is fragile | Low - works but not clean | - --- ## File Reference diff --git a/dbs.py b/dbs.py index f543efa..bf84a94 100644 --- a/dbs.py +++ b/dbs.py @@ -66,6 +66,28 @@ def _migrate_confidence_column(sqlite): sqlite.commit() +def _migrate_source_proto(sqlite): + """Add source_proto columns to preserve scraper-detected protocol intelligence.""" + try: + sqlite.execute('SELECT source_proto FROM proxylist LIMIT 1') + except Exception: + # source_proto: protocol detected by scraper (never overwritten by tests) + sqlite.execute('ALTER TABLE proxylist ADD COLUMN source_proto TEXT') + # source_confidence: scraper confidence score (0-100) + sqlite.execute('ALTER TABLE proxylist ADD COLUMN source_confidence INT DEFAULT 0') + 
sqlite.commit() + + +def _migrate_protos_working(sqlite): + """Add protos_working column for multi-protocol storage.""" + try: + sqlite.execute('SELECT protos_working FROM proxylist LIMIT 1') + except Exception: + # protos_working: comma-separated list of working protos (e.g. "http,socks5") + sqlite.execute('ALTER TABLE proxylist ADD COLUMN protos_working TEXT') + sqlite.commit() + + def compute_proxy_list_hash(proxies): """Compute MD5 hash of sorted proxy list for change detection. @@ -290,13 +312,18 @@ def create_table_if_not_exists(sqlite, dbname): asn INT, latitude REAL, longitude REAL, - confidence INT DEFAULT 30)""") + confidence INT DEFAULT 30, + source_proto TEXT, + source_confidence INT DEFAULT 0, + protos_working TEXT)""") # Migration: add columns to existing databases (must run before creating indexes) _migrate_latency_columns(sqlite) _migrate_anonymity_columns(sqlite) _migrate_asn_column(sqlite) _migrate_geolocation_columns(sqlite) _migrate_confidence_column(sqlite) + _migrate_source_proto(sqlite) + _migrate_protos_working(sqlite) # Indexes for common query patterns sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)') sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)') @@ -444,11 +471,11 @@ def insert_proxies(proxydb, proxies, url): filtered += 1 continue - rows.append((timestamp, addr, ip, port, proto, 1, 0, 0, 0, 0, 0, confidence)) + rows.append((timestamp, addr, ip, port, proto, 1, 0, 0, 0, 0, 0, confidence, proto, confidence)) proxydb.executemany( 'INSERT OR IGNORE INTO proxylist ' - '(added,proxy,ip,port,proto,failed,tested,success_count,total_duration,mitm,consecutive_success,confidence) ' - 'VALUES (?,?,?,?,?,?,?,?,?,?,?,?)', + '(added,proxy,ip,port,proto,failed,tested,success_count,total_duration,mitm,consecutive_success,confidence,source_proto,source_confidence) ' + 'VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)', rows ) proxydb.commit() @@ -508,6 +535,32 @@ PROXY_SOURCES = [ 
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all', 'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all', 'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all', + # proxifly/free-proxy-list - 5 min updates (jsDelivr CDN) + 'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/http/data.txt', + 'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/socks4/data.txt', + 'https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/socks5/data.txt', + # vakhov/fresh-proxy-list - 5-20 min updates (GitHub Pages) + 'https://vakhov.github.io/fresh-proxy-list/http.txt', + 'https://vakhov.github.io/fresh-proxy-list/socks4.txt', + 'https://vakhov.github.io/fresh-proxy-list/socks5.txt', + # prxchk/proxy-list - 10 min updates + 'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt', + 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt', + 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt', + # sunny9577/proxy-scraper - 3 hour updates (GitHub Pages) + 'https://sunny9577.github.io/proxy-scraper/generated/http_proxies.txt', + 'https://sunny9577.github.io/proxy-scraper/generated/socks4_proxies.txt', + 'https://sunny9577.github.io/proxy-scraper/generated/socks5_proxies.txt', + # officialputuid/KangProxy - 4-6 hour updates + 'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/http/http.txt', + 'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks4/socks4.txt', + 'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt', + # hookzof/socks5_list - hourly updates + 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt', + # iplocate/free-proxy-list - 30 min updates + 'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/http.txt', + 
'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks4.txt', + 'https://raw.githubusercontent.com/iplocate/free-proxy-list/main/protocols/socks5.txt', ]