ppf: add content hash for duplicate proxy list detection

This commit is contained in:
Username
2025-12-22 00:03:12 +01:00
parent 6b5eb83bf4
commit f382a4ab6a
2 changed files with 56 additions and 6 deletions

30
dbs.py
View File

@@ -37,6 +37,31 @@ def _migrate_asn_column(sqlite):
sqlite.commit()
def _migrate_content_hash_column(sqlite):
    """Add content_hash column to uris table for duplicate detection.

    Idempotent schema migration for databases created before the column
    existed; the CREATE TABLE path adds the column directly for new files.

    Args:
        sqlite: Open database connection (presumably sqlite3 -- confirm);
            must support execute() and commit().
    """
    try:
        # EAFP probe: selecting the column raises when it does not exist yet.
        sqlite.execute('SELECT content_hash FROM uris LIMIT 1')
    except Exception:
        # NOTE(review): broad catch mirrors the _migrate_asn_column pattern
        # above. It also fires on unrelated failures (e.g. missing uris
        # table), in which case the ALTER below raises its own error.
        sqlite.execute('ALTER TABLE uris ADD COLUMN content_hash TEXT')
        sqlite.commit()
def compute_proxy_list_hash(proxies):
    """Compute MD5 hash of sorted proxy list for change detection.

    Sorting first makes the hash order-independent, so two retrievals of
    the same proxies in a different order produce the same fingerprint.

    Args:
        proxies: List of proxy strings (ip:port format)

    Returns:
        Hexadecimal MD5 hash string, or None if list is empty
    """
    if not proxies:
        return None
    import hashlib
    sorted_list = '\n'.join(sorted(proxies))
    # str.join always returns str on Python 3, so encode unconditionally;
    # the old hasattr(..., 'encode') guard was dead Python 2 compat code
    # (hashlib.md5 would raise TypeError on a bare str anyway).
    # MD5 is used purely as a non-cryptographic change-detection fingerprint.
    return hashlib.md5(sorted_list.encode('utf-8')).hexdigest()
def update_proxy_latency(sqlite, proxy, latency_ms):
"""Update rolling average latency for a proxy.
@@ -159,7 +184,10 @@ def create_table_if_not_exists(sqlite, dbname):
stale_count INT,
retrievals INT,
proxies_added INT,
added INT)""")
added INT,
content_hash TEXT)""")
# Migration for existing databases
_migrate_content_hash_column(sqlite)
# Indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')