style: normalize indentation and improve code style

- convert tabs to 4-space indentation
- add docstrings to modules and classes
- remove unused import (copy)
- use explicit object inheritance
- use 'while True' over 'while 1'
- use 'while args' over 'while len(args)'
- use '{}' over 'dict()'
- consistent string formatting
- Python 2/3 compatible Queue import
This commit is contained in:
Username
2025-12-20 23:18:45 +01:00
parent d356cdf6ee
commit e24f68500c
8 changed files with 1434 additions and 1342 deletions

View File

@@ -1,22 +1,36 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Combined config file and argument parser."""
from ConfigParser import SafeConfigParser, NoOptionError from ConfigParser import SafeConfigParser, NoOptionError
from argparse import ArgumentParser from argparse import ArgumentParser
import sys import sys
class _Dummy(object):
    """Empty attribute container for one configuration section.

    ``ComboParser.load`` creates one instance per section name and stores
    each resolved option on it as a plain attribute.
    """
class ComboParser(object): class ComboParser(object):
"""Parse configuration from INI file and command-line arguments.
Command-line arguments override INI file values.
"""
def __init__(self, ini): def __init__(self, ini):
self.items = [] self.items = []
self.cparser = SafeConfigParser() self.cparser = SafeConfigParser()
self.aparser = ArgumentParser() self.aparser = ArgumentParser()
self.ini = ini self.ini = ini
self.items = []
self.loaded = False self.loaded = False
self.args = None
def add_item(self, section, name, type, default, desc, required): def add_item(self, section, name, type, default, desc, required):
"""Add a configuration item."""
def str2bool(val): def str2bool(val):
return val in ['True', 'true', '1', 'yes'] return val.lower() in ('true', '1', 'yes')
self.items.append({ self.items.append({
'section': section, 'section': section,
'name': name, 'name': name,
@@ -26,49 +40,71 @@ class ComboParser(object):
}) })
self.aparser.add_argument( self.aparser.add_argument(
'--%s.%s' % (section, name), '--%s.%s' % (section, name),
help='%s, default: (%s)'%(desc, str(default)), help='%s (default: %s)' % (desc, default),
type=type if type is not bool else str2bool, type=type if type is not bool else str2bool,
default=None, default=None,
required=False required=False
) )
def load(self): def load(self):
if self.loaded: return """Load configuration from file and command-line."""
if self.loaded:
return
self.loaded = True self.loaded = True
try: try:
self.cparser.read(self.ini) self.cparser.read(self.ini)
except Exception: except Exception:
pass # config file missing or unreadable, use defaults pass # Config file missing or unreadable, use defaults
args = self.aparser.parse_args()
self.args = self.aparser.parse_args()
for item in self.items: for item in self.items:
try: section = item['section']
obj = getattr(self, item['section']) name = item['name']
except AttributeError:
setattr(self, item['section'], _Dummy())
obj = getattr(self, item['section'])
setattr(obj, item['name'], item['default']) # Ensure section object exists
inner = getattr(obj, item['name']) if not hasattr(self, section):
setattr(self, section, _Dummy())
obj = getattr(self, section)
item['found'] = True # Start with default value
value = item['default']
found = False
# Try to read from config file
try: try:
if item['type'] is bool : inner = self.cparser.getboolean(item['section'], item['name']) if item['type'] is bool:
elif item['type'] is float: inner = self.cparser.getfloat(item['section'], item['name']) value = self.cparser.getboolean(section, name)
elif item['type'] is int : inner = self.cparser.getint(item['section'], item['name']) elif item['type'] is float:
elif item['type'] is str : inner = self.cparser.get(item['section'], item['name']) value = self.cparser.getfloat(section, name)
elif item['type'] is int:
value = self.cparser.getint(section, name)
elif item['type'] is str:
value = self.cparser.get(section, name)
found = True
except NoOptionError: except NoOptionError:
item['found'] = False pass
try:
arg = getattr(args, '%s.%s'%(item['section'], item['name'])) # Command-line overrides config file
if arg is not None: arg_name = '%s.%s' % (section, name)
inner = arg arg_value = getattr(self.args, arg_name, None)
item['found'] = True if arg_value is not None:
except AttributeError: value = arg_value
pass # arg not provided on command line found = True
if not item['found']:
# Handle missing required items
if not found:
if item['required']: if item['required']:
sys.stderr.write('error: required config item "%s" not found in section "%s" of "%s"!\n'%(item['name'], item['section'], self.ini)) sys.stderr.write(
'error: required config item "%s" not found in section "%s" of "%s"\n'
% (name, section, self.ini)
)
sys.exit(1) sys.exit(1)
else: else:
sys.stderr.write('warning: assigned default value of "%s" to "%s.%s"\n'%(str(item['default']), item['section'], item['name'])) sys.stderr.write(
setattr(obj, item['name'], inner) 'warning: assigned default value of "%s" to "%s.%s"\n'
% (item['default'], section, name)
)
setattr(obj, name, value)

41
dbs.py
View File

@@ -1,7 +1,13 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Database table creation and insertion utilities."""
import time import time
from misc import _log from misc import _log
def create_table_if_not_exists(sqlite, dbname): def create_table_if_not_exists(sqlite, dbname):
"""Create database table with indexes if it doesn't exist."""
if dbname == 'proxylist': if dbname == 'proxylist':
sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist ( sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist (
proxy BLOB UNIQUE, proxy BLOB UNIQUE,
@@ -17,7 +23,7 @@ def create_table_if_not_exists(sqlite, dbname):
port INT, port INT,
consecutive_success INT, consecutive_success INT,
total_duration INT)""") total_duration INT)""")
# indexes for common query patterns # Indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)') sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)') sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)') sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
@@ -31,31 +37,44 @@ def create_table_if_not_exists(sqlite, dbname):
stale_count INT, stale_count INT,
retrievals INT, retrievals INT,
proxies_added INT, proxies_added INT,
added INT added INT)""")
)""") # Indexes for common query patterns
# indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)') sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)') sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
sqlite.commit() sqlite.commit()
def insert_proxies(proxydb, proxies, url):
    """Insert newly found proxies into the ``proxylist`` table.

    Rows are written with ``INSERT OR IGNORE``.  Each new row gets the
    current time as ``added``, ``failed`` set to 3 and every other counter
    set to 0.

    Args:
        proxydb: Database wrapper providing ``executemany()`` and
            ``commit()``.
        proxies: Sequence of ``'host:port'`` strings; nothing is done when
            it is empty.
        url: Where the proxies came from; only used in the log line.

    Raises:
        ValueError: If an entry contains no ``':'`` separator.
    """
    if not proxies:
        return

    timestamp = int(time.time())
    rows = []
    for proxy in proxies:
        # Split on the last colon only, so a host part that itself contains
        # colons does not break the two-way unpacking.
        ip, port = proxy.rsplit(':', 1)
        rows.append((timestamp, proxy, ip, port, 3, 0, 0, 0, 0, 0))

    proxydb.executemany(
        'INSERT OR IGNORE INTO proxylist '
        '(added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) '
        'VALUES (?,?,?,?,?,?,?,?,?,?)',
        rows
    )
    proxydb.commit()
    _log('+%d proxy/ies from %s' % (len(proxies), url), 'added')
def insert_urls(urls, search, sqlite):
    """Insert newly found URLs into the ``uris`` table.

    Rows are written with ``INSERT OR IGNORE``.  Each new row gets the
    current time as ``added``, ``error`` set to 1 and ``check_time``,
    ``stale_count``, ``retrievals`` and ``proxies_added`` set to 0.

    Args:
        urls: Collection of URL strings; nothing is done when it is empty.
        search: Where the URLs came from; only used in the log line.
        sqlite: Database wrapper providing ``executemany()`` and
            ``commit()``.
    """
    if not urls:
        return

    timestamp = int(time.time())
    rows = [(timestamp, u, 0, 1, 0, 0, 0) for u in urls]
    sqlite.executemany(
        'INSERT OR IGNORE INTO uris '
        '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
        'VALUES (?,?,?,?,?,?,?)',
        rows
    )
    sqlite.commit()
    _log('+%d url(s) from %s' % (len(urls), search), 'added')

View File

@@ -1,9 +1,29 @@
import time, random, sys #!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""SQLite wrapper with retry logic and WAL mode."""
import time
import random
import sys
import sqlite3 import sqlite3
class mysqlite:
class mysqlite(object):
"""SQLite connection wrapper with automatic retry on lock."""
def __init__(self, database, factory=None):
self.handle = sqlite3.connect(database)
if factory is not None:
self.handle.text_factory = factory
self.cursor = self.handle.cursor()
self.dbname = database
# Enable WAL mode for better concurrency
self.cursor.execute('PRAGMA journal_mode=WAL')
self.cursor.execute('PRAGMA synchronous=NORMAL')
def _try_op(self, op, query, args=None, rmin=1.5, rmax=7.0): def _try_op(self, op, query, args=None, rmin=1.5, rmax=7.0):
while 1: """Execute operation with retry on database lock."""
while True:
try: try:
if query is None: if query is None:
return op() return op()
@@ -12,33 +32,31 @@ class mysqlite:
else: else:
return op(query, args) return op(query, args)
except sqlite3.OperationalError as e: except sqlite3.OperationalError as e:
if e.message == 'database is locked': err_msg = str(e)
print "zzZzzZZ: db is locked (%s)"%self.dbname if 'database is locked' in err_msg:
sys.stderr.write('zzZzzZZ: db is locked (%s)\n' % self.dbname)
time.sleep(random.uniform(rmin, rmax)) time.sleep(random.uniform(rmin, rmax))
continue continue
else: else:
print '%s\nquery: %s\nargs: %s' % (str(sys.exc_info()), str(query), str(args)) sys.stderr.write('%s\nquery: %s\nargs: %s\n' % (
raise e str(sys.exc_info()), str(query), str(args)))
raise
def execute(self, query, args=None, rmin=1.5, rmax=7.0): def execute(self, query, args=None, rmin=1.5, rmax=7.0):
"""Execute a single query with retry."""
return self._try_op(self.cursor.execute, query, args, rmin, rmax) return self._try_op(self.cursor.execute, query, args, rmin, rmax)
def executemany(self, query, args, rmin=1.5, rmax=7.0): def executemany(self, query, args, rmin=1.5, rmax=7.0):
while len(args): """Execute query for multiple argument sets, batched."""
self._try_op(self.cursor.executemany, query, args[:500], rmin, rmax) while args:
batch = args[:500]
self._try_op(self.cursor.executemany, query, batch, rmin, rmax)
args = args[500:] args = args[500:]
def commit(self, rmin=1.5, rmax=7.0): def commit(self, rmin=1.5, rmax=7.0):
"""Commit transaction with retry."""
return self._try_op(self.handle.commit, None, None, rmin, rmax) return self._try_op(self.handle.commit, None, None, rmin, rmax)
def close(self): def close(self):
"""Close database connection."""
self.handle.close() self.handle.close()
def __init__(self, database, factory = None):
self.handle = sqlite3.connect(database)
if factory: self.handle.text_factory = factory
self.cursor = self.handle.cursor()
self.dbname = database
# enable WAL mode for better concurrency
self.cursor.execute('PRAGMA journal_mode=WAL')
self.cursor.execute('PRAGMA synchronous=NORMAL')

0
ppf.py Executable file → Normal file
View File

View File

@@ -1,9 +1,16 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import threading import threading
import time, random, string, re, copy import time
import Queue import random
import string
import re
import heapq import heapq
try:
import Queue
except ImportError:
import queue as Queue
try: try:
import IP2Location import IP2Location
import os import os
@@ -22,7 +29,7 @@ import connection_pool
config = Config() config = Config()
_run_standalone = False _run_standalone = False
cached_dns = dict() cached_dns = {}
regexes = { regexes = {
'www.facebook.com': 'X-FB-Debug', 'www.facebook.com': 'X-FB-Debug',

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# HTML parsing with optional BeautifulSoup or stdlib fallback # -*- coding: utf-8 -*-
"""HTML parsing with optional BeautifulSoup or stdlib fallback."""
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
import sys import sys
@@ -14,7 +15,9 @@ except ImportError:
_bs4_available = False _bs4_available = False
class Tag(): class Tag(object):
"""Simple tag representation for stdlib parser."""
def __init__(self, name, attrs): def __init__(self, name, attrs):
self.name = name self.name = name
self.attrs = dict(attrs) self.attrs = dict(attrs)
@@ -26,12 +29,15 @@ class Tag():
return self.attrs.get(key, default) return self.attrs.get(key, default)
class SoupResult(): class SoupResult(object):
"""BeautifulSoup-like result wrapper for stdlib parser."""
def __init__(self, tags): def __init__(self, tags):
self._tags = tags self._tags = tags
self.body = self self.body = self
def find_all(self, tag_name, **kwargs): def find_all(self, tag_name, **kwargs):
"""Find all tags matching criteria."""
results = [] results = []
for tag in self._tags: for tag in self._tags:
if tag.name != tag_name: if tag.name != tag_name:
@@ -46,6 +52,8 @@ class SoupResult():
class LinkExtractor(HTMLParser): class LinkExtractor(HTMLParser):
"""Extract tags from HTML using stdlib."""
def __init__(self): def __init__(self):
HTMLParser.__init__(self) HTMLParser.__init__(self)
self.tags = [] self.tags = []
@@ -58,15 +66,17 @@ class LinkExtractor(HTMLParser):
def _parse_stdlib(html):
    """Parse *html* with the stdlib-based ``LinkExtractor``.

    Any exception raised while feeding the parser is swallowed on purpose,
    so malformed input yields the tags collected up to that point.

    Returns:
        A ``SoupResult`` wrapping the tags the extractor collected.
    """
    parser = LinkExtractor()
    try:
        parser.feed(html)
    except Exception:
        pass  # Malformed HTML, return partial results
    return SoupResult(parser.tags)
def _parse_bs4(html): def _parse_bs4(html):
"""Parse HTML using BeautifulSoup."""
try: try:
return BeautifulSoup(html, 'lxml') return BeautifulSoup(html, 'lxml')
except (FeatureNotFound, Exception): except (FeatureNotFound, Exception):
@@ -74,6 +84,7 @@ def _parse_bs4(html):
def set_nobs(enabled): def set_nobs(enabled):
"""Disable BeautifulSoup and use stdlib instead."""
global _use_bs4 global _use_bs4
_use_bs4 = not enabled _use_bs4 = not enabled
if enabled and _bs4_available: if enabled and _bs4_available:
@@ -83,12 +94,13 @@ def set_nobs(enabled):
def soupify(html, nohtml=False):
    """Parse HTML content, returning a BeautifulSoup-like object.

    Args:
        html: Markup to parse.
        nohtml: When false (the default) the markup is first wrapped in
            ``<html><body>...</body></html>``; when true it is parsed
            exactly as given.

    Returns:
        The result of ``_parse_bs4`` when BeautifulSoup is both enabled and
        available, otherwise the result of ``_parse_stdlib``.
    """
    htm = html if nohtml else '<html><body>%s</body></html>' % html
    if _use_bs4 and _bs4_available:
        return _parse_bs4(htm)
    return _parse_stdlib(htm)
def is_available():
    """Return the module's ``_bs4_available`` flag.

    The flag is set to False when importing BeautifulSoup fails.
    """
    return _bs4_available