style: normalize indentation and improve code style

- convert tabs to 4-space indentation
- add docstrings to modules and classes
- remove unused import (copy)
- use explicit object inheritance
- use 'while True' over 'while 1'
- use 'while args' over 'while len(args)'
- use '{}' over 'dict()'
- consistent string formatting
- Python 2/3 compatible Queue import
This commit is contained in:
Username
2025-12-20 23:18:45 +01:00
parent d356cdf6ee
commit e24f68500c
8 changed files with 1434 additions and 1342 deletions

View File

@@ -1,74 +1,110 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Combined config file and argument parser."""
from ConfigParser import SafeConfigParser, NoOptionError
from argparse import ArgumentParser
import sys
class _Dummy(object):
    """Empty attribute container; one instance is created per config section.

    ComboParser.load() attaches an instance to the parser under the section's
    name and then sets one attribute on it for every item of that section.
    """
class ComboParser(object):
    """Parse configuration from an INI file and command-line arguments.

    Items are registered with add_item(). load() then resolves every item,
    in increasing priority, from its default value, the INI file and the
    command line, and exposes the result as ``parser.<section>.<name>``.
    """

    def __init__(self, ini):
        """Create a parser that will read the INI file at path *ini*."""
        self.cparser = SafeConfigParser()
        self.aparser = ArgumentParser()
        self.ini = ini
        self.items = []
        self.loaded = False
        self.args = None

    def add_item(self, section, name, type, default, desc, required):
        """Register a configuration item and its ``--section.name`` option.

        section/name -- INI section and option name of the item.
        type         -- bool, float, int or str; also used to convert the
                        command-line value (bool accepts true/1/yes in any
                        letter case, everything else is False).
        default      -- value used when neither source provides the item.
        desc         -- help text shown by ``--help``.
        required     -- if true, load() exits when the item is not found.
        """
        def str2bool(val):
            return val.lower() in ('true', '1', 'yes')

        self.items.append({
            'section': section,
            'name': name,
            'type': type,
            'default': default,
            'required': required,
        })

        # argparse applies %-formatting to help strings, so a literal '%'
        # coming from the description or default must be doubled.
        help_text = ('%s (default: %s)' % (desc, default)).replace('%', '%%')

        # The option itself is never required and defaults to None so that
        # load() can tell "not given on the command line" from a real value.
        self.aparser.add_argument(
            '--%s.%s' % (section, name),
            help=help_text,
            type=type if type is not bool else str2bool,
            default=None,
            required=False
        )

    def load(self):
        """Load configuration from file and command line (first call only).

        Writes a warning to stderr for every optional item that falls back
        to its default, and exits with status 1 when a required item is
        found in neither the INI file nor the command line.
        """
        if self.loaded:
            return
        self.loaded = True

        try:
            self.cparser.read(self.ini)
        except Exception as exc:
            # Best effort: a broken config file must not abort start-up,
            # but the user should know the file was not used.
            sys.stderr.write(
                'warning: could not read config file "%s": %s\n'
                % (self.ini, exc)
            )

        self.args = self.aparser.parse_args()

        getters = {
            bool: self.cparser.getboolean,
            float: self.cparser.getfloat,
            int: self.cparser.getint,
            str: self.cparser.get,
        }

        for item in self.items:
            section = item['section']
            name = item['name']

            # Ensure the per-section container exists.
            if not hasattr(self, section):
                setattr(self, section, _Dummy())
            obj = getattr(self, section)

            # Start with the default value.
            value = item['default']
            found = False

            getter = getters.get(item['type'])
            if getter is None:
                # NOTE: items of any other type are never read from the
                # file and have always counted as found; kept as is.
                found = True
            elif self.cparser.has_option(section, name):
                # has_option() is False for a missing section as well as a
                # missing option, so neither case raises here.
                value = getter(section, name)
                found = True

            # Command line overrides the config file.
            arg_value = getattr(self.args, '%s.%s' % (section, name), None)
            if arg_value is not None:
                value = arg_value
                found = True

            # Handle items provided by neither source.
            if not found:
                if item['required']:
                    sys.stderr.write(
                        'error: required config item "%s" not found in section "%s" of "%s"\n'
                        % (name, section, self.ini)
                    )
                    sys.exit(1)
                else:
                    sys.stderr.write(
                        'warning: assigned default value of "%s" to "%s.%s"\n'
                        % (item['default'], section, name)
                    )

            setattr(obj, name, value)

43
dbs.py
View File

@@ -1,7 +1,13 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Database table creation and insertion utilities."""
import time
from misc import _log
def create_table_if_not_exists(sqlite, dbname):
"""Create database table with indexes if it doesn't exist."""
if dbname == 'proxylist':
sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist (
proxy BLOB UNIQUE,
@@ -17,7 +23,7 @@ def create_table_if_not_exists(sqlite, dbname):
port INT,
consecutive_success INT,
total_duration INT)""")
# indexes for common query patterns
# Indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
@@ -31,31 +37,44 @@ def create_table_if_not_exists(sqlite, dbname):
stale_count INT,
retrievals INT,
proxies_added INT,
added INT
)""")
# indexes for common query patterns
added INT)""")
# Indexes for common query patterns
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
sqlite.commit()
def insert_proxies(proxydb, proxies, url):
    """Insert new proxies into the proxylist table.

    proxydb -- database wrapper providing executemany() and commit().
    proxies -- iterable of 'host:port' strings; nothing happens when empty.
    url     -- source the proxies came from, used only in the log line.

    Rows are written with INSERT OR IGNORE, with added set to the current
    Unix time, failed set to 3 and the remaining counters set to 0.
    Raises ValueError if an entry contains no ':' at all.
    """
    if not proxies:
        return
    timestamp = int(time.time())
    rows = []
    for p in proxies:
        # Split on the last colon only, so a host part that itself contains
        # ':' does not break the two-value unpacking.
        ip, port = p.rsplit(':', 1)
        rows.append((timestamp, p, ip, port, 3, 0, 0, 0, 0, 0))
    proxydb.executemany(
        'INSERT OR IGNORE INTO proxylist '
        '(added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) '
        'VALUES (?,?,?,?,?,?,?,?,?,?)',
        rows
    )
    proxydb.commit()
    _log('+%d proxy/ies from %s' % (len(proxies), url), 'added')
def insert_urls(urls, search, sqlite):
    """Store newly discovered URLs in the uris table.

    urls   -- collection of URL strings; nothing happens when it is empty.
    search -- origin of the URLs, used only in the log line.
    sqlite -- database wrapper providing executemany() and commit().

    Each URL is written with INSERT OR IGNORE, using the current Unix time
    as added, error set to 1 and the other counters set to 0.
    """
    if not urls:
        return

    now = int(time.time())
    statement = (
        'INSERT OR IGNORE INTO uris '
        '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
        'VALUES (?,?,?,?,?,?,?)'
    )

    records = []
    for link in urls:
        records.append((now, link, 0, 1, 0, 0, 0))

    sqlite.executemany(statement, records)
    sqlite.commit()
    _log('+%d url(s) from %s' % (len(urls), search), 'added')

View File

@@ -1,9 +1,29 @@
import time, random, sys
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""SQLite wrapper with retry logic and WAL mode."""
import time
import random
import sys
import sqlite3
class mysqlite:
class mysqlite(object):
"""SQLite connection wrapper with automatic retry on lock."""
def __init__(self, database, factory=None):
    """Open *database* and prepare a cursor on it.

    database -- path handed to sqlite3.connect(); also kept as self.dbname.
    factory  -- optional text_factory installed on the connection.
    """
    connection = sqlite3.connect(database)
    if factory is not None:
        connection.text_factory = factory
    self.handle = connection
    self.cursor = connection.cursor()
    self.dbname = database

    # WAL journal for better concurrency, with relaxed fsync behaviour.
    for pragma in ('PRAGMA journal_mode=WAL', 'PRAGMA synchronous=NORMAL'):
        self.cursor.execute(pragma)
def _try_op(self, op, query, args=None, rmin=1.5, rmax=7.0):
while 1:
"""Execute operation with retry on database lock."""
while True:
try:
if query is None:
return op()
@@ -12,33 +32,31 @@ class mysqlite:
else:
return op(query, args)
except sqlite3.OperationalError as e:
if e.message == 'database is locked':
print "zzZzzZZ: db is locked (%s)"%self.dbname
err_msg = str(e)
if 'database is locked' in err_msg:
sys.stderr.write('zzZzzZZ: db is locked (%s)\n' % self.dbname)
time.sleep(random.uniform(rmin, rmax))
continue
else:
print '%s\nquery: %s\nargs: %s' % (str(sys.exc_info()), str(query), str(args))
raise e
sys.stderr.write('%s\nquery: %s\nargs: %s\n' % (
str(sys.exc_info()), str(query), str(args)))
raise
def execute(self, query, args=None, rmin=1.5, rmax=7.0):
    """Run a single statement on the cursor through _try_op.

    query      -- SQL text.
    args       -- optional parameters for the statement.
    rmin, rmax -- passed on to _try_op, which sleeps a random time between
                  them (seconds) before retrying a locked database.

    Returns whatever _try_op returns for the cursor's execute() call.
    """
    return self._try_op(self.cursor.execute, query, args, rmin, rmax)
def executemany(self, query, args, rmin=1.5, rmax=7.0, batch_size=500):
    """Run *query* once per parameter set, in batches, through _try_op.

    query      -- SQL text with placeholders.
    args       -- parameter sets; a list, a tuple or any other iterable.
                  Nothing is executed when it is empty or None.
    rmin, rmax -- passed on to _try_op for its retry delay.
    batch_size -- maximum number of parameter sets per executemany() call.

    Raises ValueError if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')
    if not args:
        return
    # Generators and other one-shot iterables cannot be sliced; materialize
    # them once. Lists and tuples are used as they are.
    rows = args if isinstance(args, (list, tuple)) else list(args)
    # Step through by index instead of re-slicing the remainder after every
    # batch, which would copy the whole tail each time.
    for start in range(0, len(rows), batch_size):
        batch = rows[start:start + batch_size]
        self._try_op(self.cursor.executemany, query, batch, rmin, rmax)
def commit(self, rmin=1.5, rmax=7.0):
    """Commit the connection's pending changes through _try_op.

    rmin and rmax are passed on to _try_op for its retry delay. No query
    or arguments are supplied, so _try_op calls handle.commit() directly.
    """
    return self._try_op(self.handle.commit, None, None, rmin, rmax)
def close(self):
    """Close the underlying sqlite3 connection (self.handle)."""
    self.handle.close()
def __init__(self, database, factory = None):
self.handle = sqlite3.connect(database)
if factory: self.handle.text_factory = factory
self.cursor = self.handle.cursor()
self.dbname = database
# enable WAL mode for better concurrency
self.cursor.execute('PRAGMA journal_mode=WAL')
self.cursor.execute('PRAGMA synchronous=NORMAL')

0
ppf.py Executable file → Normal file
View File

View File

@@ -1,9 +1,16 @@
#!/usr/bin/env python2
import threading
import time, random, string, re, copy
import Queue
import time
import random
import string
import re
import heapq
try:
import Queue
except ImportError:
import queue as Queue
try:
import IP2Location
import os
@@ -22,7 +29,7 @@ import connection_pool
config = Config()
_run_standalone = False
cached_dns = dict()
cached_dns = {}
regexes = {
'www.facebook.com': 'X-FB-Debug',

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python2
# HTML parsing with optional BeautifulSoup or stdlib fallback
# -*- coding: utf-8 -*-
"""HTML parsing with optional BeautifulSoup or stdlib fallback."""
from HTMLParser import HTMLParser
import sys
@@ -14,7 +15,9 @@ except ImportError:
_bs4_available = False
class Tag():
class Tag(object):
"""Simple tag representation for stdlib parser."""
def __init__(self, name, attrs):
self.name = name
self.attrs = dict(attrs)
@@ -26,12 +29,15 @@ class Tag():
return self.attrs.get(key, default)
class SoupResult():
class SoupResult(object):
"""BeautifulSoup-like result wrapper for stdlib parser."""
def __init__(self, tags):
self._tags = tags
self.body = self
def find_all(self, tag_name, **kwargs):
"""Find all tags matching criteria."""
results = []
for tag in self._tags:
if tag.name != tag_name:
@@ -46,6 +52,8 @@ class SoupResult():
class LinkExtractor(HTMLParser):
"""Extract tags from HTML using stdlib."""
def __init__(self):
HTMLParser.__init__(self)
self.tags = []
@@ -58,15 +66,17 @@ class LinkExtractor(HTMLParser):
def _parse_stdlib(html):
    """Collect tags from *html* with the HTMLParser-based LinkExtractor.

    Any error raised while feeding the parser is ignored on purpose, so the
    tags gathered before the failure are still returned, wrapped in a
    SoupResult.
    """
    extractor = LinkExtractor()
    try:
        extractor.feed(html)
    except Exception:
        # Malformed HTML: keep whatever was collected so far.
        pass
    return SoupResult(extractor.tags)
def _parse_bs4(html):
"""Parse HTML using BeautifulSoup."""
try:
return BeautifulSoup(html, 'lxml')
except (FeatureNotFound, Exception):
@@ -74,6 +84,7 @@ def _parse_bs4(html):
def set_nobs(enabled):
"""Disable BeautifulSoup and use stdlib instead."""
global _use_bs4
_use_bs4 = not enabled
if enabled and _bs4_available:
@@ -83,12 +94,13 @@ def set_nobs(enabled):
def soupify(html, nohtml=False):
    """Parse *html* and return a BeautifulSoup-like object.

    Unless *nohtml* is true, the input is first wrapped in
    ``<html><body>...</body></html>``. BeautifulSoup is used when it is both
    enabled and importable; otherwise the stdlib parser is used.
    """
    if nohtml:
        document = html
    else:
        document = '<html><body>%s</body></html>' % html

    parse = _parse_bs4 if (_use_bs4 and _bs4_available) else _parse_stdlib
    return parse(document)
def is_available():
    """Return the module-level _bs4_available flag.

    The flag is set to False in the module's ImportError handler, i.e. when
    the BeautifulSoup import did not succeed.
    """
    return _bs4_available