style: normalize indentation and improve code style

- convert tabs to 4-space indentation
- add docstrings to modules and classes
- remove unused import (copy)
- use explicit object inheritance
- use 'while True' over 'while 1'
- use 'while args' over 'while len(args)'
- use '{}' over 'dict()'
- consistent string formatting
- Python 2/3 compatible Queue import
2025-12-20 23:18:45 +01:00
parent d356cdf6ee
commit e24f68500c
8 changed files with 1434 additions and 1342 deletions
--- a/comboparse.py
+++ b/comboparse.py
@@ -1,74 +1,110 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""Combined config file and argument parser."""
+
 from ConfigParser import SafeConfigParser, NoOptionError
 from argparse import ArgumentParser
 import sys

-class _Dummy():
-	pass
+
+class _Dummy(object):
+    """Placeholder for config sections."""
+    pass
+

 class ComboParser(object):
-	def __init__(self, ini):
-		self.items = []
-		self.cparser = SafeConfigParser()
-		self.aparser = ArgumentParser()
-		self.ini = ini
-		self.items = []
-		self.loaded = False
+    """Parse configuration from INI file and command-line arguments.

-	def add_item(self, section, name, type, default, desc, required):
-		def str2bool(val):
-			return val in ['True', 'true', '1', 'yes']
-		self.items.append({
-			'section':section,
-			'name':name,
-			'type':type,
-			'default':default,
-			'required':required,
-		})
-		self.aparser.add_argument(
-			'--%s.%s'%(section, name),
-			help='%s, default: (%s)'%(desc, str(default)),
-			type=type if type is not bool else str2bool,
-			default=None,
-			required=False
-		)
-	def load(self):
-		if self.loaded: return
-		self.loaded = True
+    Command-line arguments override INI file values.
+    """

-		try:
-			self.cparser.read(self.ini)
-		except Exception:
-			pass  # config file missing or unreadable, use defaults
-		args = self.aparser.parse_args()
-		for item in self.items:
-			try:
-				obj = getattr(self, item['section'])
-			except AttributeError:
-				setattr(self, item['section'], _Dummy())
-				obj = getattr(self, item['section'])
+    def __init__(self, ini):
+        self.items = []
+        self.cparser = SafeConfigParser()
+        self.aparser = ArgumentParser()
+        self.ini = ini
+        self.loaded = False
+        self.args = None

-			setattr(obj, item['name'], item['default'])
-			inner = getattr(obj, item['name'])
+    def add_item(self, section, name, type, default, desc, required):
+        """Add a configuration item."""
+        def str2bool(val):
+            return val.lower() in ('true', '1', 'yes')

-			item['found'] = True
-			try:
-				if   item['type'] is bool : inner = self.cparser.getboolean(item['section'], item['name'])
-				elif item['type'] is float: inner = self.cparser.getfloat(item['section'], item['name'])
-				elif item['type'] is int  : inner = self.cparser.getint(item['section'], item['name'])
-				elif item['type'] is str  : inner = self.cparser.get(item['section'], item['name'])
-			except NoOptionError:
-				item['found'] = False
-			try:
-				arg = getattr(args, '%s.%s'%(item['section'], item['name']))
-				if arg is not None:
-					inner = arg
-					item['found'] = True
-			except AttributeError:
-				pass  # arg not provided on command line
-			if not item['found']:
-				if item['required']:
-					sys.stderr.write('error: required config item "%s" not found in section "%s" of "%s"!\n'%(item['name'], item['section'], self.ini))
-					sys.exit(1)
-				else:
-					sys.stderr.write('warning: assigned default value of "%s" to "%s.%s"\n'%(str(item['default']), item['section'], item['name']))
-			setattr(obj, item['name'], inner)
+        self.items.append({
+            'section': section,
+            'name': name,
+            'type': type,
+            'default': default,
+            'required': required,
+        })
+        self.aparser.add_argument(
+            '--%s.%s' % (section, name),
+            help='%s (default: %s)' % (desc, default),
+            type=type if type is not bool else str2bool,
+            default=None,
+            required=False
+        )
+
+    def load(self):
+        """Load configuration from file and command-line."""
+        if self.loaded:
+            return
+        self.loaded = True
+
+        try:
+            self.cparser.read(self.ini)
+        except Exception:
+            pass  # Config file missing or unreadable, use defaults
+
+        self.args = self.aparser.parse_args()
+
+        for item in self.items:
+            section = item['section']
+            name = item['name']
+
+            # Ensure section object exists
+            if not hasattr(self, section):
+                setattr(self, section, _Dummy())
+            obj = getattr(self, section)
+
+            # Start with default value
+            value = item['default']
+            found = False
+
+            # Try to read from config file
+            try:
+                if item['type'] is bool:
+                    value = self.cparser.getboolean(section, name)
+                elif item['type'] is float:
+                    value = self.cparser.getfloat(section, name)
+                elif item['type'] is int:
+                    value = self.cparser.getint(section, name)
+                elif item['type'] is str:
+                    value = self.cparser.get(section, name)
+                found = True
+            except NoOptionError:
+                pass
+
+            # Command-line overrides config file
+            arg_name = '%s.%s' % (section, name)
+            arg_value = getattr(self.args, arg_name, None)
+            if arg_value is not None:
+                value = arg_value
+                found = True
+
+            # Handle missing required items
+            if not found:
+                if item['required']:
+                    sys.stderr.write(
+                        'error: required config item "%s" not found in section "%s" of "%s"\n'
+                        % (name, section, self.ini)
+                    )
+                    sys.exit(1)
+                else:
+                    sys.stderr.write(
+                        'warning: assigned default value of "%s" to "%s.%s"\n'
+                        % (item['default'], section, name)
+                    )
+
+            setattr(obj, name, value)
--- a/config.py
+++ b/config.py
@@ -3,159 +3,159 @@ from misc import set_log_level, _log
 import os

 class Config(ComboParser):
-	def load(self):
-		super(Config, self).load()
-		self.torhosts = [ str(i).strip() for i in self.common.tor_hosts.split(',') ]
-		#with open('servers.txt', 'r') as handle:
-		with open(self.watchd.source_file, 'r') as handle:
-			self.servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0]
-		# Apply log level from CLI flags
-		if self.args.quiet:
-			set_log_level('warn')
-		elif self.args.verbose:
-			set_log_level('debug')
+    def load(self):
+        super(Config, self).load()
+        self.torhosts = [ str(i).strip() for i in self.common.tor_hosts.split(',') ]
+        #with open('servers.txt', 'r') as handle:
+        with open(self.watchd.source_file, 'r') as handle:
+            self.servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0]
+        # Apply log level from CLI flags
+        if self.args.quiet:
+            set_log_level('warn')
+        elif self.args.verbose:
+            set_log_level('debug')

-	def validate(self):
-		"""Validate configuration values. Returns list of errors."""
-		errors = []
-		warnings = []
+    def validate(self):
+        """Validate configuration values. Returns list of errors."""
+        errors = []
+        warnings = []

-		# Validate port numbers
-		if not 1 <= self.httpd.port <= 65535:
-			errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)
+        # Validate port numbers
+        if not 1 <= self.httpd.port <= 65535:
+            errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)

-		# Validate timeouts (must be positive)
-		if self.common.timeout_connect <= 0:
-			errors.append('common.timeout_connect must be > 0')
-		if self.common.timeout_read <= 0:
-			errors.append('common.timeout_read must be > 0')
-		if self.watchd.timeout <= 0:
-			errors.append('watchd.timeout must be > 0')
-		if self.ppf.timeout <= 0:
-			errors.append('ppf.timeout must be > 0')
+        # Validate timeouts (must be positive)
+        if self.common.timeout_connect <= 0:
+            errors.append('common.timeout_connect must be > 0')
+        if self.common.timeout_read <= 0:
+            errors.append('common.timeout_read must be > 0')
+        if self.watchd.timeout <= 0:
+            errors.append('watchd.timeout must be > 0')
+        if self.ppf.timeout <= 0:
+            errors.append('ppf.timeout must be > 0')

-		# Validate thread counts
-		if self.watchd.threads < 1:
-			errors.append('watchd.threads must be >= 1')
-		if self.ppf.threads < 1:
-			errors.append('ppf.threads must be >= 1')
+        # Validate thread counts
+        if self.watchd.threads < 1:
+            errors.append('watchd.threads must be >= 1')
+        if self.ppf.threads < 1:
+            errors.append('ppf.threads must be >= 1')

-		# Validate max_fail
-		if self.watchd.max_fail < 1:
-			errors.append('watchd.max_fail must be >= 1')
-		if self.ppf.max_fail < 1:
-			errors.append('ppf.max_fail must be >= 1')
+        # Validate max_fail
+        if self.watchd.max_fail < 1:
+            errors.append('watchd.max_fail must be >= 1')
+        if self.ppf.max_fail < 1:
+            errors.append('ppf.max_fail must be >= 1')

-		# Validate engine names
-		valid_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
-						 'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
-						 'codeberg', 'gitea', 'searx'}
-		configured = [e.strip().lower() for e in self.scraper.engines.split(',')]
-		for eng in configured:
-			if eng and eng not in valid_engines:
-				warnings.append('unknown engine: %s' % eng)
+        # Validate engine names
+        valid_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
+                         'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
+                         'codeberg', 'gitea', 'searx'}
+        configured = [e.strip().lower() for e in self.scraper.engines.split(',')]
+        for eng in configured:
+            if eng and eng not in valid_engines:
+                warnings.append('unknown engine: %s' % eng)

-		# Validate source_file exists
-		if not os.path.exists(self.watchd.source_file):
-			warnings.append('source_file not found: %s' % self.watchd.source_file)
+        # Validate source_file exists
+        if not os.path.exists(self.watchd.source_file):
+            warnings.append('source_file not found: %s' % self.watchd.source_file)

-		# Validate database directories are writable
-		for db in (self.watchd.database, self.ppf.database):
-			db_dir = os.path.dirname(db) or '.'
-			if not os.access(db_dir, os.W_OK):
-				errors.append('database directory not writable: %s' % db_dir)
+        # Validate database directories are writable
+        for db in (self.watchd.database, self.ppf.database):
+            db_dir = os.path.dirname(db) or '.'
+            if not os.access(db_dir, os.W_OK):
+                errors.append('database directory not writable: %s' % db_dir)

-		# Log warnings
-		for w in warnings:
-			_log(w, 'warn')
+        # Log warnings
+        for w in warnings:
+            _log(w, 'warn')

-		return errors
-	def __init__(self):
-		super(Config, self).__init__('config.ini')
-		section = 'common'
-		self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
-		self.add_item(section, 'timeout_connect', int, 10, 'connection timeout in seconds (default: 10)', False)
-		self.add_item(section, 'timeout_read', int, 15, 'read timeout in seconds (default: 15)', False)
+        return errors
+    def __init__(self):
+        super(Config, self).__init__('config.ini')
+        section = 'common'
+        self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
+        self.add_item(section, 'timeout_connect', int, 10, 'connection timeout in seconds (default: 10)', False)
+        self.add_item(section, 'timeout_read', int, 15, 'read timeout in seconds (default: 15)', False)

-		section = 'watchd'
-		self.add_item(section, 'outage_threshold', float, 4.0, 'mininum success percentage required to not drop check results', False)
-		self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
-		self.add_item(section, 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
-		self.add_item(section, 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
-		self.add_item(section, 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
-		self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
-		self.add_item(section, 'use_ssl', int, 0, 'whether to use SSL and port 6697 to connect to targets (slower)', False)
-		self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
-		self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
-		self.add_item(section, 'database', str, 'websites.sqlite', 'filename of database', True)
-		self.add_item(section, 'oldies', bool, False, 're-test old proxies as well ? (default: False)', False)
-		self.add_item(section, 'oldies_checktime', int, 43200, 'base checking interval for *old* proxies in seconds (default: 43200)', False)
-		self.add_item(section, 'oldies_multi', int, 10, 'fetch threads*multi rows when testing oldies (default: 10)', False)
-		self.add_item(section, 'source_file', str, 'servers.txt', 'server/url list to read from (default: servers.txt)', False)
-		self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
-		self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
-		self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
-		self.add_item(section, 'checktype', str, 'http', 'check type (irc or http)', False)
+        section = 'watchd'
+        self.add_item(section, 'outage_threshold', float, 4.0, 'mininum success percentage required to not drop check results', False)
+        self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
+        self.add_item(section, 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
+        self.add_item(section, 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
+        self.add_item(section, 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
+        self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
+        self.add_item(section, 'use_ssl', int, 0, 'whether to use SSL and port 6697 to connect to targets (slower)', False)
+        self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
+        self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
+        self.add_item(section, 'database', str, 'websites.sqlite', 'filename of database', True)
+        self.add_item(section, 'oldies', bool, False, 're-test old proxies as well ? (default: False)', False)
+        self.add_item(section, 'oldies_checktime', int, 43200, 'base checking interval for *old* proxies in seconds (default: 43200)', False)
+        self.add_item(section, 'oldies_multi', int, 10, 'fetch threads*multi rows when testing oldies (default: 10)', False)
+        self.add_item(section, 'source_file', str, 'servers.txt', 'server/url list to read from (default: servers.txt)', False)
+        self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
+        self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
+        self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
+        self.add_item(section, 'checktype', str, 'http', 'check type (irc or http)', False)

-		section = 'httpd'
-		self.add_item(section, 'listenip', str, '127.0.0.1', 'address for the httpd to listen to (default: 127.0.0.1)', True)
-		self.add_item(section, 'port', int, 8081, 'port for the httpd to listen to (default: 8081)', True)
-		self.add_item(section, 'enabled', bool, False, 'start httpd (default: False)', True)
+        section = 'httpd'
+        self.add_item(section, 'listenip', str, '127.0.0.1', 'address for the httpd to listen to (default: 127.0.0.1)', True)
+        self.add_item(section, 'port', int, 8081, 'port for the httpd to listen to (default: 8081)', True)
+        self.add_item(section, 'enabled', bool, False, 'start httpd (default: False)', True)

-		section = 'ppf'
-		self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
-		self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
-		self.add_item(section, 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
-		self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
-		self.add_item(section, 'threads', int, 1, 'number of threads to run (default: 1)', False)
-		self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
-		self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
-		self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
-		self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
-		self.add_item(section, 'extract_samedomain', bool, False, 'extract only url from same domains? (default: False)', False)
+        section = 'ppf'
+        self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
+        self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
+        self.add_item(section, 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
+        self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
+        self.add_item(section, 'threads', int, 1, 'number of threads to run (default: 1)', False)
+        self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
+        self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
+        self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
+        self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
+        self.add_item(section, 'extract_samedomain', bool, False, 'extract only url from same domains? (default: False)', False)

-		section = 'scraper'
-		self.add_item(section, 'debug', bool, False, 'scraper: whether to print additional debug info', False)
-		self.add_item(section, 'query', str, 'psw', 'build query using Proxies, Search, Websites', False)
-		self.add_item(section, 'backoff_base', int, 30, 'base backoff delay in seconds (default: 30)', False)
-		self.add_item(section, 'backoff_max', int, 3600, 'max backoff delay in seconds (default: 3600)', False)
-		self.add_item(section, 'fail_threshold', int, 2, 'consecutive failures before backoff (default: 2)', False)
-		self.add_item(section, 'engines', str, 'searx,duckduckgo,github', 'comma-separated search engines (default: searx,duckduckgo,github)', False)
-		self.add_item(section, 'max_pages', int, 5, 'max pages to fetch per engine query (default: 5)', False)
-		self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate', 'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
-		self.add_item(section, 'libretranslate_enabled', bool, True, 'enable LibreTranslate for dynamic translations (default: True)', False)
+        section = 'scraper'
+        self.add_item(section, 'debug', bool, False, 'scraper: whether to print additional debug info', False)
+        self.add_item(section, 'query', str, 'psw', 'build query using Proxies, Search, Websites', False)
+        self.add_item(section, 'backoff_base', int, 30, 'base backoff delay in seconds (default: 30)', False)
+        self.add_item(section, 'backoff_max', int, 3600, 'max backoff delay in seconds (default: 3600)', False)
+        self.add_item(section, 'fail_threshold', int, 2, 'consecutive failures before backoff (default: 2)', False)
+        self.add_item(section, 'engines', str, 'searx,duckduckgo,github', 'comma-separated search engines (default: searx,duckduckgo,github)', False)
+        self.add_item(section, 'max_pages', int, 5, 'max pages to fetch per engine query (default: 5)', False)
+        self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate', 'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
+        self.add_item(section, 'libretranslate_enabled', bool, True, 'enable LibreTranslate for dynamic translations (default: True)', False)

-		self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
-		self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
-		self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
-		self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
+        self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
+        self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
+        self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
+        self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)

-		section = 'flood'
-		self.add_item(section, 'server', str, None, 'irc server address', False)
-		self.add_item(section, 'target', str, None, 'target to flood', False)
-		self.add_item(section, 'nickserv', str, 'nickserv', "nickserv's nickname", False)
-		self.add_item(section, 'message', str, None, 'message', False)
-		self.add_item(section, 'threads', int, 1, '# of threads', False)
-		self.add_item(section, 'register', int, 0, 'register nickname when required', False)
+        section = 'flood'
+        self.add_item(section, 'server', str, None, 'irc server address', False)
+        self.add_item(section, 'target', str, None, 'target to flood', False)
+        self.add_item(section, 'nickserv', str, 'nickserv', "nickserv's nickname", False)
+        self.add_item(section, 'message', str, None, 'message', False)
+        self.add_item(section, 'threads', int, 1, '# of threads', False)
+        self.add_item(section, 'register', int, 0, 'register nickname when required', False)

-		self.add_item(section, 'wait', int, 0, 'wait prior sending messages', False)
-		self.add_item(section, 'once', int, 0, 'quit as soon as possible', False)
-		self.add_item(section, 'hilight', int, 0, 'try to hilight all nicks?', False)
-		self.add_item(section, 'waitonsuccess', int, 0, 'wait for a while on success', False)
-		self.add_item(section, 'debug', int, 0, 'use debug', False)
-		self.add_item(section, 'duration', int, 180, 'maximum time to run', False)
-		self.add_item(section, 'delay', str, 14400, 'if waitonsuccess, wait for $delay before sending other bots', False)
-		self.add_item(section, 'nick', str, None, 'specify nickname to use', False)
-		self.add_item(section, 'use_ssl', int, 2, 'Use ssl? (0: false, 1: true, 2: random)', False)
-		self.add_item(section, 'cycle', int, 0, 'cycle flood', False)
-		self.add_item(section, 'change_nick', int, 0, 'Change nick between messages (useful when flooding privates)', False)
-		self.add_item(section, 'use_timeout', int, 0, 'make connexions quit through timeout', False)
-		self.add_item(section, 'clones', int, 1, 'Number of connexion repeat to run', False)
-		self.add_item(section, 'query', bool, False, 'also flood in query', False)
-		self.add_item(section, 'noquerybefore', int, 10, 'do not send query before x secs being connected', False)
-		self.add_item(section, 'oper', bool, False, 'piss of opers', False)
-		self.add_item(section, 'whois', bool, False, 'piss of opers with /whois', False)
-		self.add_item(section, 'modex', bool, False, 'make +/- x mode', False)
-		self.add_item(section, 'os', bool, False, 'piss off opers with /os', False)
-		self.add_item(section, 'file', str, None, 'read flood content from file', False)
-		self.add_item(section, 'failid', str, None, 'generate nickserv warn. about IDENTIFY attempts', False)
+        self.add_item(section, 'wait', int, 0, 'wait prior sending messages', False)
+        self.add_item(section, 'once', int, 0, 'quit as soon as possible', False)
+        self.add_item(section, 'hilight', int, 0, 'try to hilight all nicks?', False)
+        self.add_item(section, 'waitonsuccess', int, 0, 'wait for a while on success', False)
+        self.add_item(section, 'debug', int, 0, 'use debug', False)
+        self.add_item(section, 'duration', int, 180, 'maximum time to run', False)
+        self.add_item(section, 'delay', str, 14400, 'if waitonsuccess, wait for $delay before sending other bots', False)
+        self.add_item(section, 'nick', str, None, 'specify nickname to use', False)
+        self.add_item(section, 'use_ssl', int, 2, 'Use ssl? (0: false, 1: true, 2: random)', False)
+        self.add_item(section, 'cycle', int, 0, 'cycle flood', False)
+        self.add_item(section, 'change_nick', int, 0, 'Change nick between messages (useful when flooding privates)', False)
+        self.add_item(section, 'use_timeout', int, 0, 'make connexions quit through timeout', False)
+        self.add_item(section, 'clones', int, 1, 'Number of connexion repeat to run', False)
+        self.add_item(section, 'query', bool, False, 'also flood in query', False)
+        self.add_item(section, 'noquerybefore', int, 10, 'do not send query before x secs being connected', False)
+        self.add_item(section, 'oper', bool, False, 'piss of opers', False)
+        self.add_item(section, 'whois', bool, False, 'piss of opers with /whois', False)
+        self.add_item(section, 'modex', bool, False, 'make +/- x mode', False)
+        self.add_item(section, 'os', bool, False, 'piss off opers with /os', False)
+        self.add_item(section, 'file', str, None, 'read flood content from file', False)
+        self.add_item(section, 'failid', str, None, 'generate nickserv warn. about IDENTIFY attempts', False)
--- a/dbs.py
+++ b/dbs.py
@@ -1,61 +1,80 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""Database table creation and insertion utilities."""
+
 import time
 from misc import _log

+
 def create_table_if_not_exists(sqlite, dbname):
-	if dbname == 'proxylist':
-		sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist (
-			proxy BLOB UNIQUE,
-			country BLOB,
-			added INT,
-			failed INT,
-			tested INT,
-			dronebl INT,
-			proto TEXT,
-			mitm INT,
-			success_count INT,
-			ip TEXT,
-			port INT,
-			consecutive_success INT,
-			total_duration INT)""")
-		# indexes for common query patterns
-		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
-		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
-		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')
+    """Create database table with indexes if it doesn't exist."""
+    if dbname == 'proxylist':
+        sqlite.execute("""CREATE TABLE IF NOT EXISTS proxylist (
+            proxy BLOB UNIQUE,
+            country BLOB,
+            added INT,
+            failed INT,
+            tested INT,
+            dronebl INT,
+            proto TEXT,
+            mitm INT,
+            success_count INT,
+            ip TEXT,
+            port INT,
+            consecutive_success INT,
+            total_duration INT)""")
+        # Indexes for common query patterns
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_failed ON proxylist(failed)')
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_tested ON proxylist(tested)')
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_proxylist_proto ON proxylist(proto)')

-	elif dbname == 'uris':
-		sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
-			url TEXT UNIQUE,
-			content_type TEXT,
-			check_time INT,
-			error INT,
-			stale_count INT,
-			retrievals INT,
-			proxies_added INT,
-			added INT
-			)""")
-		# indexes for common query patterns
-		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
-		sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
+    elif dbname == 'uris':
+        sqlite.execute("""CREATE TABLE IF NOT EXISTS uris (
+            url TEXT UNIQUE,
+            content_type TEXT,
+            check_time INT,
+            error INT,
+            stale_count INT,
+            retrievals INT,
+            proxies_added INT,
+            added INT)""")
+        # Indexes for common query patterns
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_error ON uris(error)')
+        sqlite.execute('CREATE INDEX IF NOT EXISTS idx_uris_checktime ON uris(check_time)')
+
+    sqlite.commit()

-	sqlite.commit()

 def insert_proxies(proxydb, proxies, url):
-	if not proxies: return
-	timestamp = int(time.time())
-	rows = []
-	for p in proxies:
-		ip, port = p.split(':')
-		rows.append((timestamp,p,ip,port,3,0,0,0,0,0))
-	proxydb.executemany('INSERT OR IGNORE INTO proxylist (added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) VALUES (?,?,?,?,?,?,?,?,?,?)', rows)
-	proxydb.commit()
-	_log('+%d proxy/ies from %s' % (len(proxies), url), 'added')
+    """Insert new proxies into database."""
+    if not proxies:
+        return
+    timestamp = int(time.time())
+    rows = []
+    for p in proxies:
+        ip, port = p.split(':')
+        rows.append((timestamp, p, ip, port, 3, 0, 0, 0, 0, 0))
+    proxydb.executemany(
+        'INSERT OR IGNORE INTO proxylist '
+        '(added,proxy,ip,port,failed,tested,success_count,total_duration,mitm,consecutive_success) '
+        'VALUES (?,?,?,?,?,?,?,?,?,?)',
+        rows
+    )
+    proxydb.commit()
+    _log('+%d proxy/ies from %s' % (len(proxies), url), 'added')


 def insert_urls(urls, search, sqlite):
-	if not urls: return
-	time_now = int(time.time())
-	rows = [ (time_now,u,0,1,0,0,0) for u in urls ]
-	sqlite.executemany('INSERT OR IGNORE INTO uris (added,url,check_time,error,stale_count,retrievals,proxies_added) values(?,?,?,?,?,?,?)', rows)
-	sqlite.commit()
-	_log('+%d url(s) from %s' % (len(urls), search), 'added')
-
+    """Insert new URLs into database."""
+    if not urls:
+        return
+    timestamp = int(time.time())
+    rows = [(timestamp, u, 0, 1, 0, 0, 0) for u in urls]
+    sqlite.executemany(
+        'INSERT OR IGNORE INTO uris '
+        '(added,url,check_time,error,stale_count,retrievals,proxies_added) '
+        'VALUES (?,?,?,?,?,?,?)',
+        rows
+    )
+    sqlite.commit()
+    _log('+%d url(s) from %s' % (len(urls), search), 'added')
--- a/fetch.py
+++ b/fetch.py
@@ -6,169 +6,169 @@ from misc import _log

 config = None
 def set_config(cfg):
-	global config
-	config = cfg
+    global config
+    config = cfg

 cleanhtml_re = [
-	re.compile('<.*?>'),
-	re.compile('\s+'),
-	re.compile('::+'),
+    re.compile('<.*?>'),
+    re.compile('\s+'),
+    re.compile('::+'),
 ]
 def cleanhtml(raw_html):
-	html = raw_html.replace('&nbsp;', ' ')
-	html = re.sub(cleanhtml_re[0], ':', html)
-	html = re.sub(cleanhtml_re[1], ':', html)
-	html = re.sub(cleanhtml_re[2], ':', html)
-	return html
+    html = raw_html.replace('&nbsp;', ' ')
+    html = re.sub(cleanhtml_re[0], ':', html)
+    html = re.sub(cleanhtml_re[1], ':', html)
+    html = re.sub(cleanhtml_re[2], ':', html)
+    return html

 def fetch_contents(url, head=False, proxy=None):
-	content = None
-	if proxy is not None and len(proxy):
-		for p in proxy:
-			content = _fetch_contents(url, head=head, proxy=p)
-			if content is not None: break
+    content = None
+    if proxy is not None and len(proxy):
+        for p in proxy:
+            content = _fetch_contents(url, head=head, proxy=p)
+            if content is not None: break

-	else:
-		content = _fetch_contents(url, head=head)
+    else:
+        content = _fetch_contents(url, head=head)

-	return content if content is not None else ''
+    return content if content is not None else ''

 retry_messages = ('Engines cannot retrieve results', 'Rate limit exceeded')
 def _fetch_contents(url, head = False, proxy=None):
-	host, port, ssl, uri = _parse_url(url)
-	headers=[
-		'Accept-Language: en-US,en;q=0.8',
-		'Cache-Control: max-age=0',
-	]
-	if config.ppf.debug:
-		_log("connecting to %s... (header: %s)" % (url, str(head)), "debug")
-	while True:
-		proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))]
-		if proxy: proxies.append( rocksock.RocksockProxyFromURL(proxy))
+    host, port, ssl, uri = _parse_url(url)
+    headers=[
+        'Accept-Language: en-US,en;q=0.8',
+        'Cache-Control: max-age=0',
+    ]
+    if config.ppf.debug:
+        _log("connecting to %s... (header: %s)" % (url, str(head)), "debug")
+    while True:
+        proxies = [rocksock.RocksockProxyFromURL('socks4://%s' % random.choice( config.torhosts ))]
+        if proxy: proxies.append( rocksock.RocksockProxyFromURL(proxy))

-		http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=config.ppf.http_retries, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0')
-		if not http.connect():
-			_log("failed to connect to %s"%url, "ppf")
-			e = http.get_last_rocksock_exception()
-			if not e:
-				return None
-			et = e.get_errortype()
-			ee = e.get_error()
-			ef = e.get_failedproxy()
-			if et == rocksock.RS_ET_OWN and \
-			ee == rocksock.RS_E_TARGET_CONN_REFUSED \
-			and ef == 0:
-				_log("could not connect to proxy 0 - check your connection", "error")
-				time.sleep(5)
-				continue
-			return None
-		break
+        http = RsHttp(host,ssl=ssl,port=port, keep_alive=True, timeout=config.ppf.timeout, max_tries=config.ppf.http_retries, follow_redirects=True, auto_set_cookies=True, proxies=proxies, user_agent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0')
+        if not http.connect():
+            _log("failed to connect to %s"%url, "ppf")
+            e = http.get_last_rocksock_exception()
+            if not e:
+                return None
+            et = e.get_errortype()
+            ee = e.get_error()
+            ef = e.get_failedproxy()
+            if et == rocksock.RS_ET_OWN and \
+            ee == rocksock.RS_E_TARGET_CONN_REFUSED \
+            and ef == 0:
+                _log("could not connect to proxy 0 - check your connection", "error")
+                time.sleep(5)
+                continue
+            return None
+        break

-	## only request header
-	if head:
-		hdr = http.head(uri, headers)
-		return hdr
+    ## only request header
+    if head:
+        hdr = http.head(uri, headers)
+        return hdr

-	hdr, res = http.get(uri, headers)
-	res = res.encode('utf-8') if isinstance(res, unicode) else res
-	for retry_message in retry_messages:
-		if retry_message in res: return None
+    hdr, res = http.get(uri, headers)
+    res = res.encode('utf-8') if isinstance(res, unicode) else res
+    for retry_message in retry_messages:
+        if retry_message in res: return None

-	return res
+    return res

 def valid_port(port):
-	return port > 0 and port < 65535
+    return port > 0 and port < 65535

 def is_usable_proxy(proxy):
-	ip, port = proxy.split(':')
-	if not valid_port(int(port)): return False
+    ip, port = proxy.split(':')
+    if not valid_port(int(port)): return False

-	octets = ip.split('.')
-	A = int(octets[0])
-	B = int(octets[1])
-	C = int(octets[2])
-	D = int(octets[3])
+    octets = ip.split('.')
+    A = int(octets[0])
+    B = int(octets[1])
+    C = int(octets[2])
+    D = int(octets[3])

-	if (A < 1 or A > 254 or \
-	B > 255 or C > 255 or D > 255) or \
-	(A == 10 or A == 127) or \
-	(A == 192 and B == 168) or \
-	(A == 172 and B >= 16 and B <= 31): return False
-	return True
+    if (A < 1 or A > 254 or \
+    B > 255 or C > 255 or D > 255) or \
+    (A == 10 or A == 127) or \
+    (A == 192 and B == 168) or \
+    (A == 172 and B >= 16 and B <= 31): return False
+    return True

 _known_proxies = {}

 def init_known_proxies(proxydb):
-	"""Initialize known proxies cache from database."""
-	global _known_proxies
-	if _known_proxies:
-		return
-	known = proxydb.execute('SELECT proxy FROM proxylist').fetchall()
-	for k in known:
-		_known_proxies[k[0]] = True
+    """Initialize known proxies cache from database."""
+    global _known_proxies
+    if _known_proxies:
+        return
+    known = proxydb.execute('SELECT proxy FROM proxylist').fetchall()
+    for k in known:
+        _known_proxies[k[0]] = True

 def add_known_proxies(proxies):
-	"""Add proxies to known cache."""
-	global _known_proxies
-	for p in proxies:
-		_known_proxies[p] = True
+    """Add proxies to known cache."""
+    global _known_proxies
+    for p in proxies:
+        _known_proxies[p] = True

 def is_known_proxy(proxy):
-	"""Check if proxy is in known cache."""
-	return proxy in _known_proxies
+    """Check if proxy is in known cache."""
+    return proxy in _known_proxies

 def extract_proxies(content, proxydb=None, filter_known=True):
-	"""Extract and normalize proxy addresses from content.
+    """Extract and normalize proxy addresses from content.

-	Args:
-		content: HTML/text content to parse
-		proxydb: Database connection for known proxy lookup (optional)
-		filter_known: If True, filter out known proxies and return new only
+    Args:
+        content: HTML/text content to parse
+        proxydb: Database connection for known proxy lookup (optional)
+        filter_known: If True, filter out known proxies and return new only

-	Returns:
-		If filter_known: (unique_count, new_proxies) tuple
-		If not filter_known: list of all unique valid proxies
-	"""
-	matches = re.findall(r'([0-9]+(?:\.[0-9]+){3}:[0-9]{2,5})[\D$]', cleanhtml(content))
+    Returns:
+        If filter_known: (unique_count, new_proxies) tuple
+        If not filter_known: list of all unique valid proxies
+    """
+    matches = re.findall(r'([0-9]+(?:\.[0-9]+){3}:[0-9]{2,5})[\D$]', cleanhtml(content))

-	uniques_dict = {}
-	for p in matches:
-		ip, port = p.split(':')
-		# Normalize IP (remove leading zeros from octets)
-		ip = '.'.join(str(int(octet)) for octet in ip.split('.'))
-		# Normalize port (remove leading zeros, handle empty case)
-		port = int(port.lstrip('0') or '0')
-		p = '%s:%s' % (ip, port)
-		uniques_dict[p] = True
+    uniques_dict = {}
+    for p in matches:
+        ip, port = p.split(':')
+        # Normalize IP (remove leading zeros from octets)
+        ip = '.'.join(str(int(octet)) for octet in ip.split('.'))
+        # Normalize port (remove leading zeros, handle empty case)
+        port = int(port.lstrip('0') or '0')
+        p = '%s:%s' % (ip, port)
+        uniques_dict[p] = True

-	uniques = [p for p in uniques_dict.keys() if is_usable_proxy(p)]
+    uniques = [p for p in uniques_dict.keys() if is_usable_proxy(p)]

-	if not filter_known:
-		return uniques
+    if not filter_known:
+        return uniques

-	# Initialize known proxies from DB if needed
-	if proxydb is not None:
-		init_known_proxies(proxydb)
+    # Initialize known proxies from DB if needed
+    if proxydb is not None:
+        init_known_proxies(proxydb)

-	new = []
-	for p in uniques:
-		if not is_known_proxy(p):
-			new.append(p)
-			add_known_proxies([p])
+    new = []
+    for p in uniques:
+        if not is_known_proxy(p):
+            new.append(p)
+            add_known_proxies([p])

-	return len(uniques), new
+    return len(uniques), new

 def extract_urls(content, urls = None, urignore=None):
-	urls = [] if not urls else urls
-	soup = soupify(content)
-	for a in soup.body.find_all('a'):
-		if not 'rel' in a.attrs or not 'noreferrer' in a.attrs['rel'] or a.attrs['href'] in urls: continue
-		bad = False
-		href = a.attrs['href']
-		for i in urignore:
-			if re.findall(i, href):
-				bad = True
-				break
-		if not bad: urls.append(href)
-	return urls
+    urls = [] if not urls else urls
+    soup = soupify(content)
+    for a in soup.body.find_all('a'):
+        if not 'rel' in a.attrs or not 'noreferrer' in a.attrs['rel'] or a.attrs['href'] in urls: continue
+        bad = False
+        href = a.attrs['href']
+        for i in urignore:
+            if re.findall(i, href):
+                bad = True
+                break
+        if not bad: urls.append(href)
+    return urls

--- a/mysqlite.py
+++ b/mysqlite.py
@@ -1,44 +1,62 @@
-import time, random, sys
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""SQLite wrapper with retry logic and WAL mode."""
+
+import time
+import random
+import sys
 import sqlite3

-class mysqlite:
-	def _try_op(self, op, query, args=None, rmin=1.5, rmax=7.0):
-		while 1:
-			try:
-				if query is None:
-					return op()
-				elif args is None:
-					return op(query)
-				else:
-					return op(query, args)
-			except sqlite3.OperationalError as e:
-				if e.message == 'database is locked':
-					print "zzZzzZZ: db is locked (%s)"%self.dbname
-					time.sleep(random.uniform(rmin, rmax))
-					continue
-				else:
-					print '%s\nquery: %s\nargs: %s' % (str(sys.exc_info()), str(query), str(args))
-					raise e

-	def execute(self, query, args = None, rmin=1.5, rmax=7.0):
-		return self._try_op(self.cursor.execute, query, args, rmin, rmax)
+class mysqlite(object):
+    """SQLite connection wrapper with automatic retry on lock."""

-	def executemany(self, query, args, rmin=1.5, rmax=7.0):
-		while len(args):
-			self._try_op(self.cursor.executemany, query, args[:500], rmin, rmax)
-			args = args[500:]
+    def __init__(self, database, factory=None):
+        self.handle = sqlite3.connect(database)
+        if factory is not None:
+            self.handle.text_factory = factory
+        self.cursor = self.handle.cursor()
+        self.dbname = database
+        # Enable WAL mode for better concurrency
+        self.cursor.execute('PRAGMA journal_mode=WAL')
+        self.cursor.execute('PRAGMA synchronous=NORMAL')

-	def commit(self, rmin=1.5, rmax=7.0):
-		return self._try_op(self.handle.commit, None, None, rmin, rmax)
+    def _try_op(self, op, query, args=None, rmin=1.5, rmax=7.0):
+        """Execute operation with retry on database lock."""
+        while True:
+            try:
+                if query is None:
+                    return op()
+                elif args is None:
+                    return op(query)
+                else:
+                    return op(query, args)
+            except sqlite3.OperationalError as e:
+                err_msg = str(e)
+                if 'database is locked' in err_msg:
+                    sys.stderr.write('zzZzzZZ: db is locked (%s)\n' % self.dbname)
+                    time.sleep(random.uniform(rmin, rmax))
+                    continue
+                else:
+                    sys.stderr.write('%s\nquery: %s\nargs: %s\n' % (
+                        str(sys.exc_info()), str(query), str(args)))
+                    raise

-	def close(self):
-		self.handle.close()
+    def execute(self, query, args=None, rmin=1.5, rmax=7.0):
+        """Execute a single query with retry."""
+        return self._try_op(self.cursor.execute, query, args, rmin, rmax)

-	def __init__(self, database, factory = None):
-		self.handle = sqlite3.connect(database)
-		if factory: self.handle.text_factory = factory
-		self.cursor = self.handle.cursor()
-		self.dbname = database
-		# enable WAL mode for better concurrency
-		self.cursor.execute('PRAGMA journal_mode=WAL')
-		self.cursor.execute('PRAGMA synchronous=NORMAL')
+    def executemany(self, query, args, rmin=1.5, rmax=7.0):
+        """Execute query for multiple argument sets, batched."""
+        while args:
+            batch = args[:500]
+            self._try_op(self.cursor.executemany, query, batch, rmin, rmax)
+            args = args[500:]
+
+    def commit(self, rmin=1.5, rmax=7.0):
+        """Commit transaction with retry."""
+        return self._try_op(self.handle.commit, None, None, rmin, rmax)
+
+    def close(self):
+        """Close database connection."""
+        self.handle.close()
--- a/ppf.py
+++ b/ppf.py
@@ -16,231 +16,231 @@ import random
 config = Config()

 def import_from_file(fn, sqlite):
-	with open(fn, 'r') as f:
-		urls = [ url for url in f.read().split('\n') if url ]
-		cinc = 0
-		while True:
-			chunk = urls[cinc:cinc+200]
-			if chunk: dbs.insert_urls(chunk, 'import.txt', urldb)
-			else: break
-			cinc = cinc + 200
+    with open(fn, 'r') as f:
+        urls = [ url for url in f.read().split('\n') if url ]
+        cinc = 0
+        while True:
+            chunk = urls[cinc:cinc+200]
+            if chunk: dbs.insert_urls(chunk, 'import.txt', urldb)
+            else: break
+            cinc = cinc + 200


 def get_content_type(url, proxy):
-	hdr = fetch.fetch_contents(url, head=True, proxy=proxy)
+    hdr = fetch.fetch_contents(url, head=True, proxy=proxy)

-	for h in hdr.split('\n'):
-		if h.lower().startswith('content-type: '): return h.lower().split(':')[1].strip()
+    for h in hdr.split('\n'):
+        if h.lower().startswith('content-type: '): return h.lower().split(':')[1].strip()

-	return ''
+    return ''

 def is_good_content_type(string):
-	allowed_ct = [ 'text/html', 'text/plain', 'atom+xml' ]
-	for ct in allowed_ct:
-		if ct.lower() in string.lower(): return True
-	return False
+    allowed_ct = [ 'text/html', 'text/plain', 'atom+xml' ]
+    for ct in allowed_ct:
+        if ct.lower() in string.lower(): return True
+    return False

 def is_bad_url(uri, domain=None, samedomain=False):
-	# if uri needs to be from same domain and domains missmatch
-	if samedomain and str(uri.split('/')[2]).lower() != str(domain).lower():
-		return True
-	for u in urignore:
-		if re.findall(u, uri): return True
-	return False
+    # if uri needs to be from same domain and domains mismatch
+    if samedomain and str(uri.split('/')[2]).lower() != str(domain).lower():
+        return True
+    for u in urignore:
+        if re.findall(u, uri): return True
+    return False

 def extract_urls(html, url):
-	mytime = int(time.time())
-	proto = url.split(':')[0]
-	domain = url.split('/')[2]
-	urls = []
+    mytime = int(time.time())
+    proto = url.split(':')[0]
+    domain = url.split('/')[2]
+    urls = []

-	soup = soupify(html, nohtml=True)
+    soup = soupify(html, nohtml=True)

-	for a in soup.find_all('a', href=True):
-		item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
-		item = item.strip()
+    for a in soup.find_all('a', href=True):
+        item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
+        item = item.strip()

-		if item.startswith('www.'):
-			item = 'http://%s' % item
-		elif not item.startswith('http'):
-			if not item.startswith('/'): item = '/%s' % item
-			item = '%s://%s%s' % (proto,domain,item)
+        if item.startswith('www.'):
+            item = 'http://%s' % item
+        elif not item.startswith('http'):
+            if not item.startswith('/'): item = '/%s' % item
+            item = '%s://%s%s' % (proto,domain,item)

-		elif is_bad_url(item, domain=domain, samedomain=config.ppf.extract_samedomain):
-			continue
-		if not item in urls: urls.append(item)
+        elif is_bad_url(item, domain=domain, samedomain=config.ppf.extract_samedomain):
+            continue
+        if not item in urls: urls.append(item)

-	if urls: dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)
+    if urls: dbs.insert_urls(urls, url, urldb) #insert_if_not_exists(urls)

 def import_proxies_from_file(proxydb, fn):
-	content = open(fn, 'r').read()
-	unique_count, new = fetch.extract_proxies(content, proxydb)
-	if new:
-		dbs.insert_proxies(proxydb, new, fn)
-		return 0
-	return 1
+    content = open(fn, 'r').read()
+    unique_count, new = fetch.extract_proxies(content, proxydb)
+    if new:
+        dbs.insert_proxies(proxydb, new, fn)
+        return 0
+    return 1

 class Leechered(threading.Thread):
-	def __init__(self, url, stale_count, error, retrievals, proxies_added, content_type, proxy):
-		self.status = 'nok'
-		self.proxylist = []
-		self.running = True
-		self.url = url
-		self.stale_count = stale_count
-		self.error = error
-		self.retrievals = retrievals
-		self.proxies_added = proxies_added
-		self.content_type = content_type
-		self.proxy = proxy
-		self.execute = ''
-		threading.Thread.__init__(self)
+    def __init__(self, url, stale_count, error, retrievals, proxies_added, content_type, proxy):
+        self.status = 'nok'
+        self.proxylist = []
+        self.running = True
+        self.url = url
+        self.stale_count = stale_count
+        self.error = error
+        self.retrievals = retrievals
+        self.proxies_added = proxies_added
+        self.content_type = content_type
+        self.proxy = proxy
+        self.execute = ''
+        threading.Thread.__init__(self)

-	def retrieve(self):
-		return self.url, self.proxylist, self.stale_count, self.error, self.retrievals, self.content_type, self.proxies_added, self.execute
-	def status(self):
-		return self.status
+    def retrieve(self):
+        return self.url, self.proxylist, self.stale_count, self.error, self.retrievals, self.content_type, self.proxies_added, self.execute
+    def status(self):
+        return self.status

-	def run(self):
-		self.status = 'nok'
+    def run(self):
+        self.status = 'nok'

-		if not self.content_type: self.content_type = get_content_type(self.url, self.proxy)
+        if not self.content_type: self.content_type = get_content_type(self.url, self.proxy)

-		if is_good_content_type(self.content_type):
-			try:
-				content = fetch.fetch_contents(self.url, proxy=self.proxy)
-			except KeyboardInterrupt as e:
-				raise e
-			except Exception as e:
-				_log('%s: fetch error: %s' % (self.url.split('/')[2], str(e)), 'error')
-				content = ''
-		else:
-			content = ''
+        if is_good_content_type(self.content_type):
+            try:
+                content = fetch.fetch_contents(self.url, proxy=self.proxy)
+            except KeyboardInterrupt as e:
+                raise e
+            except Exception as e:
+                _log('%s: fetch error: %s' % (self.url.split('/')[2], str(e)), 'error')
+                content = ''
+        else:
+            content = ''

-		unique = fetch.extract_proxies(content, filter_known=False)
-		self.proxylist = [ proxy for proxy in unique if not fetch.is_known_proxy(proxy) ]
-		proxy_count = len(self.proxylist)
+        unique = fetch.extract_proxies(content, filter_known=False)
+        self.proxylist = [ proxy for proxy in unique if not fetch.is_known_proxy(proxy) ]
+        proxy_count = len(self.proxylist)

-		if self.retrievals == 0:	# new site
-			if content and not self.proxylist: # site works but has zero proxy addresses
-				self.error += 1
-				self.stale_count += 1
-			elif proxy_count:
-				self.error = 0
-				self.stale_count = 0
-			else:
-				self.error += 2
-				self.stale_count += 2
-		else:										 # not a new site
-			# proxylist is empty
-			if not proxy_count:
-				self.stale_count += 1
-			# proxylist is not empty: site is working
-			else:
-				self.stale_count = 0
-				self.error = 0
-			# site has no content
-			if not content:
-				self.error += 1
-				self.stale_count += 1
-			#else:
-			#	self.retrievals += 1
-			#	self.error = 0
-			#	self.stale_count = 0
-			# site has proxies
-			if proxy_count:
-				self.error = 0
-				self.stale_count = 0
-				extract_urls(content, self.url)
+        if self.retrievals == 0:    # new site
+            if content and not self.proxylist: # site works but has zero proxy addresses
+                self.error += 1
+                self.stale_count += 1
+            elif proxy_count:
+                self.error = 0
+                self.stale_count = 0
+            else:
+                self.error += 2
+                self.stale_count += 2
+        else:                                        # not a new site
+            # proxylist is empty
+            if not proxy_count:
+                self.stale_count += 1
+            # proxylist is not empty: site is working
+            else:
+                self.stale_count = 0
+                self.error = 0
+            # site has no content
+            if not content:
+                self.error += 1
+                self.stale_count += 1
+            #else:
+            #   self.retrievals += 1
+            #   self.error = 0
+            #   self.stale_count = 0
+            # site has proxies
+            if proxy_count:
+                self.error = 0
+                self.stale_count = 0
+                extract_urls(content, self.url)

-		self.execute = (self.error, self.stale_count, int(time.time()), self.retrievals, self.proxies_added+len(self.proxylist), self.content_type, self.url)
-		self.status = 'ok'
+        self.execute = (self.error, self.stale_count, int(time.time()), self.retrievals, self.proxies_added+len(self.proxylist), self.content_type, self.url)
+        self.status = 'ok'


 if __name__ == '__main__':
-	config.load()
-	errors = config.validate()
-	if errors:
-		for e in errors:
-			_log(e, 'error')
-		sys.exit(1)
-	fetch.set_config(config)
+    config.load()
+    errors = config.validate()
+    if errors:
+        for e in errors:
+            _log(e, 'error')
+        sys.exit(1)
+    fetch.set_config(config)

-	# handle --nobs flag
-	args = config.aparser.parse_args()
-	if args.nobs:
-		set_nobs(True)
+    # handle --nobs flag
+    args = config.aparser.parse_args()
+    if args.nobs:
+        set_nobs(True)


-	proxydb = mysqlite.mysqlite(config.watchd.database, str)
-	dbs.create_table_if_not_exists(proxydb, 'proxylist')
-	fetch.init_known_proxies(proxydb)
+    proxydb = mysqlite.mysqlite(config.watchd.database, str)
+    dbs.create_table_if_not_exists(proxydb, 'proxylist')
+    fetch.init_known_proxies(proxydb)

-	with open('urignore.txt', 'r') as f:
-		urignore = [ i.strip() for i in f.read().split('\n') if i.strip() ]
+    with open('urignore.txt', 'r') as f:
+        urignore = [ i.strip() for i in f.read().split('\n') if i.strip() ]

-	urldb = mysqlite.mysqlite(config.ppf.database, str)
-	dbs.create_table_if_not_exists(urldb, 'uris')
-	import_from_file('import.txt', urldb)
-	if len(sys.argv) == 3 and sys.argv[1] == "--file":
-		sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))
+    urldb = mysqlite.mysqlite(config.ppf.database, str)
+    dbs.create_table_if_not_exists(urldb, 'uris')
+    import_from_file('import.txt', urldb)
+    if len(sys.argv) == 3 and sys.argv[1] == "--file":
+        sys.exit(import_proxies_from_file(proxydb, sys.argv[2]))

-	# start proxy watcher
-	if config.watchd.threads > 0:
-		watcherd = proxywatchd.Proxywatchd()
-		watcherd.start()
-	else:
-		watcherd = None
+    # start proxy watcher
+    if config.watchd.threads > 0:
+        watcherd = proxywatchd.Proxywatchd()
+        watcherd.start()
+    else:
+        watcherd = None

-	qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM()'
-	threads = []
-	rows = []
-	reqtime = time.time() - 3600
-	statusmsg = time.time()
-	while True:
-		try:
-			time.sleep(random.random()/10)
-			if (time.time() - statusmsg) > 180:
-				_log('running %d thread(s) over %d' % (len(threads), config.ppf.threads), 'ppf')
-				statusmsg = time.time()
-			if not rows:
-				if (time.time() - reqtime) > 3:
-					rows = urldb.execute(qurl, (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
-					reqtime = time.time()
-				if len(rows) < config.ppf.threads:
-					time.sleep(60)
-					rows = []
-				else:
-					_log('handing %d job(s) to %d thread(s)' % ( len(rows), config.ppf.threads ), 'ppf')
+    qurl = 'SELECT url,stale_count,error,retrievals,proxies_added,content_type FROM uris WHERE error < ? and (check_time+?+((error+stale_count)*?) <?) ORDER BY RANDOM()'
+    threads = []
+    rows = []
+    reqtime = time.time() - 3600
+    statusmsg = time.time()
+    while True:
+        try:
+            time.sleep(random.random()/10)
+            if (time.time() - statusmsg) > 180:
+                _log('running %d thread(s) over %d' % (len(threads), config.ppf.threads), 'ppf')
+                statusmsg = time.time()
+            if not rows:
+                if (time.time() - reqtime) > 3:
+                    rows = urldb.execute(qurl, (config.ppf.max_fail, config.ppf.checktime, config.ppf.perfail_checktime, int(time.time()))).fetchall()
+                    reqtime = time.time()
+                if len(rows) < config.ppf.threads:
+                    time.sleep(60)
+                    rows = []
+                else:
+                    _log('handing %d job(s) to %d thread(s)' % ( len(rows), config.ppf.threads ), 'ppf')

-			_proxylist = [ '%s://%s' % (p[0], p[1]) for p in proxydb.execute('SELECT proto,proxy from proxylist where failed=0').fetchall() ]
-			if not _proxylist: _proxylist = None
+            _proxylist = [ '%s://%s' % (p[0], p[1]) for p in proxydb.execute('SELECT proto,proxy from proxylist where failed=0').fetchall() ]
+            if not _proxylist: _proxylist = None

-			for thread in threads:
-				if thread.status == 'ok':
-					url, proxylist, stale_count, error, retrievals, content_type, proxies_added, execute = thread.retrieve()
-					new = [ p for p in proxylist if not fetch.is_known_proxy(p) ]
-					if new:
-						fetch.add_known_proxies(new)
-					execute = (error, stale_count, int(time.time()), retrievals, proxies_added+len(new), content_type, url)
-					urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=?,proxies_added=?,content_type=? where url=?', execute)
-					urldb.commit()
-					if new: dbs.insert_proxies(proxydb, new, url)
+            for thread in threads:
+                if thread.status == 'ok':
+                    url, proxylist, stale_count, error, retrievals, content_type, proxies_added, execute = thread.retrieve()
+                    new = [ p for p in proxylist if not fetch.is_known_proxy(p) ]
+                    if new:
+                        fetch.add_known_proxies(new)
+                    execute = (error, stale_count, int(time.time()), retrievals, proxies_added+len(new), content_type, url)
+                    urldb.execute('UPDATE uris SET error=?,stale_count=?,check_time=?,retrievals=?,proxies_added=?,content_type=? where url=?', execute)
+                    urldb.commit()
+                    if new: dbs.insert_proxies(proxydb, new, url)

-			threads = [ thread for thread in threads if thread.is_alive() ]
-			if len(threads) < config.ppf.threads and rows:
-				p = random.sample(_proxylist, 5) if _proxylist is not None else None
-				row = random.choice(rows)
-				urldb.execute('UPDATE uris SET check_time=? where url=?', (time.time(), row[0]))
-				urldb.commit()
-				rows.remove(row)
-				t = Leechered(row[0], row[1], row[2], row[3], row[4], row[5], p)
-				threads.append(t)
-				t.start()
+            threads = [ thread for thread in threads if thread.is_alive() ]
+            if len(threads) < config.ppf.threads and rows:
+                p = random.sample(_proxylist, 5) if _proxylist is not None else None
+                row = random.choice(rows)
+                urldb.execute('UPDATE uris SET check_time=? where url=?', (time.time(), row[0]))
+                urldb.commit()
+                rows.remove(row)
+                t = Leechered(row[0], row[1], row[2], row[3], row[4], row[5], p)
+                threads.append(t)
+                t.start()

-		except KeyboardInterrupt:
-			if watcherd:
-				watcherd.stop()
-				watcherd.finish()
-			break
+        except KeyboardInterrupt:
+            if watcherd:
+                watcherd.stop()
+                watcherd.finish()
+            break

-	_log('ppf stopped', 'info')
+    _log('ppf stopped', 'info')
--- a/proxywatchd.py
+++ b/proxywatchd.py
--- a/soup_parser.py
+++ b/soup_parser.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python2
-# HTML parsing with optional BeautifulSoup or stdlib fallback
+# -*- coding: utf-8 -*-
+"""HTML parsing with optional BeautifulSoup or stdlib fallback."""

 from HTMLParser import HTMLParser
 import sys
@@ -8,87 +9,98 @@ _bs4_available = False
 _use_bs4 = True

 try:
-	from bs4 import BeautifulSoup, FeatureNotFound
-	_bs4_available = True
+    from bs4 import BeautifulSoup, FeatureNotFound
+    _bs4_available = True
 except ImportError:
-	_bs4_available = False
+    _bs4_available = False


-class Tag():
-	def __init__(self, name, attrs):
-		self.name = name
-		self.attrs = dict(attrs)
+class Tag(object):
+    """Simple tag representation for stdlib parser."""

-	def __getitem__(self, key):
-		return self.attrs.get(key)
+    def __init__(self, name, attrs):
+        self.name = name
+        self.attrs = dict(attrs)

-	def get(self, key, default=None):
-		return self.attrs.get(key, default)
+    def __getitem__(self, key):
+        return self.attrs.get(key)
+
+    def get(self, key, default=None):
+        return self.attrs.get(key, default)


-class SoupResult():
-	def __init__(self, tags):
-		self._tags = tags
-		self.body = self
+class SoupResult(object):
+    """BeautifulSoup-like result wrapper for stdlib parser."""

-	def find_all(self, tag_name, **kwargs):
-		results = []
-		for tag in self._tags:
-			if tag.name != tag_name:
-				continue
-			if 'href' in kwargs:
-				if kwargs['href'] is True and 'href' not in tag.attrs:
-					continue
-				elif kwargs['href'] is not True and tag.attrs.get('href') != kwargs['href']:
-					continue
-			results.append(tag)
-		return results
+    def __init__(self, tags):
+        self._tags = tags
+        self.body = self
+
+    def find_all(self, tag_name, **kwargs):
+        """Return tags named tag_name, optionally filtered by the href keyword."""
+        results = []
+        for tag in self._tags:
+            if tag.name != tag_name:
+                continue
+            if 'href' in kwargs:
+                if kwargs['href'] is True and 'href' not in tag.attrs:
+                    continue
+                elif kwargs['href'] is not True and tag.attrs.get('href') != kwargs['href']:
+                    continue
+            results.append(tag)
+        return results


 class LinkExtractor(HTMLParser):
-	def __init__(self):
-		HTMLParser.__init__(self)
-		self.tags = []
+    """Collect every start and self-closing tag from HTML using stdlib HTMLParser."""

-	def handle_starttag(self, tag, attrs):
-		self.tags.append(Tag(tag, attrs))
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.tags = []

-	def handle_startendtag(self, tag, attrs):
-		self.tags.append(Tag(tag, attrs))
+    def handle_starttag(self, tag, attrs):
+        self.tags.append(Tag(tag, attrs))
+
+    def handle_startendtag(self, tag, attrs):
+        self.tags.append(Tag(tag, attrs))


 def _parse_stdlib(html):
-	parser = LinkExtractor()
-	try:
-		parser.feed(html)
-	except Exception:
-		pass  # malformed HTML, return partial results
-	return SoupResult(parser.tags)
+    """Parse HTML using stdlib HTMLParser."""
+    parser = LinkExtractor()
+    try:
+        parser.feed(html)
+    except Exception:
+        pass  # Malformed HTML, return partial results
+    return SoupResult(parser.tags)


 def _parse_bs4(html):
-	try:
-		return BeautifulSoup(html, 'lxml')
-	except (FeatureNotFound, Exception):
-		return BeautifulSoup(html, 'html.parser')
+    """Parse HTML using BeautifulSoup."""
+    try:
+        return BeautifulSoup(html, 'lxml')
+    except (FeatureNotFound, Exception):
+        return BeautifulSoup(html, 'html.parser')


 def set_nobs(enabled):
-	global _use_bs4
-	_use_bs4 = not enabled
-	if enabled and _bs4_available:
-		sys.stderr.write('info: --nobs: using stdlib HTMLParser\n')
-	elif not _bs4_available:
-		sys.stderr.write('info: bs4 not available, using stdlib HTMLParser\n')
+    """Set the --nobs switch: when enabled is true, parse with stdlib only."""
+    global _use_bs4
+    _use_bs4 = not enabled
+    if enabled and _bs4_available:
+        sys.stderr.write('info: --nobs: using stdlib HTMLParser\n')
+    elif not _bs4_available:
+        sys.stderr.write('info: bs4 not available, using stdlib HTMLParser\n')


 def soupify(html, nohtml=False):
-	htm = html if nohtml else '<html><body>%s</body></html>' % (html)
-	if _use_bs4 and _bs4_available:
-		return _parse_bs4(htm)
-	else:
-		return _parse_stdlib(htm)
+    """Parse html (wrapped in <html><body> unless nohtml) into a BeautifulSoup-like object."""
+    htm = html if nohtml else '<html><body>%s</body></html>' % html
+    if _use_bs4 and _bs4_available:
+        return _parse_bs4(htm)
+    return _parse_stdlib(htm)


 def is_available():
-	return _bs4_available
+    """Check if BeautifulSoup is available."""
+    return _bs4_available