- convert tabs to 4-space indentation
- add docstrings to modules and classes
- remove unused import (copy)
- use explicit object inheritance
- use 'while True' over 'while 1'
- use 'while args' over 'while len(args)'
- use '{}' over 'dict()'
- consistent string formatting
- Python 2/3 compatible Queue import
162 lines
11 KiB
Python
162 lines
11 KiB
Python
from comboparse import ComboParser
|
|
from misc import set_log_level, _log
|
|
import os
|
|
|
|
class Config(ComboParser):
    """Application configuration read from 'config.ini' plus CLI flags.

    Declares every known option of the sections ``common``, ``watchd``,
    ``httpd``, ``ppf``, ``scraper`` and ``flood``, registers a few extra
    command-line switches, and post-processes the loaded values into the
    convenience attributes ``torhosts`` and ``servers``.

    NOTE(review): ``add_item``, ``aparser``, ``args`` and the per-section
    attributes (``self.common``, ``self.watchd``, ...) are provided by
    ComboParser, which is not visible from this file. The last positional
    argument of ``add_item`` is passed through unchanged; it is presumably a
    "required" flag -- confirm against ComboParser.
    """

    # Engine names accepted in ``scraper.engines`` without a warning.
    _VALID_ENGINES = frozenset((
        'duckduckgo', 'startpage', 'brave', 'ecosia',
        'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
        'codeberg', 'gitea', 'searx',
    ))

    def __init__(self):
        """Register all configuration items and extra CLI arguments.

        The registration order is kept exactly as before (config sections
        first, the extra argparse switches between ``scraper`` and ``flood``)
        in case ComboParser's output depends on it.
        """
        super(Config, self).__init__('config.ini')
        add = self.add_item

        section = 'common'
        add(section, 'tor_hosts', str, '127.0.0.1:9050',
            'comma-separated list of tor proxy address(es)', True)
        add(section, 'timeout_connect', int, 10,
            'connection timeout in seconds (default: 10)', False)
        add(section, 'timeout_read', int, 15,
            'read timeout in seconds (default: 15)', False)

        section = 'watchd'
        add(section, 'outage_threshold', float, 4.0,
            'mininum success percentage required to not drop check results', False)
        add(section, 'max_fail', int, 5,
            'number of fails after which a proxy is considered dead', False)
        add(section, 'threads', int, 10,
            'number of threads watchd uses to check proxies', True)
        add(section, 'timeout', int, 15,
            'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        add(section, 'submit_after', int, 200,
            'min. number of tested proxies for DB write', False)
        add(section, 'debug', bool, False,
            'whether to print additional debug info', False)
        add(section, 'use_ssl', int, 0,
            'whether to use SSL and port 6697 to connect to targets (slower)', False)
        add(section, 'checktime', int, 1800,
            'base checking interval for proxies in db in seconds', False)
        add(section, 'perfail_checktime', int, 3600,
            'additional checking interval for proxies in db in seconds per experienced failure', False)
        add(section, 'database', str, 'websites.sqlite',
            'filename of database', True)
        add(section, 'oldies', bool, False,
            're-test old proxies as well ? (default: False)', False)
        add(section, 'oldies_checktime', int, 43200,
            'base checking interval for *old* proxies in seconds (default: 43200)', False)
        add(section, 'oldies_multi', int, 10,
            'fetch threads*multi rows when testing oldies (default: 10)', False)
        add(section, 'source_file', str, 'servers.txt',
            'server/url list to read from (default: servers.txt)', False)
        add(section, 'stale_days', int, 30,
            'days after which dead proxies are removed (default: 30)', False)
        add(section, 'stats_interval', int, 300,
            'seconds between status reports (default: 300)', False)
        add(section, 'tor_safeguard', bool, True,
            'enable tor safeguard (default: True)', False)
        add(section, 'checktype', str, 'http',
            'check type (irc or http)', False)

        section = 'httpd'
        add(section, 'listenip', str, '127.0.0.1',
            'address for the httpd to listen to (default: 127.0.0.1)', True)
        add(section, 'port', int, 8081,
            'port for the httpd to listen to (default: 8081)', True)
        add(section, 'enabled', bool, False,
            'start httpd (default: False)', True)

        section = 'ppf'
        add(section, 'debug', bool, False,
            'whether to print additional debug info', False)
        add(section, 'search', bool, True,
            'whether to use searx search engine to find new proxy lists', False)
        add(section, 'timeout', float, 15,
            'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        add(section, 'http_retries', int, 1,
            'number of retries for http connects', False)
        add(section, 'threads', int, 1,
            'number of threads to run (default: 1)', False)
        add(section, 'checktime', int, 3600,
            'base checking interval for urls in db in seconds', False)
        add(section, 'perfail_checktime', int, 3600,
            'additional checking interval for urls in db in seconds per resultless check', False)
        add(section, 'max_fail', int, 5,
            'number of fails after which an url is considered dead', False)
        add(section, 'database', str, 'proxies.sqlite',
            'filename of database', True)
        add(section, 'extract_samedomain', bool, False,
            'extract only url from same domains? (default: False)', False)

        section = 'scraper'
        add(section, 'debug', bool, False,
            'scraper: whether to print additional debug info', False)
        add(section, 'query', str, 'psw',
            'build query using Proxies, Search, Websites', False)
        add(section, 'backoff_base', int, 30,
            'base backoff delay in seconds (default: 30)', False)
        add(section, 'backoff_max', int, 3600,
            'max backoff delay in seconds (default: 3600)', False)
        add(section, 'fail_threshold', int, 2,
            'consecutive failures before backoff (default: 2)', False)
        add(section, 'engines', str, 'searx,duckduckgo,github',
            'comma-separated search engines (default: searx,duckduckgo,github)', False)
        add(section, 'max_pages', int, 5,
            'max pages to fetch per engine query (default: 5)', False)
        add(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate',
            'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
        add(section, 'libretranslate_enabled', bool, True,
            'enable LibreTranslate for dynamic translations (default: True)', False)

        # Command-line-only switches (not backed by a config section).
        self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
        self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
        self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
        self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)

        section = 'flood'
        add(section, 'server', str, None, 'irc server address', False)
        add(section, 'target', str, None, 'target to flood', False)
        add(section, 'nickserv', str, 'nickserv', "nickserv's nickname", False)
        add(section, 'message', str, None, 'message', False)
        add(section, 'threads', int, 1, '# of threads', False)
        add(section, 'register', int, 0, 'register nickname when required', False)

        add(section, 'wait', int, 0, 'wait prior sending messages', False)
        add(section, 'once', int, 0, 'quit as soon as possible', False)
        add(section, 'hilight', int, 0, 'try to hilight all nicks?', False)
        add(section, 'waitonsuccess', int, 0, 'wait for a while on success', False)
        add(section, 'debug', int, 0, 'use debug', False)
        add(section, 'duration', int, 180, 'maximum time to run', False)
        # NOTE(review): declared as str but the default is the int 14400, so
        # the value's type may differ between "defaulted" and "configured".
        # Left unchanged because the consumers are not visible here -- confirm
        # which type they expect before normalizing.
        add(section, 'delay', str, 14400,
            'if waitonsuccess, wait for $delay before sending other bots', False)
        add(section, 'nick', str, None, 'specify nickname to use', False)
        add(section, 'use_ssl', int, 2, 'Use ssl? (0: false, 1: true, 2: random)', False)
        add(section, 'cycle', int, 0, 'cycle flood', False)
        add(section, 'change_nick', int, 0,
            'Change nick between messages (useful when flooding privates)', False)
        add(section, 'use_timeout', int, 0, 'make connexions quit through timeout', False)
        add(section, 'clones', int, 1, 'Number of connexion repeat to run', False)
        add(section, 'query', bool, False, 'also flood in query', False)
        add(section, 'noquerybefore', int, 10,
            'do not send query before x secs being connected', False)
        add(section, 'oper', bool, False, 'piss of opers', False)
        add(section, 'whois', bool, False, 'piss of opers with /whois', False)
        add(section, 'modex', bool, False, 'make +/- x mode', False)
        add(section, 'os', bool, False, 'piss off opers with /os', False)
        add(section, 'file', str, None, 'read flood content from file', False)
        add(section, 'failid', str, None,
            'generate nickserv warn. about IDENTIFY attempts', False)

    def load(self):
        """Load the configuration and derive ``torhosts`` and ``servers``.

        After the parent class has loaded the values:

        * ``self.torhosts`` is the list of stripped, non-empty entries of the
          comma-separated ``common.tor_hosts`` option.
        * ``self.servers`` is the list of stripped, non-blank lines of
          ``watchd.source_file``. A missing file yields an empty list instead
          of raising; ``validate()`` reports that case as a warning.
        * ``-q`` sets the log level to 'warn' and ``-v`` sets it to 'debug';
          ``-q`` takes precedence when both are given.
        """
        super(Config, self).load()

        # A trailing or doubled comma must not produce an empty host entry.
        hosts = (str(host).strip() for host in self.common.tor_hosts.split(','))
        self.torhosts = [host for host in hosts if host]

        # validate() treats a missing source file as a warning only, so it
        # must not abort loading here.
        source_file = self.watchd.source_file
        if os.path.exists(source_file):
            with open(source_file, 'r') as handle:
                stripped = (line.strip() for line in handle)
                self.servers = [line for line in stripped if line]
        else:
            self.servers = []

        # Apply log level from CLI flags
        if self.args.quiet:
            set_log_level('warn')
        elif self.args.verbose:
            set_log_level('debug')

    def validate(self):
        """Validate configuration values.

        Hard problems are collected and returned; soft problems (unknown
        engine names, missing source file) are only logged at level 'warn'.

        Returns:
            list of error message strings; empty when no check failed.
        """
        errors = []
        warnings = []

        # Validate port numbers
        if not 1 <= self.httpd.port <= 65535:
            errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)

        # Validate timeouts (must be positive)
        must_be_positive = (
            ('common.timeout_connect', self.common.timeout_connect),
            ('common.timeout_read', self.common.timeout_read),
            ('watchd.timeout', self.watchd.timeout),
            ('ppf.timeout', self.ppf.timeout),
        )
        for name, value in must_be_positive:
            if value <= 0:
                errors.append('%s must be > 0' % name)

        # Validate thread counts and max_fail (must be at least 1)
        at_least_one = (
            ('watchd.threads', self.watchd.threads),
            ('ppf.threads', self.ppf.threads),
            ('watchd.max_fail', self.watchd.max_fail),
            ('ppf.max_fail', self.ppf.max_fail),
        )
        for name, value in at_least_one:
            if value < 1:
                errors.append('%s must be >= 1' % name)

        # Validate engine names; empty entries (stray commas) are ignored
        for raw_name in self.scraper.engines.split(','):
            engine = raw_name.strip().lower()
            if engine and engine not in self._VALID_ENGINES:
                warnings.append('unknown engine: %s' % engine)

        # Validate source_file exists
        if not os.path.exists(self.watchd.source_file):
            warnings.append('source_file not found: %s' % self.watchd.source_file)

        # Validate database directories are writable; a bare filename lives
        # in the current working directory
        for database in (self.watchd.database, self.ppf.database):
            db_dir = os.path.dirname(database) or '.'
            if not os.access(db_dir, os.W_OK):
                errors.append('database directory not writable: %s' % db_dir)

        # Log warnings
        for message in warnings:
            _log(message, 'warn')

        return errors
|