from comboparse import ComboParser
from misc import set_log_level, _log
import os
class Config(ComboParser):
def load(self):
|
|
super(Config, self).load()
|
|
self.torhosts = [ str(i).strip() for i in self.common.tor_hosts.split(',') ]
|
|
# threads config = per-host value, multiply by Tor host count
|
|
self.watchd.threads = self.watchd.threads * len(self.torhosts)
|
|
with open(self.watchd.source_file, 'r') as handle:
|
|
self.servers = [x.strip() for x in handle.readlines() if len(x.strip()) > 0]
|
|
# Parse checktypes as comma-separated list
|
|
# Normalize: 'false'/'off'/'disabled' -> 'none' (SSL-only mode)
|
|
raw_types = [t.strip().lower() for t in self.watchd.checktype.split(',') if t.strip()]
|
|
self.watchd.checktypes = ['none' if t in ('false', 'off', 'disabled') else t for t in raw_types]
|
|
# SSL-only mode: force ssl_first when secondary check is disabled
|
|
if self.watchd.checktypes == ['none']:
|
|
self.watchd.ssl_first = True
|
|
# Apply log level from CLI flags
|
|
if self.args.quiet:
|
|
set_log_level('warn')
|
|
elif self.args.verbose:
|
|
set_log_level('debug')
|
|
|
|
def validate(self):
|
|
"""Validate configuration values. Returns list of errors."""
|
|
errors = []
|
|
warnings = []
|
|
|
|
# Validate port numbers
|
|
if not 1 <= self.httpd.port <= 65535:
|
|
errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)
|
|
|
|
# Validate timeouts (must be positive)
|
|
if self.common.timeout_connect <= 0:
|
|
errors.append('common.timeout_connect must be > 0')
|
|
if self.common.timeout_read <= 0:
|
|
errors.append('common.timeout_read must be > 0')
|
|
if self.watchd.timeout <= 0:
|
|
errors.append('watchd.timeout must be > 0')
|
|
if self.ppf.timeout <= 0:
|
|
errors.append('ppf.timeout must be > 0')
|
|
|
|
# Validate thread counts (0 allowed for watchd to disable local testing)
|
|
if self.watchd.threads < 0:
|
|
errors.append('watchd.threads must be >= 0')
|
|
if self.ppf.threads < 1:
|
|
errors.append('ppf.threads must be >= 1')
|
|
if self.scraper.threads < 1:
|
|
errors.append('scraper.threads must be >= 1')
|
|
|
|
# Validate max_fail
|
|
if self.watchd.max_fail < 1:
|
|
errors.append('watchd.max_fail must be >= 1')
|
|
if self.ppf.max_fail < 1:
|
|
errors.append('ppf.max_fail must be >= 1')
|
|
|
|
# Validate checktypes (secondary check types, ssl is handled by ssl_first)
|
|
# 'none' = SSL-only mode (no secondary check)
|
|
valid_checktypes = {'irc', 'head', 'judges', 'none'}
|
|
for ct in self.watchd.checktypes:
|
|
if ct not in valid_checktypes:
|
|
errors.append('watchd.checktype "%s" invalid, must be one of: %s' % (ct, ', '.join(sorted(valid_checktypes))))
|
|
if not self.watchd.checktypes:
|
|
errors.append('watchd.checktype must specify at least one valid type')
|
|
if 'none' in self.watchd.checktypes and len(self.watchd.checktypes) > 1:
|
|
errors.append('watchd.checktype "none" cannot be combined with other types')
|
|
|
|
# Validate engine names
|
|
valid_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
|
|
'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
|
|
'codeberg', 'gitea', 'searx'}
|
|
configured = [e.strip().lower() for e in self.scraper.engines.split(',')]
|
|
for eng in configured:
|
|
if eng and eng not in valid_engines:
|
|
warnings.append('unknown engine: %s' % eng)
|
|
|
|
# Validate source_file exists
|
|
if not os.path.exists(self.watchd.source_file):
|
|
warnings.append('source_file not found: %s' % self.watchd.source_file)
|
|
|
|
# Validate database directories are writable
|
|
for db in (self.watchd.database, self.ppf.database):
|
|
db_dir = os.path.dirname(db) or '.'
|
|
if not os.access(db_dir, os.W_OK):
|
|
errors.append('database directory not writable: %s' % db_dir)
|
|
|
|
# Log warnings
|
|
for w in warnings:
|
|
_log(w, 'warn')
|
|
|
|
return errors
|
|
def __init__(self):
|
|
super(Config, self).__init__('config.ini')
|
|
section = 'common'
|
|
self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
|
|
self.add_item(section, 'timeout_connect', int, 10, 'connection timeout in seconds (default: 10)', False)
|
|
self.add_item(section, 'timeout_read', int, 15, 'read timeout in seconds (default: 15)', False)
|
|
self.add_item(section, 'profiling', bool, False, 'enable cProfile profiling (default: False)', False)
|
|
|
|
section = 'watchd'
|
|
self.add_item(section, 'outage_threshold', float, 4.0, 'mininum success percentage required to not drop check results', False)
|
|
self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
|
|
self.add_item(section, 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
|
|
self.add_item(section, 'min_threads', int, 0, 'minimum threads (0 = auto: threads/4)', False)
|
|
self.add_item(section, 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
|
|
self.add_item(section, 'timeout_fail_inc', float, 1.5, 'extra timeout per failure (default: 1.5)', False)
|
|
self.add_item(section, 'timeout_fail_max', float, 15, 'max extra timeout for failures (default: 15)', False)
|
|
self.add_item(section, 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
|
|
self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
|
|
self.add_item(section, 'working_checktime', int, 300, 'retest interval for working proxies in seconds (default: 300)', False)
|
|
self.add_item(section, 'fail_retry_interval', int, 60, 'retry interval for failing proxies in seconds (default: 60)', False)
|
|
self.add_item(section, 'fail_retry_backoff', bool, True, 'use linear backoff for failures: 60, 120, 180... (default: True)', False)
|
|
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
|
|
self.add_item(section, 'oldies', bool, False, 're-test old proxies as well ? (default: False)', False)
|
|
self.add_item(section, 'oldies_checktime', int, 43200, 'base checking interval for *old* proxies in seconds (default: 43200)', False)
|
|
self.add_item(section, 'oldies_multi', int, 10, 'fetch threads*multi rows when testing oldies (default: 10)', False)
|
|
self.add_item(section, 'source_file', str, 'servers.txt', 'server/url list to read from (default: servers.txt)', False)
|
|
self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
|
|
self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
|
|
self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
|
|
self.add_item(section, 'checktype', str, 'head', 'secondary check type: head, irc, judges, none/false (none = SSL-only)', False)
|
|
self.add_item(section, 'ssl_first', bool, True, 'try SSL handshake first, fallback to checktype on failure (default: True)', False)
|
|
self.add_item(section, 'ssl_only', bool, False, 'when ssl_first enabled, skip secondary check on SSL failure (default: False)', False)
|
|
self.add_item(section, 'fingerprint', bool, True, 'probe proxy protocol before testing (default: True)', False)
|
|
self.add_item(section, 'scale_cooldown', int, 10, 'seconds between thread scaling decisions (default: 10)', False)
|
|
self.add_item(section, 'scale_threshold', float, 10.0, 'min success rate % to scale up threads (default: 10.0)', False)
|
|
|
|
section = 'httpd'
|
|
self.add_item(section, 'listenip', str, '127.0.0.1', 'address for the httpd to listen to (default: 127.0.0.1)', True)
|
|
self.add_item(section, 'port', int, 8081, 'port for the httpd to listen to (default: 8081)', True)
|
|
self.add_item(section, 'enabled', bool, False, 'start httpd (default: False)', True)
|
|
|
|
section = 'ppf'
|
|
self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
|
|
self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
|
|
self.add_item(section, 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
|
|
self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
|
|
self.add_item(section, 'threads', int, 1, 'number of threads to run (default: 1)', False)
|
|
self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
|
|
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
|
|
self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
|
|
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
|
|
self.add_item(section, 'extract_samedomain', bool, False, 'extract only url from same domains? (default: False)', False)
|
|
self.add_item(section, 'list_max_age_days', int, 7, 'max age in days for proxy list URLs (default: 7)', False)
|
|
|
|
section = 'scraper'
|
|
self.add_item(section, 'enabled', bool, True, 'enable search engine scraper (default: True)', False)
|
|
self.add_item(section, 'threads', int, 3, 'number of scraper threads (default: 3)', False)
|
|
self.add_item(section, 'debug', bool, False, 'scraper: whether to print additional debug info', False)
|
|
self.add_item(section, 'query', str, 'psw', 'build query using Proxies, Search, Websites', False)
|
|
self.add_item(section, 'backoff_base', int, 30, 'base backoff delay in seconds (default: 30)', False)
|
|
self.add_item(section, 'backoff_max', int, 3600, 'max backoff delay in seconds (default: 3600)', False)
|
|
self.add_item(section, 'fail_threshold', int, 2, 'consecutive failures before backoff (default: 2)', False)
|
|
self.add_item(section, 'engines', str, 'searx,duckduckgo,github', 'comma-separated search engines (default: searx,duckduckgo,github)', False)
|
|
self.add_item(section, 'max_pages', int, 5, 'max pages to fetch per engine query (default: 5)', False)
|
|
self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate', 'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
|
|
self.add_item(section, 'libretranslate_enabled', bool, False, 'enable LibreTranslate for dynamic translations (default: False)', False)
|
|
|
|
section = 'verification'
|
|
self.add_item(section, 'enabled', bool, True, 'enable manager verification system (default: True)', False)
|
|
self.add_item(section, 'threads', int, 2, 'number of verification threads (default: 2)', False)
|
|
self.add_item(section, 'batch_size', int, 10, 'proxies per verification cycle (default: 10)', False)
|
|
self.add_item(section, 'interval', int, 30, 'seconds between verification cycles (default: 30)', False)
|
|
self.add_item(section, 'max_queue', int, 1000, 'max pending verifications (default: 1000)', False)
|
|
self.add_item(section, 'spot_check_pct', float, 1.0, 'percent of working proxies to spot-check (default: 1.0)', False)
|
|
|
|
section = 'worker'
|
|
self.add_item(section, 'batch_size', int, 100, 'proxies per work batch (default: 100)', False)
|
|
self.add_item(section, 'heartbeat', int, 60, 'heartbeat interval in seconds (default: 60)', False)
|
|
self.add_item(section, 'claim_timeout', int, 300, 'seconds before unclaimed work is released (default: 300)', False)
|
|
self.add_item(section, 'url_batch_size', int, 5, 'URLs per claim cycle for V2 mode (default: 5)', False)
|
|
self.add_item(section, 'fetch_timeout', int, 30, 'timeout for URL fetching in V2 mode (default: 30)', False)
|
|
|
|
self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
|
|
self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
|
|
self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
|
|
self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
|
|
self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)
|
|
self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)
|
|
self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)", type=str, default='')
|
|
self.aparser.add_argument("--worker-key", help="worker authentication key", type=str, default='')
|
|
self.aparser.add_argument("--register", help="register as worker with master server", action='store_true', default=False)
|
|
self.aparser.add_argument("--worker-name", help="worker name for registration (default: hostname)", type=str, default='')
|
|
self.aparser.add_argument("--worker-v2", help="run as V2 worker (URL-driven fetching)", action='store_true', default=False)
|