When ssl_first=1 (the default), proxy validation first attempts an SSL handshake. If the handshake fails, validation falls back to the configured secondary check (head, judges, or irc). This separates SSL capability detection from basic connectivity testing. New config options: - ssl_first: enable the SSL-first pattern (default: 1) - checktype: secondary check type (head, judges, or irc)
165 lines
11 KiB
Python
165 lines
11 KiB
Python
from comboparse import ComboParser
|
|
from misc import set_log_level, _log
|
|
import os
|
|
|
|
class Config(ComboParser):
    """Application configuration.

    Declares every config item (sections: common, watchd, httpd, ppf,
    scraper, worker) and the CLI arguments, loads values from config.ini
    plus the command line, and validates the result.
    """

    def load(self):
        """Load configuration, then derive runtime fields from it.

        Side effects:
        - self.torhosts: list of tor proxy addresses (empty entries dropped)
        - self.watchd.threads: scaled from per-host value to total across hosts
        - self.servers: non-empty lines of watchd.source_file ([] if missing)
        - self.watchd.checktypes: parsed list of secondary check types
        - global log level set from --quiet / --verbose
        """
        super(Config, self).load()

        # Drop empty entries so a trailing comma in tor_hosts does not
        # produce a bogus '' proxy address (which would also silently
        # inflate the computed thread count below).
        self.torhosts = [h.strip() for h in self.common.tor_hosts.split(',') if h.strip()]

        # threads config = per-host value, multiply by Tor host count
        self.watchd.threads = self.watchd.threads * len(self.torhosts)

        # A missing source_file is non-fatal: validate() reports it as a
        # warning, so do not crash here with an unhandled IOError before
        # that warning can ever be shown.
        self.servers = []
        if os.path.exists(self.watchd.source_file):
            with open(self.watchd.source_file, 'r') as handle:
                self.servers = [x.strip() for x in handle if x.strip()]

        # Parse checktypes as comma-separated list
        self.watchd.checktypes = [t.strip() for t in self.watchd.checktype.split(',') if t.strip()]

        # Apply log level from CLI flags; --quiet takes precedence
        if self.args.quiet:
            set_log_level('warn')
        elif self.args.verbose:
            set_log_level('debug')

    def validate(self):
        """Validate configuration values. Returns list of errors.

        Hard problems go into the returned error list; soft problems are
        logged as warnings and do not block startup. Assumes load() has
        already run (uses self.torhosts and self.watchd.checktypes).
        """
        errors = []
        warnings = []

        # At least one Tor host is required for any proxy checking
        if not self.torhosts:
            errors.append('common.tor_hosts must contain at least one address')

        # Validate port numbers
        if not 1 <= self.httpd.port <= 65535:
            errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)

        # Validate timeouts (must be positive)
        if self.common.timeout_connect <= 0:
            errors.append('common.timeout_connect must be > 0')
        if self.common.timeout_read <= 0:
            errors.append('common.timeout_read must be > 0')
        if self.watchd.timeout <= 0:
            errors.append('watchd.timeout must be > 0')
        if self.ppf.timeout <= 0:
            errors.append('ppf.timeout must be > 0')

        # Validate thread counts (0 allowed for watchd to disable local testing)
        if self.watchd.threads < 0:
            errors.append('watchd.threads must be >= 0')
        if self.ppf.threads < 1:
            errors.append('ppf.threads must be >= 1')
        if self.scraper.threads < 1:
            errors.append('scraper.threads must be >= 1')

        # Validate max_fail
        if self.watchd.max_fail < 1:
            errors.append('watchd.max_fail must be >= 1')
        if self.ppf.max_fail < 1:
            errors.append('ppf.max_fail must be >= 1')

        # Validate checktypes (secondary check types, ssl is handled by ssl_first)
        valid_checktypes = {'irc', 'head', 'judges'}
        for ct in self.watchd.checktypes:
            if ct not in valid_checktypes:
                errors.append('watchd.checktype "%s" invalid, must be one of: %s' % (ct, ', '.join(sorted(valid_checktypes))))
        if not self.watchd.checktypes:
            errors.append('watchd.checktype must specify at least one valid type')

        # Validate engine names (unknown engines are only warned about, so a
        # newly added engine does not brick an older config)
        valid_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
                         'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
                         'codeberg', 'gitea', 'searx'}
        configured = [e.strip().lower() for e in self.scraper.engines.split(',')]
        for eng in configured:
            if eng and eng not in valid_engines:
                warnings.append('unknown engine: %s' % eng)

        # Validate source_file exists (warning only: load() tolerates absence)
        if not os.path.exists(self.watchd.source_file):
            warnings.append('source_file not found: %s' % self.watchd.source_file)

        # Validate database directories are writable (dirname is '' for a
        # bare filename, which means the current directory)
        for db in (self.watchd.database, self.ppf.database):
            db_dir = os.path.dirname(db) or '.'
            if not os.access(db_dir, os.W_OK):
                errors.append('database directory not writable: %s' % db_dir)

        # Log warnings
        for w in warnings:
            _log(w, 'warn')

        return errors

    def __init__(self):
        """Declare all configuration items and CLI arguments.

        add_item(section, name, type, default, help, required) registers a
        config.ini option with ComboParser.
        """
        super(Config, self).__init__('config.ini')
        section = 'common'
        self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
        self.add_item(section, 'timeout_connect', int, 10, 'connection timeout in seconds (default: 10)', False)
        self.add_item(section, 'timeout_read', int, 15, 'read timeout in seconds (default: 15)', False)
        self.add_item(section, 'profiling', bool, False, 'enable cProfile profiling (default: False)', False)

        section = 'watchd'
        self.add_item(section, 'outage_threshold', float, 4.0, 'mininum success percentage required to not drop check results', False)
        self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
        self.add_item(section, 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
        self.add_item(section, 'min_threads', int, 0, 'minimum threads (0 = auto: threads/4)', False)
        self.add_item(section, 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item(section, 'timeout_fail_inc', float, 1.5, 'extra timeout per failure (default: 1.5)', False)
        self.add_item(section, 'timeout_fail_max', float, 15, 'max extra timeout for failures (default: 15)', False)
        self.add_item(section, 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
        self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
        self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
        self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
        self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
        self.add_item(section, 'oldies', bool, False, 're-test old proxies as well ? (default: False)', False)
        self.add_item(section, 'oldies_checktime', int, 43200, 'base checking interval for *old* proxies in seconds (default: 43200)', False)
        self.add_item(section, 'oldies_multi', int, 10, 'fetch threads*multi rows when testing oldies (default: 10)', False)
        self.add_item(section, 'source_file', str, 'servers.txt', 'server/url list to read from (default: servers.txt)', False)
        self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
        self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
        self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
        self.add_item(section, 'checktype', str, 'head', 'secondary check type: irc, head, judges (used when ssl_first fails)', False)
        self.add_item(section, 'ssl_first', bool, True, 'try SSL handshake first, fallback to checktype on failure (default: True)', False)
        self.add_item(section, 'scale_cooldown', int, 10, 'seconds between thread scaling decisions (default: 10)', False)
        self.add_item(section, 'scale_threshold', float, 10.0, 'min success rate % to scale up threads (default: 10.0)', False)

        section = 'httpd'
        self.add_item(section, 'listenip', str, '127.0.0.1', 'address for the httpd to listen to (default: 127.0.0.1)', True)
        self.add_item(section, 'port', int, 8081, 'port for the httpd to listen to (default: 8081)', True)
        self.add_item(section, 'enabled', bool, False, 'start httpd (default: False)', True)

        section = 'ppf'
        self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
        self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
        self.add_item(section, 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
        self.add_item(section, 'threads', int, 1, 'number of threads to run (default: 1)', False)
        self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
        self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
        self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
        self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
        self.add_item(section, 'extract_samedomain', bool, False, 'extract only url from same domains? (default: False)', False)
        self.add_item(section, 'list_max_age_days', int, 7, 'max age in days for proxy list URLs (default: 7)', False)

        section = 'scraper'
        self.add_item(section, 'enabled', bool, True, 'enable search engine scraper (default: True)', False)
        self.add_item(section, 'threads', int, 3, 'number of scraper threads (default: 3)', False)
        self.add_item(section, 'debug', bool, False, 'scraper: whether to print additional debug info', False)
        self.add_item(section, 'query', str, 'psw', 'build query using Proxies, Search, Websites', False)
        self.add_item(section, 'backoff_base', int, 30, 'base backoff delay in seconds (default: 30)', False)
        self.add_item(section, 'backoff_max', int, 3600, 'max backoff delay in seconds (default: 3600)', False)
        self.add_item(section, 'fail_threshold', int, 2, 'consecutive failures before backoff (default: 2)', False)
        self.add_item(section, 'engines', str, 'searx,duckduckgo,github', 'comma-separated search engines (default: searx,duckduckgo,github)', False)
        self.add_item(section, 'max_pages', int, 5, 'max pages to fetch per engine query (default: 5)', False)
        self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate', 'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
        self.add_item(section, 'libretranslate_enabled', bool, False, 'enable LibreTranslate for dynamic translations (default: False)', False)

        section = 'worker'
        self.add_item(section, 'batch_size', int, 100, 'proxies per work batch (default: 100)', False)
        self.add_item(section, 'heartbeat', int, 60, 'heartbeat interval in seconds (default: 60)', False)
        self.add_item(section, 'claim_timeout', int, 300, 'seconds before unclaimed work is released (default: 300)', False)

        # CLI-only flags (not backed by config.ini)
        self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
        self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
        self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
        self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
        self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)
        self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)
        self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)", type=str, default='')
        self.aparser.add_argument("--worker-key", help="worker authentication key", type=str, default='')
        self.aparser.add_argument("--register", help="register as worker with master server", action='store_true', default=False)
        self.aparser.add_argument("--worker-name", help="worker name for registration (default: hostname)", type=str, default='')
|