import os

from comboparse import ComboParser
from misc import set_log_level, _log


class Config(ComboParser):
    """Application configuration backed by ``config.ini`` and CLI flags.

    ``__init__`` registers every known item for the sections ``common``,
    ``watchd``, ``httpd``, ``ppf``, ``scraper``, ``verification`` and
    ``worker`` plus the extra command-line switches.  ``load`` derives
    runtime values from the parsed settings and ``validate`` sanity-checks
    them.
    """

    def load(self):
        """Load the configuration and compute derived runtime attributes.

        After the parent class has loaded the raw values this sets:

        * ``self.torhosts`` - non-empty entries of ``common.tor_hosts``
        * ``self.watchd.threads`` - per-host value times the tor host count
        * ``self.servers`` - non-blank lines of ``watchd.source_file``
          (empty list when the file cannot be read)
        * ``self.watchd.checktypes`` - normalized list parsed from
          ``watchd.checktype``

        It also forces ``watchd.ssl_first`` in SSL-only mode and applies the
        log level selected by ``--quiet`` / ``--verbose``.
        """
        super(Config, self).load()

        # Drop blank entries: a trailing or doubled comma must neither
        # produce an empty proxy address nor inflate the thread multiplier.
        self.torhosts = [
            str(host).strip()
            for host in self.common.tor_hosts.split(',')
            if str(host).strip()
        ]

        # threads config = per-host value, multiply by Tor host count
        self.watchd.threads = self.watchd.threads * len(self.torhosts)

        # validate() reports a missing source_file as a warning only, so an
        # unreadable file must not abort loading; fall back to no servers.
        try:
            with open(self.watchd.source_file, 'r') as handle:
                self.servers = [
                    line.strip() for line in handle if line.strip()
                ]
        except (IOError, OSError) as err:
            _log('cannot read source_file %s: %s'
                 % (self.watchd.source_file, err), 'warn')
            self.servers = []

        # Parse checktypes as comma-separated list
        # Normalize: 'false'/'off'/'disabled' -> 'none' (SSL-only mode)
        raw_types = [
            t.strip().lower()
            for t in self.watchd.checktype.split(',')
            if t.strip()
        ]
        self.watchd.checktypes = [
            'none' if t in ('false', 'off', 'disabled') else t
            for t in raw_types
        ]

        # SSL-only mode: force ssl_first when secondary check is disabled
        if self.watchd.checktypes == ['none']:
            self.watchd.ssl_first = True

        # Apply log level from CLI flags (--quiet wins over --verbose)
        if self.args.quiet:
            set_log_level('warn')
        elif self.args.verbose:
            set_log_level('debug')

    def validate(self):
        """Validate configuration values.

        Reads ``self.torhosts`` and ``self.watchd.checktypes``, which are
        set by ``load()``, so ``load()`` has to run first.

        Returns a list of error message strings (empty when nothing is
        wrong).  Non-fatal findings are not returned; they are logged
        through ``_log`` with level ``'warn'``.
        """
        errors = []
        warnings = []

        # No usable tor host left after parsing: watchd.threads is 0 then.
        if not self.torhosts:
            warnings.append('common.tor_hosts is empty: no tor proxy configured')

        # Validate port numbers
        if not 1 <= self.httpd.port <= 65535:
            errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)

        # Validate timeouts (must be positive)
        if self.common.timeout_connect <= 0:
            errors.append('common.timeout_connect must be > 0')
        if self.common.timeout_read <= 0:
            errors.append('common.timeout_read must be > 0')
        if self.watchd.timeout <= 0:
            errors.append('watchd.timeout must be > 0')
        if self.ppf.timeout <= 0:
            errors.append('ppf.timeout must be > 0')

        # Validate thread counts (0 allowed for watchd to disable local testing)
        if self.watchd.threads < 0:
            errors.append('watchd.threads must be >= 0')
        if self.ppf.threads < 1:
            errors.append('ppf.threads must be >= 1')
        if self.scraper.threads < 1:
            errors.append('scraper.threads must be >= 1')

        # Validate max_fail
        if self.watchd.max_fail < 1:
            errors.append('watchd.max_fail must be >= 1')
        if self.ppf.max_fail < 1:
            errors.append('ppf.max_fail must be >= 1')

        # Validate checktypes (secondary check types, ssl is handled by ssl_first)
        # 'none' = SSL-only mode (no secondary check)
        valid_checktypes = {'irc', 'head', 'judges', 'none'}
        for ct in self.watchd.checktypes:
            if ct not in valid_checktypes:
                errors.append('watchd.checktype "%s" invalid, must be one of: %s'
                              % (ct, ', '.join(sorted(valid_checktypes))))
        if not self.watchd.checktypes:
            errors.append('watchd.checktype must specify at least one valid type')
        if 'none' in self.watchd.checktypes and len(self.watchd.checktypes) > 1:
            errors.append('watchd.checktype "none" cannot be combined with other types')

        # Validate engine names (unknown engines only produce a warning)
        valid_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
                         'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
                         'codeberg', 'gitea', 'searx'}
        configured = [e.strip().lower() for e in self.scraper.engines.split(',')]
        for eng in configured:
            if eng and eng not in valid_engines:
                warnings.append('unknown engine: %s' % eng)

        # Validate source_file exists
        if not os.path.exists(self.watchd.source_file):
            warnings.append('source_file not found: %s' % self.watchd.source_file)

        # Validate database directories are writable.  Both databases may
        # live in the same directory (they share a default filename), so
        # check each directory once to avoid duplicate error messages.
        checked_dirs = []
        for db in (self.watchd.database, self.ppf.database):
            db_dir = os.path.dirname(db) or '.'
            if db_dir in checked_dirs:
                continue
            checked_dirs.append(db_dir)
            if not os.access(db_dir, os.W_OK):
                errors.append('database directory not writable: %s' % db_dir)

        # Log warnings
        for w in warnings:
            _log(w, 'warn')

        return errors

    def __init__(self):
        """Register all configuration items and command-line arguments.

        Items are declared with ``add_item(section, name, type, default,
        description, flag)``; the meaning of the trailing boolean is defined
        by ``ComboParser`` (presumably "required" - confirm there).
        """
        super(Config, self).__init__('config.ini')

        section = 'common'
        self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050',
                      'comma-separated list of tor proxy address(es)', True)
        self.add_item(section, 'timeout_connect', int, 10,
                      'connection timeout in seconds (default: 10)', False)
        self.add_item(section, 'timeout_read', int, 15,
                      'read timeout in seconds (default: 15)', False)
        self.add_item(section, 'profiling', bool, False,
                      'enable cProfile profiling (default: False)', False)

        section = 'watchd'
        self.add_item(section, 'outage_threshold', float, 4.0,
                      'mininum success percentage required to not drop check results', False)
        self.add_item(section, 'max_fail', int, 5,
                      'number of fails after which a proxy is considered dead', False)
        self.add_item(section, 'threads', int, 10,
                      'number of threads watchd uses to check proxies', True)
        self.add_item(section, 'min_threads', int, 0,
                      'minimum threads (0 = auto: threads/4)', False)
        self.add_item(section, 'timeout', int, 15,
                      'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item(section, 'timeout_fail_inc', float, 1.5,
                      'extra timeout per failure (default: 1.5)', False)
        self.add_item(section, 'timeout_fail_max', float, 15,
                      'max extra timeout for failures (default: 15)', False)
        self.add_item(section, 'submit_after', int, 200,
                      'min. number of tested proxies for DB write', False)
        self.add_item(section, 'debug', bool, False,
                      'whether to print additional debug info', False)
        self.add_item(section, 'working_checktime', int, 300,
                      'retest interval for working proxies in seconds (default: 300)', False)
        self.add_item(section, 'fail_retry_interval', int, 60,
                      'retry interval for failing proxies in seconds (default: 60)', False)
        self.add_item(section, 'fail_retry_backoff', bool, True,
                      'use linear backoff for failures: 60, 120, 180... (default: True)', False)
        self.add_item(section, 'database', str, 'proxies.sqlite',
                      'filename of database', True)
        self.add_item(section, 'oldies', bool, False,
                      're-test old proxies as well ? (default: False)', False)
        self.add_item(section, 'oldies_checktime', int, 43200,
                      'base checking interval for *old* proxies in seconds (default: 43200)', False)
        self.add_item(section, 'oldies_multi', int, 10,
                      'fetch threads*multi rows when testing oldies (default: 10)', False)
        self.add_item(section, 'source_file', str, 'servers.txt',
                      'server/url list to read from (default: servers.txt)', False)
        self.add_item(section, 'stale_days', int, 30,
                      'days after which dead proxies are removed (default: 30)', False)
        self.add_item(section, 'stats_interval', int, 300,
                      'seconds between status reports (default: 300)', False)
        self.add_item(section, 'tor_safeguard', bool, True,
                      'enable tor safeguard (default: True)', False)
        self.add_item(section, 'checktype', str, 'head',
                      'secondary check type: head, irc, judges, none/false (none = SSL-only)', False)
        self.add_item(section, 'ssl_first', bool, True,
                      'try SSL handshake first, fallback to checktype on failure (default: True)', False)
        self.add_item(section, 'ssl_only', bool, False,
                      'when ssl_first enabled, skip secondary check on SSL failure (default: False)', False)
        self.add_item(section, 'fingerprint', bool, True,
                      'probe proxy protocol before testing (default: True)', False)
        self.add_item(section, 'scale_cooldown', int, 10,
                      'seconds between thread scaling decisions (default: 10)', False)
        # NOTE(review): the help text below contains a literal '%'; if
        # ComboParser forwards descriptions to argparse help strings this
        # may need to be '%%' - confirm against ComboParser.add_item.
        self.add_item(section, 'scale_threshold', float, 10.0,
                      'min success rate % to scale up threads (default: 10.0)', False)

        section = 'httpd'
        self.add_item(section, 'listenip', str, '127.0.0.1',
                      'address for the httpd to listen to (default: 127.0.0.1)', True)
        self.add_item(section, 'port', int, 8081,
                      'port for the httpd to listen to (default: 8081)', True)
        self.add_item(section, 'enabled', bool, False,
                      'start httpd (default: False)', True)

        section = 'ppf'
        self.add_item(section, 'debug', bool, False,
                      'whether to print additional debug info', False)
        self.add_item(section, 'search', bool, True,
                      'whether to use searx search engine to find new proxy lists', False)
        self.add_item(section, 'timeout', float, 15,
                      'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item(section, 'http_retries', int, 1,
                      'number of retries for http connects', False)
        self.add_item(section, 'threads', int, 1,
                      'number of threads to run (default: 1)', False)
        self.add_item(section, 'checktime', int, 3600,
                      'base checking interval for urls in db in seconds', False)
        self.add_item(section, 'perfail_checktime', int, 3600,
                      'additional checking interval for urls in db in seconds per resultless check', False)
        self.add_item(section, 'max_fail', int, 5,
                      'number of fails after which an url is considered dead', False)
        self.add_item(section, 'database', str, 'proxies.sqlite',
                      'filename of database', True)
        self.add_item(section, 'extract_samedomain', bool, False,
                      'extract only url from same domains? (default: False)', False)
        self.add_item(section, 'list_max_age_days', int, 7,
                      'max age in days for proxy list URLs (default: 7)', False)

        section = 'scraper'
        self.add_item(section, 'enabled', bool, True,
                      'enable search engine scraper (default: True)', False)
        self.add_item(section, 'threads', int, 3,
                      'number of scraper threads (default: 3)', False)
        self.add_item(section, 'debug', bool, False,
                      'scraper: whether to print additional debug info', False)
        self.add_item(section, 'query', str, 'psw',
                      'build query using Proxies, Search, Websites', False)
        self.add_item(section, 'backoff_base', int, 30,
                      'base backoff delay in seconds (default: 30)', False)
        self.add_item(section, 'backoff_max', int, 3600,
                      'max backoff delay in seconds (default: 3600)', False)
        self.add_item(section, 'fail_threshold', int, 2,
                      'consecutive failures before backoff (default: 2)', False)
        self.add_item(section, 'engines', str, 'searx,duckduckgo,github',
                      'comma-separated search engines (default: searx,duckduckgo,github)', False)
        self.add_item(section, 'max_pages', int, 5,
                      'max pages to fetch per engine query (default: 5)', False)
        self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate',
                      'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
        self.add_item(section, 'libretranslate_enabled', bool, False,
                      'enable LibreTranslate for dynamic translations (default: False)', False)

        section = 'verification'
        self.add_item(section, 'enabled', bool, True,
                      'enable manager verification system (default: True)', False)
        self.add_item(section, 'threads', int, 2,
                      'number of verification threads (default: 2)', False)
        self.add_item(section, 'batch_size', int, 10,
                      'proxies per verification cycle (default: 10)', False)
        self.add_item(section, 'interval', int, 30,
                      'seconds between verification cycles (default: 30)', False)
        self.add_item(section, 'max_queue', int, 1000,
                      'max pending verifications (default: 1000)', False)
        self.add_item(section, 'spot_check_pct', float, 1.0,
                      'percent of working proxies to spot-check (default: 1.0)', False)

        section = 'worker'
        self.add_item(section, 'heartbeat', int, 60,
                      'heartbeat interval in seconds (default: 60)', False)
        self.add_item(section, 'url_batch_size', int, 5,
                      'URLs per claim cycle (default: 5)', False)
        self.add_item(section, 'fetch_timeout', int, 30,
                      'timeout for URL fetching (default: 30)', False)

        # Command-line only switches (not backed by config.ini items)
        self.aparser.add_argument("--file", help="import a single file containing proxy addrs",
                                  type=str, default='', required=False)
        self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser",
                                  action='store_true', default=False)
        self.aparser.add_argument("-q", "--quiet",
                                  help="suppress info messages, show warnings and errors only",
                                  action='store_true', default=False)
        self.aparser.add_argument("-v", "--verbose", help="show debug messages",
                                  action='store_true', default=False)
        self.aparser.add_argument("--profile",
                                  help="enable cProfile profiling, output to profile.stats",
                                  action='store_true', default=False)
        self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)",
                                  type=str, default='')
        self.aparser.add_argument("--worker-key", help="worker authentication key",
                                  type=str, default='')
        self.aparser.add_argument("--register", help="register as worker with master server",
                                  action='store_true', default=False)
        self.aparser.add_argument("--worker-name",
                                  help="worker name for registration (default: hostname)",
                                  type=str, default='')
        self.aparser.add_argument("--worker", help="run as worker node",
                                  action='store_true', default=False)