import os

from comboparse import ComboParser
from misc import set_log_level, _log


class Config(ComboParser):
    """Application configuration backed by ``config.ini`` plus CLI flags.

    ``__init__`` declares every known option (section, name, type, default,
    help text) and the extra command-line arguments. ``load`` derives the
    runtime values ``torhosts`` and ``servers`` and applies the CLI log level.
    ``validate`` sanity-checks the loaded values.
    """

    def load(self):
        """Load the configuration and compute derived runtime values.

        After the parent ``load`` has run, this sets:

        * ``self.torhosts`` -- list of non-blank Tor proxy addresses taken
          from the comma-separated ``common.tor_hosts`` option.
        * ``self.watchd.threads`` -- the configured per-host value multiplied
          by the number of Tor hosts.
        * ``self.servers`` -- the non-blank, stripped lines of
          ``watchd.source_file``.

        It also applies the log level selected by ``--quiet`` / ``--verbose``
        (``--quiet`` wins if both are given).

        Raises whatever ``open`` raises if ``watchd.source_file`` cannot be
        opened for reading.
        """
        super(Config, self).load()

        # Drop blank entries so that a trailing or doubled comma in tor_hosts
        # neither yields an empty proxy address nor inflates the thread count.
        stripped_hosts = (
            str(host).strip() for host in self.common.tor_hosts.split(',')
        )
        self.torhosts = [host for host in stripped_hosts if host]

        # threads config = per-host value, multiply by Tor host count.
        # Fall back to a factor of 1 when no host is configured so the thread
        # count is left untouched; validate() reports the missing hosts.
        self.watchd.threads = self.watchd.threads * (len(self.torhosts) or 1)

        with open(self.watchd.source_file, 'r') as handle:
            stripped_lines = (line.strip() for line in handle)
            self.servers = [line for line in stripped_lines if line]

        # Apply log level from CLI flags
        if self.args.quiet:
            set_log_level('warn')
        elif self.args.verbose:
            set_log_level('debug')

    def validate(self):
        """Validate configuration values.

        Hard problems are collected as error strings and returned; soft
        problems (unknown engine names, missing source file) are only logged
        at ``warn`` level via ``_log``.

        Returns:
            list of error message strings; empty when no hard problem was
            found.
        """
        errors = []
        warnings = []

        # Validate that at least one Tor host is configured
        if not any(part.strip() for part in self.common.tor_hosts.split(',')):
            errors.append('common.tor_hosts must list at least one host')

        # Validate port numbers
        if not 1 <= self.httpd.port <= 65535:
            errors.append('httpd.port must be 1-65535, got %d' % self.httpd.port)

        # Validate timeouts (must be positive)
        if self.common.timeout_connect <= 0:
            errors.append('common.timeout_connect must be > 0')
        if self.common.timeout_read <= 0:
            errors.append('common.timeout_read must be > 0')
        if self.watchd.timeout <= 0:
            errors.append('watchd.timeout must be > 0')
        if self.ppf.timeout <= 0:
            errors.append('ppf.timeout must be > 0')

        # Validate thread counts
        if self.watchd.threads < 1:
            errors.append('watchd.threads must be >= 1')
        if self.ppf.threads < 1:
            errors.append('ppf.threads must be >= 1')
        if self.scraper.threads < 1:
            errors.append('scraper.threads must be >= 1')

        # Validate max_fail
        if self.watchd.max_fail < 1:
            errors.append('watchd.max_fail must be >= 1')
        if self.ppf.max_fail < 1:
            errors.append('ppf.max_fail must be >= 1')

        # Validate checktype
        valid_checktypes = {'irc', 'head', 'judges', 'ssl'}
        if self.watchd.checktype not in valid_checktypes:
            errors.append('watchd.checktype must be one of: %s'
                          % ', '.join(sorted(valid_checktypes)))

        # Validate engine names (unknown names are only warned about)
        valid_engines = {
            'duckduckgo', 'startpage', 'brave', 'ecosia', 'mojeek', 'qwant',
            'yandex', 'github', 'gitlab', 'codeberg', 'gitea', 'searx',
        }
        configured = [e.strip().lower() for e in self.scraper.engines.split(',')]
        for eng in configured:
            if eng and eng not in valid_engines:
                warnings.append('unknown engine: %s' % eng)

        # Validate source_file exists
        if not os.path.exists(self.watchd.source_file):
            warnings.append('source_file not found: %s' % self.watchd.source_file)

        # Validate database directories are writable
        for db in (self.watchd.database, self.ppf.database):
            # A bare filename has no directory part; check the current dir.
            db_dir = os.path.dirname(db) or '.'
            if not os.access(db_dir, os.W_OK):
                errors.append('database directory not writable: %s' % db_dir)

        # Log warnings
        for w in warnings:
            _log(w, 'warn')

        return errors

    def __init__(self):
        """Declare all configuration options and command-line arguments.

        Each ``add_item`` call registers (section, name, type, default, help
        text, flag). NOTE(review): the meaning of the trailing boolean flag is
        defined by ``ComboParser.add_item`` and is not visible here --
        presumably it marks the option as required; confirm against
        ``comboparse``.
        """
        super(Config, self).__init__('config.ini')

        section = 'common'
        self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
        self.add_item(section, 'timeout_connect', int, 10, 'connection timeout in seconds (default: 10)', False)
        self.add_item(section, 'timeout_read', int, 15, 'read timeout in seconds (default: 15)', False)
        self.add_item(section, 'profiling', bool, False, 'enable cProfile profiling (default: False)', False)

        section = 'watchd'
        self.add_item(section, 'outage_threshold', float, 4.0, 'mininum success percentage required to not drop check results', False)
        self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
        self.add_item(section, 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
        self.add_item(section, 'min_threads', int, 0, 'minimum threads (0 = auto: threads/4)', False)
        self.add_item(section, 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item(section, 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
        self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
        self.add_item(section, 'use_ssl', int, 1, 'whether to use SSL (1=always, 0=never, 2=random)', False)
        self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
        self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
        self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
        self.add_item(section, 'oldies', bool, False, 're-test old proxies as well ? (default: False)', False)
        self.add_item(section, 'oldies_checktime', int, 43200, 'base checking interval for *old* proxies in seconds (default: 43200)', False)
        self.add_item(section, 'oldies_multi', int, 10, 'fetch threads*multi rows when testing oldies (default: 10)', False)
        self.add_item(section, 'source_file', str, 'servers.txt', 'server/url list to read from (default: servers.txt)', False)
        self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
        self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
        self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
        self.add_item(section, 'checktype', str, 'ssl', 'check type: irc, head, judges, or ssl', False)

        section = 'httpd'
        self.add_item(section, 'listenip', str, '127.0.0.1', 'address for the httpd to listen to (default: 127.0.0.1)', True)
        self.add_item(section, 'port', int, 8081, 'port for the httpd to listen to (default: 8081)', True)
        self.add_item(section, 'enabled', bool, False, 'start httpd (default: False)', True)

        section = 'ppf'
        self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
        self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
        self.add_item(section, 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
        self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
        self.add_item(section, 'threads', int, 1, 'number of threads to run (default: 1)', False)
        self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
        self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
        self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
        self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
        self.add_item(section, 'extract_samedomain', bool, False, 'extract only url from same domains? (default: False)', False)

        section = 'scraper'
        self.add_item(section, 'enabled', bool, True, 'enable search engine scraper (default: True)', False)
        self.add_item(section, 'threads', int, 3, 'number of scraper threads (default: 3)', False)
        self.add_item(section, 'debug', bool, False, 'scraper: whether to print additional debug info', False)
        self.add_item(section, 'query', str, 'psw', 'build query using Proxies, Search, Websites', False)
        self.add_item(section, 'backoff_base', int, 30, 'base backoff delay in seconds (default: 30)', False)
        self.add_item(section, 'backoff_max', int, 3600, 'max backoff delay in seconds (default: 3600)', False)
        self.add_item(section, 'fail_threshold', int, 2, 'consecutive failures before backoff (default: 2)', False)
        self.add_item(section, 'engines', str, 'searx,duckduckgo,github', 'comma-separated search engines (default: searx,duckduckgo,github)', False)
        self.add_item(section, 'max_pages', int, 5, 'max pages to fetch per engine query (default: 5)', False)
        self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate', 'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
        self.add_item(section, 'libretranslate_enabled', bool, False, 'enable LibreTranslate for dynamic translations (default: False)', False)

        # Command-line only options (not stored in config.ini sections)
        self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
        self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
        self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
        self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
        self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)