# Definitions added to misc.py by the patch
# "misc: add SSL protocol error detection and diag logging"
# (commit 50bc13a615ab16b74b0353d826737c842eb18fd8).
#
# The same patch also adds a 'diag' entry (value 1, "same as info") to
# LOG_LEVELS and an output.flush() call after the print statement in _log().
# Those definitions are only partially visible in the patch context, so they
# are not reproduced here.

import random
import string

# SSL protocol errors - proxy doesn't support SSL, no fallback needed.
# These indicate protocol mismatch, not certificate issues.
# All patterns are lowercase: they are matched against a lowercased reason.
SSL_PROTOCOL_ERROR_PATTERNS = (
    'wrong version number',
    'unsupported protocol',
    'no protocols available',
    'protocol is shutdown',
    'unexpected eof',
    'eof occurred',
    'alert protocol version',
    'alert handshake failure',
    'http request',  # Sent HTTP to HTTPS port
    'wrong ssl version',
    'no ciphers available',
    'unknown protocol',
    'record layer failure',
    'bad record mac',
    'decryption failed',
    'packet length too long',
)


def is_ssl_protocol_error(reason):
    """Check if an SSL error reason indicates protocol incompatibility.

    The match is a case-insensitive substring search against
    SSL_PROTOCOL_ERROR_PATTERNS.

    Args:
        reason: SSL error reason string (from failedproxy). Any other
            truthy object (e.g. an exception instance) is converted with
            str() before matching instead of raising AttributeError.

    Returns:
        True if this is a protocol error (proxy doesn't support SSL),
        False if it might be a cert or other error where fallback makes
        sense, or if reason is empty/None.
    """
    if not reason:
        return False
    try:
        reason_lower = reason.lower()
    except AttributeError:
        # Not a string (e.g. an exception object): match on its text form.
        reason_lower = str(reason).lower()
    return any(pattern in reason_lower
               for pattern in SSL_PROTOCOL_ERROR_PATTERNS)


def tor_proxy_url(torhost):
    """Generate Tor SOCKS5 proxy URL with random credentials for circuit isolation.

    Tor treats different username:password as separate streams, using
    different circuits. This ensures each connection gets a fresh circuit.

    Args:
        torhost: 'host:port' of the Tor SOCKS listener; inserted verbatim
            after the '@' in the URL.

    Returns:
        A string of the form 'socks5://<user>:<passwd>@<torhost>' where
        user and passwd are 8 characters each, drawn from [a-z0-9].
    """
    # SystemRandom reads from the OS entropy source, so the credentials do
    # not depend on the module-level PRNG state: that state is copied into
    # forked workers and reset by random.seed(), either of which would make
    # separate connections reuse the same credentials (and thus circuit).
    rng = random.SystemRandom()
    chars = string.ascii_lowercase + string.digits
    user = ''.join(rng.choice(chars) for _ in range(8))
    passwd = ''.join(rng.choice(chars) for _ in range(8))
    return 'socks5://%s:%s@%s' % (user, passwd, torhost)