misc: add SSL protocol error detection and diag logging

This commit is contained in:
Username
2025-12-28 15:18:29 +01:00
parent f4286ea515
commit 50bc13a615

57
misc.py
View File

@@ -4,6 +4,8 @@
import time
import sys
import random
import string
# Log levels: lower number = more verbose
LOG_LEVELS = {
@@ -12,6 +14,7 @@ LOG_LEVELS = {
'rate': 1, # rate limiting info, same as info
'scraper': 1, # scraper info, same as info
'stats': 1, # statistics, same as info
'diag': 1, # diagnostic sampling, same as info
'warn': 2,
'error': 3,
'none': 99, # suppress all
@@ -34,6 +37,47 @@ SSL_ERRORS = frozenset({FAIL_SSL})
# Connection errors - the proxy itself might be dead, so these failures
# need secondary verification.
CONN_ERRORS = frozenset({FAIL_TIMEOUT, FAIL_REFUSED, FAIL_UNREACHABLE, FAIL_CLOSED, FAIL_DNS})
# SSL protocol errors - proxy doesn't support SSL, no fallback needed.
# These indicate a protocol mismatch, not certificate issues.
# is_ssl_protocol_error() searches for each entry as a substring of the
# lowercased error reason, so every pattern here must be written in lowercase.
SSL_PROTOCOL_ERROR_PATTERNS = (
    'wrong version number',
    'unsupported protocol',
    'no protocols available',
    'protocol is shutdown',
    'unexpected eof',
    'eof occurred',
    'alert protocol version',
    'alert handshake failure',
    'http request',  # Sent HTTP to HTTPS port
    'wrong ssl version',
    'no ciphers available',
    'unknown protocol',
    'record layer failure',
    'bad record mac',
    'decryption failed',
    'packet length too long',
)
def is_ssl_protocol_error(reason):
    """Tell whether an SSL failure reason points at a protocol mismatch.

    The reason is lowercased and searched for each substring listed in
    SSL_PROTOCOL_ERROR_PATTERNS.

    Args:
        reason: SSL error reason string (from failedproxy).

    Returns:
        True when any known protocol-error pattern occurs in the reason
        (the proxy doesn't support SSL); False for an empty reason or when
        nothing matches, i.e. it might be a certificate or other error
        where a fallback still makes sense.
    """
    if reason:
        lowered = reason.lower()
        return any(marker in lowered for marker in SSL_PROTOCOL_ERROR_PATTERNS)
    # Empty or missing reason: nothing to classify.
    return False
# Levels that go to stderr: _log() writes messages at these levels to
# sys.stderr; every other level is written to sys.stdout.
STDERR_LEVELS = ('warn', 'error')
@@ -68,6 +112,7 @@ def _log(msg, level='info'):
output = sys.stderr if level in STDERR_LEVELS else sys.stdout
print >> output, '\r%s/%s\t%s' % (timestamp(), level, msg)
output.flush() # Force flush for container logs
def timestamp():
@@ -139,3 +184,15 @@ def categorize_error(exc):
return FAIL_PROXY
return FAIL_OTHER
def tor_proxy_url(torhost):
    """Build a Tor SOCKS5 proxy URL carrying throwaway random credentials.

    Tor treats each distinct username:password pair as a separate stream and
    routes it over a different circuit, so generating new credentials on
    every call gives each connection a fresh circuit.

    Args:
        torhost: Tor address placed verbatim after the '@' (presumably
            host:port; confirm against callers).

    Returns:
        A string of the form 'socks5://<user>:<passwd>@<torhost>', where user
        and passwd are each 8 random characters drawn from lowercase ASCII
        letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits

    def _token():
        # One 8-character credential drawn from the alphabet.
        return ''.join(random.choice(alphabet) for _ in range(8))

    # The tuple is evaluated left to right: username first, then password.
    return 'socks5://%s:%s@%s' % (_token(), _token(), torhost)