misc: add SSL protocol error detection and diag logging
This commit is contained in:
57
misc.py
57
misc.py
@@ -4,6 +4,8 @@
|
||||
|
||||
import time
|
||||
import sys
|
||||
import random
|
||||
import string
|
||||
|
||||
# Log levels: lower number = more verbose
|
||||
LOG_LEVELS = {
|
||||
@@ -12,6 +14,7 @@ LOG_LEVELS = {
|
||||
'rate': 1, # rate limiting info, same as info
|
||||
'scraper': 1, # scraper info, same as info
|
||||
'stats': 1, # statistics, same as info
|
||||
'diag': 1, # diagnostic sampling, same as info
|
||||
'warn': 2,
|
||||
'error': 3,
|
||||
'none': 99, # suppress all
|
||||
@@ -34,6 +37,47 @@ SSL_ERRORS = frozenset({FAIL_SSL})
|
||||
# Connection errors - proxy might be dead, need secondary verification
|
||||
# Held in a frozenset: the grouping is immutable and supports membership tests.
CONN_ERRORS = frozenset({FAIL_TIMEOUT, FAIL_REFUSED, FAIL_UNREACHABLE, FAIL_CLOSED, FAIL_DNS})
|
||||
|
||||
# SSL protocol errors - proxy doesn't support SSL, no fallback needed
|
||||
# These indicate protocol mismatch, not certificate issues
|
||||
# Lower-case substrings that is_ssl_protocol_error() searches for inside the
# lower-cased failure reason; any single hit classifies it as a protocol error.
SSL_PROTOCOL_ERROR_PATTERNS = (
    'wrong version number', 'unsupported protocol', 'no protocols available',
    'protocol is shutdown', 'unexpected eof', 'eof occurred',
    'alert protocol version', 'alert handshake failure',
    'http request',  # Sent HTTP to HTTPS port
    'wrong ssl version', 'no ciphers available', 'unknown protocol',
    'record layer failure', 'bad record mac', 'decryption failed',
    'packet length too long',
)
|
||||
|
||||
|
||||
def is_ssl_protocol_error(reason):
    """Check whether an SSL failure reason indicates protocol incompatibility.

    The reason is lower-cased and searched for each substring listed in
    SSL_PROTOCOL_ERROR_PATTERNS; the first match decides the result.

    Args:
        reason: SSL error reason string (from failedproxy). A truthy value
            without a ``lower()`` method (for example an exception instance)
            is converted with ``str()`` before matching rather than raising
            AttributeError.

    Returns:
        True if this is a protocol error (proxy doesn't support SSL),
        False if it might be a cert or other error where fallback makes
        sense. Empty or None reasons always return False.
    """
    if not reason:
        return False
    try:
        reason_lower = reason.lower()
    except AttributeError:
        # Not a string (e.g. the exception object itself): match on its text.
        reason_lower = str(reason).lower()
    return any(pattern in reason_lower for pattern in SSL_PROTOCOL_ERROR_PATTERNS)
|
||||
|
||||
|
||||
# Levels that go to stderr
|
||||
# Checked with `level in STDERR_LEVELS` when the logger picks its output stream.
STDERR_LEVELS = ('warn', 'error')
|
||||
|
||||
@@ -68,6 +112,7 @@ def _log(msg, level='info'):
|
||||
|
||||
output = sys.stderr if level in STDERR_LEVELS else sys.stdout
|
||||
print >> output, '\r%s/%s\t%s' % (timestamp(), level, msg)
|
||||
output.flush() # Force flush for container logs
|
||||
|
||||
|
||||
def timestamp():
|
||||
@@ -139,3 +184,15 @@ def categorize_error(exc):
|
||||
return FAIL_PROXY
|
||||
|
||||
return FAIL_OTHER
|
||||
|
||||
|
||||
def tor_proxy_url(torhost):
    """Build a Tor SOCKS5 proxy URL carrying freshly generated credentials.

    A new random username and password (8 characters each, drawn from
    lowercase ASCII letters and digits) are embedded in every URL. Tor treats
    different username:password pairs as separate streams on different
    circuits, so each returned URL is meant to get its own circuit.

    Args:
        torhost: Address of the Tor SOCKS endpoint, inserted verbatim after
            the ``@`` in the URL.

    Returns:
        A string of the form ``socks5://<user>:<passwd>@<torhost>``.
    """
    alphabet = string.ascii_lowercase + string.digits

    def _random_token(length=8):
        # One random.choice call per character, in order.
        return ''.join([random.choice(alphabet) for _ in range(length)])

    # The username is drawn before the password, matching the original
    # order of random draws.
    username = _random_token()
    password = _random_token()
    return 'socks5://%s:%s@%s' % (username, password, torhost)
|
||||
|
||||
Reference in New Issue
Block a user