misc: add SSL protocol error detection and diag logging
This commit is contained in:
57
misc.py
57
misc.py
@@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
|
||||||
# Log levels: lower number = more verbose
|
# Log levels: lower number = more verbose
|
||||||
LOG_LEVELS = {
|
LOG_LEVELS = {
|
||||||
@@ -12,6 +14,7 @@ LOG_LEVELS = {
|
|||||||
'rate': 1, # rate limiting info, same as info
|
'rate': 1, # rate limiting info, same as info
|
||||||
'scraper': 1, # scraper info, same as info
|
'scraper': 1, # scraper info, same as info
|
||||||
'stats': 1, # statistics, same as info
|
'stats': 1, # statistics, same as info
|
||||||
|
'diag': 1, # diagnostic sampling, same as info
|
||||||
'warn': 2,
|
'warn': 2,
|
||||||
'error': 3,
|
'error': 3,
|
||||||
'none': 99, # suppress all
|
'none': 99, # suppress all
|
||||||
@@ -34,6 +37,47 @@ SSL_ERRORS = frozenset({FAIL_SSL})
|
|||||||
# Connection errors - proxy might be dead, need secondary verification
|
# Connection errors - proxy might be dead, need secondary verification
|
||||||
CONN_ERRORS = frozenset({FAIL_TIMEOUT, FAIL_REFUSED, FAIL_UNREACHABLE, FAIL_CLOSED, FAIL_DNS})
|
CONN_ERRORS = frozenset({FAIL_TIMEOUT, FAIL_REFUSED, FAIL_UNREACHABLE, FAIL_CLOSED, FAIL_DNS})
|
||||||
|
|
||||||
|
# Substrings of SSL error reasons that mean the proxy does not speak SSL at
# all, so no fallback is needed. They signal a protocol mismatch rather than
# a certificate problem.
# Entries are lowercase: is_ssl_protocol_error() lowercases the reason and
# tests each entry with a plain substring check.
SSL_PROTOCOL_ERROR_PATTERNS = (
    'wrong version number',
    'unsupported protocol',
    'no protocols available',
    'protocol is shutdown',
    'unexpected eof',
    'eof occurred',
    'alert protocol version',
    'alert handshake failure',
    'http request',  # Sent HTTP to HTTPS port
    'wrong ssl version',
    'no ciphers available',
    'unknown protocol',
    'record layer failure',
    'bad record mac',
    'decryption failed',
    'packet length too long',
)
|
||||||
|
|
||||||
|
|
||||||
|
def is_ssl_protocol_error(reason):
    """Tell whether an SSL failure reason points at a protocol mismatch.

    The reason text is lowercased and searched for every substring listed
    in SSL_PROTOCOL_ERROR_PATTERNS.

    Args:
        reason: SSL error reason string (from failedproxy). A falsy value
            (empty string, None) is accepted and never matches.

    Returns:
        True if any known pattern occurs in the reason (proxy doesn't
        support SSL), False for a falsy reason or when nothing matches
        (possibly a cert or other error where fallback makes sense).
    """
    if reason:
        lowered = reason.lower()
        return any(needle in lowered for needle in SSL_PROTOCOL_ERROR_PATTERNS)
    return False
|
||||||
|
|
||||||
|
|
||||||
# Levels that go to stderr
|
# Levels that go to stderr
|
||||||
STDERR_LEVELS = ('warn', 'error')
|
STDERR_LEVELS = ('warn', 'error')
|
||||||
|
|
||||||
@@ -68,6 +112,7 @@ def _log(msg, level='info'):
|
|||||||
|
|
||||||
output = sys.stderr if level in STDERR_LEVELS else sys.stdout
|
output = sys.stderr if level in STDERR_LEVELS else sys.stdout
|
||||||
print >> output, '\r%s/%s\t%s' % (timestamp(), level, msg)
|
print >> output, '\r%s/%s\t%s' % (timestamp(), level, msg)
|
||||||
|
output.flush() # Force flush for container logs
|
||||||
|
|
||||||
|
|
||||||
def timestamp():
|
def timestamp():
|
||||||
@@ -139,3 +184,15 @@ def categorize_error(exc):
|
|||||||
return FAIL_PROXY
|
return FAIL_PROXY
|
||||||
|
|
||||||
return FAIL_OTHER
|
return FAIL_OTHER
|
||||||
|
|
||||||
|
|
||||||
|
def tor_proxy_url(torhost):
    """Build a Tor SOCKS5 proxy URL carrying freshly randomized credentials.

    Tor treats different username:password pairs as separate streams and
    routes them over different circuits, so a new random pair per call gives
    each connection a fresh circuit.

    Args:
        torhost: Tor SOCKS endpoint, inserted verbatim after the '@'
            (presumably 'host:port' -- confirm against callers).

    Returns:
        A string of the form 'socks5://<user>:<passwd>@<torhost>', where
        user and passwd are each 8 random characters drawn from lowercase
        ASCII letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits

    def _token():
        # One 8-character random credential component.
        return ''.join([random.choice(alphabet) for _ in range(8)])

    # Tuple elements evaluate left to right: user is drawn before passwd.
    return 'socks5://%s:%s@%s' % (_token(), _token(), torhost)
|
||||||
|
|||||||
Reference in New Issue
Block a user