#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Logging and utility functions."""

import time
import sys

# Log levels: lower number = more verbose
LOG_LEVELS = {
    'debug': 0,
    'info': 1,
    'rate': 1,     # rate limiting info, same as info
    'scraper': 1,  # scraper info, same as info
    'stats': 1,    # statistics, same as info
    'warn': 2,
    'error': 3,
    'none': 99,    # suppress all
}

# Failure categories for proxy errors
FAIL_TIMEOUT = 'timeout'
FAIL_REFUSED = 'refused'
FAIL_AUTH = 'auth'
FAIL_UNREACHABLE = 'unreachable'
FAIL_DNS = 'dns'
FAIL_SSL = 'ssl'
FAIL_CLOSED = 'closed'
FAIL_PROXY = 'proxy'
FAIL_OTHER = 'other'

# Levels that go to stderr
STDERR_LEVELS = ('warn', 'error')

# Default log level (info)
_log_level = 1


def set_log_level(level):
    """Set minimum log level. Messages below this level are suppressed.

    Args:
        level: Either an int, which is stored as-is, or a level name that
            is looked up in LOG_LEVELS. A name that is not in LOG_LEVELS
            falls back to 1 (info).
    """
    global _log_level
    if isinstance(level, int):
        _log_level = level
    else:
        _log_level = LOG_LEVELS.get(level, 1)


def get_log_level():
    """Get current log level (the numeric threshold)."""
    return _log_level


def _log(msg, level='info'):
    """Log a message if it meets the current log level threshold.

    The emitted line has the form '\\r<HH:MM:SS>/<level>\\t<msg>' followed
    by a newline. Levels listed in STDERR_LEVELS are written to stderr,
    everything else to stdout.

    Args:
        msg: Message to log
        level: Level name, one of the LOG_LEVELS keys (debug, info, rate,
            scraper, stats, warn, error, none). A name that is not in
            LOG_LEVELS is treated as info (1) for the threshold check but
            is still printed verbatim in the line prefix.
    """
    msg_level = LOG_LEVELS.get(level, 1)
    if msg_level < _log_level:
        return

    # Resolve the stream at call time so a redirected sys.stdout/sys.stderr
    # is honoured.
    output = sys.stderr if level in STDERR_LEVELS else sys.stdout
    # Plain write() instead of the Python-2-only "print >> stream" statement:
    # same bytes on Python 2, and it keeps working on Python 3.
    # NOTE(review): the leading '\r' presumably overwrites an in-progress
    # status line on a terminal -- confirm with the callers.
    output.write('\r%s/%s\t%s\n' % (timestamp(), level, msg))


def timestamp():
    """Return the current UTC time formatted as HH:MM:SS."""
    return time.strftime('%H:%M:%S', time.gmtime())


def categorize_error(exc):
    """Categorize a RocksockException into failure type.

    Args:
        exc: RocksockException instance. Any object that lacks the
            get_error()/get_errortype() accessors is reported as
            FAIL_OTHER without touching the rocksock module.

    Returns:
        One of FAIL_* constants
    """
    # Cheap duck-typing guard first: both accessors are called below, and a
    # foreign exception should not force (or fail on) the rocksock import.
    if not (hasattr(exc, 'get_error') and hasattr(exc, 'get_errortype')):
        return FAIL_OTHER

    # Import here to avoid circular dependency
    import rocksock

    error = exc.get_error()
    errortype = exc.get_errortype()

    # DNS resolution failures
    if errortype == rocksock.RS_ET_GAI:
        return FAIL_DNS

    # SSL errors
    if errortype == rocksock.RS_ET_SSL:
        return FAIL_SSL
    if error in (rocksock.RS_E_SSL_GENERIC,
                 rocksock.RS_E_SSL_CERTIFICATE_ERROR):
        return FAIL_SSL

    # Timeout errors
    if error in (rocksock.RS_E_HIT_TIMEOUT,
                 rocksock.RS_E_HIT_READTIMEOUT,
                 rocksock.RS_E_HIT_WRITETIMEOUT,
                 rocksock.RS_E_HIT_CONNECTTIMEOUT):
        return FAIL_TIMEOUT

    # Connection refused
    if error in (rocksock.RS_E_TARGET_CONN_REFUSED,
                 rocksock.RS_E_TARGETPROXY_CONN_REFUSED):
        return FAIL_REFUSED

    # Authentication failures
    if error in (rocksock.RS_E_PROXY_AUTH_FAILED,
                 rocksock.RS_E_SOCKS4_NOAUTH):
        return FAIL_AUTH

    # Unreachable
    if error in (rocksock.RS_E_TARGET_NET_UNREACHABLE,
                 rocksock.RS_E_TARGETPROXY_NET_UNREACHABLE,
                 rocksock.RS_E_TARGET_HOST_UNREACHABLE,
                 rocksock.RS_E_TARGETPROXY_HOST_UNREACHABLE,
                 rocksock.RS_E_TARGET_TTL_EXPIRED,
                 rocksock.RS_E_TARGETPROXY_TTL_EXPIRED):
        return FAIL_UNREACHABLE

    # Connection closed
    if error == rocksock.RS_E_REMOTE_DISCONNECTED:
        return FAIL_CLOSED

    # Proxy-specific errors
    if error in (rocksock.RS_E_PROXY_UNEXPECTED_RESPONSE,
                 rocksock.RS_E_TARGETPROXY_CONNECT_FAILED,
                 rocksock.RS_E_PROXY_GENERAL_FAILURE,
                 rocksock.RS_E_PROXY_COMMAND_NOT_SUPPORTED,
                 rocksock.RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED):
        return FAIL_PROXY

    return FAIL_OTHER