449 lines
11 KiB
Python
449 lines
11 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
from rocksock import Rocksock, RocksockException
|
|
import rocksock
|
|
import urllib, zlib
|
|
import ssl, socket
|
|
import time, sys
|
|
|
|
def _parse_errorcode(line):
|
|
r = line.find(' ')
|
|
if r == -1:
|
|
return line, -1, ''
|
|
ver = line[:r]
|
|
rest = line[r+1:]
|
|
r = rest.find(' ')
|
|
if r == -1:
|
|
msg = ''
|
|
err = int(rest)
|
|
else:
|
|
msg = rest[r+1:]
|
|
err = int(rest[:r])
|
|
return ver, err, msg
|
|
|
|
def _parse_url(url):
|
|
host = ''
|
|
url_l = url.lower()
|
|
if url_l.startswith('https://'):
|
|
ssl = True
|
|
url = url[8:]
|
|
port = 443
|
|
elif url_l.startswith('http://'):
|
|
ssl = False
|
|
url = url[7:]
|
|
port = 80
|
|
elif url_l.startswith('//'):
|
|
# can happen with a redirect
|
|
ssl = False
|
|
url = url[2:]
|
|
port = -1
|
|
elif url_l.startswith('/'):
|
|
# can happen with a redirect
|
|
url = url[1:]
|
|
port = 0
|
|
else:
|
|
raise ValueError("invalid URL scheme: %s" % url[:50])
|
|
|
|
if not '/' in url: url = url + '/'
|
|
|
|
if port == 0:
|
|
return "", 0, False, url
|
|
|
|
port_index = -1
|
|
fixed_amazon_redirect = False
|
|
for i in range(len(url)):
|
|
if url[i] == '?':
|
|
if not fixed_amazon_redirect:
|
|
url = url.replace('?','/?',True)
|
|
fixed_amazon_redirect = True
|
|
if url[i] == ':':
|
|
host = url[:i]
|
|
port_index = i+1
|
|
elif url[i] == '/':
|
|
if port_index >= 0:
|
|
port = int(url[port_index:i])
|
|
else:
|
|
host = url[:i]
|
|
url = url[i:]
|
|
break
|
|
return host, port, ssl, url
|
|
|
|
def _parse_content_type(line):
|
|
ct = ''
|
|
cs = ''
|
|
a = line.split(';')
|
|
for x in a:
|
|
if x.lower().startswith('charset='):
|
|
cs = x[len('charset='):]
|
|
else:
|
|
ct = x
|
|
return ct, cs
|
|
|
|
TEXTUAL_CONTENT_TYPES_LIST = ['text/html', 'text/plain']
|
|
def _is_textual_content_type(ct):
|
|
ct = ct.lower()
|
|
return ct in TEXTUAL_CONTENT_TYPES_LIST
|
|
|
|
class RsHttp():
    """Small HTTP/1.1 client that talks over a Rocksock connection.

    Requests are assembled as plain strings and written to the
    connection; responses are read line by line.  Network failures
    (RocksockException, IOError, EOFError, ssl.SSLError) trigger a
    reconnect and the request is retried up to max_tries times.
    """

    def __init__(self, host, port=80, ssl=False, follow_redirects=False,
                 auto_set_cookies=False, keep_alive=False, timeout=60,
                 user_agent=None, proxies=None, max_tries=10, log_errors=True,
                 verify_cert=False,
                 **kwargs):
        """Store the connection settings; no network activity happens here.

        host, port, ssl, proxies, timeout and verify_cert are handed to
        Rocksock when connecting.  follow_redirects makes get() follow
        Location headers, auto_set_cookies stores Set-Cookie headers,
        keep_alive selects the Connection header, max_tries bounds the
        connect/request retries and log_errors enables stderr logging.
        Extra keyword arguments are accepted and ignored.
        """
        self.host = host
        self.port = port
        self.use_ssl = ssl
        self.debugreq = False
        self.follow_redirects = follow_redirects
        self.redirect_counter = 0
        self.auto_set_cookies = auto_set_cookies
        self.keep_alive = keep_alive
        self.timeout = timeout
        self.user_agent = user_agent if user_agent else 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
        self.proxies = proxies
        self.cookies = dict()
        self.max_tries = max_tries
        self.log_errors = log_errors
        self.last_rs_exception = None
        self.verify_cert = verify_cert
        self.headers = []
        # _send_raw() tests "self.conn is None", so the attribute has to
        # exist even when connect() was never called
        self.conn = None

    def get_last_rocksock_exception(self):
        """Return the most recent RocksockException seen, or None."""
        return self.last_rs_exception

    def disconnect(self):
        """Safely close the underlying connection."""
        conn = getattr(self, 'conn', None)
        if conn:
            try:
                conn.disconnect()
            except Exception:
                # best effort: the connection is dropped either way
                pass
        self.conn = None

    def _err_log(self, s):
        """Write s to stderr when error logging is enabled."""
        if self.log_errors:
            sys.stderr.write(s + '\n')

    def connect(self):
        """Open the connection; returns True on success (see reconnect)."""
        return self.reconnect()

    def _key_match(self, want, got):
        """Case-insensitive comparison of two header names."""
        return want.lower() == got.lower()

    def _make_request(self, typ, url, extras=None):
        """Build the complete request text for method typ and target url.

        extras is an optional list of additional header lines.  An entry
        starting with 'p0$tD4ta:' is not a header: the text after that
        marker is appended as the request body.  The extras list itself
        is not modified.
        """
        lines = [typ + ' ' + url + ' HTTP/1.1']
        if self.port != 80 and self.port != 443:
            lines.append('Host: %s:%d' % (self.host, self.port))
        else:
            lines.append('Host: %s' % (self.host))
        if self.keep_alive:
            lines.append('Connection: keep-alive')
        else:
            lines.append('Connection: close')
        lines.append('Accept: */*')
        lines.append('Accept-Encoding: gzip, deflate')
        lines.append('User-Agent: %s' % self.user_agent)
        lines.append('DNT: 1')
        # headers registered through add_header()/add_headers()
        lines.extend(self.headers)

        # a cookie stored with an empty value is sent as a bare name
        pairs = []
        for name in self.cookies:
            value = self.cookies[name]
            pair = name + '=' + value if value != '' else name
            if pair != '':
                pairs.append(pair)
        if pairs:
            lines.append('Cookie: ' + '; '.join(pairs))

        postdata = ''
        for extra in (extras if extras else []):
            if extra.startswith('p0$tD4ta:'):
                postdata = extra[9:]
            else:
                lines.append(extra)

        # the blank line terminates the header section
        s = '\r\n'.join(lines) + '\r\n\r\n' + postdata
        if self.debugreq:
            print(">>>\n", s)
        return s

    def _make_head_request(self, url, extras=None):
        """Build a HEAD request for url."""
        return self._make_request('HEAD', url, extras)

    def _make_get_request(self, url, extras=None):
        """Build a GET request for url."""
        return self._make_request('GET', url, extras)

    def _make_post_request_raw(self, url, data, extras=None):
        """Build a POST request whose body is the already-encoded data.

        Adds the form Content-Type and a Content-Length computed as
        len(data).  The caller's extras list is left untouched.
        """
        # work on a copy so a list reused by the caller does not
        # accumulate headers and body markers from earlier requests
        x = list(extras) if extras else []
        x.append('Content-Type: application/x-www-form-urlencoded')
        x.append('Content-Length: ' + str(len(data)))
        x.append('p0$tD4ta:' + data)
        return self._make_request('POST', url, x)

    def _make_post_request(self, url, values, extras=None):
        """Build a POST request with values url-encoded as the body."""
        try:
            from urllib import urlencode  # Python 2
        except ImportError:
            from urllib.parse import urlencode  # Python 3
        data = urlencode(values)
        return self._make_post_request_raw(url, data, extras)

    def _try_gunzip(self, data):
        """Try to decompress gzip data.

        Returns (0, decompressed) on success, (-1, '') when zlib reports
        the stream as incomplete (more input is needed) and (-2, '') for
        any other zlib error.
        """
        try:
            res = zlib.decompress(data, 16+zlib.MAX_WBITS)
            return 0, res
        except zlib.error as e:
            # str(e) instead of e.message: the attribute is gone in Python 3
            if 'incomplete' in str(e):
                return -1, ''
            return -2, ''

    def _get_response(self):
        """Read one response from the connection.

        Returns (headers, body, redirect): the status line and header
        lines joined with '\\n', the body (decompressed for gzip/deflate
        and decoded when a textual type declares utf-8) and the Location
        header value or ''.  For a 4xx/5xx response without a
        Content-Length the body is not read and '' is returned for it.

        Raises zlib.error when a gzip body cannot be decompressed and
        AssertionError when a chunked body is malformed.
        """
        def parse_header_fields(line):
            # header line -> (name, value); a line without ':' has no value
            if ':' not in line:
                return line.rstrip(' '), ""
            key, val = line.split(':', 1)
            return key.strip(), val.strip()

        chunked = False
        unzip = ''
        redirect = ''
        charset = ''
        # some sites don't set content-length, -1 will cause to fetch as much as possible
        q = -1
        res = ''

        # skip everything up to the status line, e.g. 'HTTP/1.1 302 Found'
        l = ''
        while not l.startswith('HTTP/'):
            l = self.conn.recvline().strip()
        s = l + '\n'
        foo, code, msg = _parse_errorcode(l)

        # header section, terminated by an empty line
        while True:
            l = self.conn.recvline().strip()
            s += l + '\n'
            if l == '':
                break
            key, val = parse_header_fields(l)
            if self._key_match(key, 'Transfer-Encoding') and 'chunked' in val:
                chunked = True
            elif self._key_match(key, 'Set-Cookie') and self.auto_set_cookies:
                self.set_cookie(l)
            elif self._key_match(key, 'Location'):
                redirect = val
            elif self._key_match(key, 'Content-Type'):
                ct, cs = _parse_content_type(val)
                if cs.lower() == 'utf-8':
                    if _is_textual_content_type(ct):
                        charset = 'utf-8'
            elif self._key_match(key, 'Content-Encoding'):
                coding = val.strip().lower()
                if coding == 'gzip':
                    unzip = 'gzip'
                elif coding == 'deflate':
                    unzip = 'deflate'
            elif self._key_match(key, 'Content-Length'):
                q = int(val)

        if q == -1 and code >= 400 and code < 600:
            return (s, res, redirect)

        if not chunked:
            res = self.conn.recv(q)
        else:
            chunks = []
            while True:
                # '<hex size>[;extension]'
                size_field = self.conn.recvline().strip().split(';', 1)
                if size_field[0] == '':
                    break
                q = int(size_field[0], 16)
                data = self.conn.recv(q)
                # raised explicitly so the check survives "python -O"
                if len(data) != q:
                    raise AssertionError('short read inside chunked body')
                chunks.append(data)
                crlf = self.conn.recv(2)
                if crlf != '\r\n':
                    raise AssertionError('chunk not terminated by CRLF')
                if q == 0:
                    break
            res = ''.join(chunks)

        if len(res) != 0:
            if unzip == 'gzip':
                ec, extr = self._try_gunzip(res)
                while ec == -1:
                    # stream incomplete so far: pull in whatever else arrives
                    more = self.conn.recv(-1)
                    if not more:
                        # nothing left to read, the stream will never complete
                        raise zlib.error('incomplete gzip stream')
                    res += more
                    ec, extr = self._try_gunzip(res)
                if ec == -2:
                    raise zlib.error
                res = extr
            elif unzip == 'deflate':
                try:
                    res = zlib.decompress(res)
                except zlib.error:
                    # some servers send raw deflate without the zlib header
                    res = zlib.decompress(res, -zlib.MAX_WBITS)

            if charset != '':
                res = res.decode(charset)

        if self.debugreq:
            print("<<<\n", s, res)

        return (s, res, redirect)

    def reconnect(self):
        """Create a new Rocksock and connect it, trying up to max_tries times.

        Returns True once connected.  Returns False when every attempt
        failed, or at once when name resolution reports error -2.
        """
        for _ in range(self.max_tries):
            try:
                self.conn = Rocksock(host=self.host, port=self.port, proxies=self.proxies, ssl=self.use_ssl, timeout=self.timeout, verifycert=self.verify_cert)
                self.conn.connect()
                return True
            except RocksockException as e:
                self.last_rs_exception = e
                if e.errortype == rocksock.RS_ET_GAI and e.error == -2:
                    # -2: Name does not resolve
                    self.disconnect()
                    return False
                self._err_log(e.get_errormessage())
            except socket.gaierror:
                self._err_log("gaie")
            except ssl.SSLError as e:
                # reason may be missing or None; fall back to the full text
                self._err_log("ssle" + str(getattr(e, 'reason', None) or e))
            time.sleep(0.05)
        return False

    def _send_and_recv_i(self, req):
        """Send req once and read the response; ('', '', '') if sending failed."""
        if self._send_raw(req):
            return self._get_response()
        return "", "", ""

    def _send_and_recv(self, req):
        """Send req and read the response, retrying up to max_tries times.

        Returns the (headers, body, redirect) tuple, or ('', '', '') when
        every attempt failed.
        """
        for _ in range(self.max_tries):
            a = self._catch(self._send_and_recv_i, None, req)
            if a is not None:
                return a
        return "", "", ""

    def _catch(self, func, failret, *args):
        """Call func(*args); on a network error drop the connection and reconnect.

        Returns func's result when it succeeds.  After a handled error
        it returns failret if reconnecting failed and None if it
        succeeded, so the caller can retry.
        """
        try:
            return func(*args)
        except RocksockException as e:
            self.last_rs_exception = e
        except (IOError, EOFError, ssl.SSLError):
            pass
        # disconnect() copes with a connection that was never established
        self.disconnect()
        if not self.reconnect():
            return failret
        return None

    def _send_raw(self, req):
        """Write req to the connection, connecting first if necessary.

        Returns False when no connection could be made or the send call
        returned False, True otherwise.
        """
        if self.conn is None:
            if not self.reconnect():
                return False
        return self.conn.send(req) is not False

    def get(self, url, extras=None):
        """Perform a GET request and return (headers, body).

        With follow_redirects enabled a Location header is followed for
        at most 16 consecutive hops; when that limit is exceeded
        ('', '') is returned.
        """
        req = self._make_get_request(url, extras)
        hdr, res, redirect = self._send_and_recv(req)

        if redirect == '' or not self.follow_redirects:
            self.redirect_counter = 0
            return hdr, res

        MAX_REDIRECTS = 16
        self.redirect_counter += 1
        if self.redirect_counter > MAX_REDIRECTS:
            # start from zero again so the next request is not rejected
            # because of this redirect chain
            self.redirect_counter = 0
            return '', ''

        host, port, use_ssl, url = _parse_url(redirect)
        if port != 0:
            self.host = host
            if port != -1:  # -1: use existing port/ssl
                self.port = port
                self.use_ssl = use_ssl
        if not url.startswith('/'):
            # the request target has to be an absolute path
            url = '/' + url
        self.disconnect()
        self.reconnect()
        return self.get(url, extras)

    def _head_i(self, url, extras=None):
        """Send a single HEAD request and return the raw header text.

        Returns '' when the request could not be sent.
        """
        req = self._make_head_request(url, extras)
        if not self._send_raw(req):
            return ""
        # status line, e.g. 'HTTP/1.1 302 Found'
        l = self.conn.recvline().strip()
        s = l + '\n'
        # the values are unused; the call still rejects a non-numeric status
        _parse_errorcode(l)
        while True:
            l = self.conn.recvline().strip()
            s += l + '\n'
            if l == '':
                break
        if self.debugreq:
            print("<<<\n", s)
        return s

    def head(self, url, extras=None):
        """Perform a HEAD request, retrying up to max_tries times.

        Returns the header text, or '' when every attempt failed.
        """
        for _ in range(self.max_tries):
            res = self._catch(self._head_i, None, url, extras)
            if res is not None:
                return res
        return ""

    def post_raw(self, url, data, extras=None):
        """POST the already-encoded data; returns (headers, body)."""
        req = self._make_post_request_raw(url, data, extras)
        hdr, res, redirect = self._send_and_recv(req)
        return hdr, res

    def post(self, url, values, extras=None):
        """POST values as a url-encoded form; returns (headers, body)."""
        req = self._make_post_request(url, values, extras)
        hdr, res, redirect = self._send_and_recv(req)
        return hdr, res

    def xhr_get(self, url):
        """GET with the 'X-Requested-With: XMLHttpRequest' header added."""
        return self.get(url, ['X-Requested-With: XMLHttpRequest'])

    def xhr_post(self, url, values=None):
        """POST with the 'X-Requested-With: XMLHttpRequest' header added.

        values defaults to an empty form.
        """
        if values is None:
            values = {}
        return self.post(url, values, ['X-Requested-With: XMLHttpRequest'])

    def add_header(self, s):
        """Register a header line that is copied verbatim into each request.

        Example: http.add_header("Referer: http://bbc.com")
        """
        self.headers.append(s)

    def add_headers(self, lines):
        """Register every non-empty line of a multi-line header chunk."""
        for line in lines.split('\n'):
            line = line.rstrip('\r')
            if len(line):
                self.headers.append(line)

    def set_cookie(self, c):
        """Store a cookie given as 'name=value', optionally prefixed by 'Set-Cookie:'.

        Attributes after the first ';' are dropped.  A cookie without
        '=' is stored with an empty value.
        """
        prefix = 'set-cookie:'
        if c.lower().startswith(prefix):
            c = c[len(prefix):]
        pair = c.split(';', 1)[0].strip()
        name, _, value = pair.partition('=')
        self.cookies[name] = value
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: issue a HEAD and then a GET for a fixed URL
    # with request/response debugging switched on.
    url = 'https://www.openssl.org/news/secadv/20170126.txt'
    host, port, use_ssl, uri = _parse_url(url)
    http = RsHttp(
        host=host,
        port=port,
        timeout=15,
        ssl=use_ssl,
        follow_redirects=True,
        auto_set_cookies=True,
    )
    http.debugreq = True
    if http.connect():
        hdr = http.head(uri)
        hdr, res = http.get(uri)
    else:
        print("sorry, couldn't connect")
|