Files
ppf/http2.py
2021-07-03 18:47:34 +02:00

440 lines
11 KiB
Python

# -*- coding: utf-8 -*-
from rocksock import Rocksock, RocksockException
import rocksock
import urllib, zlib
import ssl, socket
import time, sys
def _parse_errorcode(line):
r = line.find(' ')
if r == -1:
return line, -1, ''
ver = line[:r]
rest = line[r+1:]
r = rest.find(' ')
if r == -1:
msg = ''
err = int(rest)
else:
msg = rest[r+1:]
err = int(rest[:r])
return ver, err, msg
def _parse_url(url):
host = ''
url_l = url.lower()
if url_l.startswith('https://'):
ssl = True
url = url[8:]
port = 443
elif url_l.startswith('http://'):
ssl = False
url = url[7:]
port = 80
elif url_l.startswith('//'):
# can happen with a redirect
ssl = False
url = url[2:]
port = -1
elif url_l.startswith('/'):
# can happen with a redirect
url = url[1:]
port = 0
else:
raise
if not '/' in url: url = url + '/'
if port == 0:
return "", 0, False, url
port_index = -1
fixed_amazon_redirect = False
for i in range(len(url)):
if url[i] == '?':
if not fixed_amazon_redirect:
url = url.replace('?','/?',True)
fixed_amazon_redirect = True
if url[i] == ':':
host = url[:i]
port_index = i+1
elif url[i] == '/':
if port_index >= 0:
port = int(url[port_index:i])
else:
host = url[:i]
url = url[i:]
break
return host, port, ssl, url
def _parse_content_type(line):
ct = ''
cs = ''
a = line.split(';')
for x in a:
if x.lower().startswith('charset='):
cs = x[len('charset='):]
else:
ct = x
return ct, cs
TEXTUAL_CONTENT_TYPES_LIST = ['text/html', 'text/plain']
def _is_textual_content_type(ct):
ct = ct.lower()
return ct in TEXTUAL_CONTENT_TYPES_LIST
class RsHttp():
    """Small HTTP/1.1 client that talks over a Rocksock connection.

    Requests are assembled as plain strings, sent through ``self.conn``
    and the response is read back line by line.  Handles chunked transfer
    encoding, gzip/deflate content encoding, optional cookie capture and
    optional redirect following (GET only).
    """

    def __init__(self, host, port=80, ssl=False, follow_redirects=False,
                 auto_set_cookies=False, keep_alive=False, timeout=60,
                 user_agent=None, proxies=None, max_tries=10, log_errors=True,
                 verify_cert=False,
                 **kwargs):
        """Store the connection settings; no connection is opened here.

        host, port, ssl  -- target server; ``ssl`` selects a TLS connection
        follow_redirects -- let get() follow Location headers
        auto_set_cookies -- store cookies from Set-Cookie response headers
        keep_alive       -- send 'Connection: keep-alive' instead of 'close'
        timeout, proxies, verify_cert -- passed through to Rocksock
        user_agent       -- User-Agent header value (a default is used if
                            not given)
        max_tries        -- attempts for connecting and for each request
        log_errors       -- write connection errors to stderr
        kwargs           -- accepted and ignored
        """
        self.host = host
        self.port = port
        self.use_ssl = ssl
        self.debugreq = False
        self.follow_redirects = follow_redirects
        self.redirect_counter = 0
        self.auto_set_cookies = auto_set_cookies
        self.keep_alive = keep_alive
        self.timeout = timeout
        self.user_agent = user_agent if user_agent else 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
        self.proxies = proxies
        self.cookies = dict()
        self.max_tries = max_tries
        self.log_errors = log_errors
        self.last_rs_exception = None
        self.verify_cert = verify_cert
        self.headers = []
        # set up front so _send_raw() can test it before connect() was called
        self.conn = None

    def get_last_rocksock_exception(self):
        """Return the most recent RocksockException seen, or None."""
        return self.last_rs_exception

    def _err_log(self, s):
        """Write ``s`` plus a newline to stderr if error logging is on."""
        if self.log_errors:
            sys.stderr.write(s + '\n')

    def _debug_dump(self, marker, *parts):
        """Write ``marker``, every item of ``parts`` and a final newline to
        stdout, with nothing inserted in between."""
        out = sys.stdout
        out.write(marker)
        for part in parts:
            out.write(part)
        out.write('\n')

    def connect(self):
        """Open the connection; same as reconnect()."""
        return self.reconnect()

    def _key_match(self, want, got):
        """Case-insensitive comparison of two header names."""
        return want.lower() == got.lower()

    def _make_request(self, typ, url, extras=None):
        """Build the raw request text for method ``typ`` and target ``url``.

        ``extras`` is an optional list of additional header lines.  An entry
        starting with the marker 'p0$tD4ta:' is not a header: the text after
        the marker is appended as the request body.
        """
        extras = extras if extras else []
        lines = [typ + ' ' + url + ' HTTP/1.1']
        if self.port != 80 and self.port != 443:
            lines.append('Host: %s:%d' % (self.host, self.port))
        else:
            lines.append('Host: %s' % (self.host))
        if self.keep_alive:
            lines.append('Connection: keep-alive')
        else:
            lines.append('Connection: close')
        lines.append('Accept: */*')
        lines.append('Accept-Encoding: gzip, deflate')
        lines.append('User-Agent: %s' % self.user_agent)
        lines.append('DNT: 1')
        lines.extend(self.headers)

        # cookies with an empty value are sent as a bare name
        cookie_items = []
        for name in self.cookies:
            value = self.cookies[name]
            cookie_items.append(name + '=' + value if value != '' else name)
        cs = '; '.join(cookie_items)
        if cs != '':
            lines.append('Cookie: ' + cs)

        postdata = ''
        for i in extras:
            if i.startswith('p0$tD4ta:'):
                postdata = i[9:]
            else:
                lines.append(i)

        # every header line ends in CRLF, followed by the empty line
        s = '\r\n'.join(lines) + '\r\n\r\n' + postdata
        if self.debugreq:
            self._debug_dump(">>>\n", s)
        return s

    def _make_head_request(self, url, extras=None):
        """Build a HEAD request for ``url``."""
        return self._make_request('HEAD', url, extras)

    def _make_get_request(self, url, extras=None):
        """Build a GET request for ``url``."""
        return self._make_request('GET', url, extras)

    def _make_post_request_raw(self, url, data, extras=None):
        """Build a POST request whose body is the already-encoded ``data``."""
        # copy, so the caller's list is not modified by the appends below
        x = list(extras) if extras else []
        x.append('Content-Type: application/x-www-form-urlencoded')
        x.append('Content-Length: ' + str(len(data)))
        x.append('p0$tD4ta:' + data)
        return self._make_request('POST', url, x)

    def _make_post_request(self, url, values, extras=None):
        """Build a POST request with ``values`` form-urlencoded as body."""
        data = urllib.urlencode(values)
        return self._make_post_request_raw(url, data, extras)

    def _try_gunzip(self, data):
        """Try to gunzip ``data``.

        Returns ``(0, decompressed)`` on success, ``(-1, '')`` if zlib
        reports the stream as incomplete and ``(-2, '')`` on any other
        zlib error.
        """
        try:
            res = zlib.decompress(data, 16 + zlib.MAX_WBITS)
            return 0, res
        except zlib.error as e:
            if 'incomplete' in str(e):
                return -1, ''
            return -2, ''

    def _get_response(self):
        """Read one response from the connection.

        Returns ``(headers, body, redirect)``: the status line and header
        lines joined with '\\n', the body (de-chunked, decompressed and, for
        utf-8 text/html or text/plain, decoded) and the Location header
        value ('' if there was none).

        Raises zlib.error if a gzip body cannot be decompressed.
        """
        def parse_header_fields(line):
            if ':' not in line:
                return line.rstrip(' '), ""
            if ': ' not in line:
                return line.split(':', 1)
            return line.split(': ', 1)

        chunked = False
        unzip = ''
        redirect = ''
        charset = ''
        # some sites don't set content-length, -1 will cause to fetch as much as possible
        q = -1
        res = ''

        # skip everything up to the status line, e.g. 'HTTP/1.1 302 Found'
        l = ''
        while not l.startswith('HTTP/'):
            l = self.conn.recvline().strip()
        s = l + '\n'
        foo, code, msg = _parse_errorcode(l)

        while True:
            l = self.conn.recvline().strip()
            s += l + '\n'
            if l == '':
                break
            key, val = parse_header_fields(l)
            if self._key_match(key, 'Transfer-Encoding') and 'chunked' in val.lower():
                chunked = True
            elif self._key_match(key, 'Set-Cookie') and self.auto_set_cookies:
                self.set_cookie(l)
            elif self._key_match(key, 'Location'):
                redirect = val
            elif self._key_match(key, 'Content-Type'):
                ct, cs = _parse_content_type(val)
                if cs.lower() == 'utf-8':
                    if _is_textual_content_type(ct):
                        charset = 'utf-8'
            elif self._key_match(key, 'Content-Encoding'):
                # encoding tokens are matched regardless of case/whitespace
                enc = val.strip().lower()
                if enc in ('gzip', 'deflate'):
                    unzip = enc
            elif self._key_match(key, 'Content-Length'):
                try:
                    q = int(val)
                except ValueError:
                    # unusable length: fall back to "read what is there"
                    q = -1

        # error response without a declared length: don't wait for a body
        if q == -1 and code >= 400 and code < 600:
            return (s, res, redirect)

        if not chunked:
            res = self.conn.recv(q)
        else:
            while True:
                # chunk header: hex size, optionally followed by ';ext'
                l = self.conn.recvline().strip().split(';', 1)
                if l[0] == '':
                    break
                q = int(l[0], 16)
                data = self.conn.recv(q)
                assert(len(data) == q)
                res += data
                crlf = self.conn.recv(2)
                assert(crlf == '\r\n')
                if q == 0:
                    break

        if len(res) != 0:
            if unzip == 'gzip':
                ec, extr = self._try_gunzip(res)
                while ec == -1:
                    # stream incomplete so far: pull in more data
                    more = self.conn.recv(-1)
                    if not more:
                        # nothing more arrived; give up instead of looping
                        ec = -2
                        break
                    res += more
                    ec, extr = self._try_gunzip(res)
                if ec == -2:
                    raise zlib.error
                res = extr
            elif unzip == 'deflate':
                try:
                    res = zlib.decompress(res)
                except zlib.error:
                    # retry as a raw deflate stream (no zlib header)
                    res = zlib.decompress(res, -zlib.MAX_WBITS)
            if charset != '':
                res = res.decode(charset)

        if self.debugreq:
            self._debug_dump("<<<\n", s, res)
        return (s, res, redirect)

    def reconnect(self):
        """Create a new Rocksock connection, trying up to max_tries times.

        Returns True once connected.  Returns False when all attempts
        failed, or at once when the host name does not resolve (in which
        case ``self.conn`` is set to None).
        """
        tries = 0
        while tries < self.max_tries:
            tries += 1
            try:
                self.conn = Rocksock(host=self.host, port=self.port, proxies=self.proxies, ssl=self.use_ssl, timeout=self.timeout, verifycert=self.verify_cert)
                self.conn.connect()
                return True
            except RocksockException as e:
                self.last_rs_exception = e
                if e.errortype == rocksock.RS_ET_GAI and e.error == -2:
                    # -2: Name does not resolve
                    if self.conn is not None:
                        self.conn.disconnect()
                    self.conn = None
                    return False
                self._err_log(e.get_errormessage())
                time.sleep(0.05)
                continue
            except socket.gaierror:
                self._err_log("gaie")
                time.sleep(0.05)
                continue
            except ssl.SSLError as e:
                # reason can be missing or None; fall back to the message
                reason = getattr(e, 'reason', None) or str(e)
                self._err_log("ssle" + reason)
                time.sleep(0.05)
                continue
        return False

    def _send_and_recv_i(self, req):
        """Send ``req`` and read the response; ("", "", "") if sending
        failed."""
        if self._send_raw(req):
            return self._get_response()
        return "", "", ""

    def _send_and_recv(self, req):
        """Send ``req`` and read the response, retrying up to max_tries
        times.  Returns ("", "", "") if every attempt failed."""
        tries = 0
        while tries < self.max_tries:
            tries += 1
            a = self._catch(self._send_and_recv_i, None, req)
            if a is not None:
                return a
        return "", "", ""

    def _catch(self, func, failret, *args):
        """Call ``func(*args)`` and return its result.

        On a connection-level error (RocksockException, IOError, EOFError,
        ssl.SSLError) the connection is dropped and re-established.
        Returns ``failret`` if reconnecting failed and None if it
        succeeded, so a caller can retry.
        """
        try:
            return func(*args)
        except (RocksockException, IOError, EOFError, ssl.SSLError) as e:
            if isinstance(e, RocksockException):
                self.last_rs_exception = e
            if self.conn is not None:
                self.conn.disconnect()
            if not self.reconnect():
                return failret
        return None

    def _send_raw(self, req):
        """Send ``req``, connecting first if necessary.  Returns True on
        success, False otherwise."""
        if self.conn is None:
            if not self.reconnect():
                return False
        res = self.conn.send(req)
        if res is not False:
            return True
        return False

    def get(self, url, extras=None):
        """GET ``url`` and return ``(headers, body)``.

        With follow_redirects enabled, Location headers are followed
        (reconnecting to a different host/port when the target requires
        it).  After more than 16 consecutive redirects ``('', '')`` is
        returned.
        """
        req = self._make_get_request(url, extras)
        hdr, res, redirect = self._send_and_recv(req)

        if redirect == '' or not self.follow_redirects:
            self.redirect_counter = 0
            return hdr, res

        MAX_REDIRECTS = 16
        self.redirect_counter += 1
        if self.redirect_counter > MAX_REDIRECTS:
            # reset, otherwise every later redirect would fail immediately
            self.redirect_counter = 0
            return '', ''

        host, port, use_ssl, url = _parse_url(redirect)
        if port != 0:  # 0: relative redirect, stay on this connection
            self.host = host
            if port != -1:  # -1: use existing port/ssl
                self.port = port
                self.use_ssl = use_ssl
            if self.conn is not None:
                self.conn.disconnect()
            self.conn = None
            self.reconnect()
        if not url.startswith('/'):
            # the request target has to be an absolute path
            url = '/' + url
        return self.get(url, extras)

    def _head_i(self, url, extras=None):
        """Send one HEAD request and return the status line plus header
        lines joined with '\\n'; "" if sending failed."""
        req = self._make_head_request(url, extras)
        if not self._send_raw(req):
            return ""
        # first line is the status line, e.g. 'HTTP/1.1 302 Found'
        l = self.conn.recvline().strip()
        s = l + '\n'
        while True:
            l = self.conn.recvline().strip()
            s += l + '\n'
            if l == '':
                break
        if self.debugreq:
            self._debug_dump("<<<\n", s)
        return s

    def head(self, url, extras=None):
        """HEAD ``url`` with retries; returns the header text, or "" if
        every attempt failed."""
        tries = 0
        while tries < self.max_tries:
            tries += 1
            res = self._catch(self._head_i, None, url, extras)
            if res is not None:
                return res
        return ""

    def post_raw(self, url, data, extras=None):
        """POST the already-encoded ``data``; returns ``(headers, body)``.
        Redirects are not followed."""
        req = self._make_post_request_raw(url, data, extras)
        hdr, res, redirect = self._send_and_recv(req)
        return hdr, res

    def post(self, url, values, extras=None):
        """POST ``values`` form-urlencoded; returns ``(headers, body)``.
        Redirects are not followed."""
        req = self._make_post_request(url, values, extras)
        hdr, res, redirect = self._send_and_recv(req)
        return hdr, res

    def xhr_get(self, url):
        """get() with an 'X-Requested-With: XMLHttpRequest' header."""
        return self.get(url, ['X-Requested-With: XMLHttpRequest'])

    def xhr_post(self, url, values=None):
        """post() with an 'X-Requested-With: XMLHttpRequest' header;
        ``values`` defaults to an empty form."""
        if values is None:
            values = {}
        return self.post(url, values, ['X-Requested-With: XMLHttpRequest'])

    def add_header(self, s):
        """Add a header line that is copied verbatim into each request.

        Example: http.add_header("Referer: http://bbc.com")
        """
        self.headers.append(s)

    def add_headers(self, lines):
        """Add a multi-line header chunk; each non-empty line is copied
        verbatim into each request."""
        for line in lines.split('\n'):
            line = line.rstrip('\r')
            if len(line):
                self.headers.append(line)

    def set_cookie(self, c):
        """Store a cookie.

        ``c`` is either a full 'Set-Cookie: name=value; attrs' header line
        or just the 'name=value; attrs' part.  Only name and value are
        kept, attributes after the first ';' are dropped.  A cookie without
        '=' is stored with an empty value.
        """
        prefix = 'set-cookie:'
        if c.lower().startswith(prefix):
            # the space after the colon is optional
            c = c[len(prefix):].lstrip()
        j = c.find(';')
        if j == -1:
            j = len(c)
        c = c[:j]
        i = c.find('=')
        if i == -1:
            i = len(c)
        self.cookies[c[:i]] = c[i + 1:]
if __name__ == '__main__':
    # Manual smoke test: send a HEAD and then a GET request for a fixed
    # URL with request/response debug output switched on.
    url = 'https://www.openssl.org/news/secadv/20170126.txt'
    host, port, use_ssl, uri = _parse_url(url)
    http = RsHttp(host=host, port=port, ssl=use_ssl, timeout=15,
                  follow_redirects=True, auto_set_cookies=True)
    http.debugreq = True
    if http.connect():
        hdr = http.head(uri)
        hdr, res = http.get(uri)
    else:
        print("sorry, couldn't connect")