update from twatscrape

This commit is contained in:
Your Name
2021-07-03 17:17:18 +02:00
parent 11c5bd67b3
commit 37622d8a17
2 changed files with 126 additions and 58 deletions

View File

@@ -32,6 +32,11 @@ def _parse_url(url):
ssl = False
url = url[7:]
port = 80
elif url_l.startswith('//'):
# can happen with a redirect
ssl = False
url = url[2:]
port = -1
elif url_l.startswith('/'):
# can happen with a redirect
url = url[1:]
@@ -45,7 +50,12 @@ def _parse_url(url):
return "", 0, False, url
port_index = -1
fixed_amazon_redirect = False
for i in range(len(url)):
if url[i] == '?':
if not fixed_amazon_redirect:
url = url.replace('?','/?',True)
fixed_amazon_redirect = True
if url[i] == ':':
host = url[:i]
port_index = i+1
@@ -78,12 +88,14 @@ class RsHttp():
def __init__(self, host, port=80, ssl=False, follow_redirects=False, \
auto_set_cookies=False, keep_alive=False, timeout=60, \
user_agent=None, proxies=None, max_tries=10, log_errors=True, \
verify_cert=False,
**kwargs):
self.host = host
self.port = port
self.use_ssl = ssl
self.debugreq = False
self.follow_redirects = follow_redirects
self.redirect_counter = 0
self.auto_set_cookies = auto_set_cookies
self.keep_alive = keep_alive
self.timeout = timeout
@@ -93,6 +105,7 @@ class RsHttp():
self.max_tries = max_tries
self.log_errors = log_errors
self.last_rs_exception = None
self.verify_cert=verify_cert
self.headers = []
def get_last_rocksock_exception(self):
@@ -166,6 +179,15 @@ class RsHttp():
data = urllib.urlencode(values)
return self._make_post_request_raw(url, data, extras)
def _try_gunzip(self, data):
    """Try to decompress a gzip-encoded body.

    Returns a ``(status, payload)`` tuple:

    *  ``0, <decompressed data>`` on success,
    * ``-1, ''`` when zlib reports the stream as incomplete (the caller
      may receive more bytes and retry),
    * ``-2, ''`` on any other zlib error.
    """
    try:
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        return 0, zlib.decompress(data, 16 + zlib.MAX_WBITS)
    except zlib.error as e:
        # Use str(e) rather than e.message: the ``message`` attribute only
        # exists on Python 2 exceptions, so on Python 3 the old check
        # raised AttributeError instead of classifying the error.
        if 'incomplete' in str(e):
            return -1, ''
        return -2, ''
def _get_response(self):
def parse_header_fields(line):
if not ':' in line: return line.rstrip(' '), ""
@@ -181,6 +203,8 @@ class RsHttp():
s = ''
res = ''
#'HTTP/1.1 302 Found\r\n'
l = ''
while not l.startswith('HTTP/'):
l = self.conn.recvline().strip()
s = l + '\n'
foo, code, msg = _parse_errorcode(l)
@@ -227,7 +251,13 @@ class RsHttp():
if len(res) != 0:
if unzip == 'gzip':
res = zlib.decompress(res, 16+zlib.MAX_WBITS)
ec, extr = self._try_gunzip(res)
while ec == -1:
res += self.conn.recv(-1)
ec, extr = self._try_gunzip(res)
if ec == -2:
raise zlib.error
res = extr
elif unzip == 'deflate':
try:
res = zlib.decompress(res)
@@ -247,7 +277,7 @@ class RsHttp():
while tries < self.max_tries:
tries += 1
try:
self.conn = Rocksock(host=self.host, port=self.port, proxies=self.proxies, ssl=self.use_ssl, timeout=self.timeout)
self.conn = Rocksock(host=self.host, port=self.port, proxies=self.proxies, ssl=self.use_ssl, timeout=self.timeout, verifycert=self.verify_cert)
self.conn.connect()
return True
except RocksockException as e:
@@ -314,15 +344,23 @@ class RsHttp():
hdr, res, redirect = self._send_and_recv(req)
if redirect != '' and self.follow_redirects:
MAX_REDIRECTS = 16
self.redirect_counter += 1
if self.redirect_counter > MAX_REDIRECTS:
return '', ''
host, port, use_ssl, url = _parse_url(redirect)
if port != 0:
self.host = host
if port != -1: # -1: use existing port/ssl
self.port = port
self.use_ssl = use_ssl
self.conn.disconnect()
self.conn = None
self.reconnect()
return self.get(url, extras)
else:
self.redirect_counter = 0
return hdr, res
@@ -371,6 +409,12 @@ class RsHttp():
# http.add_header("Referer: http://bbc.com")
self.headers.append(s)
def add_headers(self, lines):
    """Append every non-empty line of a multi-line header chunk.

    ``lines`` is split on ``'\\n'``; a trailing ``'\\r'`` is stripped from
    each piece, and pieces that end up empty are dropped. The remaining
    lines are added verbatim to ``self.headers``.
    """
    stripped = (raw.rstrip('\r') for raw in lines.split('\n'))
    self.headers.extend(hdr for hdr in stripped if hdr)
def set_cookie(self, c):
if c.lower().startswith('set-cookie: '):
c = c[len('Set-Cookie: '):]

View File

@@ -1,3 +1,23 @@
# rocksock socket library routines for python.
# Copyright (C) 2018-2020 rofl0r
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# you can find the full license text at
# https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html
import socket, ssl, select, copy, errno
# rs_proxyType
@@ -45,7 +65,7 @@ RS_E_REMOTE_DISCONNECTED = 24
RS_E_NO_PROXYSTORAGE = 25
RS_E_HOSTNAME_TOO_LONG = 26
RS_E_INVALID_PROXY_URL = 27
RS_E_SSL_CERTIFICATE_ERROR = 28
class RocksockException(Exception):
def __init__(self, error, failedproxy=None, errortype=RS_ET_OWN, *args, **kwargs):
@@ -66,10 +86,10 @@ class RocksockException(Exception):
def reraise(self):
    """Re-raise the exception currently being handled, traceback intact.

    Meant to be called from inside an ``except`` block. If no exception
    is active, this exception object itself is raised instead.
    """
    import sys
    if sys.exc_info()[1] is None:
        # Nothing is being handled, so there is nothing to re-raise.
        raise self
    # A bare ``raise`` re-raises the active exception with its original
    # traceback on both Python 2 and 3. The previous forms did not:
    # ``raise a, b, c`` is a SyntaxError on Python 3, and ``raise(a, b, c)``
    # raises a tuple (TypeError on Python 3, lost value/traceback on 2).
    raise
def get_errormessage(self):
errordict = {
@@ -100,7 +120,8 @@ class RocksockException(Exception):
RS_E_REMOTE_DISCONNECTED : "remote socket closed connection",
RS_E_NO_PROXYSTORAGE : "no proxy storage assigned",
RS_E_HOSTNAME_TOO_LONG : "hostname exceeds 255 chars",
RS_E_INVALID_PROXY_URL : "invalid proxy URL string"
RS_E_INVALID_PROXY_URL : "invalid proxy URL string",
RS_E_SSL_CERTIFICATE_ERROR : "certificate check error"
}
if self.errortype == RS_ET_SYS:
if self.error in errno.errorcode:
@@ -111,7 +132,7 @@ class RocksockException(Exception):
msg = "GAI: " + self.failedproxy
elif self.errortype == RS_ET_SSL:
msg = errordict[self.error]
if self.error == RS_E_SSL_GENERIC and self.failedproxy != None:
if (self.error == RS_E_SSL_GENERIC or self.error == RS_E_SSL_CERTIFICATE_ERROR) and self.failedproxy != None:
msg += ': ' + self.failedproxy #failedproxy is repurposed for SSL exceptions
else: #RS_ET_OWN
msg = errordict[self.error] + " (proxy %d)"%self.failedproxy
@@ -121,7 +142,7 @@ class RocksockException(Exception):
class RocksockHostinfo():
def __init__(self, host, port):
if port < 0 or port > 65535:
raise RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=-1)
raise(RocksockException(RS_E_INVALID_PROXY_URL, failedproxy=-1))
self.host = host
self.port = port
@@ -151,7 +172,7 @@ def resolve(hostinfo, want_v4=True):
except socket.gaierror as e:
eno, str = e.args
raise RocksockException(eno, str, errortype=RS_ET_GAI)
raise(RocksockException(eno, str, errortype=RS_ET_GAI))
return None, None
@@ -164,7 +185,7 @@ class RocksockProxy():
'http' : RS_PT_HTTP }
self.type = typemap[type] if type in typemap else type
if not self.type in [RS_PT_NONE, RS_PT_SOCKS4, RS_PT_SOCKS5, RS_PT_HTTP]:
raise ValueError('Invalid proxy type')
raise(ValueError('Invalid proxy type'))
self.username = username
self.password = password
self.hostinfo = RocksockHostinfo(host, port)
@@ -193,7 +214,10 @@ class Rocksock():
if 'ssl' in kwargs and kwargs['ssl'] == True:
self.sslcontext = ssl.create_default_context()
self.sslcontext.check_hostname = False
if not verifycert: self.sslcontext.verify_mode = ssl.CERT_NONE
self.sslcontext.verify_mode = ssl.CERT_NONE
if verifycert:
self.sslcontext.verify_mode = ssl.CERT_OPTIONAL
self.sslcontext.check_hostname = True
else:
self.sslcontext = None
self.proxychain = []
@@ -225,21 +249,21 @@ class Rocksock():
try:
x = af+1
except TypeError:
raise RocksockException(-3, "unexpected problem resolving DNS, try again", failedproxy=self._failed_proxy(0), errortype=RS_ET_GAI)
# print "GOT A WEIRD AF"
# print af
# raise RocksockException(-6666, af, errortype=RS_ET_GAI)
raise(RocksockException(-3, "unexpected problem resolving DNS, try again", failedproxy=self._failed_proxy(0), errortype=RS_ET_GAI))
# print("GOT A WEIRD AF")
# print(af)
# raise(RocksockException(-6666, af, errortype=RS_ET_GAI))
self.sock = socket.socket(af, socket.SOCK_STREAM)
self.sock.settimeout(None if self.timeout == 0 else self.timeout)
try:
self.sock.connect((sa[0], sa[1]))
except socket.timeout:
raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(0))
raise(RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(0)))
except socket.error as e:
raise self._translate_socket_error(e, 0)
raise(self._translate_socket_error(e, 0))
for pnum in xrange(1, len(self.proxychain)):
for pnum in range(1, len(self.proxychain)):
curr = self.proxychain[pnum]
prev = self.proxychain[pnum-1]
self._connect_step(pnum)
@@ -247,14 +271,17 @@ class Rocksock():
if self.sslcontext:
try:
self.sock = self.sslcontext.wrap_socket(self.sock, server_hostname=self.proxychain[len(self.proxychain)-1].hostinfo.host)
except ssl.CertificateError as e:
reason = self._get_ssl_exception_reason(e)
raise(RocksockException(RS_E_SSL_CERTIFICATE_ERROR, failedproxy=reason, errortype=RS_ET_SSL))
except ssl.SSLError as e:
reason = self._get_ssl_exception_reason(e)
#if hasattr(e, 'library'): subsystem = e.library
raise RocksockException(RS_E_SSL_GENERIC, failedproxy=reason, errortype=RS_ET_SSL)
raise(RocksockException(RS_E_SSL_GENERIC, failedproxy=reason, errortype=RS_ET_SSL))
except socket.error as e:
raise self._translate_socket_error(e, -1)
raise(self._translate_socket_error(e, -1))
except Exception as e:
raise e
raise(e)
"""
while True:
try:
@@ -281,11 +308,11 @@ class Rocksock():
def send(self, buf, pnum=-1):
    """Send all of ``buf`` over the connected socket.

    Returns whatever ``self.sock.sendall`` returns. Raises a
    RocksockException with RS_E_NO_SOCKET when ``self.sock`` is None;
    a ``socket.error`` from the send is converted through
    ``self._translate_socket_error`` and raised.
    """
    sock = self.sock
    if sock is None:
        failed = self._failed_proxy(pnum)
        raise RocksockException(RS_E_NO_SOCKET, failedproxy=failed)
    try:
        result = sock.sendall(buf)
    except socket.error as err:
        raise self._translate_socket_error(err, pnum)
    return result
def _get_ssl_exception_reason(self, e):
s = ''
@@ -302,17 +329,17 @@ class Rocksock():
if n >= 1024*1024: n = 1024*1024
chunk = self.sock.recv(n)
except socket.timeout:
raise RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_HIT_TIMEOUT, failedproxy=self._failed_proxy(pnum)))
except socket.error as e:
raise self._translate_socket_error(e, pnum)
raise(self._translate_socket_error(e, pnum))
except ssl.SSLError as e:
s = self._get_ssl_exception_reason(e)
if s == 'The read operation timed out':
raise RocksockException(RS_E_HIT_READTIMEOUT, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_HIT_READTIMEOUT, failedproxy=self._failed_proxy(pnum)))
else:
raise RocksockException(RS_E_SSL_GENERIC, failedproxy=s, errortype=RS_ET_SSL)
raise(RocksockException(RS_E_SSL_GENERIC, failedproxy=s, errortype=RS_ET_SSL))
if len(chunk) == 0:
raise RocksockException(RS_E_REMOTE_DISCONNECTED, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_REMOTE_DISCONNECTED, failedproxy=self._failed_proxy(pnum)))
data += chunk
if count == -1: break
else: count -= len(chunk)
@@ -322,12 +349,9 @@ class Rocksock():
s = ''
c = '\0'
while c != '\n':
try:
c = self.recv(1)
if c == '': return s
s += c
except Exception as e:
raise e
return s
def recvuntil(self, until):
@@ -358,7 +382,7 @@ class Rocksock():
buf += '\0\0\0\x01'
else:
af, sa = resolve(dest.hostinfo, True)
if af != socket.AF_INET: raise RocksockException(RS_E_SOCKS4_NO_IP6, failedproxy=-1)
if af != socket.AF_INET: raise(RocksockException(RS_E_SOCKS4_NO_IP6, failedproxy=-1))
buf += self._ip_to_bytes(sa[0])
buf += '\0'
if v4a: buf += dest.hostinfo.host + '\0'
@@ -368,16 +392,16 @@ class Rocksock():
self.send(header)
res = self.recv(8, pnum=pnum)
if len(res) < 8 or ord(res[0]) != 0:
raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum)))
ch = ord(res[1])
if ch == 0x5a:
pass
elif ch == 0x5b:
raise RocksockException(RS_E_TARGETPROXY_CONNECT_FAILED, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_TARGETPROXY_CONNECT_FAILED, failedproxy=self._failed_proxy(pnum)))
elif ch == 0x5c or ch == 0x5d:
return RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum))
else:
raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum)))
def _setup_socks5_header(self, proxy):
buf = '\x05'
@@ -391,9 +415,9 @@ class Rocksock():
self.send(header)
res = self.recv(2, pnum=pnum)
if len(res) != 2 or res[0] != '\x05':
raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum)))
if res[1] == '\xff':
raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum)))
if ord(res[1]) == 2:
px = self.proxychain[pnum-1]
@@ -402,8 +426,8 @@ class Rocksock():
self.send(pkt)
res = self.recv(2, pnum=pnum)
if len(res) < 2 or res[1] != '\0':
raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum))
else: raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum)))
else: raise(RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum)))
dst = self.proxychain[pnum]
numeric = isnumericipv4(dst.hostinfo.host)
if numeric:
@@ -415,18 +439,18 @@ class Rocksock():
self.send(pkt)
res = self.recv(pnum=pnum)
if len(res) < 2 or res[0] != '\x05':
raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum)))
ch = ord(res[1])
if ch == 0: pass
elif ch == 1: raise RocksockException(RS_E_PROXY_GENERAL_FAILURE, failedproxy=self._failed_proxy(pnum))
elif ch == 2: raise RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum))
elif ch == 3: raise RocksockException(RS_E_TARGETPROXY_NET_UNREACHABLE, failedproxy=self._failed_proxy(pnum))
elif ch == 4: raise RocksockException(RS_E_TARGETPROXY_HOST_UNREACHABLE, failedproxy=self._failed_proxy(pnum))
elif ch == 5: raise RocksockException(RS_E_TARGETPROXY_CONN_REFUSED, failedproxy=self._failed_proxy(pnum))
elif ch == 6: raise RocksockException(RS_E_TARGETPROXY_TTL_EXPIRED, failedproxy=self._failed_proxy(pnum))
elif ch == 7: raise RocksockException(RS_E_PROXY_COMMAND_NOT_SUPPORTED, failedproxy=self._failed_proxy(pnum))
elif ch == 8: raise RocksockException(RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED, failedproxy=self._failed_proxy(pnum))
else: raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
elif ch == 1: raise(RocksockException(RS_E_PROXY_GENERAL_FAILURE, failedproxy=self._failed_proxy(pnum)))
elif ch == 2: raise(RocksockException(RS_E_PROXY_AUTH_FAILED, failedproxy=self._failed_proxy(pnum)))
elif ch == 3: raise(RocksockException(RS_E_TARGETPROXY_NET_UNREACHABLE, failedproxy=self._failed_proxy(pnum)))
elif ch == 4: raise(RocksockException(RS_E_TARGETPROXY_HOST_UNREACHABLE, failedproxy=self._failed_proxy(pnum)))
elif ch == 5: raise(RocksockException(RS_E_TARGETPROXY_CONN_REFUSED, failedproxy=self._failed_proxy(pnum)))
elif ch == 6: raise(RocksockException(RS_E_TARGETPROXY_TTL_EXPIRED, failedproxy=self._failed_proxy(pnum)))
elif ch == 7: raise(RocksockException(RS_E_PROXY_COMMAND_NOT_SUPPORTED, failedproxy=self._failed_proxy(pnum)))
elif ch == 8: raise(RocksockException(RS_E_PROXY_ADDRESSTYPE_NOT_SUPPORTED, failedproxy=self._failed_proxy(pnum)))
else: raise(RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum)))
def _connect_step(self, pnum):
@@ -440,7 +464,7 @@ class Rocksock():
if e.get_error() == RS_E_TARGETPROXY_CONNECT_FAILED:
s4 = self._setup_socks4_header(False, curr)
self._connect_socks4(s4a, pnum)
else: raise e
else: raise(e)
elif prev.type == RS_PT_SOCKS5:
s5 = self._setup_socks5_header(prev)
self._connect_socks5(s5, pnum)
@@ -449,9 +473,9 @@ class Rocksock():
self.send("CONNECT %s:%d HTTP/1.1\r\n\r\n"%(dest.hostinfo.host, dest.hostinfo.port))
resp = self.recv(pnum=pnum)
if len(resp) <12:
raise RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_PROXY_UNEXPECTED_RESPONSE, failedproxy=self._failed_proxy(pnum)))
if resp[9] != '2':
raise RocksockException(RS_E_TARGETPROXY_CONNECT_FAILED, failedproxy=self._failed_proxy(pnum))
raise(RocksockException(RS_E_TARGETPROXY_CONNECT_FAILED, failedproxy=self._failed_proxy(pnum)))
if __name__ == '__main__':
@@ -466,12 +490,12 @@ if __name__ == '__main__':
try:
rs.connect()
except RocksockException as e:
print e.get_errormessage()
print(e.get_errormessage())
e.reraise()
rs.send('GET / HTTP/1.0\r\n\r\n')
print rs.recvline()
print(rs.recvline())
rs.disconnect()
rs.connect()
rs.send('GET / HTTP/1.0\r\n\r\n')
print rs.recvline()
print(rs.recvline())