cleansing

This commit is contained in:
mickael
2019-01-03 21:49:17 +00:00
parent d7eaa62ae8
commit f0b7e2dc2f
5 changed files with 6 additions and 179 deletions

View File

@@ -1,23 +0,0 @@
[global]
tor_host = 127.0.0.1:9050
i2p_host = 127.0.0.1:4444
common_proxy_ports = 80, 1080, 3124, 3128, 4145, 4444, 8080, 8081, 8118, 8888, 9999
proxylist_reload_every = 180
database = proxylist.sqlite
proxy_max_fail = 5
[watcherd]
enabled = true
proxy_file = false
checktime = 1800
threads = 10
timeout = 15
read_timeout = 20
max_fail = 5
[proxyfind]
enabled = true
search = true
maxfail = 10
timeout = 30
threads = 3

View File

@@ -16,72 +16,3 @@ def random_string(strlen=20):
def _log(strng, level='info'): def _log(strng, level='info'):
print '%s/%s\t%s' % (timestamp(), level, strng) print '%s/%s\t%s' % (timestamp(), level, strng)
def option_matches_options(strng, items):
    """Return the items that match regex pattern `strng`, or False on error.

    Keeps the legacy contract: a (possibly empty) list on success, the
    sentinel False when the pattern is invalid or `items` is not iterable.
    """
    try:
        return [item for item in items if re.match(strng, item)]
    except (re.error, TypeError):
        # narrowed from a bare `except:` so real bugs are no longer hidden
        return False
def prepare_socksocket(self, destination, path, path_item):
    """Build the proxy chain used to reach `destination`.

    Returns a (ok, proxies, path) triple: (True, [rocksock proxy objects],
    path) on success, or (False, False, False) when no usable chain can be
    built. NOTE(review): indentation was reconstructed from a diff view;
    confirm the exact nesting against repository history.
    """
    # invalidate the cached path entry if it is the one being replaced
    if path_item in self.paths and self.paths[path_item]['path'] == path:
        self.paths[path_item]['path'] = False
    #socks.setdefaultproxy()
    # relay to i2p http proxy if *.i2p domain
    if destination.endswith('i2p'):
        # NOTE(review): `proxies` is never assigned on this branch, so the
        # final `return True, proxies, path` would raise NameError -- confirm.
        proxy = random.choice(self.i2p_host).split(':')
        path = False
    # or go with tor
    else:
        proxies = [ rocksock.RocksockProxyFromURL('socks5://%s' % random.choice(self.tor_host)) ]
        #socks.adddefaultproxy(*socks.parseproxy('tor://%s' % random.choice(self.tor_host)))
    # add 'clearnet' proxies to the chain ? (skipped for .onion/.exit hosts)
    if self.proxify and (not destination.endswith('onion') and not destination.endswith('.exit')):
        # get a proxy path (list of [address, scheme] pairs from build_path)
        path = build_path(self, path_item, path)
        # if path isn't long enough, bail out with the failure triple
        if not len(path): return False, False, False
        # append each hop of the chain as a rocksock proxy
        #for inc in xrange(len(path) - 1): socks.adddefaultproxy(*socks.parseproxy('http://%s' % path[inc]))
        #for inc in xrange(len(path) - 1): socks.adddefaultproxy(*socks.parseproxy('%s://%s' % (path[inc][1], path[inc][0])))
        for inc in xrange(len(path)): proxies.append( rocksock.RocksockProxyFromURL('%s://%s' % (path[inc][1], path[inc][0])))
    #return True, socks.socksocket, path
    return True, proxies, path
def build_path(self, path_item, path):
    """Select (or reuse) a random proxy chain for `path_item`.

    Returns a list of [address, scheme] pairs, or [] when not enough
    proxies are available. NOTE(review): indentation was reconstructed
    from a diff view; confirm nesting against repository history.
    """
    # target chain length: base length plus a random extra
    chainlen = random.randint( self.path_len, (self.path_len + self.path_randomlen))
    # if not enough proxies
    # FIXME: try to get a proxylist from database
    if len(self.proxylist) < chainlen: return []
    # valid path already available (cached and not older than path_duration)
    elif (path_item in self.paths and
          self.paths[path_item]['path'] and
          (time.time() - self.paths[path_item]['ticks']) < self.path_duration):
        # take available path if any
        if path != self.paths[path_item]['path']: path = self.paths[path_item]['path']
    # or nope, none available
    # build a new one from scratch
    else:
        path = []
        avail = []
        # dec chainlen if we have to select the exit proxy separately
        if self.exitcountry is not None: chainlen -= 1
        # NOTE(review): `avail` is empty here, so `not item[0] in avail`
        # is always true -- possibly meant to deduplicate; confirm intent.
        #avail = [item[0] for item in self.proxylist if not item[0] in avail and item[1] != str(self.exitcountry).upper()]
        avail = [[item[0],item[2]] for item in self.proxylist if not item[0] in avail and item[1] != str(self.exitcountry).upper()]
        path = random.sample(avail, chainlen)
        # choose the exit proxy
        if self.exitcountry is not None:
            # NOTE(review): `path` holds [address, scheme] pairs, so
            # `item[0] in path` (a bare address) never matches -- confirm.
            #avail = [item[0] for item in self.proxylist if not item[0] in path and item[1] == str(self.exitcountry).upper()]
            avail = [[item[0],item[2]] for item in self.proxylist if not item[0] in path and item[1] == str(self.exitcountry).upper()]
            if not len(avail): return []
            path.append(random.choice(avail))
    return path

View File

@@ -3,7 +3,6 @@
from threading import Thread from threading import Thread
import threading, commands import threading, commands
import socket, time, random, sys, string, re import socket, time, random, sys, string, re
#import sockschain as socks
import requests import requests
#from geoip import geolite2 #from geoip import geolite2

51
ppf.py
View File

@@ -1,23 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
import os
import sys import sys
import socket
import requests import requests
import socks
import random, time import random, time
import sqlite3
import re import re
import urllib import urllib
import threading
import hashlib import hashlib
import ipcalc import ipcalc
from soup_parser import soupify from soup_parser import soupify
from ConfigParser import SafeConfigParser from ConfigParser import SafeConfigParser
from requests.packages.urllib3.exceptions import InsecureRequestWarning from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning) requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from dns import resolver
#from selenium import webdriver
sys.path.append('./includes') sys.path.append('./includes')
import mysqlite import mysqlite
@@ -63,53 +56,21 @@ def import_from_file(fn, sqlite):
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1)) sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1))
sqlite.commit() sqlite.commit()
def fetch_contents(uri, driver=None): def fetch_contents(uri):
headers = base_header headers = base_header
## use requests (default) try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
if not driver: except: return ''
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies) data = resp.text
except: return ''
data = resp.text
## phantomjs
else:
for key, value in enumerate(base_header):
capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS()
try:
driver.implicitly_wait(45)
driver.set_page_load_timeout(45)
driver.get(uri)
data = driver.page_source
except: data = ''
finally:driver.quit()
for retry_message in retry_messages: for retry_message in retry_messages:
if retry_message in data: return '' if retry_message in data: return ''
return data
def update_proxy_sources(sqlite, proxies, uri): return data
for proxy in proxies:
md5sum = hashlib.md5(proxy).hexdigest()
sqlite.execute('CREATE TABLE IF NOT EXISTS "%s" (uri TEXT)' % md5sum)
sqlite.commit()
#check = [ i for i in sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % md5sum, (uri,)).fetchall() ]
check = sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % md5sum, (uri,)).fetchall()
if not len(check):
sqlite.execute('INSERT INTO "%s" (uri) VALUES(?)' % md5sum, (uri,))
sqlite.commit()
def insert_proxies(proxies, uri, sqlite): def insert_proxies(proxies, uri, sqlite):
time_now = time.time() time_now = time.time()
added = 0 added = 0
## very wasteful
#update_proxy_sources(sqlite, proxies, uri)
query = [ 'proxy=?' for p in proxies ] query = [ 'proxy=?' for p in proxies ]
known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ] known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ]
new = [ (time_now,i,3,0) for i in proxies if not i in known ] new = [ (time_now,i,3,0) for i in proxies if not i in known ]
@@ -161,7 +122,7 @@ def proxyleech(sqlite, rows):
#print('entering proxyleech...') #print('entering proxyleech...')
for row in rows: for row in rows:
try: content = fetch_contents(row[0], None) try: content = fetch_contents(row[0])
except: content = '' except: content = ''
uniques = [] uniques = []

View File

@@ -1,41 +0,0 @@
#!/usr/bin/env python
from HTMLParser import HTMLParser
import requests
import re
from selenium.webdriver.common.proxy import *
from selenium import webdriver
from selenium.webdriver.common.by import By
phantomjs_path = '/home/mickael/bin/phantomjs'
def cleanhtml(raw_html):
    """Replace every HTML tag in raw_html with a ':' separator.

    Runs of consecutive separators are collapsed to a single ':'.
    """
    without_tags = re.sub(re.compile('<.*?>'), ':', raw_html)
    return re.sub('::+', ':', without_tags)
class MLStripper(HTMLParser):
    """HTMLParser subclass that keeps only text nodes, discarding markup."""
    def __init__(self):
        # Call the parent initializer instead of only self.reset():
        # HTMLParser.__init__ performs reset() plus per-instance setup
        # that newer HTMLParser versions require before feed() works.
        HTMLParser.__init__(self)
        self.fed = []  # accumulated text chunks, joined by get_data()
    def handle_data(self, d):
        # collect every text node encountered between tags
        self.fed.append(d)
    def get_data(self):
        """Return all collected text as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return `html` with all markup removed, keeping only text content."""
    stripper = MLStripper()
    stripper.feed(html)
    return stripper.get_data()
# Fetch a proxy-list page through the local Tor SOCKS proxy with PhantomJS,
# strip the markup and print every ip:port pair found.
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS(phantomjs_path, service_args=service_args)
try:
    driver.get('http://www.proxz.com/proxy_list_fr_1_ext.html')
except Exception:
    # The original called sys.exit(0) here without importing sys, which
    # raised NameError instead of exiting; raise SystemExit directly.
    # Also quit the driver so the PhantomJS process is not leaked.
    driver.quit()
    raise SystemExit(0)
html = driver.page_source
driver.quit()
text = cleanhtml(html)
# ip:port pairs, e.g. 1.2.3.4:8080
proxies = sorted(re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', text))
print(text)
print(proxies)