cleansing

This commit is contained in:
mickael
2019-01-03 21:49:17 +00:00
parent d7eaa62ae8
commit f0b7e2dc2f
5 changed files with 6 additions and 179 deletions

View File

@@ -1,23 +0,0 @@
[global]
tor_host = 127.0.0.1:9050
i2p_host = 127.0.0.1:4444
common_proxy_ports = 80, 1080, 3124, 3128, 4145, 4444, 8080, 8081, 8118, 8888, 9999
proxylist_reload_every = 180
database = proxylist.sqlite
proxy_max_fail = 5
[watcherd]
enabled = true
proxy_file = false
checktime = 1800
threads = 10
timeout = 15
read_timeout = 20
max_fail = 5
[proxyfind]
enabled = true
search = true
maxfail = 10
timeout = 30
threads = 3

View File

@@ -16,72 +16,3 @@ def random_string(strlen=20):
def _log(strng, level='info'):
    # Emit one log line to stdout as "<timestamp>/<level>\t<message>".
    # Python 2 print statement; `timestamp()` is defined elsewhere in this file.
    print '%s/%s\t%s' % (timestamp(), level, strng)
def option_matches_options(strng, items):
    """Return the sub-list of *items* whose entries match the regex *strng*.

    Matching is anchored at the start of each item (re.match semantics).
    Returns False -- not an empty list -- when the pattern is invalid or the
    arguments are not str/iterable compatible, preserving the original
    silent-failure contract.
    """
    try:
        return [item for item in items if re.match(strng, item)]
    except (re.error, TypeError):
        # narrowed from a bare `except:` -- only pattern-compile and
        # argument-type failures are expected here; anything else should
        # surface instead of being swallowed
        return False
def prepare_socksocket(self, destination, path, path_item):
    """Build the proxy chain used to reach *destination*.

    Returns a 3-tuple (ok, proxies, path):
      ok      -- False when no usable chain could be built
      proxies -- list of rocksock proxy objects, entry node (tor) first
      path    -- the 'clearnet' proxy path used, or False for i2p targets

    NOTE(review): in the *.i2p branch `proxies` is never assigned, so the
    final `return True, proxies, path` would raise NameError for i2p
    destinations -- confirm against callers.
    """
    # drop the cached path when the caller hands back the same (failed?) one
    if path_item in self.paths and self.paths[path_item]['path'] == path:
        self.paths[path_item]['path'] = False
    #socks.setdefaultproxy()
    # relay to i2p http proxy if *.i2p domain
    if destination.endswith('i2p'):
        proxy = random.choice(self.i2p_host).split(':')
        path = False
    # or go with tor
    else:
        # tor SOCKS5 entry node picked at random from the configured hosts
        proxies = [ rocksock.RocksockProxyFromURL('socks5://%s' % random.choice(self.tor_host)) ]
        #socks.adddefaultproxy(*socks.parseproxy('tor://%s' % random.choice(self.tor_host)))
        # add 'clearnet' proxies to the chain ?
        # (.onion / .exit destinations stay tor-only)
        if self.proxify and (not destination.endswith('onion') and not destination.endswith('.exit')):
            # get a proxy path
            path = build_path(self, path_item, path)
            # if path isn't long enough, break
            if not len(path): return False, False, False
            # add chain... path entries appear to be [address, scheme] pairs
            # (see build_path) -- scheme becomes the proxy URL scheme here
            #for inc in xrange(len(path) - 1): socks.adddefaultproxy(*socks.parseproxy('http://%s' % path[inc]))
            #for inc in xrange(len(path) - 1): socks.adddefaultproxy(*socks.parseproxy('%s://%s' % (path[inc][1], path[inc][0])))
            for inc in xrange(len(path)): proxies.append( rocksock.RocksockProxyFromURL('%s://%s' % (path[inc][1], path[inc][0])))
    #return True, socks.socksocket, path
    return True, proxies, path
def build_path(self, path_item, path):
    """Select a proxy chain for *path_item*.

    Reuses the cached chain when it is still within self.path_duration,
    otherwise samples a fresh chain of random length from self.proxylist.
    Proxylist rows are indexed as item[0]=address, item[1]=country code,
    item[2]=proxy type (inferred from the comparisons below).
    Returns a list of [address, type] pairs, or [] when there are not
    enough proxies available.
    """
    # target chain length: base length plus a random extension
    chainlen = random.randint( self.path_len, (self.path_len + self.path_randomlen))
    # if not enough proxies
    # FIXME: try to get a proxylist from database
    if len(self.proxylist) < chainlen: return []
    # valid path already available
    elif (path_item in self.paths and
          self.paths[path_item]['path'] and
          (time.time() - self.paths[path_item]['ticks']) < self.path_duration):
        # take available path if any
        if path != self.paths[path_item]['path']: path = self.paths[path_item]['path']
    # or nope, none available
    # build a new one from scratch
    else:
        path = []
        avail = []
        # dec chainlen if we have to select the exit proxy
        if self.exitcountry is not None: chainlen -= 1
        # NOTE(review): `not item[0] in avail` is always true here because
        # `avail` is still [] -- the intended dedup looks ineffective.
        #avail = [item[0] for item in self.proxylist if not item[0] in avail and item[1] != str(self.exitcountry).upper()]
        avail = [[item[0],item[2]] for item in self.proxylist if not item[0] in avail and item[1] != str(self.exitcountry).upper()]
        path = random.sample(avail, chainlen)
        # choose the exit proxy
        if self.exitcountry is not None:
            # NOTE(review): `item[0] in path` tests a bare address against a
            # list of [address, type] pairs, so it can never match -- the
            # exit node may duplicate a mid-chain node. Confirm intent.
            #avail = [item[0] for item in self.proxylist if not item[0] in path and item[1] == str(self.exitcountry).upper()]
            avail = [[item[0],item[2]] for item in self.proxylist if not item[0] in path and item[1] == str(self.exitcountry).upper()]
            if not len(avail): return []
            path.append(random.choice(avail))
    return path

View File

@@ -3,7 +3,6 @@
from threading import Thread
import threading, commands
import socket, time, random, sys, string, re
#import sockschain as socks
import requests
#from geoip import geolite2

51
ppf.py
View File

@@ -1,23 +1,16 @@
#!/usr/bin/env python
import os
import sys
import socket
import requests
import socks
import random, time
import sqlite3
import re
import urllib
import threading
import hashlib
import ipcalc
from soup_parser import soupify
from ConfigParser import SafeConfigParser
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from dns import resolver
#from selenium import webdriver
sys.path.append('./includes')
import mysqlite
@@ -63,53 +56,21 @@ def import_from_file(fn, sqlite):
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1))
sqlite.commit()
# NOTE(review): this span is a rendered diff overlay, NOT valid standalone
# Python. It interleaves the OLD definition `fetch_contents(uri, driver=None)`
# (with its optional PhantomJS branch) and its replacement
# `fetch_contents(uri)` which always fetches via `requests`. Only one of the
# two definitions existed in the file at any given revision.
def fetch_contents(uri, driver=None):
def fetch_contents(uri):
headers = base_header
## use requests (default)
# --- old implementation: requests path taken when no driver supplied ---
if not driver:
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
except: return ''
data = resp.text
## phantomjs
# --- old implementation: headless-browser fallback (removed by this diff) ---
else:
for key, value in enumerate(base_header):
capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS()
try:
driver.implicitly_wait(45)
driver.set_page_load_timeout(45)
driver.get(uri)
data = driver.page_source
except: data = ''
finally:driver.quit()
# --- new implementation body: unconditional requests fetch ---
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
except: return ''
data = resp.text
# treat any page containing a known retry/ban marker as an empty result
for retry_message in retry_messages:
if retry_message in data: return ''
return data
def update_proxy_sources(sqlite, proxies, uri):
    """Record, for each proxy, which source page it was harvested from.

    One table per proxy is created on demand, named after the MD5 hex
    digest of the proxy address (a digest is a safe SQL identifier, and
    identifiers cannot be bound as parameters). *uri* is inserted at most
    once per proxy table.

    sqlite  -- open sqlite3 connection (provides .execute / .commit)
    proxies -- iterable of proxy addresses (bytes on py3 for hashing)
    uri     -- URL of the source page the proxies were scraped from
    Returns None.
    """
    for proxy in proxies:
        md5sum = hashlib.md5(proxy).hexdigest()
        sqlite.execute('CREATE TABLE IF NOT EXISTS "%s" (uri TEXT)' % md5sum)
        sqlite.commit()
        check = sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % md5sum, (uri,)).fetchall()
        # insert only when this source uri is not yet recorded for the proxy
        if not len(check):
            sqlite.execute('INSERT INTO "%s" (uri) VALUES(?)' % md5sum, (uri,))
            sqlite.commit()
    # bug fix: the original ended with `return data`, but `data` is never
    # bound in this function, so every call raised NameError after the loop.
def insert_proxies(proxies, uri, sqlite):
time_now = time.time()
added = 0
## very wasteful
#update_proxy_sources(sqlite, proxies, uri)
query = [ 'proxy=?' for p in proxies ]
known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ]
new = [ (time_now,i,3,0) for i in proxies if not i in known ]
@@ -161,7 +122,7 @@ def proxyleech(sqlite, rows):
#print('entering proxyleech...')
for row in rows:
try: content = fetch_contents(row[0], None)
try: content = fetch_contents(row[0])
except: content = ''
uniques = []

View File

@@ -1,41 +0,0 @@
#!/usr/bin/env python
from HTMLParser import HTMLParser
import requests
import re
from selenium.webdriver.common.proxy import *
from selenium import webdriver
from selenium.webdriver.common.by import By
phantomjs_path = '/home/mickael/bin/phantomjs'
def cleanhtml(raw_html):
    """Flatten *raw_html* into text with ':' separators.

    Every HTML tag is replaced by a single colon, then any run of
    consecutive colons is collapsed to one, so adjacent tags never
    leave '::' sequences behind.
    """
    tag_pattern = re.compile('<.*?>')
    flattened = tag_pattern.sub(':', raw_html)
    return re.sub('::+', ':', flattened)
class MLStripper(HTMLParser):
    # HTMLParser subclass that keeps only text nodes, discarding all markup.
    # Collected fragments accumulate in self.fed until get_data() is called.
    def __init__(self):
        # NOTE(review): HTMLParser.__init__ is never called; on Python 2
        # that constructor only calls reset(), so calling reset() directly
        # is equivalent -- but this would break under Python 3's
        # html.parser. Confirm before porting.
        self.reset()
        # collected text fragments, in document order
        self.fed = []
    def handle_data(self, d):
        # callback for each text node encountered between tags
        self.fed.append(d)
    def get_data(self):
        # all text seen so far, concatenated without separators
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with all markup removed."""
    # feed the document through a fresh parser and collect only text nodes
    stripper = MLStripper()
    stripper.feed(html)
    return stripper.get_data()
# --- module-level script: scrape one proxz.com listing page through Tor ---
# PhantomJS is routed through the local Tor SOCKS proxy, the rendered page
# is flattened with cleanhtml(), then scanned for "ip:port" patterns.
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS(phantomjs_path,service_args=service_args)
# NOTE(review): `sys` is used below but never imported in this file's
# visible import block -- a failed page load would raise NameError
# instead of exiting cleanly. Confirm and add `import sys`.
try: driver.get('http://www.proxz.com/proxy_list_fr_1_ext.html')
except: sys.exit(0)
html = driver.page_source
driver.quit()
text = cleanhtml(html)
# dotted-quad IPv4 address followed by ':port'
proxies = sorted(re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', text))
print(text)
print(proxies)