cleansing

This commit is contained in:
mickael
2019-01-03 21:49:17 +00:00
parent d7eaa62ae8
commit f0b7e2dc2f
5 changed files with 6 additions and 179 deletions

51
ppf.py
View File

@@ -1,23 +1,16 @@
#!/usr/bin/env python
import os
import sys
import socket
import requests
import socks
import random, time
import sqlite3
import re
import urllib
import threading
import hashlib
import ipcalc
from soup_parser import soupify
from ConfigParser import SafeConfigParser
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from dns import resolver
#from selenium import webdriver
sys.path.append('./includes')
import mysqlite
@@ -63,53 +56,21 @@ def import_from_file(fn, sqlite):
sqlite.execute('INSERT INTO uris (added,url,check_time,error) VALUES (?,?,?,?)', (time.time(),u,0,1))
sqlite.commit()
def fetch_contents(uri, driver=None):
def fetch_contents(uri):
headers = base_header
## use requests (default)
if not driver:
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
except: return ''
data = resp.text
## phantomjs
else:
for key, value in enumerate(base_header):
capability_key = 'phantomjs.page.customHeaders.{}'.format(key)
webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS()
try:
driver.implicitly_wait(45)
driver.set_page_load_timeout(45)
driver.get(uri)
data = driver.page_source
except: data = ''
finally:driver.quit()
try: resp = requests.get(uri, timeout=45, headers=headers, verify=False, proxies=proxies)
except: return ''
data = resp.text
for retry_message in retry_messages:
if retry_message in data: return ''
return data
def update_proxy_sources(sqlite, proxies, uri):
    """Record that each proxy in ``proxies`` was seen at source ``uri``.

    One table per proxy is used, named after the hex MD5 digest of the
    proxy string; each table holds the distinct source URIs for that proxy.

    sqlite  -- database handle exposing ``execute()`` (whose result has
               ``fetchall()``) and ``commit()``
    proxies -- iterable of proxy strings
    uri     -- source URI the proxies were found at

    Returns None.
    """
    for proxy in proxies:
        # md5() needs bytes: encode text so this works for unicode input
        # (and on Python 3) instead of raising.
        raw = proxy if isinstance(proxy, bytes) else proxy.encode('utf-8')
        table = hashlib.md5(raw).hexdigest()

        # The table name is a hex digest, so interpolating it is safe;
        # the uri itself is always passed as a bound parameter.
        sqlite.execute('CREATE TABLE IF NOT EXISTS "%s" (uri TEXT)' % table)
        known = sqlite.execute('SELECT uri FROM "%s" WHERE uri=?' % table, (uri,)).fetchall()
        if not known:
            sqlite.execute('INSERT INTO "%s" (uri) VALUES(?)' % table, (uri,))

        # A single commit per proxy covers both the CREATE and the INSERT.
        sqlite.commit()
return data
def insert_proxies(proxies, uri, sqlite):
time_now = time.time()
added = 0
## very wasteful
#update_proxy_sources(sqlite, proxies, uri)
query = [ 'proxy=?' for p in proxies ]
known = [ i[0] for i in sqlite.execute('SELECT proxy FROM proxylist WHERE %s' % ' OR '.join(query), proxies).fetchall() ]
new = [ (time_now,i,3,0) for i in proxies if not i in known ]
@@ -161,7 +122,7 @@ def proxyleech(sqlite, rows):
#print('entering proxyleech...')
for row in rows:
try: content = fetch_contents(row[0], None)
try: content = fetch_contents(row[0])
except: content = ''
uniques = []