initial commit

This commit is contained in:
mickael
2019-01-03 16:36:31 +00:00
commit 3fb2dc40a6
12 changed files with 1792 additions and 0 deletions

41
striphtml.py Normal file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python
from HTMLParser import HTMLParser
import re
import sys

import requests
from selenium.webdriver.common.proxy import *
from selenium import webdriver
from selenium.webdriver.common.by import By
# Filesystem path of the PhantomJS executable; passed as the first argument
# to webdriver.PhantomJS when the driver is created further down.
phantomjs_path = '/home/mickael/bin/phantomjs'
def cleanhtml(raw_html):
    """Replace every HTML tag in *raw_html* with a colon separator.

    Each ``<...>`` span (matched non-greedily; ``.`` does not cross a line
    break, so a tag split over several lines is left untouched) becomes a
    single ``:``.  Any run of two or more colons is then collapsed into one.

    Returns the resulting string.
    """
    tag_pattern = re.compile('<.*?>')
    colon_separated = tag_pattern.sub(':', raw_html)
    return re.sub('::+', ':', colon_separated)
class MLStripper(HTMLParser):
    """HTML parser that keeps only the text between tags.

    Feed markup with ``feed()``; every text chunk reported by the parser is
    appended to ``self.fed``.  ``get_data()`` returns the chunks joined into
    one string.
    """

    def __init__(self):
        # Run the base initializer instead of calling reset() on its own.
        # On Python 3 the base __init__ also sets ``convert_charrefs``, which
        # feed() reads; skipping it makes feed() raise AttributeError.  The
        # explicit base-class call (rather than super()) keeps this working
        # with the Python 2 ``HTMLParser`` module as well.
        HTMLParser.__init__(self)
        self.fed = []  # text chunks, in the order the parser reported them

    def handle_data(self, d):
        """Store one chunk of text found outside of tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text chunks concatenated into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of *html* with all markup removed.

    The markup is run through an ``MLStripper`` parser and the text it
    collected is returned as a single string.
    """
    s = MLStripper()
    s.feed(html)
    # feed() may hold back trailing input it cannot classify yet (for
    # example text ending in an unterminated "&..." reference); close()
    # forces that remainder to be processed so it is not silently dropped.
    s.close()
    return s.get_data()
# Command-line options for PhantomJS: send its traffic through the SOCKS5
# proxy listening on 127.0.0.1:9050.
service_args = ['--proxy=127.0.0.1:9050', '--proxy-type=socks5']
driver = webdriver.PhantomJS(phantomjs_path, service_args=service_args)
try:
    driver.get('http://www.proxz.com/proxy_list_fr_1_ext.html')
except Exception:
    # Best effort: when the page cannot be loaded, stop quietly with exit
    # status 0, as the script always intended.
    sys.exit(0)
else:
    html = driver.page_source
finally:
    # Shut the browser down on every path (success, early exit, or an
    # unexpected error) so no PhantomJS process is left running.
    driver.quit()

# Tags become ':' separators -- presumably so that an address and a port
# sitting in neighbouring cells read as "ip:port" for the regex below.
text = cleanhtml(html)
# Collect every "a.b.c.d:port" token; sorted() orders them as plain strings.
proxies = sorted(re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', text))
print(text)
print(proxies)