diff --git a/ppf.py b/ppf.py index cbe6c5c..caaf411 100755 --- a/ppf.py +++ b/ppf.py @@ -8,7 +8,7 @@ from misc import _log from config import Config import fetch import sys -from bs4 import BeautifulSoup +from soup_parser import soupify, set_nobs import re import threading import random @@ -55,7 +55,7 @@ def extract_urls(html, url): domain = url.split('/')[2] urls = [] - soup = BeautifulSoup(html, features='lxml') + soup = soupify(html, nohtml=True) for a in soup.find_all('a', href=True): item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href'] @@ -176,6 +176,11 @@ if __name__ == '__main__': config.load() fetch.set_config(config) + # handle --nobs flag + args = config.aparser.parse_args() + if args.nobs: + set_nobs(True) + proxydb = mysqlite.mysqlite(config.watchd.database, str) dbs.create_table_if_not_exists(proxydb, 'proxylist')