ppf: use soup_parser instead of direct bs4 import

2025-12-20 17:33:40 +01:00
parent 0fd8424d33
commit 1d865d5250
1 changed files with 7 additions and 2 deletions
--- a/ppf.py
+++ b/ppf.py
@@ -8,7 +8,7 @@ from misc import _log
 from config import Config
 import fetch
 import sys
-from bs4 import BeautifulSoup
+from soup_parser import soupify, set_nobs
 import re
 import threading
 import random
@@ -55,7 +55,7 @@ def extract_urls(html, url):
 	domain = url.split('/')[2]
 	urls = []
-	soup = BeautifulSoup(html, features='lxml')
+	soup = soupify(html, nohtml=True)
 	for a in soup.find_all('a', href=True):
 		item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href']
@@ -176,6 +176,11 @@ if __name__ == '__main__':
 	config.load()
 	fetch.set_config(config)
+	# handle --nobs flag
+	args = config.aparser.parse_args()
+	if args.nobs:
+		set_nobs(True)
 	proxydb = mysqlite.mysqlite(config.watchd.database, str)
 	dbs.create_table_if_not_exists(proxydb, 'proxylist')