From b226bc0b035104dcc451dd9fea1fff731d4bfa27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Serneels?= Date: Tue, 14 May 2019 19:31:19 +0200 Subject: [PATCH] check if bad url *after* building the url --- ppf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ppf.py b/ppf.py index 0eada2f..a23cbf8 100755 --- a/ppf.py +++ b/ppf.py @@ -84,13 +84,13 @@ def extract_urls(html, url): item = a['href'].encode('utf-8') if isinstance(a['href'], unicode) else a['href'] item = item.strip() - if is_bad_url(item): - continue - elif item.startswith('www.'): + if item.startswith('www.'): item = 'http://%s' % item elif not item.startswith('http'): if not item.startswith('/'): item = '/%s' % item item = '%s://%s%s' % (proto,domain,item) + elif is_bad_url(item): + continue if not item in urls: urls.append(item)