From b99f83a9913e4c5adf4a72c86e5ca8e7a52d1638 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Fri, 18 Jan 2019 19:32:37 +0000 Subject: [PATCH] fetch.py: improve readability of extract_urls --- fetch.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fetch.py b/fetch.py index 4f54bf6..9493ce7 100644 --- a/fetch.py +++ b/fetch.py @@ -107,7 +107,11 @@ def extract_urls(content, urls = None, urignore=None): soup = soupify(content) for a in soup.body.find_all('a'): if not 'rel' in a.attrs or not 'noreferrer' in a.attrs['rel'] or a.attrs['href'] in urls: continue - badurl = [ i for i in urignore if re.findall(i,a.attrs['href'], re.IGNORECASE) ] - if not len(badurl): urls.append(a.attrs['href']) + bad = False + for i in urignore: + if re.findall(i,a.attrs['href'], re.IGNORECASE): + bad = True + break + if not bad: urls.append(a.attrs['href']) return urls