standardize code style: shebangs, class definitions, comments

2025-12-20 18:05:41 +01:00
parent 4c9a658d26
commit 86cabd1562
5 changed files with 11 additions and 23 deletions
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2

 import time, sys

@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2

 import threading
 import time, random, string, re, copy
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2

 import dbs
 import random, time
@@ -1,9 +1,9 @@
-"""HTML parsing with optional BeautifulSoup or stdlib fallback."""
+#!/usr/bin/env python2
+# HTML parsing with optional BeautifulSoup or stdlib fallback

 from HTMLParser import HTMLParser
 import sys

-# Track if bs4 is available
 _bs4_available = False
 _use_bs4 = True

@@ -14,8 +14,7 @@ except ImportError:
 	_bs4_available = False


-class Tag(object):
-	"""Minimal BeautifulSoup Tag interface."""
+class Tag():
 	def __init__(self, name, attrs):
 		self.name = name
 		self.attrs = dict(attrs)
@@ -27,19 +26,16 @@ class Tag(object):
 		return self.attrs.get(key, default)


-class SoupResult(object):
-	"""Minimal BeautifulSoup result interface."""
+class SoupResult():
 	def __init__(self, tags):
 		self._tags = tags
-		self.body = self  # self-reference for soup.body.find_all()
+		self.body = self

 	def find_all(self, tag_name, **kwargs):
-		"""Find all tags matching criteria."""
 		results = []
 		for tag in self._tags:
 			if tag.name != tag_name:
 				continue
-			# check href=True means "has href attribute"
 			if 'href' in kwargs:
 				if kwargs['href'] is True and 'href' not in tag.attrs:
 					continue
@@ -50,7 +46,6 @@ class SoupResult(object):


 class LinkExtractor(HTMLParser):
-	"""HTMLParser-based link extractor."""
 	def __init__(self):
 		HTMLParser.__init__(self)
 		self.tags = []
@@ -63,17 +58,15 @@ class LinkExtractor(HTMLParser):


 def _parse_stdlib(html):
-	"""Parse HTML using stdlib HTMLParser."""
 	parser = LinkExtractor()
 	try:
 		parser.feed(html)
 	except:
-		pass  # tolerate malformed HTML
+		pass
 	return SoupResult(parser.tags)


 def _parse_bs4(html):
-	"""Parse HTML using BeautifulSoup."""
 	try:
 		return BeautifulSoup(html, 'lxml')
 	except (FeatureNotFound, Exception):
@@ -81,7 +74,6 @@ def _parse_bs4(html):


 def set_nobs(enabled):
-	"""Disable BeautifulSoup usage."""
 	global _use_bs4
 	_use_bs4 = not enabled
 	if enabled and _bs4_available:
@@ -91,9 +83,7 @@ def set_nobs(enabled):


 def soupify(html, nohtml=False):
-	"""Parse HTML and return soup-like object."""
 	htm = html if nohtml else '<html><body>%s</body></html>' % (html)
-
 	if _use_bs4 and _bs4_available:
 		return _parse_bs4(htm)
 	else:
@@ -101,5 +91,4 @@ def soupify(html, nohtml=False):


 def is_available():
-	"""Check if BeautifulSoup is available."""
 	return _bs4_available
@@ -1,6 +1,5 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""Test --nobs functionality (stdlib HTML parsing without BeautifulSoup)."""
+#!/usr/bin/env python2
+# test --nobs functionality (stdlib HTML parsing without BeautifulSoup)

 import sys