standardize code style: shebangs, class definitions, comments

This commit is contained in:
Username
2025-12-20 18:05:41 +01:00
parent 4c9a658d26
commit 86cabd1562
5 changed files with 11 additions and 23 deletions

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python2
import time, sys

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python2
import threading
import time, random, string, re, copy

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python2
import dbs
import random, time

View File

@@ -1,9 +1,9 @@
"""HTML parsing with optional BeautifulSoup or stdlib fallback."""
#!/usr/bin/env python2
# HTML parsing with optional BeautifulSoup or stdlib fallback
from HTMLParser import HTMLParser
import sys
# Track if bs4 is available
_bs4_available = False
_use_bs4 = True
@@ -14,8 +14,7 @@ except ImportError:
_bs4_available = False
class Tag(object):
"""Minimal BeautifulSoup Tag interface."""
class Tag():
def __init__(self, name, attrs):
self.name = name
self.attrs = dict(attrs)
@@ -27,19 +26,16 @@ class Tag(object):
return self.attrs.get(key, default)
class SoupResult(object):
"""Minimal BeautifulSoup result interface."""
class SoupResult():
def __init__(self, tags):
self._tags = tags
self.body = self # self-reference for soup.body.find_all()
self.body = self
def find_all(self, tag_name, **kwargs):
"""Find all tags matching criteria."""
results = []
for tag in self._tags:
if tag.name != tag_name:
continue
# check href=True means "has href attribute"
if 'href' in kwargs:
if kwargs['href'] is True and 'href' not in tag.attrs:
continue
@@ -50,7 +46,6 @@ class SoupResult(object):
class LinkExtractor(HTMLParser):
"""HTMLParser-based link extractor."""
def __init__(self):
HTMLParser.__init__(self)
self.tags = []
@@ -63,17 +58,15 @@ class LinkExtractor(HTMLParser):
def _parse_stdlib(html):
"""Parse HTML using stdlib HTMLParser."""
parser = LinkExtractor()
try:
parser.feed(html)
except:
pass # tolerate malformed HTML
pass
return SoupResult(parser.tags)
def _parse_bs4(html):
"""Parse HTML using BeautifulSoup."""
try:
return BeautifulSoup(html, 'lxml')
except (FeatureNotFound, Exception):
@@ -81,7 +74,6 @@ def _parse_bs4(html):
def set_nobs(enabled):
"""Disable BeautifulSoup usage."""
global _use_bs4
_use_bs4 = not enabled
if enabled and _bs4_available:
@@ -91,9 +83,7 @@ def set_nobs(enabled):
def soupify(html, nohtml=False):
"""Parse HTML and return soup-like object."""
htm = html if nohtml else '<html><body>%s</body></html>' % (html)
if _use_bs4 and _bs4_available:
return _parse_bs4(htm)
else:
@@ -101,5 +91,4 @@ def soupify(html, nohtml=False):
def is_available():
"""Check if BeautifulSoup is available."""
return _bs4_available

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Test --nobs functionality (stdlib HTML parsing without BeautifulSoup)."""
#!/usr/bin/env python2
# test --nobs functionality (stdlib HTML parsing without BeautifulSoup)
import sys