diff --git a/Dockerfile b/Dockerfile
index 470b397..975bc7d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,8 +2,11 @@ FROM python:2.7-slim
 
 WORKDIR /app
 
+# install dependencies (optional - bs4 can be skipped with --nobs); a failed install is reported but does not abort the build
+COPY requirements.txt .
+RUN pip install -r requirements.txt || echo "WARNING: pip install failed; continuing without optional dependencies (use --nobs)" >&2
+
 COPY . .
 
-RUN pip install -r requirements.txt
-
-CMD ["python", "ppf.py"]
\ No newline at end of file
+# default: run syntax check
+CMD ["python", "-m", "py_compile", "ppf.py", "soup_parser.py", "config.py", "fetch.py"]
\ No newline at end of file
diff --git a/test_nobs.py b/test_nobs.py
new file mode 100644
index 0000000..2d169be
--- /dev/null
+++ b/test_nobs.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Test --nobs functionality (stdlib HTML parsing without BeautifulSoup)."""
+
+import sys
+
+OK = "[OK]"
+FAIL = "[FAIL]"
+
+# Test 1: Import soup_parser (any failure here aborts the whole script)
+print("Test 1: Import soup_parser")
+try:
+    import soup_parser
+    print(" %s soup_parser imported" % OK)
+except Exception as e:
+    print(" %s Failed: %s" % (FAIL, e))
+    sys.exit(1)
+
+# Test 2: Check bs4 availability detection (informational only, never fails)
+print("Test 2: Check bs4 availability")
+print(" bs4 available: %s" % soup_parser.is_available())
+
+# Test 3: Test stdlib parser (force --nobs mode); fixture: two <a href> links, one <a> without href, one non-link
+print("Test 3: Test stdlib parser")
+soup_parser.set_nobs(True)
+
+html = '''
+<html>
+<body>
+<a href="http://example.com/1">Link 1</a>
+<a href="http://example.com/2" rel="nofollow">Link 2</a>
+<a>No href</a>
+<span>Not a link</span>
+</body>
+</html>
+'''
+
+soup = soup_parser.soupify(html, nohtml=True)
+links = soup.find_all('a', href=True)
+
+print(" Found %d links with href" % len(links))
+if len(links) == 2:
+    print(" %s Correct count" % OK)
+else:
+    print(" %s Expected 2, got %d" % (FAIL, len(links)))
+    sys.exit(1)
+
+# Test 4: Verify link attributes (every href must point at the example.com fixture host)
+print("Test 4: Verify link attributes")
+for i, link in enumerate(links):
+    href = link['href']
+    print(" Link %d: %s" % (i+1, href))
+    if not href.startswith('http://example.com/'):
+        print(" %s Unexpected href" % FAIL)
+        sys.exit(1)
+print(" %s All hrefs correct" % OK)
+
+# Test 5: Test attrs access (used by fetch.py); the second link is the one carrying rel
+print("Test 5: Test attrs dict access")
+link = links[1]
+if 'rel' in link.attrs:
+    print(" %s rel attribute found: %s" % (OK, link.attrs['rel']))
+else:
+    print(" %s rel attribute missing" % FAIL)
+    sys.exit(1)
+
+print("")
+print("All tests passed!")