Files
ppf/tests/test_fetch.py
Username 44604f1ce3 tests: add unit test infrastructure
pytest-based test suite with fixtures for database testing.
Covers misc.py utilities, dbs.py operations, and fetch.py validation.
Includes mock_network.py for future network testing.
2026-01-08 01:42:38 +01:00

381 lines
14 KiB
Python

# -*- coding: utf-8 -*-
"""Tests for fetch.py proxy validation and extraction functions."""
from __future__ import print_function
import sys
import os
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# fetch.py has Python 2 dependencies, so the import may fail on other
# interpreters. Record the outcome instead of letting collection abort; the
# module-level skip marker below consults FETCH_AVAILABLE.
try:
    import fetch
except ImportError:
    # Keep the name bound so the module still loads; tests are skipped.
    fetch = None
    FETCH_AVAILABLE = False
else:
    FETCH_AVAILABLE = True
# Module-level pytest marker: pytest applies `pytestmark` to every test in
# this file, so the whole suite is skipped when FETCH_AVAILABLE is False.
pytestmark = pytest.mark.skipif(not FETCH_AVAILABLE, reason="fetch module requires Python 2")
class TestValidPort:
    """Boundary and sanity checks for fetch.valid_port()."""

    def test_port_zero_invalid(self):
        """Port 0 is expected to be refused."""
        outcome = fetch.valid_port(0)
        assert outcome is False

    def test_port_one_valid(self):
        """Port 1 is expected to be accepted."""
        outcome = fetch.valid_port(1)
        assert outcome is True

    def test_port_max_valid(self):
        """Port 65535 is expected to be accepted."""
        outcome = fetch.valid_port(65535)
        assert outcome is True

    def test_port_over_max_invalid(self):
        """Port 65536 is expected to be refused."""
        outcome = fetch.valid_port(65536)
        assert outcome is False

    def test_negative_port_invalid(self):
        """A negative port number is expected to be refused."""
        outcome = fetch.valid_port(-1)
        assert outcome is False

    def test_common_ports_valid(self):
        """Ports commonly used by proxies are all expected to be accepted."""
        for port in (80, 443, 1080, 3128, 8080, 9050):
            assert fetch.valid_port(port) is True
class TestIsValidIpv6:
    """Acceptance and rejection checks for fetch.is_valid_ipv6()."""

    def test_valid_global_unicast(self):
        """Compressed and fully expanded 2001:db8:: addresses are accepted."""
        accepted = (
            '2001:db8::1',
            '2001:0db8:0000:0000:0000:ff00:0042:8329',
        )
        for addr in accepted:
            assert fetch.is_valid_ipv6(addr) is True

    def test_valid_full_address(self):
        """An address written with all eight groups is accepted."""
        addr = '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
        assert fetch.is_valid_ipv6(addr) is True

    def test_reject_loopback(self):
        """The loopback address is refused in short and long form."""
        for addr in ('::1', '0:0:0:0:0:0:0:1'):
            assert fetch.is_valid_ipv6(addr) is False

    def test_reject_unspecified(self):
        """The unspecified address is refused in short and long form."""
        for addr in ('::', '0:0:0:0:0:0:0:0'):
            assert fetch.is_valid_ipv6(addr) is False

    def test_reject_link_local(self):
        """Addresses across fe80::/10 (link-local) are refused."""
        for addr in ('fe80::1', 'fe90::1', 'fea0::1', 'feb0::1'):
            assert fetch.is_valid_ipv6(addr) is False

    def test_reject_unique_local(self):
        """Addresses across fc00::/7 (unique local) are refused."""
        for addr in ('fc00::1', 'fd00::1', 'fdff::1'):
            assert fetch.is_valid_ipv6(addr) is False

    def test_reject_multicast(self):
        """Addresses in ff00::/8 (multicast) are refused."""
        for addr in ('ff00::1', 'ff02::1'):
            assert fetch.is_valid_ipv6(addr) is False

    def test_reject_invalid_format(self):
        """Strings that are not well-formed IPv6 are refused."""
        for text in ('gggg::1', 'not-an-ipv6', ''):
            assert fetch.is_valid_ipv6(text) is False

    def test_reject_multiple_double_colon(self):
        """An address containing '::' twice is refused."""
        addr = '2001::db8::1'
        assert fetch.is_valid_ipv6(addr) is False
class TestIsUsableProxy:
    """Acceptance and rejection checks for fetch.is_usable_proxy()."""

    def test_valid_public_ipv4(self):
        """Public IPv4 host:port strings are accepted."""
        for proxy in ('1.2.3.4:8080', '8.8.8.8:3128', '203.0.113.50:1080'):
            assert fetch.is_usable_proxy(proxy) is True

    def test_reject_private_class_a(self):
        """Hosts inside 10.0.0.0/8 are refused."""
        for proxy in ('10.0.0.1:8080', '10.255.255.255:8080'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_private_class_b(self):
        """Hosts inside 172.16.0.0/12 are refused; its neighbours are not."""
        for inside in ('172.16.0.1:8080', '172.31.255.255:8080'):
            assert fetch.is_usable_proxy(inside) is False
        # 172.15.x.x and 172.32.x.x lie just outside the range and must pass
        for outside in ('172.15.0.1:8080', '172.32.0.1:8080'):
            assert fetch.is_usable_proxy(outside) is True

    def test_reject_private_class_c(self):
        """Hosts inside 192.168.0.0/16 are refused."""
        for proxy in ('192.168.1.1:8080', '192.168.0.1:8080'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_loopback(self):
        """Hosts inside 127.0.0.0/8 are refused."""
        for proxy in ('127.0.0.1:8080', '127.255.255.255:8080'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_link_local(self):
        """A host inside 169.254.0.0/16 is refused."""
        proxy = '169.254.1.1:8080'
        assert fetch.is_usable_proxy(proxy) is False

    def test_reject_cgnat(self):
        """Hosts inside 100.64.0.0/10 are refused; its neighbours are not."""
        for inside in ('100.64.0.1:8080', '100.127.255.255:8080'):
            assert fetch.is_usable_proxy(inside) is False
        # 100.63.x.x and 100.128.x.x lie just outside the range and must pass
        for outside in ('100.63.0.1:8080', '100.128.0.1:8080'):
            assert fetch.is_usable_proxy(outside) is True

    def test_reject_multicast(self):
        """Hosts inside 224.0.0.0/4 are refused."""
        for proxy in ('224.0.0.1:8080', '239.255.255.255:8080'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_reserved(self):
        """Hosts inside 240.0.0.0/4 are refused."""
        for proxy in ('240.0.0.1:8080', '255.255.255.255:8080'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_zero_first_octet(self):
        """Hosts inside 0.0.0.0/8 are refused."""
        for proxy in ('0.0.0.0:8080', '0.1.2.3:8080'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_invalid_port_zero(self):
        """A proxy string with port 0 is refused."""
        proxy = '1.2.3.4:0'
        assert fetch.is_usable_proxy(proxy) is False

    def test_reject_invalid_port_high(self):
        """A proxy string with a port above 65535 is refused."""
        for proxy in ('1.2.3.4:65536', '1.2.3.4:99999'):
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_malformed_ip(self):
        """Host parts that are not valid dotted quads are refused."""
        malformed = (
            '1.2.3:8080',
            '1.2.3.4.5:8080',
            'not-an-ip:8080',
            '1.2.3.256:8080',
        )
        for proxy in malformed:
            assert fetch.is_usable_proxy(proxy) is False

    def test_reject_no_colon(self):
        """An address with no ':port' part is refused."""
        proxy = '1.2.3.4'
        assert fetch.is_usable_proxy(proxy) is False

    def test_auth_format_valid(self):
        """A user:pass@host:port string with a public host is accepted."""
        proxy = 'user:pass@1.2.3.4:8080'
        assert fetch.is_usable_proxy(proxy) is True

    def test_auth_private_ip_rejected(self):
        """Credentials do not make a private host acceptable."""
        proxy = 'user:pass@192.168.1.1:8080'
        assert fetch.is_usable_proxy(proxy) is False

    def test_ipv6_valid(self):
        """A bracketed IPv6 host with a port is accepted."""
        proxy = '[2001:db8::1]:8080'
        assert fetch.is_usable_proxy(proxy) is True

    def test_ipv6_loopback_rejected(self):
        """A bracketed IPv6 loopback host is refused."""
        proxy = '[::1]:8080'
        assert fetch.is_usable_proxy(proxy) is False

    def test_ipv6_malformed_rejected(self):
        """Bracketed text that is not IPv6 is refused."""
        proxy = '[not-ipv6]:8080'
        assert fetch.is_usable_proxy(proxy) is False
class TestNormalizeProto:
    """Checks for the private helper fetch._normalize_proto()."""

    def test_none_returns_none(self):
        """Passing None yields None."""
        normalized = fetch._normalize_proto(None)
        assert normalized is None

    def test_empty_returns_none(self):
        """Passing an empty string yields None."""
        normalized = fetch._normalize_proto('')
        assert normalized is None

    def test_socks5_variants(self):
        """Every SOCKS5 spelling maps to 'socks5'."""
        for raw in ('socks5', 'SOCKS5', 's5', 'tor'):
            assert fetch._normalize_proto(raw) == 'socks5'

    def test_socks4_variants(self):
        """Every SOCKS4 spelling maps to 'socks4'."""
        for raw in ('socks4', 'SOCKS4', 'socks4a', 's4'):
            assert fetch._normalize_proto(raw) == 'socks4'

    def test_http_variants(self):
        """Every HTTP-family spelling maps to 'http'."""
        for raw in ('http', 'HTTP', 'https', 'connect', 'ssl'):
            assert fetch._normalize_proto(raw) == 'http'

    def test_unknown_returns_none(self):
        """Unrecognised protocol names yield None."""
        for raw in ('ftp', 'unknown'):
            assert fetch._normalize_proto(raw) is None
class TestDetectProtoFromPath:
    """Checks for fetch.detect_proto_from_path()."""

    def test_socks5_in_path(self):
        """Paths mentioning socks5 are reported as 'socks5'."""
        for path in ('/socks5/', '/proxy/socks5.txt', 'socks5-proxies.txt'):
            assert fetch.detect_proto_from_path(path) == 'socks5'

    def test_socks4_in_path(self):
        """Paths mentioning socks4 or socks4a are reported as 'socks4'."""
        for path in ('/socks4/', '/socks4a/', 'socks4.txt'):
            assert fetch.detect_proto_from_path(path) == 'socks4'

    def test_http_in_path(self):
        """Paths mentioning http are reported as 'http'."""
        candidates = (
            '/http/',
            'http-proxies.txt',
            'http_list.txt',
            'http.txt',
        )
        for path in candidates:
            assert fetch.detect_proto_from_path(path) == 'http'

    def test_https_ssl_as_http(self):
        """https, ssl and connect paths are reported as 'http' (CONNECT proxies)."""
        for path in ('/https/', '/ssl/', '/connect/'):
            assert fetch.detect_proto_from_path(path) == 'http'

    def test_no_proto_returns_none(self):
        """Paths with no protocol keyword yield None."""
        for path in ('/proxies/', '/data/list.txt', ''):
            assert fetch.detect_proto_from_path(path) is None

    def test_case_insensitive(self):
        """Upper-case keywords are detected as well."""
        expectations = (
            ('/SOCKS5/', 'socks5'),
            ('/HTTP/', 'http'),
        )
        for path, proto in expectations:
            assert fetch.detect_proto_from_path(path) == proto
class TestCleanhtml:
    """Tests for cleanhtml() function."""

    def test_strips_tags(self):
        """Tags are removed while the address and port text survive."""
        result = fetch.cleanhtml('<b>1.2.3.4</b>:<i>8080</i>')
        assert '1.2.3.4' in result
        assert '8080' in result
        assert '<b>' not in result
        assert '</b>' not in result

    def test_replaces_nbsp(self):
        """The &nbsp; entity does not survive cleaning."""
        result = fetch.cleanhtml('1.2.3.4&nbsp;8080')
        assert '&nbsp;' not in result

    def test_collapses_whitespace(self):
        """A run of several spaces leaves no space behind."""
        # Feed a multi-space run so the input matches the "multiple
        # whitespace" case this test is named for; a lone space would not
        # exercise it.
        result = fetch.cleanhtml('1.2.3.4    8080')
        assert ' ' not in result
class TestExtractAuthProxies:
    """Checks for fetch.extract_auth_proxies()."""

    def test_basic_auth_format(self):
        """A bare user:pass@ip:port entry is found with no protocol."""
        text = 'some text user:pass@1.2.3.4:8080 more text'
        found = fetch.extract_auth_proxies(text)
        assert len(found) == 1
        entry = found[0]
        assert entry[0] == 'user:pass@1.2.3.4:8080'
        assert entry[1] is None

    def test_with_protocol_prefix(self):
        """A socks5:// prefix is split off and reported as the protocol."""
        text = 'socks5://user:pass@1.2.3.4:8080'
        found = fetch.extract_auth_proxies(text)
        assert len(found) == 1
        entry = found[0]
        assert entry[0] == 'user:pass@1.2.3.4:8080'
        assert entry[1] == 'socks5'

    def test_http_protocol(self):
        """An http:// prefix is reported as the 'http' protocol."""
        text = 'http://alice:secret@5.6.7.8:3128'
        found = fetch.extract_auth_proxies(text)
        assert len(found) == 1
        assert found[0][1] == 'http'

    def test_multiple_proxies(self):
        """Two entries on separate lines are both found."""
        text = (
            '\n'
            'user1:pass1@1.2.3.4:8080\n'
            'socks5://user2:pass2@5.6.7.8:1080\n'
        )
        found = fetch.extract_auth_proxies(text)
        assert len(found) == 2

    def test_normalizes_ip(self):
        """Zero-padded octets come back without the padding."""
        text = 'user:pass@001.002.003.004:8080'
        found = fetch.extract_auth_proxies(text)
        assert len(found) == 1
        # The returned address must carry the unpadded form of the IP
        address = found[0][0]
        assert '001' not in address
        assert '1.2.3.4' in address

    def test_empty_content(self):
        """An empty string produces an empty list."""
        found = fetch.extract_auth_proxies('')
        assert found == []

    def test_no_match(self):
        """Text with no auth proxy in it produces an empty list."""
        found = fetch.extract_auth_proxies('just some text')
        assert found == []
class TestConfidenceScoring:
    """Ordering checks for the CONFIDENCE_* constants in fetch."""

    def test_auth_highest_confidence(self):
        """CONFIDENCE_AUTH ranks above the JSON and TABLE scores."""
        auth_score = fetch.CONFIDENCE_AUTH
        assert auth_score > fetch.CONFIDENCE_JSON
        assert auth_score > fetch.CONFIDENCE_TABLE

    def test_json_above_table(self):
        """CONFIDENCE_JSON ranks above CONFIDENCE_TABLE."""
        json_score = fetch.CONFIDENCE_JSON
        table_score = fetch.CONFIDENCE_TABLE
        assert json_score > table_score

    def test_table_above_hint(self):
        """CONFIDENCE_TABLE ranks above CONFIDENCE_HINT."""
        table_score = fetch.CONFIDENCE_TABLE
        hint_score = fetch.CONFIDENCE_HINT
        assert table_score > hint_score

    def test_hint_above_regex(self):
        """CONFIDENCE_HINT ranks above CONFIDENCE_REGEX."""
        hint_score = fetch.CONFIDENCE_HINT
        regex_score = fetch.CONFIDENCE_REGEX
        assert hint_score > regex_score