# -*- coding: utf-8 -*- """Tests for fetch.py proxy validation and extraction functions.""" from __future__ import print_function import sys import os import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # fetch.py has Python 2 dependencies - skip tests if import fails try: import fetch FETCH_AVAILABLE = True except ImportError as e: FETCH_AVAILABLE = False fetch = None pytestmark = pytest.mark.skipif(not FETCH_AVAILABLE, reason="fetch module requires Python 2") class TestValidPort: """Tests for valid_port() function.""" def test_port_zero_invalid(self): """Port 0 is invalid.""" assert fetch.valid_port(0) is False def test_port_one_valid(self): """Port 1 is valid.""" assert fetch.valid_port(1) is True def test_port_max_valid(self): """Port 65535 is valid.""" assert fetch.valid_port(65535) is True def test_port_over_max_invalid(self): """Port 65536 is invalid.""" assert fetch.valid_port(65536) is False def test_negative_port_invalid(self): """Negative port is invalid.""" assert fetch.valid_port(-1) is False def test_common_ports_valid(self): """Common proxy ports are valid.""" assert fetch.valid_port(80) is True assert fetch.valid_port(443) is True assert fetch.valid_port(1080) is True assert fetch.valid_port(3128) is True assert fetch.valid_port(8080) is True assert fetch.valid_port(9050) is True class TestIsValidIpv6: """Tests for is_valid_ipv6() function.""" def test_valid_global_unicast(self): """Valid global unicast addresses pass.""" assert fetch.is_valid_ipv6('2001:db8::1') is True assert fetch.is_valid_ipv6('2001:0db8:0000:0000:0000:ff00:0042:8329') is True def test_valid_full_address(self): """Full 8-segment address is valid.""" assert fetch.is_valid_ipv6('2001:0db8:85a3:0000:0000:8a2e:0370:7334') is True def test_reject_loopback(self): """Loopback ::1 is rejected.""" assert fetch.is_valid_ipv6('::1') is False assert fetch.is_valid_ipv6('0:0:0:0:0:0:0:1') is False def test_reject_unspecified(self): """Unspecified :: is rejected.""" assert fetch.is_valid_ipv6('::') is False assert fetch.is_valid_ipv6('0:0:0:0:0:0:0:0') is False def test_reject_link_local(self): """Link-local fe80::/10 is rejected.""" assert fetch.is_valid_ipv6('fe80::1') is False assert fetch.is_valid_ipv6('fe90::1') is False assert fetch.is_valid_ipv6('fea0::1') is False assert fetch.is_valid_ipv6('feb0::1') is False def test_reject_unique_local(self): """Unique local fc00::/7 is rejected.""" assert fetch.is_valid_ipv6('fc00::1') is False assert fetch.is_valid_ipv6('fd00::1') is False assert fetch.is_valid_ipv6('fdff::1') is False def test_reject_multicast(self): """Multicast ff00::/8 is rejected.""" assert fetch.is_valid_ipv6('ff00::1') is False assert fetch.is_valid_ipv6('ff02::1') is False def test_reject_invalid_format(self): """Malformed addresses are rejected.""" assert fetch.is_valid_ipv6('gggg::1') is False assert fetch.is_valid_ipv6('not-an-ipv6') is False assert fetch.is_valid_ipv6('') is False def test_reject_multiple_double_colon(self): """Multiple :: in address is invalid.""" assert fetch.is_valid_ipv6('2001::db8::1') is False class TestIsUsableProxy: """Tests for is_usable_proxy() function.""" def test_valid_public_ipv4(self): """Valid public IPv4 proxies pass.""" assert fetch.is_usable_proxy('1.2.3.4:8080') is True assert fetch.is_usable_proxy('8.8.8.8:3128') is True assert fetch.is_usable_proxy('203.0.113.50:1080') is True def test_reject_private_class_a(self): """Private 10.0.0.0/8 is rejected.""" assert fetch.is_usable_proxy('10.0.0.1:8080') is False assert fetch.is_usable_proxy('10.255.255.255:8080') is False def test_reject_private_class_b(self): """Private 172.16.0.0/12 is rejected.""" assert fetch.is_usable_proxy('172.16.0.1:8080') is False assert fetch.is_usable_proxy('172.31.255.255:8080') is False # 172.15.x.x and 172.32.x.x should be valid assert fetch.is_usable_proxy('172.15.0.1:8080') is True assert fetch.is_usable_proxy('172.32.0.1:8080') is True def test_reject_private_class_c(self): """Private 192.168.0.0/16 is rejected.""" assert fetch.is_usable_proxy('192.168.1.1:8080') is False assert fetch.is_usable_proxy('192.168.0.1:8080') is False def test_reject_loopback(self): """Loopback 127.0.0.0/8 is rejected.""" assert fetch.is_usable_proxy('127.0.0.1:8080') is False assert fetch.is_usable_proxy('127.255.255.255:8080') is False def test_reject_link_local(self): """Link-local 169.254.0.0/16 is rejected.""" assert fetch.is_usable_proxy('169.254.1.1:8080') is False def test_reject_cgnat(self): """CGNAT 100.64.0.0/10 is rejected.""" assert fetch.is_usable_proxy('100.64.0.1:8080') is False assert fetch.is_usable_proxy('100.127.255.255:8080') is False # 100.63.x.x and 100.128.x.x should be valid assert fetch.is_usable_proxy('100.63.0.1:8080') is True assert fetch.is_usable_proxy('100.128.0.1:8080') is True def test_reject_multicast(self): """Multicast 224.0.0.0/4 is rejected.""" assert fetch.is_usable_proxy('224.0.0.1:8080') is False assert fetch.is_usable_proxy('239.255.255.255:8080') is False def test_reject_reserved(self): """Reserved 240.0.0.0/4 is rejected.""" assert fetch.is_usable_proxy('240.0.0.1:8080') is False assert fetch.is_usable_proxy('255.255.255.255:8080') is False def test_reject_zero_first_octet(self): """0.0.0.0/8 is rejected.""" assert fetch.is_usable_proxy('0.0.0.0:8080') is False assert fetch.is_usable_proxy('0.1.2.3:8080') is False def test_reject_invalid_port_zero(self): """Port 0 is rejected.""" assert fetch.is_usable_proxy('1.2.3.4:0') is False def test_reject_invalid_port_high(self): """Port > 65535 is rejected.""" assert fetch.is_usable_proxy('1.2.3.4:65536') is False assert fetch.is_usable_proxy('1.2.3.4:99999') is False def test_reject_malformed_ip(self): """Malformed IP addresses are rejected.""" assert fetch.is_usable_proxy('1.2.3:8080') is False assert fetch.is_usable_proxy('1.2.3.4.5:8080') is False assert fetch.is_usable_proxy('not-an-ip:8080') is False assert fetch.is_usable_proxy('1.2.3.256:8080') is False def test_reject_no_colon(self): """String without colon is rejected.""" assert fetch.is_usable_proxy('1.2.3.4') is False def test_auth_format_valid(self): """Authenticated proxy format is valid.""" assert fetch.is_usable_proxy('user:pass@1.2.3.4:8080') is True def test_auth_private_ip_rejected(self): """Auth format with private IP is rejected.""" assert fetch.is_usable_proxy('user:pass@192.168.1.1:8080') is False def test_ipv6_valid(self): """Valid IPv6 proxy is accepted.""" assert fetch.is_usable_proxy('[2001:db8::1]:8080') is True def test_ipv6_loopback_rejected(self): """IPv6 loopback is rejected.""" assert fetch.is_usable_proxy('[::1]:8080') is False def test_ipv6_malformed_rejected(self): """Malformed IPv6 is rejected.""" assert fetch.is_usable_proxy('[not-ipv6]:8080') is False class TestNormalizeProto: """Tests for _normalize_proto() function.""" def test_none_returns_none(self): """None input returns None.""" assert fetch._normalize_proto(None) is None def test_empty_returns_none(self): """Empty string returns None.""" assert fetch._normalize_proto('') is None def test_socks5_variants(self): """SOCKS5 variants normalize to 'socks5'.""" assert fetch._normalize_proto('socks5') == 'socks5' assert fetch._normalize_proto('SOCKS5') == 'socks5' assert fetch._normalize_proto('s5') == 'socks5' assert fetch._normalize_proto('tor') == 'socks5' def test_socks4_variants(self): """SOCKS4 variants normalize to 'socks4'.""" assert fetch._normalize_proto('socks4') == 'socks4' assert fetch._normalize_proto('SOCKS4') == 'socks4' assert fetch._normalize_proto('socks4a') == 'socks4' assert fetch._normalize_proto('s4') == 'socks4' def test_http_variants(self): """HTTP variants normalize to 'http'.""" assert fetch._normalize_proto('http') == 'http' assert fetch._normalize_proto('HTTP') == 'http' assert fetch._normalize_proto('https') == 'http' assert fetch._normalize_proto('connect') == 'http' assert fetch._normalize_proto('ssl') == 'http' def test_unknown_returns_none(self): """Unknown protocol returns None.""" assert fetch._normalize_proto('ftp') is None assert fetch._normalize_proto('unknown') is None class TestDetectProtoFromPath: """Tests for detect_proto_from_path() function.""" def test_socks5_in_path(self): """Detect socks5 from URL path.""" assert fetch.detect_proto_from_path('/socks5/') == 'socks5' assert fetch.detect_proto_from_path('/proxy/socks5.txt') == 'socks5' assert fetch.detect_proto_from_path('socks5-proxies.txt') == 'socks5' def test_socks4_in_path(self): """Detect socks4 from URL path.""" assert fetch.detect_proto_from_path('/socks4/') == 'socks4' assert fetch.detect_proto_from_path('/socks4a/') == 'socks4' assert fetch.detect_proto_from_path('socks4.txt') == 'socks4' def test_http_in_path(self): """Detect http from URL path.""" assert fetch.detect_proto_from_path('/http/') == 'http' assert fetch.detect_proto_from_path('http-proxies.txt') == 'http' assert fetch.detect_proto_from_path('http_list.txt') == 'http' assert fetch.detect_proto_from_path('http.txt') == 'http' def test_https_ssl_as_http(self): """HTTPS/SSL paths return 'http' (CONNECT proxies).""" assert fetch.detect_proto_from_path('/https/') == 'http' assert fetch.detect_proto_from_path('/ssl/') == 'http' assert fetch.detect_proto_from_path('/connect/') == 'http' def test_no_proto_returns_none(self): """No protocol indicator returns None.""" assert fetch.detect_proto_from_path('/proxies/') is None assert fetch.detect_proto_from_path('/data/list.txt') is None assert fetch.detect_proto_from_path('') is None def test_case_insensitive(self): """Detection is case-insensitive.""" assert fetch.detect_proto_from_path('/SOCKS5/') == 'socks5' assert fetch.detect_proto_from_path('/HTTP/') == 'http' class TestCleanhtml: """Tests for cleanhtml() function.""" def test_strips_tags(self): """HTML tags are replaced with colons.""" result = fetch.cleanhtml('1.2.3.4:8080') assert '1.2.3.4' in result assert '8080' in result assert '' not in result assert '' not in result def test_replaces_nbsp(self): """  is replaced with space.""" result = fetch.cleanhtml('1.2.3.4 8080') assert ' ' not in result def test_collapses_whitespace(self): """Multiple whitespace becomes single colon.""" result = fetch.cleanhtml('1.2.3.4 8080') # Whitespace collapsed to colon assert ' ' not in result class TestExtractAuthProxies: """Tests for extract_auth_proxies() function.""" def test_basic_auth_format(self): """Extract basic user:pass@ip:port format.""" content = 'some text user:pass@1.2.3.4:8080 more text' result = fetch.extract_auth_proxies(content) assert len(result) == 1 assert result[0][0] == 'user:pass@1.2.3.4:8080' assert result[0][1] is None def test_with_protocol_prefix(self): """Extract with protocol prefix.""" content = 'socks5://user:pass@1.2.3.4:8080' result = fetch.extract_auth_proxies(content) assert len(result) == 1 assert result[0][0] == 'user:pass@1.2.3.4:8080' assert result[0][1] == 'socks5' def test_http_protocol(self): """Extract HTTP auth proxy.""" content = 'http://alice:secret@5.6.7.8:3128' result = fetch.extract_auth_proxies(content) assert len(result) == 1 assert result[0][1] == 'http' def test_multiple_proxies(self): """Extract multiple auth proxies.""" content = ''' user1:pass1@1.2.3.4:8080 socks5://user2:pass2@5.6.7.8:1080 ''' result = fetch.extract_auth_proxies(content) assert len(result) == 2 def test_normalizes_ip(self): """Leading zeros in IP are normalized.""" content = 'user:pass@001.002.003.004:8080' result = fetch.extract_auth_proxies(content) assert len(result) == 1 # IP normalized to remove leading zeros assert '001' not in result[0][0] assert '1.2.3.4' in result[0][0] def test_empty_content(self): """Empty content returns empty list.""" assert fetch.extract_auth_proxies('') == [] def test_no_match(self): """Content without auth proxies returns empty list.""" assert fetch.extract_auth_proxies('just some text') == [] class TestConfidenceScoring: """Tests for confidence score constants.""" def test_auth_highest_confidence(self): """Auth proxies have highest confidence.""" assert fetch.CONFIDENCE_AUTH > fetch.CONFIDENCE_JSON assert fetch.CONFIDENCE_AUTH > fetch.CONFIDENCE_TABLE def test_json_above_table(self): """JSON has higher confidence than table.""" assert fetch.CONFIDENCE_JSON > fetch.CONFIDENCE_TABLE def test_table_above_hint(self): """Table has higher confidence than hint.""" assert fetch.CONFIDENCE_TABLE > fetch.CONFIDENCE_HINT def test_hint_above_regex(self): """Hint has higher confidence than regex.""" assert fetch.CONFIDENCE_HINT > fetch.CONFIDENCE_REGEX