diff --git a/tests/test_mime_detection.py.disabled b/tests/test_mime_detection.py.disabled deleted file mode 100644 index 9ddee3c..0000000 --- a/tests/test_mime_detection.py.disabled +++ /dev/null @@ -1,395 +0,0 @@ -"""Tests for MIME type detection.""" - -import json - - -class TestMimeDetection: - """Tests for automatic MIME type detection.""" - - def test_detect_png(self, client, png_bytes): - """Detect PNG from magic bytes.""" - response = client.post("/", data=png_bytes) - data = json.loads(response.data) - assert data["mime_type"] == "image/png" - - def test_detect_jpeg(self, client, jpeg_bytes): - """Detect JPEG from magic bytes.""" - response = client.post("/", data=jpeg_bytes) - data = json.loads(response.data) - assert data["mime_type"] == "image/jpeg" - - def test_detect_zip(self, client, zip_bytes): - """Detect ZIP from magic bytes.""" - response = client.post("/", data=zip_bytes) - data = json.loads(response.data) - assert data["mime_type"] == "application/zip" - - def test_detect_pdf(self, client, pdf_bytes): - """Detect PDF from magic bytes.""" - response = client.post("/", data=pdf_bytes) - data = json.loads(response.data) - assert data["mime_type"] == "application/pdf" - - def test_detect_gif87a(self, client): - """Detect GIF87a from magic bytes.""" - response = client.post("/", data=b"GIF87a" + b"\x00" * 10) - data = json.loads(response.data) - assert data["mime_type"] == "image/gif" - - def test_detect_gif89a(self, client): - """Detect GIF89a from magic bytes.""" - response = client.post("/", data=b"GIF89a" + b"\x00" * 10) - data = json.loads(response.data) - assert data["mime_type"] == "image/gif" - - def test_detect_gzip(self, client): - """Detect GZIP from magic bytes.""" - response = client.post("/", data=b"\x1f\x8b\x08" + b"\x00" * 10) - data = json.loads(response.data) - assert data["mime_type"] == "application/gzip" - - def test_detect_utf8_text(self, client): - """UTF-8 text defaults to text/plain.""" - response = client.post("/", data="Hello, world! 你好") - data = json.loads(response.data) - assert data["mime_type"] == "text/plain" - - def test_detect_binary_fallback(self, client): - """Non-UTF8 binary without magic falls back to octet-stream.""" - response = client.post("/", data=b"\x80\x81\x82\x83\x84") - data = json.loads(response.data) - assert data["mime_type"] == "application/octet-stream" - - def test_explicit_content_type_honored(self, client): - """Explicit Content-Type is honored for non-generic types.""" - response = client.post( - "/", - data="
test", - content_type="text/html", - ) - data = json.loads(response.data) - assert data["mime_type"] == "text/html" - - def test_generic_content_type_overridden(self, client, png_bytes): - """Generic Content-Type is overridden by magic detection.""" - response = client.post( - "/", - data=png_bytes, - content_type="application/octet-stream", - ) - data = json.loads(response.data) - assert data["mime_type"] == "image/png" - - def test_webp_detection(self, client): - """Detect WebP from RIFF...WEBP magic.""" - webp_header = b"RIFF\x00\x00\x00\x00WEBP" - response = client.post("/", data=webp_header + b"\x00" * 20) - data = json.loads(response.data) - assert data["mime_type"] == "image/webp" - - def test_riff_non_webp_not_detected(self, client): - """RIFF without WEBP marker is not detected as WebP.""" - riff_other = b"RIFF\x00\x00\x00\x00WAVE" - response = client.post("/", data=riff_other + b"\x00" * 20) - data = json.loads(response.data) - assert data["mime_type"] != "image/webp" - - # --- Additional Image Formats --- - - def test_detect_bmp(self, client): - """Detect BMP from magic bytes.""" - bmp_header = b"BM" + b"\x00" * 50 - response = client.post("/", data=bmp_header) - data = json.loads(response.data) - assert data["mime_type"] == "image/bmp" - - def test_detect_tiff_little_endian(self, client): - """Detect little-endian TIFF from magic bytes.""" - tiff_le = b"II\x2a\x00" + b"\x00" * 50 - response = client.post("/", data=tiff_le) - data = json.loads(response.data) - assert data["mime_type"] == "image/tiff" - - def test_detect_tiff_big_endian(self, client): - """Detect big-endian TIFF from magic bytes.""" - tiff_be = b"MM\x00\x2a" + b"\x00" * 50 - response = client.post("/", data=tiff_be) - data = json.loads(response.data) - assert data["mime_type"] == "image/tiff" - - def test_detect_ico(self, client): - """Detect ICO from magic bytes.""" - ico_header = b"\x00\x00\x01\x00" + b"\x00" * 50 - response = client.post("/", data=ico_header) - data = json.loads(response.data) - assert data["mime_type"] == "image/x-icon" - - def test_detect_heic(self, client): - """Detect HEIC from ftyp box with heic brand.""" - # ftyp box: size (0x18) + "ftyp" + "heic" brand - heic_header = b"\x00\x00\x00\x18\x66\x74\x79\x70\x68\x65\x69\x63" + b"\x00" * 50 - response = client.post("/", data=heic_header) - data = json.loads(response.data) - assert data["mime_type"] == "image/heic" - - def test_detect_heif(self, client): - """Detect HEIF from ftyp box with mif1 brand.""" - # ftyp box: size (0x18) + "ftyp" + "mif1" brand - heif_header = b"\x00\x00\x00\x18\x66\x74\x79\x70\x6d\x69\x66\x31" + b"\x00" * 50 - response = client.post("/", data=heif_header) - data = json.loads(response.data) - assert data["mime_type"] == "image/heif" - - def test_detect_avif(self, client): - """Detect AVIF from ftyp box with avif brand.""" - # ftyp box: size (0x1c) + "ftyp" + "avif" brand - avif_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70\x61\x76\x69\x66" + b"\x00" * 50 - response = client.post("/", data=avif_header) - data = json.loads(response.data) - assert data["mime_type"] == "image/avif" - - # --- Video Formats --- - - def test_detect_webm(self, client): - """Detect WebM/Matroska from magic bytes.""" - webm_header = b"\x1a\x45\xdf\xa3" + b"\x00" * 50 - response = client.post("/", data=webm_header) - data = json.loads(response.data) - assert data["mime_type"] == "video/webm" - - def test_detect_flv(self, client): - """Detect FLV from magic bytes.""" - flv_header = b"FLV\x01" + b"\x00" * 50 - response = client.post("/", data=flv_header) - data = json.loads(response.data) - assert data["mime_type"] == "video/x-flv" - - def test_detect_mp4_ftyp_1c(self, client): - """Detect MP4 from ftyp box (0x1c variant).""" - mp4_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70" + b"\x00" * 50 - response = client.post("/", data=mp4_header) - data = json.loads(response.data) - assert data["mime_type"] == "video/mp4" - - def test_detect_mp4_ftyp_20(self, client): - """Detect MP4 from ftyp box (0x20 variant).""" - mp4_header = b"\x00\x00\x00\x20\x66\x74\x79\x70" + b"\x00" * 50 - response = client.post("/", data=mp4_header) - data = json.loads(response.data) - assert data["mime_type"] == "video/mp4" - - def test_detect_mp4_ftyp_18(self, client): - """Detect MP4 from ftyp box (0x18 variant).""" - mp4_header = b"\x00\x00\x00\x18\x66\x74\x79\x70" + b"\x00" * 50 - response = client.post("/", data=mp4_header) - data = json.loads(response.data) - assert data["mime_type"] == "video/mp4" - - # --- Audio Formats --- - - def test_detect_mp3_id3(self, client): - """Detect MP3 from ID3 tag.""" - mp3_id3 = b"ID3" + b"\x00" * 50 - response = client.post("/", data=mp3_id3) - data = json.loads(response.data) - assert data["mime_type"] == "audio/mpeg" - - def test_detect_mp3_frame_sync_fb(self, client): - """Detect MP3 from frame sync (0xfffb).""" - mp3_sync = b"\xff\xfb" + b"\x00" * 50 - response = client.post("/", data=mp3_sync) - data = json.loads(response.data) - assert data["mime_type"] == "audio/mpeg" - - def test_detect_mp3_frame_sync_fa(self, client): - """Detect MP3 from frame sync (0xfffa).""" - mp3_sync = b"\xff\xfa" + b"\x00" * 50 - response = client.post("/", data=mp3_sync) - data = json.loads(response.data) - assert data["mime_type"] == "audio/mpeg" - - def test_detect_mp3_frame_sync_f3(self, client): - """Detect MP3 from frame sync (0xfff3).""" - mp3_sync = b"\xff\xf3" + b"\x00" * 50 - response = client.post("/", data=mp3_sync) - data = json.loads(response.data) - assert data["mime_type"] == "audio/mpeg" - - def test_detect_mp3_frame_sync_f2(self, client): - """Detect MP3 from frame sync (0xfff2).""" - mp3_sync = b"\xff\xf2" + b"\x00" * 50 - response = client.post("/", data=mp3_sync) - data = json.loads(response.data) - assert data["mime_type"] == "audio/mpeg" - - def test_detect_flac(self, client): - """Detect FLAC from magic bytes.""" - flac_header = b"fLaC" + b"\x00" * 50 - response = client.post("/", data=flac_header) - data = json.loads(response.data) - assert data["mime_type"] == "audio/flac" - - def test_detect_ogg(self, client): - """Detect OGG from magic bytes.""" - ogg_header = b"OggS" + b"\x00" * 50 - response = client.post("/", data=ogg_header) - data = json.loads(response.data) - assert data["mime_type"] == "audio/ogg" - - # --- Document Formats --- - - def test_detect_ole_msoffice(self, client): - """Detect MS Office OLE from magic bytes.""" - ole_header = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" + b"\x00" * 50 - response = client.post("/", data=ole_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/msword" - - # --- Executable Formats --- - - def test_detect_pe_exe(self, client): - """Detect PE/EXE from MZ magic bytes.""" - pe_header = b"MZ" + b"\x00" * 50 - response = client.post("/", data=pe_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-msdownload" - - def test_detect_elf(self, client): - """Detect ELF from magic bytes.""" - elf_header = b"\x7fELF" + b"\x00" * 50 - response = client.post("/", data=elf_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-executable" - - def test_detect_macho_32le(self, client): - """Detect Mach-O 32-bit little-endian.""" - macho_header = b"\xce\xfa\xed\xfe" + b"\x00" * 50 - response = client.post("/", data=macho_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-mach-binary" - - def test_detect_macho_32be(self, client): - """Detect Mach-O 32-bit big-endian.""" - macho_header = b"\xfe\xed\xfa\xce" + b"\x00" * 50 - response = client.post("/", data=macho_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-mach-binary" - - def test_detect_macho_64le(self, client): - """Detect Mach-O 64-bit little-endian.""" - macho_header = b"\xcf\xfa\xed\xfe" + b"\x00" * 50 - response = client.post("/", data=macho_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-mach-binary" - - def test_detect_macho_64be(self, client): - """Detect Mach-O 64-bit big-endian.""" - macho_header = b"\xfe\xed\xfa\xcf" + b"\x00" * 50 - response = client.post("/", data=macho_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-mach-binary" - - def test_detect_macho_fat(self, client): - """Detect Mach-O fat/universal binary.""" - macho_fat = b"\xca\xfe\xba\xbe" + b"\x00" * 50 - response = client.post("/", data=macho_fat) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-mach-binary" - - def test_detect_wasm(self, client): - """Detect WebAssembly from magic bytes.""" - wasm_header = b"\x00asm" + b"\x00" * 50 - response = client.post("/", data=wasm_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/wasm" - - # --- Archive/Compression Formats --- - - def test_detect_bzip2(self, client): - """Detect BZIP2 from magic bytes.""" - bz2_header = b"BZh" + b"\x00" * 50 - response = client.post("/", data=bz2_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-bzip2" - - def test_detect_xz(self, client): - """Detect XZ from magic bytes.""" - xz_header = b"\xfd7zXZ\x00" + b"\x00" * 50 - response = client.post("/", data=xz_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-xz" - - def test_detect_zstd(self, client): - """Detect ZSTD from magic bytes.""" - zstd_header = b"\x28\xb5\x2f\xfd" + b"\x00" * 50 - response = client.post("/", data=zstd_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/zstd" - - def test_detect_lz4(self, client): - """Detect LZ4 from magic bytes.""" - lz4_header = b"\x04\x22\x4d\x18" + b"\x00" * 50 - response = client.post("/", data=lz4_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-lz4" - - def test_detect_7z(self, client): - """Detect 7z from magic bytes.""" - sz_header = b"7z\xbc\xaf\x27\x1c" + b"\x00" * 50 - response = client.post("/", data=sz_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-7z-compressed" - - def test_detect_rar(self, client): - """Detect RAR from magic bytes.""" - rar_header = b"Rar!\x1a\x07" + b"\x00" * 50 - response = client.post("/", data=rar_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/vnd.rar" - - # --- Data Formats --- - - def test_detect_sqlite(self, client): - """Detect SQLite from magic bytes.""" - sqlite_header = b"SQLite format 3\x00" + b"\x00" * 50 - response = client.post("/", data=sqlite_header) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-sqlite3" - - # --- Edge Cases --- - - def test_empty_content_rejected(self, client): - """Empty content is rejected (no empty pastes).""" - response = client.post("/", data=b"") - assert response.status_code == 400 - data = json.loads(response.data) - assert "error" in data - - def test_single_byte_content(self, client): - """Single byte content handles gracefully.""" - response = client.post("/", data=b"x") - data = json.loads(response.data) - assert data["mime_type"] == "text/plain" - - def test_short_binary_content(self, client): - """Short binary content (< magic length) handles gracefully.""" - response = client.post("/", data=b"\x89P") # Truncated PNG - data = json.loads(response.data) - assert data["mime_type"] == "application/octet-stream" - - def test_prefix_boundary_exact_match(self, client): - """Exact magic length content detects correctly.""" - # SQLite has longest signature at 16 bytes - sqlite_exact = b"SQLite format 3\x00" - assert len(sqlite_exact) == 16 - response = client.post("/", data=sqlite_exact) - data = json.loads(response.data) - assert data["mime_type"] == "application/x-sqlite3" - - def test_partial_magic_no_false_positive(self, client): - """Partial magic bytes don't cause false positives.""" - # b"SQLite form" is not a valid signature - partial = b"SQLite form" + b"\x00" * 50 - response = client.post("/", data=partial) - data = json.loads(response.data) - assert data["mime_type"] != "application/x-sqlite3" diff --git a/tests/test_polyglot.py.disabled b/tests/test_polyglot.py.disabled deleted file mode 100644 index b18f166..0000000 --- a/tests/test_polyglot.py.disabled +++ /dev/null @@ -1,146 +0,0 @@ -"""Tests for polyglot file MIME detection. - -Verifies that polyglot files (valid in multiple formats) are detected -by their primary magic bytes at offset 0, not by embedded payloads. -""" - -import json -import sys - -import pytest - -sys.path.insert(0, "tests/security") -from polyglot_generator import ( - generate_gif_js, - generate_pdf_js, - generate_png_html, - generate_polyglot, - generate_zip_html, -) - - -class TestPolyglotDetection: - """Verify polyglot files are detected by primary magic.""" - - def test_gif_js_detected_as_gif(self, client): - """GIF/JS polyglot should be detected as GIF.""" - content = generate_gif_js() - response = client.post("/", data=content) - if response.status_code == 201: - data = json.loads(response.data) - assert data["mime_type"] == "image/gif" - - def test_pdf_js_detected_as_pdf(self, client): - """PDF with JavaScript should be detected as PDF.""" - content = generate_pdf_js() - response = client.post("/", data=content) - if response.status_code == 201: - data = json.loads(response.data) - assert data["mime_type"] == "application/pdf" - - def test_zip_html_detected_as_zip(self, client): - """ZIP containing HTML should be detected as ZIP.""" - content = generate_zip_html() - response = client.post("/", data=content) - if response.status_code == 201: - data = json.loads(response.data) - assert data["mime_type"] == "application/zip" - - def test_png_html_detected_as_png(self, client): - """PNG with trailing HTML should be detected as PNG.""" - content = generate_png_html() - response = client.post("/", data=content) - if response.status_code == 201: - data = json.loads(response.data) - assert data["mime_type"] == "image/png" - - -class TestGenericPolyglots: - """Test generic primary:payload combinations.""" - - @pytest.mark.parametrize( - "primary,expected_mime", - [ - ("png", "image/png"), - ("gif", "image/gif"), - ("jpeg", "image/jpeg"), - ("pdf", "application/pdf"), - ("zip", "application/zip"), - ("gzip", "application/gzip"), - ("elf", "application/x-executable"), - ("pe", "application/x-msdownload"), - ], - ) - @pytest.mark.parametrize("payload", ["html", "js", "php", "shell"]) - def test_primary_format_wins(self, client, primary, expected_mime, payload): - """Primary format magic should determine MIME type, not payload.""" - content = generate_polyglot(primary, payload) - response = client.post("/", data=content, content_type="application/octet-stream") - if response.status_code == 201: - data = json.loads(response.data) - assert data["mime_type"] == expected_mime, ( - f"{primary}:{payload} detected as {data['mime_type']}, expected {expected_mime}" - ) - - -class TestSecurityHeaders: - """Verify security headers prevent polyglot execution.""" - - def test_nosniff_header_on_polyglot(self, client): - """X-Content-Type-Options: nosniff should be present.""" - content = generate_gif_js() - create = client.post("/", data=content) - if create.status_code == 201: - data = json.loads(create.data) - paste_id = data["id"] - raw = client.get(f"/{paste_id}/raw") - assert raw.headers.get("X-Content-Type-Options") == "nosniff" - - def test_csp_header_on_polyglot(self, client): - """CSP should prevent script execution.""" - content = generate_png_html() - create = client.post("/", data=content) - if create.status_code == 201: - data = json.loads(create.data) - paste_id = data["id"] - raw = client.get(f"/{paste_id}/raw") - csp = raw.headers.get("Content-Security-Policy", "") - assert "default-src 'none'" in csp - - def test_xframe_options_on_polyglot(self, client): - """X-Frame-Options should prevent framing.""" - content = generate_pdf_js() - create = client.post("/", data=content) - if create.status_code == 201: - data = json.loads(create.data) - paste_id = data["id"] - raw = client.get(f"/{paste_id}/raw") - assert raw.headers.get("X-Frame-Options") == "DENY" - - -class TestPayloadNotExecuted: - """Verify embedded payloads are returned literally.""" - - def test_html_payload_literal(self, client): - """HTML payload should be returned as-is, not rendered.""" - content = generate_polyglot("png", "html") - create = client.post("/", data=content) - if create.status_code == 201: - data = json.loads(create.data) - paste_id = data["id"] - raw = client.get(f"/{paste_id}/raw") - # Content should contain literal script tag - assert b"