"""Tests for MIME type detection.""" import json class TestMimeDetection: """Tests for automatic MIME type detection.""" def test_detect_png(self, client, png_bytes): """Detect PNG from magic bytes.""" response = client.post("/", data=png_bytes) data = json.loads(response.data) assert data["mime_type"] == "image/png" def test_detect_jpeg(self, client, jpeg_bytes): """Detect JPEG from magic bytes.""" response = client.post("/", data=jpeg_bytes) data = json.loads(response.data) assert data["mime_type"] == "image/jpeg" def test_detect_zip(self, client, zip_bytes): """Detect ZIP from magic bytes.""" response = client.post("/", data=zip_bytes) data = json.loads(response.data) assert data["mime_type"] == "application/zip" def test_detect_pdf(self, client, pdf_bytes): """Detect PDF from magic bytes.""" response = client.post("/", data=pdf_bytes) data = json.loads(response.data) assert data["mime_type"] == "application/pdf" def test_detect_gif87a(self, client): """Detect GIF87a from magic bytes.""" response = client.post("/", data=b"GIF87a" + b"\x00" * 10) data = json.loads(response.data) assert data["mime_type"] == "image/gif" def test_detect_gif89a(self, client): """Detect GIF89a from magic bytes.""" response = client.post("/", data=b"GIF89a" + b"\x00" * 10) data = json.loads(response.data) assert data["mime_type"] == "image/gif" def test_detect_gzip(self, client): """Detect GZIP from magic bytes.""" response = client.post("/", data=b"\x1f\x8b\x08" + b"\x00" * 10) data = json.loads(response.data) assert data["mime_type"] == "application/gzip" def test_detect_utf8_text(self, client): """UTF-8 text defaults to text/plain.""" response = client.post("/", data="Hello, world! 你好") data = json.loads(response.data) assert data["mime_type"] == "text/plain" def test_detect_binary_fallback(self, client): """Non-UTF8 binary without magic falls back to octet-stream.""" response = client.post("/", data=b"\x80\x81\x82\x83\x84") data = json.loads(response.data) assert data["mime_type"] == "application/octet-stream" def test_explicit_content_type_honored(self, client): """Explicit Content-Type is honored for non-generic types.""" response = client.post( "/", data="test", content_type="text/html", ) data = json.loads(response.data) assert data["mime_type"] == "text/html" def test_generic_content_type_overridden(self, client, png_bytes): """Generic Content-Type is overridden by magic detection.""" response = client.post( "/", data=png_bytes, content_type="application/octet-stream", ) data = json.loads(response.data) assert data["mime_type"] == "image/png" def test_webp_detection(self, client): """Detect WebP from RIFF...WEBP magic.""" webp_header = b"RIFF\x00\x00\x00\x00WEBP" response = client.post("/", data=webp_header + b"\x00" * 20) data = json.loads(response.data) assert data["mime_type"] == "image/webp" def test_riff_non_webp_not_detected(self, client): """RIFF without WEBP marker is not detected as WebP.""" riff_other = b"RIFF\x00\x00\x00\x00WAVE" response = client.post("/", data=riff_other + b"\x00" * 20) data = json.loads(response.data) assert data["mime_type"] != "image/webp" # --- Additional Image Formats --- def test_detect_bmp(self, client): """Detect BMP from magic bytes.""" bmp_header = b"BM" + b"\x00" * 50 response = client.post("/", data=bmp_header) data = json.loads(response.data) assert data["mime_type"] == "image/bmp" def test_detect_tiff_little_endian(self, client): """Detect little-endian TIFF from magic bytes.""" tiff_le = b"II\x2a\x00" + b"\x00" * 50 response = client.post("/", data=tiff_le) data = json.loads(response.data) assert data["mime_type"] == "image/tiff" def test_detect_tiff_big_endian(self, client): """Detect big-endian TIFF from magic bytes.""" tiff_be = b"MM\x00\x2a" + b"\x00" * 50 response = client.post("/", data=tiff_be) data = json.loads(response.data) assert data["mime_type"] == "image/tiff" def test_detect_ico(self, client): """Detect ICO from magic bytes.""" ico_header = b"\x00\x00\x01\x00" + b"\x00" * 50 response = client.post("/", data=ico_header) data = json.loads(response.data) assert data["mime_type"] == "image/x-icon" def test_detect_heic(self, client): """Detect HEIC from ftyp box with heic brand.""" # ftyp box: size (0x18) + "ftyp" + "heic" brand heic_header = b"\x00\x00\x00\x18\x66\x74\x79\x70\x68\x65\x69\x63" + b"\x00" * 50 response = client.post("/", data=heic_header) data = json.loads(response.data) assert data["mime_type"] == "image/heic" def test_detect_heif(self, client): """Detect HEIF from ftyp box with mif1 brand.""" # ftyp box: size (0x18) + "ftyp" + "mif1" brand heif_header = b"\x00\x00\x00\x18\x66\x74\x79\x70\x6d\x69\x66\x31" + b"\x00" * 50 response = client.post("/", data=heif_header) data = json.loads(response.data) assert data["mime_type"] == "image/heif" def test_detect_avif(self, client): """Detect AVIF from ftyp box with avif brand.""" # ftyp box: size (0x1c) + "ftyp" + "avif" brand avif_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70\x61\x76\x69\x66" + b"\x00" * 50 response = client.post("/", data=avif_header) data = json.loads(response.data) assert data["mime_type"] == "image/avif" # --- Video Formats --- def test_detect_webm(self, client): """Detect WebM/Matroska from magic bytes.""" webm_header = b"\x1a\x45\xdf\xa3" + b"\x00" * 50 response = client.post("/", data=webm_header) data = json.loads(response.data) assert data["mime_type"] == "video/webm" def test_detect_flv(self, client): """Detect FLV from magic bytes.""" flv_header = b"FLV\x01" + b"\x00" * 50 response = client.post("/", data=flv_header) data = json.loads(response.data) assert data["mime_type"] == "video/x-flv" def test_detect_mp4_ftyp_1c(self, client): """Detect MP4 from ftyp box (0x1c variant).""" mp4_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70" + b"\x00" * 50 response = client.post("/", data=mp4_header) data = json.loads(response.data) assert data["mime_type"] == "video/mp4" def test_detect_mp4_ftyp_20(self, client): """Detect MP4 from ftyp box (0x20 variant).""" mp4_header = b"\x00\x00\x00\x20\x66\x74\x79\x70" + b"\x00" * 50 response = client.post("/", data=mp4_header) data = json.loads(response.data) assert data["mime_type"] == "video/mp4" def test_detect_mp4_ftyp_18(self, client): """Detect MP4 from ftyp box (0x18 variant).""" mp4_header = b"\x00\x00\x00\x18\x66\x74\x79\x70" + b"\x00" * 50 response = client.post("/", data=mp4_header) data = json.loads(response.data) assert data["mime_type"] == "video/mp4" # --- Audio Formats --- def test_detect_mp3_id3(self, client): """Detect MP3 from ID3 tag.""" mp3_id3 = b"ID3" + b"\x00" * 50 response = client.post("/", data=mp3_id3) data = json.loads(response.data) assert data["mime_type"] == "audio/mpeg" def test_detect_mp3_frame_sync_fb(self, client): """Detect MP3 from frame sync (0xfffb).""" mp3_sync = b"\xff\xfb" + b"\x00" * 50 response = client.post("/", data=mp3_sync) data = json.loads(response.data) assert data["mime_type"] == "audio/mpeg" def test_detect_mp3_frame_sync_fa(self, client): """Detect MP3 from frame sync (0xfffa).""" mp3_sync = b"\xff\xfa" + b"\x00" * 50 response = client.post("/", data=mp3_sync) data = json.loads(response.data) assert data["mime_type"] == "audio/mpeg" def test_detect_mp3_frame_sync_f3(self, client): """Detect MP3 from frame sync (0xfff3).""" mp3_sync = b"\xff\xf3" + b"\x00" * 50 response = client.post("/", data=mp3_sync) data = json.loads(response.data) assert data["mime_type"] == "audio/mpeg" def test_detect_mp3_frame_sync_f2(self, client): """Detect MP3 from frame sync (0xfff2).""" mp3_sync = b"\xff\xf2" + b"\x00" * 50 response = client.post("/", data=mp3_sync) data = json.loads(response.data) assert data["mime_type"] == "audio/mpeg" def test_detect_flac(self, client): """Detect FLAC from magic bytes.""" flac_header = b"fLaC" + b"\x00" * 50 response = client.post("/", data=flac_header) data = json.loads(response.data) assert data["mime_type"] == "audio/flac" def test_detect_ogg(self, client): """Detect OGG from magic bytes.""" ogg_header = b"OggS" + b"\x00" * 50 response = client.post("/", data=ogg_header) data = json.loads(response.data) assert data["mime_type"] == "audio/ogg" # --- Document Formats --- def test_detect_ole_msoffice(self, client): """Detect MS Office OLE from magic bytes.""" ole_header = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" + b"\x00" * 50 response = client.post("/", data=ole_header) data = json.loads(response.data) assert data["mime_type"] == "application/msword" # --- Executable Formats --- def test_detect_pe_exe(self, client): """Detect PE/EXE from MZ magic bytes.""" pe_header = b"MZ" + b"\x00" * 50 response = client.post("/", data=pe_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-msdownload" def test_detect_elf(self, client): """Detect ELF from magic bytes.""" elf_header = b"\x7fELF" + b"\x00" * 50 response = client.post("/", data=elf_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-executable" def test_detect_macho_32le(self, client): """Detect Mach-O 32-bit little-endian.""" macho_header = b"\xce\xfa\xed\xfe" + b"\x00" * 50 response = client.post("/", data=macho_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-mach-binary" def test_detect_macho_32be(self, client): """Detect Mach-O 32-bit big-endian.""" macho_header = b"\xfe\xed\xfa\xce" + b"\x00" * 50 response = client.post("/", data=macho_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-mach-binary" def test_detect_macho_64le(self, client): """Detect Mach-O 64-bit little-endian.""" macho_header = b"\xcf\xfa\xed\xfe" + b"\x00" * 50 response = client.post("/", data=macho_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-mach-binary" def test_detect_macho_64be(self, client): """Detect Mach-O 64-bit big-endian.""" macho_header = b"\xfe\xed\xfa\xcf" + b"\x00" * 50 response = client.post("/", data=macho_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-mach-binary" def test_detect_macho_fat(self, client): """Detect Mach-O fat/universal binary.""" macho_fat = b"\xca\xfe\xba\xbe" + b"\x00" * 50 response = client.post("/", data=macho_fat) data = json.loads(response.data) assert data["mime_type"] == "application/x-mach-binary" def test_detect_wasm(self, client): """Detect WebAssembly from magic bytes.""" wasm_header = b"\x00asm" + b"\x00" * 50 response = client.post("/", data=wasm_header) data = json.loads(response.data) assert data["mime_type"] == "application/wasm" # --- Archive/Compression Formats --- def test_detect_bzip2(self, client): """Detect BZIP2 from magic bytes.""" bz2_header = b"BZh" + b"\x00" * 50 response = client.post("/", data=bz2_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-bzip2" def test_detect_xz(self, client): """Detect XZ from magic bytes.""" xz_header = b"\xfd7zXZ\x00" + b"\x00" * 50 response = client.post("/", data=xz_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-xz" def test_detect_zstd(self, client): """Detect ZSTD from magic bytes.""" zstd_header = b"\x28\xb5\x2f\xfd" + b"\x00" * 50 response = client.post("/", data=zstd_header) data = json.loads(response.data) assert data["mime_type"] == "application/zstd" def test_detect_lz4(self, client): """Detect LZ4 from magic bytes.""" lz4_header = b"\x04\x22\x4d\x18" + b"\x00" * 50 response = client.post("/", data=lz4_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-lz4" def test_detect_7z(self, client): """Detect 7z from magic bytes.""" sz_header = b"7z\xbc\xaf\x27\x1c" + b"\x00" * 50 response = client.post("/", data=sz_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-7z-compressed" def test_detect_rar(self, client): """Detect RAR from magic bytes.""" rar_header = b"Rar!\x1a\x07" + b"\x00" * 50 response = client.post("/", data=rar_header) data = json.loads(response.data) assert data["mime_type"] == "application/vnd.rar" # --- Data Formats --- def test_detect_sqlite(self, client): """Detect SQLite from magic bytes.""" sqlite_header = b"SQLite format 3\x00" + b"\x00" * 50 response = client.post("/", data=sqlite_header) data = json.loads(response.data) assert data["mime_type"] == "application/x-sqlite3" # --- Edge Cases --- def test_empty_content_rejected(self, client): """Empty content is rejected (no empty pastes).""" response = client.post("/", data=b"") assert response.status_code == 400 data = json.loads(response.data) assert "error" in data def test_single_byte_content(self, client): """Single byte content handles gracefully.""" response = client.post("/", data=b"x") data = json.loads(response.data) assert data["mime_type"] == "text/plain" def test_short_binary_content(self, client): """Short binary content (< magic length) handles gracefully.""" response = client.post("/", data=b"\x89P") # Truncated PNG data = json.loads(response.data) assert data["mime_type"] == "application/octet-stream" def test_prefix_boundary_exact_match(self, client): """Exact magic length content detects correctly.""" # SQLite has longest signature at 16 bytes sqlite_exact = b"SQLite format 3\x00" assert len(sqlite_exact) == 16 response = client.post("/", data=sqlite_exact) data = json.loads(response.data) assert data["mime_type"] == "application/x-sqlite3" def test_partial_magic_no_false_positive(self, client): """Partial magic bytes don't cause false positives.""" # b"SQLite form" is not a valid signature partial = b"SQLite form" + b"\x00" * 50 response = client.post("/", data=partial) data = json.loads(response.data) assert data["mime_type"] != "application/x-sqlite3"