diff --git a/README.md b/README.md index f6c25b5..062a59e 100644 --- a/README.md +++ b/README.md @@ -386,7 +386,7 @@ flaskpaste/ │ └── api/ │ ├── __init__.py # Blueprint setup │ └── routes.py # API endpoints -├── tests/ # Test suite (337 tests) +├── tests/ # Test suite (356 tests) ├── data/ # SQLite database ├── run.py # Development server ├── wsgi.py # Production WSGI entry diff --git a/ROADMAP.md b/ROADMAP.md index a0bf38e..4df5d56 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -29,7 +29,7 @@ FlaskPaste v1.5.0 is deployed with comprehensive security hardening and abuse pr - CLI with list, search, update, export commands - Public certificate registration (PoW-protected) - CLI register command for certificate enrollment -- Comprehensive test suite (337 tests) +- Comprehensive test suite (356 tests) - Complete security pentest remediation (15 items) - PKI audit logging (certificate lifecycle events) - Request duration metrics (Prometheus histogram) diff --git a/app/api/routes.py b/app/api/routes.py index daaeff2..306cf52 100644 --- a/app/api/routes.py +++ b/app/api/routes.py @@ -70,9 +70,10 @@ MAGIC_SIGNATURES: dict[bytes, str] = { # Executables b"MZ": "application/x-msdownload", # EXE, DLL b"\x7fELF": "application/x-executable", # ELF (Linux) - b"\xfe\xed\xfa\xce": "application/x-mach-binary", # Mach-O 32-bit - b"\xfe\xed\xfa\xcf": "application/x-mach-binary", # Mach-O 64-bit - b"\xcf\xfa\xed\xfe": "application/x-mach-binary", # Mach-O 64-bit (reversed) + b"\xfe\xed\xfa\xce": "application/x-mach-binary", # Mach-O 32-bit big-endian + b"\xce\xfa\xed\xfe": "application/x-mach-binary", # Mach-O 32-bit little-endian + b"\xfe\xed\xfa\xcf": "application/x-mach-binary", # Mach-O 64-bit big-endian + b"\xcf\xfa\xed\xfe": "application/x-mach-binary", # Mach-O 64-bit little-endian b"\xca\xfe\xba\xbe": "application/x-mach-binary", # Mach-O fat/universal binary b"\x00asm": "application/wasm", # WebAssembly # Compression/Archives diff --git a/tests/test_mime_detection.py b/tests/test_mime_detection.py index e2c43ef..498e015 100644 --- a/tests/test_mime_detection.py +++ b/tests/test_mime_detection.py @@ -93,3 +93,279 @@ class TestMimeDetection: response = client.post("/", data=riff_other + b"\x00" * 20) data = json.loads(response.data) assert data["mime_type"] != "image/webp" + + # --- Additional Image Formats --- + + def test_detect_bmp(self, client): + """Detect BMP from magic bytes.""" + bmp_header = b"BM" + b"\x00" * 50 + response = client.post("/", data=bmp_header) + data = json.loads(response.data) + assert data["mime_type"] == "image/bmp" + + def test_detect_tiff_little_endian(self, client): + """Detect little-endian TIFF from magic bytes.""" + tiff_le = b"II\x2a\x00" + b"\x00" * 50 + response = client.post("/", data=tiff_le) + data = json.loads(response.data) + assert data["mime_type"] == "image/tiff" + + def test_detect_tiff_big_endian(self, client): + """Detect big-endian TIFF from magic bytes.""" + tiff_be = b"MM\x00\x2a" + b"\x00" * 50 + response = client.post("/", data=tiff_be) + data = json.loads(response.data) + assert data["mime_type"] == "image/tiff" + + def test_detect_ico(self, client): + """Detect ICO from magic bytes.""" + ico_header = b"\x00\x00\x01\x00" + b"\x00" * 50 + response = client.post("/", data=ico_header) + data = json.loads(response.data) + assert data["mime_type"] == "image/x-icon" + + # --- Video Formats --- + + def test_detect_webm(self, client): + """Detect WebM/Matroska from magic bytes.""" + webm_header = b"\x1a\x45\xdf\xa3" + b"\x00" * 50 + response = client.post("/", data=webm_header) + data = json.loads(response.data) + assert data["mime_type"] == "video/webm" + + def test_detect_flv(self, client): + """Detect FLV from magic bytes.""" + flv_header = b"FLV\x01" + b"\x00" * 50 + response = client.post("/", data=flv_header) + data = json.loads(response.data) + assert data["mime_type"] == "video/x-flv" + + def test_detect_mp4_ftyp_1c(self, client): + """Detect MP4 from ftyp box (0x1c variant).""" + mp4_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70" + b"\x00" * 50 + response = client.post("/", data=mp4_header) + data = json.loads(response.data) + assert data["mime_type"] == "video/mp4" + + def test_detect_mp4_ftyp_20(self, client): + """Detect MP4 from ftyp box (0x20 variant).""" + mp4_header = b"\x00\x00\x00\x20\x66\x74\x79\x70" + b"\x00" * 50 + response = client.post("/", data=mp4_header) + data = json.loads(response.data) + assert data["mime_type"] == "video/mp4" + + def test_detect_mp4_ftyp_18(self, client): + """Detect MP4 from ftyp box (0x18 variant).""" + mp4_header = b"\x00\x00\x00\x18\x66\x74\x79\x70" + b"\x00" * 50 + response = client.post("/", data=mp4_header) + data = json.loads(response.data) + assert data["mime_type"] == "video/mp4" + + # --- Audio Formats --- + + def test_detect_mp3_id3(self, client): + """Detect MP3 from ID3 tag.""" + mp3_id3 = b"ID3" + b"\x00" * 50 + response = client.post("/", data=mp3_id3) + data = json.loads(response.data) + assert data["mime_type"] == "audio/mpeg" + + def test_detect_mp3_frame_sync_fb(self, client): + """Detect MP3 from frame sync (0xfffb).""" + mp3_sync = b"\xff\xfb" + b"\x00" * 50 + response = client.post("/", data=mp3_sync) + data = json.loads(response.data) + assert data["mime_type"] == "audio/mpeg" + + def test_detect_mp3_frame_sync_fa(self, client): + """Detect MP3 from frame sync (0xfffa).""" + mp3_sync = b"\xff\xfa" + b"\x00" * 50 + response = client.post("/", data=mp3_sync) + data = json.loads(response.data) + assert data["mime_type"] == "audio/mpeg" + + def test_detect_mp3_frame_sync_f3(self, client): + """Detect MP3 from frame sync (0xfff3).""" + mp3_sync = b"\xff\xf3" + b"\x00" * 50 + response = client.post("/", data=mp3_sync) + data = json.loads(response.data) + assert data["mime_type"] == "audio/mpeg" + + def test_detect_mp3_frame_sync_f2(self, client): + """Detect MP3 from frame sync (0xfff2).""" + mp3_sync = b"\xff\xf2" + b"\x00" * 50 + response = client.post("/", data=mp3_sync) + data = json.loads(response.data) + assert data["mime_type"] == "audio/mpeg" + + def test_detect_flac(self, client): + """Detect FLAC from magic bytes.""" + flac_header = b"fLaC" + b"\x00" * 50 + response = client.post("/", data=flac_header) + data = json.loads(response.data) + assert data["mime_type"] == "audio/flac" + + def test_detect_ogg(self, client): + """Detect OGG from magic bytes.""" + ogg_header = b"OggS" + b"\x00" * 50 + response = client.post("/", data=ogg_header) + data = json.loads(response.data) + assert data["mime_type"] == "audio/ogg" + + # --- Document Formats --- + + def test_detect_ole_msoffice(self, client): + """Detect MS Office OLE from magic bytes.""" + ole_header = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" + b"\x00" * 50 + response = client.post("/", data=ole_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/msword" + + # --- Executable Formats --- + + def test_detect_pe_exe(self, client): + """Detect PE/EXE from MZ magic bytes.""" + pe_header = b"MZ" + b"\x00" * 50 + response = client.post("/", data=pe_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-msdownload" + + def test_detect_elf(self, client): + """Detect ELF from magic bytes.""" + elf_header = b"\x7fELF" + b"\x00" * 50 + response = client.post("/", data=elf_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-executable" + + def test_detect_macho_32le(self, client): + """Detect Mach-O 32-bit little-endian.""" + macho_header = b"\xce\xfa\xed\xfe" + b"\x00" * 50 + response = client.post("/", data=macho_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-mach-binary" + + def test_detect_macho_32be(self, client): + """Detect Mach-O 32-bit big-endian.""" + macho_header = b"\xfe\xed\xfa\xce" + b"\x00" * 50 + response = client.post("/", data=macho_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-mach-binary" + + def test_detect_macho_64le(self, client): + """Detect Mach-O 64-bit little-endian.""" + macho_header = b"\xcf\xfa\xed\xfe" + b"\x00" * 50 + response = client.post("/", data=macho_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-mach-binary" + + def test_detect_macho_64be(self, client): + """Detect Mach-O 64-bit big-endian.""" + macho_header = b"\xfe\xed\xfa\xcf" + b"\x00" * 50 + response = client.post("/", data=macho_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-mach-binary" + + def test_detect_macho_fat(self, client): + """Detect Mach-O fat/universal binary.""" + macho_fat = b"\xca\xfe\xba\xbe" + b"\x00" * 50 + response = client.post("/", data=macho_fat) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-mach-binary" + + def test_detect_wasm(self, client): + """Detect WebAssembly from magic bytes.""" + wasm_header = b"\x00asm" + b"\x00" * 50 + response = client.post("/", data=wasm_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/wasm" + + # --- Archive/Compression Formats --- + + def test_detect_bzip2(self, client): + """Detect BZIP2 from magic bytes.""" + bz2_header = b"BZh" + b"\x00" * 50 + response = client.post("/", data=bz2_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-bzip2" + + def test_detect_xz(self, client): + """Detect XZ from magic bytes.""" + xz_header = b"\xfd7zXZ\x00" + b"\x00" * 50 + response = client.post("/", data=xz_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-xz" + + def test_detect_zstd(self, client): + """Detect ZSTD from magic bytes.""" + zstd_header = b"\x28\xb5\x2f\xfd" + b"\x00" * 50 + response = client.post("/", data=zstd_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/zstd" + + def test_detect_lz4(self, client): + """Detect LZ4 from magic bytes.""" + lz4_header = b"\x04\x22\x4d\x18" + b"\x00" * 50 + response = client.post("/", data=lz4_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-lz4" + + def test_detect_7z(self, client): + """Detect 7z from magic bytes.""" + sz_header = b"7z\xbc\xaf\x27\x1c" + b"\x00" * 50 + response = client.post("/", data=sz_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-7z-compressed" + + def test_detect_rar(self, client): + """Detect RAR from magic bytes.""" + rar_header = b"Rar!\x1a\x07" + b"\x00" * 50 + response = client.post("/", data=rar_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/vnd.rar" + + # --- Data Formats --- + + def test_detect_sqlite(self, client): + """Detect SQLite from magic bytes.""" + sqlite_header = b"SQLite format 3\x00" + b"\x00" * 50 + response = client.post("/", data=sqlite_header) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-sqlite3" + + # --- Edge Cases --- + + def test_empty_content_rejected(self, client): + """Empty content is rejected (no empty pastes).""" + response = client.post("/", data=b"") + assert response.status_code == 400 + data = json.loads(response.data) + assert "error" in data + + def test_single_byte_content(self, client): + """Single byte content handles gracefully.""" + response = client.post("/", data=b"x") + data = json.loads(response.data) + assert data["mime_type"] == "text/plain" + + def test_short_binary_content(self, client): + """Short binary content (< magic length) handles gracefully.""" + response = client.post("/", data=b"\x89P") # Truncated PNG + data = json.loads(response.data) + assert data["mime_type"] == "application/octet-stream" + + def test_prefix_boundary_exact_match(self, client): + """Exact magic length content detects correctly.""" + # SQLite has longest signature at 16 bytes + sqlite_exact = b"SQLite format 3\x00" + assert len(sqlite_exact) == 16 + response = client.post("/", data=sqlite_exact) + data = json.loads(response.data) + assert data["mime_type"] == "application/x-sqlite3" + + def test_partial_magic_no_false_positive(self, client): + """Partial magic bytes don't cause false positives.""" + # b"SQLite form" is not a valid signature + partial = b"SQLite form" + b"\x00" * 50 + response = client.post("/", data=partial) + data = json.loads(response.data) + assert data["mime_type"] != "application/x-sqlite3"