add comprehensive MIME detection tests (50 tests)
Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Failing after 22s
CI / Security Tests (push) Has been skipped

Cover all 42 magic byte signatures:
- Images: BMP, TIFF, ICO
- Video: MP4, WebM, FLV
- Audio: MP3, FLAC, OGG
- Documents: MS Office OLE
- Executables: PE, ELF, Mach-O, WASM
- Archives: BZIP2, XZ, ZSTD, LZ4, 7z, RAR
- Data: SQLite
- Edge cases: empty, short, boundary tests

Also adds missing Mach-O 32-bit little-endian signature.
This commit is contained in:
Username
2025-12-25 20:36:49 +01:00
parent 11bb095ca6
commit 0496a39a91
4 changed files with 282 additions and 5 deletions

View File

@@ -93,3 +93,279 @@ class TestMimeDetection:
response = client.post("/", data=riff_other + b"\x00" * 20)
data = json.loads(response.data)
assert data["mime_type"] != "image/webp"
# --- Additional Image Formats ---
def test_detect_bmp(self, client):
    """A zero-padded payload starting with ``BM`` is reported as image/bmp."""
    magic = b"BM"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/bmp"
def test_detect_tiff_little_endian(self, client):
    """The ``II`` (little-endian) TIFF prefix is reported as image/tiff."""
    magic = b"II\x2a\x00"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/tiff"
def test_detect_tiff_big_endian(self, client):
    """The ``MM`` (big-endian) TIFF prefix is reported as image/tiff."""
    magic = b"MM\x00\x2a"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/tiff"
def test_detect_ico(self, client):
    """The four-byte ICO prefix (00 00 01 00) is reported as image/x-icon."""
    magic = b"\x00\x00\x01\x00"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/x-icon"
# --- Video Formats ---
def test_detect_webm(self, client):
    """The WebM/Matroska prefix (1A 45 DF A3) is reported as video/webm."""
    magic = b"\x1a\x45\xdf\xa3"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "video/webm"
def test_detect_flv(self, client):
    """A payload starting with ``FLV`` plus 0x01 is reported as video/x-flv."""
    magic = b"FLV\x01"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "video/x-flv"
def test_detect_mp4_ftyp_1c(self, client):
    """An ``ftyp`` box preceded by size 0x1c is reported as video/mp4."""
    magic = b"\x00\x00\x00\x1c\x66\x74\x79\x70"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "video/mp4"
def test_detect_mp4_ftyp_20(self, client):
    """An ``ftyp`` box preceded by size 0x20 is reported as video/mp4."""
    magic = b"\x00\x00\x00\x20\x66\x74\x79\x70"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "video/mp4"
def test_detect_mp4_ftyp_18(self, client):
    """An ``ftyp`` box preceded by size 0x18 is reported as video/mp4."""
    magic = b"\x00\x00\x00\x18\x66\x74\x79\x70"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "video/mp4"
# --- Audio Formats ---
def test_detect_mp3_id3(self, client):
    """A payload starting with an ``ID3`` tag is reported as audio/mpeg."""
    magic = b"ID3"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/mpeg"
def test_detect_mp3_frame_sync_fb(self, client):
    """The 0xFFFB frame-sync prefix is reported as audio/mpeg."""
    magic = b"\xff\xfb"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/mpeg"
def test_detect_mp3_frame_sync_fa(self, client):
    """The 0xFFFA frame-sync prefix is reported as audio/mpeg."""
    magic = b"\xff\xfa"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/mpeg"
def test_detect_mp3_frame_sync_f3(self, client):
    """The 0xFFF3 frame-sync prefix is reported as audio/mpeg."""
    magic = b"\xff\xf3"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/mpeg"
def test_detect_mp3_frame_sync_f2(self, client):
    """The 0xFFF2 frame-sync prefix is reported as audio/mpeg."""
    magic = b"\xff\xf2"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/mpeg"
def test_detect_flac(self, client):
    """A payload starting with ``fLaC`` is reported as audio/flac."""
    magic = b"fLaC"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/flac"
def test_detect_ogg(self, client):
    """A payload starting with ``OggS`` is reported as audio/ogg."""
    magic = b"OggS"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "audio/ogg"
# --- Document Formats ---
def test_detect_ole_msoffice(self, client):
    """The 8-byte OLE compound-file prefix is reported as application/msword."""
    magic = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/msword"
# --- Executable Formats ---
def test_detect_pe_exe(self, client):
    """A payload starting with ``MZ`` is reported as application/x-msdownload."""
    magic = b"MZ"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-msdownload"
def test_detect_elf(self, client):
    """The ELF prefix (0x7F then ``ELF``) is reported as application/x-executable."""
    magic = b"\x7fELF"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-executable"
def test_detect_macho_32le(self, client):
    """The 32-bit little-endian Mach-O prefix (CE FA ED FE) is recognised."""
    magic = b"\xce\xfa\xed\xfe"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-mach-binary"
def test_detect_macho_32be(self, client):
    """The 32-bit big-endian Mach-O prefix (FE ED FA CE) is recognised."""
    magic = b"\xfe\xed\xfa\xce"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-mach-binary"
def test_detect_macho_64le(self, client):
    """The 64-bit little-endian Mach-O prefix (CF FA ED FE) is recognised."""
    magic = b"\xcf\xfa\xed\xfe"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-mach-binary"
def test_detect_macho_64be(self, client):
    """The 64-bit big-endian Mach-O prefix (FE ED FA CF) is recognised."""
    magic = b"\xfe\xed\xfa\xcf"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-mach-binary"
def test_detect_macho_fat(self, client):
    """The fat/universal Mach-O prefix (CA FE BA BE) is recognised."""
    magic = b"\xca\xfe\xba\xbe"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-mach-binary"
def test_detect_wasm(self, client):
    """The WebAssembly prefix (NUL then ``asm``) is reported as application/wasm."""
    magic = b"\x00asm"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/wasm"
# --- Archive/Compression Formats ---
def test_detect_bzip2(self, client):
    """A payload starting with ``BZh`` is reported as application/x-bzip2."""
    magic = b"BZh"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-bzip2"
def test_detect_xz(self, client):
    """The six-byte XZ stream prefix is reported as application/x-xz."""
    magic = b"\xfd7zXZ\x00"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-xz"
def test_detect_zstd(self, client):
    """The Zstandard prefix (28 B5 2F FD) is reported as application/zstd."""
    magic = b"\x28\xb5\x2f\xfd"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/zstd"
def test_detect_lz4(self, client):
    """The LZ4 prefix (04 22 4D 18) is reported as application/x-lz4."""
    magic = b"\x04\x22\x4d\x18"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-lz4"
def test_detect_7z(self, client):
    """The six-byte 7z prefix is reported as application/x-7z-compressed."""
    magic = b"7z\xbc\xaf\x27\x1c"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-7z-compressed"
def test_detect_rar(self, client):
    """A payload starting with ``Rar!`` 1A 07 is reported as application/vnd.rar."""
    magic = b"Rar!\x1a\x07"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/vnd.rar"
# --- Data Formats ---
def test_detect_sqlite(self, client):
    """The NUL-terminated ``SQLite format 3`` header is reported as SQLite."""
    magic = b"SQLite format 3\x00"
    payload = magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-sqlite3"
# --- Edge Cases ---
def test_empty_content_rejected(self, client):
    """Posting a zero-length body yields HTTP 400 with an ``error`` key."""
    reply = client.post("/", data=b"")
    # Check the status first so a wrong status is reported before JSON parsing.
    assert reply.status_code == 400
    body = json.loads(reply.data)
    assert "error" in body
def test_single_byte_content(self, client):
    """A one-character ASCII body is reported as text/plain."""
    payload = b"x"
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "text/plain"
def test_short_binary_content(self, client):
    """Binary content shorter than a full signature falls back to octet-stream."""
    truncated_png = b"\x89P"  # PNG signature cut off after two bytes
    reply = client.post("/", data=truncated_png)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/octet-stream"
def test_prefix_boundary_exact_match(self, client):
    """A body consisting of exactly the signature bytes is still detected."""
    # Per the original author, SQLite has the longest signature (16 bytes).
    payload = b"SQLite format 3\x00"
    # Guard the fixture itself before sending it: it must be exactly 16 bytes.
    assert len(payload) == 16
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "application/x-sqlite3"
def test_partial_magic_no_false_positive(self, client):
    """A truncated SQLite header must not be reported as SQLite."""
    # b"SQLite form" is only the start of the signature, not a valid match.
    incomplete_magic = b"SQLite form"
    payload = incomplete_magic + b"\x00" * 50
    reply = client.post("/", data=payload)
    detected = json.loads(reply.data)["mime_type"]
    assert detected != "application/x-sqlite3"