Files
flaskpaste/tests/test_mime_detection.py
Username 0496a39a91
Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Failing after 22s
CI / Security Tests (push) Has been skipped
add comprehensive MIME detection tests (50 tests)
Cover all 42 magic byte signatures:
- Images: BMP, TIFF, ICO
- Video: MP4, WebM, FLV
- Audio: MP3, FLAC, OGG
- Documents: MS Office OLE
- Executables: PE, ELF, Mach-O, WASM
- Archives: BZIP2, XZ, ZSTD, LZ4, 7z, RAR
- Data: SQLite
- Edge cases: empty, short, boundary tests

Also adds missing Mach-O 32-bit little-endian signature.
2025-12-25 20:36:49 +01:00

372 lines
15 KiB
Python

"""Tests for MIME type detection."""
import json
class TestMimeDetection:
    """Exercise MIME type detection through the paste upload endpoint.

    Each test POSTs a payload to ``/`` using the ``client`` fixture; most
    of them then compare the ``mime_type`` field of the JSON response body
    against an expected value.
    """

    @staticmethod
    def _padded(magic, pad_len=50):
        """Return ``magic`` followed by ``pad_len`` NUL bytes."""
        return magic + b"\x00" * pad_len

    @staticmethod
    def _detected_mime(client, payload, **post_kwargs):
        """POST ``payload`` to ``/`` and return the reported ``mime_type``.

        Extra keyword arguments (for example ``content_type``) are passed
        through to ``client.post`` unchanged.
        """
        reply = client.post("/", data=payload, **post_kwargs)
        return json.loads(reply.data)["mime_type"]

    def test_detect_png(self, client, png_bytes):
        """The ``png_bytes`` fixture is reported as image/png."""
        assert self._detected_mime(client, png_bytes) == "image/png"

    def test_detect_jpeg(self, client, jpeg_bytes):
        """The ``jpeg_bytes`` fixture is reported as image/jpeg."""
        assert self._detected_mime(client, jpeg_bytes) == "image/jpeg"

    def test_detect_zip(self, client, zip_bytes):
        """The ``zip_bytes`` fixture is reported as application/zip."""
        assert self._detected_mime(client, zip_bytes) == "application/zip"

    def test_detect_pdf(self, client, pdf_bytes):
        """The ``pdf_bytes`` fixture is reported as application/pdf."""
        assert self._detected_mime(client, pdf_bytes) == "application/pdf"

    def test_detect_gif87a(self, client):
        """A GIF87a header is reported as image/gif."""
        payload = self._padded(b"GIF87a", 10)
        assert self._detected_mime(client, payload) == "image/gif"

    def test_detect_gif89a(self, client):
        """A GIF89a header is reported as image/gif."""
        payload = self._padded(b"GIF89a", 10)
        assert self._detected_mime(client, payload) == "image/gif"

    def test_detect_gzip(self, client):
        """A GZIP header is reported as application/gzip."""
        payload = self._padded(b"\x1f\x8b\x08", 10)
        assert self._detected_mime(client, payload) == "application/gzip"

    def test_detect_utf8_text(self, client):
        """UTF-8 text without an explicit type is reported as text/plain."""
        assert self._detected_mime(client, "Hello, world! 你好") == "text/plain"

    def test_detect_binary_fallback(self, client):
        """Non-UTF-8 bytes with no known magic fall back to octet-stream."""
        payload = b"\x80\x81\x82\x83\x84"
        assert self._detected_mime(client, payload) == "application/octet-stream"

    def test_explicit_content_type_honored(self, client):
        """A non-generic explicit Content-Type is kept as the MIME type."""
        mime = self._detected_mime(
            client,
            "<html><body>test</body></html>",
            content_type="text/html",
        )
        assert mime == "text/html"

    def test_generic_content_type_overridden(self, client, png_bytes):
        """A generic Content-Type gives way to magic-byte detection."""
        mime = self._detected_mime(
            client,
            png_bytes,
            content_type="application/octet-stream",
        )
        assert mime == "image/png"

    def test_webp_detection(self, client):
        """A RIFF container carrying the WEBP marker is image/webp."""
        payload = self._padded(b"RIFF\x00\x00\x00\x00WEBP", 20)
        assert self._detected_mime(client, payload) == "image/webp"

    def test_riff_non_webp_not_detected(self, client):
        """A RIFF container with a WAVE marker is not reported as WebP."""
        payload = self._padded(b"RIFF\x00\x00\x00\x00WAVE", 20)
        assert self._detected_mime(client, payload) != "image/webp"

    # --- Additional Image Formats ---

    def test_detect_bmp(self, client):
        """A BMP header is reported as image/bmp."""
        payload = self._padded(b"BM")
        assert self._detected_mime(client, payload) == "image/bmp"

    def test_detect_tiff_little_endian(self, client):
        """A little-endian TIFF header is reported as image/tiff."""
        payload = self._padded(b"II\x2a\x00")
        assert self._detected_mime(client, payload) == "image/tiff"

    def test_detect_tiff_big_endian(self, client):
        """A big-endian TIFF header is reported as image/tiff."""
        payload = self._padded(b"MM\x00\x2a")
        assert self._detected_mime(client, payload) == "image/tiff"

    def test_detect_ico(self, client):
        """An ICO header is reported as image/x-icon."""
        payload = self._padded(b"\x00\x00\x01\x00")
        assert self._detected_mime(client, payload) == "image/x-icon"

    # --- Video Formats ---

    def test_detect_webm(self, client):
        """A WebM/Matroska header is reported as video/webm."""
        payload = self._padded(b"\x1a\x45\xdf\xa3")
        assert self._detected_mime(client, payload) == "video/webm"

    def test_detect_flv(self, client):
        """An FLV header is reported as video/x-flv."""
        payload = self._padded(b"FLV\x01")
        assert self._detected_mime(client, payload) == "video/x-flv"

    def test_detect_mp4_ftyp_1c(self, client):
        """An ftyp box with size byte 0x1c is reported as video/mp4."""
        payload = self._padded(b"\x00\x00\x00\x1c\x66\x74\x79\x70")
        assert self._detected_mime(client, payload) == "video/mp4"

    def test_detect_mp4_ftyp_20(self, client):
        """An ftyp box with size byte 0x20 is reported as video/mp4."""
        payload = self._padded(b"\x00\x00\x00\x20\x66\x74\x79\x70")
        assert self._detected_mime(client, payload) == "video/mp4"

    def test_detect_mp4_ftyp_18(self, client):
        """An ftyp box with size byte 0x18 is reported as video/mp4."""
        payload = self._padded(b"\x00\x00\x00\x18\x66\x74\x79\x70")
        assert self._detected_mime(client, payload) == "video/mp4"

    # --- Audio Formats ---

    def test_detect_mp3_id3(self, client):
        """A leading ID3 tag is reported as audio/mpeg."""
        payload = self._padded(b"ID3")
        assert self._detected_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_fb(self, client):
        """The 0xfffb frame sync is reported as audio/mpeg."""
        payload = self._padded(b"\xff\xfb")
        assert self._detected_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_fa(self, client):
        """The 0xfffa frame sync is reported as audio/mpeg."""
        payload = self._padded(b"\xff\xfa")
        assert self._detected_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_f3(self, client):
        """The 0xfff3 frame sync is reported as audio/mpeg."""
        payload = self._padded(b"\xff\xf3")
        assert self._detected_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_f2(self, client):
        """The 0xfff2 frame sync is reported as audio/mpeg."""
        payload = self._padded(b"\xff\xf2")
        assert self._detected_mime(client, payload) == "audio/mpeg"

    def test_detect_flac(self, client):
        """A FLAC header is reported as audio/flac."""
        payload = self._padded(b"fLaC")
        assert self._detected_mime(client, payload) == "audio/flac"

    def test_detect_ogg(self, client):
        """An OGG header is reported as audio/ogg."""
        payload = self._padded(b"OggS")
        assert self._detected_mime(client, payload) == "audio/ogg"

    # --- Document Formats ---

    def test_detect_ole_msoffice(self, client):
        """An MS Office OLE header is reported as application/msword."""
        payload = self._padded(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1")
        assert self._detected_mime(client, payload) == "application/msword"

    # --- Executable Formats ---

    def test_detect_pe_exe(self, client):
        """An MZ (PE/EXE) header is reported as application/x-msdownload."""
        payload = self._padded(b"MZ")
        assert self._detected_mime(client, payload) == "application/x-msdownload"

    def test_detect_elf(self, client):
        """An ELF header is reported as application/x-executable."""
        payload = self._padded(b"\x7fELF")
        assert self._detected_mime(client, payload) == "application/x-executable"

    def test_detect_macho_32le(self, client):
        """A 32-bit little-endian Mach-O header is recognised."""
        payload = self._padded(b"\xce\xfa\xed\xfe")
        assert self._detected_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_32be(self, client):
        """A 32-bit big-endian Mach-O header is recognised."""
        payload = self._padded(b"\xfe\xed\xfa\xce")
        assert self._detected_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_64le(self, client):
        """A 64-bit little-endian Mach-O header is recognised."""
        payload = self._padded(b"\xcf\xfa\xed\xfe")
        assert self._detected_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_64be(self, client):
        """A 64-bit big-endian Mach-O header is recognised."""
        payload = self._padded(b"\xfe\xed\xfa\xcf")
        assert self._detected_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_fat(self, client):
        """A Mach-O fat/universal binary header is recognised."""
        payload = self._padded(b"\xca\xfe\xba\xbe")
        assert self._detected_mime(client, payload) == "application/x-mach-binary"

    def test_detect_wasm(self, client):
        """A WebAssembly header is reported as application/wasm."""
        payload = self._padded(b"\x00asm")
        assert self._detected_mime(client, payload) == "application/wasm"

    # --- Archive/Compression Formats ---

    def test_detect_bzip2(self, client):
        """A BZIP2 header is reported as application/x-bzip2."""
        payload = self._padded(b"BZh")
        assert self._detected_mime(client, payload) == "application/x-bzip2"

    def test_detect_xz(self, client):
        """An XZ header is reported as application/x-xz."""
        payload = self._padded(b"\xfd7zXZ\x00")
        assert self._detected_mime(client, payload) == "application/x-xz"

    def test_detect_zstd(self, client):
        """A ZSTD header is reported as application/zstd."""
        payload = self._padded(b"\x28\xb5\x2f\xfd")
        assert self._detected_mime(client, payload) == "application/zstd"

    def test_detect_lz4(self, client):
        """An LZ4 header is reported as application/x-lz4."""
        payload = self._padded(b"\x04\x22\x4d\x18")
        assert self._detected_mime(client, payload) == "application/x-lz4"

    def test_detect_7z(self, client):
        """A 7z header is reported as application/x-7z-compressed."""
        payload = self._padded(b"7z\xbc\xaf\x27\x1c")
        assert self._detected_mime(client, payload) == "application/x-7z-compressed"

    def test_detect_rar(self, client):
        """A RAR header is reported as application/vnd.rar."""
        payload = self._padded(b"Rar!\x1a\x07")
        assert self._detected_mime(client, payload) == "application/vnd.rar"

    # --- Data Formats ---

    def test_detect_sqlite(self, client):
        """An SQLite header is reported as application/x-sqlite3."""
        payload = self._padded(b"SQLite format 3\x00")
        assert self._detected_mime(client, payload) == "application/x-sqlite3"

    # --- Edge Cases ---

    def test_empty_content_rejected(self, client):
        """An empty body yields HTTP 400 with an ``error`` key in the JSON."""
        reply = client.post("/", data=b"")
        assert reply.status_code == 400
        body = json.loads(reply.data)
        assert "error" in body

    def test_single_byte_content(self, client):
        """A one-byte ASCII payload is reported as text/plain."""
        assert self._detected_mime(client, b"x") == "text/plain"

    def test_short_binary_content(self, client):
        """Binary content shorter than a magic signature is octet-stream."""
        payload = b"\x89P"  # Truncated PNG
        assert self._detected_mime(client, payload) == "application/octet-stream"

    def test_prefix_boundary_exact_match(self, client):
        """A payload that is exactly the magic bytes is still detected."""
        # SQLite has the longest signature (16 bytes), so it is used to
        # probe the exact-length boundary.
        exact_magic = b"SQLite format 3\x00"
        assert len(exact_magic) == 16
        assert self._detected_mime(client, exact_magic) == "application/x-sqlite3"

    def test_partial_magic_no_false_positive(self, client):
        """A truncated signature prefix must not match the full signature."""
        # b"SQLite form" is not a valid signature
        truncated = self._padded(b"SQLite form")
        assert self._detected_mime(client, truncated) != "application/x-sqlite3"