Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Successful in 20s
CI / Security Tests (push) Has been skipped
CI / Advanced Security Tests (push) Has been skipped
- Add ftyp box signatures for heic, mif1, and avif brands - Add tests for new image formats - Fix nested if lint warning in lookup rate limit - Update security docs: MKV uses WebM header, TAR needs offset 257
396 lines
16 KiB
Python
396 lines
16 KiB
Python
"""Tests for MIME type detection."""
|
|
|
|
import json
|
|
|
|
|
|
class TestMimeDetection:
    """Tests for automatic MIME type detection.

    Each test POSTs a payload to ``/`` through the ``client`` fixture and
    checks the ``mime_type`` field of the JSON reply.  Most payloads are a
    format's magic bytes followed by NUL padding.
    """

    @staticmethod
    def _mime_of(client, payload, **post_kwargs):
        """POST *payload* to ``/`` and return the reported ``mime_type``.

        Args:
            client: Test client fixture exposing ``post``.
            payload: Request body, passed unchanged as ``data``.
            **post_kwargs: Extra keyword arguments forwarded to
                ``client.post`` (e.g. ``content_type``).

        Returns:
            The value of the ``mime_type`` key in the decoded JSON reply.

        Raises:
            AssertionError: If the reply has no ``mime_type`` key.  The
                message carries the status code and decoded body so an
                error reply is visible instead of a bare ``KeyError``.
        """
        response = client.post("/", data=payload, **post_kwargs)
        body = json.loads(response.data)
        assert "mime_type" in body, "no mime_type in response (status %s): %r" % (
            response.status_code,
            body,
        )
        return body["mime_type"]

    def test_detect_png(self, client, png_bytes):
        """Detect PNG from magic bytes."""
        assert self._mime_of(client, png_bytes) == "image/png"

    def test_detect_jpeg(self, client, jpeg_bytes):
        """Detect JPEG from magic bytes."""
        assert self._mime_of(client, jpeg_bytes) == "image/jpeg"

    def test_detect_zip(self, client, zip_bytes):
        """Detect ZIP from magic bytes."""
        assert self._mime_of(client, zip_bytes) == "application/zip"

    def test_detect_pdf(self, client, pdf_bytes):
        """Detect PDF from magic bytes."""
        assert self._mime_of(client, pdf_bytes) == "application/pdf"

    def test_detect_gif87a(self, client):
        """Detect GIF87a from magic bytes."""
        payload = b"GIF87a" + b"\x00" * 10
        assert self._mime_of(client, payload) == "image/gif"

    def test_detect_gif89a(self, client):
        """Detect GIF89a from magic bytes."""
        payload = b"GIF89a" + b"\x00" * 10
        assert self._mime_of(client, payload) == "image/gif"

    def test_detect_gzip(self, client):
        """Detect GZIP from magic bytes."""
        payload = b"\x1f\x8b\x08" + b"\x00" * 10
        assert self._mime_of(client, payload) == "application/gzip"

    def test_detect_utf8_text(self, client):
        """UTF-8 text defaults to text/plain."""
        # Encode explicitly so the bytes on the wire are UTF-8 by construction
        # rather than by whatever default the test client applies to ``str``.
        payload = "Hello, world! 你好".encode("utf-8")
        assert self._mime_of(client, payload) == "text/plain"

    def test_detect_binary_fallback(self, client):
        """Non-UTF8 binary without magic falls back to octet-stream."""
        payload = b"\x80\x81\x82\x83\x84"
        assert self._mime_of(client, payload) == "application/octet-stream"

    def test_explicit_content_type_honored(self, client):
        """Explicit Content-Type is honored for non-generic types."""
        detected = self._mime_of(
            client,
            "<html><body>test</body></html>",
            content_type="text/html",
        )
        assert detected == "text/html"

    def test_generic_content_type_overridden(self, client, png_bytes):
        """Generic Content-Type is overridden by magic detection."""
        detected = self._mime_of(
            client,
            png_bytes,
            content_type="application/octet-stream",
        )
        assert detected == "image/png"

    def test_webp_detection(self, client):
        """Detect WebP from RIFF...WEBP magic."""
        webp_header = b"RIFF\x00\x00\x00\x00WEBP"
        assert self._mime_of(client, webp_header + b"\x00" * 20) == "image/webp"

    def test_riff_non_webp_not_detected(self, client):
        """RIFF without WEBP marker is not detected as WebP."""
        riff_other = b"RIFF\x00\x00\x00\x00WAVE"
        assert self._mime_of(client, riff_other + b"\x00" * 20) != "image/webp"

    # --- Additional Image Formats ---

    def test_detect_bmp(self, client):
        """Detect BMP from magic bytes."""
        bmp_header = b"BM" + b"\x00" * 50
        assert self._mime_of(client, bmp_header) == "image/bmp"

    def test_detect_tiff_little_endian(self, client):
        """Detect little-endian TIFF from magic bytes."""
        tiff_le = b"II\x2a\x00" + b"\x00" * 50
        assert self._mime_of(client, tiff_le) == "image/tiff"

    def test_detect_tiff_big_endian(self, client):
        """Detect big-endian TIFF from magic bytes."""
        tiff_be = b"MM\x00\x2a" + b"\x00" * 50
        assert self._mime_of(client, tiff_be) == "image/tiff"

    def test_detect_ico(self, client):
        """Detect ICO from magic bytes."""
        ico_header = b"\x00\x00\x01\x00" + b"\x00" * 50
        assert self._mime_of(client, ico_header) == "image/x-icon"

    def test_detect_heic(self, client):
        """Detect HEIC from ftyp box with heic brand."""
        # ftyp box: size (0x18) + "ftyp" + "heic" brand
        heic_header = b"\x00\x00\x00\x18\x66\x74\x79\x70\x68\x65\x69\x63" + b"\x00" * 50
        assert self._mime_of(client, heic_header) == "image/heic"

    def test_detect_heif(self, client):
        """Detect HEIF from ftyp box with mif1 brand."""
        # ftyp box: size (0x18) + "ftyp" + "mif1" brand
        heif_header = b"\x00\x00\x00\x18\x66\x74\x79\x70\x6d\x69\x66\x31" + b"\x00" * 50
        assert self._mime_of(client, heif_header) == "image/heif"

    def test_detect_avif(self, client):
        """Detect AVIF from ftyp box with avif brand."""
        # ftyp box: size (0x1c) + "ftyp" + "avif" brand
        avif_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70\x61\x76\x69\x66" + b"\x00" * 50
        assert self._mime_of(client, avif_header) == "image/avif"

    # --- Video Formats ---

    def test_detect_webm(self, client):
        """Detect WebM/Matroska from magic bytes."""
        webm_header = b"\x1a\x45\xdf\xa3" + b"\x00" * 50
        assert self._mime_of(client, webm_header) == "video/webm"

    def test_detect_flv(self, client):
        """Detect FLV from magic bytes."""
        flv_header = b"FLV\x01" + b"\x00" * 50
        assert self._mime_of(client, flv_header) == "video/x-flv"

    def test_detect_mp4_ftyp_1c(self, client):
        """Detect MP4 from ftyp box (0x1c variant)."""
        mp4_header = b"\x00\x00\x00\x1c\x66\x74\x79\x70" + b"\x00" * 50
        assert self._mime_of(client, mp4_header) == "video/mp4"

    def test_detect_mp4_ftyp_20(self, client):
        """Detect MP4 from ftyp box (0x20 variant)."""
        mp4_header = b"\x00\x00\x00\x20\x66\x74\x79\x70" + b"\x00" * 50
        assert self._mime_of(client, mp4_header) == "video/mp4"

    def test_detect_mp4_ftyp_18(self, client):
        """Detect MP4 from ftyp box (0x18 variant)."""
        mp4_header = b"\x00\x00\x00\x18\x66\x74\x79\x70" + b"\x00" * 50
        assert self._mime_of(client, mp4_header) == "video/mp4"

    # --- Audio Formats ---

    def test_detect_mp3_id3(self, client):
        """Detect MP3 from ID3 tag."""
        mp3_id3 = b"ID3" + b"\x00" * 50
        assert self._mime_of(client, mp3_id3) == "audio/mpeg"

    def test_detect_mp3_frame_sync_fb(self, client):
        """Detect MP3 from frame sync (0xfffb)."""
        mp3_sync = b"\xff\xfb" + b"\x00" * 50
        assert self._mime_of(client, mp3_sync) == "audio/mpeg"

    def test_detect_mp3_frame_sync_fa(self, client):
        """Detect MP3 from frame sync (0xfffa)."""
        mp3_sync = b"\xff\xfa" + b"\x00" * 50
        assert self._mime_of(client, mp3_sync) == "audio/mpeg"

    def test_detect_mp3_frame_sync_f3(self, client):
        """Detect MP3 from frame sync (0xfff3)."""
        mp3_sync = b"\xff\xf3" + b"\x00" * 50
        assert self._mime_of(client, mp3_sync) == "audio/mpeg"

    def test_detect_mp3_frame_sync_f2(self, client):
        """Detect MP3 from frame sync (0xfff2)."""
        mp3_sync = b"\xff\xf2" + b"\x00" * 50
        assert self._mime_of(client, mp3_sync) == "audio/mpeg"

    def test_detect_flac(self, client):
        """Detect FLAC from magic bytes."""
        flac_header = b"fLaC" + b"\x00" * 50
        assert self._mime_of(client, flac_header) == "audio/flac"

    def test_detect_ogg(self, client):
        """Detect OGG from magic bytes."""
        ogg_header = b"OggS" + b"\x00" * 50
        assert self._mime_of(client, ogg_header) == "audio/ogg"

    # --- Document Formats ---

    def test_detect_ole_msoffice(self, client):
        """Detect MS Office OLE from magic bytes."""
        ole_header = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" + b"\x00" * 50
        assert self._mime_of(client, ole_header) == "application/msword"

    # --- Executable Formats ---

    def test_detect_pe_exe(self, client):
        """Detect PE/EXE from MZ magic bytes."""
        pe_header = b"MZ" + b"\x00" * 50
        assert self._mime_of(client, pe_header) == "application/x-msdownload"

    def test_detect_elf(self, client):
        """Detect ELF from magic bytes."""
        elf_header = b"\x7fELF" + b"\x00" * 50
        assert self._mime_of(client, elf_header) == "application/x-executable"

    def test_detect_macho_32le(self, client):
        """Detect Mach-O 32-bit little-endian."""
        macho_header = b"\xce\xfa\xed\xfe" + b"\x00" * 50
        assert self._mime_of(client, macho_header) == "application/x-mach-binary"

    def test_detect_macho_32be(self, client):
        """Detect Mach-O 32-bit big-endian."""
        macho_header = b"\xfe\xed\xfa\xce" + b"\x00" * 50
        assert self._mime_of(client, macho_header) == "application/x-mach-binary"

    def test_detect_macho_64le(self, client):
        """Detect Mach-O 64-bit little-endian."""
        macho_header = b"\xcf\xfa\xed\xfe" + b"\x00" * 50
        assert self._mime_of(client, macho_header) == "application/x-mach-binary"

    def test_detect_macho_64be(self, client):
        """Detect Mach-O 64-bit big-endian."""
        macho_header = b"\xfe\xed\xfa\xcf" + b"\x00" * 50
        assert self._mime_of(client, macho_header) == "application/x-mach-binary"

    def test_detect_macho_fat(self, client):
        """Detect Mach-O fat/universal binary."""
        macho_fat = b"\xca\xfe\xba\xbe" + b"\x00" * 50
        assert self._mime_of(client, macho_fat) == "application/x-mach-binary"

    def test_detect_wasm(self, client):
        """Detect WebAssembly from magic bytes."""
        wasm_header = b"\x00asm" + b"\x00" * 50
        assert self._mime_of(client, wasm_header) == "application/wasm"

    # --- Archive/Compression Formats ---

    def test_detect_bzip2(self, client):
        """Detect BZIP2 from magic bytes."""
        bz2_header = b"BZh" + b"\x00" * 50
        assert self._mime_of(client, bz2_header) == "application/x-bzip2"

    def test_detect_xz(self, client):
        """Detect XZ from magic bytes."""
        xz_header = b"\xfd7zXZ\x00" + b"\x00" * 50
        assert self._mime_of(client, xz_header) == "application/x-xz"

    def test_detect_zstd(self, client):
        """Detect ZSTD from magic bytes."""
        zstd_header = b"\x28\xb5\x2f\xfd" + b"\x00" * 50
        assert self._mime_of(client, zstd_header) == "application/zstd"

    def test_detect_lz4(self, client):
        """Detect LZ4 from magic bytes."""
        lz4_header = b"\x04\x22\x4d\x18" + b"\x00" * 50
        assert self._mime_of(client, lz4_header) == "application/x-lz4"

    def test_detect_7z(self, client):
        """Detect 7z from magic bytes."""
        sz_header = b"7z\xbc\xaf\x27\x1c" + b"\x00" * 50
        assert self._mime_of(client, sz_header) == "application/x-7z-compressed"

    def test_detect_rar(self, client):
        """Detect RAR from magic bytes."""
        rar_header = b"Rar!\x1a\x07" + b"\x00" * 50
        assert self._mime_of(client, rar_header) == "application/vnd.rar"

    # --- Data Formats ---

    def test_detect_sqlite(self, client):
        """Detect SQLite from magic bytes."""
        sqlite_header = b"SQLite format 3\x00" + b"\x00" * 50
        assert self._mime_of(client, sqlite_header) == "application/x-sqlite3"

    # --- Edge Cases ---

    def test_empty_content_rejected(self, client):
        """Empty content is rejected (no empty pastes)."""
        # This test inspects the status code and error payload, not a MIME
        # type, so it talks to the client directly instead of using _mime_of.
        response = client.post("/", data=b"")
        assert response.status_code == 400
        data = json.loads(response.data)
        assert "error" in data

    def test_single_byte_content(self, client):
        """Single byte content handles gracefully."""
        assert self._mime_of(client, b"x") == "text/plain"

    def test_short_binary_content(self, client):
        """Short binary content (< magic length) handles gracefully."""
        truncated_png = b"\x89P"  # Truncated PNG
        assert self._mime_of(client, truncated_png) == "application/octet-stream"

    def test_prefix_boundary_exact_match(self, client):
        """Exact magic length content detects correctly."""
        # SQLite has longest signature at 16 bytes
        sqlite_exact = b"SQLite format 3\x00"
        assert len(sqlite_exact) == 16
        assert self._mime_of(client, sqlite_exact) == "application/x-sqlite3"

    def test_partial_magic_no_false_positive(self, client):
        """Partial magic bytes don't cause false positives."""
        # b"SQLite form" is not a valid signature
        partial = b"SQLite form" + b"\x00" * 50
        assert self._mime_of(client, partial) != "application/x-sqlite3"