remove obsolete MIME detection tests

This commit is contained in:
Username
2025-12-26 19:06:35 +01:00
parent bc751d1b8c
commit 28e31f0b37
2 changed files with 0 additions and 541 deletions

View File

@@ -1,395 +0,0 @@
"""Tests for MIME type detection."""
import json
class TestMimeDetection:
    """Exercise automatic MIME type detection on content posted to "/"."""

    @staticmethod
    def _posted_mime(client, payload, **post_kwargs):
        """POST *payload* to "/" and return the reply's "mime_type" field.

        Extra keyword arguments (e.g. ``content_type``) are forwarded to
        ``client.post`` unchanged.
        """
        reply = client.post("/", data=payload, **post_kwargs)
        return json.loads(reply.data)["mime_type"]

    def test_detect_png(self, client, png_bytes):
        """PNG fixture bytes are reported as image/png."""
        assert self._posted_mime(client, png_bytes) == "image/png"

    def test_detect_jpeg(self, client, jpeg_bytes):
        """JPEG fixture bytes are reported as image/jpeg."""
        assert self._posted_mime(client, jpeg_bytes) == "image/jpeg"

    def test_detect_zip(self, client, zip_bytes):
        """ZIP fixture bytes are reported as application/zip."""
        assert self._posted_mime(client, zip_bytes) == "application/zip"

    def test_detect_pdf(self, client, pdf_bytes):
        """PDF fixture bytes are reported as application/pdf."""
        assert self._posted_mime(client, pdf_bytes) == "application/pdf"

    def test_detect_gif87a(self, client):
        """A GIF87a signature is reported as image/gif."""
        payload = b"GIF87a" + b"\x00" * 10
        assert self._posted_mime(client, payload) == "image/gif"

    def test_detect_gif89a(self, client):
        """A GIF89a signature is reported as image/gif."""
        payload = b"GIF89a" + b"\x00" * 10
        assert self._posted_mime(client, payload) == "image/gif"

    def test_detect_gzip(self, client):
        """A GZIP signature is reported as application/gzip."""
        payload = b"\x1f\x8b\x08" + b"\x00" * 10
        assert self._posted_mime(client, payload) == "application/gzip"

    def test_detect_utf8_text(self, client):
        """UTF-8 text (including non-ASCII) is reported as text/plain."""
        assert self._posted_mime(client, "Hello, world! 你好") == "text/plain"

    def test_detect_binary_fallback(self, client):
        """Non-UTF-8 bytes with no known magic fall back to octet-stream."""
        payload = b"\x80\x81\x82\x83\x84"
        assert self._posted_mime(client, payload) == "application/octet-stream"

    def test_explicit_content_type_honored(self, client):
        """A specific, non-generic Content-Type sent by the client is kept."""
        detected = self._posted_mime(
            client,
            "<html><body>test</body></html>",
            content_type="text/html",
        )
        assert detected == "text/html"

    def test_generic_content_type_overridden(self, client, png_bytes):
        """A generic application/octet-stream Content-Type yields to magic."""
        detected = self._posted_mime(
            client,
            png_bytes,
            content_type="application/octet-stream",
        )
        assert detected == "image/png"

    def test_webp_detection(self, client):
        """A RIFF container carrying the WEBP marker is image/webp."""
        webp_header = b"RIFF\x00\x00\x00\x00WEBP"
        payload = webp_header + b"\x00" * 20
        assert self._posted_mime(client, payload) == "image/webp"

    def test_riff_non_webp_not_detected(self, client):
        """A RIFF container without the WEBP marker is not image/webp."""
        riff_other = b"RIFF\x00\x00\x00\x00WAVE"
        payload = riff_other + b"\x00" * 20
        assert self._posted_mime(client, payload) != "image/webp"

    # --- Additional Image Formats ---

    def test_detect_bmp(self, client):
        """A BMP signature is reported as image/bmp."""
        payload = b"BM" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/bmp"

    def test_detect_tiff_little_endian(self, client):
        """A little-endian TIFF signature is reported as image/tiff."""
        payload = b"II\x2a\x00" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/tiff"

    def test_detect_tiff_big_endian(self, client):
        """A big-endian TIFF signature is reported as image/tiff."""
        payload = b"MM\x00\x2a" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/tiff"

    def test_detect_ico(self, client):
        """An ICO signature is reported as image/x-icon."""
        payload = b"\x00\x00\x01\x00" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/x-icon"

    def test_detect_heic(self, client):
        """An ftyp box with the heic brand is reported as image/heic."""
        # ftyp box: size (0x18) + "ftyp" + "heic" brand
        payload = b"\x00\x00\x00\x18\x66\x74\x79\x70\x68\x65\x69\x63" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/heic"

    def test_detect_heif(self, client):
        """An ftyp box with the mif1 brand is reported as image/heif."""
        # ftyp box: size (0x18) + "ftyp" + "mif1" brand
        payload = b"\x00\x00\x00\x18\x66\x74\x79\x70\x6d\x69\x66\x31" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/heif"

    def test_detect_avif(self, client):
        """An ftyp box with the avif brand is reported as image/avif."""
        # ftyp box: size (0x1c) + "ftyp" + "avif" brand
        payload = b"\x00\x00\x00\x1c\x66\x74\x79\x70\x61\x76\x69\x66" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "image/avif"

    # --- Video Formats ---

    def test_detect_webm(self, client):
        """A WebM/Matroska signature is reported as video/webm."""
        payload = b"\x1a\x45\xdf\xa3" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "video/webm"

    def test_detect_flv(self, client):
        """An FLV signature is reported as video/x-flv."""
        payload = b"FLV\x01" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "video/x-flv"

    def test_detect_mp4_ftyp_1c(self, client):
        """An ftyp box of size 0x1c with a zeroed brand is video/mp4."""
        payload = b"\x00\x00\x00\x1c\x66\x74\x79\x70" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "video/mp4"

    def test_detect_mp4_ftyp_20(self, client):
        """An ftyp box of size 0x20 with a zeroed brand is video/mp4."""
        payload = b"\x00\x00\x00\x20\x66\x74\x79\x70" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "video/mp4"

    def test_detect_mp4_ftyp_18(self, client):
        """An ftyp box of size 0x18 with a zeroed brand is video/mp4."""
        payload = b"\x00\x00\x00\x18\x66\x74\x79\x70" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "video/mp4"

    # --- Audio Formats ---

    def test_detect_mp3_id3(self, client):
        """A leading ID3 tag is reported as audio/mpeg."""
        payload = b"ID3" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_fb(self, client):
        """Frame sync bytes 0xfffb are reported as audio/mpeg."""
        payload = b"\xff\xfb" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_fa(self, client):
        """Frame sync bytes 0xfffa are reported as audio/mpeg."""
        payload = b"\xff\xfa" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_f3(self, client):
        """Frame sync bytes 0xfff3 are reported as audio/mpeg."""
        payload = b"\xff\xf3" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/mpeg"

    def test_detect_mp3_frame_sync_f2(self, client):
        """Frame sync bytes 0xfff2 are reported as audio/mpeg."""
        payload = b"\xff\xf2" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/mpeg"

    def test_detect_flac(self, client):
        """A FLAC signature is reported as audio/flac."""
        payload = b"fLaC" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/flac"

    def test_detect_ogg(self, client):
        """An OGG signature is reported as audio/ogg."""
        payload = b"OggS" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "audio/ogg"

    # --- Document Formats ---

    def test_detect_ole_msoffice(self, client):
        """The MS Office OLE signature is reported as application/msword."""
        payload = b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/msword"

    # --- Executable Formats ---

    def test_detect_pe_exe(self, client):
        """The MZ signature is reported as application/x-msdownload."""
        payload = b"MZ" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-msdownload"

    def test_detect_elf(self, client):
        """The ELF signature is reported as application/x-executable."""
        payload = b"\x7fELF" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-executable"

    def test_detect_macho_32le(self, client):
        """The 32-bit little-endian Mach-O signature is recognised."""
        payload = b"\xce\xfa\xed\xfe" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_32be(self, client):
        """The 32-bit big-endian Mach-O signature is recognised."""
        payload = b"\xfe\xed\xfa\xce" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_64le(self, client):
        """The 64-bit little-endian Mach-O signature is recognised."""
        payload = b"\xcf\xfa\xed\xfe" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_64be(self, client):
        """The 64-bit big-endian Mach-O signature is recognised."""
        payload = b"\xfe\xed\xfa\xcf" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-mach-binary"

    def test_detect_macho_fat(self, client):
        """The fat/universal Mach-O signature is recognised."""
        payload = b"\xca\xfe\xba\xbe" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-mach-binary"

    def test_detect_wasm(self, client):
        """The WebAssembly signature is reported as application/wasm."""
        payload = b"\x00asm" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/wasm"

    # --- Archive/Compression Formats ---

    def test_detect_bzip2(self, client):
        """The BZIP2 signature is reported as application/x-bzip2."""
        payload = b"BZh" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-bzip2"

    def test_detect_xz(self, client):
        """The XZ signature is reported as application/x-xz."""
        payload = b"\xfd7zXZ\x00" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-xz"

    def test_detect_zstd(self, client):
        """The ZSTD signature is reported as application/zstd."""
        payload = b"\x28\xb5\x2f\xfd" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/zstd"

    def test_detect_lz4(self, client):
        """The LZ4 signature is reported as application/x-lz4."""
        payload = b"\x04\x22\x4d\x18" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-lz4"

    def test_detect_7z(self, client):
        """The 7z signature is reported as application/x-7z-compressed."""
        payload = b"7z\xbc\xaf\x27\x1c" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-7z-compressed"

    def test_detect_rar(self, client):
        """The RAR signature is reported as application/vnd.rar."""
        payload = b"Rar!\x1a\x07" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/vnd.rar"

    # --- Data Formats ---

    def test_detect_sqlite(self, client):
        """The SQLite header string is reported as application/x-sqlite3."""
        payload = b"SQLite format 3\x00" + b"\x00" * 50
        assert self._posted_mime(client, payload) == "application/x-sqlite3"

    # --- Edge Cases ---

    def test_empty_content_rejected(self, client):
        """An empty body is answered with 400 and an "error" field."""
        reply = client.post("/", data=b"")
        assert reply.status_code == 400
        body = json.loads(reply.data)
        assert "error" in body

    def test_single_byte_content(self, client):
        """A single ASCII byte is reported as text/plain."""
        assert self._posted_mime(client, b"x") == "text/plain"

    def test_short_binary_content(self, client):
        """Binary content shorter than a full signature is octet-stream."""
        truncated_png = b"\x89P"  # Truncated PNG
        assert self._posted_mime(client, truncated_png) == "application/octet-stream"

    def test_prefix_boundary_exact_match(self, client):
        """Content that is exactly one signature long is still detected."""
        # SQLite has longest signature at 16 bytes
        sqlite_exact = b"SQLite format 3\x00"
        assert len(sqlite_exact) == 16
        assert self._posted_mime(client, sqlite_exact) == "application/x-sqlite3"

    def test_partial_magic_no_false_positive(self, client):
        """A truncated signature prefix must not match the full signature."""
        # b"SQLite form" is not a valid signature
        partial = b"SQLite form" + b"\x00" * 50
        assert self._posted_mime(client, partial) != "application/x-sqlite3"

View File

@@ -1,146 +0,0 @@
"""Tests for polyglot file MIME detection.
Verifies that polyglot files (valid in multiple formats) are detected
by their primary magic bytes at offset 0, not by embedded payloads.
"""
import json
import sys
import pytest
# Make the polyglot generator importable by putting its directory first on
# the module search path.
# NOTE(review): "tests/security" is a relative path, so it resolves against
# the current working directory -- presumably the suite must be launched from
# the repository root for the import below to succeed; confirm.
sys.path.insert(0, "tests/security")
from polyglot_generator import (
generate_gif_js,
generate_pdf_js,
generate_png_html,
generate_polyglot,
generate_zip_html,
)
class TestPolyglotDetection:
    """Verify polyglot files are detected by primary magic.

    Each test uploads a generated polyglot and checks the reported MIME
    type. When the upload is not answered with 201 the test is reported as
    skipped; previously the assertion was simply bypassed and the test
    passed without verifying anything.
    """

    @staticmethod
    def _created_mime(client, content):
        """POST *content* to "/" and return the reply's "mime_type" field.

        Skips the calling test if the response status is not 201, so an
        unverified run is visible in the test report.
        """
        response = client.post("/", data=content)
        if response.status_code != 201:
            pytest.skip(
                f"paste creation returned {response.status_code}, expected 201"
            )
        return json.loads(response.data)["mime_type"]

    def test_gif_js_detected_as_gif(self, client):
        """GIF/JS polyglot should be detected as GIF."""
        assert self._created_mime(client, generate_gif_js()) == "image/gif"

    def test_pdf_js_detected_as_pdf(self, client):
        """PDF with JavaScript should be detected as PDF."""
        assert self._created_mime(client, generate_pdf_js()) == "application/pdf"

    def test_zip_html_detected_as_zip(self, client):
        """ZIP containing HTML should be detected as ZIP."""
        assert self._created_mime(client, generate_zip_html()) == "application/zip"

    def test_png_html_detected_as_png(self, client):
        """PNG with trailing HTML should be detected as PNG."""
        assert self._created_mime(client, generate_png_html()) == "image/png"
class TestGenericPolyglots:
    """Test generic primary:payload combinations."""

    @pytest.mark.parametrize(
        "primary,expected_mime",
        [
            ("png", "image/png"),
            ("gif", "image/gif"),
            ("jpeg", "image/jpeg"),
            ("pdf", "application/pdf"),
            ("zip", "application/zip"),
            ("gzip", "application/gzip"),
            ("elf", "application/x-executable"),
            ("pe", "application/x-msdownload"),
        ],
    )
    @pytest.mark.parametrize("payload", ["html", "js", "php", "shell"])
    def test_primary_format_wins(self, client, primary, expected_mime, payload):
        """Primary format magic should determine MIME type, not payload.

        The upload is sent with a generic application/octet-stream
        Content-Type. If it is not answered with 201 the case is skipped
        instead of passing silently with no assertion executed.
        """
        content = generate_polyglot(primary, payload)
        response = client.post("/", data=content, content_type="application/octet-stream")
        if response.status_code != 201:
            # Surface the unverified case in the report rather than
            # counting it as a pass.
            pytest.skip(
                f"{primary}:{payload} upload returned {response.status_code}, expected 201"
            )
        data = json.loads(response.data)
        assert data["mime_type"] == expected_mime, (
            f"{primary}:{payload} detected as {data['mime_type']}, expected {expected_mime}"
        )
class TestSecurityHeaders:
    """Verify security headers prevent polyglot execution.

    Each test uploads a polyglot, fetches it back from "/<id>/raw" and
    inspects one response header. A failed upload (status other than 201)
    skips the test explicitly; previously the header assertion was bypassed
    and the test passed without checking anything.
    """

    @staticmethod
    def _fetch_raw(client, content):
        """Upload *content* and return the response of GET "/<id>/raw".

        Skips the calling test when the upload is not answered with 201.
        """
        create = client.post("/", data=content)
        if create.status_code != 201:
            pytest.skip(
                f"paste creation returned {create.status_code}, expected 201"
            )
        paste_id = json.loads(create.data)["id"]
        return client.get(f"/{paste_id}/raw")

    def test_nosniff_header_on_polyglot(self, client):
        """X-Content-Type-Options: nosniff should be present."""
        raw = self._fetch_raw(client, generate_gif_js())
        assert raw.headers.get("X-Content-Type-Options") == "nosniff"

    def test_csp_header_on_polyglot(self, client):
        """CSP should prevent script execution."""
        raw = self._fetch_raw(client, generate_png_html())
        # A missing header becomes "" so the failure is an assertion error,
        # not a TypeError on the membership test.
        csp = raw.headers.get("Content-Security-Policy", "")
        assert "default-src 'none'" in csp

    def test_xframe_options_on_polyglot(self, client):
        """X-Frame-Options should prevent framing."""
        raw = self._fetch_raw(client, generate_pdf_js())
        assert raw.headers.get("X-Frame-Options") == "DENY"
class TestPayloadNotExecuted:
    """Verify embedded payloads are returned literally.

    Each test uploads a polyglot, fetches it back from "/<id>/raw" and
    checks both the body and the Content-Type. A failed upload (status
    other than 201) skips the test explicitly; previously the assertions
    were bypassed and the test passed without checking anything.
    """

    @staticmethod
    def _fetch_raw(client, content):
        """Upload *content* and return the response of GET "/<id>/raw".

        Skips the calling test when the upload is not answered with 201.
        """
        create = client.post("/", data=content)
        if create.status_code != 201:
            pytest.skip(
                f"paste creation returned {create.status_code}, expected 201"
            )
        paste_id = json.loads(create.data)["id"]
        return client.get(f"/{paste_id}/raw")

    def test_html_payload_literal(self, client):
        """HTML payload should be returned as-is, not rendered."""
        raw = self._fetch_raw(client, generate_polyglot("png", "html"))
        # Content should contain literal script tag
        assert b"<script>" in raw.data
        # But Content-Type should be image/png
        assert "image/png" in raw.content_type

    def test_php_payload_literal(self, client):
        """PHP payload should be returned as-is."""
        raw = self._fetch_raw(client, generate_polyglot("gif", "php"))
        assert b"<?php" in raw.data
        assert "image/gif" in raw.content_type