From 11bb095ca64fa4153e9ca9fdd1d48a96967ab884 Mon Sep 17 00:00:00 2001 From: Username Date: Thu, 25 Dec 2025 20:03:04 +0100 Subject: [PATCH] use prefix slicing for magic byte detection Slice content once to MAX_MAGIC_LEN (16 bytes) before comparing against signatures. More explicit bounds, same safety guarantees, marginally cleaner. --- app/api/routes.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/app/api/routes.py b/app/api/routes.py index 8a86623..daaeff2 100644 --- a/app/api/routes.py +++ b/app/api/routes.py @@ -87,6 +87,9 @@ MAGIC_SIGNATURES: dict[bytes, str] = { b"SQLite format 3\x00": "application/x-sqlite3", } +# Maximum magic signature length (for safe prefix slicing) +MAX_MAGIC_LEN = 16 # SQLite signature is longest at 16 bytes + # Generic MIME types to override with detection GENERIC_MIME_TYPES = frozenset( { @@ -792,8 +795,10 @@ def calculate_entropy(data: bytes) -> float: def detect_mime_type(content: bytes, content_type: str | None = None) -> str: """Detect MIME type using magic bytes, headers, or content analysis.""" # Magic byte detection (highest priority) + # Slice once for safety - only examine first MAX_MAGIC_LEN bytes + prefix = content[:MAX_MAGIC_LEN] for magic, mime in MAGIC_SIGNATURES.items(): - if content.startswith(magic): + if prefix[: len(magic)] == magic: # RIFF container: verify WEBP subtype if magic == b"RIFF" and len(content) >= 12 and content[8:12] != b"WEBP": continue @@ -819,9 +824,10 @@ def is_recognizable_format(content: bytes) -> tuple[bool, str | None]: Returns (is_recognizable, detected_format). Used to enforce encryption by rejecting known formats. """ - # Check magic bytes + # Check magic bytes - slice once for safety + prefix = content[:MAX_MAGIC_LEN] for magic, mime in MAGIC_SIGNATURES.items(): - if content.startswith(magic): + if prefix[: len(magic)] == magic: if magic == b"RIFF" and len(content) >= 12 and content[8:12] != b"WEBP": continue return True, mime