use prefix slicing for magic byte detection

Slice content once to MAX_MAGIC_LEN (16 bytes) before
comparing against signatures. This makes the bounds more explicit,
keeps the same safety guarantees, and is marginally cleaner.
This commit is contained in:
Username
2025-12-25 20:03:04 +01:00
parent 764b831bb0
commit 11bb095ca6

View File

@@ -87,6 +87,9 @@ MAGIC_SIGNATURES: dict[bytes, str] = {
b"SQLite format 3\x00": "application/x-sqlite3",
}
# Maximum magic signature length (for safe prefix slicing)
MAX_MAGIC_LEN = 16 # SQLite signature is longest at 16 bytes
# Generic MIME types to override with detection
GENERIC_MIME_TYPES = frozenset(
{
@@ -792,8 +795,10 @@ def calculate_entropy(data: bytes) -> float:
def detect_mime_type(content: bytes, content_type: str | None = None) -> str:
"""Detect MIME type using magic bytes, headers, or content analysis."""
# Magic byte detection (highest priority)
# Slice once for safety - only examine first MAX_MAGIC_LEN bytes
prefix = content[:MAX_MAGIC_LEN]
for magic, mime in MAGIC_SIGNATURES.items():
if content.startswith(magic):
if prefix[: len(magic)] == magic:
# RIFF container: verify WEBP subtype
if magic == b"RIFF" and len(content) >= 12 and content[8:12] != b"WEBP":
continue
@@ -819,9 +824,10 @@ def is_recognizable_format(content: bytes) -> tuple[bool, str | None]:
Returns (is_recognizable, detected_format).
Used to enforce encryption by rejecting known formats.
"""
# Check magic bytes
# Check magic bytes - slice once for safety
prefix = content[:MAX_MAGIC_LEN]
for magic, mime in MAGIC_SIGNATURES.items():
if content.startswith(magic):
if prefix[: len(magic)] == magic:
if magic == b"RIFF" and len(content) >= 12 and content[8:12] != b"WEBP":
continue
return True, mime