add HEIC/HEIF/AVIF MIME detection signatures

- Add ftyp box signatures for heic, mif1, and avif brands
- Add tests for new image formats
- Fix nested if lint warning in lookup rate limit
- Update security docs: MKV uses WebM header, TAR needs offset 257
This commit is contained in:
Username
2025-12-26 17:04:51 +01:00
parent 93a4dd2f97
commit 03bcb157cc
3 changed files with 62 additions and 20 deletions

View File

@@ -49,9 +49,18 @@ MAGIC_SIGNATURES: dict[bytes, str] = {
b"II\x2a\x00": "image/tiff", # Little-endian TIFF
b"MM\x00\x2a": "image/tiff", # Big-endian TIFF
b"\x00\x00\x01\x00": "image/x-icon",
# HEIC/HEIF (ftyp box with heic/mif1 brand) - bytes 0-3 = box size, 4-7 = "ftyp", 8-11 = brand
b"\x00\x00\x00\x18\x66\x74\x79\x70\x68\x65\x69\x63": "image/heic", # ftyp heic
b"\x00\x00\x00\x1c\x66\x74\x79\x70\x68\x65\x69\x63": "image/heic", # ftyp heic
b"\x00\x00\x00\x18\x66\x74\x79\x70\x6d\x69\x66\x31": "image/heif", # ftyp mif1
b"\x00\x00\x00\x1c\x66\x74\x79\x70\x6d\x69\x66\x31": "image/heif", # ftyp mif1
# AVIF (ftyp box with avif brand)
b"\x00\x00\x00\x1c\x66\x74\x79\x70\x61\x76\x69\x66": "image/avif", # ftyp avif
b"\x00\x00\x00\x20\x66\x74\x79\x70\x61\x76\x69\x66": "image/avif", # ftyp avif
# Video/Audio containers (checked for subtype in detect_mime_type)
b"\x1a\x45\xdf\xa3": "video/webm", # Matroska/WebM
b"\x1a\x45\xdf\xa3": "video/webm", # Matroska/WebM (same format)
b"FLV\x01": "video/x-flv",
b"\x00\x00\x00\x1c\x66\x74\x79\x70\x69\x73\x6f\x6d": "video/mp4", # ftyp isom
b"\x00\x00\x00\x1c\x66\x74\x79\x70": "video/mp4", # ftyp box at standard offset
b"\x00\x00\x00\x20\x66\x74\x79\x70": "video/mp4", # ftyp with different size
b"\x00\x00\x00\x18\x66\x74\x79\x70": "video/mp4", # ftyp with different size
@@ -346,25 +355,27 @@ def check_lookup_rate_limit(client_ip: str) -> tuple[bool, int]:
with _lookup_rate_limit_lock:
# ENUM-002: Memory protection - prune if at capacity
if len(_lookup_rate_limit_requests) >= max_entries:
if client_ip not in _lookup_rate_limit_requests:
# Evict expired entries first
expired = [
ip
for ip, reqs in _lookup_rate_limit_requests.items()
if not reqs or reqs[-1] <= cutoff
]
for ip in expired:
del _lookup_rate_limit_requests[ip]
if (
len(_lookup_rate_limit_requests) >= max_entries
and client_ip not in _lookup_rate_limit_requests
):
# Evict expired entries first
expired = [
ip
for ip, reqs in _lookup_rate_limit_requests.items()
if not reqs or reqs[-1] <= cutoff
]
for ip in expired:
del _lookup_rate_limit_requests[ip]
# If still at capacity, evict oldest entries
if len(_lookup_rate_limit_requests) >= max_entries:
sorted_ips = sorted(
_lookup_rate_limit_requests.items(),
key=lambda x: x[1][-1] if x[1] else 0,
)
for ip, _ in sorted_ips[: max_entries // 4]:
del _lookup_rate_limit_requests[ip]
# If still at capacity, evict oldest entries
if len(_lookup_rate_limit_requests) >= max_entries:
sorted_ips = sorted(
_lookup_rate_limit_requests.items(),
key=lambda x: x[1][-1] if x[1] else 0,
)
for ip, _ in sorted_ips[: max_entries // 4]:
del _lookup_rate_limit_requests[ip]
requests = _lookup_rate_limit_requests[client_ip]
requests[:] = [t for t in requests if t > cutoff]

View File

@@ -125,11 +125,18 @@ Tested on production (2025-12-25):
[x] MachO-32 (application/x-mach-binary) PASS
[x] MachO-64 (application/x-mach-binary) PASS
Added (2025-12-26):
[x] HEIC (image/heic) PASS - ftyp box with heic brand
[x] HEIF (image/heif) PASS - ftyp box with mif1 brand
[x] AVIF (image/avif) PASS - ftyp box with avif brand
[x] MKV (video/webm) PASS - Same EBML header as WebM
Fallback to text/plain (safe default):
[~] MOV - ftyp offset varies
[~] CAB - Signature not implemented
[~] DEB - Signature not implemented
[~] AR - Signature not implemented
[~] TAR - ustar at offset 257 (beyond 16-byte check)
Fixed (2025-12-25):
[x] RPM - Added signature (0xEDABEEDB)
@@ -140,7 +147,7 @@ Known issues:
[!] JavaClass - Detected as Mach-O (0xCAFEBABE collision, unfixable)
Not tested (no signature defined):
[ ] AVIF, HEIC, MKV, TAR, DMG, ISO, DOCX/XLSX/PPTX, ODF
[ ] DMG, ISO, DOCX/XLSX/PPTX, ODF
```
### Fuzzing Improvements

View File

@@ -124,6 +124,30 @@ class TestMimeDetection:
data = json.loads(response.data)
assert data["mime_type"] == "image/x-icon"
def test_detect_heic(self, client):
    """An upload starting with an ftyp box of brand ``heic`` reports image/heic."""
    # 12-byte prefix: box size 0x18, the "ftyp" tag, then the "heic" brand.
    signature = b"\x00\x00\x00\x18\x66\x74\x79\x70\x68\x65\x69\x63"
    # Zero filler so the body is longer than the signature itself.
    padding = b"\x00" * 50
    reply = client.post("/", data=signature + padding)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/heic"
def test_detect_heif(self, client):
    """An upload starting with an ftyp box of brand ``mif1`` reports image/heif."""
    # 12-byte prefix: box size 0x18, the "ftyp" tag, then the "mif1" brand.
    signature = b"\x00\x00\x00\x18\x66\x74\x79\x70\x6d\x69\x66\x31"
    # Zero filler so the body is longer than the signature itself.
    padding = b"\x00" * 50
    reply = client.post("/", data=signature + padding)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/heif"
def test_detect_avif(self, client):
    """An upload starting with an ftyp box of brand ``avif`` reports image/avif."""
    # 12-byte prefix: box size 0x1c, the "ftyp" tag, then the "avif" brand.
    signature = b"\x00\x00\x00\x1c\x66\x74\x79\x70\x61\x76\x69\x66"
    # Zero filler so the body is longer than the signature itself.
    padding = b"\x00" * 50
    reply = client.post("/", data=signature + padding)
    detected = json.loads(reply.data)["mime_type"]
    assert detected == "image/avif"
# --- Video Formats ---
def test_detect_webm(self, client):