forked from claw/flaskpaste
expand magic byte detection for common file formats
Add detection for:
- Images: BMP, TIFF, ICO
- Video: MP4, WebM, FLV, Matroska
- Audio: MP3, FLAC, OGG
- Documents: MS Office OLE (DOC/XLS/PPT)
- Executables: PE (EXE/DLL), ELF, Mach-O, WASM
- Archives: BZIP2, XZ, ZSTD, LZ4, 7z, RAR
- Data: SQLite

This improves REQUIRE_BINARY enforcement by detecting more recognizable formats that should be encrypted before upload.
This commit is contained in:
@@ -39,14 +39,52 @@ MIME_PATTERN = re.compile(r"^[a-z0-9][a-z0-9!#$&\-^_.+]*/[a-z0-9][a-z0-9!#$&\-^_
|
||||
|
||||
# Magic bytes for binary format detection
|
||||
# Maps a file's leading "magic" byte sequence to the MIME type it indicates.
# No key is a prefix of another key, so the result does not depend on the
# order in which entries are tried.
# NOTE(review): how the table is consumed (presumably a prefix match in
# detect_mime_type) is not visible here -- confirm before relying on it.
MAGIC_SIGNATURES: dict[bytes, str] = {
    # Images
    b"\x89PNG\r\n\x1a\n": "image/png",
    b"\xff\xd8\xff": "image/jpeg",
    b"GIF87a": "image/gif",
    b"GIF89a": "image/gif",
    b"RIFF": "image/webp",  # RIFF container, verified as WEBP in detect_mime_type
    b"BM": "image/bmp",
    b"II\x2a\x00": "image/tiff",  # Little-endian TIFF
    b"MM\x00\x2a": "image/tiff",  # Big-endian TIFF
    b"\x00\x00\x01\x00": "image/x-icon",
    # Video/Audio containers (checked for subtype in detect_mime_type)
    b"\x1a\x45\xdf\xa3": "video/webm",  # Matroska/WebM
    b"FLV\x01": "video/x-flv",
    # The first four bytes are the ftyp box size, so only the listed sizes
    # (0x1c, 0x20, 0x18) are recognised by these entries.
    b"\x00\x00\x00\x1c\x66\x74\x79\x70": "video/mp4",  # ftyp box at standard offset
    b"\x00\x00\x00\x20\x66\x74\x79\x70": "video/mp4",  # ftyp with different size
    b"\x00\x00\x00\x18\x66\x74\x79\x70": "video/mp4",  # ftyp with different size
    # Audio
    b"ID3": "audio/mpeg",  # MP3 with ID3 tag
    b"\xff\xfb": "audio/mpeg",  # MP3 frame sync
    b"\xff\xfa": "audio/mpeg",
    b"\xff\xf3": "audio/mpeg",
    b"\xff\xf2": "audio/mpeg",
    b"fLaC": "audio/flac",
    b"OggS": "audio/ogg",
    # Documents
    b"%PDF": "application/pdf",
    # NOTE(review): every OLE compound file is reported as msword, whatever
    # the actual document kind is.
    b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1": "application/msword",  # OLE (DOC, XLS, PPT, MSI)
    b"PK\x03\x04": "application/zip",  # ZIP, DOCX, XLSX, PPTX, ODT, JAR, APK
    # Executables
    b"MZ": "application/x-msdownload",  # EXE, DLL
    b"\x7fELF": "application/x-executable",  # ELF (Linux)
    b"\xfe\xed\xfa\xce": "application/x-mach-binary",  # Mach-O 32-bit
    b"\xce\xfa\xed\xfe": "application/x-mach-binary",  # Mach-O 32-bit (reversed)
    b"\xfe\xed\xfa\xcf": "application/x-mach-binary",  # Mach-O 64-bit
    b"\xcf\xfa\xed\xfe": "application/x-mach-binary",  # Mach-O 64-bit (reversed)
    # NOTE(review): Java .class files begin with the same four bytes.
    b"\xca\xfe\xba\xbe": "application/x-mach-binary",  # Mach-O fat/universal binary
    b"\x00asm": "application/wasm",  # WebAssembly
    # Compression/Archives
    b"\x1f\x8b": "application/gzip",
    b"BZh": "application/x-bzip2",
    b"\xfd7zXZ\x00": "application/x-xz",
    b"\x28\xb5\x2f\xfd": "application/zstd",
    b"\x04\x22\x4d\x18": "application/x-lz4",
    b"7z\xbc\xaf\x27\x1c": "application/x-7z-compressed",
    b"Rar!\x1a\x07": "application/vnd.rar",
    # Data
    b"SQLite format 3\x00": "application/x-sqlite3",
}
|
||||
|
||||
# Generic MIME types to override with detection
|
||||
|
||||
Reference in New Issue
Block a user