# Magic bytes for binary format detection.
#
# Each key is a byte signature and each value is the MIME type associated
# with it. Presumably the signatures are matched against the start of the
# uploaded payload by detect_mime_type (not visible here) -- confirm there.
# No key is a prefix of another key, so the table itself does not impose an
# evaluation order.
MAGIC_SIGNATURES: dict[bytes, str] = {
    # Images
    b"\x89PNG\r\n\x1a\n": "image/png",
    b"\xff\xd8\xff": "image/jpeg",
    b"GIF87a": "image/gif",
    b"GIF89a": "image/gif",
    b"RIFF": "image/webp",  # RIFF container, verified as WEBP in detect_mime_type
    b"BM": "image/bmp",
    b"II\x2a\x00": "image/tiff",  # Little-endian TIFF
    b"MM\x00\x2a": "image/tiff",  # Big-endian TIFF
    b"\x00\x00\x01\x00": "image/x-icon",
    # Video/Audio containers (checked for subtype in detect_mime_type)
    b"\x1a\x45\xdf\xa3": "video/webm",  # Matroska/WebM
    b"FLV\x01": "video/x-flv",
    b"\x00\x00\x00\x1c\x66\x74\x79\x70": "video/mp4",  # ftyp box at standard offset
    b"\x00\x00\x00\x20\x66\x74\x79\x70": "video/mp4",  # ftyp with different size
    b"\x00\x00\x00\x18\x66\x74\x79\x70": "video/mp4",  # ftyp with different size
    # Audio
    b"ID3": "audio/mpeg",  # MP3 with ID3 tag
    b"\xff\xfb": "audio/mpeg",  # MP3 frame sync
    b"\xff\xfa": "audio/mpeg",
    b"\xff\xf3": "audio/mpeg",
    b"\xff\xf2": "audio/mpeg",
    b"fLaC": "audio/flac",
    b"OggS": "audio/ogg",
    # Documents
    b"%PDF": "application/pdf",
    b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1": "application/msword",  # OLE (DOC, XLS, PPT, MSI)
    b"PK\x03\x04": "application/zip",  # ZIP, DOCX, XLSX, PPTX, ODT, JAR, APK
    # Executables
    b"MZ": "application/x-msdownload",  # EXE, DLL
    b"\x7fELF": "application/x-executable",  # ELF (Linux)
    # Mach-O: the 32-bit and 64-bit magics each appear in both byte orders,
    # so all four variants are listed.
    b"\xfe\xed\xfa\xce": "application/x-mach-binary",  # Mach-O 32-bit, big-endian
    b"\xce\xfa\xed\xfe": "application/x-mach-binary",  # Mach-O 32-bit, little-endian
    b"\xfe\xed\xfa\xcf": "application/x-mach-binary",  # Mach-O 64-bit, big-endian
    b"\xcf\xfa\xed\xfe": "application/x-mach-binary",  # Mach-O 64-bit, little-endian
    # NOTE: 0xCAFEBABE is also the magic number of Java class files.
    b"\xca\xfe\xba\xbe": "application/x-mach-binary",  # Mach-O fat/universal binary
    b"\x00asm": "application/wasm",  # WebAssembly
    # Compression/Archives
    b"\x1f\x8b": "application/gzip",
    b"BZh": "application/x-bzip2",
    b"\xfd7zXZ\x00": "application/x-xz",
    b"\x28\xb5\x2f\xfd": "application/zstd",
    b"\x04\x22\x4d\x18": "application/x-lz4",
    b"7z\xbc\xaf\x27\x1c": "application/x-7z-compressed",
    b"Rar!\x1a\x07": "application/vnd.rar",
    # Data
    b"SQLite format 3\x00": "application/x-sqlite3",
}
a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2 FlaskPaste automatically detects MIME types using: 1. **Magic byte signatures** (highest priority) - - PNG: `\x89PNG\r\n\x1a\n` - - JPEG: `\xff\xd8\xff` - - GIF: `GIF87a` or `GIF89a` - - WebP: `RIFF....WEBP` - - ZIP: `PK\x03\x04` - - PDF: `%PDF` - - GZIP: `\x1f\x8b` + + | Category | Formats | + |----------|---------| + | Images | PNG, JPEG, GIF, WebP, BMP, TIFF, ICO | + | Video | MP4, WebM, FLV, Matroska | + | Audio | MP3, FLAC, OGG | + | Documents | PDF, MS Office (DOC/XLS/PPT), ZIP-based (DOCX/XLSX/ODT) | + | Executables | EXE/DLL (PE), ELF (Linux), Mach-O (macOS), WASM | + | Archives | ZIP, GZIP, BZIP2, XZ, ZSTD, LZ4, 7z, RAR | + | Data | SQLite | 2. **Explicit Content-Type header** (if not generic)