# forked from username/flaskpaste
# - polyglot_generator.py: creates files valid in multiple formats
# - 41 new tests verify MIME detection handles polyglots correctly
# - Document rate limiting behavior under attack
# - Clarify DMG/ISO/DOCX detection limitations
# 234 lines, 6.9 KiB, Python
#!/usr/bin/env python3
"""Polyglot file generator for MIME confusion testing.

Creates files that are technically valid in multiple formats to test
that MIME detection correctly identifies the primary format based on
magic bytes at offset 0.
"""

import argparse
import sys
import zlib
from pathlib import Path

# Magic byte signatures, keyed by format name.  Each value is the byte
# sequence placed at offset 0 of a generic polyglot.
SIGNATURES = {
    "png": b"\x89PNG\r\n\x1a\n",
    "gif": b"GIF89a",
    "jpeg": b"\xff\xd8\xff\xe0\x00\x10JFIF",
    "pdf": b"%PDF-1.4\n",
    "zip": b"PK\x03\x04",
    "gzip": b"\x1f\x8b\x08",
    "elf": b"\x7fELF",
    "pe": b"MZ",
}


# Payloads that could be dangerous if executed, keyed by payload type.
# They are only ever embedded as inert bytes inside generated test files.
PAYLOADS = {
    "html": b"<html><body><script>alert('XSS')</script></body></html>",
    "js": b"/**/alert('XSS')//",
    "php": b"<?php system($_GET['cmd']); ?>",
    "shell": b"#!/bin/sh\necho pwned\n",
    "svg": b'<svg xmlns="http://www.w3.org/2000/svg"><script>alert(1)</script></svg>',
}


def generate_polyglot(primary: str, payload: str, size: int = 1024) -> bytes:
    """Generate a polyglot file with primary format magic and embedded payload.

    Args:
        primary: Primary format; must be a key of SIGNATURES
            (png, gif, jpeg, pdf, zip, etc.).
        payload: Payload type to embed; must be a key of PAYLOADS
            (html, js, php, shell, svg).
        size: Minimum file size.  Shorter content is padded with NUL
            bytes; longer content is returned as-is, never truncated.

    Returns:
        Polyglot file content.

    Raises:
        ValueError: If ``primary`` or ``payload`` is not a known key.
    """
    if primary not in SIGNATURES:
        raise ValueError(f"Unknown primary format: {primary}")
    if payload not in PAYLOADS:
        raise ValueError(f"Unknown payload type: {payload}")

    magic = SIGNATURES[primary]
    payload_bytes = PAYLOADS[payload]

    # Layout: magic bytes at offset 0, a fixed 32-byte NUL gap, then the payload.
    content = magic + b"\x00" * 32 + payload_bytes

    # Pad with NULs up to the requested minimum size.
    if len(content) < size:
        content += b"\x00" * (size - len(content))

    return content


def generate_gif_js() -> bytes:
    """Generate GIF/JavaScript polyglot.

    The output starts with the GIF89a magic, continues with a minimal
    1x1 GIF structure, and ends with a JavaScript snippet appended after
    the GIF trailer byte.

    Returns:
        The concatenated GIF bytes and JavaScript payload.
    """
    # GIF89a is the GIF magic and is also made of characters that are legal
    # in a JavaScript identifier.
    gif_header = b"GIF89a"

    # Minimal GIF structure
    gif_data = (
        b"\x01\x00\x01\x00"  # 1x1 dimensions
        b"\x00"  # no global color table
        b"\x00"  # background color
        b"\x00"  # aspect ratio
        b"\x2c"  # image descriptor
        b"\x00\x00\x00\x00"  # position
        b"\x01\x00\x01\x00"  # dimensions
        b"\x00"  # no local color table
        b"\x02\x01\x01\x00\x3b"  # minimal image data + trailer
    )

    # JS payload after GIF (browsers may try to execute).
    # NOTE(review): the binary GIF fields above sit between the header and
    # the `/**/` marker without being enclosed in a JS comment; confirm that
    # this layout is what the MIME-detection tests expect.
    js_payload = b"/**/=1;alert('XSS')//"

    return gif_header + gif_data + js_payload


def generate_pdf_js() -> bytes:
    """Generate PDF with embedded JavaScript.

    The document has three objects: a catalog whose /OpenAction points at
    object 3, an empty page tree, and a JavaScript action.  The
    cross-reference table and ``startxref`` value are computed from the
    real byte positions, so they always match the object bodies.

    Returns:
        The PDF file content, starting with ``%PDF-1.4``.
    """
    header = b"%PDF-1.4\n"
    objects = (
        b"1 0 obj<</Type/Catalog/Pages 2 0 R/OpenAction 3 0 R>>endobj\n",
        b"2 0 obj<</Type/Pages/Kids[]/Count 0>>endobj\n",
        b"3 0 obj<</S/JavaScript/JS(app.alert('XSS'))>>endobj\n",
    )

    # Record where each object starts while assembling the body.
    offsets = []
    position = len(header)
    for obj in objects:
        offsets.append(position)
        position += len(obj)
    xref_offset = position

    # The entry count includes the mandatory free entry for object 0.
    entry_count = len(objects) + 1

    # Each xref entry is exactly 20 bytes: 10-digit offset, 5-digit
    # generation, type flag, and a two-byte end-of-line (" \n").
    xref_entries = [b"0000000000 65535 f \n"]
    xref_entries.extend(b"%010d 00000 n \n" % offset for offset in offsets)

    parts = [header]
    parts.extend(objects)
    parts.append(b"xref\n0 %d\n" % entry_count)
    parts.extend(xref_entries)
    parts.append(b"trailer<</Size %d/Root 1 0 R>>\n" % entry_count)
    parts.append(b"startxref\n" + str(xref_offset).encode("ascii") + b"\n")
    parts.append(b"%%EOF")  # plain literal: the PDF end-of-file marker
    return b"".join(parts)


def generate_zip_html() -> bytes:
    """Generate ZIP with HTML file inside.

    The archive holds one stored (uncompressed) member, ``index.html``.
    It consists of a local file header, the file data, a central
    directory entry and an end-of-central-directory record, with a real
    CRC-32, so standard ZIP readers can open it.

    Returns:
        The ZIP archive content, starting with ``PK\\x03\\x04``.
    """
    html = b"<script>alert(1)</script>"
    filename = b"index.html"
    size = len(html).to_bytes(4, "little")

    # Fields that appear identically in the local header and in the
    # central directory entry.
    shared_fields = (
        b"\x14\x00"  # version needed
        + b"\x00\x00"  # flags
        + b"\x00\x00"  # compression (store)
        + b"\x00\x00\x00\x00"  # time/date
        + zlib.crc32(html).to_bytes(4, "little")  # CRC32 of the member data
        + size  # compressed size
        + size  # uncompressed size (same, because the data is stored)
        + len(filename).to_bytes(2, "little")  # filename length
        + b"\x00\x00"  # extra field length
    )

    # Local file header (PK signature) + filename + file content
    local_entry = b"PK\x03\x04" + shared_fields + filename + html

    central_directory = (
        b"PK\x01\x02"  # central directory header signature
        + b"\x14\x00"  # version made by
        + shared_fields
        + b"\x00\x00"  # file comment length
        + b"\x00\x00"  # disk number start
        + b"\x00\x00"  # internal attributes
        + b"\x00\x00\x00\x00"  # external attributes
        + (0).to_bytes(4, "little")  # local header offset: first byte of file
        + filename
    )

    end_record = (
        b"PK\x05\x06"  # end of central directory signature
        + b"\x00\x00"  # number of this disk
        + b"\x00\x00"  # disk where the central directory starts
        + (1).to_bytes(2, "little")  # entries on this disk
        + (1).to_bytes(2, "little")  # entries in total
        + len(central_directory).to_bytes(4, "little")
        + len(local_entry).to_bytes(4, "little")  # central directory offset
        + b"\x00\x00"  # archive comment length
    )

    return local_entry + central_directory + end_record


def generate_png_html() -> bytes:
    """Generate PNG with HTML in trailing data.

    Emits the PNG signature, then IHDR, IDAT and IEND chunks, each with
    a CRC-32 computed over its chunk type and data, followed by an HTML
    snippet after the IEND chunk.

    Returns:
        The PNG bytes with the HTML payload appended.
    """

    def chunk(tag: bytes, data: bytes) -> bytes:
        """Serialize one chunk: big-endian length, type, data, CRC-32."""
        crc = zlib.crc32(tag + data)
        return len(data).to_bytes(4, "big") + tag + data + crc.to_bytes(4, "big")

    # PNG signature
    png = b"\x89PNG\r\n\x1a\n"

    # IHDR chunk
    ihdr_data = (
        b"\x00\x00\x00\x01"  # width
        b"\x00\x00\x00\x01"  # height
        b"\x08"  # bit depth
        b"\x02"  # color type (RGB)
        b"\x00"  # compression
        b"\x00"  # filter
        b"\x00"  # interlace
    )
    png += chunk(b"IHDR", ihdr_data)

    # IDAT chunk (minimal).
    # NOTE(review): this payload is kept byte-for-byte from the original
    # and was not verified to inflate to a valid 1x1 RGB scanline.
    idat_data = b"\x08\xd7\x63\xf8\x0f\x00\x00\x01\x01\x00"
    png += chunk(b"IDAT", idat_data)

    # IEND chunk (empty data)
    png += chunk(b"IEND", b"")

    # HTML payload after PNG (should be ignored)
    png += b"<html><script>alert(1)</script></html>"

    return png


# Polyglot generators registry: maps a polyglot name to a
# (description, zero-argument generator function) pair.
POLYGLOTS = {
    "gif-js": ("GIF with embedded JavaScript", generate_gif_js),
    "pdf-js": ("PDF with JavaScript action", generate_pdf_js),
    "zip-html": ("ZIP containing HTML", generate_zip_html),
    "png-html": ("PNG with trailing HTML", generate_png_html),
}


def list_polyglots() -> None:
    """List available polyglot types.

    Prints the named polyglots from POLYGLOTS with their descriptions,
    followed by the primary formats and payload types accepted by the
    generic ``primary:payload`` form.
    """
    # NOTE(review): leading whitespace inside these strings may have been
    # collapsed when the source was extracted; confirm the intended indent.
    print("Available polyglots:")
    print()
    for name, (desc, _) in POLYGLOTS.items():
        print(f" {name:12} {desc}")
    print()
    print("Generic formats:")
    print(f" primary: {', '.join(SIGNATURES)}")
    print(f" payloads: {', '.join(PAYLOADS)}")


def main() -> int:
    """Command-line entry point.

    Parses the arguments, generates the requested polyglot and writes it
    to the output file or, by default, to stdout as raw bytes.  With
    ``--list`` or no type argument, prints the available types instead.

    Returns:
        Process exit code: 0 on success, 1 if the requested type, primary
        format or payload is unknown.
    """
    parser = argparse.ArgumentParser(
        description="Generate polyglot files for MIME confusion testing"
    )
    parser.add_argument(
        "type",
        nargs="?",
        help="Polyglot type (e.g., gif-js, png-html) or primary:payload",
    )
    parser.add_argument("-o", "--output", help="Output file (default: stdout)")
    parser.add_argument("-l", "--list", action="store_true", help="List polyglot types")
    parser.add_argument("-s", "--size", type=int, default=1024, help="Minimum size (default: 1024)")

    args = parser.parse_args()

    if args.list or not args.type:
        list_polyglots()
        return 0

    # Generate polyglot
    if args.type in POLYGLOTS:
        # Named polyglots have a fixed layout; --size does not apply to them.
        _, generator = POLYGLOTS[args.type]
        content = generator()
    elif ":" in args.type:
        primary, payload = args.type.split(":", 1)
        try:
            content = generate_polyglot(primary, payload, args.size)
        except ValueError as exc:
            # Report an unknown primary/payload like any other bad type
            # instead of letting the traceback escape.
            print(exc, file=sys.stderr)
            print("Use --list to see available types", file=sys.stderr)
            return 1
    else:
        print(f"Unknown polyglot type: {args.type}", file=sys.stderr)
        print("Use --list to see available types", file=sys.stderr)
        return 1

    # Output
    if args.output:
        Path(args.output).write_bytes(content)
        print(f"Written {len(content)} bytes to {args.output}")
    else:
        # Use the binary buffer so the bytes are written without text encoding.
        sys.stdout.buffer.write(content)

    return 0


# Run the CLI only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())