Files
flaskpaste/tests/security/polyglot_generator.py
Username fb45005766
Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Successful in 20s
CI / Security Tests (push) Has been skipped
CI / Advanced Security Tests (push) Has been skipped
add polyglot generator and MIME confusion tests
- polyglot_generator.py: creates files valid in multiple formats
- 41 new tests verify MIME detection handles polyglots correctly
- Document rate limiting behavior under attack
- Clarify DMG/ISO/DOCX detection limitations
2025-12-26 18:25:46 +01:00

234 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""Polyglot file generator for MIME confusion testing.
Creates files that are technically valid in multiple formats to test
that MIME detection correctly identifies the primary format based on
magic bytes at offset 0.
"""
import argparse
import sys
import zlib
from pathlib import Path
# Magic byte signatures.
# Keys are the primary format names accepted by generate_polyglot(); each
# value is the byte sequence that function writes at offset 0 of its output.
SIGNATURES = {
    "png": b"\x89PNG\r\n\x1a\n",
    "gif": b"GIF89a",
    "jpeg": b"\xff\xd8\xff\xe0\x00\x10JFIF",
    "pdf": b"%PDF-1.4\n",
    "zip": b"PK\x03\x04",
    "gzip": b"\x1f\x8b\x08",
    "elf": b"\x7fELF",
    "pe": b"MZ",
}
# Payloads that could be dangerous if executed.
# Keys are the payload names accepted by generate_polyglot(); each value is
# embedded verbatim after the magic bytes and a run of null padding.
PAYLOADS = {
    "html": b"<html><body><script>alert('XSS')</script></body></html>",
    "js": b"/**/alert('XSS')//",
    "php": b"<?php system($_GET['cmd']); ?>",
    "shell": b"#!/bin/sh\necho pwned\n",
    "svg": b'<svg xmlns="http://www.w3.org/2000/svg"><script>alert(1)</script></svg>',
}
def generate_polyglot(primary: str, payload: str, size: int = 1024) -> bytes:
    """Build a generic polyglot: format magic first, payload embedded after it.

    The output layout is ``magic + 32 null bytes + payload``, followed by as
    many null bytes as are needed to reach ``size``. Content that is already
    at least ``size`` bytes long is returned without trailing padding.

    Args:
        primary: Key into SIGNATURES selecting the magic bytes at offset 0.
        payload: Key into PAYLOADS selecting the embedded payload.
        size: Minimum length of the returned content.

    Returns:
        The assembled polyglot content.

    Raises:
        ValueError: If ``primary`` or ``payload`` is not a known key.
    """
    if primary not in SIGNATURES:
        raise ValueError(f"Unknown primary format: {primary}")
    if payload not in PAYLOADS:
        raise ValueError(f"Unknown payload type: {payload}")

    blob = bytearray(SIGNATURES[primary])
    blob += bytes(32)  # null gap between the magic and the payload
    blob += PAYLOADS[payload]

    # Top up with nulls only when the content is shorter than requested.
    shortfall = size - len(blob)
    if shortfall > 0:
        blob += bytes(shortfall)
    return bytes(blob)
def generate_gif_js() -> bytes:
    """Generate GIF/JavaScript polyglot.

    The result starts with the GIF89a magic and a minimal 1x1 image
    structure, and ends with a JavaScript snippet appended after the GIF
    trailer byte.

    Returns:
        The GIF bytes followed by the JavaScript payload.
    """
    parts = (
        b"GIF89a",  # GIF magic
        # Logical screen: 1x1, no global color table, background 0, aspect 0
        bytes.fromhex("0100 0100 00 00 00"),
        # Image descriptor: separator, position 0,0, size 1x1, no local table
        bytes.fromhex("2c 0000 0000 0100 0100 00"),
        # Minimal image data followed by the trailer byte (0x3b)
        bytes.fromhex("02 01 01 00 3b"),
        # JS payload after GIF (browsers may try to execute)
        b"/**/=1;alert('XSS')//",
    )
    return b"".join(parts)
def generate_pdf_js() -> bytes:
    """Generate PDF with embedded JavaScript.

    The document holds three objects: a catalog whose /OpenAction points at
    object 3, an empty page tree, and a JavaScript action. The
    cross-reference table and the ``startxref`` value are computed from the
    actual byte positions of the objects, so they always agree with the
    assembled content.

    Returns:
        The PDF bytes, starting with ``%PDF-1.4`` and ending with ``%%EOF``.
    """
    objects = (
        b"1 0 obj<</Type/Catalog/Pages 2 0 R/OpenAction 3 0 R>>endobj\n",
        b"2 0 obj<</Type/Pages/Kids[]/Count 0>>endobj\n",
        b"3 0 obj<</S/JavaScript/JS(app.alert('XSS'))>>endobj\n",
    )

    # PDF header
    pdf = b"%PDF-1.4\n"

    # Record where each object starts while appending it.
    offsets = []
    for obj in objects:
        offsets.append(len(pdf))
        pdf += obj

    entry_count = len(objects) + 1  # object 0 is the free-list head
    xref_offset = len(pdf)
    pdf += b"xref\n0 %d\n" % entry_count
    pdf += b"0000000000 65535 f \n"
    # Each entry is exactly 20 bytes: 10-digit offset, generation, flag, EOL.
    pdf += b"".join(b"%010d 00000 n \n" % offset for offset in offsets)
    pdf += b"trailer<</Size %d/Root 1 0 R>>\n" % entry_count
    pdf += b"startxref\n%d\n%%%%EOF" % xref_offset
    return pdf
def generate_zip_html() -> bytes:
    """Generate ZIP with HTML file inside.

    The archive holds one stored (uncompressed) member, ``index.html``,
    whose content is a script tag. The local file header carries the real
    CRC-32 of the content, and the archive ends with a central directory
    record and an end-of-central-directory record so that ZIP readers can
    list and extract the member.

    Returns:
        The archive bytes, starting with the ``PK\\x03\\x04`` signature.
    """

    def u16(value: int) -> bytes:
        return value.to_bytes(2, "little")

    def u32(value: int) -> bytes:
        return value.to_bytes(4, "little")

    html = b"<script>alert(1)</script>"
    filename = b"index.html"

    # These fields appear in the same order in both the local file header
    # and the central directory record, so build them once.
    entry_fields = (
        u16(20)  # version needed to extract
        + u16(0)  # general purpose flags
        + u16(0)  # compression method (store)
        + u16(0)  # modification time
        + u16(0)  # modification date
        + u32(zlib.crc32(html) & 0xFFFFFFFF)  # CRC-32 of the content
        + u32(len(html))  # compressed size (equal: data is stored)
        + u32(len(html))  # uncompressed size
        + u16(len(filename))  # filename length
        + u16(0)  # extra field length
    )

    # Local file header + filename + file content
    local_entry = b"PK\x03\x04" + entry_fields + filename + html

    central_directory = (
        b"PK\x01\x02"
        + u16(20)  # version made by
        + entry_fields
        + u16(0)  # file comment length
        + u16(0)  # disk number where the member starts
        + u16(0)  # internal file attributes
        + u32(0)  # external file attributes
        + u32(0)  # offset of the local header (it is the first thing in the file)
        + filename
    )

    end_record = (
        b"PK\x05\x06"
        + u16(0)  # number of this disk
        + u16(0)  # disk holding the central directory
        + u16(1)  # entries on this disk
        + u16(1)  # entries in total
        + u32(len(central_directory))  # central directory size
        + u32(len(local_entry))  # central directory offset
        + u16(0)  # archive comment length
    )

    return local_entry + central_directory + end_record
def _png_chunk(kind: bytes, data: bytes) -> bytes:
    """Serialize one PNG chunk: length, type, data, then CRC-32 of type + data."""
    checksum = zlib.crc32(kind + data) & 0xFFFFFFFF
    return len(data).to_bytes(4, "big") + kind + data + checksum.to_bytes(4, "big")


def generate_png_html() -> bytes:
    """Generate PNG with HTML in trailing data.

    The image is a 1x1, 8-bit RGB PNG made of IHDR, IDAT and IEND chunks.
    Every chunk carries its computed CRC-32, and the IDAT data is a real
    zlib stream holding the single scanline, so the image part decodes
    cleanly. The HTML payload is appended after the IEND chunk.

    Returns:
        The PNG bytes followed by the HTML payload.
    """
    # IHDR chunk
    ihdr_data = (
        b"\x00\x00\x00\x01"  # width
        b"\x00\x00\x00\x01"  # height
        b"\x08"  # bit depth
        b"\x02"  # color type (RGB)
        b"\x00"  # compression
        b"\x00"  # filter
        b"\x00"  # interlace
    )

    # One scanline: filter type 0 followed by a single RGB pixel.
    scanline = b"\x00" + b"\xff\x00\x00"

    png = b"\x89PNG\r\n\x1a\n"
    png += _png_chunk(b"IHDR", ihdr_data)
    png += _png_chunk(b"IDAT", zlib.compress(scanline))
    png += _png_chunk(b"IEND", b"")
    # HTML payload after PNG (should be ignored)
    png += b"<html><script>alert(1)</script></html>"
    return png
# Polyglot generators registry.
# Maps the type name given on the command line to a (description, generator)
# pair: list_polyglots() prints the description, and main() calls the
# generator with no arguments to obtain the file content.
POLYGLOTS = {
    "gif-js": ("GIF with embedded JavaScript", generate_gif_js),
    "pdf-js": ("PDF with JavaScript action", generate_pdf_js),
    "zip-html": ("ZIP containing HTML", generate_zip_html),
    "png-html": ("PNG with trailing HTML", generate_png_html),
}
def list_polyglots() -> None:
    """Print the named polyglot types and the generic format/payload names.

    Output goes to stdout: one line per POLYGLOTS entry (name and
    description), then the SIGNATURES and PAYLOADS keys as comma-separated
    lists.
    """
    print("Available polyglots:")
    print()
    for name, entry in POLYGLOTS.items():
        desc = entry[0]  # the generator in entry[1] is not needed for listing
        print(f" {name:12} {desc}")
    print()

    primary_names = ", ".join(SIGNATURES)
    payload_names = ", ".join(PAYLOADS)
    print("Generic formats:")
    print(f" primary: {primary_names}")
    print(f" payloads: {payload_names}")
def main() -> int:
    """Command-line entry point: generate one polyglot and write it out.

    The positional ``type`` is either a key of POLYGLOTS (e.g. ``gif-js``)
    or a ``primary:payload`` pair handed to generate_polyglot(). ``--size``
    only affects the ``primary:payload`` form; the named generators take no
    arguments. With ``--list`` or no type, the available types are printed.

    Returns:
        0 on success, 1 when the requested type, primary format or payload
        is not recognized (the reason is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        description="Generate polyglot files for MIME confusion testing"
    )
    parser.add_argument(
        "type",
        nargs="?",
        help="Polyglot type (e.g., gif-js, png-html) or primary:payload",
    )
    parser.add_argument("-o", "--output", help="Output file (default: stdout)")
    parser.add_argument("-l", "--list", action="store_true", help="List polyglot types")
    parser.add_argument("-s", "--size", type=int, default=1024, help="Minimum size (default: 1024)")
    args = parser.parse_args()

    if args.list or not args.type:
        list_polyglots()
        return 0

    # Generate polyglot
    if args.type in POLYGLOTS:
        _, generator = POLYGLOTS[args.type]
        content = generator()
    elif ":" in args.type:
        primary, payload = args.type.split(":", 1)
        try:
            content = generate_polyglot(primary, payload, args.size)
        except ValueError as exc:
            # Report an unknown primary/payload the same way as an unknown
            # type name instead of letting the traceback escape.
            print(exc, file=sys.stderr)
            print("Use --list to see available types", file=sys.stderr)
            return 1
    else:
        print(f"Unknown polyglot type: {args.type}", file=sys.stderr)
        print("Use --list to see available types", file=sys.stderr)
        return 1

    # Output
    if args.output:
        Path(args.output).write_bytes(content)
        print(f"Written {len(content)} bytes to {args.output}")
    else:
        # Binary content must bypass the text layer of stdout.
        sys.stdout.buffer.write(content)
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())