add polyglot generator and MIME confusion tests
Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Successful in 20s
CI / Security Tests (push) Has been skipped
CI / Advanced Security Tests (push) Has been skipped
Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Successful in 20s
CI / Security Tests (push) Has been skipped
CI / Advanced Security Tests (push) Has been skipped
- polyglot_generator.py: creates files valid in multiple formats
- 41 new tests verify MIME detection handles polyglots correctly
- Document rate limiting behavior under attack
- Clarify DMG/ISO/DOCX detection limitations
This commit is contained in:
233
tests/security/polyglot_generator.py
Normal file
233
tests/security/polyglot_generator.py
Normal file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Polyglot file generator for MIME confusion testing.
|
||||
|
||||
Creates files that are technically valid in multiple formats to test
|
||||
that MIME detection correctly identifies the primary format based on
|
||||
magic bytes at offset 0.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Magic byte signatures: the leading bytes written at offset 0 of a generated
# file for each supported primary format.
SIGNATURES = {
    "png": b"\x89PNG\r\n\x1a\n",
    "gif": b"GIF89a",
    "jpeg": b"\xff\xd8\xff\xe0\x00\x10JFIF",
    "pdf": b"%PDF-1.4\n",
    "zip": b"PK\x03\x04",
    "gzip": b"\x1f\x8b\x08",
    "elf": b"\x7fELF",
    "pe": b"MZ",
}
|
||||
|
||||
# Payloads that could be dangerous if executed (i.e. if a consumer trusted the
# embedded content instead of the leading magic bytes).
PAYLOADS = {
    "html": b"<html><body><script>alert('XSS')</script></body></html>",
    "js": b"/**/alert('XSS')//",
    "php": b"<?php system($_GET['cmd']); ?>",
    "shell": b"#!/bin/sh\necho pwned\n",
    "svg": b'<svg xmlns="http://www.w3.org/2000/svg"><script>alert(1)</script></svg>',
}
|
||||
|
||||
|
||||
def generate_polyglot(primary: str, payload: str, size: int = 1024) -> bytes:
    """Generate a polyglot file with primary format magic and embedded payload.

    The result is laid out as: magic bytes, 32 NUL bytes, the payload, then
    NUL padding up to ``size`` bytes.  Content that is already at least
    ``size`` bytes long is returned as-is; it is never truncated.

    Args:
        primary: Primary format; must be a key of ``SIGNATURES``
            (png, gif, jpeg, pdf, zip, etc.).
        payload: Payload type to embed; must be a key of ``PAYLOADS``
            (html, js, php, shell, svg).
        size: Minimum file size in bytes (padded with nulls).

    Returns:
        Polyglot file content.

    Raises:
        ValueError: If ``primary`` or ``payload`` is not a known key.
    """
    if primary not in SIGNATURES:
        raise ValueError(f"Unknown primary format: {primary}")
    if payload not in PAYLOADS:
        raise ValueError(f"Unknown payload type: {payload}")

    # Build polyglot: magic + fixed NUL gap + payload
    content = SIGNATURES[primary] + b"\x00" * 32 + PAYLOADS[payload]

    # Pad to minimum size; ljust leaves longer content untouched.
    return content.ljust(size, b"\x00")
|
||||
|
||||
|
||||
def generate_gif_js() -> bytes:
    """Generate GIF/JavaScript polyglot.

    Emits the ``GIF89a`` magic, a minimal 1x1 GIF body without color tables,
    and a JavaScript snippet appended after the GIF trailer byte.

    Returns:
        The polyglot file content.
    """
    # "GIF89a" is the GIF magic and is also a syntactically valid JS identifier.
    gif_header = b"GIF89a"

    # Minimal GIF structure
    gif_data = (
        b"\x01\x00\x01\x00"  # 1x1 dimensions
        b"\x00"  # no global color table
        b"\x00"  # background color
        b"\x00"  # aspect ratio
        b"\x2c"  # image descriptor
        b"\x00\x00\x00\x00"  # position
        b"\x01\x00\x01\x00"  # dimensions
        b"\x00"  # no local color table
        b"\x02\x01\x01\x00\x3b"  # minimal image data + trailer
    )

    # JS payload after GIF (browsers may try to execute).
    # NOTE(review): the binary GIF body is emitted *before* the ``/**/`` below,
    # so it is not enclosed in a JS comment - confirm this layout is intended.
    js_payload = b"/**/=1;alert('XSS')//"

    return gif_header + gif_data + js_payload
|
||||
|
||||
|
||||
def generate_pdf_js() -> bytes:
    """Generate PDF with embedded JavaScript.

    The document consists of a catalog whose ``/OpenAction`` references a
    JavaScript action, an empty page tree, a cross-reference table, and a
    trailer.  Cross-reference offsets and the ``startxref`` value are derived
    from the actual byte positions of the objects so they stay correct if an
    object body is edited.

    Returns:
        The PDF file content.
    """
    # PDF header
    header = b"%PDF-1.4\n"

    # Minimal PDF structure with JS; object N is stored at index N - 1.
    objects = (
        b"1 0 obj<</Type/Catalog/Pages 2 0 R/OpenAction 3 0 R>>endobj\n",
        b"2 0 obj<</Type/Pages/Kids[]/Count 0>>endobj\n",
        b"3 0 obj<</S/JavaScript/JS(app.alert('XSS'))>>endobj\n",
    )

    # Record the byte offset at which each object starts.
    offsets = []
    position = len(header)
    for obj in objects:
        offsets.append(position)
        position += len(obj)
    xref_offset = position  # the xref table follows the last object

    entry_count = len(objects) + 1  # +1 for the free-list head (object 0)

    parts = [header, *objects]
    parts.append(b"xref\n0 %d\n" % entry_count)
    parts.append(b"0000000000 65535 f \n")
    # Each in-use entry is a 10-digit offset, 5-digit generation, and "n".
    parts.extend(b"%010d 00000 n \n" % offset for offset in offsets)
    parts.append(b"trailer<</Size %d/Root 1 0 R>>\n" % entry_count)
    # "%%%%" renders as the literal "%%" that the original marker used.
    parts.append(b"startxref\n%d\n%%%%EOF" % xref_offset)
    return b"".join(parts)
|
||||
|
||||
|
||||
def generate_zip_html() -> bytes:
    """Generate ZIP with HTML file inside.

    Builds a single-entry, uncompressed (stored) archive containing
    ``index.html``.  The local file header carries the real CRC-32 of the
    content, and a central directory plus end-of-central-directory record are
    appended so that archive readers can open the result.

    Returns:
        The ZIP file content, starting with the ``PK\\x03\\x04`` signature.
    """
    import zlib  # function-scope import keeps this block self-contained

    filename = b"index.html"
    html = b"<script>alert(1)</script>"

    crc = zlib.crc32(html).to_bytes(4, "little")
    size = len(html).to_bytes(4, "little")  # stored: compressed == uncompressed

    # Fields that appear identically, in this order, in both the local file
    # header and the central directory record.
    shared_fields = (
        b"\x14\x00"  # version needed
        + b"\x00\x00"  # flags
        + b"\x00\x00"  # compression (store)
        + b"\x00\x00\x00\x00"  # time/date
        + crc  # CRC32
        + size  # compressed size
        + size  # uncompressed size
        + len(filename).to_bytes(2, "little")  # filename length
        + b"\x00\x00"  # extra field length
    )

    # Local file header + filename + file content
    local_entry = b"PK\x03\x04" + shared_fields + filename + html

    central_directory = (
        b"PK\x01\x02"  # central directory signature
        + b"\x14\x00"  # version made by
        + shared_fields
        + b"\x00\x00"  # file comment length
        + b"\x00\x00"  # disk number start
        + b"\x00\x00"  # internal file attributes
        + b"\x00\x00\x00\x00"  # external file attributes
        + b"\x00\x00\x00\x00"  # offset of local header (entry is at byte 0)
        + filename
    )

    end_of_central_directory = (
        b"PK\x05\x06"  # end of central directory signature
        + b"\x00\x00"  # number of this disk
        + b"\x00\x00"  # disk where central directory starts
        + b"\x01\x00"  # central directory entries on this disk
        + b"\x01\x00"  # total central directory entries
        + len(central_directory).to_bytes(4, "little")  # central directory size
        + len(local_entry).to_bytes(4, "little")  # central directory offset
        + b"\x00\x00"  # archive comment length
    )

    return local_entry + central_directory + end_of_central_directory
|
||||
|
||||
|
||||
def generate_png_html() -> bytes:
    """Generate PNG with HTML in trailing data.

    Builds a 1x1, 8-bit RGB PNG (IHDR, IDAT, IEND) with correct chunk CRCs
    and a well-formed zlib stream in IDAT, then appends an HTML snippet after
    the IEND chunk.

    Returns:
        The PNG file content followed by the trailing HTML payload.
    """
    import zlib  # function-scope import keeps this block self-contained

    def chunk(kind: bytes, data: bytes) -> bytes:
        """Serialize one PNG chunk: length, type, data, CRC-32 of type+data."""
        crc = zlib.crc32(kind + data)
        return len(data).to_bytes(4, "big") + kind + data + crc.to_bytes(4, "big")

    # PNG signature
    png = b"\x89PNG\r\n\x1a\n"

    # IHDR chunk
    ihdr_data = (
        b"\x00\x00\x00\x01"  # width
        b"\x00\x00\x00\x01"  # height
        b"\x08"  # bit depth
        b"\x02"  # color type (RGB)
        b"\x00"  # compression
        b"\x00"  # filter
        b"\x00"  # interlace
    )
    png += chunk(b"IHDR", ihdr_data)

    # IDAT chunk (minimal): one scanline = filter-type byte + one RGB pixel.
    scanline = b"\x00" + b"\x00\x00\x00"
    png += chunk(b"IDAT", zlib.compress(scanline))

    # IEND chunk
    png += chunk(b"IEND", b"")

    # HTML payload after PNG (should be ignored)
    png += b"<html><script>alert(1)</script></html>"

    return png
|
||||
|
||||
|
||||
# Polyglot generators registry: maps the CLI type name to a
# (human-readable description, zero-argument generator function) pair.
POLYGLOTS = {
    "gif-js": ("GIF with embedded JavaScript", generate_gif_js),
    "pdf-js": ("PDF with JavaScript action", generate_pdf_js),
    "zip-html": ("ZIP containing HTML", generate_zip_html),
    "png-html": ("PNG with trailing HTML", generate_png_html),
}
|
||||
|
||||
|
||||
def list_polyglots() -> None:
    """List available polyglot types.

    Prints each ``POLYGLOTS`` entry with its description, followed by the
    keys of ``SIGNATURES`` and ``PAYLOADS`` that can be combined as a generic
    ``primary:payload`` type.  Output goes to stdout.
    """
    print("Available polyglots:")
    print()
    for name, (desc, _) in POLYGLOTS.items():
        print(f" {name:12} {desc}")
    print()
    print("Generic formats:")
    print(f" primary: {', '.join(SIGNATURES.keys())}")
    print(f" payloads: {', '.join(PAYLOADS.keys())}")
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: parse arguments and emit a polyglot file.

    The positional ``type`` is either a key of ``POLYGLOTS`` or a generic
    ``primary:payload`` pair handled by ``generate_polyglot``.  With
    ``--list`` or no type at all, the available types are listed instead.

    Returns:
        Process exit status: 0 on success (including listing), 1 when the
        requested type, primary format, or payload is unknown.
    """
    parser = argparse.ArgumentParser(
        description="Generate polyglot files for MIME confusion testing"
    )
    parser.add_argument(
        "type",
        nargs="?",
        help="Polyglot type (e.g., gif-js, png-html) or primary:payload",
    )
    parser.add_argument("-o", "--output", help="Output file (default: stdout)")
    parser.add_argument("-l", "--list", action="store_true", help="List polyglot types")
    parser.add_argument("-s", "--size", type=int, default=1024, help="Minimum size (default: 1024)")

    args = parser.parse_args()

    if args.list or not args.type:
        list_polyglots()
        return 0

    # Generate polyglot
    if args.type in POLYGLOTS:
        _, generator = POLYGLOTS[args.type]
        content = generator()
    elif ":" in args.type:
        primary, payload = args.type.split(":", 1)
        try:
            content = generate_polyglot(primary, payload, args.size)
        except ValueError as exc:
            # Unknown primary/payload: report it like any other unknown type
            # instead of surfacing a traceback to the CLI user.
            print(exc, file=sys.stderr)
            print("Use --list to see available types", file=sys.stderr)
            return 1
    else:
        print(f"Unknown polyglot type: {args.type}", file=sys.stderr)
        print("Use --list to see available types", file=sys.stderr)
        return 1

    # Output
    if args.output:
        Path(args.output).write_bytes(content)
        print(f"Written {len(content)} bytes to {args.output}")
    else:
        # Binary content must bypass the text layer of stdout.
        sys.stdout.buffer.write(content)

    return 0
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; propagate main()'s status code.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user