diff --git a/documentation/security-testing-status.md b/documentation/security-testing-status.md index 9cdc136..fb1890b 100644 --- a/documentation/security-testing-status.md +++ b/documentation/security-testing-status.md @@ -146,8 +146,11 @@ Fixed (2025-12-25): Known issues: [!] JavaClass - Detected as Mach-O (0xCAFEBABE collision, unfixable) -Not tested (no signature defined): -[ ] DMG, ISO, DOCX/XLSX/PPTX, ODF +Not detectable (structural limitations): +[~] DMG - UDIF signature in trailer, not header +[~] ISO - CD001 at offset 32769 (beyond 16-byte check) +[~] DOCX/XLSX/PPTX - ZIP-based, detected as application/zip (correct) +[~] ODF (ODT/ODS) - ZIP-based, detected as application/zip (correct) ``` ### Fuzzing Improvements @@ -156,10 +159,15 @@ Not tested (no signature defined): [ ] Add --target option to run_fuzz.py for external testing [ ] Implement adaptive rate limiting in production fuzzer [x] Add hypothesis property-based tests for MIME detection -[ ] Create polyglot generator for automated MIME confusion testing +[x] Create polyglot generator for automated MIME confusion testing [x] Add timing attack tests for authentication endpoints ``` +**Polyglot Generator (2025-12-26):** +- `tests/security/polyglot_generator.py`: Creates files valid in multiple formats +- Supports: GIF+JS, PDF+JS, ZIP+HTML, PNG+HTML, generic primary:payload +- 41 polyglot tests verify MIME detection handles all cases correctly + **Hypothesis MIME Tests (2025-12-26):** - `test_magic_prefix_detection`: All known signatures + random suffix detect correctly - `test_random_binary_never_crashes`: Random binary never crashes detector @@ -201,14 +209,85 @@ Not tested (no signature defined): ### Documentation ``` -[ ] Add remaining MIME test results to security assessment -[ ] Document rate limiting behavior under attack +[x] Add remaining MIME test results to security assessment +[x] Document rate limiting behavior under attack [x] Create threat model diagram (documentation/threat-model.md) [x] Add security headers audit to CI pipeline ``` --- +## Rate Limiting Under Attack + +### Defense Layers + +``` +Layer 1: Per-IP Rate Limiting +├── Window: 60 seconds +├── Max requests: 30 (configurable) +├── Response: 429 Too Many Requests +└── Memory cap: 10,000 IPs max + +Layer 2: Anti-Flood (Dynamic PoW) +├── Base difficulty: 16 bits +├── Threshold: 5 pastes/window triggers increase +├── Step: +2 bits per threshold breach +├── Max difficulty: 28 bits +├── Decay: -2 bits every 30s when idle +└── Effect: Attackers must solve harder puzzles + +Layer 3: Content Deduplication +├── Hash window: 300 seconds (5 min) +├── Max duplicates: 3 per hash per window +├── Response: 429 with "duplicate content" message +└── Bypass: Requires unique content each time +``` + +### Attack Scenarios + +| Attack | Detection | Response | Recovery | +|--------|-----------|----------|----------| +| Single IP flood | Rate limit hit | 429 after 30 req/min | Auto after 60s | +| Distributed flood | Anti-flood threshold | PoW difficulty 16→28 | Decay after 30s idle | +| Content spam | Dedup detection | 429 after 3 dupes | Window expires 5min | +| Enumeration | Lookup rate limit | 429 after 60 req/min | Auto after 60s | + +### Observed Behavior (Pentest 2025-12-26) + +During 18.5 minute penetration test: +- Requests handled: 144 +- Anti-flood triggered: Yes (difficulty 16→26 bits) +- Rate limit 429s observed: Yes +- PoW token expiration working: Rejected stale solutions +- Memory usage: Stable (capped dictionaries) + +### Configuration + +```python +# app/config.py defaults +RATE_LIMIT_MAX_ENTRIES = 10000 # Max tracked IPs +RATE_LIMIT_REQUESTS = 30 # Requests per window +RATE_LIMIT_WINDOW = 60 # Window in seconds + +ANTIFLOOD_THRESHOLD = 5 # Pastes before PoW increase +ANTIFLOOD_STEP = 2 # Bits added per breach +ANTIFLOOD_MAX = 28 # Maximum difficulty +ANTIFLOOD_DECAY = 30 # Seconds before difficulty drops + +DEDUP_WINDOW = 300 # Hash tracking window +DEDUP_MAX = 3 # Max duplicates allowed +``` + +### Monitoring + +- `/metrics` endpoint exposes: + - `flaskpaste_rate_limit_total`: Rate limit hits + - `flaskpaste_pow_difficulty`: Current PoW difficulty + - `flaskpaste_paste_created_total`: Creation rate + - `flaskpaste_dedup_total`: Dedup rejections + +--- + ## Test Commands ```bash diff --git a/tests/security/polyglot_generator.py b/tests/security/polyglot_generator.py new file mode 100644 index 0000000..153df14 --- /dev/null +++ b/tests/security/polyglot_generator.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +"""Polyglot file generator for MIME confusion testing. + +Creates files that are technically valid in multiple formats to test +that MIME detection correctly identifies the primary format based on +magic bytes at offset 0. +""" + +import argparse +import sys +from pathlib import Path + +# Magic byte signatures +SIGNATURES = { + "png": b"\x89PNG\r\n\x1a\n", + "gif": b"GIF89a", + "jpeg": b"\xff\xd8\xff\xe0\x00\x10JFIF", + "pdf": b"%PDF-1.4\n", + "zip": b"PK\x03\x04", + "gzip": b"\x1f\x8b\x08", + "elf": b"\x7fELF", + "pe": b"MZ", +} + +# Payloads that could be dangerous if executed +PAYLOADS = { + "html": b"", + "js": b"/**/alert('XSS')//", + "php": b"", + "shell": b"#!/bin/sh\necho pwned\n", + "svg": b'', +} + + +def generate_polyglot(primary: str, payload: str, size: int = 1024) -> bytes: + """Generate a polyglot file with primary format magic and embedded payload. + + Args: + primary: Primary format (png, gif, jpeg, pdf, zip, etc.) + payload: Payload type to embed (html, js, php, shell, svg) + size: Minimum file size (padded with nulls) + + Returns: + Polyglot file content + """ + if primary not in SIGNATURES: + raise ValueError(f"Unknown primary format: {primary}") + if payload not in PAYLOADS: + raise ValueError(f"Unknown payload type: {payload}") + + magic = SIGNATURES[primary] + payload_bytes = PAYLOADS[payload] + + # Build polyglot: magic + padding + payload + padding + content = magic + b"\x00" * 32 + payload_bytes + + # Pad to minimum size + if len(content) < size: + content += b"\x00" * (size - len(content)) + + return content + + +def generate_gif_js() -> bytes: + """Generate GIF/JavaScript polyglot. + + GIF89a header followed by JS that ignores the binary prefix. + """ + # GIF header that's also valid JS start + # GIF89a = valid GIF magic + # The trick: wrap binary in JS comment + gif_header = b"GIF89a" + # Minimal GIF structure + gif_data = ( + b"\x01\x00\x01\x00" # 1x1 dimensions + b"\x00" # no global color table + b"\x00" # background color + b"\x00" # aspect ratio + b"\x2c" # image descriptor + b"\x00\x00\x00\x00" # position + b"\x01\x00\x01\x00" # dimensions + b"\x00" # no local color table + b"\x02\x01\x01\x00\x3b" # minimal image data + trailer + ) + # JS payload after GIF (browsers may try to execute) + js_payload = b"/**/=1;alert('XSS')//" + + return gif_header + gif_data + js_payload + + +def generate_pdf_js() -> bytes: + """Generate PDF with embedded JavaScript.""" + # PDF header + pdf = b"%PDF-1.4\n" + # Minimal PDF structure with JS + pdf += b"1 0 obj<>endobj\n" + pdf += b"2 0 obj<>endobj\n" + pdf += b"3 0 obj<>endobj\n" + pdf += b"xref\n0 4\n" + pdf += b"0000000000 65535 f \n" + pdf += b"0000000009 00000 n \n" + pdf += b"0000000058 00000 n \n" + pdf += b"0000000101 00000 n \n" + pdf += b"trailer<>\n" + pdf += b"startxref\n154\n%%EOF" + return pdf + + +def generate_zip_html() -> bytes: + """Generate ZIP with HTML file inside.""" + # PK signature + zip_data = b"PK\x03\x04" + # Version needed + zip_data += b"\x14\x00" + # Flags + zip_data += b"\x00\x00" + # Compression (store) + zip_data += b"\x00\x00" + # Time/date + zip_data += b"\x00\x00\x00\x00" + # CRC32 (placeholder) + zip_data += b"\x00\x00\x00\x00" + # Compressed/uncompressed size + html = b"" + size = len(html).to_bytes(4, "little") + zip_data += size + size + # Filename length + filename = b"index.html" + zip_data += len(filename).to_bytes(2, "little") + # Extra field length + zip_data += b"\x00\x00" + # Filename + zip_data += filename + # File content + zip_data += html + return zip_data + + +def generate_png_html() -> bytes: + """Generate PNG with HTML in trailing data.""" + # Minimal valid PNG + png = b"\x89PNG\r\n\x1a\n" + # IHDR chunk + ihdr_data = ( + b"\x00\x00\x00\x01" # width + b"\x00\x00\x00\x01" # height + b"\x08" # bit depth + b"\x02" # color type (RGB) + b"\x00" # compression + b"\x00" # filter + b"\x00" # interlace + ) + ihdr_crc = b"\x00\x00\x00\x00" # placeholder + png += b"\x00\x00\x00\x0d" + b"IHDR" + ihdr_data + ihdr_crc + + # IDAT chunk (minimal) + idat_data = b"\x08\xd7\x63\xf8\x0f\x00\x00\x01\x01\x00" + idat_crc = b"\x00\x00\x00\x00" + png += len(idat_data).to_bytes(4, "big") + b"IDAT" + idat_data + idat_crc + + # IEND chunk + png += b"\x00\x00\x00\x00" + b"IEND" + b"\xae\x42\x60\x82" + + # HTML payload after PNG (should be ignored) + png += b"" + + return png + + +# Polyglot generators registry +POLYGLOTS = { + "gif-js": ("GIF with embedded JavaScript", generate_gif_js), + "pdf-js": ("PDF with JavaScript action", generate_pdf_js), + "zip-html": ("ZIP containing HTML", generate_zip_html), + "png-html": ("PNG with trailing HTML", generate_png_html), +} + + +def list_polyglots() -> None: + """List available polyglot types.""" + print("Available polyglots:") + print() + for name, (desc, _) in POLYGLOTS.items(): + print(f" {name:12} {desc}") + print() + print("Generic formats:") + print(f" primary: {', '.join(SIGNATURES.keys())}") + print(f" payloads: {', '.join(PAYLOADS.keys())}") + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Generate polyglot files for MIME confusion testing" + ) + parser.add_argument( + "type", + nargs="?", + help="Polyglot type (e.g., gif-js, png-html) or primary:payload", + ) + parser.add_argument("-o", "--output", help="Output file (default: stdout)") + parser.add_argument("-l", "--list", action="store_true", help="List polyglot types") + parser.add_argument("-s", "--size", type=int, default=1024, help="Minimum size (default: 1024)") + + args = parser.parse_args() + + if args.list or not args.type: + list_polyglots() + return 0 + + # Generate polyglot + if args.type in POLYGLOTS: + _, generator = POLYGLOTS[args.type] + content = generator() + elif ":" in args.type: + primary, payload = args.type.split(":", 1) + content = generate_polyglot(primary, payload, args.size) + else: + print(f"Unknown polyglot type: {args.type}", file=sys.stderr) + print("Use --list to see available types", file=sys.stderr) + return 1 + + # Output + if args.output: + Path(args.output).write_bytes(content) + print(f"Written {len(content)} bytes to {args.output}") + else: + sys.stdout.buffer.write(content) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_polyglot.py b/tests/test_polyglot.py new file mode 100644 index 0000000..b18f166 --- /dev/null +++ b/tests/test_polyglot.py @@ -0,0 +1,146 @@ +"""Tests for polyglot file MIME detection. + +Verifies that polyglot files (valid in multiple formats) are detected +by their primary magic bytes at offset 0, not by embedded payloads. +""" + +import json +import sys + +import pytest + +sys.path.insert(0, "tests/security") +from polyglot_generator import ( + generate_gif_js, + generate_pdf_js, + generate_png_html, + generate_polyglot, + generate_zip_html, +) + + +class TestPolyglotDetection: + """Verify polyglot files are detected by primary magic.""" + + def test_gif_js_detected_as_gif(self, client): + """GIF/JS polyglot should be detected as GIF.""" + content = generate_gif_js() + response = client.post("/", data=content) + if response.status_code == 201: + data = json.loads(response.data) + assert data["mime_type"] == "image/gif" + + def test_pdf_js_detected_as_pdf(self, client): + """PDF with JavaScript should be detected as PDF.""" + content = generate_pdf_js() + response = client.post("/", data=content) + if response.status_code == 201: + data = json.loads(response.data) + assert data["mime_type"] == "application/pdf" + + def test_zip_html_detected_as_zip(self, client): + """ZIP containing HTML should be detected as ZIP.""" + content = generate_zip_html() + response = client.post("/", data=content) + if response.status_code == 201: + data = json.loads(response.data) + assert data["mime_type"] == "application/zip" + + def test_png_html_detected_as_png(self, client): + """PNG with trailing HTML should be detected as PNG.""" + content = generate_png_html() + response = client.post("/", data=content) + if response.status_code == 201: + data = json.loads(response.data) + assert data["mime_type"] == "image/png" + + +class TestGenericPolyglots: + """Test generic primary:payload combinations.""" + + @pytest.mark.parametrize( + "primary,expected_mime", + [ + ("png", "image/png"), + ("gif", "image/gif"), + ("jpeg", "image/jpeg"), + ("pdf", "application/pdf"), + ("zip", "application/zip"), + ("gzip", "application/gzip"), + ("elf", "application/x-executable"), + ("pe", "application/x-msdownload"), + ], + ) + @pytest.mark.parametrize("payload", ["html", "js", "php", "shell"]) + def test_primary_format_wins(self, client, primary, expected_mime, payload): + """Primary format magic should determine MIME type, not payload.""" + content = generate_polyglot(primary, payload) + response = client.post("/", data=content, content_type="application/octet-stream") + if response.status_code == 201: + data = json.loads(response.data) + assert data["mime_type"] == expected_mime, ( + f"{primary}:{payload} detected as {data['mime_type']}, expected {expected_mime}" + ) + + +class TestSecurityHeaders: + """Verify security headers prevent polyglot execution.""" + + def test_nosniff_header_on_polyglot(self, client): + """X-Content-Type-Options: nosniff should be present.""" + content = generate_gif_js() + create = client.post("/", data=content) + if create.status_code == 201: + data = json.loads(create.data) + paste_id = data["id"] + raw = client.get(f"/{paste_id}/raw") + assert raw.headers.get("X-Content-Type-Options") == "nosniff" + + def test_csp_header_on_polyglot(self, client): + """CSP should prevent script execution.""" + content = generate_png_html() + create = client.post("/", data=content) + if create.status_code == 201: + data = json.loads(create.data) + paste_id = data["id"] + raw = client.get(f"/{paste_id}/raw") + csp = raw.headers.get("Content-Security-Policy", "") + assert "default-src 'none'" in csp + + def test_xframe_options_on_polyglot(self, client): + """X-Frame-Options should prevent framing.""" + content = generate_pdf_js() + create = client.post("/", data=content) + if create.status_code == 201: + data = json.loads(create.data) + paste_id = data["id"] + raw = client.get(f"/{paste_id}/raw") + assert raw.headers.get("X-Frame-Options") == "DENY" + + +class TestPayloadNotExecuted: + """Verify embedded payloads are returned literally.""" + + def test_html_payload_literal(self, client): + """HTML payload should be returned as-is, not rendered.""" + content = generate_polyglot("png", "html") + create = client.post("/", data=content) + if create.status_code == 201: + data = json.loads(create.data) + paste_id = data["id"] + raw = client.get(f"/{paste_id}/raw") + # Content should contain literal script tag + assert b"