Files
flaskpaste/tests/test_polyglot.py
Username fb45005766
Some checks failed
CI / Lint & Format (push) Failing after 16s
CI / Unit Tests (push) Has been skipped
CI / Memory Leak Check (push) Has been skipped
CI / SBOM Generation (push) Has been skipped
CI / Security Scan (push) Successful in 20s
CI / Security Tests (push) Has been skipped
CI / Advanced Security Tests (push) Has been skipped
add polyglot generator and MIME confusion tests
- polyglot_generator.py: creates files valid in multiple formats
- 41 new tests verify MIME detection handles polyglots correctly
- Document rate limiting behavior under attack
- Clarify DMG/ISO/DOCX detection limitations
2025-12-26 18:25:46 +01:00

147 lines
5.4 KiB
Python

"""Tests for polyglot file MIME detection.
Verifies that polyglot files (valid in multiple formats) are detected
by their primary magic bytes at offset 0, not by embedded payloads.
"""
import json
import sys
import pytest
sys.path.insert(0, "tests/security")
from polyglot_generator import (
generate_gif_js,
generate_pdf_js,
generate_png_html,
generate_polyglot,
generate_zip_html,
)
class TestPolyglotDetection:
    """Verify polyglot files are detected by primary magic.

    Each test uploads a generated polyglot with ``POST /`` and checks the
    ``mime_type`` field of the JSON body returned for a 201 response.
    """

    @staticmethod
    def _detected_mime(client, content):
        """Upload *content* and return the ``mime_type`` from the response.

        The calling test is skipped when the upload is not answered with
        HTTP 201, so a rejected upload is reported as a skip instead of a
        pass that never reached its assertion.
        """
        response = client.post("/", data=content)
        if response.status_code != 201:
            pytest.skip(f"upload not accepted (HTTP {response.status_code})")
        return json.loads(response.data)["mime_type"]

    def test_gif_js_detected_as_gif(self, client):
        """GIF/JS polyglot should be detected as GIF."""
        assert self._detected_mime(client, generate_gif_js()) == "image/gif"

    def test_pdf_js_detected_as_pdf(self, client):
        """PDF with JavaScript should be detected as PDF."""
        assert self._detected_mime(client, generate_pdf_js()) == "application/pdf"

    def test_zip_html_detected_as_zip(self, client):
        """ZIP containing HTML should be detected as ZIP."""
        assert self._detected_mime(client, generate_zip_html()) == "application/zip"

    def test_png_html_detected_as_png(self, client):
        """PNG with trailing HTML should be detected as PNG."""
        assert self._detected_mime(client, generate_png_html()) == "image/png"
class TestGenericPolyglots:
    """Test generic primary:payload combinations."""

    @pytest.mark.parametrize(
        "primary,expected_mime",
        [
            ("png", "image/png"),
            ("gif", "image/gif"),
            ("jpeg", "image/jpeg"),
            ("pdf", "application/pdf"),
            ("zip", "application/zip"),
            ("gzip", "application/gzip"),
            ("elf", "application/x-executable"),
            ("pe", "application/x-msdownload"),
        ],
    )
    @pytest.mark.parametrize("payload", ["html", "js", "php", "shell"])
    def test_primary_format_wins(self, client, primary, expected_mime, payload):
        """Primary format magic should determine MIME type, not payload.

        Runs once per (primary, payload) pair. The polyglot is posted as
        ``application/octet-stream`` and the ``mime_type`` field of the JSON
        response is compared against ``expected_mime``.
        """
        content = generate_polyglot(primary, payload)
        response = client.post("/", data=content, content_type="application/octet-stream")

        # Report a rejected upload as a skip; silently passing here would
        # hide the fact that the MIME assertion below never ran.
        if response.status_code != 201:
            pytest.skip(
                f"{primary}:{payload} upload not accepted (HTTP {response.status_code})"
            )

        data = json.loads(response.data)
        assert data["mime_type"] == expected_mime, (
            f"{primary}:{payload} detected as {data['mime_type']}, expected {expected_mime}"
        )
class TestSecurityHeaders:
    """Verify security headers prevent polyglot execution.

    Each test uploads a polyglot with ``POST /``, fetches it back from
    ``/<id>/raw`` and inspects the headers of that raw response.
    """

    @staticmethod
    def _fetch_raw(client, content):
        """Upload *content* and return the response of ``GET /<id>/raw``.

        The calling test is skipped when the upload is not answered with
        HTTP 201, so a rejected upload is reported as a skip instead of a
        pass that never checked any header.
        """
        create = client.post("/", data=content)
        if create.status_code != 201:
            pytest.skip(f"upload not accepted (HTTP {create.status_code})")
        paste_id = json.loads(create.data)["id"]
        return client.get(f"/{paste_id}/raw")

    def test_nosniff_header_on_polyglot(self, client):
        """X-Content-Type-Options: nosniff should be present."""
        raw = self._fetch_raw(client, generate_gif_js())
        assert raw.headers.get("X-Content-Type-Options") == "nosniff"

    def test_csp_header_on_polyglot(self, client):
        """CSP should prevent script execution."""
        raw = self._fetch_raw(client, generate_png_html())
        # A missing header becomes "" so the assertion fails with a clean
        # comparison rather than a TypeError on None.
        csp = raw.headers.get("Content-Security-Policy", "")
        assert "default-src 'none'" in csp

    def test_xframe_options_on_polyglot(self, client):
        """X-Frame-Options should prevent framing."""
        raw = self._fetch_raw(client, generate_pdf_js())
        assert raw.headers.get("X-Frame-Options") == "DENY"
class TestPayloadNotExecuted:
    """Verify embedded payloads are returned literally.

    Each test uploads a polyglot with ``POST /``, fetches it back from
    ``/<id>/raw`` and checks both the returned bytes and the Content-Type.
    """

    @staticmethod
    def _fetch_raw(client, content):
        """Upload *content* and return the response of ``GET /<id>/raw``.

        The calling test is skipped when the upload is not answered with
        HTTP 201, so a rejected upload is reported as a skip instead of a
        pass that never inspected the stored paste.
        """
        create = client.post("/", data=content)
        if create.status_code != 201:
            pytest.skip(f"upload not accepted (HTTP {create.status_code})")
        paste_id = json.loads(create.data)["id"]
        return client.get(f"/{paste_id}/raw")

    def test_html_payload_literal(self, client):
        """HTML payload should be returned as-is, not rendered."""
        raw = self._fetch_raw(client, generate_polyglot("png", "html"))
        # Content should contain literal script tag
        assert b"<script>" in raw.data
        # But Content-Type should be image/png
        assert "image/png" in raw.content_type

    def test_php_payload_literal(self, client):
        """PHP payload should be returned as-is."""
        raw = self._fetch_raw(client, generate_polyglot("gif", "php"))
        # The PHP opening tag must come back unmodified, served as a GIF.
        assert b"<?php" in raw.data
        assert "image/gif" in raw.content_type