Files
flaskpaste/tests/test_polyglot.py.disabled
Username 3cda73c8b0 simplify MIME detection to text/binary only
Remove magic byte detection in favor of simple UTF-8 validation:
- text/plain for valid UTF-8 content
- application/octet-stream for binary data

Security maintained via headers (X-Content-Type-Options: nosniff, CSP).
Magic signatures preserved as comments for future reference.

Disabled test files:
- test_mime_detection.py.disabled (magic-dependent tests)
- test_polyglot.py.disabled (polyglot format tests)

For full MIME detection, consider using the `filetype` library.
2025-12-26 18:44:24 +01:00

147 lines
5.4 KiB
Plaintext

"""Tests for polyglot file MIME detection.
Verifies that polyglot files (valid in multiple formats) are detected
by their primary magic bytes at offset 0, not by embedded payloads.
"""
import json
import sys
import pytest
sys.path.insert(0, "tests/security")
from polyglot_generator import (
generate_gif_js,
generate_pdf_js,
generate_png_html,
generate_polyglot,
generate_zip_html,
)
class TestPolyglotDetection:
    """Verify polyglot files are detected by primary magic.

    Each test uploads a generated polyglot with ``client.post("/")`` and
    checks the ``mime_type`` field of the JSON body returned on HTTP 201.
    ``client`` is a fixture defined outside this file (presumably a Flask
    test client -- confirm in conftest).
    """

    @staticmethod
    def _detected_mime(response):
        """Return ``mime_type`` from a 201 response; skip the test otherwise.

        Previously a non-201 upload let the test pass without asserting
        anything. Skipping keeps the run green but makes the unverified
        case visible in the test report.
        """
        if response.status_code != 201:
            pytest.skip(f"upload not created (HTTP {response.status_code})")
        return json.loads(response.data)["mime_type"]

    def test_gif_js_detected_as_gif(self, client):
        """GIF/JS polyglot should be detected as GIF."""
        response = client.post("/", data=generate_gif_js())
        assert self._detected_mime(response) == "image/gif"

    def test_pdf_js_detected_as_pdf(self, client):
        """PDF with JavaScript should be detected as PDF."""
        response = client.post("/", data=generate_pdf_js())
        assert self._detected_mime(response) == "application/pdf"

    def test_zip_html_detected_as_zip(self, client):
        """ZIP containing HTML should be detected as ZIP."""
        response = client.post("/", data=generate_zip_html())
        assert self._detected_mime(response) == "application/zip"

    def test_png_html_detected_as_png(self, client):
        """PNG with trailing HTML should be detected as PNG."""
        response = client.post("/", data=generate_png_html())
        assert self._detected_mime(response) == "image/png"
class TestGenericPolyglots:
    """Test generic primary:payload combinations.

    The two stacked ``parametrize`` decorators produce every pairing of a
    primary format with an embedded payload type.
    """

    @pytest.mark.parametrize(
        "primary,expected_mime",
        [
            ("png", "image/png"),
            ("gif", "image/gif"),
            ("jpeg", "image/jpeg"),
            ("pdf", "application/pdf"),
            ("zip", "application/zip"),
            ("gzip", "application/gzip"),
            ("elf", "application/x-executable"),
            ("pe", "application/x-msdownload"),
        ],
    )
    @pytest.mark.parametrize("payload", ["html", "js", "php", "shell"])
    def test_primary_format_wins(self, client, primary, expected_mime, payload):
        """Primary format magic should determine MIME type, not payload."""
        content = generate_polyglot(primary, payload)
        response = client.post("/", data=content, content_type="application/octet-stream")

        # A rejected upload used to pass silently; skip so the unverified
        # combination shows up in the report instead of counting as a pass.
        if response.status_code != 201:
            pytest.skip(
                f"{primary}:{payload} upload not created (HTTP {response.status_code})"
            )

        detected = json.loads(response.data)["mime_type"]
        assert detected == expected_mime, (
            f"{primary}:{payload} detected as {detected}, expected {expected_mime}"
        )
class TestSecurityHeaders:
    """Verify security headers prevent polyglot execution.

    Each test uploads a polyglot, then requests ``/<id>/raw`` and inspects
    the response headers. ``client`` is a fixture defined outside this file
    (presumably a Flask test client -- confirm in conftest).
    """

    @staticmethod
    def _fetch_raw(client, content):
        """Upload ``content`` and return the response of ``GET /<id>/raw``.

        Skips the calling test when the upload does not return HTTP 201.
        Previously that case let the test pass without checking any header.
        """
        create = client.post("/", data=content)
        if create.status_code != 201:
            pytest.skip(f"paste not created (HTTP {create.status_code})")
        paste_id = json.loads(create.data)["id"]
        return client.get(f"/{paste_id}/raw")

    def test_nosniff_header_on_polyglot(self, client):
        """X-Content-Type-Options: nosniff should be present."""
        raw = self._fetch_raw(client, generate_gif_js())
        assert raw.headers.get("X-Content-Type-Options") == "nosniff"

    def test_csp_header_on_polyglot(self, client):
        """CSP should prevent script execution."""
        raw = self._fetch_raw(client, generate_png_html())
        # Default to "" so a missing header fails the assertion instead of
        # raising a TypeError on the membership check.
        csp = raw.headers.get("Content-Security-Policy", "")
        assert "default-src 'none'" in csp

    def test_xframe_options_on_polyglot(self, client):
        """X-Frame-Options should prevent framing."""
        raw = self._fetch_raw(client, generate_pdf_js())
        assert raw.headers.get("X-Frame-Options") == "DENY"
class TestPayloadNotExecuted:
    """Verify embedded payloads are returned literally.

    Each test uploads a polyglot, fetches ``/<id>/raw`` and checks that the
    payload bytes are present unchanged while the Content-Type names the
    primary format.
    """

    @staticmethod
    def _fetch_raw(client, content):
        """Upload ``content`` and return the response of ``GET /<id>/raw``.

        Skips the calling test when the upload does not return HTTP 201.
        Previously that case let the test pass without asserting anything.
        """
        create = client.post("/", data=content)
        if create.status_code != 201:
            pytest.skip(f"paste not created (HTTP {create.status_code})")
        paste_id = json.loads(create.data)["id"]
        return client.get(f"/{paste_id}/raw")

    def test_html_payload_literal(self, client):
        """HTML payload should be returned as-is, not rendered."""
        raw = self._fetch_raw(client, generate_polyglot("png", "html"))
        # Content should contain literal script tag
        assert b"<script>" in raw.data
        # But Content-Type should be image/png
        assert "image/png" in raw.content_type

    def test_php_payload_literal(self, client):
        """PHP payload should be returned as-is."""
        raw = self._fetch_raw(client, generate_polyglot("gif", "php"))
        assert b"<?php" in raw.data
        assert "image/gif" in raw.content_type