forked from username/flaskpaste
Remove magic byte detection in favor of simple UTF-8 validation:
- text/plain for valid UTF-8 content
- application/octet-stream for binary data

Security is maintained via headers (X-Content-Type-Options: nosniff, CSP).
Magic signatures are preserved as comments for future reference.

Disabled test files:
- test_mime_detection.py.disabled (magic-dependent tests)
- test_polyglot.py.disabled (polyglot format tests)

For full MIME detection, consider using the `filetype` library.
147 lines
5.4 KiB
Plaintext
147 lines
5.4 KiB
Plaintext
"""Tests for polyglot file MIME detection.

Verifies that polyglot files (valid in multiple formats) are detected
by their primary magic bytes at offset 0, not by embedded payloads.
"""
|
|
|
|
import json
|
|
import sys
|
|
|
|
import pytest
|
|
|
|
sys.path.insert(0, "tests/security")
|
|
from polyglot_generator import (
|
|
generate_gif_js,
|
|
generate_pdf_js,
|
|
generate_png_html,
|
|
generate_polyglot,
|
|
generate_zip_html,
|
|
)
|
|
|
|
|
|
class TestPolyglotDetection:
    """Verify polyglot files are detected by primary magic.

    Each test posts a generated polyglot body to ``/`` and checks the
    ``mime_type`` field of the JSON response. ``client`` is supplied by the
    test suite (presumably a Flask test-client fixture -- confirm in conftest).

    If the paste is not created (any status other than 201) the test is
    reported as skipped instead of passing without running its assertion.
    """

    def test_gif_js_detected_as_gif(self, client):
        """GIF/JS polyglot should be detected as GIF."""
        response = client.post("/", data=generate_gif_js())
        if response.status_code != 201:
            # Make a rejected upload visible rather than a silent pass.
            pytest.skip(f"paste creation returned {response.status_code}")
        data = json.loads(response.data)
        assert data["mime_type"] == "image/gif"

    def test_pdf_js_detected_as_pdf(self, client):
        """PDF with JavaScript should be detected as PDF."""
        response = client.post("/", data=generate_pdf_js())
        if response.status_code != 201:
            pytest.skip(f"paste creation returned {response.status_code}")
        data = json.loads(response.data)
        assert data["mime_type"] == "application/pdf"

    def test_zip_html_detected_as_zip(self, client):
        """ZIP containing HTML should be detected as ZIP."""
        response = client.post("/", data=generate_zip_html())
        if response.status_code != 201:
            pytest.skip(f"paste creation returned {response.status_code}")
        data = json.loads(response.data)
        assert data["mime_type"] == "application/zip"

    def test_png_html_detected_as_png(self, client):
        """PNG with trailing HTML should be detected as PNG."""
        response = client.post("/", data=generate_png_html())
        if response.status_code != 201:
            pytest.skip(f"paste creation returned {response.status_code}")
        data = json.loads(response.data)
        assert data["mime_type"] == "image/png"
|
|
|
|
|
|
class TestGenericPolyglots:
    """Test generic primary:payload combinations."""

    @pytest.mark.parametrize(
        "primary,expected_mime",
        [
            ("png", "image/png"),
            ("gif", "image/gif"),
            ("jpeg", "image/jpeg"),
            ("pdf", "application/pdf"),
            ("zip", "application/zip"),
            ("gzip", "application/gzip"),
            ("elf", "application/x-executable"),
            ("pe", "application/x-msdownload"),
        ],
    )
    @pytest.mark.parametrize("payload", ["html", "js", "php", "shell"])
    def test_primary_format_wins(self, client, primary, expected_mime, payload):
        """Primary format magic should determine MIME type, not payload.

        Runs once for every (primary, payload) pair produced by the two
        stacked ``parametrize`` decorators. The body is uploaded as
        ``application/octet-stream`` and the reported ``mime_type`` is
        compared with ``expected_mime``.

        If the paste is not created (status other than 201) the case is
        reported as skipped instead of passing without an assertion.
        """
        content = generate_polyglot(primary, payload)
        response = client.post(
            "/", data=content, content_type="application/octet-stream"
        )
        if response.status_code != 201:
            # Make a rejected upload visible rather than a silent pass.
            pytest.skip(f"paste creation returned {response.status_code}")

        data = json.loads(response.data)
        assert data["mime_type"] == expected_mime, (
            f"{primary}:{payload} detected as {data['mime_type']}, expected {expected_mime}"
        )
|
|
|
|
|
|
class TestSecurityHeaders:
    """Verify security headers prevent polyglot execution.

    Each test uploads a polyglot, fetches ``/<id>/raw`` using the ``id`` from
    the creation response, and inspects one response header.

    If the paste is not created (status other than 201) the test is reported
    as skipped instead of passing without checking any header.
    """

    def test_nosniff_header_on_polyglot(self, client):
        """X-Content-Type-Options: nosniff should be present."""
        create = client.post("/", data=generate_gif_js())
        if create.status_code != 201:
            # Make a rejected upload visible rather than a silent pass.
            pytest.skip(f"paste creation returned {create.status_code}")
        paste_id = json.loads(create.data)["id"]
        raw = client.get(f"/{paste_id}/raw")
        assert raw.headers.get("X-Content-Type-Options") == "nosniff"

    def test_csp_header_on_polyglot(self, client):
        """CSP should prevent script execution."""
        create = client.post("/", data=generate_png_html())
        if create.status_code != 201:
            pytest.skip(f"paste creation returned {create.status_code}")
        paste_id = json.loads(create.data)["id"]
        raw = client.get(f"/{paste_id}/raw")
        # A missing header becomes "" so the assertion fails cleanly.
        csp = raw.headers.get("Content-Security-Policy", "")
        assert "default-src 'none'" in csp

    def test_xframe_options_on_polyglot(self, client):
        """X-Frame-Options should prevent framing."""
        create = client.post("/", data=generate_pdf_js())
        if create.status_code != 201:
            pytest.skip(f"paste creation returned {create.status_code}")
        paste_id = json.loads(create.data)["id"]
        raw = client.get(f"/{paste_id}/raw")
        assert raw.headers.get("X-Frame-Options") == "DENY"
|
|
|
|
|
|
class TestPayloadNotExecuted:
    """Verify embedded payloads are returned literally.

    Each test uploads a polyglot, fetches ``/<id>/raw`` and checks that the
    payload marker is present in the returned bytes while the Content-Type
    names the primary image format.

    If the paste is not created (status other than 201) the test is reported
    as skipped instead of passing without checking anything.
    """

    def test_html_payload_literal(self, client):
        """HTML payload should be returned as-is, not rendered."""
        create = client.post("/", data=generate_polyglot("png", "html"))
        if create.status_code != 201:
            # Make a rejected upload visible rather than a silent pass.
            pytest.skip(f"paste creation returned {create.status_code}")
        paste_id = json.loads(create.data)["id"]
        raw = client.get(f"/{paste_id}/raw")
        # Content should contain literal script tag
        assert b"<script>" in raw.data
        # But Content-Type should be image/png
        assert "image/png" in raw.content_type

    def test_php_payload_literal(self, client):
        """PHP payload should be returned as-is."""
        create = client.post("/", data=generate_polyglot("gif", "php"))
        if create.status_code != 201:
            pytest.skip(f"paste creation returned {create.status_code}")
        paste_id = json.loads(create.data)["id"]
        raw = client.get(f"/{paste_id}/raw")
        assert b"<?php" in raw.data
        assert "image/gif" in raw.content_type
|