flaskpaste: initial commit with security hardening

Features:
- REST API for text/binary pastes with MIME detection
- Client certificate auth via X-SSL-Client-SHA1 header
- SQLite with WAL mode for concurrent access
- Automatic paste expiry with LRU cleanup

Security:
- HSTS, CSP, X-Frame-Options, X-Content-Type-Options
- Cache-Control: no-store for sensitive responses
- X-Request-ID tracing for log correlation
- X-Proxy-Secret validation for defense-in-depth
- Parameterized queries, input validation
- Size limits (3 MiB anon, 50 MiB auth)

Includes /health endpoint, container support, and 70 tests.
This commit is contained in:
Username
2025-12-16 04:42:18 +01:00
commit 8f9868f0d9
21 changed files with 2588 additions and 0 deletions

225
app/__init__.py Normal file
View File

@@ -0,0 +1,225 @@
"""Flask application factory."""
import logging
import os
import sys
import uuid
from flask import Flask, Response, g, request
from app.config import config
class _JsonLogFormatter(logging.Formatter):
    """Render each log record as one valid JSON object per line.

    A plain %-style template cannot escape quotes, backslashes or newlines
    inside the message, so it emits broken JSON as soon as a message contains
    one of them. Serialising through ``json.dumps`` escapes them properly.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialised as a compact JSON string."""
        import json  # local import keeps this block self-contained

        payload = {
            "time": self.formatTime(record),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        if record.exc_info:
            # Keep the traceback inside the JSON object instead of appending
            # raw multi-line text after it.
            payload["exception"] = self.formatException(record.exc_info)
        return json.dumps(payload, ensure_ascii=False, separators=(",", ":"))


def setup_logging(app: Flask) -> None:
    """Configure root logging for the application.

    In debug mode records are written as human-readable text at DEBUG level.
    Otherwise they are written as JSON objects at INFO level and the
    ``werkzeug`` logger is raised to WARNING to reduce noise.
    All output goes to stdout.

    Args:
        app: The Flask application whose ``debug`` flag selects the mode.
    """
    if app.debug:
        log_level = logging.DEBUG
        formatter: logging.Formatter = logging.Formatter(
            "%(asctime)s %(levelname)s [%(name)s] %(message)s"
        )
    else:
        log_level = logging.INFO
        formatter = _JsonLogFormatter()

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    # basicConfig is a no-op when the root logger already has handlers.
    logging.basicConfig(level=log_level, handlers=[handler])

    # Reduce noise from werkzeug in production
    if not app.debug:
        logging.getLogger("werkzeug").setLevel(logging.WARNING)

    app.logger.info("FlaskPaste starting", extra={"config": type(app.config).__name__})
def setup_security_headers(app: Flask) -> None:
    """Register an after-request hook that stamps hardening headers on every response.

    Args:
        app: The Flask application to attach the hook to.
    """
    # Header name -> value, applied in this order to each outgoing response.
    hardening_headers = {
        # Stop browsers from guessing a different MIME type
        "X-Content-Type-Options": "nosniff",
        # Forbid rendering inside frames (clickjacking)
        "X-Frame-Options": "DENY",
        # Legacy XSS filter switch
        "X-XSS-Protection": "1; mode=block",
        # Limit what is leaked through the Referer header
        "Referrer-Policy": "strict-origin-when-cross-origin",
        # Restrictive CSP suitable for an API
        "Content-Security-Policy": "default-src 'none'; frame-ancestors 'none'",
        # Disable browser features the API never needs
        "Permissions-Policy": "geolocation=(), microphone=(), camera=()",
        # HSTS: one year, subdomains included
        "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
        # Paste data must not be cached
        "Cache-Control": "no-store, no-cache, must-revalidate, private",
        "Pragma": "no-cache",
    }

    @app.after_request
    def add_security_headers(response: Response) -> Response:
        for header_name, header_value in hardening_headers.items():
            response.headers[header_name] = header_value
        return response
def setup_request_id(app: Flask) -> None:
    """Install hooks that tag every request with an ID and write an access log line.

    Args:
        app: The Flask application to attach the hooks to.
    """

    @app.before_request
    def assign_request_id():
        # Prefer the ID supplied in X-Request-ID; fall back to a fresh UUID4
        # when the header is missing or blank.
        incoming = request.headers.get("X-Request-ID", "").strip()
        g.request_id = incoming if incoming else str(uuid.uuid4())

    @app.after_request
    def add_request_id_header(response: Response) -> Response:
        rid = getattr(g, "request_id", None)
        if rid:
            # Echo the ID so the client can quote it when reporting problems
            response.headers["X-Request-ID"] = rid

        # One access-log line per request, correlated by request ID
        app.logger.info(
            "%s %s %s [rid=%s]",
            request.method,
            request.path,
            response.status_code,
            rid or "-",
        )
        return response
def setup_error_handlers(app: Flask) -> None:
    """Register global error handlers that always answer with a JSON body.

    Handlers are installed for 400, 404, 429 and 500, plus a catch-all for
    ``Exception``. The catch-all passes werkzeug ``HTTPException`` instances
    through with their own status code. Without that, any HTTP error lacking
    a dedicated handler (for example 405 or 413) would be logged as an
    unhandled exception and reported to the client as a 500.

    Args:
        app: The Flask application to register the handlers on.
    """
    import json

    from werkzeug.exceptions import HTTPException

    def _json_error(payload: dict, status: int) -> Response:
        # Single place that builds the JSON error response.
        return Response(json.dumps(payload), status=status, mimetype="application/json")

    def _rid() -> str:
        # Request ID for log correlation; "-" when none was assigned.
        return getattr(g, "request_id", "-")

    @app.errorhandler(400)
    def bad_request(error):
        app.logger.warning("Bad request: %s [rid=%s]", request.path, _rid())
        return _json_error({"error": "Bad request"}, 400)

    @app.errorhandler(404)
    def not_found(error):
        return _json_error({"error": "Not found"}, 404)

    @app.errorhandler(429)
    def rate_limit_exceeded(error):
        app.logger.warning(
            "Rate limit exceeded: %s from %s [rid=%s]",
            request.path, request.remote_addr, _rid()
        )
        return _json_error(
            {"error": "Rate limit exceeded", "retry_after": error.description}, 429
        )

    @app.errorhandler(500)
    def internal_error(error):
        app.logger.error(
            "Internal error: %s - %s [rid=%s]",
            request.path, str(error), _rid()
        )
        return _json_error({"error": "Internal server error"}, 500)

    @app.errorhandler(Exception)
    def handle_exception(error):
        if isinstance(error, HTTPException):
            # Ordinary HTTP error (405, 413, ...): keep its status code and
            # headers, but replace the HTML body with JSON.
            response = error.get_response()
            response.data = json.dumps({"error": error.name})
            response.content_type = "application/json"
            return response

        app.logger.exception(
            "Unhandled exception: %s [rid=%s]",
            str(error), _rid()
        )
        return _json_error({"error": "Internal server error"}, 500)
def setup_rate_limiting(app: Flask) -> None:
    """Attach a Flask-Limiter instance with application-wide default limits.

    The limiter keys requests with ``get_remote_address``, applies
    "200 per day" and "60 per hour" by default, and uses the ``memory://``
    storage URI with the fixed-window strategy. It is stored under
    ``app.extensions["limiter"]`` so routes can reach it.

    NOTE(review): the function returns the limiter even though the signature
    is annotated ``-> None``; the annotation is left untouched here.

    Args:
        app: The Flask application to protect.
    """
    from flask_limiter import Limiter
    from flask_limiter.util import get_remote_address

    limiter_options = {
        "key_func": get_remote_address,
        "app": app,
        "default_limits": ["200 per day", "60 per hour"],
        "storage_uri": "memory://",
        "strategy": "fixed-window",
    }
    # Build first, then publish on the app for use in routes
    app.extensions["limiter"] = limiter = Limiter(**limiter_options)
    return limiter
def setup_metrics(app: Flask) -> None:
    """Enable Prometheus metrics unless the app is running in TESTING mode.

    If ``prometheus_flask_exporter`` cannot be imported, a warning is logged
    and the application continues without metrics.

    Args:
        app: The Flask application to instrument.
    """
    testing = app.config.get("TESTING")
    if not testing:
        try:
            from prometheus_flask_exporter import PrometheusMetrics

            exporter = PrometheusMetrics(app)
            # Static application info metric
            exporter.info("flaskpaste_info", "FlaskPaste application info", version="1.0.0")
            app.extensions["metrics"] = exporter
        except ImportError:
            app.logger.warning("prometheus_flask_exporter not available, metrics disabled")
def create_app(config_name: str | None = None) -> Flask:
    """Build a fully wired Flask application.

    Args:
        config_name: Key into the ``config`` registry. When ``None`` the
            ``FLASK_ENV`` environment variable is used, falling back to
            ``"default"``.

    Returns:
        The configured application with hooks, database and API blueprint
        registered.
    """
    selected = (
        config_name
        if config_name is not None
        else os.environ.get("FLASK_ENV", "default")
    )

    app = Flask(__name__)
    app.config.from_object(config[selected])

    # Order matters: logging first, then request IDs, security headers and
    # error handlers.
    for install in (
        setup_logging,
        setup_request_id,
        setup_security_headers,
        setup_error_handlers,
    ):
        install(app)

    # Rate limiting is skipped under TESTING; setup_metrics checks the flag itself
    if not app.config.get("TESTING"):
        setup_rate_limiting(app)
    setup_metrics(app)

    # Database: teardown hook + schema
    from app import database
    database.init_app(app)

    # API routes
    from app.api import bp as api_bp
    app.register_blueprint(api_bp)

    app.logger.info("FlaskPaste initialized successfully")
    return app

32
app/api/__init__.py Normal file
View File

@@ -0,0 +1,32 @@
"""API blueprint registration."""
import time
from flask import Blueprint, current_app
# Blueprint that the API route handlers and the cleanup hook attach to.
bp = Blueprint("api", __name__)
# Throttle cleanup to run at most once per hour
# time.time() value of the most recent cleanup attempt in this process;
# 0 means no cleanup has been attempted yet.
_last_cleanup = 0
_CLEANUP_INTERVAL = 3600  # minimum seconds between cleanup attempts (1 hour)
@bp.before_request
def cleanup_expired():
    """Purge expired pastes, at most once per ``_CLEANUP_INTERVAL`` seconds.

    Runs before every request on this blueprint. Most calls do nothing
    because the interval since the previous attempt has not elapsed yet.
    """
    global _last_cleanup

    current = time.time()
    elapsed = current - _last_cleanup
    if elapsed >= _CLEANUP_INTERVAL:
        # The timestamp is recorded before the work starts
        _last_cleanup = current

        from app.database import cleanup_expired_pastes

        count = cleanup_expired_pastes()
        if count > 0:
            current_app.logger.info(f"Cleaned up {count} expired paste(s)")
from app.api import routes # noqa: E402, F401

319
app/api/routes.py Normal file
View File

@@ -0,0 +1,319 @@
"""API route handlers."""
import hashlib
import hmac
import json
import re
import time
from flask import Response, current_app, request
from app.api import bp
from app.database import get_db
# Valid paste ID pattern (hexadecimal only)
# Lowercase hex digits; the pattern itself does not constrain the length.
PASTE_ID_PATTERN = re.compile(r"^[a-f0-9]+$")
# Valid client certificate SHA1 pattern (40 hex chars)
CLIENT_ID_PATTERN = re.compile(r"^[a-f0-9]{40}$")
# Magic bytes for common binary formats
# Maps a leading byte sequence to the MIME type it indicates.
MAGIC_SIGNATURES = {
    b"\x89PNG\r\n\x1a\n": "image/png",
    b"\xff\xd8\xff": "image/jpeg",
    b"GIF87a": "image/gif",
    b"GIF89a": "image/gif",
    b"RIFF": "image/webp",  # WebP (check for WEBP after RIFF)
    b"PK\x03\x04": "application/zip",
    b"%PDF": "application/pdf",
    b"\x1f\x8b": "application/gzip",
}
def _is_valid_paste_id(paste_id: str) -> bool:
    """Return True if *paste_id* has the configured length and is lowercase hex.

    Args:
        paste_id: Candidate ID taken from the request URL.

    Returns:
        True only when the ID is exactly ``PASTE_ID_LENGTH`` characters long
        and consists solely of the characters ``0-9a-f``.
    """
    expected_length = current_app.config["PASTE_ID_LENGTH"]
    if len(paste_id) != expected_length:
        return False
    # fullmatch, not match: "$" in the pattern also matches just before a
    # trailing newline, so match() would accept an ID that ends in "\n".
    return PASTE_ID_PATTERN.fullmatch(paste_id) is not None
def _detect_mime_type(content: bytes, content_type: str | None = None) -> str:
    """Work out the MIME type to store for a paste.

    Detection order:
      1. Magic bytes at the start of *content* (takes priority).
      2. The client's Content-Type, if it is specific and well-formed.
      3. ``text/plain`` if *content* decodes as UTF-8, otherwise
         ``application/octet-stream``.

    Args:
        content: Raw paste bytes.
        content_type: Value of the request's Content-Type header, if any.

    Returns:
        A lowercase MIME type string.
    """
    for magic, mime in MAGIC_SIGNATURES.items():
        if not content.startswith(magic):
            continue
        # RIFF is a generic container (also used by WAV, AVI, ...); only a
        # "WEBP" tag at bytes 8-11 makes it WebP. Slicing short content
        # yields fewer than four bytes, so truncated or plain-text input
        # beginning with "RIFF" is correctly rejected here as well.
        if magic == b"RIFF" and content[8:12] != b"WEBP":
            continue
        return mime

    # Trust explicit Content-Type if it's specific (not generic defaults)
    generic_types = {
        "application/octet-stream",
        "application/x-www-form-urlencoded",
        "text/plain",
    }
    if content_type:
        # Drop parameters such as "; charset=utf-8"
        mime = content_type.split(";")[0].strip().lower()
        # Sanitize: only allow safe characters in the MIME type
        if mime not in generic_types and re.match(
            r"^[a-z0-9][a-z0-9!#$&\-^_.+]*\/[a-z0-9][a-z0-9!#$&\-^_.+]*$", mime
        ):
            return mime

    # Last resort: text if it decodes as UTF-8, opaque binary otherwise
    try:
        content.decode("utf-8")
    except UnicodeDecodeError:
        return "application/octet-stream"
    return "text/plain"
def _generate_id(content: bytes) -> str:
    """Derive a paste ID from the content and the current time.

    The ID is the first ``PASTE_ID_LENGTH`` hex characters of the SHA-256
    digest of *content* followed by the decimal nanosecond timestamp.
    """
    id_length = current_app.config["PASTE_ID_LENGTH"]
    hasher = hashlib.sha256()
    hasher.update(content)
    # Mixing in the timestamp makes identical content yield different IDs
    hasher.update(str(time.time_ns()).encode())
    return hasher.hexdigest()[:id_length]
def _json_response(data: dict, status: int = 200) -> Response:
    """Serialise *data* as JSON and wrap it in a Response.

    Args:
        data: JSON-serialisable payload.
        status: HTTP status code (default 200).

    Returns:
        A response with mimetype ``application/json``. Non-ASCII characters
        are emitted as-is rather than ``\\u``-escaped.
    """
    body = json.dumps(data, ensure_ascii=False)
    return Response(body, status=status, mimetype="application/json")
def _is_trusted_proxy() -> bool:
    """Verify request comes from a trusted reverse proxy.

    If TRUSTED_PROXY_SECRET is configured, the request must include a matching
    X-Proxy-Secret header. This provides defense-in-depth against header
    spoofing if an attacker bypasses the reverse proxy.

    Returns:
        True if no secret is configured (backwards compatible) or if the
        provided secret matches; False otherwise.
    """
    expected_secret = current_app.config.get("TRUSTED_PROXY_SECRET", "")
    if not expected_secret:
        # No secret configured - trust all requests (backwards compatible)
        return True

    provided_secret = request.headers.get("X-Proxy-Secret", "")

    # hmac.compare_digest() raises TypeError when given a str containing
    # non-ASCII characters, so a crafted header would surface as a 500.
    # Comparing the encoded bytes keeps the check constant-time and makes
    # such input simply fail to match.
    if isinstance(expected_secret, str):
        expected_secret = expected_secret.encode("utf-8")
    return hmac.compare_digest(expected_secret, provided_secret.encode("utf-8"))
def _get_client_id() -> str | None:
    """Return the caller's certificate fingerprint, or None if unauthenticated.

    The fingerprint is read from the X-SSL-Client-SHA1 header, stripped and
    lowercased, and must be exactly 40 hex characters.

    SECURITY: the header is only honoured when ``_is_trusted_proxy()``
    accepts the request; otherwise a warning is logged and None is returned.
    """
    if not _is_trusted_proxy():
        current_app.logger.warning(
            "Auth header ignored: X-Proxy-Secret mismatch from %s",
            request.remote_addr
        )
        return None

    fingerprint = request.headers.get("X-SSL-Client-SHA1", "").strip().lower()
    if not fingerprint:
        return None
    # Reject anything that is not a 40-character hex SHA1
    if CLIENT_ID_PATTERN.match(fingerprint) is None:
        return None
    return fingerprint
@bp.route("/health", methods=["GET"])
def health():
    """Report service health by running a trivial query against the database.

    Returns 200 with a "healthy" payload when the query succeeds and 503 with
    an "unhealthy" payload when anything goes wrong.
    """
    try:
        get_db().execute("SELECT 1")
        payload, status = {"status": "healthy", "database": "ok"}, 200
    except Exception:
        payload, status = {"status": "unhealthy", "database": "error"}, 503
    return _json_response(payload, status)
@bp.route("/", methods=["GET", "POST"])
def index():
    """Serve the API description on GET and delegate POST to ``create_paste``."""
    if request.method == "POST":
        return create_paste()

    # Human-readable route summary
    endpoints = {
        "GET /": "API information",
        "GET /health": "Health check",
        "POST /": "Create paste",
        "GET /<id>": "Retrieve paste metadata",
        "GET /<id>/raw": "Retrieve raw paste content",
        "DELETE /<id>": "Delete paste",
    }
    # Copy-pasteable curl examples
    usage = {
        "raw": "curl --data-binary @file.txt http://host/",
        "pipe": "cat file.txt | curl --data-binary @- http://host/",
        "json": "curl -H 'Content-Type: application/json' -d '{\"content\":\"...\"}' http://host/",
    }
    info = {
        "name": "FlaskPaste",
        "version": "1.0.0",
        "endpoints": endpoints,
        "usage": usage,
        "note": "Use --data-binary (not -d) to preserve newlines",
    }
    return _json_response(info)
def create_paste():
    """Create a new paste from the request body.

    Accepts either a JSON object with a string ``"content"`` field (stored
    as ``text/plain``) or a raw body whose MIME type is detected from its
    bytes and Content-Type.

    Returns:
        201 with the paste's id, URLs, MIME type and creation time (plus
        ``owner`` for authenticated clients); 400 when no usable content was
        sent; 413 when the content exceeds the caller's size limit.
    """
    content: bytes | None = None
    mime_type: str | None = None

    if request.is_json:
        data = request.get_json(silent=True)
        # Valid JSON can also be a list, string or number; only an object can
        # carry "content". Without the dict check, a body such as `[1]` would
        # raise AttributeError on .get() and become a 500.
        if isinstance(data, dict) and isinstance(data.get("content"), str):
            content = data["content"].encode("utf-8")
            mime_type = "text/plain"
    else:
        content = request.get_data(as_text=False)
        if content:
            mime_type = _detect_mime_type(content, request.content_type)

    if not content:
        return _json_response({"error": "No content provided"}, 400)

    owner = _get_client_id()

    # Enforce size limits based on authentication
    content_size = len(content)
    if owner:
        max_size = current_app.config["MAX_PASTE_SIZE_AUTH"]
    else:
        max_size = current_app.config["MAX_PASTE_SIZE_ANON"]

    if content_size > max_size:
        return _json_response({
            "error": "Paste too large",
            "size": content_size,
            "max_size": max_size,
            "authenticated": owner is not None,
        }, 413)

    paste_id = _generate_id(content)
    now = int(time.time())

    db = get_db()
    db.execute(
        "INSERT INTO pastes (id, content, mime_type, owner, created_at, last_accessed) VALUES (?, ?, ?, ?, ?, ?)",
        (paste_id, content, mime_type, owner, now, now),
    )
    db.commit()

    response_data = {
        "id": paste_id,
        "url": f"/{paste_id}",
        "raw": f"/{paste_id}/raw",
        "mime_type": mime_type,
        "created_at": now,
    }
    if owner:
        response_data["owner"] = owner
    return _json_response(response_data, 201)
@bp.route("/<paste_id>", methods=["GET"])
def get_paste(paste_id: str):
    """Return a paste's metadata (not its content) as JSON.

    Responds 400 for a malformed ID and 404 when no such paste exists.
    Looking a paste up also refreshes its ``last_accessed`` timestamp.
    """
    if not _is_valid_paste_id(paste_id):
        return _json_response({"error": "Invalid paste ID"}, 400)

    conn = get_db()
    accessed_at = int(time.time())

    # Touch the row first, then read it back; both run before the commit
    conn.execute(
        "UPDATE pastes SET last_accessed = ? WHERE id = ?", (accessed_at, paste_id)
    )
    cursor = conn.execute(
        "SELECT id, mime_type, created_at, length(content) as size FROM pastes WHERE id = ?",
        (paste_id,)
    )
    record = cursor.fetchone()
    conn.commit()

    if record is not None:
        metadata = {
            "id": record["id"],
            "mime_type": record["mime_type"],
            "size": record["size"],
            "created_at": record["created_at"],
            "raw": f"/{paste_id}/raw",
        }
        return _json_response(metadata)
    return _json_response({"error": "Paste not found"}, 404)
@bp.route("/<paste_id>/raw", methods=["GET"])
def get_paste_raw(paste_id: str):
    """Serve the stored bytes of a paste under its recorded MIME type.

    Responds 400 for a malformed ID and 404 when no such paste exists.
    Fetching a paste also refreshes its ``last_accessed`` timestamp.
    """
    if not _is_valid_paste_id(paste_id):
        return _json_response({"error": "Invalid paste ID"}, 400)

    conn = get_db()
    accessed_at = int(time.time())

    # Touch the row first, then read it back; both run before the commit
    conn.execute(
        "UPDATE pastes SET last_accessed = ? WHERE id = ?", (accessed_at, paste_id)
    )
    record = conn.execute(
        "SELECT content, mime_type FROM pastes WHERE id = ?", (paste_id,)
    ).fetchone()
    conn.commit()

    if record is None:
        return _json_response({"error": "Paste not found"}, 404)

    stored_type = record["mime_type"]
    response = Response(record["content"], mimetype=stored_type)

    # Images and text are shown inline; other types get no explicit disposition
    shown_inline = stored_type.startswith("image/") or stored_type.startswith("text/")
    if shown_inline:
        response.headers["Content-Disposition"] = "inline"
    return response
@bp.route("/<paste_id>", methods=["DELETE"])
def delete_paste(paste_id: str):
    """Delete a paste on behalf of its owner.

    The caller is identified through ``_get_client_id()``. Responses:
    400 malformed ID, 401 no valid client identity, 404 unknown paste,
    403 paste owned by someone else (or by nobody), 200 on success.
    """
    if not _is_valid_paste_id(paste_id):
        return _json_response({"error": "Invalid paste ID"}, 400)

    requester = _get_client_id()
    if not requester:
        return _json_response({"error": "Authentication required"}, 401)

    conn = get_db()

    # Look the paste up before deleting so existence and ownership can be
    # reported separately.
    record = conn.execute(
        "SELECT owner FROM pastes WHERE id = ?", (paste_id,)
    ).fetchone()
    if record is None:
        return _json_response({"error": "Paste not found"}, 404)

    owned_by_requester = record["owner"] == requester
    if not owned_by_requester:
        return _json_response({"error": "Permission denied"}, 403)

    conn.execute("DELETE FROM pastes WHERE id = ?", (paste_id,))
    conn.commit()
    return _json_response({"message": "Paste deleted"})

56
app/config.py Normal file
View File

@@ -0,0 +1,56 @@
"""Application configuration."""
import os
from pathlib import Path
class Config:
    """Base configuration shared by every environment.

    Values are read from ``FLASKPASTE_*`` environment variables when the
    class is defined, with the defaults shown below.
    """

    def _database_from_env(raw, default):
        """Normalise the FLASKPASTE_DB override.

        Args:
            raw: Value of the environment variable, or None when unset.
            default: Path to use when *raw* is unset or empty.

        Returns:
            *default* when *raw* is empty, the literal ``":memory:"`` when
            requested, otherwise *raw* wrapped in ``Path``.
        """
        if not raw:
            return default
        if raw == ":memory:":
            return raw
        # The database layer only creates the parent directory and enables
        # WAL mode for Path values, so a path supplied through the
        # environment must be a Path just like the built-in default.
        return Path(raw)

    BASE_DIR = Path(__file__).parent.parent
    DATABASE = _database_from_env(
        os.environ.get("FLASKPASTE_DB"), BASE_DIR / "data" / "pastes.db"
    )
    # Wrapped only after its use above so the plain function is callable
    # inside the class body on every Python version.
    _database_from_env = staticmethod(_database_from_env)

    # Number of hex characters in a paste ID
    PASTE_ID_LENGTH = int(os.environ.get("FLASKPASTE_ID_LENGTH", "12"))

    # Paste size limits
    MAX_PASTE_SIZE_ANON = int(os.environ.get("FLASKPASTE_MAX_ANON", 3 * 1024 * 1024))  # 3MiB
    MAX_PASTE_SIZE_AUTH = int(os.environ.get("FLASKPASTE_MAX_AUTH", 50 * 1024 * 1024))  # 50MiB
    MAX_CONTENT_LENGTH = MAX_PASTE_SIZE_AUTH  # Flask request limit

    # Paste expiry (default 5 days)
    PASTE_EXPIRY_SECONDS = int(os.environ.get("FLASKPASTE_EXPIRY", 5 * 24 * 60 * 60))

    # Reverse proxy trust configuration
    # SECURITY: The X-SSL-Client-SHA1 header is trusted for authentication.
    # This header MUST only come from a trusted reverse proxy that validates
    # client certificates. Direct access to this app MUST be blocked.
    #
    # Set FLASKPASTE_PROXY_SECRET to require the proxy to send a matching
    # X-Proxy-Secret header, providing defense-in-depth against header spoofing.
    TRUSTED_PROXY_SECRET = os.environ.get("FLASKPASTE_PROXY_SECRET", "")
class DevelopmentConfig(Config):
    """Settings for local development: everything from ``Config`` with DEBUG on."""

    DEBUG = True
class ProductionConfig(Config):
    """Settings for production: everything from ``Config`` with DEBUG off."""

    DEBUG = False
class TestingConfig(Config):
    """Settings for the test suite: TESTING on and an in-memory database."""

    TESTING = True
    # Overrides the file-based default so tests do not write to disk
    DATABASE = ":memory:"
# Registry mapping a configuration name to its class.
# "default" is an alias for the development settings.
config = {
    "development": DevelopmentConfig,
    "production": ProductionConfig,
    "testing": TestingConfig,
    "default": DevelopmentConfig,
}

96
app/database.py Normal file
View File

@@ -0,0 +1,96 @@
"""Database connection and schema management."""
import sqlite3
import time
from pathlib import Path
from flask import current_app, g
# DDL for the single `pastes` table and its indexes. Every statement uses
# IF NOT EXISTS, so running the script against an existing database does not
# fail on objects that are already there.
SCHEMA = """
CREATE TABLE IF NOT EXISTS pastes (
id TEXT PRIMARY KEY,
content BLOB NOT NULL,
mime_type TEXT NOT NULL DEFAULT 'text/plain',
owner TEXT,
created_at INTEGER NOT NULL,
last_accessed INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_pastes_created_at ON pastes(created_at);
CREATE INDEX IF NOT EXISTS idx_pastes_owner ON pastes(owner);
CREATE INDEX IF NOT EXISTS idx_pastes_last_accessed ON pastes(last_accessed);
"""
# Hold reference for in-memory shared cache databases
# Set when the configured database is ":memory:"; None otherwise.
_memory_db_holder = None
def _get_connection_string(db_path) -> tuple[str, dict]:
"""Get connection string and kwargs for sqlite3.connect."""
if isinstance(db_path, Path):
db_path.parent.mkdir(parents=True, exist_ok=True)
return str(db_path), {}
if db_path == ":memory:":
return "file::memory:?cache=shared", {"uri": True}
return db_path, {}
def get_db() -> sqlite3.Connection:
    """Return the SQLite connection stored on ``g``, opening it on first use.

    A new connection gets ``sqlite3.Row`` as row factory and has foreign keys
    switched on. WAL journal mode is requested only when the configured
    database location is a ``Path``.
    """
    if "db" in g:
        return g.db

    db_path = current_app.config["DATABASE"]
    target, connect_kwargs = _get_connection_string(db_path)

    # Stored on g immediately so close_db can find it
    g.db = connection = sqlite3.connect(target, **connect_kwargs)
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA foreign_keys = ON")
    if isinstance(db_path, Path):
        connection.execute("PRAGMA journal_mode = WAL")
    return g.db
def close_db(exception=None) -> None:
    """Close and forget the connection stored on ``g``, if there is one.

    Args:
        exception: Accepted and ignored.
    """
    connection = g.pop("db", None)
    if connection is None:
        return
    connection.close()
def init_db() -> None:
    """Create the tables and indexes defined in ``SCHEMA``.

    For the ``":memory:"`` database a dedicated connection is opened and kept
    in the module-level ``_memory_db_holder``. Any other database is
    initialised through the connection returned by ``get_db()``.
    """
    global _memory_db_holder

    db_path = current_app.config["DATABASE"]
    target, connect_kwargs = _get_connection_string(db_path)

    if db_path != ":memory:":
        connection = get_db()
        connection.executescript(SCHEMA)
        connection.commit()
        return

    # For in-memory databases, keep a connection alive
    holder = sqlite3.connect(target, **connect_kwargs)
    _memory_db_holder = holder
    holder.executescript(SCHEMA)
    holder.commit()
def cleanup_expired_pastes() -> int:
    """Delete every paste whose ``last_accessed`` is older than the expiry window.

    The window is ``PASTE_EXPIRY_SECONDS`` counted back from the current time.

    Returns:
        The number of rows deleted.
    """
    max_idle = current_app.config["PASTE_EXPIRY_SECONDS"]
    oldest_allowed = int(time.time()) - max_idle

    conn = get_db()
    deleted = conn.execute(
        "DELETE FROM pastes WHERE last_accessed < ?", (oldest_allowed,)
    ).rowcount
    conn.commit()
    return deleted
def init_app(app) -> None:
    """Hook the database layer into *app*.

    Registers ``close_db`` as an app-context teardown callback, then runs
    ``init_db()`` inside an application context.
    """
    app.teardown_appcontext(close_db)

    context = app.app_context()
    with context:
        init_db()