add content-hash dedup for abuse prevention

Throttle repeated submissions of identical content using SHA256 hash
tracking. Configurable via FLASKPASTE_DEDUP_WINDOW and FLASKPASTE_DEDUP_MAX.
This commit is contained in:
Username
2025-12-20 03:31:20 +01:00
parent 8f9868f0d9
commit 202e927918
6 changed files with 382 additions and 1 deletions

View File

@@ -9,7 +9,7 @@ import time
from flask import Response, current_app, request
from app.api import bp
from app.database import get_db
from app.database import check_content_hash, get_db
# Valid paste ID pattern (hexadecimal only)
PASTE_ID_PATTERN = re.compile(r"^[a-f0-9]+$")
@@ -205,6 +205,22 @@ def create_paste():
"authenticated": owner is not None,
}, 413)
# Check content deduplication threshold
content_hash = hashlib.sha256(content).hexdigest()
is_allowed, dedup_count = check_content_hash(content_hash)
if not is_allowed:
window = current_app.config["CONTENT_DEDUP_WINDOW"]
current_app.logger.warning(
"Dedup threshold exceeded: hash=%s count=%d from=%s",
content_hash[:16], dedup_count, request.remote_addr
)
return _json_response({
"error": "Duplicate content rate limit exceeded",
"count": dedup_count,
"window_seconds": window,
}, 429)
paste_id = _generate_id(content)
now = int(time.time())

View File

@@ -19,6 +19,11 @@ class Config:
# Paste expiry (default 5 days)
PASTE_EXPIRY_SECONDS = int(os.environ.get("FLASKPASTE_EXPIRY", 5 * 24 * 60 * 60))
# Content deduplication / abuse prevention
# Throttle repeated submissions of identical content
CONTENT_DEDUP_WINDOW = int(os.environ.get("FLASKPASTE_DEDUP_WINDOW", 3600)) # 1 hour
CONTENT_DEDUP_MAX = int(os.environ.get("FLASKPASTE_DEDUP_MAX", 3)) # max 3 per window
# Reverse proxy trust configuration
# SECURITY: The X-SSL-Client-SHA1 header is trusted for authentication.
# This header MUST only come from a trusted reverse proxy that validates
@@ -47,6 +52,10 @@ class TestingConfig(Config):
TESTING = True
DATABASE = ":memory:"
# Relaxed dedup for testing (up to 100 identical submissions per 1-second window)
CONTENT_DEDUP_WINDOW = 1
CONTENT_DEDUP_MAX = 100
config = {
"development": DevelopmentConfig,

View File

@@ -19,6 +19,16 @@ CREATE TABLE IF NOT EXISTS pastes (
CREATE INDEX IF NOT EXISTS idx_pastes_created_at ON pastes(created_at);
CREATE INDEX IF NOT EXISTS idx_pastes_owner ON pastes(owner);
CREATE INDEX IF NOT EXISTS idx_pastes_last_accessed ON pastes(last_accessed);
-- Content hash tracking for abuse prevention
CREATE TABLE IF NOT EXISTS content_hashes (
hash TEXT PRIMARY KEY,
first_seen INTEGER NOT NULL,
last_seen INTEGER NOT NULL,
count INTEGER NOT NULL DEFAULT 1
);
CREATE INDEX IF NOT EXISTS idx_content_hashes_last_seen ON content_hashes(last_seen);
"""
# Hold reference for in-memory shared cache databases
@@ -88,6 +98,79 @@ def cleanup_expired_pastes() -> int:
return cursor.rowcount
def cleanup_expired_hashes() -> int:
    """Purge content-hash records that have aged out of the dedup window.

    A record is removed when its ``last_seen`` timestamp is strictly older
    than ``now - CONTENT_DEDUP_WINDOW`` (the window is read from the app
    config). The deletion is committed before returning.

    Returns:
        Number of rows deleted from ``content_hashes``.
    """
    window_seconds = current_app.config["CONTENT_DEDUP_WINDOW"]
    oldest_allowed = int(time.time()) - window_seconds

    conn = get_db()
    deleted = conn.execute(
        "DELETE FROM content_hashes WHERE last_seen < ?",
        (oldest_allowed,),
    )
    conn.commit()
    return deleted.rowcount
def check_content_hash(content_hash: str) -> tuple[bool, int]:
    """Record a submission of ``content_hash`` and enforce the dedup threshold.

    The per-hash counter lives in the ``content_hashes`` table. The window is
    measured from ``last_seen``, i.e. from the most recent *accepted*
    submission: rejected submissions do not touch the record, and a record
    whose ``last_seen`` is older than ``now - CONTENT_DEDUP_WINDOW`` is reset
    to a count of 1.

    The decision and the counter change are made by single SQL statements
    (``INSERT OR IGNORE`` and one conditional ``UPDATE``) instead of a
    SELECT followed by a separate write. This way concurrent requests for
    the same content cannot collide on the primary key or both increment
    from the same stale count.

    Args:
        content_hash: SHA256 hex digest of content

    Returns:
        Tuple of (is_allowed, current_count)
        is_allowed is False if threshold exceeded within window;
        current_count is the counter value stored after this call.
    """
    window = current_app.config["CONTENT_DEDUP_WINDOW"]
    max_count = current_app.config["CONTENT_DEDUP_MAX"]
    now = int(time.time())
    cutoff = now - window

    db = get_db()

    # First sighting of this content. OR IGNORE turns a primary-key conflict
    # into a no-op (rowcount 0) rather than an error, so two simultaneous
    # first submissions are both handled: one inserts, the other falls
    # through to the UPDATE below.
    inserted = db.execute(
        "INSERT OR IGNORE INTO content_hashes (hash, first_seen, last_seen, count) "
        "VALUES (?, ?, ?, 1)",
        (content_hash, now, now),
    ).rowcount == 1
    if inserted:
        db.commit()
        return True, 1

    # Existing record. The WHERE clause admits the row only when it is
    # outside the window (reset to 1) or still under the threshold
    # (increment). The CASE expressions read the pre-update column values,
    # so the reset/increment choice matches the WHERE decision. A row at the
    # threshold is left untouched, which also keeps the counter bounded.
    accepted = db.execute(
        "UPDATE content_hashes SET "
        "first_seen = CASE WHEN last_seen < ? THEN ? ELSE first_seen END, "
        "count = CASE WHEN last_seen < ? THEN 1 ELSE count + 1 END, "
        "last_seen = ? "
        "WHERE hash = ? AND (last_seen < ? OR count < ?)",
        (cutoff, now, cutoff, now, content_hash, cutoff, max_count),
    ).rowcount == 1

    row = db.execute(
        "SELECT count FROM content_hashes WHERE hash = ?",
        (content_hash,),
    ).fetchone()

    # Commit on the rejected path too, so that any transaction opened by the
    # write statements above is closed.
    db.commit()

    # The row can only be missing if another connection deleted it between
    # the statements above; report a stored count of zero in that case.
    stored_count = row["count"] if row is not None else 0
    return accepted, stored_count
def init_app(app) -> None:
"""Register database functions with Flask app."""
app.teardown_appcontext(close_db)