mirror of https://github.com/interlegis/sapl.git
Browse Source
sapl/middleware/ratelimit.py:
- Decision chain: known UA → IP blocked → authenticated → anonymous
- Authenticated: 120 req/min per user (rl:{ns}:user:{id}:reqs, DB1)
- Anonymous: 35 req/min per IP (rl:ip:{ip}:reqs) + per-ns/IP/window
counter to catch UA rotators (rl:ns:{ns}:ip:{ip}:w:{bucket})
- Blocking keys expire after 300 s (BLOCK_TTL)
- Thresholds driven by RATE_LIMITER_RATE / RATE_LIMITER_RATE_AUTHENTICATED
- RATE_LIMIT_WHITELIST_IPS for legislative-house IP ranges (future)
- Atomic INCR+EXPIRE via Redis Lua script; falls back to non-atomic
cache get/set when Redis unavailable (dry-run / file-cache safe)
- RATELIMIT_DRY_RUN=True by default — logs only, no 429s returned
- OAI-SearchBot added to BOT_UA_FRAGMENTS
- Suspicious-header check: missing Accept-Language + Accept (2/2)
- Whitelist check short-circuits all other checks
settings.py:
- RateLimitMiddleware inserted after AuthenticationMiddleware so
request.user is available for authenticated-vs-anonymous branching
- RATELIMIT_DRY_RUN (default True)
- RATE_LIMITER_RATE_AUTHENTICATED (default '120/m')
- RATE_LIMITER_RATE_BOT (default '5/m')
- RATE_LIMIT_WHITELIST_IPS (default empty)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
2 changed files with 222 additions and 1 deletions
@ -0,0 +1,201 @@ |
|||
""" |
|||
RateLimitMiddleware — cross-pod rate limiting backed by shared Redis. |
|||
|
|||
Decision flow (per request): |
|||
1. Known bot UA? → 429 |
|||
2. IP in blocked set? → 429 |
|||
3. Authenticated user? |
|||
a. User blocked? → 429 |
|||
b. Suspicious hdrs? → 429 |
|||
c. User rate ≥ 120? → SET user:blocked, 429 |
|||
4. Anonymous: |
|||
a. Suspicious hdrs? → 429 |
|||
b. IP rate ≥ RATE_LIMITER_RATE (default 35/min)? → SET ip:blocked, 429 |
|||
c. NS/IP window hit? → SET ip:blocked, 429 |
|||
|
|||
When RATELIMIT_DRY_RUN=True, block decisions are only logged and no 429 is returned; counters and block flags are still written. |
|||
Degrades gracefully to non-atomic counting when Redis is unavailable. |
|||
""" |
|||
|
|||
import hashlib |
|||
import logging |
|||
import time |
|||
|
|||
from django.conf import settings |
|||
from django.core.cache import caches |
|||
from django.http import HttpResponse |
|||
|
|||
# Dedicated logger name so rate-limit events can be routed separately.
logger = logging.getLogger('sapl.ratelimit')

# Substrings looked for in the User-Agent header; a request whose UA contains
# any of them is classified as a known bot.
BOT_UA_FRAGMENTS = [
    'GPTBot', 'ClaudeBot', 'PerplexityBot', 'Bytespider',
    'AhrefsBot', 'meta-externalagent', 'OAI-SearchBot',
    'Chrome/98.0.4758',  # known scraper impersonating an old Chrome
]

# Lua script: increment KEYS[1] and, only when the key was just created
# (counter == 1), give it an expiry of ARGV[1] seconds. Returns the counter.
_INCR_LUA = (
    "\n"
    "local n = redis.call('INCR', KEYS[1])\n"
    "if n == 1 then redis.call('EXPIRE', KEYS[1], ARGV[1]) end\n"
    "return n\n"
)
|||
|
|||
|
|||
def _sha256(s):
    """Return the hexadecimal SHA-256 digest of the string *s*.

    The string is encoded with ``str.encode()``'s default codec (UTF-8)
    before hashing.
    """
    digest = hashlib.sha256()
    digest.update(s.encode())
    return digest.hexdigest()
|||
|
|||
|
|||
def _get_ip(request):
    """Return the client IP string used for rate-limit keys.

    The first (left-most) comma-separated entry of ``X-Forwarded-For`` wins
    when it is non-empty after stripping; otherwise ``REMOTE_ADDR`` is
    returned, or ``''`` when neither is present.

    NOTE(review): the left-most X-Forwarded-For entry is taken as-is. If the
    fronting proxy appends to, rather than overwrites, a client-sent header,
    this value is client-controlled — confirm the ingress configuration.
    """
    forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR', '')
    first_hop = forwarded_for.split(',')[0].strip()
    if first_hop:
        return first_hop
    return request.META.get('REMOTE_ADDR', '')
|||
|
|||
|
|||
def _is_suspicious_headers(request):
    """Return True when the request carries neither Accept-Language nor Accept.

    Real browsers send both headers; bots frequently omit them. A header
    that is present but empty counts as missing. One missing header alone
    is not enough to flag the request.
    """
    headers = request.META
    has_language = bool(headers.get('HTTP_ACCEPT_LANGUAGE'))
    has_accept = bool(headers.get('HTTP_ACCEPT'))
    return not (has_language or has_accept)
|||
|
|||
|
|||
def _parse_rate(rate_str):
    """Parse a rate string such as '30/m' or '100/30s' into (count, seconds).

    Accepted period forms (case-insensitive, surrounding whitespace ignored):

    * ``<unit>``      — one of ``s``, ``m``, ``h``, ``d``
    * ``<n><unit>``   — a positive integer multiple of the unit, e.g. ``5m``

    Any other period (empty, unknown unit, non-numeric or zero multiplier)
    falls back to a 60-second window, which is what unknown units have
    always resolved to here.

    Args:
        rate_str: text of the form ``'<count>/<period>'``.

    Returns:
        Tuple ``(count, seconds)`` of ints.

    Raises:
        ValueError: if *rate_str* does not contain exactly one ``'/'`` or
            the count part is not an integer.
    """
    unit_seconds = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}
    fallback_seconds = 60

    count, period = rate_str.split('/')
    count = int(count)

    period = period.strip().lower()
    multiplier, unit = period[:-1].strip(), period[-1:]

    if unit not in unit_seconds:
        return count, fallback_seconds
    # isdecimal() (not isdigit()) so that everything accepted here is also
    # accepted by int().
    if multiplier and not multiplier.isdecimal():
        return count, fallback_seconds

    seconds = unit_seconds[unit] * (int(multiplier) if multiplier else 1)
    # A zero multiplier would yield a zero-length window; treat it as invalid.
    return count, seconds or fallback_seconds
|||
|
|||
|
|||
class RateLimitMiddleware:
    """Django middleware that rate-limits requests using the ``ratelimit`` cache.

    Per request, in order: whitelist pass-through, known-bot User-Agent
    check, blocked-IP check, then either the authenticated or the anonymous
    rule set. A block decision is always logged; a 429 response is returned
    only when ``dry_run`` is False. Counters and block flags are written in
    both modes.

    The limiter fails open: if evaluating a request raises (for example the
    cache backend is unreachable), the error is logged and the request is
    passed through untouched.
    """

    BLOCK_TTL = 300  # seconds an IP/user stays blocked after threshold breach

    def __init__(self, get_response):
        """Read the rate-limit settings and bind the ``ratelimit`` cache.

        Args:
            get_response: the next middleware/view callable in the chain.
        """
        self.get_response = get_response
        self.dry_run = getattr(settings, 'RATELIMIT_DRY_RUN', True)

        anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m')
        auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m')

        self.anon_threshold, self.anon_window = _parse_rate(anon_rate)
        self.auth_threshold, self.auth_window = _parse_rate(auth_rate)

        # Exact-match IP strings; an unset/None setting yields an empty set.
        self.whitelist = set(
            getattr(settings, 'RATE_LIMIT_WHITELIST_IPS', []) or []
        )
        self._rl_cache = caches['ratelimit']

    def __call__(self, request):
        """Evaluate *request* and return either a 429 or the downstream response."""
        try:
            decision = self._evaluate(request)
        except Exception:
            # Fail open: a broken limiter backend must not take the site down.
            logger.exception(
                'ratelimit_error path=%s; request allowed', request.path
            )
            return self.get_response(request)

        if decision['action'] == 'block':
            logger.warning(
                'ratelimit_block reason=%s ip=%s path=%s dry_run=%s',
                decision['reason'],
                decision['ip'],
                request.path,
                self.dry_run,
                extra={
                    'ua': request.META.get('HTTP_USER_AGENT', ''),
                    'namespace': getattr(request, 'tenant', 'unknown'),
                },
            )
            if not self.dry_run:
                return HttpResponse(status=429)
        return self.get_response(request)

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------

    def _evaluate(self, request):
        """Return the decision dict for *request*.

        The dict always has ``'action'`` (``'pass'`` or ``'block'``) and
        ``'ip'``; block decisions also carry ``'reason'``.
        """
        ip = _get_ip(request)

        # Whitelisted IPs bypass every other check.
        if ip in self.whitelist:
            return {'action': 'pass', 'ip': ip}

        # Check 1: known bad UA (case-insensitive substring match)
        ua = request.META.get('HTTP_USER_AGENT', '').lower()
        if any(fragment.lower() in ua for fragment in BOT_UA_FRAGMENTS):
            return {'action': 'block', 'reason': 'known_ua', 'ip': ip}

        # Check 2: IP already blocked
        if self._rl_cache.get(f'rl:ip:{ip}:blocked'):
            return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip}

        # request.user may be absent if this middleware runs before
        # authentication; treat that as anonymous.
        user = getattr(request, 'user', None)
        if user is not None and user.is_authenticated:
            return self._evaluate_authenticated(request, ip)
        return self._evaluate_anonymous(request, ip)

    def _evaluate_authenticated(self, request, ip):
        """Apply the per-user rules (checks 3a-3c) and return a decision dict."""
        user_id = str(request.user.pk).lower().strip()
        ns = getattr(request, 'tenant', 'global')
        blocked_key = f'rl:{ns}:user:{user_id}:blocked'

        # Check 3a: user already blocked
        if self._rl_cache.get(blocked_key):
            return {'action': 'block', 'reason': 'user_blocked', 'ip': ip}

        # Check 3b: suspicious headers
        if _is_suspicious_headers(request):
            return {'action': 'block', 'reason': 'suspicious_headers_auth', 'ip': ip}

        # Check 3c: authenticated request rate
        count = self._incr_with_ttl(
            f'rl:{ns}:user:{user_id}:reqs', ttl=self.auth_window
        )
        if count >= self.auth_threshold:
            self._rl_cache.set(blocked_key, 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip}

        return {'action': 'pass', 'ip': ip}

    def _evaluate_anonymous(self, request, ip):
        """Apply the per-IP rules (checks 4a-4c) and return a decision dict."""
        # Check 4a: suspicious headers
        if _is_suspicious_headers(request):
            return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip}

        ip_blocked_key = f'rl:ip:{ip}:blocked'

        # Check 4b: IP request rate
        count = self._incr_with_ttl(f'rl:ip:{ip}:reqs', ttl=self.anon_window)
        if count >= self.anon_threshold:
            self._rl_cache.set(ip_blocked_key, 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'ip_rate', 'ip': ip}

        # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT).
        # The bucket index is aligned to wall-clock windows; the key lives
        # for two windows so it outlasts the bucket it counts.
        ns = getattr(request, 'tenant', 'global')
        bucket = int(time.time() // self.anon_window)
        count = self._incr_with_ttl(
            f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2
        )
        if count >= self.anon_threshold:
            self._rl_cache.set(ip_blocked_key, 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip}

        return {'action': 'pass', 'ip': ip}

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _incr_with_ttl(self, key, ttl):
        """Increment the counter at *key* and return its new value.

        Preferred path: atomic INCR + EXPIRE through a Redis Lua script, so
        the expiry is set once, when the key is created.

        Fallback path (django-redis not installed, or the Redis call
        failed): non-atomic counting through the Django cache, see
        ``_incr_fallback``.

        Args:
            key: counter key.
            ttl: window length in seconds.
        """
        try:
            from django_redis import get_redis_connection
            client = get_redis_connection('ratelimit')
            return client.eval(_INCR_LUA, 1, key, ttl)
        except ImportError:
            # django-redis is not installed: an expected configuration
            # (file/locmem cache), not worth a log line per request.
            pass
        except Exception:
            logger.debug(
                'ratelimit_redis_unavailable key=%s; using cache fallback',
                key,
                exc_info=True,
            )
        return self._incr_fallback(key, ttl)

    def _incr_fallback(self, key, ttl):
        """Non-atomic fixed-window counter on top of the Django cache.

        The window's expiry timestamp is stored next to the count so that
        later hits do not extend the window: a plain
        ``set(key, count, timeout=ttl)`` on every hit would restart the
        timeout each time and the counter would never expire under steady
        traffic.
        """
        now = time.time()
        entry = self._rl_cache.get(key)
        try:
            count, expires_at = entry
            live = expires_at > now
        except (TypeError, ValueError):
            # Missing key, or a value that is not a (count, expires_at) pair.
            live = False

        if live:
            count += 1
            # Ceil of the remaining time, clamped to [1, ttl].
            remaining = int(-(-(expires_at - now) // 1))
            remaining = max(1, min(ttl, remaining))
        else:
            count = 1
            expires_at = now + ttl
            remaining = ttl

        self._rl_cache.set(key, (count, expires_at), timeout=remaining)
        return count
|||
Loading…
Reference in new issue