Browse Source

Phase 2: RateLimitMiddleware — cross-pod rate limiting via shared Redis

sapl/middleware/ratelimit.py:
- Decision chain: known UA → IP blocked → authenticated → anonymous
- Authenticated: 120 req/min per user (rl:{ns}:user:{id}:reqs, DB1)
- Anonymous: 35 req/min per IP (rl:ip:{ip}:reqs) + per-ns/IP/window
  counter to catch UA rotators (rl:ns:{ns}:ip:{ip}:w:{bucket})
- Blocking keys expire after 300 s (BLOCK_TTL)
- Thresholds driven by RATE_LIMITER_RATE / RATE_LIMITER_RATE_AUTHENTICATED
- RATE_LIMIT_WHITELIST_IPS for legislative-house IP ranges (future)
- Atomic INCR+EXPIRE via Redis Lua script; falls back to non-atomic
  cache get/set when Redis unavailable (dry-run / file-cache safe)
- RATELIMIT_DRY_RUN=True by default — logs only, no 429s returned
- OAI-SearchBot added to BOT_UA_FRAGMENTS
- Suspicious-header check: missing Accept-Language + Accept (2/2)
- Whitelist check short-circuits all other checks

settings.py:
- RateLimitMiddleware inserted after AuthenticationMiddleware so
  request.user is available for authenticated-vs-anonymous branching
- RATELIMIT_DRY_RUN (default True)
- RATE_LIMITER_RATE_AUTHENTICATED (default '120/m')
- RATE_LIMITER_RATE_BOT (default '5/m')
- RATE_LIMIT_WHITELIST_IPS (default empty)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 3 weeks ago
parent
commit
c3ccc9be74
  1. 201
      sapl/middleware/ratelimit.py
  2. 22
      sapl/settings.py

201
sapl/middleware/ratelimit.py

@ -0,0 +1,201 @@
"""
RateLimitMiddleware cross-pod rate limiting backed by shared Redis.
Decision flow (per request):
1. Known bot UA? 429
2. IP in blocked set? 429
3. Authenticated user?
a. User blocked? 429
b. Suspicious hdrs? 429
c. User rate 120? SET user:blocked, 429
4. Anonymous:
a. Suspicious hdrs? 429
b. IP rate 30/min? SET ip:blocked, 429
c. NS/IP window hit? SET ip:blocked, 429
All decisions are no-ops when RATELIMIT_DRY_RUN=True (logged only).
Degrades gracefully to non-atomic counting when Redis is unavailable.
"""
import hashlib
import logging
import time
from django.conf import settings
from django.core.cache import caches
from django.http import HttpResponse
# Dedicated logger name so rate-limit decisions can be filtered separately.
logger = logging.getLogger('sapl.ratelimit')

# Substrings looked up (case-insensitively) in the User-Agent header; a
# request whose UA contains any of them is treated as a known bot.
BOT_UA_FRAGMENTS = [
    'GPTBot',
    'ClaudeBot',
    'PerplexityBot',
    'Bytespider',
    'AhrefsBot',
    'meta-externalagent',
    'OAI-SearchBot',
    'Chrome/98.0.4758',  # known scraper impersonating an old Chrome
]

# Redis-side script: increment KEYS[1] and, only when the counter was just
# created (value 1), give it a TTL of ARGV[1] seconds. Returns the new count.
_INCR_LUA = """
local n = redis.call('INCR', KEYS[1])
if n == 1 then redis.call('EXPIRE', KEYS[1], ARGV[1]) end
return n
"""
def _sha256(s):
    """Return the hexadecimal SHA-256 digest of the string *s*.

    The string is encoded with ``str.encode()`` (UTF-8 by default) before
    hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(s.encode())
    return hasher.hexdigest()
def _get_ip(request):
    """Return the client IP used as the rate-limiting key.

    The first comma-separated entry of ``X-Forwarded-For`` wins when it is
    non-empty; otherwise ``REMOTE_ADDR`` is returned (or ``''`` when that is
    missing too).

    NOTE(review): the first X-Forwarded-For entry is whatever the sender (or
    the outermost proxy) put there. Presumably a trusted proxy overwrites the
    header before it reaches Django; confirm, otherwise clients can choose
    the IP they are counted (or whitelisted) under.
    """
    meta = request.META
    forwarded_for = meta.get('HTTP_X_FORWARDED_FOR', '')
    first_hop = forwarded_for.split(',')[0].strip()
    if first_hop:
        return first_hop
    return meta.get('REMOTE_ADDR', '')
def _is_suspicious_headers(request):
    """Return True when both Accept-Language and Accept are absent or empty.

    Real browsers send both headers; bots frequently omit them. A request
    missing only one of the two is not considered suspicious.
    """
    meta = request.META
    has_accept_language = bool(meta.get('HTTP_ACCEPT_LANGUAGE'))
    has_accept = bool(meta.get('HTTP_ACCEPT'))
    return not (has_accept_language or has_accept)
def _parse_rate(rate_str):
    """Parse a rate such as ``'30/m'`` or ``'120/m'`` into ``(count, seconds)``.

    The period is identified by the first letter of the text after the
    slash, case-insensitively and ignoring surrounding whitespace, so
    ``'2/h'``, ``'2 / H'`` and ``'2/hour'`` all yield ``(2, 3600)``.
    Recognised letters: ``s`` (1), ``m`` (60), ``h`` (3600), ``d`` (86400).
    Any other period falls back to 60 seconds.

    Raises:
        ValueError: if *rate_str* does not contain exactly one ``/`` or the
            count is not an integer.
    """
    count, period = rate_str.split('/')
    count = int(count)  # int() already tolerates surrounding whitespace
    # Normalise before the lookup; otherwise ' h' or 'hour' would silently
    # be treated as the 60-second default.
    unit = period.strip().lower()[:1]
    seconds = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}.get(unit, 60)
    return count, seconds
class RateLimitMiddleware:
    """Django middleware that counts requests and flags or blocks abusers.

    Counters and block markers live in the ``'ratelimit'`` cache. Each
    request is evaluated by :meth:`_evaluate`; a ``block`` decision is always
    logged and only turned into an HTTP 429 when ``RATELIMIT_DRY_RUN`` is
    false.

    Settings read at start-up (with the defaults used when absent):
    ``RATELIMIT_DRY_RUN`` (True), ``RATE_LIMITER_RATE`` ('35/m'),
    ``RATE_LIMITER_RATE_AUTHENTICATED`` ('120/m') and
    ``RATE_LIMIT_WHITELIST_IPS`` (empty).
    """

    BLOCK_TTL = 300  # seconds an IP/user stays blocked after threshold breach

    def __init__(self, get_response):
        """Store the next handler and load thresholds from Django settings."""
        self.get_response = get_response
        self.dry_run = getattr(settings, 'RATELIMIT_DRY_RUN', True)
        anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m')
        auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m')
        self.anon_threshold, self.anon_window = _parse_rate(anon_rate)
        self.auth_threshold, self.auth_window = _parse_rate(auth_rate)
        # Membership is an exact string match on the client IP; network
        # ranges (CIDR) are not expanded.
        self.whitelist = set(
            getattr(settings, 'RATE_LIMIT_WHITELIST_IPS', []) or []
        )
        self._rl_cache = caches['ratelimit']

    def __call__(self, request):
        """Evaluate the request; log block decisions and enforce them unless dry-run."""
        decision = self._evaluate(request)
        if decision['action'] == 'block':
            logger.warning(
                'ratelimit_block reason=%s ip=%s path=%s dry_run=%s',
                decision['reason'],
                decision['ip'],
                request.path,
                self.dry_run,
                extra={
                    'ua': request.META.get('HTTP_USER_AGENT', ''),
                    'namespace': getattr(request, 'tenant', 'unknown'),
                },
            )
            if not self.dry_run:
                return HttpResponse(status=429)
        return self.get_response(request)

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------
    def _evaluate(self, request):
        """Return a decision dict: ``{'action': 'pass'|'block', 'ip': ..., ['reason': ...]}``.

        Order: whitelist, known bot UA, blocked IP, then the authenticated
        or anonymous branch depending on ``request.user``.
        """
        ip = _get_ip(request)
        if ip in self.whitelist:
            return {'action': 'pass', 'ip': ip}

        # Check 1: known bad UA (case-insensitive substring match).
        # Lower-case the header once instead of once per fragment.
        ua = request.META.get('HTTP_USER_AGENT', '').lower()
        if any(fragment.lower() in ua for fragment in BOT_UA_FRAGMENTS):
            return {'action': 'block', 'reason': 'known_ua', 'ip': ip}

        # Check 2: IP already blocked
        if self._rl_cache.get(f'rl:ip:{ip}:blocked'):
            return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip}

        user = getattr(request, 'user', None)
        if user is not None and user.is_authenticated:
            return self._evaluate_authenticated(request, ip)
        return self._evaluate_anonymous(request, ip)

    def _evaluate_authenticated(self, request, ip):
        """Apply the per-user checks (3a-3c) for an authenticated request."""
        user_id = str(request.user.pk).lower().strip()
        ns = getattr(request, 'tenant', 'global')

        # Check 3a: user already blocked
        if self._rl_cache.get(f'rl:{ns}:user:{user_id}:blocked'):
            return {'action': 'block', 'reason': 'user_blocked', 'ip': ip}

        # Check 3b: suspicious headers
        if _is_suspicious_headers(request):
            return {'action': 'block', 'reason': 'suspicious_headers_auth', 'ip': ip}

        # Check 3c: authenticated request rate
        count = self._incr_with_ttl(
            f'rl:{ns}:user:{user_id}:reqs', ttl=self.auth_window
        )
        # NOTE(review): `>=` blocks the request that reaches the threshold,
        # so threshold - 1 requests per window get through. Confirm intended.
        if count >= self.auth_threshold:
            self._rl_cache.set(
                f'rl:{ns}:user:{user_id}:blocked', 1, timeout=self.BLOCK_TTL
            )
            return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip}
        return {'action': 'pass', 'ip': ip}

    def _evaluate_anonymous(self, request, ip):
        """Apply the per-IP checks (4a-4c) for an anonymous request."""
        # Check 4a: suspicious headers
        if _is_suspicious_headers(request):
            return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip}

        # Check 4b: IP request rate
        count = self._incr_with_ttl(f'rl:ip:{ip}:reqs', ttl=self.anon_window)
        # NOTE(review): same `>=` semantics as the authenticated branch.
        if count >= self.anon_threshold:
            self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'ip_rate', 'ip': ip}

        # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT)
        ns = getattr(request, 'tenant', 'global')
        bucket = int(time.time() // self.anon_window)
        count = self._incr_with_ttl(
            f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2
        )
        if count >= self.anon_threshold:
            self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip}
        return {'action': 'pass', 'ip': ip}

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _incr_with_ttl(self, key, ttl):
        """Increment the counter at *key* and return its new value.

        Preferred path: atomic INCR + EXPIRE via the Redis Lua script, where
        the TTL is set only when the counter is created.

        When that fails for any reason (django_redis not installed, Redis
        unreachable, file-based cache in use) the failure is logged at debug
        level and counting falls back to a non-atomic get/set on the
        ``'ratelimit'`` cache.
        """
        try:
            from django_redis import get_redis_connection
            client = get_redis_connection('ratelimit')
            return client.eval(_INCR_LUA, 1, key, ttl)
        except Exception as exc:
            # Deliberate best-effort: rate limiting must never take the site
            # down, so every failure degrades to the cache-based counter.
            logger.debug(
                'ratelimit_redis_unavailable key=%s error=%r', key, exc
            )
        return self._incr_fallback(key, ttl)

    def _incr_fallback(self, key, ttl):
        """Non-atomic fixed-window counter on top of the Django cache.

        The cache entry is ``(count, expires_at)``. The expiry chosen on the
        first hit of a window is kept on later hits, so the window really
        ends *ttl* seconds after it started. Re-setting the key with a fresh
        ``timeout=ttl`` on every hit would keep the counter alive for as long
        as traffic continues. Entries in any other shape, or past their
        expiry, start a new window.
        """
        now = time.time()
        entry = self._rl_cache.get(key)
        if (
            isinstance(entry, (list, tuple))
            and len(entry) == 2
            and entry[1] > now
        ):
            count, expires_at = entry[0] + 1, entry[1]
        else:
            count, expires_at = 1, now + ttl
        # The cache timeout is housekeeping only; `expires_at` is what
        # decides when the window ends.
        remaining = max(1, int(expires_at - now) + 1)
        self._rl_cache.set(key, (count, expires_at), timeout=remaining)
        return count

22
sapl/settings.py

@ -146,6 +146,9 @@ MIDDLEWARE = [
'sapl.middleware.endpoint_restriction.EndpointRestrictionMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
# RateLimitMiddleware runs after AuthenticationMiddleware so it can
# distinguish authenticated users (higher threshold) from anonymous ones.
'sapl.middleware.ratelimit.RateLimitMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'django.middleware.security.SecurityMiddleware',
@ -373,7 +376,24 @@ DATA_UPLOAD_MAX_MEMORY_SIZE = 10 * 1024 * 1024 # 10MB
FILE_UPLOAD_MAX_MEMORY_SIZE = 2 * 1024 * 1024 # 2MB
FILE_UPLOAD_TEMP_DIR = '/var/interlegis/sapl/tmp'
RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='35/m')
# ---------------------------------------------------------------------------
# Rate limiting — RateLimitMiddleware (sapl/middleware/ratelimit.py)
# ---------------------------------------------------------------------------
# Start with RATELIMIT_DRY_RUN=True (block decisions are logged, no 429 is
# returned); flip to False after validating in logs that no legitimate
# traffic is flagged.
# NOTE(review): the middleware reads this as one global flag applied to every
# check, so checks cannot be enabled individually with this setting alone.
RATELIMIT_DRY_RUN = config('RATELIMIT_DRY_RUN', default=True, cast=bool)
# Anonymous per-IP limit as a '<count>/<period>' string, e.g. '35/m'.
RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='35/m')
# Per-user limit for authenticated requests, same format.
RATE_LIMITER_RATE_AUTHENTICATED = config('RATE_LIMITER_RATE_AUTHENTICATED', default='120/m')
# NOTE(review): RateLimitMiddleware does not read this setting; known bot
# user agents are blocked outright there. Presumably reserved for later use.
RATE_LIMITER_RATE_BOT = config('RATE_LIMITER_RATE_BOT', default='5/m')
# Comma-separated IPs exempt from rate limiting (e.g. legislative-house ranges).
# The value is split on commas, with blanks dropped and whitespace stripped.
# RateLimitMiddleware matches the client IP against these entries by exact
# string membership, so CIDR ranges are not expanded.
# Leave empty until the IP list is available — see rate-limiter-v2.md §9.
RATE_LIMIT_WHITELIST_IPS = config(
'RATE_LIMIT_WHITELIST_IPS',
default='',
cast=lambda v: [x.strip() for x in v.split(',') if x.strip()],
)
# Internationalization
# https://docs.djangoproject.com/en/1.8/topics/i18n/

Loading…
Cancel
Save