Phase 2: RateLimitMiddleware — cross-pod rate limiting via shared Redis

sapl/middleware/ratelimit.py:
- Decision chain: known UA → IP blocked → authenticated → anonymous
- Authenticated: 120 req/min per user (rl:{ns}:user:{id}:reqs, DB1)
- Anonymous: 35 req/min per IP (rl:ip:{ip}:reqs) + per-ns/IP/window counter to catch UA rotators (rl:ns:{ns}:ip:{ip}:w:{bucket})
- Blocking keys expire after 300 s (BLOCK_TTL)
- Thresholds driven by RATE_LIMITER_RATE / RATE_LIMITER_RATE_AUTHENTICATED
- RATE_LIMIT_WHITELIST_IPS for legislative-house IP ranges (future)
- Atomic INCR+EXPIRE via Redis Lua script; falls back to non-atomic cache get/set when Redis unavailable (dry-run / file-cache safe)
- RATELIMIT_DRY_RUN=True by default — logs only, no 429s returned
- OAI-SearchBot added to BOT_UA_FRAGMENTS
- Suspicious-header check: missing Accept-Language + Accept (2/2)
- Whitelist check short-circuits all other checks

settings.py:
- RateLimitMiddleware inserted after AuthenticationMiddleware so request.user is available for authenticated-vs-anonymous branching
- RATELIMIT_DRY_RUN (default True)
- RATE_LIMITER_RATE_AUTHENTICATED (default '120/m')
- RATE_LIMITER_RATE_BOT (default '5/m')
- RATE_LIMIT_WHITELIST_IPS (default empty)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
3 weeks ago · c3ccc9be74
2 changed files with 222 additions and 1 deletions
--- a/sapl/middleware/ratelimit.py
+++ b/sapl/middleware/ratelimit.py
@ -0,0 +1,201 @@
+"""
+RateLimitMiddleware — cross-pod rate limiting backed by shared Redis.
+
+Decision flow (per request):
+  1. Known bot UA?          → 429
+  2. IP in blocked set?     → 429
+  3. Authenticated user?
+       a. User blocked?     → 429
+       b. Suspicious hdrs?  → 429
+       c. User rate ≥ 120?  → SET user:blocked, 429
+  4. Anonymous:
+       a. Suspicious hdrs?  → 429
+       b. IP rate ≥ 35/min? → SET ip:blocked, 429
+       c. NS/IP window hit? → SET ip:blocked, 429
+
+Block decisions are log-only when RATELIMIT_DRY_RUN=True (no 429 is returned);
+counters and block keys are still written.
+Degrades gracefully to non-atomic counting when Redis is unavailable.
+"""
+
+import hashlib
+import logging
+import time
+
+from django.conf import settings
+from django.core.cache import caches
+from django.http import HttpResponse
+
+# Logger named 'sapl.ratelimit'; the middleware writes its block messages here.
+logger = logging.getLogger('sapl.ratelimit')
+
+# User-Agent fragments treated as known bots/scrapers.  RateLimitMiddleware
+# compares each fragment case-insensitively as a substring of the request's
+# User-Agent header and blocks on a match (reason 'known_ua').
+BOT_UA_FRAGMENTS = [
+    'GPTBot',
+    'ClaudeBot',
+    'PerplexityBot',
+    'Bytespider',
+    'AhrefsBot',
+    'meta-externalagent',
+    'OAI-SearchBot',
+    'Chrome/98.0.4758',   # known scraper impersonating an old Chrome
+]
+
+# Lua script executed through Redis EVAL by RateLimitMiddleware._incr_with_ttl.
+# It increments the counter at KEYS[1]; only when the new value is 1 does it
+# set the key's expiry to ARGV[1] seconds.  It returns the new counter value.
+_INCR_LUA = """
+    local n = redis.call('INCR', KEYS[1])
+    if n == 1 then redis.call('EXPIRE', KEYS[1], ARGV[1]) end
+    return n
+"""
+
+
+def _sha256(s):
+    """Return the hex-encoded SHA-256 digest of the text ``s``."""
+    hasher = hashlib.sha256()
+    hasher.update(s.encode())
+    return hasher.hexdigest()
+
+
+def _get_ip(request):
+    """Return the client IP string used as the rate-limit identity.
+
+    The first comma-separated entry of ``HTTP_X_FORWARDED_FOR`` is used
+    when it is non-blank after stripping; otherwise the value of
+    ``REMOTE_ADDR`` is returned (``''`` when that is missing too).
+
+    NOTE(review): X-Forwarded-For is client-supplied unless a trusted proxy
+    overwrites it, so its first entry can be forged — confirm the deployment's
+    proxy strips or rewrites the header before relying on this value.
+    """
+    meta = request.META
+    forwarded_for = meta.get('HTTP_X_FORWARDED_FOR', '')
+    first_hop = forwarded_for.split(',')[0].strip()
+    if first_hop:
+        return first_hop
+    return meta.get('REMOTE_ADDR', '')
+
+
+def _is_suspicious_headers(request):
+    """Report whether the request lacks both ``Accept`` and ``Accept-Language``.
+
+    A header counts as missing when it is absent from ``request.META`` or
+    empty.  The request is flagged only when both headers are missing; a
+    single missing header is tolerated.
+    """
+    meta = request.META
+    has_language = bool(meta.get('HTTP_ACCEPT_LANGUAGE'))
+    has_accept = bool(meta.get('HTTP_ACCEPT'))
+    return not (has_language or has_accept)
+
+
+def _parse_rate(rate_str):
+    """Parse a rate such as ``'35/m'`` into ``(count, window_seconds)``.
+
+    The period is matched case-insensitively after stripping whitespace and
+    may be a single letter (``s``, ``m``, ``h``, ``d``) or a common alias
+    (``sec``, ``min``, ``hour``, ``day`` and their plurals).  An
+    unrecognised period falls back to 60 seconds, as before.
+
+    Raises:
+        ValueError: if ``rate_str`` has no ``/`` separator or the count is
+            not an integer.
+    """
+    count_text, separator, period_text = rate_str.partition('/')
+    if not separator:
+        raise ValueError(
+            f"invalid rate {rate_str!r}: expected '<count>/<period>'"
+        )
+    count = int(count_text.strip())
+
+    period_seconds = {
+        's': 1, 'sec': 1, 'second': 1, 'seconds': 1,
+        'm': 60, 'min': 60, 'minute': 60, 'minutes': 60,
+        'h': 3600, 'hr': 3600, 'hour': 3600, 'hours': 3600,
+        'd': 86400, 'day': 86400, 'days': 86400,
+    }
+    # Unknown units keep the historical one-minute default rather than
+    # raising, so an unusual setting cannot stop the application from booting.
+    seconds = period_seconds.get(period_text.strip().lower(), 60)
+    return count, seconds
+
+
+class RateLimitMiddleware:
+    """Django middleware that rate-limits requests via the 'ratelimit' cache.
+
+    Each request is classified by ``_evaluate`` as ``'pass'`` or ``'block'``.
+    Block decisions are always logged; an HTTP 429 is returned only when
+    ``RATELIMIT_DRY_RUN`` is false.  If evaluation itself raises (for example
+    because the cache backend is unreachable) the request is let through, so
+    a failure of the limiter cannot turn into an error for every request.
+
+    Decisions are dicts with keys ``'action'`` and ``'ip'``, plus ``'reason'``
+    when the action is ``'block'``.
+    """
+
+    BLOCK_TTL = 300  # seconds an IP/user stays blocked after threshold breach
+
+    def __init__(self, get_response):
+        """Read the dry-run flag, thresholds and whitelist from settings."""
+        self.get_response = get_response
+        self.dry_run = getattr(settings, 'RATELIMIT_DRY_RUN', True)
+
+        anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m')
+        auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m')
+
+        self.anon_threshold, self.anon_window = _parse_rate(anon_rate)
+        self.auth_threshold, self.auth_window = _parse_rate(auth_rate)
+
+        # Exact-match set of IP strings; membership short-circuits every check.
+        self.whitelist = set(
+            getattr(settings, 'RATE_LIMIT_WHITELIST_IPS', []) or []
+        )
+        self._rl_cache = caches['ratelimit']
+
+    def __call__(self, request):
+        """Evaluate the request, log block decisions, and return the response.
+
+        Returns an empty 429 response when the decision is ``'block'`` and
+        dry-run is off; otherwise the downstream response.
+        """
+        try:
+            decision = self._evaluate(request)
+        except Exception:
+            # Fail open: a broken limiter must never reject or crash traffic.
+            logger.exception('ratelimit_error path=%s', request.path)
+            return self.get_response(request)
+
+        if decision['action'] == 'block':
+            logger.warning(
+                'ratelimit_block reason=%s ip=%s path=%s dry_run=%s',
+                decision['reason'],
+                decision['ip'],
+                request.path,
+                self.dry_run,
+                extra={
+                    'ua':        request.META.get('HTTP_USER_AGENT', ''),
+                    'namespace': getattr(request, 'tenant', 'unknown'),
+                },
+            )
+            if not self.dry_run:
+                return HttpResponse(status=429)
+        return self.get_response(request)
+
+    # ------------------------------------------------------------------
+    # Evaluation
+    # ------------------------------------------------------------------
+
+    def _evaluate(self, request):
+        """Run the checks shared by all requests, then branch on auth state.
+
+        Order: whitelist (pass), known bot User-Agent (block), IP already
+        blocked (block), then the authenticated or anonymous check chain.
+        """
+        ip = _get_ip(request)
+
+        if ip in self.whitelist:
+            return {'action': 'pass', 'ip': ip}
+
+        # Check 1: known bad UA (case-insensitive substring match).  The
+        # header is lowercased once instead of once per fragment.
+        ua = request.META.get('HTTP_USER_AGENT', '').lower()
+        if any(fragment.lower() in ua for fragment in BOT_UA_FRAGMENTS):
+            return {'action': 'block', 'reason': 'known_ua', 'ip': ip}
+
+        # Check 2: IP already blocked
+        if self._rl_cache.get(f'rl:ip:{ip}:blocked'):
+            return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip}
+
+        user = getattr(request, 'user', None)
+        if user is not None and user.is_authenticated:
+            return self._evaluate_authenticated(request, ip)
+        return self._evaluate_anonymous(request, ip)
+
+    def _evaluate_authenticated(self, request, ip):
+        """Apply the per-user checks; counters are keyed by tenant and user pk."""
+        user_id = str(request.user.pk).lower().strip()
+        ns = getattr(request, 'tenant', 'global')
+        blocked_key = f'rl:{ns}:user:{user_id}:blocked'
+
+        # Check 3a: user already blocked
+        if self._rl_cache.get(blocked_key):
+            return {'action': 'block', 'reason': 'user_blocked', 'ip': ip}
+
+        # Check 3b: suspicious headers
+        if _is_suspicious_headers(request):
+            return {'action': 'block', 'reason': 'suspicious_headers_auth', 'ip': ip}
+
+        # Check 3c: authenticated request rate
+        count = self._incr_with_ttl(
+            f'rl:{ns}:user:{user_id}:reqs', ttl=self.auth_window
+        )
+        if count >= self.auth_threshold:
+            self._rl_cache.set(blocked_key, 1, timeout=self.BLOCK_TTL)
+            return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip}
+
+        return {'action': 'pass', 'ip': ip}
+
+    def _evaluate_anonymous(self, request, ip):
+        """Apply the per-IP checks used for unauthenticated requests."""
+        # Check 4a: suspicious headers
+        if _is_suspicious_headers(request):
+            return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip}
+
+        blocked_key = f'rl:ip:{ip}:blocked'
+
+        # Check 4b: IP request rate
+        count = self._incr_with_ttl(f'rl:ip:{ip}:reqs', ttl=self.anon_window)
+        if count >= self.anon_threshold:
+            self._rl_cache.set(blocked_key, 1, timeout=self.BLOCK_TTL)
+            return {'action': 'block', 'reason': 'ip_rate', 'ip': ip}
+
+        # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT).
+        # The bucket number is part of the key, so each window gets its own
+        # counter; the doubled TTL only controls when the stale key is dropped.
+        ns = getattr(request, 'tenant', 'global')
+        bucket = int(time.time() // self.anon_window)
+        count = self._incr_with_ttl(
+            f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2
+        )
+        if count >= self.anon_threshold:
+            self._rl_cache.set(blocked_key, 1, timeout=self.BLOCK_TTL)
+            return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip}
+
+        return {'action': 'pass', 'ip': ip}
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _incr_with_ttl(self, key, ttl):
+        """Increment the counter at ``key`` and return its new value.
+
+        The counter lives for ``ttl`` seconds measured from its first
+        increment (a fixed window).  The primary path is an atomic
+        INCR + EXPIRE Lua script on the 'ratelimit' Redis connection.  When
+        that path fails for any reason, a non-atomic fallback on the Django
+        cache is used; it stores ``(count, expires_at)`` so that later hits
+        do not push the expiry forward.
+        """
+        try:
+            from django_redis import get_redis_connection
+            client = get_redis_connection('ratelimit')
+            return client.eval(_INCR_LUA, 1, key, ttl)
+        except Exception:
+            # Deliberately broad: a missing django_redis, a non-Redis cache
+            # and a Redis outage all take the same fallback path.
+            logger.debug(
+                'ratelimit_redis_fallback key=%s', key, exc_info=True
+            )
+
+        now = time.time()
+        entry = self._rl_cache.get(key)
+        # A serialising backend may hand a tuple back as a list.  Anything
+        # else (missing key, value in another format) starts a new window.
+        window_open = (
+            isinstance(entry, (tuple, list))
+            and len(entry) == 2
+            and entry[1] > now
+        )
+        if window_open:
+            count, expires_at = entry[0] + 1, entry[1]
+        else:
+            count, expires_at = 1, now + ttl
+        # Re-store with only the time remaining, so the window closes at the
+        # original deadline instead of sliding with every request.
+        remaining = max(1, int(expires_at - now))
+        self._rl_cache.set(key, (count, expires_at), timeout=remaining)
+        return count
--- a/sapl/settings.py
+++ b/sapl/settings.py
@ -146,6 +146,9 @@ MIDDLEWARE = [
    'sapl.middleware.endpoint_restriction.EndpointRestrictionMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    # RateLimitMiddleware runs after AuthenticationMiddleware so it can
+    # distinguish authenticated users (higher threshold) from anonymous ones.
+    'sapl.middleware.ratelimit.RateLimitMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
    'django.middleware.security.SecurityMiddleware',
@ -373,7 +376,24 @@ DATA_UPLOAD_MAX_MEMORY_SIZE = 10 * 1024 * 1024  # 10MB
 FILE_UPLOAD_MAX_MEMORY_SIZE = 2 * 1024 * 1024   # 2MB
 FILE_UPLOAD_TEMP_DIR = '/var/interlegis/sapl/tmp'

-RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='35/m')
+# ---------------------------------------------------------------------------
+# Rate limiting — RateLimitMiddleware (sapl/middleware/ratelimit.py)
+# ---------------------------------------------------------------------------
+# Start with RATELIMIT_DRY_RUN=True; flip to False only after validating in
+# logs that no legitimate traffic is flagged.  The middleware reads this as a
+# single switch: when False, every check returns 429 on a block decision.
+RATELIMIT_DRY_RUN = config('RATELIMIT_DRY_RUN', default=True, cast=bool)
+
+# Rates use the '<count>/<period>' form, e.g. '35/m' = 35 requests per minute.
+# RATE_LIMITER_RATE applies to anonymous requests (per IP) and
+# RATE_LIMITER_RATE_AUTHENTICATED to logged-in users (per user).
+# NOTE(review): RATE_LIMITER_RATE_BOT is not read by RateLimitMiddleware in
+# this change (known bot User-Agents are blocked outright) — confirm its
+# intended consumer.
+RATE_LIMITER_RATE               = config('RATE_LIMITER_RATE',               default='35/m')
+RATE_LIMITER_RATE_AUTHENTICATED = config('RATE_LIMITER_RATE_AUTHENTICATED', default='120/m')
+RATE_LIMITER_RATE_BOT           = config('RATE_LIMITER_RATE_BOT',           default='5/m')
+
+# Comma-separated IPs exempt from rate limiting (e.g. legislative-house ranges).
+# Leave empty until the IP list is available — see rate-limiter-v2.md §9.
+# The middleware matches entries as exact IP strings; CIDR notation is not
+# expanded, so each address of a range must be listed individually.
+RATE_LIMIT_WHITELIST_IPS = config(
+    'RATE_LIMIT_WHITELIST_IPS',
+    default='',
+    cast=lambda v: [x.strip() for x in v.split(',') if x.strip()],
+)

 # Internationalization
 # https://docs.djangoproject.com/en/1.8/topics/i18n/