Fix 5 code-quality issues in RateLimitMiddleware

1. user_id: str(request.user.pk) — pk is int, lower()/strip() were no-ops 2. Redis key constants: RL_IP_REQUESTS, RL_IP_BLOCKED, RL_USER_REQUESTS, RL_USER_BLOCKED, RL_NS_WINDOW — no more inline f-string literals 3. Tenant namespace: _NAMESPACE resolved once at module load from POD_NAMESPACE env var (K8s Downward API) → service-account namespace file → 'global' fallback. No per-request getattr(request, 'tenant'). 4. KEY_PREFIX in CACHES['default'] set to POD_NAMESPACE (e.g. patobranco-pr) so each tenant's cache keys are isolated in shared Redis. 5. Logger extra: replaced getattr(request, 'tenant', 'unknown') with _NAMESPACE (the actual resolved constant). settings.py: add POD_NAMESPACE = config('POD_NAMESPACE', default='sapl'); use it as KEY_PREFIX. start.sh: add resolve_pod_namespace() (Downward API → SA file → fallback); call it before resolve_redis_url(); write POD_NAMESPACE into .env. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
3 weeks ago · 69d10d893d
3 changed files with 94 additions and 28 deletions
--- a/docker/startup_scripts/start.sh
+++ b/docker/startup_scripts/start.sh
@ -106,6 +106,7 @@ write_env_file() {
  : "${ENABLE_SAPN:=False}"
  : "${REDIS_URL:=}"
  : "${CACHE_BACKEND:=file}"
  : "${POD_NAMESPACE:=sapl}"
  tmp="$(mktemp)"
  {
@ -130,6 +131,7 @@ write_env_file() {
    printf 'ENABLE_SAPN=%s\n' "$ENABLE_SAPN"
    printf 'REDIS_URL=%s\n' "$REDIS_URL"
    printf 'CACHE_BACKEND=%s\n' "$CACHE_BACKEND"
    printf 'POD_NAMESPACE=%s\n' "$POD_NAMESPACE"
  } > "$tmp"
  chmod 600 "$tmp"
@ -260,6 +262,27 @@ setup_cache_dir() {
  umask 0007
 }
 # ---------------------------------------------------------------------------
 # Tenant namespace — resolved once at startup, written into .env
 # ---------------------------------------------------------------------------
 resolve_pod_namespace() {
  # 1. Already set by K8s Downward API (fieldRef: metadata.namespace)
  [[ -n "${POD_NAMESPACE:-}" ]] && { log "POD_NAMESPACE=${POD_NAMESPACE} (from env)."; return 0; }
  # 2. K8s service-account namespace file (present in every in-cluster pod)
  local ns_file="/var/run/secrets/kubernetes.io/serviceaccount/namespace"
  if [[ -f "$ns_file" ]]; then
    export POD_NAMESPACE="$(<"$ns_file")"
    log "POD_NAMESPACE=${POD_NAMESPACE} (from service-account file)."
    return 0
  fi
  # 3. Fallback for local development
  export POD_NAMESPACE="sapl"
  log "POD_NAMESPACE not found — using fallback '${POD_NAMESPACE}'."
 }
 # ---------------------------------------------------------------------------
 # Redis — resolve URL, check waffle switch, wait for connectivity
 # ---------------------------------------------------------------------------
@ -351,6 +374,7 @@ start_services() {
 main() {
  create_secret
  resolve_pod_namespace
  resolve_redis_url
  wait_for_pg
  configure_pg_timezone
--- a/sapl/middleware/ratelimit.py
+++ b/sapl/middleware/ratelimit.py
@ -7,18 +7,26 @@ Decision flow (per request):
  3. Authenticated user?
       a. User blocked?     → 429
       b. Suspicious hdrs?  → 429
-       c. User rate ≥ 120?  → SET user:blocked, 429
+       c. User rate ≥ 120?  → SET RL_USER_BLOCKED, 429
  4. Anonymous:
       a. Suspicious hdrs?  → 429
-       b. IP rate ≥ 30/min? → SET ip:blocked, 429
+       b. IP rate ≥ 35/min? → SET RL_IP_BLOCKED, 429
-       c. NS/IP window hit? → SET ip:blocked, 429
+       c. NS/IP window hit? → SET RL_IP_BLOCKED, 429
 All decisions are no-ops when RATELIMIT_DRY_RUN=True (logged only).
 Degrades gracefully to non-atomic counting when Redis is unavailable.
 Tenant namespace (_NAMESPACE) is resolved once at module load from:
  1. POD_NAMESPACE env var  (K8s Downward API — preferred)
  2. K8s service-account namespace file (always present in-cluster)
  3. 'global' (local development fallback)
 Since each pod serves exactly one tenant, this is a startup constant —
 no per-request lookup is needed or correct.
 """
 import hashlib
 import logging
 import os
 import time
 from django.conf import settings
@ -27,6 +35,36 @@ from django.http import HttpResponse
 logger = logging.getLogger('sapl.ratelimit')
 # ---------------------------------------------------------------------------
 # Tenant namespace — pod-level constant
 # ---------------------------------------------------------------------------
 def _resolve_namespace():
    ns = os.environ.get('POD_NAMESPACE', '')
    if ns:
        return ns
    try:
        with open('/var/run/secrets/kubernetes.io/serviceaccount/namespace') as f:
            return f.read().strip()
    except OSError:
        return 'global'
 _NAMESPACE = _resolve_namespace()
 # ---------------------------------------------------------------------------
 # Redis key templates — module-level constants, never inline strings
 # ---------------------------------------------------------------------------
 RL_IP_REQUESTS   = 'rl:ip:{ip}:reqs'
 RL_IP_BLOCKED    = 'rl:ip:{ip}:blocked'
 RL_USER_REQUESTS = 'rl:{ns}:user:{uid}:reqs'
 RL_USER_BLOCKED  = 'rl:{ns}:user:{uid}:blocked'
 RL_NS_WINDOW     = 'rl:{ns}:ip:{ip}:w:{bucket}'
 # ---------------------------------------------------------------------------
 # Bot UA fragments
 # ---------------------------------------------------------------------------
 BOT_UA_FRAGMENTS = [
    'GPTBot',
    'ClaudeBot',
@ -88,7 +126,7 @@ def _is_suspicious_headers(request):
 def _parse_rate(rate_str):
-    """Parse '30/m' or '120/m' into (count, seconds)."""
+    """Parse '35/m' or '120/m' into (count, seconds)."""
    count, period = rate_str.split('/')
    count = int(count)
    seconds = {'s': 1, 'm': 60, 'h': 3600}.get(period.lower(), 60)
@ -110,15 +148,13 @@ class RateLimitMiddleware:
        decision = self._evaluate(request)
        if decision['action'] == 'block':
            logger.warning(
-                'ratelimit_block reason=%s ip=%s path=%s dry_run=%s',
+                'ratelimit_block reason=%s ip=%s path=%s dry_run=%s namespace=%s',
                decision['reason'],
                decision['ip'],
                request.path,
                self.dry_run,
-                extra={
+                _NAMESPACE,
-                    'ua': request.META.get('HTTP_USER_AGENT', ''),
+                extra={'ua': request.META.get('HTTP_USER_AGENT', '')},
                    'namespace': getattr(request, 'tenant', 'unknown'),
                },
            )
            if not self.dry_run:
                return HttpResponse(status=429)
@ -141,7 +177,7 @@ class RateLimitMiddleware:
                return {'action': 'block', 'reason': 'known_ua', 'ip': ip}
        # Check 2: IP already blocked
-        if self._rl_cache.get(f'rl:ip:{ip}:blocked'):
+        if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)):
            return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip}
        user = getattr(request, 'user', None)
@ -150,11 +186,10 @@ class RateLimitMiddleware:
        return self._evaluate_anonymous(request, ip)
    def _evaluate_authenticated(self, request, ip):
-        user_id = str(request.user.pk).lower().strip()
+        uid = str(request.user.pk)
        ns = getattr(request, 'tenant', 'global')
        # Check 3a: user already blocked
-        if self._rl_cache.get(f'rl:{ns}:user:{user_id}:blocked'):
+        if self._rl_cache.get(RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid)):
            return {'action': 'block', 'reason': 'user_blocked', 'ip': ip}
        # Check 3b: suspicious headers
@ -163,11 +198,11 @@ class RateLimitMiddleware:
        # Check 3c: authenticated request rate
        count = self._incr_with_ttl(
-            f'rl:{ns}:user:{user_id}:reqs', ttl=self.auth_window
+            RL_USER_REQUESTS.format(ns=_NAMESPACE, uid=uid), ttl=self.auth_window
        )
        if count >= self.auth_threshold:
            self._rl_cache.set(
-                f'rl:{ns}:user:{user_id}:blocked', 1, timeout=self.BLOCK_TTL
+                RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid), 1, timeout=self.BLOCK_TTL
            )
            return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip}
@ -179,19 +214,19 @@ class RateLimitMiddleware:
            return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip}
        # Check 4b: IP request rate
-        count = self._incr_with_ttl(f'rl:ip:{ip}:reqs', ttl=self.anon_window)
+        count = self._incr_with_ttl(RL_IP_REQUESTS.format(ip=ip), ttl=self.anon_window)
        if count >= self.anon_threshold:
-            self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL)
+            self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'ip_rate', 'ip': ip}
        # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT)
        ns = getattr(request, 'tenant', 'global')
        bucket = int(time.time() // self.anon_window)
        count = self._incr_with_ttl(
-            f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2
+            RL_NS_WINDOW.format(ns=_NAMESPACE, ip=ip, bucket=bucket),
            ttl=self.anon_window * 2,
        )
        if count >= self.anon_threshold:
-            self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL)
+            self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL)
            return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip}
        return {'action': 'pass', 'ip': ip}
--- a/sapl/settings.py
+++ b/sapl/settings.py
@ -204,6 +204,14 @@ SPECTACULAR_SETTINGS = {
    'VERSION': '1.0.0',
 }
 # ---------------------------------------------------------------------------
 # Tenant namespace — identifies this pod's municipality (e.g. patobranco-pr).
 # Resolved by start.sh from POD_NAMESPACE env var (K8s Downward API) or the
 # service-account namespace file, then written into .env before Gunicorn starts.
 # Used as KEY_PREFIX so each tenant's cache keys are isolated in shared Redis.
 # ---------------------------------------------------------------------------
 POD_NAMESPACE = config('POD_NAMESPACE', default='sapl')
 # ---------------------------------------------------------------------------
 # Cache — switches between file-based (default) and Redis at pod startup.
 # REDIS_URL and CACHE_BACKEND are resolved by start.sh before Gunicorn
@ -228,7 +236,7 @@ CACHES = {
            else 'django.core.cache.backends.filebased.FileBasedCache'
        ),
        'LOCATION': REDIS_URL + '/0' if _redis_ready else '/var/tmp/django_cache',
-        'KEY_PREFIX': 'sapl',
+        'KEY_PREFIX': POD_NAMESPACE,
        **(
            {
                'OPTIONS': {
@ -411,7 +419,6 @@ USE_I18N = True
 USE_L10N = True
 USE_TZ = True
 ##
 ## Monkey patch of the Django 2.2 because latest version of psycopg2 returns DB time zone as UTC,
 ## but Django 2.2 requires an int! This should be removed once we are able to upgrade to Django >= 4