From 69d10d893d406478586c9a7e3e817d51e81d85d1 Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Tue, 14 Apr 2026 01:47:26 -0300 Subject: [PATCH] Fix 5 code-quality issues in RateLimitMiddleware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. user_id: str(request.user.pk) — pk is int, lower()/strip() were no-ops 2. Redis key constants: RL_IP_REQUESTS, RL_IP_BLOCKED, RL_USER_REQUESTS, RL_USER_BLOCKED, RL_NS_WINDOW — no more inline f-string literals 3. Tenant namespace: _NAMESPACE resolved once at module load from POD_NAMESPACE env var (K8s Downward API) → service-account namespace file → 'global' fallback. No per-request getattr(request, 'tenant'). 4. KEY_PREFIX in CACHES['default'] set to POD_NAMESPACE (e.g. patobranco-pr) so each tenant's cache keys are isolated in shared Redis. 5. Logger extra: replaced getattr(request, 'tenant', 'unknown') with _NAMESPACE (the actual resolved constant). settings.py: add POD_NAMESPACE = config('POD_NAMESPACE', default='sapl'); use it as KEY_PREFIX. start.sh: add resolve_pod_namespace() (Downward API → SA file → fallback); call it before resolve_redis_url(); write POD_NAMESPACE into .env. Co-Authored-By: Claude Sonnet 4.6 --- docker/startup_scripts/start.sh | 24 +++++++++++ sapl/middleware/ratelimit.py | 75 ++++++++++++++++++++++++--------- sapl/settings.py | 23 ++++++---- 3 files changed, 94 insertions(+), 28 deletions(-) diff --git a/docker/startup_scripts/start.sh b/docker/startup_scripts/start.sh index 2b39cef82..d1ecdb804 100755 --- a/docker/startup_scripts/start.sh +++ b/docker/startup_scripts/start.sh @@ -106,6 +106,7 @@ write_env_file() { : "${ENABLE_SAPN:=False}" : "${REDIS_URL:=}" : "${CACHE_BACKEND:=file}" + : "${POD_NAMESPACE:=sapl}" tmp="$(mktemp)" { @@ -130,6 +131,7 @@ write_env_file() { printf 'ENABLE_SAPN=%s\n' "$ENABLE_SAPN" printf 'REDIS_URL=%s\n' "$REDIS_URL" printf 'CACHE_BACKEND=%s\n' "$CACHE_BACKEND" + printf 'POD_NAMESPACE=%s\n' "$POD_NAMESPACE" } > "$tmp" chmod 600 "$tmp" @@ -260,6 +262,27 @@ setup_cache_dir() { umask 0007 } +# --------------------------------------------------------------------------- +# Tenant namespace — resolved once at startup, written into .env +# --------------------------------------------------------------------------- + +resolve_pod_namespace() { + # 1. Already set by K8s Downward API (fieldRef: metadata.namespace) + [[ -n "${POD_NAMESPACE:-}" ]] && { log "POD_NAMESPACE=${POD_NAMESPACE} (from env)."; return 0; } + + # 2. K8s service-account namespace file (present in every in-cluster pod) + local ns_file="/var/run/secrets/kubernetes.io/serviceaccount/namespace" + if [[ -f "$ns_file" ]]; then + export POD_NAMESPACE="$(<"$ns_file")" + log "POD_NAMESPACE=${POD_NAMESPACE} (from service-account file)." + return 0 + fi + + # 3. Fallback for local development + export POD_NAMESPACE="sapl" + log "POD_NAMESPACE not found — using fallback '${POD_NAMESPACE}'." +} + # --------------------------------------------------------------------------- # Redis — resolve URL, check waffle switch, wait for connectivity # --------------------------------------------------------------------------- @@ -351,6 +374,7 @@ start_services() { main() { create_secret + resolve_pod_namespace resolve_redis_url wait_for_pg configure_pg_timezone diff --git a/sapl/middleware/ratelimit.py b/sapl/middleware/ratelimit.py index b96729f97..ae75b1200 100644 --- a/sapl/middleware/ratelimit.py +++ b/sapl/middleware/ratelimit.py @@ -7,18 +7,26 @@ Decision flow (per request): 3. Authenticated user? a. User blocked? → 429 b. Suspicious hdrs? → 429 - c. User rate ≥ 120? → SET user:blocked, 429 + c. User rate ≥ 120? → SET RL_USER_BLOCKED, 429 4. Anonymous: a. Suspicious hdrs? → 429 - b. IP rate ≥ 30/min? → SET ip:blocked, 429 - c. NS/IP window hit? → SET ip:blocked, 429 + b. IP rate ≥ 35/min? → SET RL_IP_BLOCKED, 429 + c. NS/IP window hit? → SET RL_IP_BLOCKED, 429 All decisions are no-ops when RATELIMIT_DRY_RUN=True (logged only). Degrades gracefully to non-atomic counting when Redis is unavailable. + +Tenant namespace (_NAMESPACE) is resolved once at module load from: + 1. POD_NAMESPACE env var (K8s Downward API — preferred) + 2. K8s service-account namespace file (always present in-cluster) + 3. 'global' (local development fallback) +Since each pod serves exactly one tenant, this is a startup constant — +no per-request lookup is needed or correct. """ import hashlib import logging +import os import time from django.conf import settings @@ -27,6 +35,36 @@ from django.http import HttpResponse logger = logging.getLogger('sapl.ratelimit') +# --------------------------------------------------------------------------- +# Tenant namespace — pod-level constant +# --------------------------------------------------------------------------- + +def _resolve_namespace(): + ns = os.environ.get('POD_NAMESPACE', '') + if ns: + return ns + try: + with open('/var/run/secrets/kubernetes.io/serviceaccount/namespace') as f: + return f.read().strip() + except OSError: + return 'global' + +_NAMESPACE = _resolve_namespace() + +# --------------------------------------------------------------------------- +# Redis key templates — module-level constants, never inline strings +# --------------------------------------------------------------------------- + +RL_IP_REQUESTS = 'rl:ip:{ip}:reqs' +RL_IP_BLOCKED = 'rl:ip:{ip}:blocked' +RL_USER_REQUESTS = 'rl:{ns}:user:{uid}:reqs' +RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked' +RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}' + +# --------------------------------------------------------------------------- +# Bot UA fragments +# --------------------------------------------------------------------------- + BOT_UA_FRAGMENTS = [ 'GPTBot', 'ClaudeBot', @@ -88,7 +126,7 @@ def _is_suspicious_headers(request): def _parse_rate(rate_str): - """Parse '30/m' or '120/m' into (count, seconds).""" + """Parse '35/m' or '120/m' into (count, seconds).""" count, period = rate_str.split('/') count = int(count) seconds = {'s': 1, 'm': 60, 'h': 3600}.get(period.lower(), 60) @@ -110,15 +148,13 @@ class RateLimitMiddleware: decision = self._evaluate(request) if decision['action'] == 'block': logger.warning( - 'ratelimit_block reason=%s ip=%s path=%s dry_run=%s', + 'ratelimit_block reason=%s ip=%s path=%s dry_run=%s namespace=%s', decision['reason'], decision['ip'], request.path, self.dry_run, - extra={ - 'ua': request.META.get('HTTP_USER_AGENT', ''), - 'namespace': getattr(request, 'tenant', 'unknown'), - }, + _NAMESPACE, + extra={'ua': request.META.get('HTTP_USER_AGENT', '')}, ) if not self.dry_run: return HttpResponse(status=429) @@ -141,7 +177,7 @@ class RateLimitMiddleware: return {'action': 'block', 'reason': 'known_ua', 'ip': ip} # Check 2: IP already blocked - if self._rl_cache.get(f'rl:ip:{ip}:blocked'): + if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)): return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip} user = getattr(request, 'user', None) @@ -150,11 +186,10 @@ class RateLimitMiddleware: return self._evaluate_anonymous(request, ip) def _evaluate_authenticated(self, request, ip): - user_id = str(request.user.pk).lower().strip() - ns = getattr(request, 'tenant', 'global') + uid = str(request.user.pk) # Check 3a: user already blocked - if self._rl_cache.get(f'rl:{ns}:user:{user_id}:blocked'): + if self._rl_cache.get(RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid)): return {'action': 'block', 'reason': 'user_blocked', 'ip': ip} # Check 3b: suspicious headers @@ -163,11 +198,11 @@ class RateLimitMiddleware: # Check 3c: authenticated request rate count = self._incr_with_ttl( - f'rl:{ns}:user:{user_id}:reqs', ttl=self.auth_window + RL_USER_REQUESTS.format(ns=_NAMESPACE, uid=uid), ttl=self.auth_window ) if count >= self.auth_threshold: self._rl_cache.set( - f'rl:{ns}:user:{user_id}:blocked', 1, timeout=self.BLOCK_TTL + RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid), 1, timeout=self.BLOCK_TTL ) return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip} @@ -179,19 +214,19 @@ class RateLimitMiddleware: return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip} # Check 4b: IP request rate - count = self._incr_with_ttl(f'rl:ip:{ip}:reqs', ttl=self.anon_window) + count = self._incr_with_ttl(RL_IP_REQUESTS.format(ip=ip), ttl=self.anon_window) if count >= self.anon_threshold: - self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL) + self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL) return {'action': 'block', 'reason': 'ip_rate', 'ip': ip} # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT) - ns = getattr(request, 'tenant', 'global') bucket = int(time.time() // self.anon_window) count = self._incr_with_ttl( - f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2 + RL_NS_WINDOW.format(ns=_NAMESPACE, ip=ip, bucket=bucket), + ttl=self.anon_window * 2, ) if count >= self.anon_threshold: - self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL) + self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL) return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip} return {'action': 'pass', 'ip': ip} diff --git a/sapl/settings.py b/sapl/settings.py index a43ee8d7b..92859fb23 100644 --- a/sapl/settings.py +++ b/sapl/settings.py @@ -204,18 +204,26 @@ SPECTACULAR_SETTINGS = { 'VERSION': '1.0.0', } +# --------------------------------------------------------------------------- +# Tenant namespace — identifies this pod's municipality (e.g. patobranco-pr). +# Resolved by start.sh from POD_NAMESPACE env var (K8s Downward API) or the +# service-account namespace file, then written into .env before Gunicorn starts. +# Used as KEY_PREFIX so each tenant's cache keys are isolated in shared Redis. +# --------------------------------------------------------------------------- +POD_NAMESPACE = config('POD_NAMESPACE', default='sapl') + # --------------------------------------------------------------------------- # Cache — switches between file-based (default) and Redis at pod startup. # REDIS_URL and CACHE_BACKEND are resolved by start.sh before Gunicorn # starts; settings.py reads them as env vars (written into .env). # --------------------------------------------------------------------------- -REDIS_URL = config('REDIS_URL', default='') +REDIS_URL = config('REDIS_URL', default='') CACHE_BACKEND = config('CACHE_BACKEND', default='file') _redis_ready = CACHE_BACKEND == 'redis' and bool(REDIS_URL) _redis_pool = { - 'max_connections': 6, # 1,200 pods × 2 workers × 6 = 14,400 peak + 'max_connections': 6, # 1,200 pods × 2 workers × 6 = 14,400 peak 'socket_timeout': 0.5, 'socket_connect_timeout': 0.5, } @@ -228,7 +236,7 @@ CACHES = { else 'django.core.cache.backends.filebased.FileBasedCache' ), 'LOCATION': REDIS_URL + '/0' if _redis_ready else '/var/tmp/django_cache', - 'KEY_PREFIX': 'sapl', + 'KEY_PREFIX': POD_NAMESPACE, **( { 'OPTIONS': { @@ -373,7 +381,7 @@ MAX_IMAGE_UPLOAD_SIZE = 2 * 1024 * 1024 # 2MB DATA_UPLOAD_MAX_MEMORY_SIZE = 10 * 1024 * 1024 # 10MB # Files above 2 MB are streamed to a temp file on disk rather than held in # worker RAM. Critical for large upload support without memory blowup. -FILE_UPLOAD_MAX_MEMORY_SIZE = 2 * 1024 * 1024 # 2MB +FILE_UPLOAD_MAX_MEMORY_SIZE = 2 * 1024 * 1024 # 2MB FILE_UPLOAD_TEMP_DIR = '/var/interlegis/sapl/tmp' # --------------------------------------------------------------------------- @@ -383,9 +391,9 @@ FILE_UPLOAD_TEMP_DIR = '/var/interlegis/sapl/tmp' # after validating in logs that no legitimate traffic is flagged. RATELIMIT_DRY_RUN = config('RATELIMIT_DRY_RUN', default=True, cast=bool) -RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='35/m') +RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='35/m') RATE_LIMITER_RATE_AUTHENTICATED = config('RATE_LIMITER_RATE_AUTHENTICATED', default='120/m') -RATE_LIMITER_RATE_BOT = config('RATE_LIMITER_RATE_BOT', default='5/m') +RATE_LIMITER_RATE_BOT = config('RATE_LIMITER_RATE_BOT', default='5/m') # Comma-separated IPs exempt from rate limiting (e.g. legislative-house ranges). # Leave empty until the IP list is available — see rate-limiter-v2.md §9. @@ -411,7 +419,6 @@ USE_I18N = True USE_L10N = True USE_TZ = True - ## ## Monkey patch of the Django 2.2 because latest version of psycopg2 returns DB time zone as UTC, ## but Django 2.2 requires an int! This should be removed once we are able to upgrade to Django >= 4 @@ -506,7 +513,7 @@ LOGGING = { 'formatters': { 'verbose': { 'format': '%(levelname)s %(asctime)s [%(request_id)s] ' + host + '%(pathname)s %(name)s:%(funcName)s:%(' - 'lineno)d %(message)s ' + 'lineno)d %(message)s ' }, 'simple': { 'format': '%(levelname)s %(asctime)s [%(request_id)s] - %(message)s'