Browse Source

Fix 5 code-quality issues in RateLimitMiddleware

1. user_id: str(request.user.pk) — pk is int, lower()/strip() were no-ops
2. Redis key constants: RL_IP_REQUESTS, RL_IP_BLOCKED, RL_USER_REQUESTS,
   RL_USER_BLOCKED, RL_NS_WINDOW — no more inline f-string literals
3. Tenant namespace: _NAMESPACE resolved once at module load from
   POD_NAMESPACE env var (K8s Downward API) → service-account namespace
   file → 'global' fallback. No per-request getattr(request, 'tenant').
4. KEY_PREFIX in CACHES['default'] set to POD_NAMESPACE (e.g. patobranco-pr)
   so each tenant's cache keys are isolated in shared Redis.
5. Logger extra: replaced getattr(request, 'tenant', 'unknown') with
   _NAMESPACE (the actual resolved constant).

settings.py: add POD_NAMESPACE = config('POD_NAMESPACE', default='sapl');
  use it as KEY_PREFIX.
start.sh: add resolve_pod_namespace() (Downward API → SA file → fallback);
  call it before resolve_redis_url(); write POD_NAMESPACE into .env.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 3 weeks ago
parent
commit
69d10d893d
  1. 24
      docker/startup_scripts/start.sh
  2. 75
      sapl/middleware/ratelimit.py
  3. 11
      sapl/settings.py

24
docker/startup_scripts/start.sh

@ -106,6 +106,7 @@ write_env_file() {
: "${ENABLE_SAPN:=False}" : "${ENABLE_SAPN:=False}"
: "${REDIS_URL:=}" : "${REDIS_URL:=}"
: "${CACHE_BACKEND:=file}" : "${CACHE_BACKEND:=file}"
: "${POD_NAMESPACE:=sapl}"
tmp="$(mktemp)" tmp="$(mktemp)"
{ {
@ -130,6 +131,7 @@ write_env_file() {
printf 'ENABLE_SAPN=%s\n' "$ENABLE_SAPN" printf 'ENABLE_SAPN=%s\n' "$ENABLE_SAPN"
printf 'REDIS_URL=%s\n' "$REDIS_URL" printf 'REDIS_URL=%s\n' "$REDIS_URL"
printf 'CACHE_BACKEND=%s\n' "$CACHE_BACKEND" printf 'CACHE_BACKEND=%s\n' "$CACHE_BACKEND"
printf 'POD_NAMESPACE=%s\n' "$POD_NAMESPACE"
} > "$tmp" } > "$tmp"
chmod 600 "$tmp" chmod 600 "$tmp"
@ -260,6 +262,27 @@ setup_cache_dir() {
umask 0007 umask 0007
} }
# ---------------------------------------------------------------------------
# Tenant namespace — resolved once at startup, written into .env
# ---------------------------------------------------------------------------
resolve_pod_namespace() {
# 1. Already set by K8s Downward API (fieldRef: metadata.namespace)
[[ -n "${POD_NAMESPACE:-}" ]] && { log "POD_NAMESPACE=${POD_NAMESPACE} (from env)."; return 0; }
# 2. K8s service-account namespace file (present in every in-cluster pod)
local ns_file="/var/run/secrets/kubernetes.io/serviceaccount/namespace"
if [[ -f "$ns_file" ]]; then
export POD_NAMESPACE="$(<"$ns_file")"
log "POD_NAMESPACE=${POD_NAMESPACE} (from service-account file)."
return 0
fi
# 3. Fallback for local development
export POD_NAMESPACE="sapl"
log "POD_NAMESPACE not found — using fallback '${POD_NAMESPACE}'."
}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Redis — resolve URL, check waffle switch, wait for connectivity # Redis — resolve URL, check waffle switch, wait for connectivity
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -351,6 +374,7 @@ start_services() {
main() { main() {
create_secret create_secret
resolve_pod_namespace
resolve_redis_url resolve_redis_url
wait_for_pg wait_for_pg
configure_pg_timezone configure_pg_timezone

75
sapl/middleware/ratelimit.py

@ -7,18 +7,26 @@ Decision flow (per request):
3. Authenticated user? 3. Authenticated user?
a. User blocked? 429 a. User blocked? 429
b. Suspicious hdrs? 429 b. Suspicious hdrs? 429
c. User rate 120? SET user:blocked, 429 c. User rate 120? SET RL_USER_BLOCKED, 429
4. Anonymous: 4. Anonymous:
a. Suspicious hdrs? 429 a. Suspicious hdrs? 429
b. IP rate 30/min? SET ip:blocked, 429 b. IP rate 35/min? SET RL_IP_BLOCKED, 429
c. NS/IP window hit? SET ip:blocked, 429 c. NS/IP window hit? SET RL_IP_BLOCKED, 429
All decisions are no-ops when RATELIMIT_DRY_RUN=True (logged only). All decisions are no-ops when RATELIMIT_DRY_RUN=True (logged only).
Degrades gracefully to non-atomic counting when Redis is unavailable. Degrades gracefully to non-atomic counting when Redis is unavailable.
Tenant namespace (_NAMESPACE) is resolved once at module load from:
1. POD_NAMESPACE env var (K8s Downward API preferred)
2. K8s service-account namespace file (always present in-cluster)
3. 'global' (local development fallback)
Since each pod serves exactly one tenant, this is a startup constant
no per-request lookup is needed or correct.
""" """
import hashlib import hashlib
import logging import logging
import os
import time import time
from django.conf import settings from django.conf import settings
@ -27,6 +35,36 @@ from django.http import HttpResponse
logger = logging.getLogger('sapl.ratelimit') logger = logging.getLogger('sapl.ratelimit')
# ---------------------------------------------------------------------------
# Tenant namespace — pod-level constant
# ---------------------------------------------------------------------------
def _resolve_namespace():
ns = os.environ.get('POD_NAMESPACE', '')
if ns:
return ns
try:
with open('/var/run/secrets/kubernetes.io/serviceaccount/namespace') as f:
return f.read().strip()
except OSError:
return 'global'
_NAMESPACE = _resolve_namespace()
# ---------------------------------------------------------------------------
# Redis key templates — module-level constants, never inline strings
# ---------------------------------------------------------------------------
RL_IP_REQUESTS = 'rl:ip:{ip}:reqs'
RL_IP_BLOCKED = 'rl:ip:{ip}:blocked'
RL_USER_REQUESTS = 'rl:{ns}:user:{uid}:reqs'
RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked'
RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}'
# ---------------------------------------------------------------------------
# Bot UA fragments
# ---------------------------------------------------------------------------
BOT_UA_FRAGMENTS = [ BOT_UA_FRAGMENTS = [
'GPTBot', 'GPTBot',
'ClaudeBot', 'ClaudeBot',
@ -88,7 +126,7 @@ def _is_suspicious_headers(request):
def _parse_rate(rate_str): def _parse_rate(rate_str):
"""Parse '30/m' or '120/m' into (count, seconds).""" """Parse '35/m' or '120/m' into (count, seconds)."""
count, period = rate_str.split('/') count, period = rate_str.split('/')
count = int(count) count = int(count)
seconds = {'s': 1, 'm': 60, 'h': 3600}.get(period.lower(), 60) seconds = {'s': 1, 'm': 60, 'h': 3600}.get(period.lower(), 60)
@ -110,15 +148,13 @@ class RateLimitMiddleware:
decision = self._evaluate(request) decision = self._evaluate(request)
if decision['action'] == 'block': if decision['action'] == 'block':
logger.warning( logger.warning(
'ratelimit_block reason=%s ip=%s path=%s dry_run=%s', 'ratelimit_block reason=%s ip=%s path=%s dry_run=%s namespace=%s',
decision['reason'], decision['reason'],
decision['ip'], decision['ip'],
request.path, request.path,
self.dry_run, self.dry_run,
extra={ _NAMESPACE,
'ua': request.META.get('HTTP_USER_AGENT', ''), extra={'ua': request.META.get('HTTP_USER_AGENT', '')},
'namespace': getattr(request, 'tenant', 'unknown'),
},
) )
if not self.dry_run: if not self.dry_run:
return HttpResponse(status=429) return HttpResponse(status=429)
@ -141,7 +177,7 @@ class RateLimitMiddleware:
return {'action': 'block', 'reason': 'known_ua', 'ip': ip} return {'action': 'block', 'reason': 'known_ua', 'ip': ip}
# Check 2: IP already blocked # Check 2: IP already blocked
if self._rl_cache.get(f'rl:ip:{ip}:blocked'): if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)):
return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip} return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip}
user = getattr(request, 'user', None) user = getattr(request, 'user', None)
@ -150,11 +186,10 @@ class RateLimitMiddleware:
return self._evaluate_anonymous(request, ip) return self._evaluate_anonymous(request, ip)
def _evaluate_authenticated(self, request, ip): def _evaluate_authenticated(self, request, ip):
user_id = str(request.user.pk).lower().strip() uid = str(request.user.pk)
ns = getattr(request, 'tenant', 'global')
# Check 3a: user already blocked # Check 3a: user already blocked
if self._rl_cache.get(f'rl:{ns}:user:{user_id}:blocked'): if self._rl_cache.get(RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid)):
return {'action': 'block', 'reason': 'user_blocked', 'ip': ip} return {'action': 'block', 'reason': 'user_blocked', 'ip': ip}
# Check 3b: suspicious headers # Check 3b: suspicious headers
@ -163,11 +198,11 @@ class RateLimitMiddleware:
# Check 3c: authenticated request rate # Check 3c: authenticated request rate
count = self._incr_with_ttl( count = self._incr_with_ttl(
f'rl:{ns}:user:{user_id}:reqs', ttl=self.auth_window RL_USER_REQUESTS.format(ns=_NAMESPACE, uid=uid), ttl=self.auth_window
) )
if count >= self.auth_threshold: if count >= self.auth_threshold:
self._rl_cache.set( self._rl_cache.set(
f'rl:{ns}:user:{user_id}:blocked', 1, timeout=self.BLOCK_TTL RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid), 1, timeout=self.BLOCK_TTL
) )
return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip} return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip}
@ -179,19 +214,19 @@ class RateLimitMiddleware:
return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip} return {'action': 'block', 'reason': 'suspicious_headers', 'ip': ip}
# Check 4b: IP request rate # Check 4b: IP request rate
count = self._incr_with_ttl(f'rl:ip:{ip}:reqs', ttl=self.anon_window) count = self._incr_with_ttl(RL_IP_REQUESTS.format(ip=ip), ttl=self.anon_window)
if count >= self.anon_threshold: if count >= self.anon_threshold:
self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL) self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL)
return {'action': 'block', 'reason': 'ip_rate', 'ip': ip} return {'action': 'block', 'reason': 'ip_rate', 'ip': ip}
# Check 4c: per-namespace/IP/window (catches UA rotators behind NAT) # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT)
ns = getattr(request, 'tenant', 'global')
bucket = int(time.time() // self.anon_window) bucket = int(time.time() // self.anon_window)
count = self._incr_with_ttl( count = self._incr_with_ttl(
f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2 RL_NS_WINDOW.format(ns=_NAMESPACE, ip=ip, bucket=bucket),
ttl=self.anon_window * 2,
) )
if count >= self.anon_threshold: if count >= self.anon_threshold:
self._rl_cache.set(f'rl:ip:{ip}:blocked', 1, timeout=self.BLOCK_TTL) self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL)
return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip} return {'action': 'block', 'reason': 'ua_rotation', 'ip': ip}
return {'action': 'pass', 'ip': ip} return {'action': 'pass', 'ip': ip}

11
sapl/settings.py

@ -204,6 +204,14 @@ SPECTACULAR_SETTINGS = {
'VERSION': '1.0.0', 'VERSION': '1.0.0',
} }
# ---------------------------------------------------------------------------
# Tenant namespace — identifies this pod's municipality (e.g. patobranco-pr).
# Resolved by start.sh from POD_NAMESPACE env var (K8s Downward API) or the
# service-account namespace file, then written into .env before Gunicorn starts.
# Used as KEY_PREFIX so each tenant's cache keys are isolated in shared Redis.
# ---------------------------------------------------------------------------
POD_NAMESPACE = config('POD_NAMESPACE', default='sapl')
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Cache — switches between file-based (default) and Redis at pod startup. # Cache — switches between file-based (default) and Redis at pod startup.
# REDIS_URL and CACHE_BACKEND are resolved by start.sh before Gunicorn # REDIS_URL and CACHE_BACKEND are resolved by start.sh before Gunicorn
@ -228,7 +236,7 @@ CACHES = {
else 'django.core.cache.backends.filebased.FileBasedCache' else 'django.core.cache.backends.filebased.FileBasedCache'
), ),
'LOCATION': REDIS_URL + '/0' if _redis_ready else '/var/tmp/django_cache', 'LOCATION': REDIS_URL + '/0' if _redis_ready else '/var/tmp/django_cache',
'KEY_PREFIX': 'sapl', 'KEY_PREFIX': POD_NAMESPACE,
**( **(
{ {
'OPTIONS': { 'OPTIONS': {
@ -411,7 +419,6 @@ USE_I18N = True
USE_L10N = True USE_L10N = True
USE_TZ = True USE_TZ = True
## ##
## Monkey patch of the Django 2.2 because latest version of psycopg2 returns DB time zone as UTC, ## Monkey patch of the Django 2.2 because latest version of psycopg2 returns DB time zone as UTC,
## but Django 2.2 requires an int! This should be removed once we are able to upgrade to Django >= 4 ## but Django 2.2 requires an int! This should be removed once we are able to upgrade to Django >= 4

Loading…
Cancel
Save