Browse Source

Raise rate limits and split nginx zones for legitimate traffic

SAPL pages fire 12-45 parallel requests; the old 30r/m nginx zone and
35/m Django threshold blocked normal navigation. Key changes:

nginx (nginx.conf / sapl.conf / start.sh):
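- Stop sharing sapl_general (30r/m) across all traffic: add dedicated
  sapl_media and sapl_api zones alongside the existing sapl_heavy zone,
  giving four zones in total: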
    sapl_general 90r/m  burst=180  (HTML pages)
    sapl_media   180r/m burst=180  (/media/ — own bucket, no longer drains general)
    sapl_api     60r/m  burst=120  (/api/ — quota layer is the real constraint)
    sapl_heavy   10r/m  burst=20   (/relatorios/ — rate unchanged, nodelay kept; zone memory 20m → 10m)
- /media/ and /api/ location blocks now reference their own zones

Django (settings.py):
- RATE_LIMITER_RATE: 35/m → 120/m
- RATE_LIMITER_RATE_AUTHENTICATED: 120/m → 240/m
- RATE_LIMIT_404_THRESHOLD: 10 → 20
- API_QUOTA_ANON_DAILY: 50 → 500 / weekly 350 → 3500
- API_QUOTA_AUTH_DAILY: 1000 → 5000 / weekly 7000 → 35000

Middleware (ratelimit.py):
- Authenticated users no longer receive a persistent 300s block key on
  rate breach — they get 429 for the over-limit request and the window
  resets naturally after 60s. A 5-minute lockout is wrong for a logged-in
  user who clicked too fast.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 3 weeks ago
parent
commit
d4738a20a9
  1. 13
      docker/config/nginx/nginx.conf
  2. 4
      docker/config/nginx/sapl.conf
  3. 16
      docker/startup_scripts/start.sh
  4. 13
      sapl/middleware/ratelimit.py
  5. 14
      sapl/settings.py

13
docker/config/nginx/nginx.conf

@ -51,11 +51,16 @@ http {
# ----------------------------------------------------------------
# Rate limiting zones (effective once real_ip is resolved).
# sapl_general : 30 req/min for most traffic
# sapl_heavy : 10 req/min for PDF/report endpoints
# sapl_general : 90 req/min HTML pages (burst absorbs parallel assets)
# sapl_media : 180 req/min /media/ has its own bucket; doesn't drain general
# sapl_api : 60 req/min API quota layer is the real binding constraint
# sapl_heavy : 10 req/min PDF generation; slow by design
# Burst values are env-var configurable at container start (start.sh).
# ----------------------------------------------------------------
limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=30r/m;
limit_req_zone $binary_remote_addr zone=sapl_heavy:20m rate=10r/m;
limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=90r/m;
limit_req_zone $binary_remote_addr zone=sapl_media:20m rate=180r/m;
limit_req_zone $binary_remote_addr zone=sapl_api:20m rate=60r/m;
limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m;
# ----------------------------------------------------------------
# ASN-Based Blocking (datacenter / scraper ASNs).

4
docker/config/nginx/sapl.conf

@ -51,7 +51,7 @@ server {
# and content-type caching; served from disk via X-Accel-Redirect.
# ----------------------------------------------------------------
location /media/ {
limit_req zone=sapl_general burst=${NGINX_BURST_GENERAL} nodelay;
limit_req zone=sapl_media burst=${NGINX_BURST_MEDIA} nodelay;
limit_req_status 429;
proxy_set_header X-Request-ID $req_id;
@ -94,7 +94,7 @@ server {
# /api/ — rate limited, CORS maintained from original config.
# ----------------------------------------------------------------
location /api/ {
limit_req zone=sapl_general burst=${NGINX_BURST_API} nodelay;
limit_req zone=sapl_api burst=${NGINX_BURST_API} nodelay;
limit_req_status 429;
add_header 'Access-Control-Allow-Origin' '*' always;

16
docker/startup_scripts/start.sh

@ -107,12 +107,13 @@ write_env_file() {
: "${REDIS_URL:=}"
: "${CACHE_BACKEND:=file}"
: "${POD_NAMESPACE:=sapl}"
# nginx burst defaults: 2× the zone's sustained rate (30r/m and 10r/m).
# Raise these if legitimate users hit 429 before the Django threshold.
: "${NGINX_BURST_GENERAL:=60}"
: "${NGINX_BURST_API:=60}"
# nginx burst defaults — 2× each zone's sustained rate, except media (1×).
# general=90r/m media=180r/m api=60r/m heavy=10r/m
: "${NGINX_BURST_GENERAL:=180}"
: "${NGINX_BURST_MEDIA:=180}"
: "${NGINX_BURST_API:=120}"
: "${NGINX_BURST_HEAVY:=20}"
export NGINX_BURST_GENERAL NGINX_BURST_API NGINX_BURST_HEAVY
export NGINX_BURST_GENERAL NGINX_BURST_MEDIA NGINX_BURST_API NGINX_BURST_HEAVY
tmp="$(mktemp)"
{
@ -139,6 +140,7 @@ write_env_file() {
printf 'CACHE_BACKEND=%s\n' "$CACHE_BACKEND"
printf 'POD_NAMESPACE=%s\n' "$POD_NAMESPACE"
printf 'NGINX_BURST_GENERAL=%s\n' "$NGINX_BURST_GENERAL"
printf 'NGINX_BURST_MEDIA=%s\n' "$NGINX_BURST_MEDIA"
printf 'NGINX_BURST_API=%s\n' "$NGINX_BURST_API"
printf 'NGINX_BURST_HEAVY=%s\n' "$NGINX_BURST_HEAVY"
} > "$tmp"
@ -342,8 +344,8 @@ wait_for_redis() {
start_services() {
log "Starting gunicorn..."
gunicorn -c gunicorn.conf.py &
log "Applying nginx config (burst: general=${NGINX_BURST_GENERAL} api=${NGINX_BURST_API} heavy=${NGINX_BURST_HEAVY})..."
envsubst '${NGINX_BURST_GENERAL} ${NGINX_BURST_API} ${NGINX_BURST_HEAVY}' \
log "Applying nginx config (burst: general=${NGINX_BURST_GENERAL} media=${NGINX_BURST_MEDIA} api=${NGINX_BURST_API} heavy=${NGINX_BURST_HEAVY})..."
envsubst '${NGINX_BURST_GENERAL} ${NGINX_BURST_MEDIA} ${NGINX_BURST_API} ${NGINX_BURST_HEAVY}' \
< /etc/nginx/conf.d/sapl.conf.template \
> /etc/nginx/conf.d/sapl.conf
log "Starting nginx..."

13
sapl/middleware/ratelimit.py

@ -9,10 +9,10 @@ Decision flow (per request):
3. Authenticated user?
a. User blocked? 429
b. Suspicious hdrs? 429
c. User rate 120? SET RL_USER_BLOCKED, 429
c. User rate >= 240/min? 429 (no persistent block; window resets after 60 s)
4. Anonymous:
a. Suspicious hdrs? 429
b. IP rate 35/min? SET RL_IP_BLOCKED, 429
b. IP rate 120/min? SET RL_IP_BLOCKED, 429
c. NS/IP window hit? SET RL_IP_BLOCKED, 429
Degrades gracefully to non-atomic counting when Redis is unavailable.
@ -366,16 +366,13 @@ class RateLimitMiddleware:
if _is_suspicious_headers(request):
return {'action': 'block', 'reason': 'suspicious_headers_auth', 'ip': ip}
# Check 3c: authenticated request rate
# Check 3c: authenticated request rate — return 429 for this request only;
# no persistent block key so the window resets naturally after auth_window
# seconds. A 300s lockout is wrong for a logged-in user who clicked fast.
count = self._incr_with_ttl(
RL_USER_REQUESTS.format(ns=_NAMESPACE, uid=uid), ttl=self.auth_window
)
if count >= self.auth_threshold:
_set_block(
RL_USER_BLOCKED.format(ns=_NAMESPACE, uid=uid),
RL_INDEX_BLOCKED_USERS,
self.BLOCK_TTL,
)
return {'action': 'block', 'reason': 'auth_user_rate', 'ip': ip}
return {'action': 'pass', 'ip': ip}

14
sapl/settings.py

@ -406,8 +406,8 @@ FILE_UPLOAD_TEMP_DIR = '/var/interlegis/sapl/tmp'
# ---------------------------------------------------------------------------
# Rate limiting — RateLimitMiddleware (sapl/middleware/ratelimit.py)
# ---------------------------------------------------------------------------
RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='35/m')
RATE_LIMITER_RATE_AUTHENTICATED = config('RATE_LIMITER_RATE_AUTHENTICATED', default='120/m')
RATE_LIMITER_RATE = config('RATE_LIMITER_RATE', default='120/m')
RATE_LIMITER_RATE_AUTHENTICATED = config('RATE_LIMITER_RATE_AUTHENTICATED', default='240/m')
RATE_LIMITER_RATE_BOT = config('RATE_LIMITER_RATE_BOT', default='5/m')
# Comma-separated IPs exempt from rate limiting (e.g. legislative-house ranges).
@ -424,7 +424,7 @@ RATE_LIMITER_UA_BLOCKLIST_REFRESH = config('RATE_LIMITER_UA_BLOCKLIST_REFRESH',
# Maximum 404 responses from one anonymous IP in one anon window before the IP
# is blocked. Catches path-probing scanners that don't use recognised extensions.
RATE_LIMIT_404_THRESHOLD = config('RATE_LIMIT_404_THRESHOLD', default=10, cast=int)
RATE_LIMIT_404_THRESHOLD = config('RATE_LIMIT_404_THRESHOLD', default=20, cast=int)
# Paths exempt from rate limiting at the Django layer.
# Regex strings matched against request.path.
@ -443,10 +443,10 @@ RATE_LIMIT_BYPASS_PATHS = [
# Anon quota is tighter than auth quota — mirrors the rate limiter relationship.
# Both must be > their respective per-minute rate limit thresholds (120 anon, 240 auth),
# otherwise the quota fires before the rate limiter ever engages.
API_QUOTA_ANON_DAILY = config('API_QUOTA_ANON_DAILY', default=50, cast=int)
API_QUOTA_ANON_WEEKLY = config('API_QUOTA_ANON_WEEKLY', default=350, cast=int)
API_QUOTA_AUTH_DAILY = config('API_QUOTA_AUTH_DAILY', default=1000, cast=int)
API_QUOTA_AUTH_WEEKLY = config('API_QUOTA_AUTH_WEEKLY', default=7000, cast=int)
API_QUOTA_ANON_DAILY = config('API_QUOTA_ANON_DAILY', default=500, cast=int)
API_QUOTA_ANON_WEEKLY = config('API_QUOTA_ANON_WEEKLY', default=3500, cast=int)
API_QUOTA_AUTH_DAILY = config('API_QUOTA_AUTH_DAILY', default=5000, cast=int)
API_QUOTA_AUTH_WEEKLY = config('API_QUOTA_AUTH_WEEKLY', default=35000, cast=int)
# Media file serving — serve_media (sapl/base/media.py) via X-Accel-Redirect.
# TTL for both URL-path and storage-path access counters (DB 1).

Loading…
Cancel
Save