From 568c838d08c57e00bd4895b376b6a9ec55667d12 Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Tue, 28 Apr 2026 21:46:29 -0300 Subject: [PATCH] Rate limiter: move scanner probes to nginx, fix NAT IP block for authenticated users, add 404-scan counter; remove dead painel sub-views MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - nginx sapl.conf: return 444 for scanner extension probes (.php, .asp, .jsp, .env, etc.) before requests reach Gunicorn — zero Python cost - ratelimit.py: remove check 2b (scanner_probe) — dead code now that nginx handles it; remove unused `import os` - ratelimit.py: authenticated users skip the rl:ip:blocked check (check 2) to prevent anonymous NAT traffic from blocking legislative house staff - ratelimit.py: add _handle_not_found — post-response 404 counter per anonymous IP; blocks after RATE_LIMIT_404_THRESHOLD (default 10) hits in the anon window, catching path probes without known extensions - settings.py: replace RATE_LIMIT_SCANNER_EXTENSIONS with RATE_LIMIT_404_THRESHOLD; add RL_IP_404S Redis key constant - painel: remove dead mensagem/parlamentares/votacao templates, views, and URL entries — unreachable from any menu or template - RATE-LIMITER-PLAN.md: update decision flow, mermaid diagram, enforcement graduation table, and key schema to reflect all changes Co-Authored-By: Claude Sonnet 4.6 --- docker/config/nginx/sapl.conf | 8 ++ plan/RATE-LIMITER-PLAN.md | 51 ++++++--- sapl/middleware/ratelimit.py | 47 ++++++--- sapl/painel/urls.py | 7 +- sapl/painel/views.py | 14 --- sapl/settings.py | 13 +-- sapl/templates/painel/mensagem.html | 120 --------------------- sapl/templates/painel/parlamentares.html | 128 ----------------------- sapl/templates/painel/votacao.html | 123 ---------------------- 9 files changed, 83 insertions(+), 428 deletions(-) delete mode 100644 sapl/templates/painel/mensagem.html delete mode 100644 sapl/templates/painel/parlamentares.html delete mode 100644 
sapl/templates/painel/votacao.html diff --git a/docker/config/nginx/sapl.conf b/docker/config/nginx/sapl.conf index ed823ed20..f7e0603e2 100644 --- a/docker/config/nginx/sapl.conf +++ b/docker/config/nginx/sapl.conf @@ -136,6 +136,14 @@ server { proxy_pass http://sapl_server; } + # ---------------------------------------------------------------- + # Scanner extension probes (.php, .asp, etc.) — SAPL never serves + # these. Drop the connection before reaching Gunicorn. + # ---------------------------------------------------------------- + location ~* \.(php|asp|aspx|jsp|cgi|env|htaccess|htpasswd|bak|sql|sh|bash|py|rb|pl)$ { + return 444; + } + # ---------------------------------------------------------------- # General traffic — moderate rate limit. # ---------------------------------------------------------------- diff --git a/plan/RATE-LIMITER-PLAN.md b/plan/RATE-LIMITER-PLAN.md index d60022f46..ddaac7229 100644 --- a/plan/RATE-LIMITER-PLAN.md +++ b/plan/RATE-LIMITER-PLAN.md @@ -176,6 +176,27 @@ sapl-redis-6d9f8b7c4d-xk2lm 1/1 Running 0 30s ## Verify the rate limiter +### Canary tenants + +Current canary namespaces receiving the `rate-limiter-2026` image: + +``` +joaopessoa-pb patobranco-pr al-am al-pi al-ro divinopolis-mg +``` + +Verify image digest, `imagePullPolicy: Always`, and `REDIS_URL` for all six at once: + +```bash +# From monitoring_metrics-2025-2026/logs/cluster-prod/ +bash check-canary-tenants.sh +``` + +Expected: all checks green and the same `sha256` digest across all pods. + +--- + +### Functional test + `scripts/test_ratelimiter.py` fires repeated GET requests at a SAPL URL and reports when the first 429 is returned. @@ -600,8 +621,9 @@ Decision flow inside `RateLimitMiddleware._evaluate()`: 1. IP in whitelist? → pass (no further checks) 1a. UA matches BOT_UA_FRAGMENTS list? → 429 reason=known_ua 1b. UA token hash in rl:bot:ua:blocked SET? → 429 reason=redis_ua -2. IP in rl:ip:{ip}:blocked? → 429 reason=ip_blocked -2b. 
Path extension in RATE_LIMIT_SCANNER_EXTENSIONS? → SET blocked, 429 reason=scanner_probe +2. Anonymous AND IP in rl:ip:{ip}:blocked? → 429 reason=ip_blocked + (authenticated users skip — they have independent per-user limiting at 3c) + (scanner extension probes are rejected at nginx before reaching Django — see sapl.conf) 3. Authenticated user? 3a. User in rl:{ns}:user:{uid}:blocked? → 429 reason=user_blocked 3b. Suspicious headers (no Accept/AL)? → 429 reason=suspicious_headers_auth @@ -627,18 +649,17 @@ flowchart TD C1B -- "yes — token hash in rl:bot:ua:blocked" --> R_RUA([429\nredis_ua]) C1B -- no --> C2 - C2{"IP blocked?"} - C2 -- "yes — rl:ip:IP:blocked exists" --> R_IPB([429\nip_blocked]) - C2 -- no --> C2B + C2{"Authenticated?"} + C2 -- yes --> C2B + C2 -- no --> C2_ANON - C2B{"Scanner extension?\n.php .asp .aspx …"} - C2B -- yes --> SIPB["SET rl:ip:IP:blocked TTL 300 s"] - SIPB --> R_SCN([429\nscanner_probe]) - C2B -- no --> C3 + C2_ANON{"IP blocked?\nrl:ip:IP:blocked"} + C2_ANON -- yes --> R_IPB([429\nip_blocked]) + C2_ANON -- no --> C3 C3{"Authenticated?"} C3 -- yes --> C3A - C3 -- no --> C4A + C3 -- "no (anonymous)" --> C4A subgraph AUTH ["Authenticated"] C3A{"User blocked?"} @@ -674,13 +695,14 @@ Roll out to canary pods first; promote check-by-check in order of false-positive | Order | Check | Reason | Risk | Condition to promote | |-------|-------|--------|------|---------------------| +| nginx | scanner extensions | `return 444` in `sapl.conf` for `.php`/`.asp`/etc. 
| Zero | Gunicorn never sees these requests | | 1st | `known_ua` | Substring in hardcoded `BOT_UA_FRAGMENTS` list | Zero | UA strings are deterministic | | 2nd | `redis_ua` | Token hash in `rl:bot:ua:blocked` SET | Zero | Keys only set manually by operators | | 3rd | `ip_blocked` | Marker set by prior proven-bad requests | Zero | Fast-path only, no new blocks created | -| 4th | `scanner_probe` | Path ext in `RATE_LIMIT_SCANNER_EXTENSIONS` | Zero | Django never legitimately serves `.php`/`.asp`/etc. | -| 5th | `ip_rate` | Rolling IP counter ≥ 35/min | Low | Threshold calibrated from canary logs | -| 6th | `suspicious_headers` | No Accept-Language **and** no Accept | Medium | Confirmed no legitimate clients omit both headers | -| 7th | `ua_rotation` (ns/window) | NS/IP clock-aligned bucket ≥ 35 | Medium | NAT IP whitelist in place (see Open Questions) | +| 4th | `ip_rate` | Rolling IP counter ≥ 35/min | Low | Threshold calibrated from canary logs | +| 5th | `suspicious_headers` | No Accept-Language **and** no Accept | Medium | Confirmed no legitimate clients omit both headers | +| 6th | `ua_rotation` (ns/window) | NS/IP clock-aligned bucket ≥ 35 | Medium | NAT IP whitelist in place (see Open Questions) | +| 7th | `404_scan` | Anonymous IP accumulates ≥ 10 404s/min | Low | Catches path probes without known extensions | ### Decorator migration @@ -836,6 +858,7 @@ Redis PDF caching would solve "high request volume reaching the file layer" — | 0 | Static file cache (logos) | `static:{ns}:{sha256}` | 3 – 24 h | — | *Future* (requires OpenResty/Lua) | | 0 | File content cache (≤ 360 KB) | `file:{ns}:{sha256}` | 1 h | — | *Future* | | 1 | IP rate-limit counter | `rl:ip:{ip}:reqs` | 60 s | 35 (`RATE_LIMITER_RATE`) | `RL_IP_REQUESTS` | +| 1 | IP 404 counter | `rl:ip:{ip}:404s` | 60 s | 10 (`RATE_LIMIT_404_THRESHOLD`) | `RL_IP_404S` | | 1 | IP blocked marker | `rl:ip:{ip}:blocked` | 300 s | — | `RL_IP_BLOCKED` | | 1 | User rate-limit counter | `rl:{ns}:user:{uid}:reqs` | 60 s 
| 120 (`RATE_LIMITER_RATE_AUTHENTICATED`) | `RL_USER_REQUESTS` | | 1 | User blocked marker | `rl:{ns}:user:{uid}:blocked` | 300 s | — | `RL_USER_BLOCKED` | diff --git a/sapl/middleware/ratelimit.py b/sapl/middleware/ratelimit.py index e5f8ebb1c..65efaace6 100644 --- a/sapl/middleware/ratelimit.py +++ b/sapl/middleware/ratelimit.py @@ -4,8 +4,7 @@ RateLimitMiddleware — cross-pod rate limiting backed by shared Redis. Decision flow (per request): 1. Known bot UA? → 429 (Python list — substring match) 1b. Redis UA deny list? → 429 (runtime SET — token hash match, refreshed every 60 s) - 2. IP in blocked set? → 429 - 2b. Path extension in scanner set? → SET RL_IP_BLOCKED, 429 + 2. Anonymous AND IP in blocked set? → 429 (authenticated users skip — have per-user limit at 3c) 3. Authenticated user? a. User blocked? → 429 b. Suspicious hdrs? → 429 @@ -28,7 +27,6 @@ no per-request lookup is needed or correct. import hashlib import logging -import os import re import time from datetime import date @@ -53,6 +51,7 @@ _NAMESPACE = settings.POD_NAMESPACE RL_IP_REQUESTS = 'rl:ip:{ip}:reqs' RL_IP_BLOCKED = 'rl:ip:{ip}:blocked' +RL_IP_404S = 'rl:ip:{ip}:404s' RL_USER_REQUESTS = 'rl:{ns}:user:{uid}:reqs' RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked' RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}' @@ -212,6 +211,7 @@ class RateLimitMiddleware: self.auth_threshold, self.auth_window = _parse_rate(settings.RATE_LIMITER_RATE_AUTHENTICATED) self.whitelist = set(settings.RATE_LIMIT_WHITELIST_IPS) self._rl_cache = caches['ratelimit'] + self.not_found_threshold = settings.RATE_LIMIT_404_THRESHOLD self._bypass_paths = [ re.compile(p) for p in getattr(settings, 'RATE_LIMIT_BYPASS_PATHS', []) ] @@ -249,7 +249,10 @@ class RateLimitMiddleware: getattr(getattr(request, 'user', None), 'pk', 'anon'), _NAMESPACE, ) - return self.get_response(request) + response = self.get_response(request) + if response.status_code == 404: + self._handle_not_found(request, decision['ip']) + return response # 
------------------------------------------------------------------ # Evaluation @@ -271,17 +274,12 @@ class RateLimitMiddleware: if self._is_redis_blocked_ua(ua): return {'action': 'block', 'reason': 'redis_ua', 'ip': ip} - # Check 2: IP already blocked - if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)): + # Check 2: IP already blocked — authenticated users are exempt since they + # have independent per-user limiting at check 3c; IP blocks target anonymous traffic. + user = getattr(request, 'user', None) + if not (user and user.is_authenticated) and self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)): return {'action': 'block', 'reason': 'ip_blocked', 'ip': ip} - # Check 2b: scanner probe (e.g. .php, .asp) — Django never serves these. - ext = os.path.splitext(request.path)[1].lower() - if ext in settings.RATE_LIMIT_SCANNER_EXTENSIONS: - self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL) - return {'action': 'block', 'reason': 'scanner_probe', 'ip': ip} - - user = getattr(request, 'user', None) if user is not None and user.is_authenticated: return self._evaluate_authenticated(request, ip) return self._evaluate_anonymous(request, ip) @@ -336,6 +334,29 @@ class RateLimitMiddleware: # Helpers — delegate to module-level so media.py can reuse them # ------------------------------------------------------------------ + def _handle_not_found(self, request, ip): + """ + Block IPs that accumulate too many 404s in one window — catches scanner + probes that use paths without recognised extensions (e.g. /wp-login, + /.git/HEAD, /xmlrpc) and so are not caught by the nginx extension filter. + Only anonymous requests are counted; authenticated users have their own + per-user rate limit and may legitimately hit stale bookmarks. 
+ """ + user = getattr(request, 'user', None) + if user and user.is_authenticated: + return + if ip in self.whitelist: + return + count = self._incr_with_ttl(RL_IP_404S.format(ip=ip), ttl=self.anon_window) + if count >= self.not_found_threshold: + self._rl_cache.set(RL_IP_BLOCKED.format(ip=ip), 1, timeout=self.BLOCK_TTL) + logger.warning( + 'ratelimit_block layer=django reason=404_scan ip=%s path=%s namespace=%s', + ip, request.path, _NAMESPACE, + extra={'ua': request.META.get('HTTP_USER_AGENT', '')}, + ) + self._inc_block_metric('404_scan') + def _incr_with_ttl(self, key, ttl): return _incr_with_ttl(key, ttl) diff --git a/sapl/painel/urls.py b/sapl/painel/urls.py index 0795d0a35..b645c7bbe 100644 --- a/sapl/painel/urls.py +++ b/sapl/painel/urls.py @@ -1,8 +1,7 @@ from django.conf.urls import url from .apps import AppConfig -from .views import (cronometro_painel, get_dados_painel, painel_mensagem_view, - painel_parlamentar_view, painel_view, painel_votacao_view, +from .views import (cronometro_painel, get_dados_painel, painel_view, switch_painel, verifica_painel, votante_view) app_name = AppConfig.name @@ -11,12 +10,8 @@ urlpatterns = [ url(r'^painel-principal/(?P\d+)$', painel_view, name="painel_principal"), url(r'^painel/(?P\d+)/dados$', get_dados_painel, name='dados_painel'), - url(r'^painel/mensagem$', painel_mensagem_view, name="painel_mensagem"), - url(r'^painel/parlamentar$', painel_parlamentar_view, - name='painel_parlamentar'), url(r'^painel/switch-painel$', switch_painel, name="switch_painel"), - url(r'^painel/votacao$', painel_votacao_view, name='painel_votacao'), url(r'^painel/verifica-painel$', verifica_painel, name="verifica_painel"), url(r'^painel/cronometro$', cronometro_painel, name='cronometro_painel'), diff --git a/sapl/painel/views.py b/sapl/painel/views.py index 83145173a..d1b6219f9 100644 --- a/sapl/painel/views.py +++ b/sapl/painel/views.py @@ -308,20 +308,6 @@ def verifica_painel(request): return resposta -@user_passes_test(check_permission) 
-def painel_mensagem_view(request): - return render(request, 'painel/mensagem.html') - - -@user_passes_test(check_permission) -def painel_parlamentar_view(request): - return render(request, 'painel/parlamentares.html') - - -@user_passes_test(check_permission) -def painel_votacao_view(request): - return render(request, 'painel/votacao.html') - @user_passes_test(check_permission) def cronometro_painel(request): diff --git a/sapl/settings.py b/sapl/settings.py index 322458812..1b2f53875 100644 --- a/sapl/settings.py +++ b/sapl/settings.py @@ -421,16 +421,9 @@ RATE_LIMIT_WHITELIST_IPS = config( # Lower values pick up new blocked UAs faster; higher values reduce Redis round-trips. RATE_LIMITER_UA_BLOCKLIST_REFRESH = config('RATE_LIMITER_UA_BLOCKLIST_REFRESH', default=60, cast=int) -# File extensions that indicate a scanner probe (e.g. PHP/ASP app fingerprinting). -# Requests for these extensions are blocked immediately and the IP is written to -# rl:ip:{ip}:blocked for BLOCK_TTL seconds — Django never legitimately serves them. -RATE_LIMIT_SCANNER_EXTENSIONS = frozenset( - config( - 'RATE_LIMIT_SCANNER_EXTENSIONS', - default='.php .asp .aspx .jsp .cgi .env', - cast=lambda v: [x.strip() for x in v.split() if x.strip()], - ) -) +# Maximum 404 responses from one anonymous IP in one anon window before the IP +# is blocked. Catches path-probing scanners that don't use recognised extensions. +RATE_LIMIT_404_THRESHOLD = config('RATE_LIMIT_404_THRESHOLD', default=10, cast=int) # Paths exempt from rate limiting at the Django layer. # Regex strings matched against request.path. 
diff --git a/sapl/templates/painel/mensagem.html b/sapl/templates/painel/mensagem.html deleted file mode 100644 index da8e355d4..000000000 --- a/sapl/templates/painel/mensagem.html +++ /dev/null @@ -1,120 +0,0 @@ -{% load i18n %} -{% load common_tags %} -{% load render_bundle from webpack_loader %} -{% load webpack_static from webpack_loader %} - - - - - - - - - - - {% block head_title %}{% trans 'SAPL - Sistema de Apoio ao Processo Legislativo' %}{% endblock %} - - - {% render_chunk_vendors 'css' %} - {% render_bundle 'global' 'css' %} - {% render_bundle 'painel' 'css' %} - - - - - - -

{{ context.title }}

- -

Ajax refresh counter:

-

-

-

-

-




-
- - - - - {% render_chunk_vendors 'js' %} - {% render_bundle 'global' 'js' %} - {% render_bundle 'painel' 'js' %} - - - diff --git a/sapl/templates/painel/parlamentares.html b/sapl/templates/painel/parlamentares.html deleted file mode 100644 index 3dbe6e740..000000000 --- a/sapl/templates/painel/parlamentares.html +++ /dev/null @@ -1,128 +0,0 @@ -{% load i18n %} -{% load common_tags %} - -{% load render_bundle from webpack_loader %} -{% load webpack_static from webpack_loader %} - - - - - - - - - - - {% block head_title %}{% trans 'SAPL - Sistema de Apoio ao Processo Legislativo' %}{% endblock %} - - - {% render_chunk_vendors 'css' %} - {% render_bundle 'global' 'css' %} - {% render_bundle 'painel' 'css' %} - - - - - - - -

{{ context.title }}

- -

-

-

-

-

- - - - -
-
    -
-
- - - - {% render_chunk_vendors 'js' %} - {% render_bundle 'global' 'js' %} - {% render_bundle 'painel' 'js' %} - - - diff --git a/sapl/templates/painel/votacao.html b/sapl/templates/painel/votacao.html deleted file mode 100644 index e73551160..000000000 --- a/sapl/templates/painel/votacao.html +++ /dev/null @@ -1,123 +0,0 @@ -{% load i18n %} -{% load render_bundle from webpack_loader %} -{% load webpack_static from webpack_loader %} - - - - - - - - - - - {% block head_title %}{% trans 'SAPL - Sistema de Apoio ao Processo Legislativo' %}{% endblock %} - - - {% render_chunk_vendors 'css' %} - {% render_bundle 'global' 'css' %} - {% render_bundle 'painel' 'css' %} - - - - -

{{ context.title }}

- -

-

-

-

-

- - - - -
-
    -
-
-
- - - - {% render_chunk_vendors 'js' %} - {% render_bundle 'global' 'js' %} - {% render_bundle 'painel' 'js' %} - - -