From f520c11be3765ea560f5fdca0aa0ea8a30636b64 Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Mon, 27 Apr 2026 17:12:23 -0300 Subject: [PATCH] Add painel/dados bypass, Django block metrics, and layer tracking in logs - nginx: exempt /painel//dados from rate limiting (polling endpoint, will become WebSocket); dedicated location block with no limit_req - ratelimit.py: bypass RATE_LIMIT_BYPASS_PATHS paths before _evaluate; add layer=django to block log; increment daily Redis metrics counter rl:metrics:{ns}:{date}:blocked:{reason} (TTL 8 days) on every block - ratelimit.py: add quiltbot and AwarioBot to BOT_UA_FRAGMENTS - ratelimit.py: fix _is_suspicious_headers to require missing UA before blocking - settings: add RATE_LIMIT_BYPASS_PATHS with /painel//dados pattern - plan: extend UA blocklist SADD seed command with missing bot tokens Co-Authored-By: Claude Sonnet 4.6 --- docker/config/nginx/sapl.conf | 14 ++++++++++++++ plan/RATE-LIMITER-PLAN.md | 8 +++++++- sapl/middleware/ratelimit.py | 30 +++++++++++++++++++++++++++--- sapl/settings.py | 8 ++++++++ 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/docker/config/nginx/sapl.conf b/docker/config/nginx/sapl.conf index 89f3b3706..ed823ed20 100644 --- a/docker/config/nginx/sapl.conf +++ b/docker/config/nginx/sapl.conf @@ -122,6 +122,20 @@ server { proxy_pass http://sapl_server; } + # ---------------------------------------------------------------- + # /painel//dados — high-frequency polling endpoint (will become + # WebSocket). No rate limiting at either layer (Django middleware + # also bypasses via RATE_LIMIT_BYPASS_PATHS). 
+ # ---------------------------------------------------------------- + location ~ ^/painel/\d+/dados$ { + proxy_set_header X-Request-ID $req_id; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Host $http_host; + proxy_redirect off; + proxy_pass http://sapl_server; + } + # ---------------------------------------------------------------- # General traffic — moderate rate limit. # ---------------------------------------------------------------- diff --git a/plan/RATE-LIMITER-PLAN.md b/plan/RATE-LIMITER-PLAN.md index e22444fa0..d60022f46 100644 --- a/plan/RATE-LIMITER-PLAN.md +++ b/plan/RATE-LIMITER-PLAN.md @@ -374,6 +374,12 @@ rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \ "$(echo -n 'Bytespider' | sha256sum | cut -d' ' -f1)" \ "$(echo -n 'AhrefsBot' | sha256sum | cut -d' ' -f1)" \ - "$(echo -n 'meta-externalagent' | sha256sum | cut -d' ' -f1)" + "$(echo -n 'meta-externalagent' | sha256sum | cut -d' ' -f1)" \ + "$(echo -n 'OAI-SearchBot' | sha256sum | cut -d' ' -f1)" \ + "$(echo -n 'quiltbot' | sha256sum | cut -d' ' -f1)" \ + "$(echo -n 'Googlebot' | sha256sum | cut -d' ' -f1)" \ + "$(echo -n 'Applebot' | sha256sum | cut -d' ' -f1)" \ + "$(echo -n 'meta-webindexer' | sha256sum | cut -d' ' -f1)" \ + "$(echo -n 'AwarioBot' | sha256sum | cut -d' ' -f1)" # Add a new offender at runtime (picked up within RATE_LIMITER_UA_BLOCKLIST_REFRESH seconds) rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \ diff --git a/sapl/middleware/ratelimit.py b/sapl/middleware/ratelimit.py index 03647bced..e5f8ebb1c 100644 --- a/sapl/middleware/ratelimit.py +++ b/sapl/middleware/ratelimit.py @@ -31,6 +31,7 @@ import logging import os import re import time +from datetime import date from sapl import settings from django.core.cache import caches @@ -57,6 +58,7 @@ RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked' RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}' RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs' RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # 
permanent SET — runtime UA deny list +RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason # --------------------------------------------------------------------------- # Bot UA fragments @@ -73,6 +75,8 @@ BOT_UA_FRAGMENTS = [ 'bingbot', 'SERankingBacklinksBot', 'Chrome/98.0.4758', # known scraper impersonating an old Chrome + 'quiltbot', + 'AwarioBot', ] _INCR_LUA = """ @@ -163,7 +167,9 @@ def _is_suspicious_headers(request): not request.META.get('HTTP_ACCEPT_LANGUAGE'), not request.META.get('HTTP_ACCEPT'), ]) - return missing >= 2 + # Also require the User-Agent header to be missing before flagging the request + has_ua = bool(request.META.get('HTTP_USER_AGENT')) + return missing >= 2 and not has_ua def _parse_rate(rate_str): @@ -206,25 +212,33 @@ class RateLimitMiddleware: self.auth_threshold, self.auth_window = _parse_rate(settings.RATE_LIMITER_RATE_AUTHENTICATED) self.whitelist = set(settings.RATE_LIMIT_WHITELIST_IPS) self._rl_cache = caches['ratelimit'] + self._bypass_paths = [ + re.compile(p) for p in getattr(settings, 'RATE_LIMIT_BYPASS_PATHS', []) + ] logger.info( - '[RATELIMIT] anon=%s auth=%s bot=%s whitelist=%s', + '[RATELIMIT] anon=%s auth=%s bot=%s whitelist=%s bypass_paths=%s', settings.RATE_LIMITER_RATE, settings.RATE_LIMITER_RATE_AUTHENTICATED, settings.RATE_LIMITER_RATE_BOT, list(self.whitelist) or '(none)', + [p.pattern for p in self._bypass_paths] or '(none)', ) def __call__(self, request): + if any(p.match(request.path) for p in self._bypass_paths): + return self.get_response(request) + decision = self._evaluate(request) if decision['action'] == 'block': logger.warning( - 'ratelimit_block reason=%s ip=%s path=%s namespace=%s', + 'ratelimit_block layer=django reason=%s ip=%s path=%s namespace=%s', decision['reason'], decision['ip'], request.path, _NAMESPACE, extra={'ua': request.META.get('HTTP_USER_AGENT', '')}, ) + self._inc_block_metric(decision['reason']) response = HttpResponse(status=429) response['Retry-After'] = self.BLOCK_TTL 
return response @@ -325,6 +339,16 @@ class RateLimitMiddleware: def _incr_with_ttl(self, key, ttl): return _incr_with_ttl(key, ttl) + def _inc_block_metric(self, reason): + """Increment daily per-reason block counter in Redis DB 1 (TTL 8 days); best-effort: a failure is logged at DEBUG and never propagates to the caller.""" + key = RL_METRICS_BLOCKED.format( + ns=_NAMESPACE, date=date.today().isoformat(), reason=reason + ) + try: + _incr_with_ttl(key, ttl=8 * 86400) + except Exception: + logger.debug('ratelimit_metric_error key=%s', key, exc_info=True) + def _refresh_ua_blocklist(self): """ Fetch the full UA deny list from Redis DB 1 (SMEMBERS). diff --git a/sapl/settings.py b/sapl/settings.py index 0e5135e90..322458812 100644 --- a/sapl/settings.py +++ b/sapl/settings.py @@ -432,6 +432,14 @@ RATE_LIMIT_SCANNER_EXTENSIONS = frozenset( ) ) +# Paths exempt from rate limiting at the Django layer. +# Regex strings matched against request.path. +# /painel//dados is a high-frequency polling endpoint (will become WebSocket); +# it is also exempt at the nginx layer (location block with no limit_req). +RATE_LIMIT_BYPASS_PATHS = [ + r'^/painel/\d+/dados$', +] + # Media file serving — serve_media (sapl/base/media.py) via X-Accel-Redirect. # TTL for both URL-path and storage-path access counters (DB 1). MEDIA_PATH_COUNTER_TTL = config('MEDIA_PATH_COUNTER_TTL', default=60, cast=int)