Browse Source

Add painel/dados bypass, Django block metrics, and layer tracking in logs

- nginx: exempt /painel/<pk>/dados from rate limiting (polling endpoint,
  will become WebSocket); dedicated location block with no limit_req
- ratelimit.py: bypass RATE_LIMIT_BYPASS_PATHS paths before _evaluate;
  add layer=django to block log; increment daily Redis metrics counter
  rl:metrics:{ns}:{date}:blocked:{reason} (TTL 8 days) on every block
- ratelimit.py: add quiltbot and AwarioBot to BOT_UA_FRAGMENTS
- ratelimit.py: fix _is_suspicious_headers to require missing UA before blocking
- settings: add RATE_LIMIT_BYPASS_PATHS with /painel/<pk>/dados pattern
- plan: extend UA blocklist SADD seed command with missing bot tokens

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 1 week ago
parent
commit
f520c11be3
  1. 14
      docker/config/nginx/sapl.conf
  2. 6
      plan/RATE-LIMITER-PLAN.md
  3. 30
      sapl/middleware/ratelimit.py
  4. 8
      sapl/settings.py

14
docker/config/nginx/sapl.conf

@ -122,6 +122,20 @@ server {
proxy_pass http://sapl_server; proxy_pass http://sapl_server;
} }
# ----------------------------------------------------------------
# /painel/<pk>/dados — high-frequency polling endpoint (will become
# WebSocket). No rate limiting at either layer (Django middleware
# also bypasses via RATE_LIMIT_BYPASS_PATHS).
# ----------------------------------------------------------------
location ~ ^/painel/\d+/dados$ {
# Regex location matching exactly /painel/<digits>/dados.
# Intentionally contains no limit_req directive: this polling endpoint is
# exempt from nginx rate limiting.
# Forward request-correlation and client/proxy information to the upstream.
# NOTE(review): $req_id is expected to be defined elsewhere in this config — confirm.
proxy_set_header X-Request-ID $req_id;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
proxy_redirect off;
proxy_pass http://sapl_server;
}
# ---------------------------------------------------------------- # ----------------------------------------------------------------
# General traffic — moderate rate limit. # General traffic — moderate rate limit.
# ---------------------------------------------------------------- # ----------------------------------------------------------------

6
plan/RATE-LIMITER-PLAN.md

@ -374,6 +374,12 @@ rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
"$(echo -n 'Bytespider' | sha256sum | cut -d' ' -f1)" \ "$(echo -n 'Bytespider' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'AhrefsBot' | sha256sum | cut -d' ' -f1)" \ "$(echo -n 'AhrefsBot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'meta-externalagent' | sha256sum | cut -d' ' -f1)" "$(echo -n 'meta-externalagent' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'OAI-SearchBot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'quiltbot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'Googlebot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'Applebot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'meta-webindexer' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'AwarioBot' | sha256sum | cut -d' ' -f1)"
# Add a new offender at runtime (picked up within RATE_LIMITER_UA_BLOCKLIST_REFRESH seconds) # Add a new offender at runtime (picked up within RATE_LIMITER_UA_BLOCKLIST_REFRESH seconds)
rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \ rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \

30
sapl/middleware/ratelimit.py

@ -31,6 +31,7 @@ import logging
import os import os
import re import re
import time import time
from datetime import date
from sapl import settings from sapl import settings
from django.core.cache import caches from django.core.cache import caches
@ -57,6 +58,7 @@ RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked'
RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}' RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}'
RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs' RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs'
RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list
RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Bot UA fragments # Bot UA fragments
@ -73,6 +75,8 @@ BOT_UA_FRAGMENTS = [
'bingbot', 'bingbot',
'SERankingBacklinksBot', 'SERankingBacklinksBot',
'Chrome/98.0.4758', # known scraper impersonating an old Chrome 'Chrome/98.0.4758', # known scraper impersonating an old Chrome
'quiltbot',
'AwarioBot',
] ]
_INCR_LUA = """ _INCR_LUA = """
@ -163,7 +167,9 @@ def _is_suspicious_headers(request):
not request.META.get('HTTP_ACCEPT_LANGUAGE'), not request.META.get('HTTP_ACCEPT_LANGUAGE'),
not request.META.get('HTTP_ACCEPT'), not request.META.get('HTTP_ACCEPT'),
]) ])
return missing >= 2 # Também considera User-Agent antes de bloquear
has_ua = bool(request.META.get('HTTP_USER_AGENT'))
return missing >= 2 and not has_ua
def _parse_rate(rate_str): def _parse_rate(rate_str):
@ -206,25 +212,33 @@ class RateLimitMiddleware:
self.auth_threshold, self.auth_window = _parse_rate(settings.RATE_LIMITER_RATE_AUTHENTICATED) self.auth_threshold, self.auth_window = _parse_rate(settings.RATE_LIMITER_RATE_AUTHENTICATED)
self.whitelist = set(settings.RATE_LIMIT_WHITELIST_IPS) self.whitelist = set(settings.RATE_LIMIT_WHITELIST_IPS)
self._rl_cache = caches['ratelimit'] self._rl_cache = caches['ratelimit']
self._bypass_paths = [
re.compile(p) for p in getattr(settings, 'RATE_LIMIT_BYPASS_PATHS', [])
]
logger.info( logger.info(
'[RATELIMIT] anon=%s auth=%s bot=%s whitelist=%s', '[RATELIMIT] anon=%s auth=%s bot=%s whitelist=%s bypass_paths=%s',
settings.RATE_LIMITER_RATE, settings.RATE_LIMITER_RATE,
settings.RATE_LIMITER_RATE_AUTHENTICATED, settings.RATE_LIMITER_RATE_AUTHENTICATED,
settings.RATE_LIMITER_RATE_BOT, settings.RATE_LIMITER_RATE_BOT,
list(self.whitelist) or '(none)', list(self.whitelist) or '(none)',
[p.pattern for p in self._bypass_paths] or '(none)',
) )
def __call__(self, request): def __call__(self, request):
if any(p.match(request.path) for p in self._bypass_paths):
return self.get_response(request)
decision = self._evaluate(request) decision = self._evaluate(request)
if decision['action'] == 'block': if decision['action'] == 'block':
logger.warning( logger.warning(
'ratelimit_block reason=%s ip=%s path=%s namespace=%s', 'ratelimit_block layer=django reason=%s ip=%s path=%s namespace=%s',
decision['reason'], decision['reason'],
decision['ip'], decision['ip'],
request.path, request.path,
_NAMESPACE, _NAMESPACE,
extra={'ua': request.META.get('HTTP_USER_AGENT', '')}, extra={'ua': request.META.get('HTTP_USER_AGENT', '')},
) )
self._inc_block_metric(decision['reason'])
response = HttpResponse(status=429) response = HttpResponse(status=429)
response['Retry-After'] = self.BLOCK_TTL response['Retry-After'] = self.BLOCK_TTL
return response return response
@ -325,6 +339,16 @@ class RateLimitMiddleware:
def _incr_with_ttl(self, key, ttl): def _incr_with_ttl(self, key, ttl):
return _incr_with_ttl(key, ttl) return _incr_with_ttl(key, ttl)
def _inc_block_metric(self, reason):
    """Increment daily per-reason block counter in Redis DB 1 (TTL 8 days).

    The key is built from RL_METRICS_BLOCKED using the module namespace,
    today's date in ISO format, and *reason*, then handed to the
    module-level ``_incr_with_ttl`` helper.

    This is best effort: a metrics failure must never affect the 429
    response being produced, so exceptions are not propagated. They are
    logged at debug level (with traceback) so a broken metrics path can
    still be diagnosed instead of vanishing silently.
    """
    key = RL_METRICS_BLOCKED.format(
        ns=_NAMESPACE, date=date.today().isoformat(), reason=reason
    )
    try:
        _incr_with_ttl(key, ttl=8 * 86400)
    except Exception:
        # Deliberately swallowed: metrics are auxiliary to request handling.
        logger.debug('ratelimit_metric_failed key=%s', key, exc_info=True)
def _refresh_ua_blocklist(self): def _refresh_ua_blocklist(self):
""" """
Fetch the full UA deny list from Redis DB 1 (SMEMBERS). Fetch the full UA deny list from Redis DB 1 (SMEMBERS).

8
sapl/settings.py

@ -432,6 +432,14 @@ RATE_LIMIT_SCANNER_EXTENSIONS = frozenset(
) )
) )
# Paths exempt from rate limiting at the Django layer.
# Regex strings matched against request.path.
# RateLimitMiddleware compiles each entry with re.compile() at start-up and
# tests it with .match(), so a request matching any pattern skips evaluation.
# /painel/<pk>/dados is a high-frequency polling endpoint (will become WebSocket);
# it is also exempt at the nginx layer (location block with no limit_req).
RATE_LIMIT_BYPASS_PATHS = [
r'^/painel/\d+/dados$',
]
# Media file serving — serve_media (sapl/base/media.py) via X-Accel-Redirect. # Media file serving — serve_media (sapl/base/media.py) via X-Accel-Redirect.
# TTL for both URL-path and storage-path access counters (DB 1). # TTL for both URL-path and storage-path access counters (DB 1).
MEDIA_PATH_COUNTER_TTL = config('MEDIA_PATH_COUNTER_TTL', default=60, cast=int) MEDIA_PATH_COUNTER_TTL = config('MEDIA_PATH_COUNTER_TTL', default=60, cast=int)

Loading…
Cancel
Save