Browse Source

Add painel/dados bypass, Django block metrics, and layer tracking in logs

- nginx: exempt /painel/<pk>/dados from rate limiting (polling endpoint,
  will become WebSocket); dedicated location block with no limit_req
- ratelimit.py: bypass RATE_LIMIT_BYPASS_PATHS paths before _evaluate;
  add layer=django to block log; increment daily Redis metrics counter
  rl:metrics:{ns}:{date}:blocked:{reason} (TTL 8 days) on every block
- ratelimit.py: add quiltbot and AwarioBot to BOT_UA_FRAGMENTS
- ratelimit.py: fix _is_suspicious_headers to require missing UA before blocking
- settings: add RATE_LIMIT_BYPASS_PATHS with /painel/<pk>/dados pattern
- plan: extend UA blocklist SADD seed command with missing bot tokens

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 1 week ago
parent
commit
f520c11be3
  1. 14
      docker/config/nginx/sapl.conf
  2. 6
      plan/RATE-LIMITER-PLAN.md
  3. 30
      sapl/middleware/ratelimit.py
  4. 8
      sapl/settings.py

14
docker/config/nginx/sapl.conf

@ -122,6 +122,20 @@ server {
proxy_pass http://sapl_server;
}
# ----------------------------------------------------------------
# /painel/<pk>/dados — high-frequency polling endpoint (will become
# WebSocket). No rate limiting at either layer (Django middleware
# also bypasses via RATE_LIMIT_BYPASS_PATHS).
# ----------------------------------------------------------------
# Regex location: matches only paths of the exact form /painel/<digits>/dados.
# NOTE(review): nginx inherits limit_req from the enclosing level when a
# location defines none of its own — confirm no limit_req is declared at
# server/http level, otherwise this endpoint is still rate limited.
location ~ ^/painel/\d+/dados$ {
# Forward the request ID to the upstream
# ($req_id is defined outside this block — TODO confirm where it is set).
proxy_set_header X-Request-ID $req_id;
# Append the client address to any incoming X-Forwarded-For chain.
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# Tell the upstream which scheme (http/https) the client used.
proxy_set_header X-Forwarded-Proto $scheme;
# $http_host passes the Host header as sent by the client.
proxy_set_header Host $http_host;
# Do not rewrite Location/Refresh headers returned by the upstream.
proxy_redirect off;
proxy_pass http://sapl_server;
}
# ----------------------------------------------------------------
# General traffic — moderate rate limit.
# ----------------------------------------------------------------

6
plan/RATE-LIMITER-PLAN.md

@ -374,6 +374,12 @@ rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
"$(echo -n 'Bytespider' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'AhrefsBot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'meta-externalagent' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'OAI-SearchBot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'quiltbot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'Googlebot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'Applebot' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'meta-webindexer' | sha256sum | cut -d' ' -f1)" \
"$(echo -n 'AwarioBot' | sha256sum | cut -d' ' -f1)"
# Add a new offender at runtime (picked up within RATE_LIMITER_UA_BLOCKLIST_REFRESH seconds)
rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \

30
sapl/middleware/ratelimit.py

@ -31,6 +31,7 @@ import logging
import os
import re
import time
from datetime import date
from sapl import settings
from django.core.cache import caches
@ -57,6 +58,7 @@ RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked'
RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}'
RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs'
RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list
RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason
# ---------------------------------------------------------------------------
# Bot UA fragments
@ -73,6 +75,8 @@ BOT_UA_FRAGMENTS = [
'bingbot',
'SERankingBacklinksBot',
'Chrome/98.0.4758', # known scraper impersonating an old Chrome
'quiltbot',
'AwarioBot',
]
_INCR_LUA = """
@ -163,7 +167,9 @@ def _is_suspicious_headers(request):
not request.META.get('HTTP_ACCEPT_LANGUAGE'),
not request.META.get('HTTP_ACCEPT'),
])
return missing >= 2
# Também considera User-Agent antes de bloquear
has_ua = bool(request.META.get('HTTP_USER_AGENT'))
return missing >= 2 and not has_ua
def _parse_rate(rate_str):
@ -206,25 +212,33 @@ class RateLimitMiddleware:
self.auth_threshold, self.auth_window = _parse_rate(settings.RATE_LIMITER_RATE_AUTHENTICATED)
self.whitelist = set(settings.RATE_LIMIT_WHITELIST_IPS)
self._rl_cache = caches['ratelimit']
self._bypass_paths = [
re.compile(p) for p in getattr(settings, 'RATE_LIMIT_BYPASS_PATHS', [])
]
logger.info(
'[RATELIMIT] anon=%s auth=%s bot=%s whitelist=%s',
'[RATELIMIT] anon=%s auth=%s bot=%s whitelist=%s bypass_paths=%s',
settings.RATE_LIMITER_RATE,
settings.RATE_LIMITER_RATE_AUTHENTICATED,
settings.RATE_LIMITER_RATE_BOT,
list(self.whitelist) or '(none)',
[p.pattern for p in self._bypass_paths] or '(none)',
)
def __call__(self, request):
if any(p.match(request.path) for p in self._bypass_paths):
return self.get_response(request)
decision = self._evaluate(request)
if decision['action'] == 'block':
logger.warning(
'ratelimit_block reason=%s ip=%s path=%s namespace=%s',
'ratelimit_block layer=django reason=%s ip=%s path=%s namespace=%s',
decision['reason'],
decision['ip'],
request.path,
_NAMESPACE,
extra={'ua': request.META.get('HTTP_USER_AGENT', '')},
)
self._inc_block_metric(decision['reason'])
response = HttpResponse(status=429)
response['Retry-After'] = self.BLOCK_TTL
return response
@ -325,6 +339,16 @@ class RateLimitMiddleware:
# Instance-level wrapper: forwards key and ttl unchanged to the module-level
# _incr_with_ttl() helper and returns whatever that helper returns.
def _incr_with_ttl(self, key, ttl):
return _incr_with_ttl(key, ttl)
def _inc_block_metric(self, reason):
    """Increment the daily per-reason block counter in Redis DB 1.

    The key is built from RL_METRICS_BLOCKED using the current namespace,
    today's date in ISO format and ``reason``; the counter is written with
    a TTL of 8 days.

    Metrics are best-effort: a failure to update the counter must never
    affect the response being returned, so errors are logged and
    suppressed rather than raised.
    """
    key = RL_METRICS_BLOCKED.format(
        ns=_NAMESPACE, date=date.today().isoformat(), reason=reason
    )
    try:
        _incr_with_ttl(key, ttl=8 * 86400)
    except Exception:
        # Boundary handler: keep the request path alive, but leave a trace
        # instead of silently discarding the failure. Debug level avoids
        # flooding the log when Redis is down during a burst of blocks.
        logger.debug('ratelimit_metric_error key=%s', key, exc_info=True)
def _refresh_ua_blocklist(self):
"""
Fetch the full UA deny list from Redis DB 1 (SMEMBERS).

8
sapl/settings.py

@ -432,6 +432,14 @@ RATE_LIMIT_SCANNER_EXTENSIONS = frozenset(
)
)
# Paths exempt from rate limiting at the Django layer.
# Regex strings matched against request.path.
# /painel/<pk>/dados is a high-frequency polling endpoint (will become WebSocket);
# it is also exempt at the nginx layer (location block with no limit_req).
# Single entry today: the painel polling endpoint, anchored at both ends.
RATE_LIMIT_BYPASS_PATHS = [r'^/painel/\d+/dados$']
# Media file serving — serve_media (sapl/base/media.py) via X-Accel-Redirect.
# TTL for both URL-path and storage-path access counters (DB 1).
MEDIA_PATH_COUNTER_TTL = config('MEDIA_PATH_COUNTER_TTL', default=60, cast=int)

Loading…
Cancel
Save