Browse Source

Add Redis-backed IP-prefix blocklist to RateLimitMiddleware

Operators can now SADD/SREM dotted-decimal prefixes (e.g. "103.124.225")
into rl:ip_prefix:blocked to block entire ranges of abusive traffic
network-wide, mirroring the existing runtime UA deny-list pattern
(periodic SMEMBERS refresh into an in-process cache). Checked first in
both _handle_api and _evaluate, ahead of all auth-aware exemptions, so
it applies universally to anonymous and authenticated traffic alike.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 1 week ago
parent
commit
df2f5ee30a
  1. 72
      sapl/middleware/ratelimit.py
  2. 131
      sapl/middleware/test_ratelimiter.py
  3. 5
      sapl/settings.py

72
sapl/middleware/ratelimit.py

@ -2,6 +2,9 @@
RateLimitMiddleware cross-pod rate limiting backed by shared Redis. RateLimitMiddleware cross-pod rate limiting backed by shared Redis.
Decision flow (per request): Decision flow (per request):
Both /api/ and non-/api/ paths checked first, before all else:
-1. IP in rl:ip_prefix:blocked (prefix match)? → 429
(universal — applies to authenticated users too, like the UA bot checks)
/api/ paths handled by _handle_api: /api/ paths handled by _handle_api:
0a. OPTIONS? pass (CORS preflight must never be blocked) 0a. OPTIONS? pass (CORS preflight must never be blocked)
0b. Same-origin? pass (SAPL's own browser polling) 0b. Same-origin? pass (SAPL's own browser polling)
@ -67,6 +70,7 @@ RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked'
RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}' RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}'
RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs' RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs'
RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list
RL_IP_PREFIX_BLOCKLIST = 'rl:ip_prefix:blocked' # permanent SET — runtime IP-prefix deny list
RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason
# ZSET indexes — members are full block-key strings, score = expiry unix timestamp. # ZSET indexes — members are full block-key strings, score = expiry unix timestamp.
@ -348,6 +352,12 @@ class RateLimitMiddleware:
_ua_blocklist: set = set() _ua_blocklist: set = set()
_ua_blocklist_fetched_at: float = 0.0 _ua_blocklist_fetched_at: float = 0.0
# In-process cache for the Redis IP-prefix deny list (operator-curated SET
# of dotted-decimal prefixes, e.g. '103.124.225'). Same refresh pattern as
# the UA deny list above, on its own cadence (RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH).
_ip_prefix_blocklist: set = set()
_ip_prefix_blocklist_fetched_at: float = 0.0
def __init__(self, get_response): def __init__(self, get_response):
self.get_response = get_response self.get_response = get_response
self.anon_threshold, self.anon_window = _parse_rate(settings.RATE_LIMITER_RATE) self.anon_threshold, self.anon_window = _parse_rate(settings.RATE_LIMITER_RATE)
@ -444,6 +454,15 @@ class RateLimitMiddleware:
ip = get_client_ip(request) ip = get_client_ip(request)
# 3a. IP-prefix block — operator-curated deny list, applies to everyone
if self._is_ip_prefix_blocked(ip):
logger.warning(
'api_rate_limit_block reason=ip_prefix_blocked ip=%s path=%s user_agent=%s',
ip, request.path, request.META.get('HTTP_USER_AGENT', ''),
)
self._inc_block_metric('api_ip_prefix_blocked')
return self._api_block_response('ip_prefix_blocked')
# 3. Global IP block also covers /api/ # 3. Global IP block also covers /api/
if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)): if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)):
logger.warning( logger.warning(
@ -500,6 +519,10 @@ class RateLimitMiddleware:
def _evaluate(self, request): def _evaluate(self, request):
ip = get_client_ip(request) ip = get_client_ip(request)
# Check 0: IP-prefix block — operator-curated deny list, applies to everyone
if self._is_ip_prefix_blocked(ip):
return {'action': 'block', 'reason': 'ip_prefix_blocked', 'ip': ip}
# Check 1: known bad UA (hardcoded Python list — substring match) # Check 1: known bad UA (hardcoded Python list — substring match)
ua = request.META.get('HTTP_USER_AGENT', '') ua = request.META.get('HTTP_USER_AGENT', '')
for fragment in BOT_UA_FRAGMENTS: for fragment in BOT_UA_FRAGMENTS:
@ -669,3 +692,52 @@ class RateLimitMiddleware:
hashlib.sha256(t.encode()).hexdigest() in self._ua_blocklist hashlib.sha256(t.encode()).hexdigest() in self._ua_blocklist
for t in tokens if t for t in tokens if t
) )
def _refresh_ip_prefix_blocklist(self):
    """
    Reload the IP-prefix deny list from Redis into the class-level cache.

    Reads every member of the RL_IP_PREFIX_BLOCKLIST set (SMEMBERS) through
    the 'ratelimit' django_redis connection, decodes bytes members to str and
    replaces RateLimitMiddleware._ip_prefix_blocklist with the result, then
    stamps RateLimitMiddleware._ip_prefix_blocklist_fetched_at.

    Members are stripped of surrounding whitespace and blank members are
    dropped: a padded entry (e.g. '103.124.225 ' or one carrying a trailing
    newline from a scripted SADD) could never match a client IP and would
    sit in the list protecting nothing.

    Best-effort: any failure is logged at debug level and swallowed. In that
    case neither the cached set nor the fetch timestamp is modified, so the
    previously loaded list (empty if none was ever loaded) stays in effect.
    """
    try:
        # Imported lazily inside the try so a missing/broken django_redis
        # degrades to "no refresh" instead of breaking the middleware.
        from django_redis import get_redis_connection
        client = get_redis_connection('ratelimit')
        raw = client.smembers(RL_IP_PREFIX_BLOCKLIST)
        prefixes = set()
        for member in raw:
            text = member.decode() if isinstance(member, bytes) else member
            text = text.strip()
            if text:
                prefixes.add(text)
        RateLimitMiddleware._ip_prefix_blocklist = prefixes
        RateLimitMiddleware._ip_prefix_blocklist_fetched_at = time.time()
        logger.debug('[RATELIMIT] ip_prefix_blocklist refreshed entries=%d', len(prefixes))
    except Exception as exc:
        logger.debug('[RATELIMIT] ip_prefix_blocklist refresh skipped: %s', exc)
def _is_ip_prefix_blocked(self, ip):
    """
    Return True if `ip` is covered by an entry in the Redis IP-prefix deny list.

    The in-process cache is refreshed first when it is older than
    settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH seconds.

    Matching rules, applied per stored entry:
      * An entry equal to `ip` always matches.
      * An entry with fewer than three dots and no trailing dot
        (e.g. '103.124.225') is anchored on the dot boundary: it matches
        '103.124.225.7' but not '103.124.2250.1' or '103.124.2255', i.e. it
        behaves like an octet-aligned range, not a raw string prefix.
      * An entry ending with a dot (e.g. '103.124.225.') is used as the
        prefix as-is, so the anchor dot is never doubled.
      * An entry with three or more dots and no trailing dot (a full
        dotted-quad such as '103.124.225.7') matches by equality only;
        anchoring it with a further dot would be pointless because no valid
        IPv4 address has a fifth octet.

    Returns False when `ip` is None or empty, and when the cached list is
    empty (which is also the case when it could never be loaded from Redis).
    """
    if time.time() - self._ip_prefix_blocklist_fetched_at > settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH:
        self._refresh_ip_prefix_blocklist()
    # A missing client IP cannot match any prefix; bail out before the
    # str-only operations below instead of raising AttributeError on None.
    if not ip or not self._ip_prefix_blocklist:
        return False
    for prefix in self._ip_prefix_blocklist:
        if ip == prefix:
            return True
        has_trailing_dot = prefix.endswith('.')
        if prefix.count('.') >= 3 and not has_trailing_dot:
            continue  # full address (no trailing dot): only the exact match above applies
        anchored = prefix if has_trailing_dot else prefix + '.'
        if ip.startswith(anchored):
            return True
    return False

131
sapl/middleware/test_ratelimiter.py

@ -7,6 +7,8 @@ middleware instance or the fallback non-atomic path is exercised via the
mock cache. mock cache.
""" """
import time
import pytest import pytest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
from django.test import RequestFactory from django.test import RequestFactory
@ -27,6 +29,7 @@ from sapl.middleware.ratelimit import (
RL_API_IP_REQUESTS, RL_API_IP_REQUESTS,
RL_INDEX_BLOCKED_IPS, RL_INDEX_BLOCKED_IPS,
RL_IP_BLOCKED, RL_IP_BLOCKED,
RL_IP_PREFIX_BLOCKLIST,
RL_USER_BLOCKED, RL_USER_BLOCKED,
smart_key, smart_key,
smart_rate, smart_rate,
@ -100,6 +103,7 @@ def _make_middleware(
mock_settings.API_QUOTA_DAILY = 999999 mock_settings.API_QUOTA_DAILY = 999999
mock_settings.API_QUOTA_WEEKLY = 999999 mock_settings.API_QUOTA_WEEKLY = 999999
mock_settings.RATE_LIMITER_UA_BLOCKLIST_REFRESH = 60 mock_settings.RATE_LIMITER_UA_BLOCKLIST_REFRESH = 60
mock_settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH = 60
mock_settings.API_RATE_LIMIT_ENABLED = api_rate_limit_enabled mock_settings.API_RATE_LIMIT_ENABLED = api_rate_limit_enabled
mock_settings.API_RATE_LIMIT_THRESHOLD = api_threshold mock_settings.API_RATE_LIMIT_THRESHOLD = api_threshold
mock_settings.API_RATE_LIMIT_WINDOW_SECONDS = api_window mock_settings.API_RATE_LIMIT_WINDOW_SECONDS = api_window
@ -273,6 +277,101 @@ def test_index_shard_distributes_across_shards():
assert shards_seen == {'0', '1', '2'} assert shards_seen == {'0', '1', '2'}
# ---------------------------------------------------------------------------
# Check 0 — IP-prefix blocklist (operator-curated SET, dot-anchored matching)
# ---------------------------------------------------------------------------
@pytest.fixture
def _seed_prefix_blocklist():
    """
    Provide a callable that installs a prefix list into the class-level cache.

    The cache lives on RateLimitMiddleware itself (shared by every instance),
    so the values present before the test are captured up front and put back
    once the test has finished.
    """
    cls = RateLimitMiddleware
    previous = (cls._ip_prefix_blocklist, cls._ip_prefix_blocklist_fetched_at)

    def _install(prefixes):
        # Seed the entries and stamp the cache as fetched "now".
        cls._ip_prefix_blocklist = set(prefixes)
        cls._ip_prefix_blocklist_fetched_at = time.time()

    yield _install
    cls._ip_prefix_blocklist, cls._ip_prefix_blocklist_fetched_at = previous
def test_is_ip_prefix_blocked_matches_dot_boundary(_seed_prefix_blocklist):
    """A stored prefix matches only on a whole-octet (dot) boundary."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['103.124.225'])
    assert middleware._is_ip_prefix_blocked('103.124.225.7') is True
    # raw-substring look-alikes must not match: the boundary has to be a full octet
    for near_miss in ('103.124.2250.1', '103.124.2255'):
        assert middleware._is_ip_prefix_blocked(near_miss) is False
def test_is_ip_prefix_blocked_exact_match(_seed_prefix_blocklist):
    """A value identical to a stored entry is reported as blocked."""
    middleware, _ = _make_middleware()
    stored = '103.124.225'
    _seed_prefix_blocklist([stored])
    assert middleware._is_ip_prefix_blocked(stored) is True
def test_is_ip_prefix_blocked_full_address_matches_only_exactly(_seed_prefix_blocklist):
    """A full dotted-quad entry blocks that one address and nothing longer."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['103.124.225.7'])
    blocked = middleware._is_ip_prefix_blocked('103.124.225.7')
    assert blocked is True
    # the entry must not act as a string prefix of a longer address
    longer = middleware._is_ip_prefix_blocked('103.124.225.70')
    assert longer is False
def test_is_ip_prefix_blocked_trailing_dot_in_stored_prefix(_seed_prefix_blocklist):
    """An entry stored with its own trailing dot still works as a prefix."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['103.124.225.'])
    # the anchor must not become '103.124.225..', which would never match
    outcome = middleware._is_ip_prefix_blocked('103.124.225.7')
    assert outcome is True
def test_is_ip_prefix_blocked_empty_list_passes(_seed_prefix_blocklist):
    """With no entries seeded, no address is reported as blocked."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist([])
    outcome = middleware._is_ip_prefix_blocked('1.2.3.4')
    assert outcome is False
def test_is_ip_prefix_blocked_no_match_passes(_seed_prefix_blocklist):
    """An address outside every seeded prefix is not reported as blocked."""
    middleware, _ = _make_middleware()
    seeded = ['103.124.225', '45.177.154']
    _seed_prefix_blocklist(seeded)
    assert middleware._is_ip_prefix_blocked('1.2.3.4') is False
def test_evaluate_blocks_on_ip_prefix_before_other_checks(_seed_prefix_blocklist):
    """_evaluate reports ip_prefix_blocked even when the UA is a bot UA."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['1.2.3'])
    # 'GPTBot' would otherwise hit the known_ua check; the prefix block must win
    request = _anon_req(ip='1.2.3.4', ua='GPTBot')
    expected = {'action': 'block', 'reason': 'ip_prefix_blocked', 'ip': '1.2.3.4'}
    assert middleware._evaluate(request) == expected
def test_evaluate_passes_through_when_ip_not_in_prefix_list(_seed_prefix_blocklist):
    """An address outside the seeded prefixes is evaluated as 'pass'."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['9.9.9'])
    # stub the counter so the request counts as the first one seen
    middleware._incr_with_ttl = MagicMock(return_value=1)
    request = _anon_req(ip='1.2.3.4')
    verdict = middleware._evaluate(request)
    assert verdict['action'] == 'pass'
def test_refresh_ip_prefix_blocklist_populates_set(_seed_prefix_blocklist):
    """A refresh reads the Redis set and stores its members decoded to str."""
    middleware, _ = _make_middleware()
    # begin from an empty cache so whatever the refresh stores is observable
    _seed_prefix_blocklist([])
    fake_redis = MagicMock()
    fake_redis.smembers.return_value = {b'103.124.225', b'45.177.154'}
    with patch('django_redis.get_redis_connection', return_value=fake_redis):
        middleware._refresh_ip_prefix_blocklist()
    fake_redis.smembers.assert_called_once_with(RL_IP_PREFIX_BLOCKLIST)
    expected = {'103.124.225', '45.177.154'}
    assert RateLimitMiddleware._ip_prefix_blocklist == expected
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Check 1 — known bot User-Agent # Check 1 — known bot User-Agent
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -548,6 +647,38 @@ def test_api_malicious_origin_is_not_same_origin():
mw._incr_with_ttl.assert_called_once() mw._incr_with_ttl.assert_called_once()
# ---------------------------------------------------------------------------
# _handle_api — Check 3a: IP-prefix block (operator-curated deny list)
# ---------------------------------------------------------------------------
def test_api_blocks_on_ip_prefix_before_quota_checks(_seed_prefix_blocklist):
    """An API request from a blocked prefix gets a 429 before quota or counter work."""
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['1.2.3'])
    middleware._check_api_quota = MagicMock(return_value=None)
    middleware._incr_with_ttl = MagicMock()
    api_request = _api_req(ip='1.2.3.4')
    response = middleware(api_request)
    # neither the downstream view nor any quota/counter helper may have run
    must_be_untouched = (
        middleware.get_response,
        middleware._check_api_quota,
        middleware._incr_with_ttl,
    )
    for untouched in must_be_untouched:
        untouched.assert_not_called()
    assert response.status_code == 429
    assert response['X-RateLimit-Reason'] == 'ip_prefix_blocked'
def test_api_passes_through_when_ip_not_in_prefix_list(_seed_prefix_blocklist):
    """An API request outside the seeded prefixes reaches the wrapped view."""
    middleware, _ = _make_middleware(api_threshold=999)
    _seed_prefix_blocklist(['9.9.9'])
    # stub quota and counter helpers so only the prefix check is under test
    middleware._check_api_quota = MagicMock(return_value=None)
    middleware._incr_with_ttl = MagicMock(return_value=1)
    api_request = _api_req(ip='1.2.3.4')
    middleware(api_request)
    middleware.get_response.assert_called_once_with(api_request)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# _handle_api — rate limiting and block key isolation # _handle_api — rate limiting and block key isolation
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

5
sapl/settings.py

@ -413,6 +413,11 @@ RATE_LIMITER_RATE_BOT = config('RATE_LIMITER_RATE_BOT', default='5/m')
# Lower values pick up new blocked UAs faster; higher values reduce Redis round-trips. # Lower values pick up new blocked UAs faster; higher values reduce Redis round-trips.
RATE_LIMITER_UA_BLOCKLIST_REFRESH = config('RATE_LIMITER_UA_BLOCKLIST_REFRESH', default=60, cast=int) RATE_LIMITER_UA_BLOCKLIST_REFRESH = config('RATE_LIMITER_UA_BLOCKLIST_REFRESH', default=60, cast=int)
# Seconds between re-fetches of the runtime IP-prefix deny list from Redis DB 1
# (rl:ip_prefix:blocked — operator-curated SET of dotted-decimal prefixes,
# e.g. '103.124.225', managed directly via SADD/SREM).
# Lower values pick up newly added or removed prefixes faster; higher values
# reduce Redis round-trips. Read from the RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH
# config entry, cast to int, defaulting to 60.
RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH = config('RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH', default=60, cast=int)
# Number of shards for the blocked-IP ZSET indexes. # Number of shards for the blocked-IP ZSET indexes.
# Each shard receives IPs deterministically via md5(ip) % N, distributing # Each shard receives IPs deterministically via md5(ip) % N, distributing
# write contention across N keys. Increase for high-throughput deployments. # write contention across N keys. Increase for high-throughput deployments.

Loading…
Cancel
Save