Browse Source

Add Redis-backed IP-prefix blocklist to RateLimitMiddleware

Operators can now SADD/SREM dotted-decimal prefixes (e.g. "103.124.225")
into rl:ip_prefix:blocked to block entire ranges of abusive traffic
network-wide, mirroring the existing runtime UA deny-list pattern
(periodic SMEMBERS refresh into an in-process cache). Checked first in
both _handle_api and _evaluate, ahead of all auth-aware exemptions, so
it applies universally to anonymous and authenticated traffic alike.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 1 week ago
parent
commit
df2f5ee30a
  1. 72
      sapl/middleware/ratelimit.py
  2. 131
      sapl/middleware/test_ratelimiter.py
  3. 5
      sapl/settings.py

72
sapl/middleware/ratelimit.py

@ -2,6 +2,9 @@
RateLimitMiddleware — cross-pod rate limiting backed by shared Redis.
Decision flow (per request):
Both /api/ and non-/api/ paths — checked first, before all else:
-1. IP in rl:ip_prefix:blocked (prefix match)? → 429
(universal — applies to authenticated users too, like the UA bot checks)
/api/ paths handled by _handle_api:
0a. OPTIONS? → pass (CORS preflight must never be blocked)
0b. Same-origin? → pass (SAPL's own browser polling)
@ -67,6 +70,7 @@ RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked'
RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}'
RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs'
RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list
RL_IP_PREFIX_BLOCKLIST = 'rl:ip_prefix:blocked' # permanent SET — runtime IP-prefix deny list
RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason
# ZSET indexes — members are full block-key strings, score = expiry unix timestamp.
@ -348,6 +352,12 @@ class RateLimitMiddleware:
_ua_blocklist: set = set()
_ua_blocklist_fetched_at: float = 0.0
# In-process cache for the Redis IP-prefix deny list (operator-curated SET
# of dotted-decimal prefixes, e.g. '103.124.225'). Same refresh pattern as
# the UA deny list above, on its own cadence (RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH).
_ip_prefix_blocklist: set = set()
_ip_prefix_blocklist_fetched_at: float = 0.0
def __init__(self, get_response):
self.get_response = get_response
self.anon_threshold, self.anon_window = _parse_rate(settings.RATE_LIMITER_RATE)
@ -444,6 +454,15 @@ class RateLimitMiddleware:
ip = get_client_ip(request)
# 3a. IP-prefix block — operator-curated deny list, applies to everyone
if self._is_ip_prefix_blocked(ip):
logger.warning(
'api_rate_limit_block reason=ip_prefix_blocked ip=%s path=%s user_agent=%s',
ip, request.path, request.META.get('HTTP_USER_AGENT', ''),
)
self._inc_block_metric('api_ip_prefix_blocked')
return self._api_block_response('ip_prefix_blocked')
# 3. Global IP block also covers /api/
if self._rl_cache.get(RL_IP_BLOCKED.format(ip=ip)):
logger.warning(
@ -500,6 +519,10 @@ class RateLimitMiddleware:
def _evaluate(self, request):
ip = get_client_ip(request)
# Check 0: IP-prefix block — operator-curated deny list, applies to everyone
if self._is_ip_prefix_blocked(ip):
return {'action': 'block', 'reason': 'ip_prefix_blocked', 'ip': ip}
# Check 1: known bad UA (hardcoded Python list — substring match)
ua = request.META.get('HTTP_USER_AGENT', '')
for fragment in BOT_UA_FRAGMENTS:
@ -669,3 +692,52 @@ class RateLimitMiddleware:
hashlib.sha256(t.encode()).hexdigest() in self._ua_blocklist
for t in tokens if t
)
def _refresh_ip_prefix_blocklist(self):
    """
    Reload the IP-prefix deny list from Redis into the class-level cache.

    Reads every member of the RL_IP_PREFIX_BLOCKLIST SET (SMEMBERS) through
    the 'ratelimit' Redis connection and stores the entries as `str` in
    RateLimitMiddleware._ip_prefix_blocklist, then stamps
    RateLimitMiddleware._ip_prefix_blocklist_fetched_at with the current time.

    Entries are normalised on the way in:
      * bytes are decoded as UTF-8 with errors='replace', so one malformed
        member cannot abort the whole refresh (the resulting string simply
        never matches a real address);
      * surrounding whitespace is stripped, so an entry added with a stray
        space still matches;
      * blank entries are dropped.

    Falls back silently — any exception is logged at debug level and both the
    cached set and its timestamp are left untouched. Because the timestamp is
    not advanced on failure, the staleness check in `_is_ip_prefix_blocked`
    retries on the next call.
    """
    try:
        # Imported lazily so that a missing/misconfigured django_redis is
        # handled by the same best-effort fallback as a Redis outage.
        from django_redis import get_redis_connection
        client = get_redis_connection('ratelimit')
        raw = client.smembers(RL_IP_PREFIX_BLOCKLIST)

        prefixes = set()
        for member in raw:
            if isinstance(member, bytes):
                member = member.decode('utf-8', errors='replace')
            member = member.strip()
            if member:
                prefixes.add(member)

        # Rebind (rather than mutate) the shared set so code already iterating
        # the old set is unaffected.
        RateLimitMiddleware._ip_prefix_blocklist = prefixes
        RateLimitMiddleware._ip_prefix_blocklist_fetched_at = time.time()
        logger.debug('[RATELIMIT] ip_prefix_blocklist refreshed entries=%d', len(prefixes))
    except Exception as exc:
        logger.debug('[RATELIMIT] ip_prefix_blocklist refresh skipped: %s', exc)
def _is_ip_prefix_blocked(self, ip):
"""
Return True if `ip` starts with any prefix in the Redis IP-prefix deny list.
Matches are anchored on the dot boundary so that a stored prefix like
'103.124.225' matches '103.124.225.7' but not '103.124.2250.1' or
'103.124.2255' i.e. the prefix behaves like an octet-aligned /24-ish
network range, not a raw string prefix.
A stored entry that is already a full dotted-quad address (3 dots and
no trailing dot, e.g. '103.124.225.7' for blocking one specific IP) is
matched by equality only building a prefix anchor by appending a
trailing dot would be pointless there, since no valid IPv4 address has
a 5th octet. Entries with a trailing dot (e.g. '103.124.225.') are
still treated as prefixes.
Degrades to False when Redis is unavailable.
"""
if time.time() - self._ip_prefix_blocklist_fetched_at > settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH:
self._refresh_ip_prefix_blocklist()
if not self._ip_prefix_blocklist:
return False
for prefix in self._ip_prefix_blocklist:
if ip == prefix:
return True
if prefix.count('.') >= 3 and not prefix.endswith('.'):
continue # full address (no trailing dot) — only exact match makes sense
anchored = prefix if prefix.endswith('.') else prefix + '.'
if ip.startswith(anchored):
return True
return False

131
sapl/middleware/test_ratelimiter.py

@ -7,6 +7,8 @@ middleware instance or the fallback non-atomic path is exercised via the
mock cache.
"""
import time
import pytest
from unittest.mock import MagicMock, patch
from django.test import RequestFactory
@ -27,6 +29,7 @@ from sapl.middleware.ratelimit import (
RL_API_IP_REQUESTS,
RL_INDEX_BLOCKED_IPS,
RL_IP_BLOCKED,
RL_IP_PREFIX_BLOCKLIST,
RL_USER_BLOCKED,
smart_key,
smart_rate,
@ -100,6 +103,7 @@ def _make_middleware(
mock_settings.API_QUOTA_DAILY = 999999
mock_settings.API_QUOTA_WEEKLY = 999999
mock_settings.RATE_LIMITER_UA_BLOCKLIST_REFRESH = 60
mock_settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH = 60
mock_settings.API_RATE_LIMIT_ENABLED = api_rate_limit_enabled
mock_settings.API_RATE_LIMIT_THRESHOLD = api_threshold
mock_settings.API_RATE_LIMIT_WINDOW_SECONDS = api_window
@ -273,6 +277,101 @@ def test_index_shard_distributes_across_shards():
assert shards_seen == {'0', '1', '2'}
# ---------------------------------------------------------------------------
# Check 0 — IP-prefix blocklist (operator-curated SET, dot-anchored matching)
# ---------------------------------------------------------------------------
@pytest.fixture
def _seed_prefix_blocklist():
    """
    Provide a callable that installs a given IP-prefix deny list on
    RateLimitMiddleware for the duration of one test.

    The deny list and its fetch timestamp are class-level attributes, so the
    values present before the test are captured up front and put back once
    the test has finished.
    """
    previous_state = (
        RateLimitMiddleware._ip_prefix_blocklist,
        RateLimitMiddleware._ip_prefix_blocklist_fetched_at,
    )

    def _seed(prefixes):
        # Stamp the cache with the current time alongside the seeded entries.
        RateLimitMiddleware._ip_prefix_blocklist = set(prefixes)
        RateLimitMiddleware._ip_prefix_blocklist_fetched_at = time.time()

    yield _seed

    (
        RateLimitMiddleware._ip_prefix_blocklist,
        RateLimitMiddleware._ip_prefix_blocklist_fetched_at,
    ) = previous_state
def test_is_ip_prefix_blocked_matches_dot_boundary(_seed_prefix_blocklist):
    """A three-octet prefix blocks addresses inside it, but not look-alike strings."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['103.124.225'])

    inside_range = middleware._is_ip_prefix_blocked('103.124.225.7')
    assert inside_range is True

    # Raw string-prefix hits that do not end on a full-octet boundary must be rejected.
    overlong_octet = middleware._is_ip_prefix_blocked('103.124.2250.1')
    assert overlong_octet is False
    no_boundary = middleware._is_ip_prefix_blocked('103.124.2255')
    assert no_boundary is False
def test_is_ip_prefix_blocked_exact_match(_seed_prefix_blocklist):
    """A value identical to a stored entry is reported as blocked."""
    stored_entry = '103.124.225'
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist([stored_entry])

    verdict = middleware._is_ip_prefix_blocked('103.124.225')
    assert verdict is True
def test_is_ip_prefix_blocked_full_address_matches_only_exactly(_seed_prefix_blocklist):
    """A full dotted-quad entry blocks that one address and nothing longer."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['103.124.225.7'])

    same_address = middleware._is_ip_prefix_blocked('103.124.225.7')
    assert same_address is True

    # The entry is a complete address, so it must not act as a prefix of '…70'.
    longer_address = middleware._is_ip_prefix_blocked('103.124.225.70')
    assert longer_address is False
def test_is_ip_prefix_blocked_trailing_dot_in_stored_prefix(_seed_prefix_blocklist):
    """An entry stored with its own trailing dot still works as a prefix."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['103.124.225.'])

    # If anchoring appended a second dot ('103.124.225..') this lookup would miss.
    verdict = middleware._is_ip_prefix_blocked('103.124.225.7')
    assert verdict is True
def test_is_ip_prefix_blocked_empty_list_passes(_seed_prefix_blocklist):
    """With no entries seeded, no address is reported as blocked."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist([])

    verdict = middleware._is_ip_prefix_blocked('1.2.3.4')
    assert verdict is False
def test_is_ip_prefix_blocked_no_match_passes(_seed_prefix_blocklist):
    """An address outside every seeded prefix is not blocked."""
    seeded_prefixes = ['103.124.225', '45.177.154']
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(seeded_prefixes)

    verdict = middleware._is_ip_prefix_blocked('1.2.3.4')
    assert verdict is False
def test_evaluate_blocks_on_ip_prefix_before_other_checks(_seed_prefix_blocklist):
    """_evaluate reports the prefix block as the reason for a listed address."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['1.2.3'])

    # The 'GPTBot' UA is meant to also trip the known-UA check; the expected
    # reason below shows the prefix block is the one that is reported.
    request = _anon_req(ip='1.2.3.4', ua='GPTBot')
    decision = middleware._evaluate(request)

    expected = {'action': 'block', 'reason': 'ip_prefix_blocked', 'ip': '1.2.3.4'}
    assert decision == expected
def test_evaluate_passes_through_when_ip_not_in_prefix_list(_seed_prefix_blocklist):
    """_evaluate lets a request through when its address is outside the deny list."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['9.9.9'])
    # Stub the counter so it always reports a count of 1.
    middleware._incr_with_ttl = MagicMock(return_value=1)

    request = _anon_req(ip='1.2.3.4')
    decision = middleware._evaluate(request)

    assert decision['action'] == 'pass'
def test_refresh_ip_prefix_blocklist_populates_set(_seed_prefix_blocklist):
    """A refresh reads the Redis SET and stores its members as decoded strings."""
    middleware, _ = _make_middleware()
    # Begin from an empty cache so whatever the refresh stores is observable.
    _seed_prefix_blocklist([])

    redis_stub = MagicMock()
    redis_stub.smembers.return_value = {b'103.124.225', b'45.177.154'}

    with patch('django_redis.get_redis_connection', return_value=redis_stub):
        middleware._refresh_ip_prefix_blocklist()

    redis_stub.smembers.assert_called_once_with(RL_IP_PREFIX_BLOCKLIST)
    expected_entries = {'103.124.225', '45.177.154'}
    assert RateLimitMiddleware._ip_prefix_blocklist == expected_entries
# ---------------------------------------------------------------------------
# Check 1 — known bot User-Agent
# ---------------------------------------------------------------------------
@ -548,6 +647,38 @@ def test_api_malicious_origin_is_not_same_origin():
mw._incr_with_ttl.assert_called_once()
# ---------------------------------------------------------------------------
# _handle_api — Check 3a: IP-prefix block (operator-curated deny list)
# ---------------------------------------------------------------------------
def test_api_blocks_on_ip_prefix_before_quota_checks(_seed_prefix_blocklist):
    """An /api/ request from a listed prefix gets a 429 before quota or counters run."""
    middleware, _ = _make_middleware()
    _seed_prefix_blocklist(['1.2.3'])
    # Replace the later stages with mocks so any call to them can be detected.
    middleware._check_api_quota = MagicMock(return_value=None)
    middleware._incr_with_ttl = MagicMock()

    api_request = _api_req(ip='1.2.3.4')
    response = middleware(api_request)

    # Neither the wrapped view nor the quota / counter stages were reached.
    middleware.get_response.assert_not_called()
    middleware._check_api_quota.assert_not_called()
    middleware._incr_with_ttl.assert_not_called()

    assert response.status_code == 429
    assert response['X-RateLimit-Reason'] == 'ip_prefix_blocked'
def test_api_passes_through_when_ip_not_in_prefix_list(_seed_prefix_blocklist):
    """An /api/ request from an unlisted address reaches the wrapped view."""
    middleware, _ = _make_middleware(api_threshold=999)
    _seed_prefix_blocklist(['9.9.9'])
    # Stub the quota check and the counter with fixed return values.
    middleware._check_api_quota = MagicMock(return_value=None)
    middleware._incr_with_ttl = MagicMock(return_value=1)

    api_request = _api_req(ip='1.2.3.4')
    middleware(api_request)

    middleware.get_response.assert_called_once_with(api_request)
# ---------------------------------------------------------------------------
# _handle_api — rate limiting and block key isolation
# ---------------------------------------------------------------------------

5
sapl/settings.py

@ -413,6 +413,11 @@ RATE_LIMITER_RATE_BOT = config('RATE_LIMITER_RATE_BOT', default='5/m')
# Lower values pick up new blocked UAs faster; higher values reduce Redis round-trips.
RATE_LIMITER_UA_BLOCKLIST_REFRESH = config('RATE_LIMITER_UA_BLOCKLIST_REFRESH', default=60, cast=int)
# Seconds between re-fetches of the runtime IP-prefix deny list from Redis DB 1
# (rl:ip_prefix:blocked — operator-curated SET of dotted-decimal prefixes,
# e.g. '103.124.225', managed directly via SADD/SREM).
RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH = config('RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH', default=60, cast=int)
# Number of shards for the blocked-IP ZSET indexes.
# Each shard receives IPs deterministically via md5(ip) % N, distributing
# write contention across N keys. Increase for high-throughput deployments.

Loading…
Cancel
Save