diff --git a/sapl/middleware/ratelimit.py b/sapl/middleware/ratelimit.py index 146de9474..8c2489d57 100644 --- a/sapl/middleware/ratelimit.py +++ b/sapl/middleware/ratelimit.py @@ -2,6 +2,9 @@ RateLimitMiddleware — cross-pod rate limiting backed by shared Redis. Decision flow (per request): + Both /api/ and non-/api/ paths — checked first, before all else: + -1. IP in rl:ip_prefix:blocked (prefix match)? → 429 + (universal — applies to authenticated users too, like the UA bot checks) /api/ paths — handled by _handle_api: 0a. OPTIONS? → pass (CORS preflight must never be blocked) 0b. Same-origin? → pass (SAPL's own browser polling) @@ -67,6 +70,7 @@ RL_USER_BLOCKED = 'rl:{ns}:user:{uid}:blocked' RL_NS_WINDOW = 'rl:{ns}:ip:{ip}:w:{bucket}' RL_PATH_REQUESTS = 'rl:{ns}:path:{sha256}:reqs' RL_UA_BLOCKLIST = 'rl:bot:ua:blocked' # permanent SET — runtime UA deny list +RL_IP_PREFIX_BLOCKLIST = 'rl:ip_prefix:blocked' # permanent SET — runtime IP-prefix deny list RL_METRICS_BLOCKED = 'rl:metrics:{ns}:{date}:blocked:{reason}' # daily counter per block reason # ZSET indexes — members are full block-key strings, score = expiry unix timestamp. @@ -348,6 +352,12 @@ class RateLimitMiddleware: _ua_blocklist: set = set() _ua_blocklist_fetched_at: float = 0.0 + # In-process cache for the Redis IP-prefix deny list (operator-curated SET + # of dotted-decimal prefixes, e.g. '103.124.225'). Same refresh pattern as + # the UA deny list above, on its own cadence (RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH). + _ip_prefix_blocklist: set = set() + _ip_prefix_blocklist_fetched_at: float = 0.0 + def __init__(self, get_response): self.get_response = get_response self.anon_threshold, self.anon_window = _parse_rate(settings.RATE_LIMITER_RATE) @@ -444,6 +454,15 @@ class RateLimitMiddleware: ip = get_client_ip(request) + # 3a. 
def _refresh_ip_prefix_blocklist(self):
    """
    Reload the IP-prefix deny list from Redis into the class-level cache.

    Fetches every member of the RL_IP_PREFIX_BLOCKLIST set (SMEMBERS) over
    the 'ratelimit' django-redis connection and publishes the result on
    RateLimitMiddleware itself, so every instance in this process shares
    one copy. Members are decoded when Redis returns bytes, stripped of
    surrounding whitespace, and dropped when empty, so a stray space or
    newline in an operator's SADD cannot silently disable an entry.

    Best-effort: any failure inside the fetch (import error, connection
    error, undecodable member) is logged at debug level and the previously
    cached set and timestamp are left untouched.
    """
    try:
        from django_redis import get_redis_connection
        client = get_redis_connection('ratelimit')
        raw = client.smembers(RL_IP_PREFIX_BLOCKLIST)
        entries = set()
        for member in raw:
            text = member.decode() if isinstance(member, bytes) else str(member)
            text = text.strip()
            if text:
                entries.add(text)
        # Rebind rather than mutate: readers iterating the old set are unaffected.
        RateLimitMiddleware._ip_prefix_blocklist = entries
        RateLimitMiddleware._ip_prefix_blocklist_fetched_at = time.time()
        logger.debug('[RATELIMIT] ip_prefix_blocklist refreshed entries=%d', len(entries))
    except Exception as exc:
        # NOTE(review): the fetch timestamp is not advanced on failure, so the
        # caller will attempt another refresh on its next invocation for as
        # long as Redis stays unreachable — confirm retry-per-call is intended.
        logger.debug('[RATELIMIT] ip_prefix_blocklist refresh skipped: %s', exc)


def _is_ip_prefix_blocked(self, ip):
    """
    Return True if `ip` is covered by an entry in the IP-prefix deny list.

    The cached list is refreshed first when it is older than
    settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH seconds.

    Matching is purely textual and anchored on the dot boundary — no IP
    parsing or CIDR arithmetic is performed:

    * An entry equal to `ip` always matches.
    * An entry ending in '.' (e.g. '103.124.225.') is used as a prefix as-is.
    * An entry with fewer than three dots (e.g. '103.124.225') has a '.'
      appended before the prefix test, so it matches '103.124.225.7' but
      not '103.124.2250.1' or '103.124.2255'.
    * An entry with three or more dots and no trailing dot (a full
      dotted-quad such as '103.124.225.7') matches by equality only, so it
      does not also block '103.124.225.70'.

    Returns False when the list is empty (including when it could not be
    loaded) and when `ip` is None or empty, so a missing client address
    fails open instead of raising.
    """
    age = time.time() - self._ip_prefix_blocklist_fetched_at
    if age > settings.RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH:
        self._refresh_ip_prefix_blocklist()

    # The address comes from a helper defined elsewhere; guard against it
    # yielding nothing so the string methods below cannot raise.
    if not ip:
        return False

    for prefix in self._ip_prefix_blocklist:
        if ip == prefix:
            return True
        if prefix.endswith('.'):
            anchored = prefix
        elif prefix.count('.') >= 3:
            # Full address without a trailing dot: equality (checked above)
            # is the only sensible match.
            continue
        else:
            anchored = prefix + '.'
        if ip.startswith(anchored):
            return True
    return False
# ---------------------------------------------------------------------------
# Check 0 — IP-prefix blocklist (operator-curated SET, dot-anchored matching)
# ---------------------------------------------------------------------------

@pytest.fixture
def _seed_prefix_blocklist():
    """
    Yield a callable that installs the given prefixes as the class-level
    deny list and stamps it as freshly fetched. The list lives on
    RateLimitMiddleware (shared by all instances), so the values present
    before the test are put back once the test is done.
    """
    cls = RateLimitMiddleware
    original = (cls._ip_prefix_blocklist, cls._ip_prefix_blocklist_fetched_at)

    def install(entries):
        cls._ip_prefix_blocklist = set(entries)
        cls._ip_prefix_blocklist_fetched_at = time.time()

    yield install

    cls._ip_prefix_blocklist, cls._ip_prefix_blocklist_fetched_at = original


def test_is_ip_prefix_blocked_matches_dot_boundary(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['103.124.225'])
    # Only a full-octet boundary counts; a raw substring prefix must not match.
    expectations = (
        ('103.124.225.7', True),
        ('103.124.2250.1', False),
        ('103.124.2255', False),
    )
    for address, expected in expectations:
        assert middleware._is_ip_prefix_blocked(address) is expected


def test_is_ip_prefix_blocked_exact_match(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['103.124.225'])
    verdict = middleware._is_ip_prefix_blocked('103.124.225')
    assert verdict is True


def test_is_ip_prefix_blocked_full_address_matches_only_exactly(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['103.124.225.7'])
    same_address = middleware._is_ip_prefix_blocked('103.124.225.7')
    # A complete dotted-quad entry is not a prefix for longer strings.
    longer_address = middleware._is_ip_prefix_blocked('103.124.225.70')
    assert same_address is True
    assert longer_address is False


def test_is_ip_prefix_blocked_trailing_dot_in_stored_prefix(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['103.124.225.'])
    # The anchor must not become '103.124.225..', which would never match.
    verdict = middleware._is_ip_prefix_blocked('103.124.225.7')
    assert verdict is True


def test_is_ip_prefix_blocked_empty_list_passes(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist([])
    verdict = middleware._is_ip_prefix_blocked('1.2.3.4')
    assert verdict is False


def test_is_ip_prefix_blocked_no_match_passes(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['103.124.225', '45.177.154'])
    verdict = middleware._is_ip_prefix_blocked('1.2.3.4')
    assert verdict is False


def test_evaluate_blocks_on_ip_prefix_before_other_checks(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['1.2.3'])
    # The UA alone would trip the known_ua check; the prefix block has to win.
    request = _anon_req(ip='1.2.3.4', ua='GPTBot')
    expected = {'action': 'block', 'reason': 'ip_prefix_blocked', 'ip': '1.2.3.4'}
    assert middleware._evaluate(request) == expected


def test_evaluate_passes_through_when_ip_not_in_prefix_list(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['9.9.9'])
    middleware._incr_with_ttl = MagicMock(return_value=1)
    outcome = middleware._evaluate(_anon_req(ip='1.2.3.4'))
    assert outcome['action'] == 'pass'


def test_refresh_ip_prefix_blocklist_populates_set(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    # Begin from an empty list so the effect of the refresh is visible.
    _seed_prefix_blocklist([])

    redis_client = MagicMock()
    redis_client.smembers.return_value = {b'103.124.225', b'45.177.154'}
    connection_patch = patch('django_redis.get_redis_connection', return_value=redis_client)
    with connection_patch:
        middleware._refresh_ip_prefix_blocklist()

    redis_client.smembers.assert_called_once_with(RL_IP_PREFIX_BLOCKLIST)
    assert RateLimitMiddleware._ip_prefix_blocklist == {'103.124.225', '45.177.154'}


# ---------------------------------------------------------------------------
# _handle_api — Check 3a: IP-prefix block (operator-curated deny list)
# ---------------------------------------------------------------------------

def test_api_blocks_on_ip_prefix_before_quota_checks(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware()
    _seed_prefix_blocklist(['1.2.3'])
    middleware._check_api_quota = MagicMock(return_value=None)
    middleware._incr_with_ttl = MagicMock()
    api_request = _api_req(ip='1.2.3.4')

    response = middleware(api_request)

    # Nothing downstream of the prefix check may have run.
    untouched = (
        middleware.get_response,
        middleware._check_api_quota,
        middleware._incr_with_ttl,
    )
    for collaborator in untouched:
        collaborator.assert_not_called()
    assert response.status_code == 429
    assert response['X-RateLimit-Reason'] == 'ip_prefix_blocked'


def test_api_passes_through_when_ip_not_in_prefix_list(_seed_prefix_blocklist):
    middleware, _cache = _make_middleware(api_threshold=999)
    _seed_prefix_blocklist(['9.9.9'])
    middleware._check_api_quota = MagicMock(return_value=None)
    middleware._incr_with_ttl = MagicMock(return_value=1)
    api_request = _api_req(ip='1.2.3.4')

    middleware(api_request)

    middleware.get_response.assert_called_once_with(api_request)