From 6b6bc810cb94de95ab0910e2ec1df4193121ac94 Mon Sep 17 00:00:00 2001
From: Edward Oliveira <edwardr@senado.gov.br>
Date: Mon, 8 Jun 2026 16:06:00 -0300
Subject: [PATCH] Return 403 instead of 429 for IP-prefix blocklist hits;
 document feature

A permanent operator-curated deny decision (rl:ip_prefix:blocked) is not a
transient rate limit, so it should surface as 403 Forbidden with no
Retry-After rather than 429. Also brings RATE-LIMITER-PLAN.md up to date
with the IP-prefix blocklist feature and the same-origin bypass fix,
including redis-cli commands to populate/remove/list the deny-list set.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 plan/RATE-LIMITER-PLAN.md           | 137 ++++++++++++++++++++++++++--
 sapl/middleware/ratelimit.py        |  16 +++-
 sapl/middleware/test_ratelimiter.py |  14 ++-
 3 files changed, 151 insertions(+), 16 deletions(-)
diff --git a/plan/RATE-LIMITER-PLAN.md b/plan/RATE-LIMITER-PLAN.md
index 396f83595..cd9da2ea1 100644
--- a/plan/RATE-LIMITER-PLAN.md
+++ b/plan/RATE-LIMITER-PLAN.md
@@ -111,6 +111,7 @@ graph TD
 | Blocked-user index | `rl:index:blocked_users` | permanent ZSET | 1 | negligible |
 | Path counter | `rl:{ns}:path:{sha256}:reqs` | 60 s | 1 | ~0.3 MB |
 | UA deny list | `rl:bot:ua:blocked` | permanent SET | 1 | ~0.03 MB |
+| IP-prefix deny list | `rl:ip_prefix:blocked` | permanent SET | 1 | ~0.01 MB |
 | NS/IP/window counter | `rl:{ns}:ip:{ip}:w:{bucket}` | 120 s | 1 | ~0.6 MB |
 | API daily quota (all callers, by IP) | `quota:{ns}:daily:{date}` (HASH, field=ip) | 24 h | 1 | ~32 MB at 1 200 tenants |
 | API weekly quota (all callers, by IP) | `quota:{ns}:weekly:{week}` (HASH, field=ip) | 7 d | 1 | ~158 MB at 1 200 tenants |
@@ -146,6 +147,8 @@ graph TD
 | API threshold raised (2026-05-11) | 60→120 req/min | Aligns with legitimate integration patterns; slow-drip abuse is caught by the daily quota |
 | API block TTL reduced (2026-05-11) | 300→60 s | Shorter cooldown reduces false-positive lockout duration for shared IPs |
 | API quota raised (2026-05-11) | 1 000→100 000/day · 7 000→700 000/week | Quota serves as outer envelope for all-day slow scrapers; 1 000/day was exhausted too quickly for legitimate integrations |
+| IP-prefix deny list added (2026-05-11) | `rl:ip_prefix:blocked` permanent SET, dot-boundary-anchored prefix match (`df2f5ee30`) | Operators need to block whole ranges (e.g. `103.124.225.*`) at runtime by curating prefixes, not individual IPs; mirrors the existing `rl:bot:ua:blocked` runtime-deny-list pattern |
+| Same-origin bypass moved after block checks (2026-05-11) | `_is_same_origin` now runs *after* the IP-prefix / global-IP / API-IP block checks in `_handle_api` (`5f71354f5`) | `Origin`/`Referer` are client-controlled and trivially spoofable; the bypass was short-circuiting before `ip = get_client_ip(request)` and before every block lookup, letting a forged `Origin` header defeat an operator-set `rl:ip:<ip>:blocked` key entirely |
 
 ---
 
@@ -446,6 +449,82 @@ rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
 
 ---
 
+## Manage the IP-prefix deny list (`rl:ip_prefix:blocked`)
+
+`rl:ip_prefix:blocked` (constant `RL_IP_PREFIX_BLOCKLIST`, added in `df2f5ee30`) is a
+**permanent Redis SET in DB 1**. Each member is a dotted-decimal **prefix string**
+such as `103.124.225` or `45.177.154` — or a complete dotted-quad address such as
+`201.23.71.13` to block one specific IP. Any client IP that starts with a stored
+prefix on a **dot boundary** is blocked, e.g. `103.124.225` matches `103.124.225.7`
+but not `103.124.2250.1` or `103.124.2255` (an octet-aligned, /24-ish range match —
+not a raw string prefix). A stored entry that is already a full 4-octet address
+(no trailing dot) is matched by equality only.
+
+This check runs **before everything else** — both at the top of `_evaluate` (non-`/api/`
+paths) and at the top of `_handle_api` (`/api/` paths) — and applies universally to
+**all traffic**: anonymous, authenticated, `/api/` and regular paths alike. It mirrors
+the `rl:bot:ua:blocked` runtime-deny-list pattern: the SET is fetched via `SMEMBERS`
+at most once per `RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH` seconds (default `60`)
+per worker process, so additions/removals take effect fleet-wide without a code deploy
+or restart — only the in-process cache refresh delay.
+
+### Populate — block a range or a single IP
+
+```bash
+# Block an entire /24-ish range by prefix (matches 103.124.225.0 – 103.124.225.255)
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SADD rl:ip_prefix:blocked 103.124.225
+
+# Block multiple ranges in one call
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SADD rl:ip_prefix:blocked 103.124.225 45.177.154
+
+# Block one specific IP (matched by equality only — no range semantics)
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SADD rl:ip_prefix:blocked 201.23.71.13
+```
+
+### Remove — unblock a range or a single IP
+
+```bash
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SREM rl:ip_prefix:blocked 103.124.225
+
+# Remove multiple entries in one call
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SREM rl:ip_prefix:blocked 103.124.225 45.177.154 201.23.71.13
+```
+
+### List — inspect the current deny list
+
+```bash
+# All prefixes/addresses currently blocked
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SMEMBERS rl:ip_prefix:blocked
+
+# Count of entries
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SCARD rl:ip_prefix:blocked
+
+# Check whether a specific prefix/address is already in the list
+rancher kubectl exec -n sapl-redis deploy/sapl-redis -- redis-cli -n 1 \
+  SISMEMBER rl:ip_prefix:blocked 103.124.225
+```
+
+Via port-forward (local machine):
+
+```bash
+redis-cli -n 1 SADD    rl:ip_prefix:blocked 103.124.225
+redis-cli -n 1 SREM    rl:ip_prefix:blocked 103.124.225
+redis-cli -n 1 SMEMBERS rl:ip_prefix:blocked
+```
+
+> **Note**: changes are picked up by each worker within `RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH`
+> seconds (default `60`) — there is no immediate fleet-wide invalidation, by design
+> (mirrors `rl:bot:ua:blocked`; avoids a Redis round-trip on every request).
+
+---
+
 ## Local standalone Redis (development / testing)
 
 No Kubernetes? Run Redis directly with Docker:
@@ -671,6 +750,10 @@ anonymous/bot traffic.
 Decision flow inside `RateLimitMiddleware.__call__()` / `_evaluate()`:
 
 ```
+-1. IP starts with a prefix in rl:ip_prefix:blocked? → 403  reason=ip_prefix_blocked
+    (checked first, before everything else; universal — applies to
+     authenticated users too; 403 + no Retry-After, since this is a
+     permanent operator-curated deny decision, not a transient rate limit)
 0.  /api/
  path AND consumer daily/weekly quota exceeded?  → 429  reason=quota_daily / quota_weekly
     (per-consumer: auth users by pk, anon by masked IP; fail-open when Redis unavailable)
@@ -694,7 +777,11 @@ Decision flow inside `RateLimitMiddleware.__call__()` / `_evaluate()`:
 
 ```mermaid
 flowchart TD
-    REQ([Request]) --> C0
+    REQ([Request]) --> CM1
+
+    CM1{"IP starts with a prefix in\nrl:ip_prefix:blocked?"}
+    CM1 -- "yes — operator-curated permanent deny list" --> R_PREFIX([403\nip_prefix_blocked\nno Retry-After])
+    CM1 -- no --> C0
 
     C0{"/api/ path AND\ndaily/weekly quota exceeded?"}
     C0 -- "yes — quota:{ns}:daily/weekly:{period}:user/ip exceeded" --> R_QUOTA([429\nquota_daily / quota_weekly])
@@ -1214,6 +1301,7 @@ Redis PDF caching would solve "high request volume reaching the file layer" —
 | 1 | Path counter (`/media/`) | `rl:{ns}:path:{sha256}:reqs` | 60 s | — (observability only) | `RL_PATH_REQUESTS` |
 | 1 | Path counter (`/static/`) | `rl:{ns}:path:{sha256}:reqs` | 60 s | — | *Future* (requires OpenResty/Lua) |
 | 1 | UA deny list | `rl:bot:ua:blocked` | permanent SET | — (block on match) | `RL_UA_BLOCKLIST` |
+| 1 | IP-prefix deny list | `rl:ip_prefix:blocked` | permanent SET | — (block on prefix match) | `RL_IP_PREFIX_BLOCKLIST` |
 | 1 | API daily quota (all callers, by IP) | `quota:{ns}:daily:{date}` HASH, field=ip | 24 h | 100 000 (`API_QUOTA_DAILY`) | `QUOTA_DAILY_HASH` |
 | 1 | API weekly quota (all callers, by IP) | `quota:{ns}:weekly:{week}` HASH, field=ip | 7 d | 700 000 (`API_QUOTA_WEEKLY`) | `QUOTA_WEEKLY_HASH` |
 | 1 | API IP rate counter (all callers, ns-scoped) | `rl:api:ns:{ns}:ip:{ip}:reqs` | 60 s (`API_RATE_LIMIT_WINDOW_SECONDS`) | 120 (`API_RATE_LIMIT_THRESHOLD`) | `RL_API_IP_REQUESTS` |
@@ -1357,16 +1445,18 @@ insufficient:
 
 ### Solution — `_handle_api`
 
-`RateLimitMiddleware.__call__` delegates all `/api/` requests to `_handle_api`, which applies a separate, scoped decision chain:
+`RateLimitMiddleware.__call__` delegates all `/api/` requests to `_handle_api`, which applies a separate, scoped decision chain (current order, **post-`5f71354f5` fix** — see §Security fix below for why block checks now precede the same-origin bypass):
 
 | Step | Condition | Action |
 |------|-----------|--------|
 | 1 | `OPTIONS` method | Pass — CORS preflight must never be blocked |
-| 2 | Same-origin (`_is_same_origin`) | Pass — SAPL's own browser polling; no counter |
-| 3 | `rl:ip:<ip>:blocked` exists | 429 `global_ip_blocked` — global block also covers `/api/` |
-| 4 | `rl:api:ns:<ns>:ip:<ip>:blocked` exists | 429 `api_ip_blocked` — API-only, tenant-scoped block |
-| 5 | Daily/weekly quota exceeded | 429 `quota_daily` / `quota_weekly` |
-| 6 | API counter ≥ threshold (all callers) | Write `rl:api:ns:<ns>:ip:<ip>:blocked`; 429 `api_threshold_exceeded` |
+| 2 | `ip = get_client_ip(request)` | (no action — IP resolved up front so every check below can use it) |
+| 3 | IP-prefix block (`_is_ip_prefix_blocked`) | **403** `ip_prefix_blocked` — operator-curated range block; applies to everyone. 403 (not 429) and no `Retry-After`: this is a permanent deny decision, not a transient rate limit |
+| 4 | `rl:ip:<ip>:blocked` exists | 429 `global_ip_blocked` — global block also covers `/api/` |
+| 5 | `rl:api:ns:<ns>:ip:<ip>:blocked` exists | 429 `api_ip_blocked` — API-only, tenant-scoped block |
+| 6 | Same-origin (`_is_same_origin`) | Pass, skipping steps 7-8 only — SAPL's own browser polling is exempt from quota/rate-limit *accounting*, never from the block checks above |
+| 7 | Daily/weekly quota exceeded | 429 `quota_daily` / `quota_weekly` |
+| 8 | API counter ≥ threshold (all callers) | Write `rl:api:ns:<ns>:ip:<ip>:blocked`; 429 `api_threshold_exceeded` |
 | — | Under threshold | Pass |
 
 Auth status is **not checked**. Authenticated and anonymous callers are treated identically — both keyed by IP, both subject to the same threshold and quota. `_evaluate` (240/min per-user) still governs all non-`/api/` paths.
@@ -1374,6 +1464,33 @@ Auth status is **not checked**. Authenticated and anonymous callers are treated
 **Key invariant**: `rl:ip:<ip>:blocked` is **never written** because of `/api/` abuse.
 `rl:api:ns:<ns>:ip:<ip>:blocked` is tenant-scoped and blocks only `/api/` — page requests from the same NAT continue, and a block in one k8s namespace does not affect other tenants.
 
+### Security fix — same-origin bypass let blocked IPs through (`5f71354f5`, 2026-05-11)
+
+**Symptom**: an operator set `rl:ip:201.23.71.13:blocked` (the global `RL_IP_BLOCKED` key) with a large TTL, expecting that IP to be blocked everywhere — per the middleware's own documented decision flow, "global IP block also covers `/api/`". Requests from that IP to `/api/` endpoints kept getting through.
+
+**Root cause**: in the original `_handle_api`, the same-origin check ran — and could `return` — *before* `ip = get_client_ip(request)` and before any of the block-key lookups:
+
+```python
+def _handle_api(self, request):
+    if request.method == 'OPTIONS':
+        return self.get_response(request)
+    if self.api_same_origin_bypass and _is_same_origin(request):
+        return self.get_response(request)          # <-- returned HERE, nothing below ever ran
+    ip = get_client_ip(request)
+    # ...IP-prefix block, global IP block, API block, quota, rate limit
+```
+
+`_is_same_origin` decides "same origin" purely by parsing the client-supplied `Origin`/`Referer` headers and comparing their host to `request.get_host()` — **both headers are entirely client-controlled**, with no CSRF-token or session/cookie verification backing them. Any bot can send `Origin: https://<sapl-host>` and be treated as "SAPL's own browser polling," getting a complete free pass on the IP-prefix blocklist, the global IP block, the API-specific IP block, daily/weekly quota, and the per-minute API rate limit. `201.23.71.13` was simply sending requests with a spoofed `Origin`/`Referer` matching the SAPL host.
+
+**Fix**: reorder `_handle_api` so block checks — decisions already made by the system or an operator about an IP — always run first and can never be bypassed by spoofable headers. The same-origin bypass now sits *after* the IP-prefix, global-IP, and API-IP block checks (step 6 above) and exempts a request from quota/rate-limit *accounting* only, never from an active block. See the updated decision-chain table above for the full new order.
+
+```bash
+# Manual verification (against local Redis, ratelimit cache / DB 1):
+redis-cli -n 1 SET rl:ip:201.23.71.13:blocked 1 EX 3600
+curl -H 'X-Forwarded-For: 201.23.71.13' -H 'Origin: https://<your-dev-host>' http://localhost:8000/api/materia/
+# Before the fix: 200 (bypassed). After the fix: 429 with X-RateLimit-Reason: global_ip_blocked
+```
+
 ### Same-origin detection — `_is_same_origin`
 
 Replaces `ApiEmergencySameSiteOnlyMiddleware._came_from_same_host` (deleted).
@@ -1405,9 +1522,9 @@ header means the browser knows this is cross-origin, regardless of what `Referer
 | File | Change |
 |------|--------|
 | `sapl/middleware/api_emergency_block.py` | Deleted |
-| `sapl/settings.py` | Removed `ApiEmergencySameSiteOnlyMiddleware` from `MIDDLEWARE`; added `API_RATE_LIMIT_*` and `API_QUOTA_*` settings; auth-specific quota settings removed; threshold 60→120; block TTL 300→60 s; quota 1 000→100 000/day |
-| `sapl/middleware/ratelimit.py` | Added `RL_API_IP_REQUESTS`, `RL_API_IP_BLOCKED` (both ns-scoped), `RL_INDEX_API_BLOCKED_IPS` constants; added `_is_same_origin`; extended `__init__`; added `_handle_api`, `_api_block_response`; auth check removed from `_handle_api` and `_check_api_quota` — all callers keyed by IP |
-| `sapl/middleware/test_ratelimiter.py` | Extended `_make_middleware`; added 17 new tests |
+| `sapl/settings.py` | Removed `ApiEmergencySameSiteOnlyMiddleware` from `MIDDLEWARE`; added `API_RATE_LIMIT_*` and `API_QUOTA_*` settings; auth-specific quota settings removed; threshold 60→120; block TTL 300→60 s; quota 1 000→100 000/day; added `RATE_LIMITER_IP_PREFIX_BLOCKLIST_REFRESH` (default `60`) |
+| `sapl/middleware/ratelimit.py` | Added `RL_API_IP_REQUESTS`, `RL_API_IP_BLOCKED` (both ns-scoped), `RL_INDEX_API_BLOCKED_IPS` constants; added `_is_same_origin`; extended `__init__`; added `_handle_api`, `_api_block_response`; auth check removed from `_handle_api` and `_check_api_quota` — all callers keyed by IP. Later (`df2f5ee30`): added `RL_IP_PREFIX_BLOCKLIST` constant, `_ip_prefix_blocklist`/`_ip_prefix_blocklist_fetched_at` class state, `_refresh_ip_prefix_blocklist`, `_is_ip_prefix_blocked`, and a top-of-`_evaluate` + top-of-`_handle_api` check. Later still (`5f71354f5`): reordered `_handle_api` so `ip = get_client_ip(request)` and all block-key lookups (IP-prefix, global IP, API IP) run *before* the same-origin bypass — closing the spoofed-`Origin` bypass described above |
+| `sapl/middleware/test_ratelimiter.py` | Extended `_make_middleware`; added 17 new tests for `_handle_api`/quota/same-origin. Later: added `_seed_prefix_blocklist` fixture and 11 tests for the IP-prefix blocklist; added 5 regression tests proving same-origin headers can no longer bypass an active block (`test_api_same_origin_does_not_bypass_global_ip_block`, `..._api_ip_block`, `..._ip_prefix_block`, `test_api_same_origin_still_skips_quota_and_rate_limit_when_not_blocked`) |
 
 ---
 
diff --git a/sapl/middleware/ratelimit.py b/sapl/middleware/ratelimit.py
index 0aafbfe3d..0d6885231 100644
--- a/sapl/middleware/ratelimit.py
+++ b/sapl/middleware/ratelimit.py
@@ -3,11 +3,12 @@ RateLimitMiddleware — cross-pod rate limiting backed by shared Redis.
 
 Decision flow (per request):
   Both /api/ and non-/api/ paths — checked first, before all else:
-    -1. IP in rl:ip_prefix:blocked (prefix match)? → 429
+    -1. IP in rl:ip_prefix:blocked (prefix match)? → 403  (Forbidden — not a rate
+        limit; an operator-curated permanent deny list, no Retry-After)
         (universal — applies to authenticated users too, like the UA bot checks)
   /api/ paths — handled by _handle_api:
     0a. OPTIONS?                → pass (CORS preflight must never be blocked)
-    0b. rl:ip_prefix:blocked?   → 429  (prefix match; see -1 above)
+    0b. rl:ip_prefix:blocked?   → 403  (prefix match; see -1 above)
     0c. rl:ip:<ip>:blocked?     → 429  (global block also covers /api/)
     0d. rl:api:ip:<ip>:blocked? → 429  (API-only block)
         Block checks 0b-0d always run before the same-origin check below —
@@ -415,8 +416,11 @@ class RateLimitMiddleware:
                 extra={'ua': request.META.get('HTTP_USER_AGENT', '')},
             )
             self._inc_block_metric(decision['reason'])
-            response = HttpResponse(status=429)
-            response['Retry-After'] = self.BLOCK_TTL
+            if decision['reason'] == 'ip_prefix_blocked':
+                response = HttpResponse(status=403)
+            else:
+                response = HttpResponse(status=429)
+                response['Retry-After'] = self.BLOCK_TTL
             response['X-RateLimit-Reason'] = decision['reason']
             return response
         logger.debug(
@@ -437,6 +441,10 @@ class RateLimitMiddleware:
 
     def _api_block_response(self, reason, retry_after=None):
         from django.http import JsonResponse
+        if reason == 'ip_prefix_blocked':
+            resp = JsonResponse({'detail': 'Forbidden.'}, status=403)
+            resp['X-RateLimit-Reason'] = reason
+            return resp
         if retry_after is None:
             retry_after = self.api_block_seconds
         resp = JsonResponse(
diff --git a/sapl/middleware/test_ratelimiter.py b/sapl/middleware/test_ratelimiter.py
index 3fb585a7a..e0898f7ec 100644
--- a/sapl/middleware/test_ratelimiter.py
+++ b/sapl/middleware/test_ratelimiter.py
@@ -531,6 +531,16 @@ def test_call_block_returns_429_with_retry_after_header():
     mw.get_response.assert_not_called()
 
 
+def test_call_ip_prefix_block_returns_403_without_retry_after_header():
+    mw, _ = _make_middleware()
+    mw._evaluate = MagicMock(return_value={'action': 'block', 'reason': 'ip_prefix_blocked', 'ip': '1.2.3.4'})
+    response = mw(_factory.get('/'))
+    assert response.status_code == 403
+    assert 'Retry-After' not in response
+    assert response['X-RateLimit-Reason'] == 'ip_prefix_blocked'
+    mw.get_response.assert_not_called()
+
+
 def test_call_pass_forwards_request_to_get_response():
     mw, _ = _make_middleware()
     mw._evaluate = MagicMock(return_value={'action': 'pass', 'ip': '1.2.3.4'})
@@ -698,7 +708,7 @@ def test_api_same_origin_does_not_bypass_ip_prefix_block(_seed_prefix_blocklist)
     response = mw(request)
 
     mw.get_response.assert_not_called()
-    assert response.status_code == 429
+    assert response.status_code == 403
     assert response['X-RateLimit-Reason'] == 'ip_prefix_blocked'
 
 
@@ -733,7 +743,7 @@ def test_api_blocks_on_ip_prefix_before_quota_checks(_seed_prefix_blocklist):
     mw.get_response.assert_not_called()
     mw._check_api_quota.assert_not_called()
     mw._incr_with_ttl.assert_not_called()
-    assert response.status_code == 429
+    assert response.status_code == 403
     assert response['X-RateLimit-Reason'] == 'ip_prefix_blocked'