Browse Source

Fix: replace OpenResty with nginx + libnginx-mod-http-lua (arm64 compat)

OpenResty has no arm64 packages for Debian Bookworm; the official repo
only publishes amd64. Switch to Debian's own packages which support both
architectures and avoid any external repo setup:

  nginx libnginx-mod-http-geoip2 libnginx-mod-http-lua lua-resty-redis

The GeoIP2 C module is back in use (same nginx version, so it is compatible).
ASN/UA blocking goes back to nginx if() blocks. blocklist.lua handles only the
Redis checks (prefix shared dict + pipelined GET for global/API block keys).

lua_package_path is set to /usr/share/lua/5.1/, where Debian installs resty.*.
All paths revert to /etc/nginx/; start.sh reverts to /usr/sbin/nginx.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 6 days ago
parent
commit
af9c38f1ce
  1. 28
      docker/Dockerfile
  2. 67
      docker/config/nginx/blocklist.lua
  3. 59
      docker/config/nginx/nginx.conf
  4. 21
      docker/config/nginx/sapl.conf
  5. 9
      docker/startup_scripts/start.sh

28
docker/Dockerfile

@ -57,13 +57,7 @@ RUN set -eux; \
if [ "$WITH_GRAPHVIZ" = "1" ]; then apt-get install -y --no-install-recommends graphviz; fi; \
if [ "$WITH_POPPLER" = "1" ]; then apt-get install -y --no-install-recommends poppler-utils; fi; \
if [ "$WITH_PSQL_CLIENT" = "1" ]; then apt-get install -y --no-install-recommends postgresql-client; fi; \
if [ "$WITH_NGINX" = "1" ]; then \
curl -fsSL https://openresty.org/package/pubkey.gpg -o /usr/share/keyrings/openresty.asc; \
echo "deb [signed-by=/usr/share/keyrings/openresty.asc] http://openresty.org/package/debian bookworm openresty" | tee /etc/apt/sources.list.d/openresty.list; \
apt-get update; \
apt-get install -y --no-install-recommends openresty libmaxminddb0; \
opm get anjia0532/lua-resty-maxminddb; \
fi; \
if [ "$WITH_NGINX" = "1" ]; then apt-get install -y --no-install-recommends nginx libnginx-mod-http-geoip2 libnginx-mod-http-lua lua-resty-redis libmaxminddb0; fi; \
rm -rf /var/lib/apt/lists/*
# Usuários/grupos (idempotente)
@ -79,6 +73,7 @@ RUN mkdir -p \
/var/interlegis/sapl/media \
/var/interlegis/sapl/run \
/var/interlegis/sapl/tmp \
/etc/nginx/geoip \
&& chown -R root:nginx /var/interlegis/sapl /var/interlegis/sapl/run \
&& chmod -R g+rwX /var/interlegis/sapl \
&& chmod 2775 /var/interlegis/sapl /var/interlegis/sapl/run \
@ -99,14 +94,12 @@ COPY . /var/interlegis/sapl/
# The .mmdb file lives at docker/geoip/GeoLite2-ASN.mmdb (git-ignored binary).
# If the file is absent the build FAILS — run update_geoip.sh first.
RUN if [ "$WITH_NGINX" = "1" ]; then \
OR_CONF=/usr/local/openresty/nginx/conf; \
mkdir -p $OR_CONF/conf.d $OR_CONF/geoip; \
rm -f $OR_CONF/conf.d/*; \
cp docker/config/nginx/sapl.conf $OR_CONF/conf.d/sapl.conf.template; \
cp docker/config/nginx/nginx.conf $OR_CONF/nginx.conf; \
cp docker/config/nginx/blocklist.lua $OR_CONF/blocklist.lua; \
rm -f /etc/nginx/conf.d/*; \
cp docker/config/nginx/sapl.conf /etc/nginx/conf.d/sapl.conf.template; \
cp docker/config/nginx/nginx.conf /etc/nginx/nginx.conf; \
cp docker/config/nginx/blocklist.lua /etc/nginx/blocklist.lua; \
if [ -f "docker/geoip/GeoLite2-ASN.mmdb" ]; then \
cp docker/geoip/GeoLite2-ASN.mmdb $OR_CONF/geoip/GeoLite2-ASN.mmdb; \
cp docker/geoip/GeoLite2-ASN.mmdb /etc/nginx/geoip/GeoLite2-ASN.mmdb; \
echo "[geoip] GeoLite2-ASN.mmdb installed."; \
else \
echo "[geoip] ERROR: docker/geoip/GeoLite2-ASN.mmdb not found."; \
@ -127,11 +120,10 @@ RUN install -m 755 docker/startup_scripts/start.sh /var/interlegis
# (Se possível, evite copiar .env no build. Use secrets/variáveis em runtime.)
COPY docker/config/env_dockerfile /var/interlegis/sapl/sapl/.env
# Logs (só se OpenResty estiver presente)
# Logs (só se nginx estiver presente)
RUN if [ "$WITH_NGINX" = "1" ]; then \
mkdir -p /var/log/openresty; \
ln -sf /dev/stdout /var/log/openresty/access.log; \
ln -sf /dev/stderr /var/log/openresty/error.log; \
ln -sf /dev/stdout /var/log/nginx/access.log; \
ln -sf /dev/stderr /var/log/nginx/error.log; \
fi \
&& mkdir -p /var/log/sapl/ \
&& ln -sf /var/interlegis/sapl/sapl.log /var/log/sapl/sapl.log

67
docker/config/nginx/blocklist.lua

@ -1,25 +1,32 @@
-- blocklist.lua: early-reject blocked IPs before reaching Gunicorn.
-- blocklist.lua: Redis-backed early IP rejection before Gunicorn.
-- ASN and User-Agent blocking are handled upstream by nginx if() blocks.
--
-- Checks (in order, cheapest first):
-- 1. User-Agent in bot UA list — nginx map variable, no Redis
-- 2. ASN in datacenter deny list — lua-resty-maxminddb (MaxMind ASN DB)
-- 3. ngx.shared.ip_prefix_blocked membership — in-process cache refreshed every 60s
-- 4. GET rl:ip:{ip}:blocked — global IP block (Redis DB 1)
-- 5. GET rl:api:ns:{ns}:ip:{ip}:blocked — per-tenant API block (/api/ only, Redis DB 1)
-- Checks (Redis DB 1, read-only):
-- 1. ngx.shared.ip_prefix_blocked — in-process prefix cache (60s refresh, no Redis I/O)
-- 2. GET rl:ip:{ip}:blocked — global IP block
-- 3. GET rl:api:ns:{ns}:ip:{ip}:blocked — per-tenant API block (/api/ only)
--
-- Checks 4+5 are pipelined in one Redis round trip.
-- Checks 2+3 are pipelined in one Redis round trip.
-- On Redis failure: fail-open (request passes to Django).
-- Parse REDIS_URL (redis://host:port or redis://host:port/db).
local redis_url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379"
local REDIS_HOST, port_str = redis_url:match("redis://([^:/]+):(%d+)")
if not REDIS_HOST then REDIS_HOST = redis_url:match("redis://([^:/]+)") or "127.0.0.1" end
local url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379"
local REDIS_HOST, port_str = url:match("redis://([^:/]+):(%d+)")
if not REDIS_HOST then REDIS_HOST = url:match("redis://([^:/]+)") or "127.0.0.1" end
local REDIS_PORT = tonumber(port_str) or 6379
local POD_NS = os.getenv("POD_NAMESPACE") or ""
local ip = ngx.var.remote_addr
local is_api = ngx.var.uri:sub(1, 5) == "/api/"
-- Build the prefix candidates for the client address, e.g. for '203.0.113.42':
-- '203.', '203.0.', '203.0.113.' (the exact address itself is the 4th candidate).
-- Mirrors Django's _is_ip_prefix_blocked normalisation.
--
-- The address is split on '.'; only a 4-part (dotted-quad) result has the
-- pieces needed for the three trailing-dot prefixes. Anything else (e.g. an
-- IPv6 literal such as '::1', which contains no dots) would leave parts[2] /
-- parts[3] nil and make the concatenation raise a runtime error. In that case
-- every candidate falls back to the exact address, so later lookups of
-- p1/p2/p3 are simply redundant exact-match checks instead of an error.
local parts = {}
for p in ip:gmatch("[^.]+") do parts[#parts+1] = p end
local p1, p2, p3 = ip, ip, ip
if #parts == 4 then
p1 = parts[1] .. "."
p2 = p1 .. parts[2] .. "."
p3 = p2 .. parts[3] .. "."
end
local function return_429()
ngx.status = 429
ngx.header["Retry-After"] = "300"
@ -28,51 +35,19 @@ local function return_429()
return ngx.exit(429)
end
-- 1. Bot UA check (nginx map variable — no I/O).
if ngx.var.bot_ua_blocked == "1" then return return_429() end
-- 2. ASN check via lua-resty-maxminddb (shared DB handle opened in init_by_lua_block).
local BLOCKED_ASNS = {
[16509] = true, -- Amazon AWS
[14618] = true, -- Amazon AWS us-east
[8075] = true, -- Microsoft Azure
[396982]= true, -- Google Cloud
[20473] = true, -- Vultr
[24940] = true, -- Hetzner
[16276] = true, -- OVH
[36352] = true, -- ColoCrossing
[63949] = true, -- Linode / Akamai
}
local ok_mmdb, mmdb = pcall(require, "resty.maxminddb")
if ok_mmdb and mmdb.initted() then
local result = mmdb.lookup(ip)
if result and BLOCKED_ASNS[result.autonomous_system_number] then
return return_429()
end
end
-- Build 4 candidates for prefix check: three trailing-dot prefixes + exact IP.
-- Mirrors Django's _is_ip_prefix_blocked normalisation and _refresh_ip_prefix_blocklist.
local parts = {}
for p in ip:gmatch("[^.]+") do parts[#parts+1] = p end
local p1 = parts[1] .. "."
local p2 = parts[1] .. "." .. parts[2] .. "."
local p3 = parts[1] .. "." .. parts[2] .. "." .. parts[3] .. "."
-- 3. IP prefix check (in-process shared dict — no Redis I/O per request).
-- 1. Prefix check (shared dict — zero Redis I/O per request).
local dict = ngx.shared.ip_prefix_blocked
if dict:get(p1) or dict:get(p2) or dict:get(p3) or dict:get(ip) then
return return_429()
end
-- 4+5. Pipeline both STRING block checks in one Redis round trip.
-- 2+3. Pipeline both STRING block checks in one Redis round trip.
local red = require("resty.redis"):new()
red:set_timeout(200)
local ok = red:connect(REDIS_HOST, REDIS_PORT)
if not ok then return end -- fail-open
red:select(1)
red:init_pipeline()
red:get("rl:ip:" .. ip .. ":blocked")
red:get("rl:api:ns:" .. POD_NS .. ":ip:" .. ip .. ":blocked")

59
docker/config/nginx/nginx.conf

@ -1,15 +1,14 @@
# OpenResty configuration replaces the previous nginx + libnginx-mod-http-geoip2 stack.
# ASN-based blocking moved to blocklist.lua using lua-resty-maxminddb (pure Lua, no C module).
load_module modules/ngx_http_geoip2_module.so;
# Make POD_NAMESPACE and Redis URL available to Lua.
env POD_NAMESPACE;
env REDIS_URL;
user www-data;
user www-data nginx;
worker_processes 1;
error_log /var/log/openresty/error.log warn;
pid /var/run/openresty.pid;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events {
@ -18,7 +17,7 @@ events {
http {
include /usr/local/openresty/nginx/conf/mime.types;
include /etc/nginx/mime.types;
default_type application/octet-stream;
# ----------------------------------------------------------------
@ -36,7 +35,7 @@ http {
'"$http_user_agent" "$http_x_forwarded_for" '
'rt=$request_time';
access_log /var/log/openresty/access.log main;
access_log /var/log/nginx/access.log main;
# ----------------------------------------------------------------
# FIX: kernel bypass was off (disables zero-copy file serving)
@ -70,7 +69,29 @@ http {
limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m;
# ----------------------------------------------------------------
# Bot blocking by User-Agent (nginx map no module required).
# ASN-Based Blocking (datacenter / scraper ASNs).
# Requires libnginx-mod-http-geoip2 and GeoLite2-ASN.mmdb.
# ----------------------------------------------------------------
geoip2 /etc/nginx/geoip/GeoLite2-ASN.mmdb {
$geoip2_asn_number autonomous_system_number;
$geoip2_asn_org autonomous_system_organization;
}
map $geoip2_asn_number $bot_asn {
default 0;
16509 1; # Amazon AWS
14618 1; # Amazon AWS us-east
8075 1; # Microsoft Azure
396982 1; # Google Cloud
20473 1; # Vultr
24940 1; # Hetzner
16276 1; # OVH
36352 1; # ColoCrossing
63949 1; # Linode / Akamai
}
# ----------------------------------------------------------------
# Bot blocking by User-Agent.
# Chrome/98.0.4758 is a confirmed scraper (no real user runs a
# 2022 browser version in 2026). Googlebot excluded for SEO.
# ----------------------------------------------------------------
@ -91,29 +112,22 @@ http {
}
# ----------------------------------------------------------------
# OpenResty: open MaxMind ASN DB once in master (workers inherit fd).
# ASN-based blocking runs in blocklist.lua via lua-resty-maxminddb.
# Lua: search path for Debian-packaged resty.* libraries.
# lua-resty-redis installs to /usr/share/lua/5.1/resty/redis.lua.
# ----------------------------------------------------------------
init_by_lua_block {
local ok, mmdb = pcall(require, "resty.maxminddb")
if ok then
local db_path = "/usr/local/openresty/nginx/conf/geoip/GeoLite2-ASN.mmdb"
pcall(function() mmdb.init(db_path) end)
end
}
lua_package_path '/usr/share/lua/5.1/?.lua;;';
# ----------------------------------------------------------------
# OpenResty: shared dict for IP-prefix deny list (refreshed every 60s).
# Shared dict for IP-prefix deny list (refreshed every 60s).
# 1 MB holds ~10,000 prefix entries with overhead to spare.
# ----------------------------------------------------------------
lua_shared_dict ip_prefix_blocked 1m;
# ----------------------------------------------------------------
# OpenResty: background timer populates ip_prefix_blocked from Redis.
# Runs once per worker process at startup, then every 60s.
# Background timer: populates ip_prefix_blocked from Redis DB 1.
# Runs once per worker at startup, then every 60s.
# ----------------------------------------------------------------
init_worker_by_lua_block {
-- Parse REDIS_URL (redis://host:port or redis://host:port/db).
local url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379"
local REDIS_HOST, port_str = url:match("redis://([^:/]+):(%d+)")
if not REDIS_HOST then REDIS_HOST = url:match("redis://([^:/]+)") or "127.0.0.1" end
@ -136,7 +150,6 @@ http {
local dict = ngx.shared.ip_prefix_blocked
dict:flush_all()
for _, m in ipairs(members) do
-- Normalise: strip trailing dot, re-add unless it's a full dotted-quad.
local stripped = m:gsub("%.$", "")
local key = (select(2, stripped:gsub("%.", "")) < 3)
and (stripped .. ".") or stripped
@ -156,5 +169,5 @@ http {
gzip_types text/plain text/css text/javascript application/javascript application/x-javascript text/xml application/xml application/rss+xml image/gif image/png image/x-icon image/jpeg image/svg+xml;
gzip_vary on;
include /usr/local/openresty/nginx/conf/conf.d/*.conf;
include /etc/nginx/conf.d/*.conf;
}

21
docker/config/nginx/sapl.conf

@ -17,11 +17,24 @@ server {
client_max_body_size 4G;
# ----------------------------------------------------------------
# OpenResty Lua blocklist: ASN block, prefix block, and Redis-backed
# IP/API blocks — all evaluated before reaching Gunicorn.
# UA block is also enforced here via $bot_ua_blocked map variable.
# Block known scraper ASNs (datacenter traffic) — zero Python cost.
# ----------------------------------------------------------------
access_by_lua_file /usr/local/openresty/nginx/conf/blocklist.lua;
if ($bot_asn = 1) {
return 429 "Too Many Requests";
}
# ----------------------------------------------------------------
# Block known bots by User-Agent — zero Python cost.
# ----------------------------------------------------------------
if ($bot_ua_blocked = 1) {
return 429 "Too Many Requests";
}
# ----------------------------------------------------------------
# Redis-backed IP blocklist (Lua): prefix SET, global IP block,
# and per-tenant API block — checked before reaching Gunicorn.
# ----------------------------------------------------------------
access_by_lua_file /etc/nginx/blocklist.lua;
# ----------------------------------------------------------------
# robots.txt served directly by nginx.

9
docker/startup_scripts/start.sh

@ -345,12 +345,11 @@ start_services() {
log "Starting gunicorn..."
gunicorn -c gunicorn.conf.py &
log "Applying nginx config (burst: general=${NGINX_BURST_GENERAL} media=${NGINX_BURST_MEDIA} api=${NGINX_BURST_API} heavy=${NGINX_BURST_HEAVY})..."
OR_CONF=/usr/local/openresty/nginx/conf
envsubst '${NGINX_BURST_GENERAL} ${NGINX_BURST_MEDIA} ${NGINX_BURST_API} ${NGINX_BURST_HEAVY}' \
< $OR_CONF/conf.d/sapl.conf.template \
> $OR_CONF/conf.d/sapl.conf
log "Starting OpenResty..."
exec /usr/local/openresty/nginx/sbin/nginx -g "daemon off;"
< /etc/nginx/conf.d/sapl.conf.template \
> /etc/nginx/conf.d/sapl.conf
log "Starting nginx..."
exec /usr/sbin/nginx -g "daemon off;"
}
main() {

Loading…
Cancel
Save