From af9c38f1ce7fdf7ad1088975e53ec3b66ab60163 Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Tue, 16 Jun 2026 19:02:05 -0300 Subject: [PATCH] Fix: replace OpenResty with nginx + libnginx-mod-http-lua (arm64 compat) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenResty has no arm64 packages for Debian Bookworm; the official repo only publishes amd64. Switch to Debian's own packages which support both architectures and avoid any external repo setup: nginx libnginx-mod-http-geoip2 libnginx-mod-http-lua lua-resty-redis GeoIP2 C module returns (same nginx version → compatible). ASN/UA blocking goes back to nginx if() blocks. blocklist.lua handles only the Redis checks (prefix shared dict + pipelined GET for global/API block keys). lua_package_path set to /usr/share/lua/5.1/ where Debian installs resty.*. All paths revert to /etc/nginx/; start.sh reverts to /usr/sbin/nginx. Co-Authored-By: Claude Sonnet 4.6 --- docker/Dockerfile | 28 +++++-------- docker/config/nginx/blocklist.lua | 67 ++++++++++--------------------- docker/config/nginx/nginx.conf | 59 ++++++++++++++++----------- docker/config/nginx/sapl.conf | 21 ++++++++-- docker/startup_scripts/start.sh | 9 ++--- 5 files changed, 88 insertions(+), 96 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9ecd3ec2c..803ec5cf6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,13 +57,7 @@ RUN set -eux; \ if [ "$WITH_GRAPHVIZ" = "1" ]; then apt-get install -y --no-install-recommends graphviz; fi; \ if [ "$WITH_POPPLER" = "1" ]; then apt-get install -y --no-install-recommends poppler-utils; fi; \ if [ "$WITH_PSQL_CLIENT" = "1" ]; then apt-get install -y --no-install-recommends postgresql-client; fi; \ - if [ "$WITH_NGINX" = "1" ]; then \ - curl -fsSL https://openresty.org/package/pubkey.gpg -o /usr/share/keyrings/openresty.asc; \ - echo "deb [signed-by=/usr/share/keyrings/openresty.asc] http://openresty.org/package/debian bookworm openresty" | tee /etc/apt/sources.list.d/openresty.list; \ - apt-get update; \ - apt-get install -y --no-install-recommends openresty libmaxminddb0; \ - opm get anjia0532/lua-resty-maxminddb; \ - fi; \ + if [ "$WITH_NGINX" = "1" ]; then apt-get install -y --no-install-recommends nginx libnginx-mod-http-geoip2 libnginx-mod-http-lua lua-resty-redis libmaxminddb0; fi; \ rm -rf /var/lib/apt/lists/* # Usuários/grupos (idempotente) @@ -79,6 +73,7 @@ RUN mkdir -p \ /var/interlegis/sapl/media \ /var/interlegis/sapl/run \ /var/interlegis/sapl/tmp \ + /etc/nginx/geoip \ && chown -R root:nginx /var/interlegis/sapl /var/interlegis/sapl/run \ && chmod -R g+rwX /var/interlegis/sapl \ && chmod 2775 /var/interlegis/sapl /var/interlegis/sapl/run \ @@ -99,14 +94,12 @@ COPY . /var/interlegis/sapl/ # The .mmdb file lives at docker/geoip/GeoLite2-ASN.mmdb (git-ignored binary). # If the file is absent the build FAILS — run update_geoip.sh first. RUN if [ "$WITH_NGINX" = "1" ]; then \ - OR_CONF=/usr/local/openresty/nginx/conf; \ - mkdir -p $OR_CONF/conf.d $OR_CONF/geoip; \ - rm -f $OR_CONF/conf.d/*; \ - cp docker/config/nginx/sapl.conf $OR_CONF/conf.d/sapl.conf.template; \ - cp docker/config/nginx/nginx.conf $OR_CONF/nginx.conf; \ - cp docker/config/nginx/blocklist.lua $OR_CONF/blocklist.lua; \ + rm -f /etc/nginx/conf.d/*; \ + cp docker/config/nginx/sapl.conf /etc/nginx/conf.d/sapl.conf.template; \ + cp docker/config/nginx/nginx.conf /etc/nginx/nginx.conf; \ + cp docker/config/nginx/blocklist.lua /etc/nginx/blocklist.lua; \ if [ -f "docker/geoip/GeoLite2-ASN.mmdb" ]; then \ - cp docker/geoip/GeoLite2-ASN.mmdb $OR_CONF/geoip/GeoLite2-ASN.mmdb; \ + cp docker/geoip/GeoLite2-ASN.mmdb /etc/nginx/geoip/GeoLite2-ASN.mmdb; \ echo "[geoip] GeoLite2-ASN.mmdb installed."; \ else \ echo "[geoip] ERROR: docker/geoip/GeoLite2-ASN.mmdb not found."; \ @@ -127,11 +120,10 @@ RUN install -m 755 docker/startup_scripts/start.sh /var/interlegis # (Se possível, evite copiar .env no build. Use secrets/variáveis em runtime.) COPY docker/config/env_dockerfile /var/interlegis/sapl/sapl/.env -# Logs (só se OpenResty estiver presente) +# Logs (só se nginx estiver presente) RUN if [ "$WITH_NGINX" = "1" ]; then \ - mkdir -p /var/log/openresty; \ - ln -sf /dev/stdout /var/log/openresty/access.log; \ - ln -sf /dev/stderr /var/log/openresty/error.log; \ + ln -sf /dev/stdout /var/log/nginx/access.log; \ + ln -sf /dev/stderr /var/log/nginx/error.log; \ fi \ && mkdir -p /var/log/sapl/ \ && ln -sf /var/interlegis/sapl/sapl.log /var/log/sapl/sapl.log diff --git a/docker/config/nginx/blocklist.lua b/docker/config/nginx/blocklist.lua index 19e390e23..02a0526d8 100644 --- a/docker/config/nginx/blocklist.lua +++ b/docker/config/nginx/blocklist.lua @@ -1,25 +1,32 @@ --- blocklist.lua: early-reject blocked IPs before reaching Gunicorn. +-- blocklist.lua: Redis-backed early IP rejection before Gunicorn. +-- ASN and User-Agent blocking are handled upstream by nginx if() blocks. -- --- Checks (in order, cheapest first): --- 1. User-Agent in bot UA list — nginx map variable, no Redis --- 2. ASN in datacenter deny list — lua-resty-maxminddb (MaxMind ASN DB) --- 3. ngx.shared.ip_prefix_blocked membership — in-process cache refreshed every 60s --- 4. GET rl:ip:{ip}:blocked — global IP block (Redis DB 1) --- 5. GET rl:api:ns:{ns}:ip:{ip}:blocked — per-tenant API block (/api/ only, Redis DB 1) +-- Checks (Redis DB 1, read-only): +-- 1. ngx.shared.ip_prefix_blocked — in-process prefix cache (60s refresh, no Redis I/O) +-- 2. GET rl:ip:{ip}:blocked — global IP block +-- 3. GET rl:api:ns:{ns}:ip:{ip}:blocked — per-tenant API block (/api/ only) -- --- Checks 4+5 are pipelined in one Redis round trip. +-- Checks 2+3 are pipelined in one Redis round trip. -- On Redis failure: fail-open (request passes to Django). --- Parse REDIS_URL (redis://host:port or redis://host:port/db). -local redis_url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379" -local REDIS_HOST, port_str = redis_url:match("redis://([^:/]+):(%d+)") -if not REDIS_HOST then REDIS_HOST = redis_url:match("redis://([^:/]+)") or "127.0.0.1" end +local url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379" +local REDIS_HOST, port_str = url:match("redis://([^:/]+):(%d+)") +if not REDIS_HOST then REDIS_HOST = url:match("redis://([^:/]+)") or "127.0.0.1" end local REDIS_PORT = tonumber(port_str) or 6379 local POD_NS = os.getenv("POD_NAMESPACE") or "" local ip = ngx.var.remote_addr local is_api = ngx.var.uri:sub(1, 5) == "/api/" +-- Build 4 prefix candidates for ip e.g. '203.0.113.42': +-- '203.', '203.0.', '203.0.113.', '203.0.113.42' +-- Mirrors Django's _is_ip_prefix_blocked normalisation. +local parts = {} +for p in ip:gmatch("[^.]+") do parts[#parts+1] = p end +local p1 = parts[1] .. "." +local p2 = parts[1] .. "." .. parts[2] .. "." +local p3 = parts[1] .. "." .. parts[2] .. "." .. parts[3] .. "." + local function return_429() ngx.status = 429 ngx.header["Retry-After"] = "300" @@ -28,51 +35,19 @@ local function return_429() return ngx.exit(429) end --- 1. Bot UA check (nginx map variable — no I/O). -if ngx.var.bot_ua_blocked == "1" then return return_429() end - --- 2. ASN check via lua-resty-maxminddb (shared DB handle opened in init_by_lua_block). -local BLOCKED_ASNS = { - [16509] = true, -- Amazon AWS - [14618] = true, -- Amazon AWS us-east - [8075] = true, -- Microsoft Azure - [396982]= true, -- Google Cloud - [20473] = true, -- Vultr - [24940] = true, -- Hetzner - [16276] = true, -- OVH - [36352] = true, -- ColoCrossing - [63949] = true, -- Linode / Akamai -} -local ok_mmdb, mmdb = pcall(require, "resty.maxminddb") -if ok_mmdb and mmdb.initted() then - local result = mmdb.lookup(ip) - if result and BLOCKED_ASNS[result.autonomous_system_number] then - return return_429() - end -end - --- Build 4 candidates for prefix check: three trailing-dot prefixes + exact IP. --- Mirrors Django's _is_ip_prefix_blocked normalisation and _refresh_ip_prefix_blocklist. -local parts = {} -for p in ip:gmatch("[^.]+") do parts[#parts+1] = p end -local p1 = parts[1] .. "." -local p2 = parts[1] .. "." .. parts[2] .. "." -local p3 = parts[1] .. "." .. parts[2] .. "." .. parts[3] .. "." - --- 3. IP prefix check (in-process shared dict — no Redis I/O per request). +-- 1. Prefix check (shared dict — zero Redis I/O per request). local dict = ngx.shared.ip_prefix_blocked if dict:get(p1) or dict:get(p2) or dict:get(p3) or dict:get(ip) then return return_429() end --- 4+5. Pipeline both STRING block checks in one Redis round trip. +-- 2+3. Pipeline both STRING block checks in one Redis round trip. local red = require("resty.redis"):new() red:set_timeout(200) local ok = red:connect(REDIS_HOST, REDIS_PORT) if not ok then return end -- fail-open red:select(1) - red:init_pipeline() red:get("rl:ip:" .. ip .. ":blocked") red:get("rl:api:ns:" .. POD_NS .. ":ip:" .. ip .. ":blocked") diff --git a/docker/config/nginx/nginx.conf b/docker/config/nginx/nginx.conf index c0f8f70f4..1b37b219a 100644 --- a/docker/config/nginx/nginx.conf +++ b/docker/config/nginx/nginx.conf @@ -1,15 +1,14 @@ -# OpenResty configuration — replaces the previous nginx + libnginx-mod-http-geoip2 stack. -# ASN-based blocking moved to blocklist.lua using lua-resty-maxminddb (pure Lua, no C module). +load_module modules/ngx_http_geoip2_module.so; # Make POD_NAMESPACE and Redis URL available to Lua. env POD_NAMESPACE; env REDIS_URL; -user www-data; +user www-data nginx; worker_processes 1; -error_log /var/log/openresty/error.log warn; -pid /var/run/openresty.pid; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; events { @@ -18,7 +17,7 @@ events { http { - include /usr/local/openresty/nginx/conf/mime.types; + include /etc/nginx/mime.types; default_type application/octet-stream; # ---------------------------------------------------------------- @@ -36,7 +35,7 @@ http { '"$http_user_agent" "$http_x_forwarded_for" ' 'rt=$request_time'; - access_log /var/log/openresty/access.log main; + access_log /var/log/nginx/access.log main; # ---------------------------------------------------------------- # FIX: kernel bypass — was off (disables zero-copy file serving) @@ -70,7 +69,29 @@ http { limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m; # ---------------------------------------------------------------- - # Bot blocking by User-Agent (nginx map — no module required). + # ASN-Based Blocking (datacenter / scraper ASNs). + # Requires libnginx-mod-http-geoip2 and GeoLite2-ASN.mmdb. + # ---------------------------------------------------------------- + geoip2 /etc/nginx/geoip/GeoLite2-ASN.mmdb { + $geoip2_asn_number autonomous_system_number; + $geoip2_asn_org autonomous_system_organization; + } + + map $geoip2_asn_number $bot_asn { + default 0; + 16509 1; # Amazon AWS + 14618 1; # Amazon AWS us-east + 8075 1; # Microsoft Azure + 396982 1; # Google Cloud + 20473 1; # Vultr + 24940 1; # Hetzner + 16276 1; # OVH + 36352 1; # ColoCrossing + 63949 1; # Linode / Akamai + } + + # ---------------------------------------------------------------- + # Bot blocking by User-Agent. # Chrome/98.0.4758 is a confirmed scraper (no real user runs a # 2022 browser version in 2026). Googlebot excluded for SEO. # ---------------------------------------------------------------- @@ -91,29 +112,22 @@ http { } # ---------------------------------------------------------------- - # OpenResty: open MaxMind ASN DB once in master (workers inherit fd). - # ASN-based blocking runs in blocklist.lua via lua-resty-maxminddb. + # Lua: search path for Debian-packaged resty.* libraries. + # lua-resty-redis installs to /usr/share/lua/5.1/resty/redis.lua. # ---------------------------------------------------------------- - init_by_lua_block { - local ok, mmdb = pcall(require, "resty.maxminddb") - if ok then - local db_path = "/usr/local/openresty/nginx/conf/geoip/GeoLite2-ASN.mmdb" - pcall(function() mmdb.init(db_path) end) - end - } + lua_package_path '/usr/share/lua/5.1/?.lua;;'; # ---------------------------------------------------------------- - # OpenResty: shared dict for IP-prefix deny list (refreshed every 60s). + # Shared dict for IP-prefix deny list (refreshed every 60s). # 1 MB holds ~10,000 prefix entries with overhead to spare. # ---------------------------------------------------------------- lua_shared_dict ip_prefix_blocked 1m; # ---------------------------------------------------------------- - # OpenResty: background timer populates ip_prefix_blocked from Redis. - # Runs once per worker process at startup, then every 60s. + # Background timer: populates ip_prefix_blocked from Redis DB 1. + # Runs once per worker at startup, then every 60s. # ---------------------------------------------------------------- init_worker_by_lua_block { - -- Parse REDIS_URL (redis://host:port or redis://host:port/db). local url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379" local REDIS_HOST, port_str = url:match("redis://([^:/]+):(%d+)") if not REDIS_HOST then REDIS_HOST = url:match("redis://([^:/]+)") or "127.0.0.1" end @@ -136,7 +150,6 @@ http { local dict = ngx.shared.ip_prefix_blocked dict:flush_all() for _, m in ipairs(members) do - -- Normalise: strip trailing dot, re-add unless it's a full dotted-quad. local stripped = m:gsub("%.$", "") local key = (select(2, stripped:gsub("%.", "")) < 3) and (stripped .. ".") or stripped @@ -156,5 +169,5 @@ http { gzip_types text/plain text/css text/javascript application/javascript application/x-javascript text/xml application/xml application/rss+xml image/gif image/png image/x-icon image/jpeg image/svg+xml; gzip_vary on; - include /usr/local/openresty/nginx/conf/conf.d/*.conf; + include /etc/nginx/conf.d/*.conf; } diff --git a/docker/config/nginx/sapl.conf b/docker/config/nginx/sapl.conf index bae13ad00..9364650f4 100644 --- a/docker/config/nginx/sapl.conf +++ b/docker/config/nginx/sapl.conf @@ -17,11 +17,24 @@ server { client_max_body_size 4G; # ---------------------------------------------------------------- - # OpenResty Lua blocklist: ASN block, prefix block, and Redis-backed - # IP/API blocks — all evaluated before reaching Gunicorn. - # UA block is also enforced here via $bot_ua_blocked map variable. + # Block known scraper ASNs (datacenter traffic) — zero Python cost. # ---------------------------------------------------------------- - access_by_lua_file /usr/local/openresty/nginx/conf/blocklist.lua; + if ($bot_asn = 1) { + return 429 "Too Many Requests"; + } + + # ---------------------------------------------------------------- + # Block known bots by User-Agent — zero Python cost. + # ---------------------------------------------------------------- + if ($bot_ua_blocked = 1) { + return 429 "Too Many Requests"; + } + + # ---------------------------------------------------------------- + # Redis-backed IP blocklist (Lua): prefix SET, global IP block, + # and per-tenant API block — checked before reaching Gunicorn. + # ---------------------------------------------------------------- + access_by_lua_file /etc/nginx/blocklist.lua; # ---------------------------------------------------------------- # robots.txt served directly by nginx. diff --git a/docker/startup_scripts/start.sh b/docker/startup_scripts/start.sh index 96143cd70..b2fc83f11 100755 --- a/docker/startup_scripts/start.sh +++ b/docker/startup_scripts/start.sh @@ -345,12 +345,11 @@ start_services() { log "Starting gunicorn..." gunicorn -c gunicorn.conf.py & log "Applying nginx config (burst: general=${NGINX_BURST_GENERAL} media=${NGINX_BURST_MEDIA} api=${NGINX_BURST_API} heavy=${NGINX_BURST_HEAVY})..." - OR_CONF=/usr/local/openresty/nginx/conf envsubst '${NGINX_BURST_GENERAL} ${NGINX_BURST_MEDIA} ${NGINX_BURST_API} ${NGINX_BURST_HEAVY}' \ - < $OR_CONF/conf.d/sapl.conf.template \ - > $OR_CONF/conf.d/sapl.conf - log "Starting OpenResty..." - exec /usr/local/openresty/nginx/sbin/nginx -g "daemon off;" + < /etc/nginx/conf.d/sapl.conf.template \ + > /etc/nginx/conf.d/sapl.conf + log "Starting nginx..." + exec /usr/sbin/nginx -g "daemon off;" } main() {