# OpenResty configuration — replaces the previous nginx + libnginx-mod-http-geoip2 stack.
# ASN-based blocking moved to blocklist.lua using lua-resty-maxminddb (pure Lua, no C module).

# Make POD_NAMESPACE and Redis URL available to Lua.
env POD_NAMESPACE;
env REDIS_URL;

user www-data;
worker_processes 1;

error_log /var/log/openresty/error.log warn;
pid /var/run/openresty.pid;

events {
    worker_connections 1024;
}

http {
    include /usr/local/openresty/nginx/conf/mime.types;
    default_type application/octet-stream;

    # ----------------------------------------------------------------
    # Real client IP extracted from X-Forwarded-For set by K8s Ingress.
    # ----------------------------------------------------------------
    real_ip_header X-Forwarded-For;
    real_ip_recursive on;
    set_real_ip_from 10.0.0.0/8;
    set_real_ip_from 172.16.0.0/12;
    set_real_ip_from 192.168.0.0/16;
    set_real_ip_from 127.0.0.1;

    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'rt=$request_time';

    access_log /var/log/openresty/access.log main;

    # ----------------------------------------------------------------
    # FIX: kernel bypass — was off (disables zero-copy file serving)
    # ----------------------------------------------------------------
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;

    # ----------------------------------------------------------------
    # Timeouts — reduced from 300s to prevent bots holding threads.
    # Per-location overrides in sapl.conf handle legitimate slow ops.
    # ----------------------------------------------------------------
    keepalive_timeout 75;        # was 300
    proxy_connect_timeout 10s;
    proxy_read_timeout 120s;     # was 300s — overridden per-location
    proxy_send_timeout 120s;

    # ----------------------------------------------------------------
    # Rate limiting zones (effective once real_ip is resolved).
    #   sapl_general : 120 req/min — aligned with Django anon rate (RATE_LIMITER_RATE)
    #   sapl_media   : 240 req/min — aligned with Django auth rate (RATE_LIMITER_RATE_AUTHENTICATED)
    #   sapl_api     : 120 req/min — aligned with Django rate limiter threshold
    #   sapl_heavy   :  10 req/min — PDF generation; slow by design
    # Burst values are env-var configurable at container start (start.sh).
    # ----------------------------------------------------------------
    limit_req_log_level warn;
    limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=120r/m;
    limit_req_zone $binary_remote_addr zone=sapl_media:20m rate=240r/m;
    limit_req_zone $binary_remote_addr zone=sapl_api:20m rate=120r/m;
    limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m;

    # ----------------------------------------------------------------
    # Bot blocking by User-Agent (nginx map — no module required).
    # Chrome/98.0.4758 is a confirmed scraper (no real user runs a
    # 2022 browser version in 2026). Googlebot excluded for SEO.
    # ----------------------------------------------------------------
    map $http_user_agent $bot_ua_blocked {
        default                       0;
        "~*GPTBot"                    1;
        "~*ClaudeBot"                 1;
        "~*PerplexityBot"             1;
        "~*Bytespider"                1;
        "~*AhrefsBot"                 1;
        "~*SemrushBot"                1;
        "~*DotBot"                    1;
        "~*meta-externalagent"        1;
        "~*OAI-SearchBot"             1;
        "~*bingbot"                   1;
        "~*SERankingBacklinksBot"     1;
        "~*Chrome/98\.0\.4758"        1;
    }

    # ----------------------------------------------------------------
    # OpenResty: open MaxMind ASN DB once in master (workers inherit fd).
    # ASN-based blocking runs in blocklist.lua via lua-resty-maxminddb.
    # Failures are non-fatal (the server still starts) but are logged,
    # so a missing library or database file is visible in error.log.
    # ----------------------------------------------------------------
    init_by_lua_block {
        local db_path = "/usr/local/openresty/nginx/conf/geoip/GeoLite2-ASN.mmdb"

        local loaded, mmdb = pcall(require, "resty.maxminddb")
        if not loaded then
            -- On failure pcall returns the error message in place of the module.
            ngx.log(ngx.ERR, "failed to load resty.maxminddb: ", mmdb)
        else
            local opened, open_err = pcall(mmdb.init, db_path)
            if not opened then
                ngx.log(ngx.ERR, "failed to open ASN database ", db_path,
                        ": ", open_err)
            end
        end
    }

    # ----------------------------------------------------------------
    # OpenResty: shared dict for IP-prefix deny list (refreshed every 60s).
    # 1 MB holds ~10,000 prefix entries with overhead to spare.
    # ----------------------------------------------------------------
    lua_shared_dict ip_prefix_blocked 1m;

    # ----------------------------------------------------------------
    # OpenResty: background timer populates ip_prefix_blocked from Redis.
    # Runs once per worker process at startup, then every 60s.
    # A failed refresh keeps the previous deny list and is logged; the
    # timer is always re-armed so one bad cycle cannot stop later ones.
    # ----------------------------------------------------------------
    init_worker_by_lua_block {
        local DICT_NAME = "ip_prefix_blocked"
        local REDIS_KEY = "rl:ip_prefix:blocked"
        local REDIS_DB  = 1     -- fixed; a /db suffix in REDIS_URL is ignored
        local INTERVAL  = 60    -- seconds between refreshes

        -- Extract host and port from a redis:// URL.
        -- Accepts redis://host, redis://host:port and redis://host:port/db,
        -- and skips an optional userinfo part (user:pass@) so it is not
        -- mistaken for the host. Credentials themselves are not used.
        -- Falls back to 127.0.0.1:6379 for anything that cannot be parsed.
        local function parse_redis_url(url)
            local authority = url:match("redis://([^/]*)")
            if not authority then
                return "127.0.0.1", 6379
            end
            -- Greedy match up to the last @ removes the userinfo, if any.
            authority = (authority:gsub("^.*@", ""))

            local host, port = authority:match("^([^:]+):(%d+)$")
            if not host then
                host = authority:match("^([^:]+)")
            end
            return host or "127.0.0.1", tonumber(port) or 6379
        end

        local REDIS_HOST, REDIS_PORT =
            parse_redis_url(os.getenv("REDIS_URL") or "redis://127.0.0.1:6379")

        -- Normalise a prefix: strip a trailing dot, then re-add it unless
        -- the value is a full dotted-quad (three dots).
        local function normalise_prefix(raw)
            local stripped = raw:gsub("%.$", "")
            local _, dots = stripped:gsub("%.", "")
            if dots < 3 then
                return stripped .. "."
            end
            return stripped
        end

        -- Fetch the blocked-prefix set from Redis and mirror it into the
        -- shared dict. Returns true on success, or nil plus an error string.
        local function sync_deny_list()
            local red = require("resty.redis"):new()
            red:set_timeout(500)

            local ok, err = red:connect(REDIS_HOST, REDIS_PORT)
            if not ok then
                return nil, "connect " .. REDIS_HOST .. ":" .. REDIS_PORT
                            .. " failed: " .. tostring(err)
            end

            -- A failed SELECT would make SMEMBERS read another database.
            ok, err = red:select(REDIS_DB)
            if not ok then
                red:close()
                return nil, "select " .. REDIS_DB .. " failed: " .. tostring(err)
            end

            local members, fetch_err = red:smembers(REDIS_KEY)
            if type(members) ~= "table" then
                -- Do not return a connection in an unknown state to the pool.
                red:close()
                return nil, "smembers failed: " .. tostring(fetch_err)
            end
            red:set_keepalive(10000, 1)

            local dict = ngx.shared[DICT_NAME]
            local fresh = {}

            -- Upsert current entries first, so the deny list is never empty
            -- while a refresh is in progress.
            for _, member in ipairs(members) do
                if type(member) == "string" then
                    local key = normalise_prefix(member)
                    fresh[key] = true
                    local set_ok, set_err, forcible = dict:set(key, 1)
                    if not set_ok then
                        ngx.log(ngx.ERR, DICT_NAME, ": cannot store ", key,
                                ": ", set_err)
                    elseif forcible then
                        ngx.log(ngx.WARN, DICT_NAME, ": zone is full, older ",
                                "entries were evicted; increase lua_shared_dict size")
                    end
                end
            end

            -- Then drop entries that are no longer in the Redis set.
            -- get_keys(0) returns every key; the zone is small by design.
            for _, key in ipairs(dict:get_keys(0)) do
                if not fresh[key] then
                    dict:delete(key)
                end
            end

            return true
        end

        local function refresh(premature)
            -- premature is true when the worker is shutting down.
            if premature then
                return
            end

            local ran, res, err = pcall(sync_deny_list)
            if not ran then
                ngx.log(ngx.ERR, DICT_NAME, ": refresh raised an error: ", res)
            elseif not res then
                ngx.log(ngx.WARN, DICT_NAME, ": refresh failed, keeping previous list: ", err)
            end

            -- Re-arm whatever happened above.
            local armed, timer_err = ngx.timer.at(INTERVAL, refresh)
            if not armed then
                ngx.log(ngx.ERR, DICT_NAME, ": cannot schedule next refresh: ", timer_err)
            end
        end

        local armed, timer_err = ngx.timer.at(0, refresh)
        if not armed then
            ngx.log(ngx.ERR, DICT_NAME, ": cannot schedule initial refresh: ", timer_err)
        end
    }

    gzip on;
    gzip_disable "MSIE [1-6]\\.(?!.*SV1)";
    gzip_proxied any;
    gzip_comp_level 5;
    # Raster images (gif/png/jpeg) are already compressed and are left out;
    # application/json is included so API responses are compressed too.
    gzip_types text/plain text/css text/javascript
               application/javascript application/x-javascript application/json
               text/xml application/xml application/rss+xml
               image/x-icon image/svg+xml;
    gzip_vary on;

    include /usr/local/openresty/nginx/conf/conf.d/*.conf;
}