# ------------------------------------------------------------------
# Main nginx configuration.
# Sets up real-client-IP resolution, rate-limit zones, ASN and
# User-Agent based bot maps, and a Lua timer that mirrors an IP-prefix
# deny list from Redis into a shared dict. Virtual hosts are loaded from
# /etc/nginx/conf.d/*.conf at the end of the http context.
# ------------------------------------------------------------------
load_module modules/ngx_http_geoip2_module.so;
load_module modules/ngx_http_lua_module.so;

# Make POD_NAMESPACE and Redis URL available to Lua.
env POD_NAMESPACE;
env REDIS_URL;

user www-data nginx;
worker_processes 1;

error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # ----------------------------------------------------------------
    # Real client IP extracted from X-Forwarded-For set by K8s Ingress.
    # Only private ranges and loopback are trusted as proxies.
    # ----------------------------------------------------------------
    real_ip_header X-Forwarded-For;
    real_ip_recursive on;
    set_real_ip_from 10.0.0.0/8;
    set_real_ip_from 172.16.0.0/12;
    set_real_ip_from 192.168.0.0/16;
    set_real_ip_from 127.0.0.1;

    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'rt=$request_time';

    access_log /var/log/nginx/access.log main;

    # ----------------------------------------------------------------
    # Zero-copy file serving. sendfile was previously off, which
    # disabled it.
    # ----------------------------------------------------------------
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;

    # ----------------------------------------------------------------
    # Timeouts — reduced from 300s to prevent bots holding threads.
    # Per-location overrides in sapl.conf handle legitimate slow ops.
    # ----------------------------------------------------------------
    keepalive_timeout 75;        # was 300
    proxy_connect_timeout 10s;
    proxy_read_timeout 120s;     # was 300s — overridden per-location
    proxy_send_timeout 120s;

    # ----------------------------------------------------------------
    # Rate limiting zones (effective once real_ip is resolved).
    #   sapl_general : 120 req/min — aligned with Django anon rate (RATE_LIMITER_RATE)
    #   sapl_media   : 240 req/min — aligned with Django auth rate (RATE_LIMITER_RATE_AUTHENTICATED)
    #   sapl_api     : 120 req/min — aligned with Django rate limiter threshold
    #   sapl_heavy   :  10 req/min — PDF generation; slow by design
    # Burst values are env-var configurable at container start (start.sh).
    # ----------------------------------------------------------------
    limit_req_log_level warn;
    limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=120r/m;
    limit_req_zone $binary_remote_addr zone=sapl_media:20m rate=240r/m;
    limit_req_zone $binary_remote_addr zone=sapl_api:20m rate=120r/m;
    limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m;

    # ----------------------------------------------------------------
    # ASN-Based Blocking (datacenter / scraper ASNs).
    # Requires libnginx-mod-http-geoip2 and GeoLite2-ASN.mmdb.
    # $bot_asn is 1 for the listed ASNs and 0 otherwise; it is only
    # defined here and is expected to be consumed by the included
    # server configs.
    # ----------------------------------------------------------------
    geoip2 /etc/nginx/geoip/GeoLite2-ASN.mmdb {
        $geoip2_asn_number autonomous_system_number;
        $geoip2_asn_org autonomous_system_organization;
    }

    map $geoip2_asn_number $bot_asn {
        default 0;
        16509   1;  # Amazon AWS
        14618   1;  # Amazon AWS us-east
        8075    1;  # Microsoft Azure
        396982  1;  # Google Cloud
        20473   1;  # Vultr
        24940   1;  # Hetzner
        16276   1;  # OVH
        36352   1;  # ColoCrossing
        63949   1;  # Linode / Akamai
    }

    # ----------------------------------------------------------------
    # Bot blocking by User-Agent (case-insensitive regex matches).
    # Chrome/98.0.4758 is a confirmed scraper (no real user runs a
    # 2022 browser version in 2026). Googlebot excluded for SEO.
    # ----------------------------------------------------------------
    map $http_user_agent $bot_ua_blocked {
        default                   0;
        "~*GPTBot"                1;
        "~*ClaudeBot"             1;
        "~*PerplexityBot"         1;
        "~*Bytespider"            1;
        "~*AhrefsBot"             1;
        "~*SemrushBot"            1;
        "~*DotBot"                1;
        "~*meta-externalagent"    1;
        "~*OAI-SearchBot"         1;
        "~*bingbot"               1;
        "~*SERankingBacklinksBot" 1;
        "~*Chrome/98\.0\.4758"    1;
    }

    # ----------------------------------------------------------------
    # Lua: search path for vendored resty.* libraries.
    # lua-resty-redis is not in Debian repos; vendored at docker/config/nginx/resty_redis.lua
    # and copied to /usr/lib/lua/resty/redis.lua at image build time.
    # ----------------------------------------------------------------
    lua_package_path '/usr/lib/lua/?.lua;;';

    # ----------------------------------------------------------------
    # Shared dict for IP-prefix deny list (refreshed every 60s).
    # 1 MB holds ~10,000 prefix entries with overhead to spare.
    # ----------------------------------------------------------------
    lua_shared_dict ip_prefix_blocked 1m;

    # ----------------------------------------------------------------
    # Background timer: populates ip_prefix_blocked from Redis DB 1.
    # Runs once per worker at startup, then every 60s.
    # On any Redis failure the previous list is kept and the reason is
    # written to the error log.
    # ----------------------------------------------------------------
    init_worker_by_lua_block {
        local SET_KEY  = "rl:ip_prefix:blocked"
        local REDIS_DB = 1
        local INTERVAL = 60

        -- REDIS_URL has the form redis://[user:password@]host[:port][/db].
        -- The userinfo part is split off first so that a colon inside it is
        -- never mistaken for the host/port separator. The db path is not
        -- used: the deny list is always read from REDIS_DB.
        local url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379"
        local authority = url:match("redis://([^/]*)") or ""
        local userinfo, hostport = authority:match("^(.*)@(.*)$")
        if not userinfo then
            hostport = authority
        end

        local REDIS_HOST, port_str = hostport:match("^([^:]+):(%d+)")
        if not REDIS_HOST then
            REDIS_HOST = hostport:match("^([^:]+)") or "127.0.0.1"
        end
        local REDIS_PORT = tonumber(port_str) or 6379

        local REDIS_USER, REDIS_PASS
        if userinfo then
            REDIS_USER, REDIS_PASS = userinfo:match("^([^:]*):(.*)$")
        end

        -- Fetch the raw member list from Redis.
        -- Returns the table of members, or nil plus an error description.
        local function fetch_members()
            local red = require("resty.redis"):new()
            red:set_timeout(500)

            local ok, err = red:connect(REDIS_HOST, REDIS_PORT)
            if not ok then
                return nil, "connect failed: " .. tostring(err)
            end

            if REDIS_PASS and REDIS_PASS ~= "" then
                if REDIS_USER and REDIS_USER ~= "" then
                    ok, err = red:auth(REDIS_USER, REDIS_PASS)
                else
                    ok, err = red:auth(REDIS_PASS)
                end
                if not ok then
                    red:close()
                    return nil, "auth failed: " .. tostring(err)
                end
            end

            -- A failed SELECT must abort: reading the set from another DB
            -- would most likely yield an empty list and wipe the deny list.
            ok, err = red:select(REDIS_DB)
            if not ok then
                red:close()
                return nil, "select failed: " .. tostring(err)
            end

            local members
            members, err = red:smembers(SET_KEY)
            if type(members) ~= "table" then
                red:close()
                return nil, "smembers failed: " .. tostring(err)
            end

            -- Only a connection that completed every command is pooled.
            red:set_keepalive(10000, 1)
            return members
        end

        -- Normalize a stored prefix: drop one trailing dot, then re-append
        -- a dot when fewer than three dots remain, so partial prefixes end
        -- with a dot while four-part entries are stored as-is.
        local function normalize(member)
            local stripped = member:gsub("%.$", "")
            local _, dots = stripped:gsub("%.", "")
            if dots < 3 then
                return stripped .. "."
            end
            return stripped
        end

        -- Replace the contents of the shared dict with the current Redis set.
        local function reload()
            local members, err = fetch_members()
            if not members then
                ngx.log(ngx.WARN, "ip_prefix_blocked: keeping previous list: ", err)
                return
            end

            local dict = ngx.shared.ip_prefix_blocked
            local evicted = false
            -- NOTE(review): with more than one worker process, requests in
            -- other workers may briefly see a partially filled dict between
            -- flush_all and the last set.
            dict:flush_all()
            for _, m in ipairs(members) do
                if type(m) == "string" then
                    local ok, set_err, forcible = dict:set(normalize(m), 1)
                    if not ok then
                        ngx.log(ngx.ERR, "ip_prefix_blocked: cannot store entry: ", set_err)
                    elseif forcible then
                        evicted = true
                    end
                end
            end
            if evicted then
                ngx.log(ngx.WARN, "ip_prefix_blocked: shared dict is full, ",
                        "entries were evicted; increase lua_shared_dict size")
            end
        end

        -- Timer callback: reload, then always schedule the next run so that
        -- a single failing refresh cannot end the refresh cycle.
        local function refresh(premature)
            if premature then
                return
            end

            local ok, err = pcall(reload)
            if not ok then
                ngx.log(ngx.ERR, "ip_prefix_blocked: refresh error: ", err)
            end

            local scheduled, timer_err = ngx.timer.at(INTERVAL, refresh)
            if not scheduled then
                ngx.log(ngx.ERR, "ip_prefix_blocked: cannot reschedule refresh: ", timer_err)
            end
        end

        local started, start_err = ngx.timer.at(0, refresh)
        if not started then
            ngx.log(ngx.ERR, "ip_prefix_blocked: cannot start refresh timer: ", start_err)
        end
    }

    # ----------------------------------------------------------------
    # Response compression.
    # NOTE(review): image/gif, image/png and image/jpeg are already
    # compressed formats; gzipping them usually costs CPU for little
    # gain — consider dropping them from gzip_types.
    # ----------------------------------------------------------------
    gzip on;
    gzip_disable "MSIE [1-6]\\.(?!.*SV1)";
    gzip_proxied any;
    gzip_comp_level 5;
    gzip_types text/plain text/css text/javascript application/javascript application/x-javascript text/xml application/xml application/rss+xml image/gif image/png image/x-icon image/jpeg image/svg+xml;
    gzip_vary on;

    include /etc/nginx/conf.d/*.conf;
}