# Main nginx configuration: loads the GeoIP2 dynamic module, resolves the real
# client IP behind the Kubernetes Ingress, and defines the shared rate-limit
# zones and bot-detection maps. Per-site configuration is pulled in from
# /etc/nginx/conf.d/*.conf at the end of the http block.

# Dynamic modules must be loaded in the main context.
load_module modules/ngx_http_geoip2_module.so;

# Worker processes run as user "www-data" with group "nginx".
# NOTE(review): confirm that the "nginx" group exists in the target image.
user www-data nginx;
worker_processes 1;

error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # ----------------------------------------------------------------
    # Real client IP extracted from X-Forwarded-For set by K8s Ingress.
    # Only the private ranges and loopback listed below are trusted as
    # proxies.
    # ----------------------------------------------------------------
    real_ip_header X-Forwarded-For;
    real_ip_recursive on;
    set_real_ip_from 10.0.0.0/8;
    set_real_ip_from 172.16.0.0/12;
    set_real_ip_from 192.168.0.0/16;
    set_real_ip_from 127.0.0.1;

    # Access log format; also records the raw X-Forwarded-For header and
    # the request time (rt=).
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'rt=$request_time';

    access_log /var/log/nginx/access.log main;

    # ----------------------------------------------------------------
    # FIX: sendfile was previously off, which disabled zero-copy file
    # serving.
    # ----------------------------------------------------------------
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;

    # ----------------------------------------------------------------
    # Timeouts — reduced from 300s to prevent bots holding threads.
    # Per-location overrides in sapl.conf handle legitimate slow ops.
    # ----------------------------------------------------------------
    keepalive_timeout 75;        # was 300
    proxy_connect_timeout 10s;
    proxy_read_timeout 120s;     # was 300s — overridden per-location
    proxy_send_timeout 120s;

    # ----------------------------------------------------------------
    # Rate limiting zones (effective once real_ip is resolved).
    #   sapl_general : 30 req/min for most traffic
    #   sapl_heavy   : 10 req/min for PDF/report endpoints
    # Only the zones are declared here; no limit_req directive appears
    # in this file.
    # ----------------------------------------------------------------
    limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=30r/m;
    limit_req_zone $binary_remote_addr zone=sapl_heavy:20m rate=10r/m;

    # ----------------------------------------------------------------
    # ASN-Based Blocking (datacenter / scraper ASNs).
    # Requires libnginx-mod-http-geoip2 and GeoLite2-ASN.mmdb.
    # See rate-limiter-v2.md Phase 0 §3.4 for install instructions.
    # ----------------------------------------------------------------
    geoip2 /etc/nginx/geoip/GeoLite2-ASN.mmdb {
        $geoip2_asn_number autonomous_system_number;
        $geoip2_asn_org autonomous_system_organization;
    }

    # $bot_asn is 1 when the client's ASN is one of the listed hosting
    # providers, 0 otherwise. This file only sets the flag; nothing here
    # acts on it.
    map $geoip2_asn_number $bot_asn {
        default 0;
        16509   1;  # Amazon AWS
        14618   1;  # Amazon AWS us-east
        8075    1;  # Microsoft Azure
        396982  1;  # Google Cloud
        20473   1;  # Vultr
        24940   1;  # Hetzner
        16276   1;  # OVH
        36352   1;  # ColoCrossing
        63949   1;  # Linode / Akamai
    }

    # ----------------------------------------------------------------
    # Bot blocking by User-Agent.
    # Chrome/98.0.4758 is a confirmed scraper (no real user runs a
    # 2022 browser version in 2026). Googlebot excluded for SEO.
    # Patterns are case-insensitive regexes (~*). This file only sets
    # the $bot_ua_blocked flag; nothing here acts on it.
    # ----------------------------------------------------------------
    map $http_user_agent $bot_ua_blocked {
        default                   0;
        "~*GPTBot"                1;
        "~*ClaudeBot"             1;
        "~*PerplexityBot"         1;
        "~*Bytespider"            1;
        "~*AhrefsBot"             1;
        "~*SemrushBot"            1;
        "~*DotBot"                1;
        "~*meta-externalagent"    1;
        "~*OAI-SearchBot"         1;
        "~*Chrome/98\.0\.4758"    1;
    }

    # Response compression.
    gzip on;
    gzip_disable "MSIE [1-6]\\.(?!.*SV1)";
    gzip_proxied any;
    gzip_comp_level 5;
    # NOTE(review): image/gif, image/png and image/jpeg are already
    # compressed formats; gzipping them likely costs CPU for little gain.
    # Left unchanged here — confirm before removing.
    gzip_types text/plain
               text/css
               text/javascript
               application/javascript
               application/x-javascript
               text/xml
               application/xml
               application/rss+xml
               image/gif
               image/png
               image/x-icon
               image/jpeg
               image/svg+xml;
    gzip_vary on;

    # Per-site server/location configuration.
    include /etc/nginx/conf.d/*.conf;
}