# Main nginx configuration: loads the GeoIP2 dynamic module, sets process-level
# options, and defines the shared http{} context (real-IP resolution, logging,
# timeouts, rate-limit zones, bot-classification maps, gzip). Server/location
# blocks are pulled in from /etc/nginx/conf.d/*.conf at the end of http{}.

load_module modules/ngx_http_geoip2_module.so;

user www-data nginx;
worker_processes 1;

error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # ----------------------------------------------------------------
    # Real client IP extracted from X-Forwarded-For set by K8s Ingress.
    # Only the private/loopback ranges below are trusted as proxies;
    # with real_ip_recursive on, the client address becomes the last
    # non-trusted address found in the header.
    # ----------------------------------------------------------------
    real_ip_header X-Forwarded-For;
    real_ip_recursive on;
    set_real_ip_from 10.0.0.0/8;
    set_real_ip_from 172.16.0.0/12;
    set_real_ip_from 192.168.0.0/16;
    set_real_ip_from 127.0.0.1;

    # $remote_addr is the resolved client IP (after real_ip processing);
    # the raw X-Forwarded-For header is logged alongside it.
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    'rt=$request_time';

    access_log /var/log/nginx/access.log main;

    # ----------------------------------------------------------------
    # Zero-copy file serving — sendfile was previously off.
    # ----------------------------------------------------------------
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;

    # ----------------------------------------------------------------
    # Timeouts — reduced from 300s to prevent bots holding threads.
    # Per-location overrides in sapl.conf handle legitimate slow ops.
    # ----------------------------------------------------------------
    keepalive_timeout 75;        # was 300
    proxy_connect_timeout 10s;
    proxy_read_timeout 120s;     # was 300s — overridden per-location
    proxy_send_timeout 120s;

    # ----------------------------------------------------------------
    # Rate limiting zones (effective once real_ip is resolved).
    #   sapl_general :  90 req/min — HTML pages (burst absorbs parallel assets)
    #   sapl_media   : 180 req/min — /media/ has its own bucket; doesn't drain general
    #   sapl_api     :  60 req/min — API quota layer is the real binding constraint
    #   sapl_heavy   :  10 req/min — PDF generation; slow by design
    # Burst values are env-var configurable at container start (start.sh).
    # Only the zones are declared here; the limit_req directives that apply
    # them are not in this file.
    # ----------------------------------------------------------------
    limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=90r/m;
    limit_req_zone $binary_remote_addr zone=sapl_media:20m rate=180r/m;
    limit_req_zone $binary_remote_addr zone=sapl_api:20m rate=60r/m;
    limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m;

    # ----------------------------------------------------------------
    # ASN-Based Blocking (datacenter / scraper ASNs).
    # Requires libnginx-mod-http-geoip2 and GeoLite2-ASN.mmdb.
    # See rate-limiter-v2.md Phase 0 §3.4 for install instructions.
    # ----------------------------------------------------------------
    geoip2 /etc/nginx/geoip/GeoLite2-ASN.mmdb {
        $geoip2_asn_number autonomous_system_number;
        $geoip2_asn_org autonomous_system_organization;
    }

    # $bot_asn is 1 for the listed ASNs, 0 otherwise.
    # NOTE(review): this map only defines the variable; the rule that acts on
    # it is presumably in the included conf.d files — confirm.
    map $geoip2_asn_number $bot_asn {
        default 0;
        16509   1;  # Amazon AWS
        14618   1;  # Amazon AWS us-east
        8075    1;  # Microsoft Azure
        396982  1;  # Google Cloud
        20473   1;  # Vultr
        24940   1;  # Hetzner
        16276   1;  # OVH
        36352   1;  # ColoCrossing
        63949   1;  # Linode / Akamai
    }

    # ----------------------------------------------------------------
    # Bot blocking by User-Agent (case-insensitive regex matches).
    # Chrome/98.0.4758 is a confirmed scraper (no real user runs a
    # 2022 browser version in 2026). Googlebot is intentionally not
    # listed, so it stays unblocked for SEO.
    # NOTE(review): as with $bot_asn, the rule that acts on
    # $bot_ua_blocked is not in this file — confirm it exists in conf.d.
    # ----------------------------------------------------------------
    map $http_user_agent $bot_ua_blocked {
        default 0;
        "~*GPTBot"                 1;
        "~*ClaudeBot"              1;
        "~*PerplexityBot"          1;
        "~*Bytespider"             1;
        "~*AhrefsBot"              1;
        "~*SemrushBot"             1;
        "~*DotBot"                 1;
        "~*meta-externalagent"     1;
        "~*OAI-SearchBot"          1;
        "~*bingbot"                1;
        "~*SERankingBacklinksBot"  1;
        "~*Chrome/98\.0\.4758"     1;
    }

    # ----------------------------------------------------------------
    # Response compression.
    # GIF/PNG/JPEG are deliberately absent from gzip_types: those formats
    # are already compressed, so gzipping them costs CPU on every response
    # for essentially no size reduction. SVG (XML text) and ICO still
    # compress well and are kept.
    # ----------------------------------------------------------------
    gzip on;
    gzip_disable "MSIE [1-6]\\.(?!.*SV1)";
    gzip_proxied any;
    gzip_comp_level 5;
    gzip_types
        text/plain
        text/css
        text/javascript
        application/javascript
        application/x-javascript
        text/xml
        application/xml
        application/rss+xml
        image/x-icon
        image/svg+xml;
    gzip_vary on;

    include /etc/nginx/conf.d/*.conf;
}