mirror of https://github.com/interlegis/sapl.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
173 lines
7.0 KiB
173 lines
7.0 KiB
# Dynamic module that provides the geoip2 directive used inside the http block.
load_module modules/ngx_http_geoip2_module.so;
|
|
|
|
# nginx does not pass inherited environment variables on to its workers
# unless they are named with env. REDIS_URL is read with os.getenv() in the
# init_worker Lua block of this file. POD_NAMESPACE is not read in this
# file; presumably included configs use it - confirm.
|
|
env POD_NAMESPACE;
|
|
env REDIS_URL;
|
|
|
|
# Worker processes run as user www-data, group nginx.
user www-data nginx;
|
|
# A single worker process.
worker_processes 1;
|
|
|
|
# Main error log; messages of severity warn and above are written.
error_log /var/log/nginx/error.log warn;
|
|
# File that stores the process ID of the master process.
pid /var/run/nginx.pid;
|
|
|
|
|
|
# Connection-processing settings.
events {
|
|
# Maximum number of simultaneous connections one worker process may open.
worker_connections 1024;
|
|
}
|
|
|
|
|
|
http {
|
|
include /etc/nginx/mime.types;
|
|
default_type application/octet-stream;
|
|
|
|
# ----------------------------------------------------------------
|
|
# Real client IP extracted from X-Forwarded-For set by K8s Ingress.
|
|
# ----------------------------------------------------------------
|
|
# Replace the client address with the one found in X-Forwarded-For, but only
# for requests that arrive from one of the trusted sources listed below.
real_ip_header X-Forwarded-For;
|
|
# Recursive search: trusted addresses in the header are skipped and the last
# non-trusted address is used as the client address.
real_ip_recursive on;
|
|
# Trusted sources: the RFC 1918 private ranges and loopback.
set_real_ip_from 10.0.0.0/8;
|
|
set_real_ip_from 172.16.0.0/12;
|
|
set_real_ip_from 192.168.0.0/16;
|
|
set_real_ip_from 127.0.0.1;
|
|
|
|
# Access log line layout named "main": client address, user, time, request
# line, status, body size, referer, user agent, the raw X-Forwarded-For
# header, and the request processing time as rt=<seconds>.
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
|
|
'$status $body_bytes_sent "$http_referer" '
|
|
'"$http_user_agent" "$http_x_forwarded_for" '
|
|
'rt=$request_time';
|
|
|
|
# Write the access log using the "main" format defined above.
access_log /var/log/nginx/access.log main;
|
|
|
|
# ----------------------------------------------------------------
|
|
# sendfile enabled (it was off, which disabled zero-copy file serving).
|
|
# ----------------------------------------------------------------
|
|
sendfile on;
|
|
tcp_nopush on;
|
|
tcp_nodelay on;
|
|
|
|
# ----------------------------------------------------------------
|
|
# Timeouts — reduced from 300s to prevent bots holding threads.
|
|
# Per-location overrides in sapl.conf handle legitimate slow ops.
|
|
# ----------------------------------------------------------------
|
|
keepalive_timeout 75; # was 300
|
|
proxy_connect_timeout 10s;
|
|
proxy_read_timeout 120s; # was 300s — overridden per-location
|
|
proxy_send_timeout 120s;
|
|
|
|
# ----------------------------------------------------------------
|
|
# Rate limiting zones (effective once real_ip is resolved).
|
|
# sapl_general : 120 req/min — aligned with Django anon rate (RATE_LIMITER_RATE)
|
|
# sapl_media : 240 req/min — aligned with Django auth rate (RATE_LIMITER_RATE_AUTHENTICATED)
|
|
# sapl_api : 120 req/min — aligned with Django rate limiter threshold
|
|
# sapl_heavy : 10 req/min — PDF generation; slow by design
|
|
# Burst values are env-var configurable at container start (start.sh).
|
|
# ----------------------------------------------------------------
|
|
# Requests rejected by a rate limit are logged at warn level (delayed
# requests are logged one level lower).
limit_req_log_level warn;
|
|
|
|
# Every zone is keyed on the binary client address. These directives only
# declare the zones (name:shared-memory size, rate); the limit_req directives
# that apply them are not in this file - presumably in the included
# conf.d files.
limit_req_zone $binary_remote_addr zone=sapl_general:20m rate=120r/m;
|
|
limit_req_zone $binary_remote_addr zone=sapl_media:20m rate=240r/m;
|
|
limit_req_zone $binary_remote_addr zone=sapl_api:20m rate=120r/m;
|
|
limit_req_zone $binary_remote_addr zone=sapl_heavy:10m rate=10r/m;
|
|
|
|
# ----------------------------------------------------------------
|
|
# ASN-Based Blocking (datacenter / scraper ASNs).
|
|
# Requires libnginx-mod-http-geoip2 and GeoLite2-ASN.mmdb.
|
|
# ----------------------------------------------------------------
|
|
# Look up the client address in the GeoLite2 ASN database and expose the
# result as nginx variables. No source= parameter is given, so the module
# default lookup address applies (presumably $remote_addr after the real_ip
# rewrite above - confirm).
geoip2 /etc/nginx/geoip/GeoLite2-ASN.mmdb {
|
|
# Numeric ASN; consumed by the $bot_asn map in this file.
$geoip2_asn_number autonomous_system_number;
|
|
# Organization name of the ASN; not referenced elsewhere in this file.
$geoip2_asn_org autonomous_system_organization;
|
|
}
|
|
|
|
# $bot_asn is 1 when the client ASN is one of the hosting providers listed
# here and 0 otherwise (including when the lookup produced no ASN). This map
# only computes the flag; whatever acts on it is not in this file.
map $geoip2_asn_number $bot_asn {
|
|
default 0;
|
|
16509 1; # Amazon AWS
|
|
14618 1; # Amazon AWS us-east
|
|
8075 1; # Microsoft Azure
|
|
396982 1; # Google Cloud
|
|
20473 1; # Vultr
|
|
24940 1; # Hetzner
|
|
16276 1; # OVH
|
|
36352 1; # ColoCrossing
|
|
63949 1; # Linode / Akamai
|
|
}
|
|
|
|
# ----------------------------------------------------------------
|
|
# Bot blocking by User-Agent.
|
|
# Chrome/98.0.4758 is a confirmed scraper (no real user runs a
|
|
# 2022 browser version in 2026). Googlebot is deliberately not blocked, for SEO.
|
|
# ----------------------------------------------------------------
|
|
# $bot_ua_blocked is 1 when the User-Agent header matches any pattern below
# and 0 otherwise. The "~*" prefix makes each entry a case-insensitive
# regular expression that may match anywhere in the header. This map only
# computes the flag; whatever acts on it is not in this file.
map $http_user_agent $bot_ua_blocked {
|
|
default 0;
|
|
"~*GPTBot" 1;
|
|
"~*ClaudeBot" 1;
|
|
"~*PerplexityBot" 1;
|
|
"~*Bytespider" 1;
|
|
"~*AhrefsBot" 1;
|
|
"~*SemrushBot" 1;
|
|
"~*DotBot" 1;
|
|
"~*meta-externalagent" 1;
|
|
"~*OAI-SearchBot" 1;
|
|
"~*bingbot" 1;
|
|
"~*SERankingBacklinksBot" 1;
|
|
# Dots are escaped so only the literal version string 98.0.4758 matches.
"~*Chrome/98\.0\.4758" 1;
|
|
}
|
|
|
|
# ----------------------------------------------------------------
|
|
# Lua: search path for Debian-packaged resty.* libraries.
|
|
# lua-resty-redis installs to /usr/share/lua/5.1/resty/redis.lua.
|
|
# ----------------------------------------------------------------
|
|
lua_package_path '/usr/share/lua/5.1/?.lua;;';
|
|
|
|
# ----------------------------------------------------------------
|
|
# Shared dict for IP-prefix deny list (refreshed every 60s).
|
|
# 1 MB holds ~10,000 prefix entries with overhead to spare.
|
|
# ----------------------------------------------------------------
|
|
lua_shared_dict ip_prefix_blocked 1m;
|
|
|
|
# ----------------------------------------------------------------
|
|
# Background timer: populates ip_prefix_blocked from Redis DB 1.
|
|
# Runs once per worker at startup, then every 60s.
|
|
# ----------------------------------------------------------------
|
|
init_worker_by_lua_block {
    -- Keeps the ip_prefix_blocked shared dict in sync with the Redis set
    -- rl:ip_prefix:blocked (DB 1). Each worker runs one refresh at startup
    -- and then another INTERVAL seconds after the previous one finished.
    -- When Redis cannot be read, the previous dict contents are kept and
    -- the failure is logged instead of being silently dropped.

    local INTERVAL     = 60     -- seconds between refreshes
    local TIMEOUT_MS   = 500    -- cosocket timeout for each Redis operation
    local REDIS_DB     = 1
    local SET_KEY      = "rl:ip_prefix:blocked"
    local DEFAULT_HOST = "127.0.0.1"
    local DEFAULT_PORT = 6379
    local LOG_TAG      = "ip_prefix_blocked: "

    -- Decode %XX escapes (used for credentials embedded in the URL).
    local function pct_decode(s)
        return (s:gsub("%%(%x%x)", function(hex)
            return string.char(tonumber(hex, 16))
        end))
    end

    -- REDIS_URL form: redis://user:password@host:port/db
    -- The userinfo part, the port and the db suffix are all optional. The db
    -- suffix is ignored because this block always selects REDIS_DB.
    local url = os.getenv("REDIS_URL") or "redis://127.0.0.1:6379"
    local authority = url:match("redis://([^/]*)") or ""

    -- Split off the credentials first so they are never mistaken for the
    -- host. The greedy match splits at the last @ of the authority.
    local userinfo, hostport = authority:match("^(.*)@(.*)$")
    if not userinfo then
        hostport = authority
    end

    local REDIS_HOST, port_str = hostport:match("^([^:]+):(%d+)")
    if not REDIS_HOST then
        REDIS_HOST = hostport:match("^([^:]+)") or DEFAULT_HOST
    end
    local REDIS_PORT = tonumber(port_str) or DEFAULT_PORT

    -- Credentials are used only when a non-empty password is present.
    local REDIS_USER, REDIS_PASS
    if userinfo then
        local user, pass = userinfo:match("^([^:]*):(.*)$")
        if pass and pass ~= "" then
            REDIS_PASS = pct_decode(pass)
            if user ~= "" then
                REDIS_USER = pct_decode(user)
            end
        end
    end

    -- Turn one set member into its dict key: drop a single trailing dot,
    -- then append a dot when fewer than three dots remain. For example
    -- 10.1 and 10.1. both become 10.1. while 10.1.2.3 is stored unchanged.
    -- NOTE(review): the dotted form presumably matches how the request-time
    -- lookup builds its keys; that code is not in this file - confirm.
    local function to_key(member)
        local stripped = member:gsub("%.$", "")
        local _, dots = stripped:gsub("%.", "")
        if dots < 3 then
            return stripped .. "."
        end
        return stripped
    end

    -- Fetch all members of SET_KEY from Redis.
    -- Returns the member table, or nil plus a message on any failure.
    local function fetch_members()
        local red = require("resty.redis"):new()
        red:set_timeout(TIMEOUT_MS)

        local ok, err = red:connect(REDIS_HOST, REDIS_PORT)
        if not ok then
            return nil, "connect failed: " .. tostring(err)
        end

        local res
        if REDIS_PASS then
            if REDIS_USER then
                res, err = red:auth(REDIS_USER, REDIS_PASS)
            else
                res, err = red:auth(REDIS_PASS)
            end
            if not res then
                red:close()
                return nil, "auth failed: " .. tostring(err)
            end
        end

        -- A failed SELECT must abort the refresh: reading the set from the
        -- wrong DB would look like an empty list and wipe the dict.
        res, err = red:select(REDIS_DB)
        if not res then
            red:close()
            return nil, "select failed: " .. tostring(err)
        end

        local members
        members, err = red:smembers(SET_KEY)
        if type(members) ~= "table" then
            red:close()
            return nil, "smembers failed: " .. tostring(err)
        end

        -- Return the healthy connection to the pool (10 s idle, pool size 1).
        red:set_keepalive(10000, 1)
        return members
    end

    -- Replace the dict contents with the current Redis set.
    local function sync()
        local members, err = fetch_members()
        if not members then
            ngx.log(ngx.WARN, LOG_TAG, "refresh skipped, keeping previous list: ", err)
            return
        end

        local dict = ngx.shared.ip_prefix_blocked
        dict:flush_all()
        for _, member in ipairs(members) do
            if type(member) == "string" then
                local ok, set_err, forcible = dict:set(to_key(member), 1)
                if not ok then
                    ngx.log(ngx.ERR, LOG_TAG, "could not store entry: ", set_err)
                elseif forcible then
                    ngx.log(ngx.WARN, LOG_TAG, "shared dict is full, older entries were evicted")
                end
            end
        end
    end

    local function refresh(premature)
        if premature then return end

        -- pcall keeps an unexpected runtime error from ending the timer
        -- chain: the next run is always scheduled below.
        local ok, err = pcall(sync)
        if not ok then
            ngx.log(ngx.ERR, LOG_TAG, "refresh failed: ", err)
        end

        local armed, timer_err = ngx.timer.at(INTERVAL, refresh)
        if not armed then
            ngx.log(ngx.ERR, LOG_TAG, "could not schedule next refresh: ", timer_err)
        end
    end

    local armed, timer_err = ngx.timer.at(0, refresh)
    if not armed then
        ngx.log(ngx.ERR, LOG_TAG, "could not schedule initial refresh: ", timer_err)
    end
}
|
|
|
|
# Response compression.
# text/html is always compressed by nginx, so it is not listed in gzip_types.
# GIF, PNG and JPEG are left out because those formats are already
# compressed and gzipping them again costs CPU for no size gain.
# application/json is listed so JSON API responses are compressed too.
gzip on;
gzip_disable "MSIE [1-6]\\.(?!.*SV1)";
gzip_proxied any;
gzip_comp_level 5;
gzip_vary on;
gzip_types
    text/plain
    text/css
    text/javascript
    text/xml
    application/javascript
    application/x-javascript
    application/json
    application/xml
    application/rss+xml
    image/x-icon
    image/svg+xml;
|
|
|
|
include /etc/nginx/conf.d/*.conf;
|
|
}
|
|
|