diff --git a/docker/Dockerfile b/docker/Dockerfile index aa9159cc1..8cd613d77 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -92,8 +92,7 @@ COPY . /var/interlegis/sapl/ # GeoLite2-ASN.mmdb is NOT downloaded at build time. # Run docker/geoip/update_geoip.sh before each build to refresh it. # The .mmdb file lives at docker/geoip/GeoLite2-ASN.mmdb (git-ignored binary). -# If the file is absent the build still succeeds but ASN-based blocking is -# disabled and nginx will emit a startup warning. +# If the file is absent the build FAILS — run update_geoip.sh first. RUN if [ "$WITH_NGINX" = "1" ]; then \ rm -f /etc/nginx/conf.d/*; \ cp docker/config/nginx/sapl.conf /etc/nginx/conf.d/sapl.conf.template; \ @@ -102,9 +101,9 @@ RUN if [ "$WITH_NGINX" = "1" ]; then \ cp docker/geoip/GeoLite2-ASN.mmdb /etc/nginx/geoip/GeoLite2-ASN.mmdb; \ echo "[geoip] GeoLite2-ASN.mmdb installed."; \ else \ - echo "[geoip] WARNING: docker/geoip/GeoLite2-ASN.mmdb not found."; \ + echo "[geoip] ERROR: docker/geoip/GeoLite2-ASN.mmdb not found."; \ echo "[geoip] Run docker/geoip/update_geoip.sh then rebuild."; \ - echo "[geoip] ASN-based blocking will be DISABLED in this image."; \ + exit 1; \ fi; \ fi diff --git a/docker/config/nginx/sapl.conf b/docker/config/nginx/sapl.conf index 9123181a8..c47fd3075 100644 --- a/docker/config/nginx/sapl.conf +++ b/docker/config/nginx/sapl.conf @@ -42,6 +42,8 @@ server { # ---------------------------------------------------------------- location /static/ { alias /var/interlegis/sapl/collected_static/; + expires 90m; + add_header Cache-Control "public"; } # ---------------------------------------------------------------- @@ -62,7 +64,7 @@ server { # Internal location used exclusively by X-Accel-Redirect responses # from serve_media(). Not reachable by external clients. 
- location /_accel/media/ { + location /private/media/ { internal; alias /var/interlegis/sapl/media/; sendfile on; @@ -88,23 +90,6 @@ server { proxy_pass http://sapl_server; } - # ---------------------------------------------------------------- - # Upload endpoints — nginx buffers the full upload before forwarding. - # Protects workers from slow municipal-link clients uploading 150 MB. - # ---------------------------------------------------------------- - location ~* ^/(protocoloadm/criar-protocolo|materia/.*upload|norma/.*upload) { - proxy_request_buffering on; - proxy_read_timeout 180s; - proxy_send_timeout 180s; - - proxy_set_header X-Request-ID $req_id; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Host $http_host; - proxy_redirect off; - proxy_pass http://sapl_server; - } - # ---------------------------------------------------------------- # /api/ — rate limited, CORS maintained from original config. # ---------------------------------------------------------------- diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 77aca0129..25d33159f 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -62,10 +62,10 @@ services: - sapl-net sapl: - image: sapl:local -# build: -# context: ../ -# dockerfile: ./docker/Dockerfile +# image: interlegis/sapl:3.1.165-RC2 + build: + context: ../ + dockerfile: ./docker/Dockerfile container_name: sapl labels: NAME: "sapl" diff --git a/plan/RATE-LIMITER-PLAN.md b/plan/RATE-LIMITER-PLAN.md index d79ab70e7..c7111d3ba 100644 --- a/plan/RATE-LIMITER-PLAN.md +++ b/plan/RATE-LIMITER-PLAN.md @@ -121,6 +121,9 @@ graph TD |----------|--------|-----------| | Redis topology | **Single pod** (no Sentinel, no Cluster) | 65 MB of active data fits comfortably; cluster complexity not justified | | PDF caching in Redis | **No** — ETags + sendfile are sufficient | Once rate limiting + ETags are active, repeat requests become 
304s with zero bytes transferred | +| HTTP conditional requests | **`ConditionalGetMiddleware` + `@condition` decorator** | `ConditionalGetMiddleware` handles ETag/304 for all views; `@condition(etag_func, last_modified_func)` on materia/norma detail views skips view execution entirely on cache hit | +| Upload endpoint special-casing (nginx) | **Removed** — fall through to `location /` | No justification for separate `limit_req` zone; `location /` with `sapl_general` covers it | +| Static asset cache policy | **90 min** (`expires 90m`, `max-age=5400`) | Conservative — safe with `collectstatic` content-hashed filenames; `immutable` not used (would require verified forever-hashed URLs) | | Rate-limit enforcement | **Django middleware** with shared Redis | No nginx image changes required; solves cross-pod consistency immediately | | `worker_max_memory_per_child` | **400 MB** | Pod limit 1600Mi, 2 workers × 400 MB = 800 MB — leaves 800 Mi headroom | | `sendfile off` → `on` | **Bug** — flip to `on` | No valid production reason found; disabling userspace copy is always correct | @@ -717,11 +720,11 @@ GET /media/foo.pdf │ ▼ serve_media(request, path='foo.pdf') - returns HttpResponse with X-Accel-Redirect: /_accel/media/foo.pdf + returns HttpResponse with X-Accel-Redirect: /private/media/foo.pdf │ ▼ nginx sees X-Accel-Redirect header - /_accel/media/ internal location → reads file from disk → sends to client + /private/media/ internal location → reads file from disk → sends to client ``` nginx does no routing beyond picking a `location` block. The mapping from @@ -740,6 +743,13 @@ response headers. ### nginx locations (`docker/config/nginx/sapl.conf`) ```nginx +# Static files — no rate limiting, no proxy; 90-minute browser cache (`expires` emits `max-age=5400`). +location /static/ { + alias /var/interlegis/sapl/collected_static/; + expires 90m; + add_header Cache-Control "public"; +} + # Proxied to Gunicorn — Django middleware + serve_media() run here. 
location /media/ { limit_req zone=sapl_general burst=${NGINX_BURST_GENERAL} nodelay; @@ -747,7 +757,7 @@ location /media/ { } # Internal — only reachable via X-Accel-Redirect, not by external clients. -location /_accel/media/ { +location /private/media/ { internal; alias /var/interlegis/sapl/media/; sendfile on; @@ -755,6 +765,8 @@ location /_accel/media/ { } ``` +Upload endpoints (`/protocoloadm/criar-protocolo`, `/materia/.*upload`, `/norma/.*upload`) no longer have a dedicated `location` block — they fall through to `location /` which applies the `sapl_general` zone. The removed block's 180 s `proxy_read_timeout` / `proxy_send_timeout` overrides go with it, so uploads now use whatever timeouts apply to `location /`. + ### Django view (`sapl/base/media.py`) `serve_media(request, path)` — registered at `^media/(?P<path>.*)$` in `sapl/urls.py`. @@ -764,7 +776,7 @@ Per-request steps: 1. **Path traversal guard** — `os.path.abspath` check; raises 404 on escape. 2. **Auth gate** — `documentos_privados/` paths require an authenticated session; redirects to login otherwise. 3. **Path counter** — increments `rl:{ns}:path:{sha256}:reqs` in Redis DB 1 (TTL = `MEDIA_PATH_COUNTER_TTL`). -4. **Serve** — in DEBUG: `django.views.static.serve` directly. In production: `X-Accel-Redirect: /_accel/media/`. Nginx sets `Content-Type` from its own `mime.types`. +4. **Serve** — in DEBUG: `django.views.static.serve` directly. In production: `X-Accel-Redirect: /private/media/`. Nginx sets `Content-Type` from its own `mime.types`. ### Settings @@ -957,6 +969,56 @@ class PesquisarMateriaView(FilterView): --- +## HTTP Conditional Requests + +Two complementary mechanisms eliminate redundant work for unchanged content. + +### `ConditionalGetMiddleware` (all views) + +Added to `MIDDLEWARE` in `sapl/settings.py` (after `CommonMiddleware`). For every +Django response it: + +1. Generates a strong `ETag` from an MD5 of the response body if none is set. +2. Compares against the client's `If-None-Match` / `If-Modified-Since`. +3. Returns `304 Not Modified` (no body) on a match. +4. Acts on `GET` only — `HEAD` and every other method pass through the middleware untouched. 
+ +**Caveat**: the view still executes and renders before the check fires. The saving +is bandwidth, not CPU/DB work. + +### `@condition` decorator — materia and norma detail views + +For `MateriaLegislativaCrud.DetailView` and `NormaCrud.DetailView` a cheap +freshness function runs *before* the view body: + +```python +# sapl/materia/views.py +def _materia_last_modified(request, *args, **kwargs): + return MateriaLegislativa.objects.filter( + pk=kwargs['pk'] + ).values_list('data_ultima_atualizacao', flat=True).first() + +def _materia_etag(request, *args, **kwargs): + ts = _materia_last_modified(request, *args, **kwargs) + return f'{kwargs["pk"]}-{ts.timestamp()}-{request.user.pk}' if ts else None + +@method_decorator(condition(etag_func=_materia_etag, last_modified_func=_materia_last_modified), name='get') +class DetailView(AnonCachePageMixin, Crud.DetailView): + ... +``` + +`NormaCrud.DetailView` follows the same pattern with `_norma_last_modified` / +`_norma_etag` querying `NormaJuridica.data_ultima_atualizacao`. + +**On a cache hit**: two cheap `VALUES` queries fire (`condition` calls both freshness functions, and `_materia_etag` repeats the timestamp lookup), Django returns `304` — view body, +template render, and the remaining ORM work are all skipped. + +**Signal used**: `data_ultima_atualizacao` (`auto_now=True`) — updated by Django +on every `save()` (but not by `QuerySet.update()` or bulk operations), so the ETag is invalidated automatically whenever the record +changes through the `save()` path. The ETag also embeds the requesting user's pk so anonymous and authenticated renderings of the same URL never validate against each other. + +--- + ## Open Questions | # | Question | Status | Blocks | diff --git a/sapl/base/media.py b/sapl/base/media.py index b4f8bf57a..7b9cd5d4e 100644 --- a/sapl/base/media.py +++ b/sapl/base/media.py @@ -74,7 +74,7 @@ def serve_media(request, path): # Production: tell nginx to serve the file from the internal location. # Nginx sets Content-Type from its own mime.types when serving the file. 
response = HttpResponse() - response['X-Accel-Redirect'] = f'/_accel/media/{path}' + response['X-Accel-Redirect'] = f'/private/media/{path}' response['Cache-Control'] = 'public, max-age=86400, stale-while-revalidate=3600' response['X-Robots-Tag'] = 'noindex' return response diff --git a/sapl/materia/views.py b/sapl/materia/views.py index 51779d3a9..f3c87a614 100644 --- a/sapl/materia/views.py +++ b/sapl/materia/views.py @@ -34,6 +34,7 @@ import weasyprint from ratelimit.decorators import ratelimit from django.utils.decorators import method_decorator +from django.views.decorators.http import condition import sapl from sapl.base.email_utils import do_envia_email_confirmacao @@ -1798,6 +1799,17 @@ class MateriaAssuntoCrud(MasterDetailCrud): return initial +def _materia_last_modified(request, *args, **kwargs): + return MateriaLegislativa.objects.filter( + pk=kwargs['pk'] + ).values_list('data_ultima_atualizacao', flat=True).first() + + +def _materia_etag(request, *args, **kwargs): + ts = _materia_last_modified(request, *args, **kwargs) + return f'{kwargs["pk"]}-{ts.timestamp()}-{request.user.pk}' if ts else None + + class MateriaLegislativaCrud(Crud): model = MateriaLegislativa help_topic = 'materia_legislativa' @@ -1883,6 +1895,7 @@ class MateriaLegislativaCrud(Crud): def get_success_url(self): return self.search_url + @method_decorator(condition(etag_func=_materia_etag, last_modified_func=_materia_last_modified), name='get') class DetailView(AnonCachePageMixin, Crud.DetailView): # Materia detail pages are public, read-only, and change infrequently # once published. 
Cache anonymous responses for 5 minutes to absorb diff --git a/sapl/norma/views.py b/sapl/norma/views.py index e8684e233..3cfba9942 100644 --- a/sapl/norma/views.py +++ b/sapl/norma/views.py @@ -21,6 +21,7 @@ import weasyprint from ratelimit.decorators import ratelimit from django.utils.decorators import method_decorator +from django.views.decorators.http import condition from sapl import settings import sapl @@ -277,6 +278,17 @@ class NormaTaView(IntegracaoTaView): return self.get_redirect_deactivated() +def _norma_last_modified(request, *args, **kwargs): + return NormaJuridica.objects.filter( + pk=kwargs['pk'] + ).values_list('data_ultima_atualizacao', flat=True).first() + + +def _norma_etag(request, *args, **kwargs): + ts = _norma_last_modified(request, *args, **kwargs) + return f'{kwargs["pk"]}-{ts.timestamp()}-{request.user.pk}' if ts else None + + class NormaCrud(Crud): model = NormaJuridica help_topic = 'norma_juridica' @@ -292,6 +304,7 @@ class NormaCrud(Crud): namespace = self.model._meta.app_config.name return reverse('%s:%s' % (namespace, 'norma_pesquisa')) + @method_decorator(condition(etag_func=_norma_etag, last_modified_func=_norma_last_modified), name='get') class DetailView(Crud.DetailView): def get(self, request, *args, **kwargs): estatisticas_acesso_normas = AppConfig.objects.first().estatisticas_acesso_normas diff --git a/sapl/settings.py b/sapl/settings.py index 6dcd31443..4137b2f35 100644 --- a/sapl/settings.py +++ b/sapl/settings.py @@ -34,7 +34,7 @@ PROJECT_DIR = Path(__file__).ancestor(2) # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = config('SECRET_KEY', default='32jk1h412l3kjh421lkj4hlkj234') # SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = config('DEBUG', default=True, cast=bool) +DEBUG = config('DEBUG', default=False, cast=bool) MESSAGE_STORAGE = 'django.contrib.messages.storage.session.SessionStorage' @@ -43,7 +43,7 @@ ALLOWED_HOSTS = ['*'] LOGIN_REDIRECT_URL = '/' LOGIN_URL = '/login/?next=' -SAPL_VERSION = '3.1.164-RC5' +SAPL_VERSION = '3.1.165-RC2' if DEBUG: EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' @@ -143,6 +143,7 @@ MIDDLEWARE = [ 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.locale.LocaleMiddleware', 'django.middleware.common.CommonMiddleware', + 'django.middleware.http.ConditionalGetMiddleware', 'sapl.middleware.endpoint_restriction.EndpointRestrictionMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware',