From adc3aa7261c7f9e3966f7b0feb18bcfc25c4d4ae Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Tue, 14 Apr 2026 02:12:14 -0300 Subject: [PATCH] Phase 4: anonymous page caching via AnonCachePageMixin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add AnonCachePageMixin (sapl/middleware/page_cache.py) that stores full view responses in the default Redis cache for anonymous (unauthenticated) GET requests only. Authenticated users always bypass the cache so CSRF tokens and user-specific UI controls are never served stale. Applied to: - ParlamentarCrud.ListView / DetailView — TTL 600 s (changes each term) - AudienciaCrud.ListView — TTL 120 s (hearings added infrequently) - ComissaoCrud.ListView — TTL 300 s (committees change rarely) Also: - Add PAGE_CACHE_TTL_LIST/DETAIL/STABLE settings (env-configurable) - Add bingbot + SERankingBacklinksBot to nginx UA blocklist (were already in BOT_UA_FRAGMENTS / robots.txt; nginx map was the only gap) - Remove unused ratelimit/method_decorator/RATE_LIMITER_RATE imports from audiencia/views.py that crept in during Phase 2 Co-Authored-By: Claude Sonnet 4.6 --- docker/config/nginx/nginx.conf | 2 + sapl/audiencia/views.py | 9 ++-- sapl/comissoes/views.py | 5 +++ sapl/middleware/page_cache.py | 75 ++++++++++++++++++++++++++++++++++ sapl/parlamentares/views.py | 9 +++- sapl/settings.py | 12 ++++++ 6 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 sapl/middleware/page_cache.py diff --git a/docker/config/nginx/nginx.conf b/docker/config/nginx/nginx.conf index ab18f7540..001411608 100644 --- a/docker/config/nginx/nginx.conf +++ b/docker/config/nginx/nginx.conf @@ -96,6 +96,8 @@ http { "~*DotBot" 1; "~*meta-externalagent" 1; "~*OAI-SearchBot" 1; + "~*bingbot" 1; + "~*SERankingBacklinksBot" 1; "~*Chrome/98\.0\.4758" 1; } diff --git a/sapl/audiencia/views.py b/sapl/audiencia/views.py index 0490c3b4b..c9df5cf8c 100755 --- a/sapl/audiencia/views.py +++ b/sapl/audiencia/views.py @@ 
-9,11 +9,7 @@ from sapl.crud.base import RP_DETAIL, RP_LIST, Crud, MasterDetailCrud from .forms import AudienciaForm, AnexoAudienciaPublicaForm from .models import AudienciaPublica, AnexoAudienciaPublica -from ratelimit.decorators import ratelimit -from django.utils.decorators import method_decorator - -from ..settings import RATE_LIMITER_RATE -from sapl.middleware.ratelimit import ratelimit_ip +from sapl.middleware.page_cache import AnonCachePageMixin def index(request): @@ -28,8 +24,9 @@ class AudienciaCrud(Crud): list_field_names = ['numero', 'nome', 'tipo', 'materia', 'data'] ordering = '-ano', '-numero', '-data', 'nome', 'tipo' - class ListView(Crud.ListView): + class ListView(AnonCachePageMixin, Crud.ListView): paginate_by = 10 + anon_cache_ttl = 120 # hearings are added infrequently. NOTE(review): literal pins 120 s; the mixin default already reads settings.PAGE_CACHE_TTL_LIST def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) diff --git a/sapl/comissoes/views.py b/sapl/comissoes/views.py index 46ad5f9f4..6d4e9386a 100644 --- a/sapl/comissoes/views.py +++ b/sapl/comissoes/views.py @@ -28,6 +28,7 @@ from sapl.crud.base import (Crud, CrudAux, MasterDetailCrud, RP_LIST) from sapl.materia.models import (MateriaEmTramitacao, MateriaLegislativa, PautaReuniao, Tramitacao) +from sapl.middleware.page_cache import AnonCachePageMixin from sapl.middleware.ratelimit import ratelimit_ip from sapl.utils import show_results_filter_set @@ -174,6 +175,10 @@ class ComissaoCrud(Crud): 'data_criacao', 'data_extincao', 'ativa'] ordering = '-ativa', 'sigla' + class ListView(AnonCachePageMixin, Crud.ListView): + # Committee lists change rarely; 5-minute cache is conservative. 
+ anon_cache_ttl = 300 # NOTE(review): literal equal to the PAGE_CACHE_TTL_DETAIL default; the env-configurable setting is not read here + class CreateView(Crud.CreateView): form_class = ComissaoForm diff --git a/sapl/middleware/page_cache.py b/sapl/middleware/page_cache.py new file mode 100644 index 000000000..553f9632a --- /dev/null +++ b/sapl/middleware/page_cache.py @@ -0,0 +1,75 @@ +""" +AnonCachePageMixin — anonymous-only Django view response caching. + +Why anonymous-only? + - Authenticated responses include CSRF tokens and user-specific UI fragments + (edit/delete action buttons injected by SAPL's CRUD framework). Caching + those would serve stale or wrong data to other users. + - Bot traffic is entirely anonymous. A 2-minute cache converts hundreds of + identical list-view DB queries into a single one — exactly the workload + that triggers OOM in the fleet. + +How it works: + - `dispatch()` short-circuits to the normal (uncached) path for any + authenticated request. + - For anonymous GET/HEAD requests, the response is stored in the 'default' + Redis cache under a key that includes the full URL (scheme + host + path + + query string). The Django cache framework handles key construction and + TTL expiry automatically. + - HTTPS and HTTP requests are stored under separate keys (Django default). + +Usage: + + from sapl.middleware.page_cache import AnonCachePageMixin + + class MyListView(AnonCachePageMixin, ListView): + anon_cache_ttl = settings.PAGE_CACHE_TTL_LIST # 120 s + + class MyDetailView(AnonCachePageMixin, DetailView): + anon_cache_ttl = settings.PAGE_CACHE_TTL_DETAIL # 300 s + +TTL reference (see settings.PAGE_CACHE_TTL_*): + + View type Default TTL + ───────────────────────────────────────────────────── + Public list (norma, materia, sessao…) 120 s (PAGE_CACHE_TTL_LIST) + Public detail (norma, materia, sessao…); comissão list 300 s (PAGE_CACHE_TTL_DETAIL) + Stable views (parlamentar list/detail) 600 s (PAGE_CACHE_TTL_STABLE) + +Invalidation: + The cache TTL is intentionally short (≤ 10 min) so stale content expires + on its own. 
Explicit invalidation is not implemented — legislative data + changes infrequently and short TTLs are acceptable. +""" + +from django.conf import settings +from django.views.decorators.cache import cache_page, never_cache +from django.utils.decorators import method_decorator + + +class AnonCachePageMixin: + """ + Cache the full view response for anonymous (unauthenticated) requests. + + Set `anon_cache_ttl` on the subclass to override the default TTL. + Authenticated requests always bypass the cache. + """ + + # Override per view class. Use settings.PAGE_CACHE_TTL_* for consistency. + anon_cache_ttl = getattr(settings, 'PAGE_CACHE_TTL_LIST', 120) + + def dispatch(self, request, *args, **kwargs): + if getattr(request, 'user', None) and request.user.is_authenticated: + # Authenticated: skip cache entirely — response may contain + # user-specific controls (CSRF token, edit/delete buttons). + handler = never_cache( + lambda req, *a, **kw: super(AnonCachePageMixin, self).dispatch(req, *a, **kw) + ) + return handler(request, *args, **kwargs) + + # Anonymous: wrap the parent dispatch in cache_page so Django stores + # the rendered response in the 'default' cache for anon_cache_ttl seconds. 
+ handler = cache_page(self.anon_cache_ttl)( + lambda req, *a, **kw: super(AnonCachePageMixin, self).dispatch(req, *a, **kw) + ) + return handler(request, *args, **kwargs) diff --git a/sapl/parlamentares/views.py b/sapl/parlamentares/views.py index 5c35733c0..ae29ba844 100644 --- a/sapl/parlamentares/views.py +++ b/sapl/parlamentares/views.py @@ -33,6 +33,7 @@ from sapl.materia.models import Autoria, Proposicao, Relatoria from sapl.norma.models import AutoriaNorma, NormaJuridica from sapl.parlamentares.apps import AppConfig from sapl.rules import SAPL_GROUP_VOTANTE +from sapl.middleware.page_cache import AnonCachePageMixin from sapl.middleware.ratelimit import ratelimit_ip from sapl.utils import (parlamentares_ativos, show_results_filter_set) @@ -750,7 +751,9 @@ class ParlamentarCrud(Crud): 'filiacao_atual', 'ativo'] - class DetailView(Crud.DetailView): + class DetailView(AnonCachePageMixin, Crud.DetailView): + # Parlamentar profiles change only at term boundaries — 10-minute cache. + anon_cache_ttl = 600 # NOTE(review): literal equal to the PAGE_CACHE_TTL_STABLE default; the env-configurable setting is not read here def get_template_names(self): if self.request.user.has_perm(self.permission(RP_CHANGE)): @@ -789,10 +792,12 @@ class ParlamentarCrud(Crud): """ return super(Crud.CreateView, self).form_valid(form) - class ListView(Crud.ListView): + class ListView(AnonCachePageMixin, Crud.ListView): template_name = "parlamentares/parlamentares_list.html" paginate_by = None logger = logging.getLogger(__name__) + # Full list changes only when a mandato starts/ends — 10-minute cache. 
+ anon_cache_ttl = 600 # NOTE(review): literal equal to the PAGE_CACHE_TTL_STABLE default; the env-configurable setting is not read here @xframe_options_exempt def get(self, request, *args, **kwargs): diff --git a/sapl/settings.py b/sapl/settings.py index a178438a1..06d9f59b0 100644 --- a/sapl/settings.py +++ b/sapl/settings.py @@ -404,6 +404,18 @@ RATE_LIMIT_WHITELIST_IPS = config( cast=lambda v: [x.strip() for x in v.split(',') if x.strip()], ) +# --------------------------------------------------------------------------- +# Anonymous page caching — AnonCachePageMixin (sapl/middleware/page_cache.py) +# TTLs apply only to anonymous (unauthenticated) GET/HEAD responses. +# Authenticated users always bypass the cache (see AnonCachePageMixin). +# --------------------------------------------------------------------------- +# Public list views (norma, materia, sessao, audiencia…) +PAGE_CACHE_TTL_LIST = config('PAGE_CACHE_TTL_LIST', default=120, cast=int) +# Public detail views — rarely mutated once published (also the comissão list value) +PAGE_CACHE_TTL_DETAIL = config('PAGE_CACHE_TTL_DETAIL', default=300, cast=int) +# High-stability views (parlamentar list and detail) — change only each term +PAGE_CACHE_TTL_STABLE = config('PAGE_CACHE_TTL_STABLE', default=600, cast=int) + # Internationalization # https://docs.djangoproject.com/en/1.8/topics/i18n/ LANGUAGE_CODE = 'pt-br'