From 86fab64feb96754c49b13754632acc04f1da159e Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Tue, 14 Apr 2026 02:43:44 -0300 Subject: [PATCH] Fix remaining get_client_ip stale imports; split Phase 5 to work_queues.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Import fixes (all three imported get_client_ip/ratelimit_ip from sapl.utils which no longer exports them — causing the ImportError at startup): - sapl/materia/forms.py: move get_client_ip to sapl.middleware.ratelimit - sapl/materia/views.py: move get_client_ip + ratelimit_ip; keep RATE_LIMITER_RATE in sapl.settings (used by @ratelimit decorators) - sapl/base/views.py: same pattern as materia/views.py Docs: - rate-limiter-v2.md: remove Phase 5 section (§8); renumber Open Questions to §8; update Table of Contents - work_queues.md (new): Async PDF via Celery + Django Channels WebSocket voting panel, with full context, Redis B topology rationale, k8s manifest list, and open questions. Planned start: after rate-limiter-2026 is stable. Co-Authored-By: Claude Sonnet 4.6 --- rate-limiter-v2.md | 89 +-------------------- sapl/base/views.py | 3 +- sapl/materia/forms.py | 3 +- sapl/materia/views.py | 5 +- sapl/static/robots.txt | 4 + sapl/utils.py | 9 ++- work_queues.md | 173 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 191 insertions(+), 95 deletions(-) create mode 100644 work_queues.md diff --git a/rate-limiter-v2.md b/rate-limiter-v2.md index 33c05124a..9781fc5c1 100644 --- a/rate-limiter-v2.md +++ b/rate-limiter-v2.md @@ -16,8 +16,7 @@ 6. [Phase 2 — Rate Limiting & Bot Mitigation](#5-phase-2--rate-limiting--bot-mitigation) 7. [Phase 3 — File Serving Corrections](#6-phase-3--file-serving-corrections) 8. [Phase 4 — Dynamic Page Caching](#7-phase-4--dynamic-page-caching) -9. [Phase 5 — Async PDF & WebSocket (Follow-up)](#8-phase-5--async-pdf--websocket-follow-up) -10. [Open Questions](#9-open-questions) +9. 
[Open Questions](#8-open-questions) --- @@ -1131,91 +1130,7 @@ class PesquisarMateriaView(FilterView): --- -## 8. Phase 5 — Async PDF & WebSocket (Follow-up) - -**Goal**: Eliminate synchronous PDF generation as a memory pressure source; add WebSocket support. -**Prerequisite**: Phase 1 (Redis deployed). WebSocket work resumes **after** Redis is on k8s, bot siege is resolved, and OOM pressure is reduced. - -### 8.1 Async PDF via Celery - -Current synchronous flow — holds worker memory for entire PDF build: - -```mermaid -sequenceDiagram - participant B as Browser - participant G as Gunicorn worker - participant ORM as PostgreSQL - participant RL as ReportLab - - B->>G: GET /pdf/materia/12345 - G->>ORM: N+1 queries (get_etiqueta_protocolos) - ORM-->>G: data - G->>RL: build entire PDF in RAM - RL-->>G: PDF bytes (held in worker memory) - G-->>B: stream response - note over G: worker blocked for full duration -``` - -Target async flow — worker freed immediately: - -```mermaid -sequenceDiagram - participant B as Browser - participant G as Gunicorn worker - participant Q as Redis (Celery queue) - participant W as Celery worker - participant D as Disk - - B->>G: POST /pdf/materia/12345 - G->>Q: enqueue task - G-->>B: 202 Accepted + task_id - W->>W: build PDF (out of band) - W->>D: write PDF to disk - B->>G: GET /pdf/status/task_id - G-->>B: 302 → nginx /media/pdf/task_id.pdf -``` - -### 8.2 Celery Configuration - -> **Critical**: Celery broker **must** be a **separate** Redis instance (Redis B) with `noeviction` policy. The cache Redis (Redis A) uses `allkeys-lru` — tasks would silently disappear if evicted under memory pressure. 
- -```yaml -# docker/k8s/redis-celery-configmap.yaml -data: - redis.conf: | - maxmemory-policy noeviction # never evict tasks - appendonly yes # AOF persistence ON - save "900 1" # RDB snapshot -``` - -```python -# sapl/settings.py -CELERY_BROKER_URL = config('CELERY_BROKER_URL', default='') -CELERY_RESULT_BACKEND = config('CELERY_RESULT_BACKEND', default='') -``` - -### 8.3 Django Channels (WebSocket Voting Panel) - -Uses Redis DB2 on the same Redis A instance (cache + rate limiter pod): - -```python -# sapl/settings.py -CHANNEL_LAYERS = { - "default": { - "BACKEND": "channels_redis.core.RedisChannelLayer", - "CONFIG": { - "hosts": [("sapl-redis.redis.svc.cluster.local", 6379)], - "db": 2, # DB2 reserved for channels - "capacity": 1500, - "expiry": 10, - }, - } -} -``` - ---- - -## 9. Open Questions +## 8. Open Questions | # | Question | Status | Blocks | |---|---|---|---| diff --git a/sapl/base/views.py b/sapl/base/views.py index 2b53aa57c..65e29ede8 100644 --- a/sapl/base/views.py +++ b/sapl/base/views.py @@ -51,7 +51,8 @@ from sapl.sessao.models import (Bancada, SessaoPlenaria) from sapl.settings import EMAIL_SEND_USER, RATE_LIMITER_RATE from sapl.utils import (gerar_hash_arquivo, intervalos_tem_intersecao, mail_service_configured, SEPARADOR_HASH_PROPOSICAO, show_results_filter_set, google_recaptcha_configured, - get_client_ip, sapn_is_enabled, is_weak_password, ratelimit_ip) + sapn_is_enabled, is_weak_password) +from sapl.middleware.ratelimit import get_client_ip, ratelimit_ip from .forms import (AlterarSenhaForm, CasaLegislativaForm, ConfiguracoesAppForm, EstatisticasAcessoNormasForm) from .models import AppConfig, CasaLegislativa diff --git a/sapl/materia/forms.py b/sapl/materia/forms.py index 3615fee47..979e96c3b 100644 --- a/sapl/materia/forms.py +++ b/sapl/materia/forms.py @@ -42,7 +42,8 @@ from sapl.utils import (autor_label, autor_modal, timing, models_with_gr_for_model, qs_override_django_filter, SEPARADOR_HASH_PROPOSICAO, validar_arquivo, 
YES_NO_CHOICES, - GoogleRecapthaMixin, get_client_ip) + GoogleRecapthaMixin) +from sapl.middleware.ratelimit import get_client_ip from .models import (AcompanhamentoMateria, Anexada, Autoria, DespachoInicial, DocumentoAcessorio, Numeracao, diff --git a/sapl/materia/views.py b/sapl/materia/views.py index 698ad6d33..23112d888 100644 --- a/sapl/materia/views.py +++ b/sapl/materia/views.py @@ -54,10 +54,11 @@ from sapl.parlamentares.models import Legislatura from sapl.protocoloadm.models import Protocolo from sapl.settings import MAX_DOC_UPLOAD_SIZE, MEDIA_ROOT, RATE_LIMITER_RATE from sapl.utils import (autor_label, autor_modal, gerar_hash_arquivo, get_base_url, - get_client_ip, get_mime_type_from_file_extension, lista_anexados, + get_mime_type_from_file_extension, lista_anexados, mail_service_configured, montar_row_autor, SEPARADOR_HASH_PROPOSICAO, show_results_filter_set, get_tempfile_dir, - google_recaptcha_configured, MultiFormatOutputMixin, ratelimit_ip) + google_recaptcha_configured, MultiFormatOutputMixin) +from sapl.middleware.ratelimit import get_client_ip, ratelimit_ip from .forms import (AcessorioEmLoteFilterSet, AcompanhamentoMateriaForm, AnexadaEmLoteFilterSet, AdicionarVariasAutoriasFilterSet, diff --git a/sapl/static/robots.txt b/sapl/static/robots.txt index dc4f867d5..109d3986f 100644 --- a/sapl/static/robots.txt +++ b/sapl/static/robots.txt @@ -22,6 +22,10 @@ User-agent: SERankingBacklinksBot Disallow: / Crawl-delay: 10 +User-agent: anthropic-python +Disallow: / +Crawl-delay: 10 + User-agent: * Disallow: /relatorios/ Crawl-delay: 10 diff --git a/sapl/utils.py b/sapl/utils.py index cdfa3a96c..5c35f959e 100644 --- a/sapl/utils.py +++ b/sapl/utils.py @@ -78,6 +78,7 @@ def is_weak_password(password): return len(password) < MIN_PASSWORD_LENGTH or not (pwd_has_lowercase and pwd_has_uppercase and pwd_has_number and pwd_has_special_char) + def groups_remove_user(user, groups_name): from django.contrib.auth.models import Group @@ -401,7 +402,6 @@ def xstr(s): 
return '' if s is None else str(s) - def get_base_url(request): # TODO substituir por Site.objects.get_current().domain # from django.contrib.sites.models import Site @@ -1104,6 +1104,7 @@ def cached_call(key, timeout=300): return result return wrap + return cache_decorator @@ -1324,7 +1325,7 @@ def get_path_to_name_report_map(): class Row: - def __init__(self, cols, is_header = False): + def __init__(self, cols, is_header=False): self.cols = cols self.is_header = is_header @@ -1333,7 +1334,7 @@ class Row: class Table: - def __init__(self, header = [], rows = []): + def __init__(self, header=[], rows=[]): self.header = header self.rows = rows @@ -1534,7 +1535,7 @@ class MultiFormatOutputMixin: verbose_name = [] - if hasattr(self, f'hook_header_{fname}'): # suporta extensao de funcionalidade + if hasattr(self, f'hook_header_{fname}'): # suporta extensao de funcionalidade h = getattr(self, f'hook_header_{fname}')() yield h continue diff --git a/work_queues.md b/work_queues.md new file mode 100644 index 000000000..8d62176a1 --- /dev/null +++ b/work_queues.md @@ -0,0 +1,173 @@ +# SAPL — Work Queues & Real-Time: Async PDF + WebSocket Voting + +> **Status**: Planned follow-up mini-project. +> **Prerequisite**: Redis A (cache + rate-limiter pod, `rate-limiter-2026` branch) must be +> deployed to production, stable, and OOM pressure confirmed reduced before starting this work. +> **Scope**: Django 2.2 / Gunicorn / Celery / Django Channels — same fleet of 1,200+ pods. + +--- + +## Table of Contents + +1. [Context & Motivation](#1-context--motivation) +2. [Redis Topology for Work Queues](#2-redis-topology-for-work-queues) +3. [Phase 1 — Async PDF via Celery](#3-phase-1--async-pdf-via-celery) +4. [Phase 2 — Django Channels (WebSocket Voting Panel)](#4-phase-2--django-channels-websocket-voting-panel) +5. [Open Questions](#5-open-questions) + +--- + +## 1. 
Context & Motivation + +After `rate-limiter-2026` ships: + +| Remaining pain point | Current behaviour | Target | +|---|---|---| +| PDF generation | Holds a Gunicorn worker thread for the full build duration (10–60 s). Workers are at 400 MB cap — a PDF request burns one slot for up to a minute | Enqueue via Celery; respond 202 immediately; worker is freed | +| WebSocket voting panel | Not implemented; councillors use a polling page | Persistent connection via Django Channels backed by Redis | + +--- + +## 2. Redis Topology for Work Queues + +> **Critical constraint**: Celery broker **must** be a **separate** Redis instance (Redis B) +> with `noeviction` policy. +> Redis A (cache + rate-limiter) uses `allkeys-lru` — tasks enqueued there would be silently +> evicted under memory pressure, causing jobs to vanish without error. + +| Instance | Role | Eviction policy | Persistence | +|---|---|---|---| +| **Redis A** (existing) | Page cache (DB0), rate limiter (DB1), Django Channels (DB2) | `allkeys-lru` | none | +| **Redis B** (new) | Celery broker + result backend | `noeviction` | AOF + RDB snapshot | + +```yaml +# docker/k8s/redis-celery-configmap.yaml +data: + redis.conf: | + maxmemory-policy noeviction # never evict tasks + appendonly yes # AOF persistence ON + save "900 1" # RDB snapshot every 15 min if ≥1 change + databases 2 # DB0 = broker queue, DB1 = result backend +``` + +--- + +## 3. 
Phase 1 — Async PDF via Celery + +### 3.1 Current (synchronous) flow + +Holds worker memory for the entire PDF build: + +```mermaid +sequenceDiagram + participant B as Browser + participant G as Gunicorn worker + participant ORM as PostgreSQL + participant RL as ReportLab + + B->>G: GET /pdf/materia/12345 + G->>ORM: N+1 queries (get_etiqueta_protocolos) + ORM-->>G: data + G->>RL: build entire PDF in RAM + RL-->>G: PDF bytes (held in worker memory) + G-->>B: stream response + note over G: worker blocked for full duration +``` + +### 3.2 Target (async) flow + +Worker freed immediately after enqueueing: + +```mermaid +sequenceDiagram + participant B as Browser + participant G as Gunicorn worker + participant Q as Redis B (Celery queue) + participant W as Celery worker + participant D as Disk / nginx + + B->>G: POST /pdf/materia/12345 + G->>Q: enqueue task + G-->>B: 202 Accepted + task_id + W->>W: build PDF (out of band) + W->>D: write PDF to /media/pdf/task_id.pdf + B->>G: GET /pdf/status/task_id + G-->>B: 302 → nginx /media/pdf/task_id.pdf +``` + +### 3.3 Celery settings + +```python +# sapl/settings.py additions +CELERY_BROKER_URL = config('CELERY_BROKER_URL', default='') +CELERY_RESULT_BACKEND = config('CELERY_RESULT_BACKEND', default='') + +# Memory cap: a worker child whose resident memory exceeds 400 MB is replaced after its current task finishes. +# Keeps Celery workers inside the same memory envelope as Gunicorn workers. 
+CELERY_WORKER_MAX_MEMORY_PER_CHILD = 400 * 1024 # KB (= 400 MB); child is replaced after its current task +CELERY_TASK_SOFT_TIME_LIMIT = 120 # seconds — raises SoftTimeLimitExceeded inside the task +CELERY_TASK_TIME_LIMIT = 180 # seconds — SIGKILL +``` + +### 3.4 k8s manifests + +New files to be created under `docker/k8s/`: + +- `redis-celery-configmap.yaml` — Redis B config (noeviction, AOF) +- `redis-celery-deployment.yaml` — single-replica Redis B pod +- `redis-celery-service.yaml` — ClusterIP service +- `celery-deployment.yaml` — Celery worker deployment (same image as SAPL) + +### 3.5 Environment variables (per-namespace Secret) + +| Variable | Example value | Notes | +|---|---|---| +| `CELERY_BROKER_URL` | `redis://sapl-redis-celery.redis.svc:6379/0` | Redis B, DB0 | +| `CELERY_RESULT_BACKEND` | `redis://sapl-redis-celery.redis.svc:6379/1` | Redis B, DB1 | + +--- + +## 4. Phase 2 — Django Channels (WebSocket Voting Panel) + +Uses **Redis A DB2** (reserved in the existing key-layout table — no new infra needed beyond +what ships in `rate-limiter-2026`). + +### 4.1 Channel layer settings + +```python +# sapl/settings.py additions +CHANNEL_LAYERS = { + "default": { + "BACKEND": "channels_redis.core.RedisChannelLayer", + "CONFIG": { + # The trailing /2 in the URI selects DB2, reserved for channels (see rate-limiter-v2.md §0.2) + "hosts": ["redis://sapl-redis.redis.svc.cluster.local:6379/2"], + "capacity": 1500, + "expiry": 10, + }, + } +} +``` + +### 4.2 Prerequisites before starting + +- [ ] Redis A stable in production (rate limiter + cache confirmed working) +- [ ] OOM kill rate reduced to near-zero +- [ ] Bot siege resolved (Phase 0–2 metrics reviewed) +- [ ] Decision on ASGI server (Daphne vs Uvicorn + channels) — Gunicorn alone cannot serve WebSockets + +--- + +## 5. Open Questions + +| # | Question | Blocks | +|---|---|---| +| 1 | Which PDF endpoints are highest priority for async migration (`/relatorios/`, `/materia/pdf/`, other)? | Phase 1 scope | +| 2 | Should the Celery worker run in the same pod as Gunicorn (sidecar) or a dedicated deployment? 
| Phase 1 k8s design | +| 3 | Retention — how long should generated PDF files on disk (and their Celery result-backend entries) be kept before cleanup? | Phase 1 storage design | +| 4 | ASGI server selection for Channels (Daphne vs Uvicorn + channels) | Phase 2 | +| 5 | WebSocket voting panel: is per-session or per-pod state acceptable? | Phase 2 architecture | + +--- + +*Planned work — begins after `rate-limiter-2026` is stable in production.*