From 05d705bb3746c12ddaa4c1612a4d34d4f29a264b Mon Sep 17 00:00:00 2001 From: Edward Oliveira Date: Tue, 14 Apr 2026 00:10:53 -0300 Subject: [PATCH] Add bingbot and SERankingBacklinksBot to UA blocklist and robots.txt Co-Authored-By: Claude Sonnet 4.6 --- sapl/middleware/ratelimit.py | 22 ++++++++++++---------- sapl/static/robots.txt | 8 ++++++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/sapl/middleware/ratelimit.py b/sapl/middleware/ratelimit.py index 3d5943556..84da8802b 100644 --- a/sapl/middleware/ratelimit.py +++ b/sapl/middleware/ratelimit.py @@ -35,7 +35,9 @@ BOT_UA_FRAGMENTS = [ 'AhrefsBot', 'meta-externalagent', 'OAI-SearchBot', - 'Chrome/98.0.4758', # known scraper impersonating an old Chrome + 'bingbot', + 'SERankingBacklinksBot', + 'Chrome/98.0.4758', # known scraper impersonating an old Chrome ] _INCR_LUA = """ @@ -51,8 +53,8 @@ def _sha256(s): def _get_ip(request): return ( - request.META.get('HTTP_X_FORWARDED_FOR', '').split(',')[0].strip() - or request.META.get('REMOTE_ADDR', '') + request.META.get('HTTP_X_FORWARDED_FOR', '').split(',')[0].strip() + or request.META.get('REMOTE_ADDR', '') ) @@ -80,11 +82,11 @@ class RateLimitMiddleware: self.get_response = get_response self.dry_run = getattr(settings, 'RATELIMIT_DRY_RUN', True) - anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m') - auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m') + anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m') + auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m') - self.anon_threshold, self.anon_window = _parse_rate(anon_rate) - self.auth_threshold, self.auth_window = _parse_rate(auth_rate) + self.anon_threshold, self.anon_window = _parse_rate(anon_rate) + self.auth_threshold, self.auth_window = _parse_rate(auth_rate) self.whitelist = set( getattr(settings, 'RATE_LIMIT_WHITELIST_IPS', []) or [] @@ -101,7 +103,7 @@ class RateLimitMiddleware: request.path, self.dry_run, extra={ - 'ua': request.META.get('HTTP_USER_AGENT', ''), + 'ua': request.META.get('HTTP_USER_AGENT', ''), 'namespace': getattr(request, 'tenant', 'unknown'), }, ) @@ -170,9 +172,9 @@ class RateLimitMiddleware: return {'action': 'block', 'reason': 'ip_rate', 'ip': ip} # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT) - ns = getattr(request, 'tenant', 'global') + ns = getattr(request, 'tenant', 'global') bucket = int(time.time() // self.anon_window) - count = self._incr_with_ttl( + count = self._incr_with_ttl( f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2 ) if count >= self.anon_threshold: diff --git a/sapl/static/robots.txt b/sapl/static/robots.txt index 50c5a7328..dc4f867d5 100644 --- a/sapl/static/robots.txt +++ b/sapl/static/robots.txt @@ -14,6 +14,14 @@ User-agent: OAI-SearchBot Disallow: / Crawl-delay: 10 +User-agent: bingbot +Disallow: / +Crawl-delay: 10 + +User-agent: SERankingBacklinksBot +Disallow: / +Crawl-delay: 10 + User-agent: * Disallow: /relatorios/ Crawl-delay: 10