Browse Source

Add bingbot and SERankingBacklinksBot to UA blocklist and robots.txt

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
rate-limiter-2026
Edward Ribeiro 3 weeks ago
parent
commit
05d705bb37
  1. 22
      sapl/middleware/ratelimit.py
  2. 8
      sapl/static/robots.txt

22
sapl/middleware/ratelimit.py

@@ -35,7 +35,9 @@ BOT_UA_FRAGMENTS = [
'AhrefsBot', 'AhrefsBot',
'meta-externalagent', 'meta-externalagent',
'OAI-SearchBot', 'OAI-SearchBot',
'Chrome/98.0.4758', # known scraper impersonating an old Chrome 'bingbot',
'SERankingBacklinksBot',
'Chrome/98.0.4758', # known scraper impersonating an old Chrome
] ]
_INCR_LUA = """ _INCR_LUA = """
@@ -51,8 +53,8 @@ def _sha256(s):
def _get_ip(request): def _get_ip(request):
return ( return (
request.META.get('HTTP_X_FORWARDED_FOR', '').split(',')[0].strip() request.META.get('HTTP_X_FORWARDED_FOR', '').split(',')[0].strip()
or request.META.get('REMOTE_ADDR', '') or request.META.get('REMOTE_ADDR', '')
) )
@@ -80,11 +82,11 @@ class RateLimitMiddleware:
self.get_response = get_response self.get_response = get_response
self.dry_run = getattr(settings, 'RATELIMIT_DRY_RUN', True) self.dry_run = getattr(settings, 'RATELIMIT_DRY_RUN', True)
anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m') anon_rate = getattr(settings, 'RATE_LIMITER_RATE', '35/m')
auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m') auth_rate = getattr(settings, 'RATE_LIMITER_RATE_AUTHENTICATED', '120/m')
self.anon_threshold, self.anon_window = _parse_rate(anon_rate) self.anon_threshold, self.anon_window = _parse_rate(anon_rate)
self.auth_threshold, self.auth_window = _parse_rate(auth_rate) self.auth_threshold, self.auth_window = _parse_rate(auth_rate)
self.whitelist = set( self.whitelist = set(
getattr(settings, 'RATE_LIMIT_WHITELIST_IPS', []) or [] getattr(settings, 'RATE_LIMIT_WHITELIST_IPS', []) or []
@@ -101,7 +103,7 @@ class RateLimitMiddleware:
request.path, request.path,
self.dry_run, self.dry_run,
extra={ extra={
'ua': request.META.get('HTTP_USER_AGENT', ''), 'ua': request.META.get('HTTP_USER_AGENT', ''),
'namespace': getattr(request, 'tenant', 'unknown'), 'namespace': getattr(request, 'tenant', 'unknown'),
}, },
) )
@@ -170,9 +172,9 @@ class RateLimitMiddleware:
return {'action': 'block', 'reason': 'ip_rate', 'ip': ip} return {'action': 'block', 'reason': 'ip_rate', 'ip': ip}
# Check 4c: per-namespace/IP/window (catches UA rotators behind NAT) # Check 4c: per-namespace/IP/window (catches UA rotators behind NAT)
ns = getattr(request, 'tenant', 'global') ns = getattr(request, 'tenant', 'global')
bucket = int(time.time() // self.anon_window) bucket = int(time.time() // self.anon_window)
count = self._incr_with_ttl( count = self._incr_with_ttl(
f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2 f'rl:ns:{ns}:ip:{ip}:w:{bucket}', ttl=self.anon_window * 2
) )
if count >= self.anon_threshold: if count >= self.anon_threshold:

8
sapl/static/robots.txt

@@ -14,6 +14,14 @@ User-agent: OAI-SearchBot
Disallow: / Disallow: /
Crawl-delay: 10 Crawl-delay: 10
User-agent: bingbot
Disallow: /
Crawl-delay: 10
User-agent: SERankingBacklinksBot
Disallow: /
Crawl-delay: 10
User-agent: * User-agent: *
Disallow: /relatorios/ Disallow: /relatorios/
Crawl-delay: 10 Crawl-delay: 10

Loading…
Cancel
Save