From 78d12217dc86c3de480f229174370dd755b234fe Mon Sep 17 00:00:00 2001 From: Raul Adorean Date: Wed, 3 Dec 2025 11:15:54 +0000 Subject: [PATCH 1/2] Change User-Agent order If a request's User-Agent matches a Bad User-Agent pattern, the Good User-Agent patterns are never checked, because nginx evaluates the regular expressions in a map in order of appearance and stops at the first match. Moving the Good, Allowed and Limited User-Agent lists above the Bad User-Agent list lets them take precedence. Sure, you can use bots.d/blacklist-user-agents.conf, but for something generic that you want to whitelist on all servers I consider this approach simpler. --- conf.d/globalblacklist.conf | 192 ++++++++++++++++++------------------ 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/conf.d/globalblacklist.conf b/conf.d/globalblacklist.conf index ebcfa43e91..07e9c1b1e8 100644 --- a/conf.d/globalblacklist.conf +++ b/conf.d/globalblacklist.conf @@ -146,6 +146,102 @@ map $http_user_agent $bad_bot { # END CUSTOM BLACKLISTED USER AGENTS ### DO NOT EDIT OR REMOVE THIS LINE AT ALL ### # --------------------------------------------------------------------------------- +# -------------------------------------------- +# GOOD UA User-Agent Strings We Know and Trust +# -------------------------------------------- + +# ----------------------------------------------------------------------- +# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf +# by adding the same UA line there and chaning its value of 1 +# If you think GoogleBot is bad you would simply add them to +# blacklist-user-agents.conf with a value of 1 +# ----------------------------------------------------------------------- + +# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*(?:\b)AdsBot-Google(?:\b)" 0; + "~*(?:\b)Applebot(?:\b)" 0; + "~*(?:\b)DoCoMo(?:\b)" 0; + "~*(?:\b)Feedfetcher-Google(?:\b)" 0; + "~*(?:\b)Google-HTTP-Java-Client(?:\b)" 0; + "~*(?:\b)Googlebot(?:\b)" 0; + "~*(?:\b)Googlebot-Image(?:\b)" 0; + "~*(?:\b)Googlebot-Mobile(?:\b)" 0; + "~*(?:\b)Googlebot-News(?:\b)" 0; + "~*(?:\b)Googlebot-Video(?:\b)" 0; + "~*(?:\b)Googlebot/Test(?:\b)" 0; + "~*(?:\b)Gravityscan(?:\b)" 0; + "~*(?:\b)Jakarta\ 
Commons(?:\b)" 0; + "~*(?:\b)Kraken/0.1(?:\b)" 0; + "~*(?:\b)LinkedInBot(?:\b)" 0; + "~*(?:\b)Mediapartners-Google(?:\b)" 0; + "~*(?:\b)SAMSUNG(?:\b)" 0; + "~*(?:\b)Slackbot(?:\b)" 0; + "~*(?:\b)Slackbot-LinkExpanding(?:\b)" 0; + "~*(?:\b)TwitterBot(?:\b)" 0; + "~*(?:\b)Wordpress(?:\b)" 0; + "~*(?:\b)adidxbot(?:\b)" 0; + "~*(?:\b)aolbuild(?:\b)" 0; + "~*(?:\b)bing(?:\b)" 0; + "~*(?:\b)bingbot(?:\b)" 0; + "~*(?:\b)bingpreview(?:\b)" 0; + "~*(?:\b)developers.facebook.com(?:\b)" 0; + "~*(?:\b)duckduckgo(?:\b)" 0; + "~*(?:\b)facebookexternalhit(?:\b)" 0; + "~*(?:\b)facebookplatform(?:\b)" 0; + "~*(?:\b)gsa-crawler(?:\b)" 0; + "~*(?:\b)msnbot(?:\b)" 0; + "~*(?:\b)msnbot-media(?:\b)" 0; + "~*(?:\b)slurp(?:\b)" 0; + "~*(?:\b)teoma(?:\b)" 0; + "~*(?:\b)yahoo(?:\b)" 0; +# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# -------------------------------------------------------- +# GOOD UA User-Agent Rate Limiting 1 - Disabled by Default +# -------------------------------------------------------- + + # TO ACTIVATE THIS RATE LIMITING Uncomment these two lines in blockbots.conf + #limit_conn bot1_connlimit 100; + #limit_req zone=bot1_reqlimitip burst=50; + +# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*(?:\b)Lynx(?:\b)" 1; + "~*(?:\b)Presto(?:\b)" 1; + "~*(?:\b)Wget/1.15(?:\b)" 1; + "~*(?:\b)jetmon(?:\b)" 1; + "~*(?:\b)libwww-perl(?:\b)" 1; + "~*(?:\b)munin(?:\b)" 1; +# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# ------------------------------------------------------- +# GOOD UA User-Agent Rate Limiting 2 - Enabled by Default +# ------------------------------------------------------- + +# ----------------------------------------------------------------------- +# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf +# by adding the same UA line there and chaning its value of 1 +# ----------------------------------------------------------------------- + +# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + 
"~*(?:\b)Alexa(?:\b)" 2; + "~*(?:\b)ArchiveTeam(?:\b)" 2; + "~*(?:\b)BUbiNG(?:\b)" 2; + "~*(?:\b)Baidu(?:\b)" 2; + "~*(?:\b)FlipboardProxy(?:\b)" 2; + "~*(?:\b)MSIE\ 7.0(?:\b)" 2; + "~*(?:\b)R6_CommentReader(?:\b)" 2; + "~*(?:\b)R6_FeedFetcher(?:\b)" 2; + "~*(?:\b)RED/1(?:\b)" 2; + "~*(?:\b)RPT-HTTPClient(?:\b)" 2; + "~*(?:\b)Spaidu(?:\b)" 2; + "~*(?:\b)UptimeRobot/2.0(?:\b)" 2; + "~*(?:\b)YandexBot(?:\b)" 2; + "~*(?:\b)YandexImages(?:\b)" 2; + "~*(?:\b)archive.org(?:\b)" 2; + "~*(?:\b)ia_archiver(?:\b)" 2; + "~*(?:\b)sfFeedReader/0.9(?:\b)" 2; +# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + # -------------------------------------------------- # BAD UA (User-Agent) Strings That We Block Outright # -------------------------------------------------- @@ -834,102 +930,6 @@ map $http_user_agent $bad_bot { "~*(?:\b)zgrab(?:\b)" 3; # END BAD BOTS ### DO NOT EDIT THIS LINE AT ALL ### -# -------------------------------------------- -# GOOD UA User-Agent Strings We Know and Trust -# -------------------------------------------- - -# ----------------------------------------------------------------------- -# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf -# by adding the same UA line there and chaning its value of 1 -# If you think GoogleBot is bad you would simply add them to -# blacklist-user-agents.conf with a value of 1 -# ----------------------------------------------------------------------- - -# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*(?:\b)AdsBot-Google(?:\b)" 0; - "~*(?:\b)Applebot(?:\b)" 0; - "~*(?:\b)DoCoMo(?:\b)" 0; - "~*(?:\b)Feedfetcher-Google(?:\b)" 0; - "~*(?:\b)Google-HTTP-Java-Client(?:\b)" 0; - "~*(?:\b)Googlebot(?:\b)" 0; - "~*(?:\b)Googlebot-Image(?:\b)" 0; - "~*(?:\b)Googlebot-Mobile(?:\b)" 0; - "~*(?:\b)Googlebot-News(?:\b)" 0; - "~*(?:\b)Googlebot-Video(?:\b)" 0; - "~*(?:\b)Googlebot/Test(?:\b)" 0; - "~*(?:\b)Gravityscan(?:\b)" 0; - "~*(?:\b)Jakarta\ Commons(?:\b)" 0; - "~*(?:\b)Kraken/0.1(?:\b)" 0; - 
"~*(?:\b)LinkedInBot(?:\b)" 0; - "~*(?:\b)Mediapartners-Google(?:\b)" 0; - "~*(?:\b)SAMSUNG(?:\b)" 0; - "~*(?:\b)Slackbot(?:\b)" 0; - "~*(?:\b)Slackbot-LinkExpanding(?:\b)" 0; - "~*(?:\b)TwitterBot(?:\b)" 0; - "~*(?:\b)Wordpress(?:\b)" 0; - "~*(?:\b)adidxbot(?:\b)" 0; - "~*(?:\b)aolbuild(?:\b)" 0; - "~*(?:\b)bing(?:\b)" 0; - "~*(?:\b)bingbot(?:\b)" 0; - "~*(?:\b)bingpreview(?:\b)" 0; - "~*(?:\b)developers.facebook.com(?:\b)" 0; - "~*(?:\b)duckduckgo(?:\b)" 0; - "~*(?:\b)facebookexternalhit(?:\b)" 0; - "~*(?:\b)facebookplatform(?:\b)" 0; - "~*(?:\b)gsa-crawler(?:\b)" 0; - "~*(?:\b)msnbot(?:\b)" 0; - "~*(?:\b)msnbot-media(?:\b)" 0; - "~*(?:\b)slurp(?:\b)" 0; - "~*(?:\b)teoma(?:\b)" 0; - "~*(?:\b)yahoo(?:\b)" 0; -# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### - -# -------------------------------------------------------- -# GOOD UA User-Agent Rate Limiting 1 - Disabled by Default -# -------------------------------------------------------- - - # TO ACTIVATE THIS RATE LIMITING Uncomment these two lines in blockbots.conf - #limit_conn bot1_connlimit 100; - #limit_req zone=bot1_reqlimitip burst=50; - -# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*(?:\b)Lynx(?:\b)" 1; - "~*(?:\b)Presto(?:\b)" 1; - "~*(?:\b)Wget/1.15(?:\b)" 1; - "~*(?:\b)jetmon(?:\b)" 1; - "~*(?:\b)libwww-perl(?:\b)" 1; - "~*(?:\b)munin(?:\b)" 1; -# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - -# ------------------------------------------------------- -# GOOD UA User-Agent Rate Limiting 2 - Enabled by Default -# ------------------------------------------------------- - -# ----------------------------------------------------------------------- -# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf -# by adding the same UA line there and chaning its value of 1 -# ----------------------------------------------------------------------- - -# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*(?:\b)Alexa(?:\b)" 2; - "~*(?:\b)ArchiveTeam(?:\b)" 2; - 
"~*(?:\b)BUbiNG(?:\b)" 2; - "~*(?:\b)Baidu(?:\b)" 2; - "~*(?:\b)FlipboardProxy(?:\b)" 2; - "~*(?:\b)MSIE\ 7.0(?:\b)" 2; - "~*(?:\b)R6_CommentReader(?:\b)" 2; - "~*(?:\b)R6_FeedFetcher(?:\b)" 2; - "~*(?:\b)RED/1(?:\b)" 2; - "~*(?:\b)RPT-HTTPClient(?:\b)" 2; - "~*(?:\b)Spaidu(?:\b)" 2; - "~*(?:\b)UptimeRobot/2.0(?:\b)" 2; - "~*(?:\b)YandexBot(?:\b)" 2; - "~*(?:\b)YandexImages(?:\b)" 2; - "~*(?:\b)archive.org(?:\b)" 2; - "~*(?:\b)ia_archiver(?:\b)" 2; - "~*(?:\b)sfFeedReader/0.9(?:\b)" 2; -# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - } # =========================== From aa0e57556d620eea0145596d24aebb661cffc225 Mon Sep 17 00:00:00 2001 From: Raul Adorean Date: Wed, 3 Dec 2025 11:26:14 +0000 Subject: [PATCH 2/2] Allow Revolut-Octopus User-Agent Revolut payment processing uses "Revolut-Octopus/1.0" User-Agent to send callbacks. These requests are blocked by the "Octopus" bad User-Agent rule. --- conf.d/globalblacklist.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/conf.d/globalblacklist.conf b/conf.d/globalblacklist.conf index 07e9c1b1e8..59c1f860ca 100644 --- a/conf.d/globalblacklist.conf +++ b/conf.d/globalblacklist.conf @@ -194,6 +194,7 @@ map $http_user_agent $bad_bot { "~*(?:\b)slurp(?:\b)" 0; "~*(?:\b)teoma(?:\b)" 0; "~*(?:\b)yahoo(?:\b)" 0; + "~*(?:\b)Revolut-Octopus(?:\b)" 0; # END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### # --------------------------------------------------------