diff --git a/conf.d/globalblacklist.conf b/conf.d/globalblacklist.conf index ebcfa43e91..59c1f860ca 100644 --- a/conf.d/globalblacklist.conf +++ b/conf.d/globalblacklist.conf @@ -146,6 +146,103 @@ map $http_user_agent $bad_bot { # END CUSTOM BLACKLISTED USER AGENTS ### DO NOT EDIT OR REMOVE THIS LINE AT ALL ### # --------------------------------------------------------------------------------- +# -------------------------------------------- +# GOOD UA User-Agent Strings We Know and Trust +# -------------------------------------------- + +# ----------------------------------------------------------------------- +# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf +# by adding the same UA line there and changing its value of 1 +# If you think GoogleBot is bad you would simply add them to +# blacklist-user-agents.conf with a value of 1 +# ----------------------------------------------------------------------- + +# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*(?:\b)AdsBot-Google(?:\b)" 0; + "~*(?:\b)Applebot(?:\b)" 0; + "~*(?:\b)DoCoMo(?:\b)" 0; + "~*(?:\b)Feedfetcher-Google(?:\b)" 0; + "~*(?:\b)Google-HTTP-Java-Client(?:\b)" 0; + "~*(?:\b)Googlebot(?:\b)" 0; + "~*(?:\b)Googlebot-Image(?:\b)" 0; + "~*(?:\b)Googlebot-Mobile(?:\b)" 0; + "~*(?:\b)Googlebot-News(?:\b)" 0; + "~*(?:\b)Googlebot-Video(?:\b)" 0; + "~*(?:\b)Googlebot/Test(?:\b)" 0; + "~*(?:\b)Gravityscan(?:\b)" 0; + "~*(?:\b)Jakarta\ Commons(?:\b)" 0; + "~*(?:\b)Kraken/0.1(?:\b)" 0; + "~*(?:\b)LinkedInBot(?:\b)" 0; + "~*(?:\b)Mediapartners-Google(?:\b)" 0; + "~*(?:\b)SAMSUNG(?:\b)" 0; + "~*(?:\b)Slackbot(?:\b)" 0; + "~*(?:\b)Slackbot-LinkExpanding(?:\b)" 0; + "~*(?:\b)TwitterBot(?:\b)" 0; + "~*(?:\b)Wordpress(?:\b)" 0; + "~*(?:\b)adidxbot(?:\b)" 0; + "~*(?:\b)aolbuild(?:\b)" 0; + "~*(?:\b)bing(?:\b)" 0; + "~*(?:\b)bingbot(?:\b)" 0; + "~*(?:\b)bingpreview(?:\b)" 0; + "~*(?:\b)developers.facebook.com(?:\b)" 0; + "~*(?:\b)duckduckgo(?:\b)" 0; + "~*(?:\b)facebookexternalhit(?:\b)" 
0; + "~*(?:\b)facebookplatform(?:\b)" 0; + "~*(?:\b)gsa-crawler(?:\b)" 0; + "~*(?:\b)msnbot(?:\b)" 0; + "~*(?:\b)msnbot-media(?:\b)" 0; + "~*(?:\b)slurp(?:\b)" 0; + "~*(?:\b)teoma(?:\b)" 0; + "~*(?:\b)yahoo(?:\b)" 0; + "~*(?:\b)Revolut-Octopus(?:\b)" 0; +# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# -------------------------------------------------------- +# GOOD UA User-Agent Rate Limiting 1 - Disabled by Default +# -------------------------------------------------------- + + # TO ACTIVATE THIS RATE LIMITING Uncomment these two lines in blockbots.conf + #limit_conn bot1_connlimit 100; + #limit_req zone=bot1_reqlimitip burst=50; + +# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*(?:\b)Lynx(?:\b)" 1; + "~*(?:\b)Presto(?:\b)" 1; + "~*(?:\b)Wget/1.15(?:\b)" 1; + "~*(?:\b)jetmon(?:\b)" 1; + "~*(?:\b)libwww-perl(?:\b)" 1; + "~*(?:\b)munin(?:\b)" 1; +# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + +# ------------------------------------------------------- +# GOOD UA User-Agent Rate Limiting 2 - Enabled by Default +# ------------------------------------------------------- + +# ----------------------------------------------------------------------- +# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf +# by adding the same UA line there and changing its value of 1 +# ----------------------------------------------------------------------- + +# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + "~*(?:\b)Alexa(?:\b)" 2; + "~*(?:\b)ArchiveTeam(?:\b)" 2; + "~*(?:\b)BUbiNG(?:\b)" 2; + "~*(?:\b)Baidu(?:\b)" 2; + "~*(?:\b)FlipboardProxy(?:\b)" 2; + "~*(?:\b)MSIE\ 7.0(?:\b)" 2; + "~*(?:\b)R6_CommentReader(?:\b)" 2; + "~*(?:\b)R6_FeedFetcher(?:\b)" 2; + "~*(?:\b)RED/1(?:\b)" 2; + "~*(?:\b)RPT-HTTPClient(?:\b)" 2; + "~*(?:\b)Spaidu(?:\b)" 2; + "~*(?:\b)UptimeRobot/2.0(?:\b)" 2; + "~*(?:\b)YandexBot(?:\b)" 2; + "~*(?:\b)YandexImages(?:\b)" 2; + "~*(?:\b)archive.org(?:\b)" 2; + "~*(?:\b)ia_archiver(?:\b)" 2; + 
"~*(?:\b)sfFeedReader/0.9(?:\b)" 2; +# END LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### + # -------------------------------------------------- # BAD UA (User-Agent) Strings That We Block Outright # -------------------------------------------------- @@ -834,102 +931,6 @@ map $http_user_agent $bad_bot { "~*(?:\b)zgrab(?:\b)" 3; # END BAD BOTS ### DO NOT EDIT THIS LINE AT ALL ### -# -------------------------------------------- -# GOOD UA User-Agent Strings We Know and Trust -# -------------------------------------------- - -# ----------------------------------------------------------------------- -# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf -# by adding the same UA line there and chaning its value of 1 -# If you think GoogleBot is bad you would simply add them to -# blacklist-user-agents.conf with a value of 1 -# ----------------------------------------------------------------------- - -# START GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*(?:\b)AdsBot-Google(?:\b)" 0; - "~*(?:\b)Applebot(?:\b)" 0; - "~*(?:\b)DoCoMo(?:\b)" 0; - "~*(?:\b)Feedfetcher-Google(?:\b)" 0; - "~*(?:\b)Google-HTTP-Java-Client(?:\b)" 0; - "~*(?:\b)Googlebot(?:\b)" 0; - "~*(?:\b)Googlebot-Image(?:\b)" 0; - "~*(?:\b)Googlebot-Mobile(?:\b)" 0; - "~*(?:\b)Googlebot-News(?:\b)" 0; - "~*(?:\b)Googlebot-Video(?:\b)" 0; - "~*(?:\b)Googlebot/Test(?:\b)" 0; - "~*(?:\b)Gravityscan(?:\b)" 0; - "~*(?:\b)Jakarta\ Commons(?:\b)" 0; - "~*(?:\b)Kraken/0.1(?:\b)" 0; - "~*(?:\b)LinkedInBot(?:\b)" 0; - "~*(?:\b)Mediapartners-Google(?:\b)" 0; - "~*(?:\b)SAMSUNG(?:\b)" 0; - "~*(?:\b)Slackbot(?:\b)" 0; - "~*(?:\b)Slackbot-LinkExpanding(?:\b)" 0; - "~*(?:\b)TwitterBot(?:\b)" 0; - "~*(?:\b)Wordpress(?:\b)" 0; - "~*(?:\b)adidxbot(?:\b)" 0; - "~*(?:\b)aolbuild(?:\b)" 0; - "~*(?:\b)bing(?:\b)" 0; - "~*(?:\b)bingbot(?:\b)" 0; - "~*(?:\b)bingpreview(?:\b)" 0; - "~*(?:\b)developers.facebook.com(?:\b)" 0; - "~*(?:\b)duckduckgo(?:\b)" 0; - "~*(?:\b)facebookexternalhit(?:\b)" 0; - 
"~*(?:\b)facebookplatform(?:\b)" 0; - "~*(?:\b)gsa-crawler(?:\b)" 0; - "~*(?:\b)msnbot(?:\b)" 0; - "~*(?:\b)msnbot-media(?:\b)" 0; - "~*(?:\b)slurp(?:\b)" 0; - "~*(?:\b)teoma(?:\b)" 0; - "~*(?:\b)yahoo(?:\b)" 0; -# END GOOD BOTS ### DO NOT EDIT THIS LINE AT ALL ### - -# -------------------------------------------------------- -# GOOD UA User-Agent Rate Limiting 1 - Disabled by Default -# -------------------------------------------------------- - - # TO ACTIVATE THIS RATE LIMITING Uncomment these two lines in blockbots.conf - #limit_conn bot1_connlimit 100; - #limit_req zone=bot1_reqlimitip burst=50; - -# START ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*(?:\b)Lynx(?:\b)" 1; - "~*(?:\b)Presto(?:\b)" 1; - "~*(?:\b)Wget/1.15(?:\b)" 1; - "~*(?:\b)jetmon(?:\b)" 1; - "~*(?:\b)libwww-perl(?:\b)" 1; - "~*(?:\b)munin(?:\b)" 1; -# END ALLOWED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - -# ------------------------------------------------------- -# GOOD UA User-Agent Rate Limiting 2 - Enabled by Default -# ------------------------------------------------------- - -# ----------------------------------------------------------------------- -# You can over-ride these in /etc/nginx/bots.d/blacklist-user-agents.conf -# by adding the same UA line there and chaning its value of 1 -# ----------------------------------------------------------------------- - -# START LIMITED BOTS ### DO NOT EDIT THIS LINE AT ALL ### - "~*(?:\b)Alexa(?:\b)" 2; - "~*(?:\b)ArchiveTeam(?:\b)" 2; - "~*(?:\b)BUbiNG(?:\b)" 2; - "~*(?:\b)Baidu(?:\b)" 2; - "~*(?:\b)FlipboardProxy(?:\b)" 2; - "~*(?:\b)MSIE\ 7.0(?:\b)" 2; - "~*(?:\b)R6_CommentReader(?:\b)" 2; - "~*(?:\b)R6_FeedFetcher(?:\b)" 2; - "~*(?:\b)RED/1(?:\b)" 2; - "~*(?:\b)RPT-HTTPClient(?:\b)" 2; - "~*(?:\b)Spaidu(?:\b)" 2; - "~*(?:\b)UptimeRobot/2.0(?:\b)" 2; - "~*(?:\b)YandexBot(?:\b)" 2; - "~*(?:\b)YandexImages(?:\b)" 2; - "~*(?:\b)archive.org(?:\b)" 2; - "~*(?:\b)ia_archiver(?:\b)" 2; - "~*(?:\b)sfFeedReader/0.9(?:\b)" 2; -# END LIMITED 
BOTS ### DO NOT EDIT THIS LINE AT ALL ### - } # ===========================