Skip to content

Commit 9003fe5

Browse files
committed
[extensions] Add new bots: Algolia Crawler, contxbot, HubSpot Page Fetcher, Kagibot
1 parent 975c486 commit 9003fe5

File tree

3 files changed

+47 additions, -5 deletions

src/extensions/ua-parser-extensions.js

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,13 +45,15 @@ const Crawlers = Object.freeze({
4545
// Bingbot / AdIdxBot - https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0
4646
// Bravebot - https://search.brave.com/help/brave-search-crawler
4747
// CCBot - https://commoncrawl.org/faq
48+
// contxbot - https://affiliate-program.amazon.com/help/node/topic/GT98G5PPRERNVZ2C
4849
// Coveobot - https://connect.coveo.com/s/article/19648
4950
// CriteoBot - https://www.criteo.com/criteo-crawler/
5051
// Dotbot - https://moz.com/help/moz-procedures/crawlers/dotbot
5152
// DuckDuckBot - http://duckduckgo.com/duckduckbot.html
5253
// FacebookBot - https://developers.facebook.com/docs/sharing/bot/
5354
// GPTBot - https://platform.openai.com/docs/gptbot
5455
// iAskBot - https://iask.ai
56+
// Kagibot - https://kagi.com/bot
5557
// Kangaroo Bot - https://kangaroollm.com.au/kangaroo-bot/
5658
// LinkedInBot - http://www.linkedin.com
5759
// MJ12bot - https://mj12bot.com/
@@ -60,7 +62,7 @@ const Crawlers = Object.freeze({
6062
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
6163
// PerplexityBot - https://perplexity.ai/perplexitybot
6264
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
63-
/((?:adidx|ahrefs|amazon|bing|brave|cc|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
65+
/((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam)bot)\/([\w\.-]+)/i,
6466

6567
// Applebot - http://apple.com/go/applebot
6668
/(applebot(?:-extended)?)\/?([\w\.]*)/i,
@@ -105,9 +107,9 @@ const Crawlers = Object.freeze({
105107
// Yeti (Naver)
106108
/(yeti)\/([\w\.]+)/i,
107109

108-
// aiHitBot / Diffbot / FirecrawlAgent / HuggingFace-Bot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / PanguBot / Replicate-Bot / RunPod-Bot / Webzio-Extended / Screaming Frog SEO Spider / Startpage / Timpibot / Together-Bot / VelenPublicWebCrawler / xAI-Bot / YisouSpider / YouBot
110+
// aiHitBot / Algolia Crawler / Diffbot / FirecrawlAgent / HuggingFace-Bot / Linespider / Magpie-Crawler / Omgilibot / OpenAI Image Downloader / PanguBot / Replicate-Bot / RunPod-Bot / Webzio-Extended / Screaming Frog SEO Spider / Startpage / Timpibot / Together-Bot / VelenPublicWebCrawler / xAI-Bot / YisouSpider / YouBot
109111
// Cotoyogi - https://ds.rois.ac.jp/en_center8/en_crawler/
110-
/((?:aihit|diff|huggingface-|pangu|replicate-|runpod-|timpi|together-|xai-|you)bot|omgili(?:bot)?|cotoyogi|firecrawlagent|openai image downloader|(?:magpie-|velenpublicweb)crawler|startpageprivateimageproxy|webzio-extended|(?:chatglm-|line|screaming frog seo |yisou)spider)\/?([\w\.]*)/i
112+
/((?:aihit|diff|huggingface-|pangu|replicate-|runpod-|timpi|together-|xai-|you)bot|omgili(?:bot)?|cotoyogi|firecrawlagent|openai image downloader|(?:algolia |magpie-|velenpublicweb)crawler|startpageprivateimageproxy|webzio-extended|(?:chatglm-|line|screaming frog seo |yisou)spider)\/?([\w\.]*)/i
111113
],
112114

113115
[NAME, VERSION, [TYPE, CRAWLER]],
@@ -241,7 +243,7 @@ const Emails = Object.freeze({
241243
const Fetchers = Object.freeze({
242244
browser : [
243245
[
244-
// Asana / Bitlybot / Better Uptime / BingPreview / Blueno / kakaotalk-scrap / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot
246+
// Asana / Bitlybot / Better Uptime / BingPreview / Blueno / HubSpot Page Fetcher / kakaotalk-scrap / Mastodon / MicrosoftPreview / Pinterestbot / Redditbot / Rogerbot / SiteAuditBot / Telegrambot / Twitterbot / UptimeRobot
245247
// AhrefsSiteAudit - https://ahrefs.com/robot/site-audit
246248
// Buffer Link Preview Bot - https://scraper.buffer.com/about/bots/link-preview-bot
247249
// ChatGPT-User - https://platform.openai.com/docs/plugins/bot
@@ -251,7 +253,7 @@ const Fetchers = Object.freeze({
251253
// Perplexity-User - https://docs.perplexity.ai/guides/bots
252254
// MistralAI-User - https://docs.mistral.ai/robots/
253255
// Yandex Bots - https://yandex.com/bots
254-
/(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
256+
/(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
255257

256258
// Bluesky
257259
/(bluesky) cardyb\/([\w\.]+)/i,

test/data/ua/extension/crawler.json

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,16 @@
7979
"type" : "crawler"
8080
}
8181
},
82+
{
83+
"desc" : "Algolia Crawler",
84+
"ua" : "Algolia Crawler/v2.183.0",
85+
"expect" :
86+
{
87+
"name" : "Algolia Crawler",
88+
"version" : "v2.183.0",
89+
"type" : "crawler"
90+
}
91+
},
8292
{
8393
"desc" : "Applebot",
8494
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1;+http://www.apple.com/go/applebot)",
@@ -319,6 +329,16 @@
319329
"type" : "crawler"
320330
}
321331
},
332+
{
333+
"desc" : "contxbot",
334+
"ua" : "Mozilla/5.0 (compatible;contxbot/1.0)",
335+
"expect" :
336+
{
337+
"name" : "contxbot",
338+
"version" : "1.0",
339+
"type" : "crawler"
340+
}
341+
},
322342
{
323343
"desc" : "Cotoyogi",
324344
"ua" : "Mozilla/5.0 (compatible; Cotoyogi/4.0; +https://ds.rois.ac.jp/center8/crawler/)",
@@ -649,6 +669,16 @@
649669
"type" : "crawler"
650670
}
651671
},
672+
{
673+
"desc" : "Kagibot",
674+
"ua" : "Mozilla/5.0 (compatible; Kagibot/1.0; +https://kagi.com/bot)",
675+
"expect" :
676+
{
677+
"name" : "Kagibot",
678+
"version" : "1.0",
679+
"type" : "crawler"
680+
}
681+
},
652682
{
653683
"desc" : "Kangaroo Bot",
654684
"ua" : "Mozilla/5.0 (compatible; Kangaroo Bot/1.0)",

test/data/ua/extension/fetcher.json

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,16 @@
189189
"type" : "fetcher"
190190
}
191191
},
192+
{
193+
"desc" : "HubSpot Page Fetcher",
194+
"ua" : "HubSpot Page Fetcher/1.0 http://www.hubspot.com/ web-crawlers@hubspot.com",
195+
"expect" :
196+
{
197+
"name" : "HubSpot Page Fetcher",
198+
"version" : "1.0",
199+
"type" : "fetcher"
200+
}
201+
},
192202
{
193203
"desc" : "Iframely",
194204
"ua" : "Iframely/1.3.1 (+https://iframely.com/docs/about)",

0 commit comments

Comments
 (0)