Skip to content

Commit 146f182

Browse files
committed
[extensions] Improve bot detection for ByteDance, Google, SB Intuitions, Webzio
1 parent ce242a3 commit 146f182

File tree

4 files changed: 81 additions (+81) and 10 deletions (-10)

src/enums/ua-parser-enums.js

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -441,7 +441,6 @@ const Extension = Object.freeze({
441441
WGET: 'wget'
442442
},
443443
Crawlers: {
444-
'360_SPIDER': '360Spider',
445444
AHREFS_BOT: 'AhrefsBot',
446445
AI2_BOT: 'AI2Bot',
447446
AIHIT_BOT: 'aiHitBot',
@@ -468,9 +467,9 @@ const Extension = Object.freeze({
468467
BLEX_BOT: 'BLEXBot',
469468
BOTIFY: 'botify',
470469
BRAVE_BOT: 'Bravebot',
471-
BYTEDANCE_SPIDER: 'Bytespider',
470+
BYTEDANCE_BYTESPIDER: 'Bytespider',
471+
BYTEDANCE_TIKTOKSPIDER: 'TikTokSpider',
472472
CC_BOT: 'CCBot',
473-
CHATGLM_SPIDER: 'ChatGLM-Spider',
474473
COCCOC_BOT_WEB: 'coccocbot-web',
475474
COCCOC_BOT_IMAGE: 'coccocbot-image',
476475
COHERE_TRAINING_DATA_CRAWLER: 'cohere-training-data-crawler',
@@ -492,10 +491,12 @@ const Extension = Object.freeze({
492491
GOOGLE_ADSBOT: 'AdsBot-Google',
493492
GOOGLE_ADSBOT_MOBILE: 'Adsbot-Google-Mobile',
494493
GOOGLE_ADSENSE: 'AdSense',
494+
GOOGLE_APIS: 'APIs-Google',
495495
GOOGLE_BOT: 'Googlebot',
496496
GOOGLE_BOT_IMAGE: 'Googlebot-Image',
497497
GOOGLE_BOT_NEWS: 'Googlebot-News',
498498
GOOGLE_BOT_VIDEO: 'Googlebot-Video',
499+
GOOGLE_CLOUDVERTEXBOT: 'Google-CloudVertexBot',
499500
GOOGLE_INSPECTIONTOOL: 'Google-InspectionTool',
500501
GOOGLE_OTHER: 'GoogleOther',
501502
GOOGLE_OTHER_IMAGE: 'GoogleOther-Image',
@@ -525,16 +526,16 @@ const Extension = Object.freeze({
525526
MICROSOFT_ADIDXBOT: 'adidxbot',
526527
MOJEEK_BOT: 'MojeekBot',
527528
MOZ_DOTBOT: 'DotBot',
528-
OMGILI: 'omgili',
529-
OMGILI_BOT: 'omgilibot',
530529
ONCRAWL: 'OnCrawl',
531530
ONESPOT_SCRAPERBOT: 'Onespot-ScraperBot',
532531
OPENAI_GPTBOT: 'GPTBot',
533532
OPENAI_SEARCH: 'OAI-SearchBot',
534533
PERPLEXITY_BOT: 'PerplexityBot',
534+
QIHOO_360_SPIDER: '360Spider',
535535
QWANT_BOT: 'Qwantbot',
536536
REPLICATE_BOT: 'Replicate-Bot',
537537
RUNPOD_BOT: 'RunPod-Bot',
538+
SB_INTUITIONS_BOT: 'SBIntuitionsBot',
538539
SEEKPORT_BOT: 'SeekportBot',
539540
SEMRUSH_BOT: 'SemrushBot',
540541
SEMRUSH_BOT_BACKLINK: 'SemrushBot-BA',
@@ -549,8 +550,12 @@ const Extension = Object.freeze({
549550
TOGETHER_BOT: 'Together-Bot',
550551
TURNITIN_BOT: 'TurnitinBot',
551552
TWIN_AGENT: 'TwinAgent',
552-
XAI_BOT: 'xAI-Bot',
553553
VERCEL_V0BOT: 'v0bot',
554+
WEBZIO: 'webzio',
555+
WEBZIO_EXTENDED: 'Webzio-Extended',
556+
WEBZIO_OMGILI: 'omgili',
557+
WEBZIO_OMGILI_BOT: 'omgilibot',
558+
XAI_BOT: 'xAI-Bot',
554559
YAHOO_JAPAN: 'Y!J-BRW',
555560
YAHOO_SLURP: 'Yahoo! Slurp',
556561
YANDEX_ACCESSIBILITY_BOT: 'YandexAccessibilityBot',
@@ -586,6 +591,7 @@ const Extension = Object.freeze({
586591
YETI: 'Yeti',
587592
YISOU_SPIDER: 'YisouSpider',
588593
YOU_BOT: 'YouBot',
594+
ZHIPU_CHATGLM_SPIDER: 'ChatGLM-Spider',
589595
ZUM_BOT: 'ZumBot'
590596
},
591597
Emails: {
@@ -624,7 +630,7 @@ const Extension = Object.freeze({
624630
GOOGLE_CHROME_LIGHTHOUSE: 'Chrome-Lighthouse',
625631
GOOGLE_FEEDFETCHER: 'FeedFetcher-Google',
626632
GOOGLE_GEMINI_DEEP_RESEARCH: 'Gemini-Deep-Research',
627-
GOOGLE_IMAGE_PROXY: 'GoogleImageProxy',
633+
GOOGLE_IMAGEPROXY: 'GoogleImageProxy',
628634
GOOGLE_PAGERENDERER: 'Google-PageRenderer',
629635
GOOGLE_READ_ALOUD: 'Google-Read-Aloud',
630636
GOOGLE_PRODUCER: 'GoogleProducer',

src/extensions/ua-parser-extensions.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,10 @@ const Crawlers = Object.freeze({
6161
// Onespot - https://www.onespot.com/identifying-traffic.html
6262
// OpenAI's SearchGPT - https://platform.openai.com/docs/bots
6363
// PerplexityBot - https://perplexity.ai/perplexitybot
64+
// SBIntuitionsBot - https://www.sbintuitions.co.jp/bot/
6465
// SeznamBot - http://napoveda.seznam.cz/seznambot-intro
6566
// YepBot - https://yep.com/yepbot/
66-
/((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|semrush|seznam|yep)bot)\/([\w\.-]+)/i,
67+
/((?:adidx|ahrefs|amazon|bing|brave|cc|contx|coveo|criteo|dot|duckduck(?:go-favicons-)?|exa|facebook|gpt|iask|kagi|kangaroo |linkedin|mj12|mojeek|oai-search|onespot-scraper|perplexity|sbintuitions|semrush|seznam|yep)bot)\/([\w\.-]+)/i,
6768

6869
// Algolia Crawler
6970
/(algolia crawler(?: renderscript)?)\/?([\w\.]*)/i,
@@ -139,8 +140,8 @@ const Crawlers = Object.freeze({
139140
// TurnitinBot - https://www.turnitin.com/robot/crawlerinfo.html
140141
// v0bot - https://vercel.com/docs/bot-management
141142
// Yahoo! Slurp - http://help.yahoo.com/help/us/ysearch/slurp
142-
// Botify / Bytespider / DeepSeekBot / Qihoo 360Spider / SeekportBot
143-
/\b((?:ai2|aspiegel|dataforseo|deepseek|imagesift|petal|seekport|turnitin|v0)bot|360spider-?(?:image|video)?|baidu-ads|botify|bytespider|cohere-training-data-crawler|elastic(?=\/s)|marginalia|siteimprove(?=bot|\.com)|teoma|yahoo! slurp)/i
143+
// Botify / Bytespider / DeepSeekBot / Qihoo 360Spider / SeekportBot / TikTokSpider / webzio
144+
/\b((ai2|aspiegel|dataforseo|deepseek|imagesift|petal|seekport|turnitin|v0)bot|360spider-?(image|video)?|baidu-ads|botify|(byte|tiktok)spider|cohere-training-data-crawler|elastic(?=\/s)|marginalia|siteimprove(?=bot|\.com)|teoma|webzio|yahoo! slurp)/i
144145
],
145146
[NAME, [TYPE, CRAWLER]]
146147
]

src/helpers/ua-parser-helpers.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ const isAIBot = (resultOrUA) => [
8888
'googleother',
8989
'googleother-image',
9090
'googleother-video',
91+
'google-cloudvertexbot',
9192
'google-extended',
9293

9394
// Hive AI
@@ -123,6 +124,9 @@ const isAIBot = (resultOrUA) => [
123124
// Runpod
124125
'runpod-bot',
125126

127+
// SB Intuitions
128+
'sbintuitionsbot',
129+
126130
// Semrush
127131
'semrushbot-ocob',
128132

test/data/ua/extension/crawler.json

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,16 @@
579579
"type" : "crawler"
580580
}
581581
},
582+
{
583+
"desc" : "APIs-Google",
584+
"ua" : "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)",
585+
"expect" :
586+
{
587+
"name" : "APIs-Google",
588+
"version" : "undefined",
589+
"type" : "crawler"
590+
}
591+
},
582592
{
583593
"desc" : "Googlebot-Video",
584594
"ua" : "Googlebot-Video/1.0",
@@ -679,6 +689,16 @@
679689
"type" : "crawler"
680690
}
681691
},
692+
{
693+
"desc" : "Google-CloudVertexBot",
694+
"ua" : "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.7204.183 Mobile Safari/537.36 (compatible; Google-CloudVertexBot; +https://cloud.google.com/enterprise-search)",
695+
"expect" :
696+
{
697+
"name" : "Google-CloudVertexBot",
698+
"version" : "undefined",
699+
"type" : "crawler"
700+
}
701+
},
682702
{
683703
"desc" : "Google-Safety",
684704
"ua" : "Google-Safety",
@@ -970,6 +990,16 @@
970990
"type" : "crawler"
971991
}
972992
},
993+
{
994+
"desc" : "SBIntuitionsBot",
995+
"ua" : "Mozilla/5.0 (compatible; SBIntuitionsBot/0.1;+https://www.sbintuitions.co.jp/bot/)",
996+
"expect" :
997+
{
998+
"name" : "SBIntuitionsBot",
999+
"version" : "0.1",
1000+
"type" : "crawler"
1001+
}
1002+
},
9731003
{
9741004
"desc" : "SeekportBot",
9751005
"ua" : "Mozilla/5.0 (compatible; SeekportBot; +https://bot.seekport.com)",
@@ -1080,6 +1110,16 @@
10801110
"type" : "crawler"
10811111
}
10821112
},
1113+
{
1114+
"desc" : "TikTokSpider",
1115+
"ua" : "Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; TikTokSpider; ttspider-feedback@tiktok.com)",
1116+
"expect" :
1117+
{
1118+
"name" : "TikTokSpider",
1119+
"version" : "undefined",
1120+
"type" : "crawler"
1121+
}
1122+
},
10831123
{
10841124
"desc" : "Timpibot",
10851125
"ua" : "Timpibot/0.8 (+http://www.timpi.io)",
@@ -1150,6 +1190,26 @@
11501190
"type" : "crawler"
11511191
}
11521192
},
1193+
{
1194+
"desc" : "webzio",
1195+
"ua" : "webzio (+https://webz.io/bot.html)",
1196+
"expect" :
1197+
{
1198+
"name" : "webzio",
1199+
"version" : "undefined",
1200+
"type" : "crawler"
1201+
}
1202+
},
1203+
{
1204+
"desc" : "Webzio-Extended",
1205+
"ua" : "Mozilla/5.0 (compatible; Webzio-Extended/1.0; +https://www.webzio.com/bot.html)",
1206+
"expect" :
1207+
{
1208+
"name" : "Webzio-Extended",
1209+
"version" : "1.0",
1210+
"type" : "crawler"
1211+
}
1212+
},
11531213
{
11541214
"desc" : "Yahoo! Japan",
11551215
"ua" : "Y!J-BRW/1.0 (https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716)",

0 commit comments

Comments
 (0)