Skip to content

Commit ce242a3

Browse files
committed
[extensions][enums] Improve detection for Yandex bots
1 parent 2078b1e commit ce242a3

File tree

4 files changed

+415
-3
lines changed

4 files changed

+415
-3
lines changed

src/enums/ua-parser-enums.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,35 @@ const Extension = Object.freeze({
553553
VERCEL_V0BOT: 'v0bot',
554554
YAHOO_JAPAN: 'Y!J-BRW',
555555
YAHOO_SLURP: 'Yahoo! Slurp',
556+
YANDEX_ACCESSIBILITY_BOT: 'YandexAccessibilityBot',
557+
YANDEX_ADDITIONAL_BOT: 'YandexAdditionalBot',
558+
YANDEX_ADNET: 'YandexAdNet',
559+
YANDEX_BLOGS: 'YandexBlogs',
556560
YANDEX_BOT: 'YandexBot',
561+
YANDEX_BOT_MIRRORDETECTOR: 'YandexBot MirrorDetector',
562+
YANDEX_COMBOT: 'YandexComBot',
563+
YANDEX_FAVICONS: 'YandexFavicons',
564+
YANDEX_IMAGE_RESIZER: 'YandexImageResizer',
565+
YANDEX_IMAGES: 'YandexImages',
566+
YANDEX_MARKET: 'YandexMarket',
567+
YANDEX_MEDIA: 'YandexMedia',
568+
YANDEX_METRIKA: 'YandexMetrika',
569+
YANDEX_MOBILE_BOT: 'YandexMobileBot',
570+
YANDEX_MOBILE_SCREENSHOT_BOT: 'YandexMobileScreenShotBot',
571+
YANDEX_NEWS: 'YandexNews',
572+
YANDEX_ONTODB: 'YandexOntoDB',
573+
YANDEX_ONTODB_API: 'YandexOntoDBAPI',
574+
YANDEX_PARTNER: 'YandexPartner',
575+
YANDEX_RCA: 'YandexRCA',
576+
YANDEX_RENDERRESOURCES_BOT: 'YandexRenderResourcesBot',
577+
YANDEX_SCREENSHOT_BOT: 'YandexScreenshotBot',
578+
YANDEX_SPRAV_BOT: 'YandexSpravBot',
579+
YANDEX_TRACKER: 'YandexTracker',
580+
YANDEX_VERTICALS: 'YandexVerticals',
581+
YANDEX_VERTIS: 'YandexVertis',
582+
YANDEX_VIDEO: 'YandexVideo',
583+
YANDEX_VIDEO_PARSER: 'YandexVideoParser',
584+
YANDEX_WEBMASTER: 'YandexWebmaster',
557585
YEP_BOT: 'YepBot',
558586
YETI: 'Yeti',
559587
YISOU_SPIDER: 'YisouSpider',
@@ -624,6 +652,15 @@ const Extension = Object.freeze({
624652
VERCEL_BOT: 'Vercelbot',
625653
VERCEL_FLAGS: 'vercelflags',
626654
VERCEL_TRACING: 'verceltracing',
655+
YANDEX_CALENDAR: 'YandexCalendar',
656+
YANDEX_DIRECT: 'YandexDirect',
657+
YANDEX_DIRECTDYN: 'YandexDirectDyn',
658+
YANDEX_DIRECTFETCHER: 'YaDirectFetcher',
659+
YANDEX_FORDOMAIN: 'YandexForDomain',
660+
YANDEX_PAGECHECKER: 'YandexPagechecker',
661+
YANDEX_SEARCHSHOP: 'YandexSearchShop',
662+
YANDEX_SITELINKS: 'YandexSitelinks',
663+
YANDEX_USERPROXY: 'YandexUserproxy',
627664
WHATSAPP: 'WhatsApp',
628665
ZOOMINFO_BOT: 'Zoombot'
629666
},

src/extensions/ua-parser-extensions.js

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ const Crawlers = Object.freeze({
109109
/(y!?j-(?:asr|br[uw]|dscv|mmp|vsidx|wsc))\/([\w\.]+)/i,
110110

111111
// Yandex Bots - https://yandex.com/bots
112-
/(yandex(?:(?:mobile)?(?:accessibility|additional|renderresources|screenshot|sprav)?bot|image(?:s|resizer)|video(?:parser)?|blogs|adnet|favicons|fordomain|market|media|metrika|news|ontodb(?:api)?|pagechecker|partner|rca|tracker|turbo|vertis|webmaster|antivirus))\/([\w\.]+)/i,
112+
/(yandex(?:(?:mobile)?(?:accessibility|additional|com|renderresources|screenshot|sprav)?bot(?!.+mirror)|image(?:s|resizer)|adnet|blogs|favicons|market|media|metrika|news|ontodb(?:api)?|partner|rca|tracker|turbo|verti(?:cal)?s|webmaster|video(?:parser)?))\/([\w\.]+)/i,
113113

114114
// Yeti (Naver)
115115
/(yeti)\/([\w\.]+)/i,
@@ -119,9 +119,14 @@ const Crawlers = Object.freeze({
119119
// Freespoke - https://docs.freespoke.com/search/bot/
120120
/((?:aihit|blex|diff|huggingface-|msn|pangu|replicate-|runpod-|timpi|together-|xai-|you|zum)bot|(?:magpie-|velenpublicweb)crawler|(?:chatglm-|line|screaming frog seo |yisou)spider|cotoyogi|firecrawlagent|freespoke|omgili(?:bot)?|openai image downloader|startpageprivateimageproxy|twinagent|webzio-extended)\/?([\w\.]*)/i
121121
],
122-
123122
[NAME, VERSION, [TYPE, CRAWLER]],
124123

124+
[
125+
// YandexBot MirrorDetector
126+
/(yandexbot\/([\w\.]+); mirrordetector)/i
127+
],
128+
[[NAME, /\/.+;/ig, ''], VERSION, [TYPE, CRAWLER]],
129+
125130
[
126131
// Google Bots
127132
/((?:adsbot|apis|mediapartners)-google(?:-mobile)?|google-?(?:other|cloudvertexbot|extended|safety))/i,
@@ -260,7 +265,7 @@ const Fetchers = Object.freeze({
260265
// Perplexity-User - https://docs.perplexity.ai/guides/bots
261266
// MistralAI-User - https://docs.mistral.ai/robots/
262267
// Yandex Bots - https://yandex.com/bots
263-
/(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|cohere-ai|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
268+
/(asana|ahrefssiteaudit|(?:bing|microsoft)preview|blueno|(?:chatgpt|claude|mistralai|perplexity)-user|cohere-ai|hubspot page fetcher|mastodon|(?:bitly|bufferlinkpreview|discord|duckassist|linkedin|pinterest|reddit|roger|siteaudit|twitter|uptimero|zoom)bot|google-site-verification|iframely|kakaotalk-scrap|meta-externalfetcher|y!?j-dlc|yandex(?:calendar|direct(?:dyn)?|fordomain|pagechecker|searchshop)|yadirectfetcher)\/([\w\.]+)/i,
264269

265270
// Bluesky
266271
/(bluesky) cardyb\/([\w\.]+)/i,

test/data/ua/extension/crawler.json

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,46 @@
11701170
"type" : "crawler"
11711171
}
11721172
},
1173+
{
1174+
"desc" : "YandexAccessibilityBot",
1175+
"ua" : "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)",
1176+
"expect" :
1177+
{
1178+
"name" : "YandexAccessibilityBot",
1179+
"version" : "3.0",
1180+
"type" : "crawler"
1181+
}
1182+
},
1183+
{
1184+
"desc" : "YandexAdditionalBot",
1185+
"ua" : "Mozilla/5.0 (compatible; YandexAdditionalBot/3.0; +http://yandex.com/bots)",
1186+
"expect" :
1187+
{
1188+
"name" : "YandexAdditionalBot",
1189+
"version" : "3.0",
1190+
"type" : "crawler"
1191+
}
1192+
},
1193+
{
1194+
"desc" : "YandexAdNet",
1195+
"ua" : "Mozilla/5.0 (compatible; YandexAdNet/1.0; +http://yandex.com/bots)",
1196+
"expect" :
1197+
{
1198+
"name" : "YandexAdNet",
1199+
"version" : "1.0",
1200+
"type" : "crawler"
1201+
}
1202+
},
1203+
{
1204+
"desc" : "YandexBlogs",
1205+
"ua" : "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)",
1206+
"expect" :
1207+
{
1208+
"name" : "YandexBlogs",
1209+
"version" : "0.99",
1210+
"type" : "crawler"
1211+
}
1212+
},
11731213
{
11741214
"desc" : "YandexBot",
11751215
"ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
@@ -1180,6 +1220,246 @@
11801220
"type" : "crawler"
11811221
}
11821222
},
1223+
{
1224+
"desc" : "YandexBot MirrorDetector",
1225+
"ua" : "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)",
1226+
"expect" :
1227+
{
1228+
"name" : "YandexBot MirrorDetector",
1229+
"version" : "3.0",
1230+
"type" : "crawler"
1231+
}
1232+
},
1233+
{
1234+
"desc" : "YandexComBot",
1235+
"ua" : "Mozilla/5.0 (compatible; YandexComBot/3.0; +http://yandex.com/bots)",
1236+
"expect" :
1237+
{
1238+
"name" : "YandexComBot",
1239+
"version" : "3.0",
1240+
"type" : "crawler"
1241+
}
1242+
},
1243+
{
1244+
"desc" : "YandexFavicons",
1245+
"ua" : "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)",
1246+
"expect" :
1247+
{
1248+
"name" : "YandexFavicons",
1249+
"version" : "1.0",
1250+
"type" : "crawler"
1251+
}
1252+
},
1253+
{
1254+
"desc" : "YandexImageResizer",
1255+
"ua" : "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)",
1256+
"expect" :
1257+
{
1258+
"name" : "YandexImageResizer",
1259+
"version" : "2.0",
1260+
"type" : "crawler"
1261+
}
1262+
},
1263+
{
1264+
"desc" : "YandexImages",
1265+
"ua" : "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
1266+
"expect" :
1267+
{
1268+
"name" : "YandexImages",
1269+
"version" : "3.0",
1270+
"type" : "crawler"
1271+
}
1272+
},
1273+
{
1274+
"desc" : "YandexMarket",
1275+
"ua" : "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)",
1276+
"expect" :
1277+
{
1278+
"name" : "YandexMarket",
1279+
"version" : "1.0",
1280+
"type" : "crawler"
1281+
}
1282+
},
1283+
{
1284+
"desc" : "YandexMetrika",
1285+
"ua" : "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)",
1286+
"expect" :
1287+
{
1288+
"name" : "YandexMetrika",
1289+
"version" : "2.0",
1290+
"type" : "crawler"
1291+
}
1292+
},
1293+
{
1294+
"desc" : "YandexMedia",
1295+
"ua" : "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)",
1296+
"expect" :
1297+
{
1298+
"name" : "YandexMedia",
1299+
"version" : "3.0",
1300+
"type" : "crawler"
1301+
}
1302+
},
1303+
{
1304+
"desc" : "YandexMobileBot",
1305+
"ua" : "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)",
1306+
"expect" :
1307+
{
1308+
"name" : "YandexMobileBot",
1309+
"version" : "3.0",
1310+
"type" : "crawler"
1311+
}
1312+
},
1313+
{
1314+
"desc" : "YandexMobileScreenShotBot",
1315+
"ua" : "Mozilla/5.0 (compatible; YandexMobileScreenShotBot/1.0; +http://yandex.com/bots)",
1316+
"expect" :
1317+
{
1318+
"name" : "YandexMobileScreenShotBot",
1319+
"version" : "1.0",
1320+
"type" : "crawler"
1321+
}
1322+
},
1323+
{
1324+
"desc" : "YandexNews",
1325+
"ua" : "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)",
1326+
"expect" :
1327+
{
1328+
"name" : "YandexNews",
1329+
"version" : "4.0",
1330+
"type" : "crawler"
1331+
}
1332+
},
1333+
{
1334+
"desc" : "YandexOntoDB",
1335+
"ua" : "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)",
1336+
"expect" :
1337+
{
1338+
"name" : "YandexOntoDB",
1339+
"version" : "1.0",
1340+
"type" : "crawler"
1341+
}
1342+
},
1343+
{
1344+
"desc" : "YandexOntoDBAPI",
1345+
"ua" : "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)",
1346+
"expect" :
1347+
{
1348+
"name" : "YandexOntoDBAPI",
1349+
"version" : "1.0",
1350+
"type" : "crawler"
1351+
}
1352+
},
1353+
{
1354+
"desc" : "YandexPartner",
1355+
"ua" : "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)",
1356+
"expect" :
1357+
{
1358+
"name" : "YandexPartner",
1359+
"version" : "3.0",
1360+
"type" : "crawler"
1361+
}
1362+
},
1363+
{
1364+
"desc" : "YandexRCA",
1365+
"ua" : "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)",
1366+
"expect" :
1367+
{
1368+
"name" : "YandexRCA",
1369+
"version" : "1.0",
1370+
"type" : "crawler"
1371+
}
1372+
},
1373+
{
1374+
"desc" : "YandexRenderResourcesBot",
1375+
"ua" : "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)",
1376+
"expect" :
1377+
{
1378+
"name" : "YandexRenderResourcesBot",
1379+
"version" : "1.0",
1380+
"type" : "crawler"
1381+
}
1382+
},
1383+
{
1384+
"desc" : "YandexScreenshotBot",
1385+
"ua" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)",
1386+
"expect" :
1387+
{
1388+
"name" : "YandexScreenshotBot",
1389+
"version" : "3.0",
1390+
"type" : "crawler"
1391+
}
1392+
},
1393+
{
1394+
"desc" : "YandexSpravBot",
1395+
"ua" : "Mozilla/5.0 (compatible; YandexSpravBot/1.0; +http://yandex.com/bots)",
1396+
"expect" :
1397+
{
1398+
"name" : "YandexSpravBot",
1399+
"version" : "1.0",
1400+
"type" : "crawler"
1401+
}
1402+
},
1403+
{
1404+
"desc" : "YandexTracker",
1405+
"ua" : "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)",
1406+
"expect" :
1407+
{
1408+
"name" : "YandexTracker",
1409+
"version" : "1.0",
1410+
"type" : "crawler"
1411+
}
1412+
},
1413+
{
1414+
"desc" : "YandexVertis",
1415+
"ua" : "Mozilla/5.0 (compatible; YandexVertis/3.0; +http://yandex.com/bots)",
1416+
"expect" :
1417+
{
1418+
"name" : "YandexVertis",
1419+
"version" : "3.0",
1420+
"type" : "crawler"
1421+
}
1422+
},
1423+
{
1424+
"desc" : "YandexVerticals",
1425+
"ua" : "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)",
1426+
"expect" :
1427+
{
1428+
"name" : "YandexVerticals",
1429+
"version" : "1.0",
1430+
"type" : "crawler"
1431+
}
1432+
},
1433+
{
1434+
"desc" : "YandexVideo",
1435+
"ua" : "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)",
1436+
"expect" :
1437+
{
1438+
"name" : "YandexVideo",
1439+
"version" : "3.0",
1440+
"type" : "crawler"
1441+
}
1442+
},
1443+
{
1444+
"desc" : "YandexVideoParser",
1445+
"ua" : "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)",
1446+
"expect" :
1447+
{
1448+
"name" : "YandexVideoParser",
1449+
"version" : "1.0",
1450+
"type" : "crawler"
1451+
}
1452+
},
1453+
{
1454+
"desc" : "YandexWebmaster",
1455+
"ua" : "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)",
1456+
"expect" :
1457+
{
1458+
"name" : "YandexWebmaster",
1459+
"version" : "2.0",
1460+
"type" : "crawler"
1461+
}
1462+
},
11831463
{
11841464
"desc" : "YepBot",
11851465
"ua" : "Mozilla/5.0 (compatible; YepBot/1.0; +http://yep.com/yepbot/)",

0 commit comments

Comments
 (0)