# Detection rule: unsolicited "predatory journal" / research-paper solicitations.
#
# Layout of the `source` expression:
#   1. Gate:   inbound mail, English, topic model says "Educational and Research"
#              (medium/high confidence) and not high-confidence "Health and Wellness".
#   2. Score:  `3 of (...)` - three or more of the listed signals must hold.
#   3. Negate: authenticated Microsoft quarantine digests are excluded.
#
# NOTE: the first signal's regex is a character class made only of non-ASCII
# look-alike code points (fullwidth letters, ligatures, digraphs, ...). Do not run
# width/ligature normalisation over this file: folding those code points to ASCII
# turns the class into one that matches ordinary letters (or fails to compile).
name: "Spam/fraud: Predatory journal/research paper request"
description: "Detects messages related to academic research and publishing that contain suspicious patterns including character manipulation, flattering language, time pressure tactics, and domain registration anomalies. Focuses on unsolicited invitations for manuscript submissions, peer reviews, or editorial roles."
type: "rule"
severity: "medium"
source: |
  type.inbound
  // topic gate: academic/research content, but not (high-confidence) health content
  and any(beta.ml_topic(body.current_thread.text).topics,
          .name == "Educational and Research"
          and .confidence in ("medium", "high")
  )
  and not any(beta.ml_topic(body.current_thread.text).topics,
              .name in ("Health and Wellness") and .confidence == "high"
  )
  and ml.nlu_classifier(body.current_thread.text).language == "english"
  and 3 of (
    // character manipulation: more than 100 look-alike (confusable) code points in the body
    regex.count(body.current_thread.text,
                '[Æ×æı-ĳŉŒœƁƄƇƊƍƓƖƘƠơƤƦƧƬƳƷƼƽǀǁǇ-ǌǱ-ǳȜȢȣɑɡɣɩɪɯʋʏʣʦʪʫ˛ͺͿΑΒΕ-ΗΙΚΜΝΟΡΤΥΧαγινορσυϒϜϨϱ-ϳϹϺЅІЈАВЕЗКМ-ОР-УХЫЬЮабгеорсухѕіјѡѴѵґҮүһҽӀӏӔӕӠԁԌԛ-ԝՍՏՕագզհոռսցքօ׀וטןסװױاه١٥٧ھہە۱۵۷߀ߊ०০৪৭੦੧੪ଠ୦୨ഠ൦๐໐ဝ၀ყჿሀዐᎠ-ᎢᎤᎥᎩ-ᎬᎳᎷᎻᎽᏀᏂᏃᏎᏏᏒᏔᏕᏙᏚᏞᏟᏢᏦᏧᏮᏳᏴᐯᑌᑧᑭᑯᑲᒆ-ᒈᒍᒪᒿᕁᕼᕽᖇᖯᖴᗅᗞᗪᗰᗷ᙭᙮ᚷᛁᛕᛖᴄᴏᴑᴜᴠ-ᴢᴦᵫᶃᶌẝỿι‖₨₶℀-ℂ℅℆ℊ-ℎℐ-ℓℕ№ℙ-ℝ℡ℤℨℬ-ℱℳℴℹ℻ℽⅅ-ⅉⅠ-ⅿ∞∣∥∨∪⊤⋁⋃⋿⍳⍴⍺⏽⑴-⒵╳⟙⤫⤬⨯ⲅⲎⲒⲔⲘⲚⲞⲟⲢ-ⲦⲨⲬⳊⳌⳐⳒⴸⴹⵏⵔⵕⵝ〇ꓐ-ꓔꓖꓗꓙꓚꓜꓝꓟ-ꓣꓦꓧꓪ-ꓬꓮꓰꓲ-ꓴꙄꙇꚘꚙꛟꛯꜨꜱ-ꜽꝎꝏꝚꝪꝮꝷꭵꮁꮃꮓꮩꮪꮯﬀ-ﬄﬆﮦ-ﮭﺍﺎﻩ-ﻬＡ-ＣＥＨ-ＫＭ-ＰＳＴＸ-Ｚａｃｅｇ-ｊｌｏｐｓｖｘｙ￨𐊂𐊆𐊇𐊊𐊐𐊒𐊕-𐊗𐊠-𐊢𐊥𐊫𐊰-𐊲𐊴𐋏𐋵𐌁𐌂𐌉𐌑𐌕𐌗𐌚𐌠𐌢𐐄𐐕𐐛𐐠𐐬𐐽𐑈𐒴𐓂𐓎𐓒𐓪𐓶𐔓𐔖𐔘𐔜𐔝𐔥-𐔧𑓐𑜀𑜆𑜊𑜎𑜏𑢠𑢢-𑢤𑢦𑢩𑢬𑢮𑢯𑢲𑢵𑢸𑢻𑢼𑣀-𑣄𑣆𑣈𑣊𑣌𑣕-𑣘𑣜𑣠𑣣𑣥𑣦𑣩𑣬𑣯𑣲𖼈𖼊𖼖𖼨𖼵𖼺𖼻𖽀𖽂𖽃𝐀-𝑔𝑖-𝒜𝒞𝒟𝒢𝒥𝒦𝒩-𝒬𝒮-𝒹𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆𝕊-𝕐𝕒-𝚤𝚨𝚩𝚬-𝚮𝚰𝚱𝚳𝚴𝚶𝚸𝚻𝚼𝚾𝛂𝛄𝛊𝛎𝛐𝛒𝛔𝛖𝛠𝛢𝛣𝛦-𝛨𝛪𝛫𝛭𝛮𝛰𝛲𝛵𝛶𝛸𝛼𝛾𝜄𝜈𝜊𝜌𝜎𝜐𝜚𝜜𝜝𝜠-𝜢𝜤𝜥𝜧𝜨𝜪𝜬𝜯𝜰𝜲𝜶𝜸𝜾𝝂𝝄𝝆𝝈𝝊𝝔𝝖𝝗𝝚-𝝜𝝞𝝟𝝡𝝢𝝤𝝦𝝩𝝪𝝬𝝰𝝲𝝸𝝼𝝾𝞀𝞂𝞄𝞎𝞐𝞑𝞔-𝞖𝞘𝞙𝞛𝞜𝞞𝞠𝞣𝞤𝞦𝞪𝞬𝞲𝞶𝞸𝞺𝞼𝞾𝟈𝟊𝟎]'
    ) > 100,

    // publishing vocabulary, matched after look-alike characters are replaced
    regex.icontains(strings.replace_confusables(body.current_thread.text),
                    "Impact Factor",
                    "Special Issue",
                    "Guest Editor",
                    "peer-review",
                    "manuscript",
                    "workshop",
                    "journal (of|editor)",
                    "inclusive research",
                    "abstract",
                    "open-access",
                    "upcoming edition",
                    "title of (your (work|published article)|the study)",
                    "your paper's title",
                    "and the abstract",
                    "abstract of (your work|the study)",
                    "detailed abstract",
                    'contribution\b',
                    "accepted paper",
                    "submit.{0,20}.(manuscript|article)",
                    "call for editorial",
                    "reviewer team",
                    "review.{0,15}.(journal|issue)"
    ),

    // solicitation language, as seen in previous research.
    // All pattern groups below belong to this one call, so together they
    // contribute a single signal no matter how many of them match.
    regex.icontains(strings.replace_confusables(body.current_thread.text),
                    // Flattery
                    'your\s+(article|paper|research|publication|work)\s+"?[^"]+?"?\s+(is\s+very\s+excellent|strongly\s+reflects|will\s+be\s+a\s+valuable)',
                    'we\s+believe\s+(that\s+)?your\s+(experience|perspective|expertise|comments?)\s+(will\s+add|can\s+play|will\s+be)\s+.{0,100}(important|valuable)',
                    '(you\s+are\s+one\s+of\s+the\s+leading\s+experts?|someone\s+of\s+your\s+caliber)',
                    '(emerging\s+voices?\s+like\s+yours|shape\s+the\s+scholarly\s+direction)',
                    '(learning\s+from\s+the\s+internet|know\s+your\s+"[^"]+"\s+is\s+very\s+excellent)',
                    '(world''s|global|international)\s+(foremost|leading|top|premier)\s+(authorities|experts|researchers)',
                    // The "highly valued" variants
                    'your\s+(participation|contribution|presence|involvement)\s+(would\s+be|is)\s+(highly|greatly|immensely|extremely)\s+(valued|appreciated|welcomed)',

                    // Time pressure tactics
                    '(short\s+notice|busy\s+schedule|quick\s+turnaround|urgent\s+deadline|limited\s+slots)',

                    // "No charge" red flags
                    '(no\s+charge|free\s+of\s+charge|waived\s+fee|complimentary|at\s+no\s+cost)',

                    // Vague topic promises
                    '(topic\s+of\s+your\s+choice|any\s+topic\s+related|broad\s+range\s+of\s+topics|multidisciplinary\s+approach)',

                    // Easy publication promises
                    '(guaranteed\s+publication|fast\s+track\s+review|expedited\s+process|will\s+not\s+be\s+too\s+time-consuming)',

                    // Template giveaways
                    '(do\s+hope\s+you\s+can\s+make\s+time|kindly\s+submit|gentle\s+reminder|esteemed\s+researcher)'
    ),

    // Message contains the user's last name, but not their first name
    // (and does not simply echo a recipient address).
    // Presumably, this is because names are listed that way on academic papers.
    // Last names of 4 characters or fewer are ignored.
    strings.icontains(body.current_thread.text, mailbox.last_name)
    and not strings.icontains(body.current_thread.text, mailbox.first_name)
    and not any(recipients.to,
                strings.icontains(body.current_thread.text, .email.email)
    )
    and length(mailbox.last_name) > 4,

    // Or, message contains the user's "last, first"
    // Example: Doe, John
    // Presumably, this is because names are listed that way on academic papers.
    // Both names must be non-empty; otherwise the needle would be just ", ",
    // which matches almost any message.
    length(mailbox.last_name) > 0
    and length(mailbox.first_name) > 0
    and strings.icontains(body.current_thread.text,
                          strings.concat(mailbox.last_name, ", ", mailbox.first_name)
    ),

    // sender domain registered less than 90 days ago
    network.whois(sender.email.domain).days_old < 90,
    // any link domain registered less than 90 days ago
    any(body.links, network.whois(.href_url.domain).days_old < 90),

    // Crossref DOI registration abuse (https://doi.org/10.29328)
    any(body.links,
        .href_url.domain.root_domain == "doi.org"
        and strings.istarts_with(.href_url.path, '/10.29328')
    ),

    // Sender does not match original thread sender:
    // a quoted "From: <address>" in the message differs from the actual sender
    length(body.previous_threads) > 0
    and any(regex.iextract(body.html.display_text, 'From: (?P<email_address>\S*)'),
            strings.parse_email(.named_groups['email_address']).email != sender.email.email
    ),

    // Message-ID host part starts with "DESKTOP-"
    // (looks like a default Windows machine name rather than a mail server)
    strings.contains(headers.message_id, "@DESKTOP-"),

    // requesting a manuscript review
    strings.ilike(body.current_thread.text, "*review*")
    and strings.ilike(body.current_thread.text, "*manuscript*", "*submission*"),

    // Chinese registrant country (sender domain, then link domains; counted separately)
    network.whois(sender.email.domain).registrant_country_code == "CN",
    any(body.links,
        network.whois(.href_url.domain).registrant_country_code == "CN"
    ),

    // Alibaba infrastructure
    any(headers.domains, .root_domain in ("aliyun.com", "aliyun-inc.com")),

    // Known predatory journals that we've observed and matched to beallslist.net
    sender.email.domain.root_domain in (
      "iris-research.net",
      "irispublishers.com",
      "lidsen.com"
    ),

    // sender domain and body link domains do not match, but have the same registration details
    // (must hold for every link, and there must be at least one link)
    (
      length(body.links) > 0
      and all(body.links,
              (
                network.whois(.href_url.domain).registrant_company == network.whois(sender.email.domain).registrant_company
                and network.whois(.href_url.domain).registrar_name == network.whois(sender.email.domain).registrar_name
              )
              and .href_url.domain.root_domain != sender.email.domain.root_domain
      )
    ),

    // known link path pattern: "/ey" + one letter + "/" + at least two more characters
    any(body.links, regex.imatch(.href_url.path, '^/ey[a-z]/.{2,}$'))
  )

  // negate microsoft quarantine messages
  and not (
    sender.email.email == "quarantine@messaging.microsoft.com"
    and (
      headers.auth_summary.dmarc.pass
      // no sender auth but MS AuthAs is Internal
      or (
        not coalesce(headers.auth_summary.dmarc.pass, false)
        and any(headers.hops,
                .index == 0
                and any(.fields,
                        .name == "X-MS-Exchange-CrossTenant-AuthAs"
                        and .value == "Internal"
                )
        )
      )
    )
  )

attack_types:
  - "BEC/Fraud"
  - "Spam"
tactics_and_techniques:
  - "Social engineering"
  - "Impersonation: Brand"
  - "Lookalike domain"
  - "Evasion"
detection_methods:
  - "Natural Language Understanding"
  - "Content analysis"
  - "Sender analysis"
  - "URL analysis"
  - "Whois"
id: "ba140a59-2b30-5811-b67c-c55440f3c333"
og_id: "263ca56b-d31b-5b38-b00e-c1c45e5e96bb"
testing_pr: 3459
testing_sha: 126a59ed30096ff804c11c464011ae228bb744c6