From 287facbb7b331548169f10426a19f23a8c0bcdd0 Mon Sep 17 00:00:00 2001
From: "Brandon 2: Brandon Harder" <189403278+missingn0pe@users.noreply.github.com>
Date: Mon, 3 Nov 2025 11:18:49 -0600
Subject: [PATCH] Refine regex patterns for spam detection rules

Adding additional coverage for verbiage, and scoping parsing issues for email addresses as links.
---
Review notes (this section sits between the "---" separator and the diff,
so it is not part of the commit message):

 * Single-thread branches: the keyword check now accepts
   "screenshot", "error list" or "plan", a new check requires "site" or
   "website" in the body, and the subject check is only re-wrapped.
 * One-link branch: the single link may now be either the existing
   mailto/unsubscribe match or a link whose root domain is in $org_domains.
 * Multi-thread branch: "error" is left unanchored so that "errors" keeps
   matching as it did before this patch. A pattern of the form
   \berror(?:\s+list)?\b only matches the standalone word "error", and the
   optional "list" group cannot add matches to a contains-style check.
 * NOTE(review): "mailto:*[++unsubscribe@]" is carried over unchanged. The
   bracketed part is a character class (one character out of "+unsbcrie@"),
   not the literal text "unsubscribe@" - confirm this is intended.

 .../spam_website_errors_solicitation.yml | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/detection-rules/spam_website_errors_solicitation.yml b/detection-rules/spam_website_errors_solicitation.yml
index a884f8d663a..34caf5756b4 100644
--- a/detection-rules/spam_website_errors_solicitation.yml
+++ b/detection-rules/spam_website_errors_solicitation.yml
@@ -9,28 +9,37 @@ source: |
     or not profile.by_sender().solicited
   )
   and 1 of (
-    // Single thread message groups 
+    // Single thread message groups
     (
       length(attachments) == 0
       and length(body.links) == 0
       and length(body.previous_threads) == 0
       and 20 < length(body.current_thread.text) < 500
-      and regex.icontains(body.current_thread.text, "screenshot")
-      and regex.icontains(body.current_thread.text, '.*(hi|hello|hey)')
+      and regex.icontains(body.current_thread.text, "screenshot|error list|plan")
+      and regex.icontains(body.current_thread.text, ".*(hi|hello|hey)")
       and regex.icontains(body.current_thread.text, ".*(error|report|issues)")
-      and regex.icontains(subject.subject, ".*(proposal|cost|report|error|audit|screenshot)")
+      and regex.icontains(body.current_thread.text, ".*(site|website)")
+      and regex.icontains(subject.subject,
+                          ".*(proposal|cost|report|error|audit|screenshot)"
+      )
     ),
-    // Single thread message groups but with 1 unsubscribe link
+    // Single thread message groups but with 1 unsubscribe link or link is recipient
     (
       length(attachments) == 0
       and length(body.links) == 1
-      and regex.icontains(body.html.raw, "mailto:*[++unsubscribe@]")
+      and (
+        regex.icontains(body.html.raw, "mailto:*[++unsubscribe@]")
+        or any(body.links, .href_url.domain.root_domain in~ $org_domains)
+      )
       and length(body.previous_threads) == 0
       and 20 < length(body.current_thread.text) < 500
-      and regex.icontains(body.current_thread.text, "screenshot")
-      and regex.icontains(body.current_thread.text, '.*(hi|hello|hey)')
+      and regex.icontains(body.current_thread.text, "screenshot|error list|plan")
+      and regex.icontains(body.current_thread.text, ".*(hi|hello|hey)")
       and regex.icontains(body.current_thread.text, ".*(error|report|issues)")
-      and regex.icontains(subject.subject, ".*(proposal|cost|report|error|audit|screenshot)")
+      and regex.icontains(body.current_thread.text, ".*(site|website)")
+      and regex.icontains(subject.subject,
+                          ".*(proposal|cost|report|error|audit|screenshot)"
+      )
     ),
     // Multiple thread message groups
     (
@@ -41,7 +50,7 @@ source: |
       and any(body.previous_threads,
               length(.text) < 400
               and regex.icontains(.text, '.*(hey|hi|hello)')
-              and regex.icontains(.text, ".*(error|screenshot|report)")
+              and regex.icontains(.text, '.*(error|screenshot|report|plan)')
               and strings.count(.text, "?") >= 3
               and ml.nlu_classifier(.text).language == "english"
       )