From 287facbb7b331548169f10426a19f23a8c0bcdd0 Mon Sep 17 00:00:00 2001
From: "Brandon 2: Brandon Harder"
 <189403278+missingn0pe@users.noreply.github.com>
Date: Mon, 3 Nov 2025 11:18:49 -0600
Subject: [PATCH] Refine regex patterns for spam detection rules

Add coverage for additional wording, and account for email addresses being parsed as links.
---
 .../spam_website_errors_solicitation.yml      | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/detection-rules/spam_website_errors_solicitation.yml b/detection-rules/spam_website_errors_solicitation.yml
index a884f8d663a..34caf5756b4 100644
--- a/detection-rules/spam_website_errors_solicitation.yml
+++ b/detection-rules/spam_website_errors_solicitation.yml
@@ -9,28 +9,37 @@ source: |
     or not profile.by_sender().solicited
   )
   and 1 of (
-      // Single thread message groups
+    // Single-thread messages: no attachments, no links, no previous threads
     (
       length(attachments) == 0
       and length(body.links) == 0
       and length(body.previous_threads) == 0
       and 20 < length(body.current_thread.text) < 500
-      and regex.icontains(body.current_thread.text, "screenshot")
-      and regex.icontains(body.current_thread.text, '.*(hi|hello|hey)')
+      and regex.icontains(body.current_thread.text, "screenshot|error list|plan")
+      and regex.icontains(body.current_thread.text, ".*(hi|hello|hey)")
       and regex.icontains(body.current_thread.text, ".*(error|report|issues)")
-      and regex.icontains(subject.subject, ".*(proposal|cost|report|error|audit|screenshot)")
+      and regex.icontains(body.current_thread.text, ".*(site|website)")
+      and regex.icontains(subject.subject,
+                          ".*(proposal|cost|report|error|audit|screenshot)"
+      )
     ),
-    // Single thread message groups but with 1 unsubscribe link
+    // Single-thread messages with exactly 1 link: the HTML matches the mailto/unsubscribe regex, or the link is on an org domain
     (
       length(attachments) == 0
       and length(body.links) == 1
-      and regex.icontains(body.html.raw, "mailto:*[++unsubscribe@]")
+      and (
+        regex.icontains(body.html.raw, "mailto:*[++unsubscribe@]")
+        or any(body.links, .href_url.domain.root_domain in~ $org_domains)
+      )
       and length(body.previous_threads) == 0
       and 20 < length(body.current_thread.text) < 500
-      and regex.icontains(body.current_thread.text, "screenshot")
-      and regex.icontains(body.current_thread.text, '.*(hi|hello|hey)')
+      and regex.icontains(body.current_thread.text, "screenshot|error list|plan")
+      and regex.icontains(body.current_thread.text, ".*(hi|hello|hey)")
       and regex.icontains(body.current_thread.text, ".*(error|report|issues)")
-      and regex.icontains(subject.subject, ".*(proposal|cost|report|error|audit|screenshot)")
+      and regex.icontains(body.current_thread.text, ".*(site|website)")
+      and regex.icontains(subject.subject,
+                          ".*(proposal|cost|report|error|audit|screenshot)"
+      )
     ),
     // Multiple thread message groups
     (
@@ -41,7 +50,7 @@ source: |
       and any(body.previous_threads,
               length(.text) < 400
               and regex.icontains(.text, '.*(hey|hi|hello)')
-              and regex.icontains(.text, ".*(error|screenshot|report)")
+              and regex.icontains(.text, '.*(error|screenshot|report|plan)')
               and strings.count(.text, "?") >= 3
               and ml.nlu_classifier(.text).language == "english"
       )