Together-Java
diff --git a/application/config.json.template b/application/config.json.template
Lines changed: 3 additions & 1 deletion
diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/AnalyseResults.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/AnalyseResults.java
Lines changed: 105 additions & 0 deletions
diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/Attachment.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/Attachment.java
Lines changed: 34 additions & 0 deletions
diff --git a/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java b/application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java
Lines changed: 19 additions & 141 deletions
@@ -60,7 +60,9 @@
             "gradle.org",
             "help.gradle.org",
             "youtube.com",
-            "www.youtube.com"
+            "www.youtube.com",
+            "cdn.discordapp.com",
+            "media.discordapp.net"
         ],
         "hostBlacklist": [
             "bit.ly",
 
@@ -0,0 +1,105 @@
+package org.togetherjava.tjbot.features.moderation.scam;
+
+import javax.annotation.Nullable;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Objects;
+import java.util.StringJoiner;
+
+/**
+ * Mutable accumulator for the findings collected while analysing a message.
+ * <p>
+ * All flags start out {@code false}, except {@link #onlyContainsUrls()} which starts out
+ * {@code true} and is cleared by {@link #markNonUrlTokenFound()}. Flags can only be raised, never
+ * reset. Per-URL findings are kept as {@link AnalyseUrlResult} entries.
+ */
+final class AnalyseResults {
+    private final Collection<AnalyseUrlResult> urls = new ArrayList<>();
+    private boolean onlyContainsUrls = true;
+    private boolean pingsEveryone;
+    private boolean containsSuspiciousKeyword;
+    private boolean containsDollarSign;
+
+    /**
+     * Records the analysis result of a single URL.
+     *
+     * @param result the result to remember
+     */
+    void addUrlResult(AnalyseUrlResult result) {
+        urls.add(result);
+    }
+
+    /**
+     * @return whether at least one URL result has been recorded
+     */
+    boolean hasUrl() {
+        return !urls.isEmpty();
+    }
+
+    /**
+     * @return whether at least one recorded URL has been marked suspicious
+     */
+    boolean hasSuspiciousUrl() {
+        return urls.stream().anyMatch(AnalyseUrlResult::isSuspicious);
+    }
+
+    /**
+     * @return whether every recorded URL carries an attachment; also {@code true} if no URL has
+     *         been recorded at all
+     */
+    boolean areAllUrlsWithAttachments() {
+        for (AnalyseUrlResult url : urls) {
+            if (url.containedAttachment == null) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * @return the attachments of all recorded URLs that have one, as an unmodifiable collection
+     */
+    Collection<Attachment> getUrlAttachments() {
+        return urls.stream()
+            .filter(url -> Objects.nonNull(url.containedAttachment))
+            .map(url -> url.containedAttachment)
+            .toList();
+    }
+
+    boolean pingsEveryone() {
+        return pingsEveryone;
+    }
+
+    void markPingsEveryone() {
+        pingsEveryone = true;
+    }
+
+    boolean containsSuspiciousKeyword() {
+        return containsSuspiciousKeyword;
+    }
+
+    void markContainsSuspiciousKeyword() {
+        containsSuspiciousKeyword = true;
+    }
+
+    boolean containsDollarSign() {
+        return containsDollarSign;
+    }
+
+    void markContainsDollarSign() {
+        containsDollarSign = true;
+    }
+
+    boolean onlyContainsUrls() {
+        return onlyContainsUrls;
+    }
+
+    /**
+     * Clears the "only contains URLs" flag, which is {@code true} until this is called.
+     */
+    void markNonUrlTokenFound() {
+        onlyContainsUrls = false;
+    }
+
+    @Override
+    public String toString() {
+        String prefix = AnalyseResults.class.getSimpleName() + "[";
+        StringJoiner description = new StringJoiner(", ", prefix, "]");
+        description.add("pingsEveryone=" + pingsEveryone);
+        description.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword);
+        description.add("containsDollarSign=" + containsDollarSign);
+        description.add("onlyContainsUrls=" + onlyContainsUrls);
+        description.add("urls=" + urls);
+        return description.toString();
+    }
+
+    /**
+     * Mutable findings for a single URL: whether it was marked suspicious and, optionally, the
+     * attachment associated with it.
+     */
+    static final class AnalyseUrlResult {
+        @Nullable
+        private Attachment containedAttachment;
+        private boolean isSuspicious;
+
+        boolean isSuspicious() {
+            return isSuspicious;
+        }
+
+        void markSuspicious() {
+            isSuspicious = true;
+        }
+
+        void setContainedAttachment(Attachment containedAttachment) {
+            this.containedAttachment = containedAttachment;
+        }
+
+        @Override
+        public String toString() {
+            String prefix = AnalyseUrlResult.class.getSimpleName() + "[";
+            StringJoiner description = new StringJoiner(", ", prefix, "]");
+            description.add("isSuspicious=" + isSuspicious);
+            description.add("containedAttachment=" + containedAttachment);
+            return description.toString();
+        }
+    }
+}
@@ -0,0 +1,34 @@
+package org.togetherjava.tjbot.features.moderation.scam;
+
+import net.dv8tion.jda.api.entities.Message;
+
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Minimal description of a file attached to, or linked from, a message. It is identified solely
+ * by its file name.
+ *
+ * @param fileName name of the file, including its extension if any, e.g. {@code "photo.png"}
+ */
+record Attachment(String fileName) {
+    // Extensions (without the dot, lower case) that are treated as images.
+    private static final Set<String> IMAGE_EXTENSIONS =
+            Set.of("jpg", "jpeg", "png", "gif", "webp", "tiff", "svg", "apng");
+
+    /**
+     * Checks whether the file name ends with a known image extension.
+     * <p>
+     * The comparison ignores case, so {@code "PHOTO.PNG"} is an image just like
+     * {@code "photo.png"}. A case-sensitive check would miss upper-case extensions.
+     *
+     * @return {@code true} if the file extension is a known image extension, {@code false} if it
+     *         is not or if the file name has no extension
+     */
+    boolean isImage() {
+        return getFileExtension().map(Attachment::isImageExtension).orElse(false);
+    }
+
+    // Case-insensitive membership test against IMAGE_EXTENSIONS.
+    private static boolean isImageExtension(String extension) {
+        return IMAGE_EXTENSIONS.stream().anyMatch(extension::equalsIgnoreCase);
+    }
+
+    /**
+     * Extracts the text after the last {@code '.'} of the file name, in its original casing.
+     *
+     * @return the extension without the dot (possibly empty if the name ends with a dot), or an
+     *         empty optional if the name contains no dot
+     */
+    private Optional<String> getFileExtension() {
+        int dot = fileName.lastIndexOf('.');
+        if (dot == -1) {
+            return Optional.empty();
+        }
+        String extension = fileName.substring(dot + 1);
+        return Optional.of(extension);
+    }
+
+    /**
+     * Creates an instance from a Discord message attachment, using its file name.
+     *
+     * @param attachment the Discord attachment to convert
+     * @return the corresponding attachment
+     */
+    static Attachment fromDiscord(Message.Attachment attachment) {
+        return new Attachment(attachment.getFileName());
+    }
+
+    /**
+     * Creates an instance from the path of a URL, using the segment after the last {@code '/'} as
+     * file name.
+     *
+     * @param urlPath the path to extract the file name from, e.g. {@code "/a/b/photo.png"}
+     * @return the corresponding attachment; its file name is empty if the path contains no
+     *         {@code '/'} or ends with one
+     */
+    static Attachment fromUrlPath(String urlPath) {
+        int fileNameStart = urlPath.lastIndexOf('/');
+        String fileName = fileNameStart == -1 ? "" : urlPath.substring(fileNameStart + 1);
+        return new Attachment(fileName);
+    }
+}
@@ -6,13 +6,9 @@
 
 import org.togetherjava.tjbot.config.Config;
 import org.togetherjava.tjbot.config.ScamBlockerConfig;
-import org.togetherjava.tjbot.features.utils.StringDistances;
 
-import java.net.URI;
 import java.util.Collection;
 import java.util.List;
-import java.util.Locale;
-import java.util.StringJoiner;
 import java.util.function.Predicate;
 import java.util.regex.Pattern;
 import java.util.stream.Stream;
@@ -28,6 +24,7 @@ public final class ScamDetector {
     private final ScamBlockerConfig config;
     private final Predicate<String> isSuspiciousAttachmentName;
     private final Predicate<String> hasTrustedRole;
+    private final TokenAnalyse tokenAnalyse;
 
     /**
      * Creates a new instance with the given configuration
@@ -42,6 +39,8 @@ public ScamDetector(Config config) {
                     .asMatchPredicate();
         hasTrustedRole =
                 Pattern.compile(this.config.getTrustedUserRolePattern()).asMatchPredicate();
+
+        tokenAnalyse = new TokenAnalyse(this.config);
     }
 
     /**
@@ -59,10 +58,11 @@ public boolean isScam(Message message) {
         }
 
         String content = message.getContentDisplay();
-        List<Message.Attachment> attachments = message.getAttachments();
+        List<Attachment> attachments =
+                message.getAttachments().stream().map(Attachment::fromDiscord).toList();
 
         if (content.isBlank()) {
-            return areAttachmentsSuspicious(attachments);
+            return areAttachmentsScam(attachments);
         }
 
         return isScam(content);
@@ -76,158 +76,36 @@ public boolean isScam(Message message) {
      */
     public boolean isScam(CharSequence message) {
         AnalyseResults results = new AnalyseResults();
-        TOKENIZER.splitAsStream(message).forEach(token -> analyzeToken(token, results));
+        TOKENIZER.splitAsStream(message).forEach(token -> tokenAnalyse.analyze(token, results));
         return isScam(results);
     }
 
     private boolean isScam(AnalyseResults results) {
-        if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl
-                || results.containsDollarSign)) {
+        if (results.pingsEveryone() && (results.containsSuspiciousKeyword() || results.hasUrl()
+                || results.containsDollarSign())) {
             return true;
         }
 
-        return Stream
-            .of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl,
-                    results.containsDollarSign)
+        boolean hasTooManySuspiciousFlags = Stream
+            .of(results.containsSuspiciousKeyword(), results.hasSuspiciousUrl(),
+                    results.containsDollarSign())
             .filter(flag -> flag)
             .count() >= 2;
-    }
-
-    private void analyzeToken(String token, AnalyseResults results) {
-        if (token.isBlank()) {
-            return;
-        }
-
-        if (!results.pingsEveryone
-                && ("@everyone".equalsIgnoreCase(token) || "@here".equalsIgnoreCase(token))) {
-            results.pingsEveryone = true;
-        }
-
-        if (!results.containsSuspiciousKeyword && containsSuspiciousKeyword(token)) {
-            results.containsSuspiciousKeyword = true;
-        }
-
-        if (!results.containsDollarSign && (token.contains("$") || "usd".equalsIgnoreCase(token))) {
-            results.containsDollarSign = true;
-        }
-
-        if (token.startsWith("http")) {
-            analyzeUrl(token, results);
-        }
-    }
-
-    private void analyzeUrl(String url, AnalyseResults results) {
-        String host;
-        try {
-            host = URI.create(url).getHost();
-        } catch (IllegalArgumentException _) {
-            // Invalid urls are not scam
-            return;
-        }
-
-        if (host == null) {
-            return;
-        }
-
-        results.hasUrl = true;
-
-        if (config.getHostWhitelist().contains(host)) {
-            return;
-        }
-
-        if (config.getHostBlacklist().contains(host)) {
-            results.hasSuspiciousUrl = true;
-            return;
-        }
-
-        for (String keyword : config.getSuspiciousHostKeywords()) {
-            if (isHostSimilarToKeyword(host, keyword)) {
-                results.hasSuspiciousUrl = true;
-                break;
-            }
+        if (hasTooManySuspiciousFlags) {
+            return true;
         }
-    }
 
-    private boolean containsSuspiciousKeyword(String token) {
-        String preparedToken = token.toLowerCase(Locale.US);
-
-        return config.getSuspiciousKeywords()
-            .stream()
-            .map(keyword -> keyword.toLowerCase(Locale.US))
-            .anyMatch(keyword -> {
-                // Exact match "^foo$"
-                if (startsWith(keyword, '^') && endsWith(keyword, '$')) {
-                    return preparedToken.equals(keyword.substring(1, keyword.length() - 1));
-                }
-                // Simple regex-inspired syntax "^foo"
-                if (startsWith(keyword, '^')) {
-                    return preparedToken.startsWith(keyword.substring(1));
-                }
-                // Simple regex-inspired syntax "foo$"
-                if (endsWith(keyword, '$')) {
-                    return preparedToken.endsWith(keyword.substring(0, keyword.length() - 1));
-                }
-                return preparedToken.contains(keyword);
-            });
+        return results.onlyContainsUrls() && results.areAllUrlsWithAttachments()
+                && areAttachmentsScam(results.getUrlAttachments());
     }
 
-    private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
+    private boolean areAttachmentsScam(Collection<Attachment> attachments) {
         long suspiciousAttachments =
                 attachments.stream().filter(this::isAttachmentSuspicious).count();
         return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
     }
 
-    private boolean isAttachmentSuspicious(Message.Attachment attachment) {
-        return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
-    }
-
-    private boolean isHostSimilarToKeyword(String host, String keyword) {
-        // NOTE This algorithm is far from optimal.
-        // It is good enough for our purpose though and not that complex.
-
-        // Rolling window of keyword-size over host.
-        // If any window has a small distance, it is similar
-        int windowStart = 0;
-        int windowEnd = keyword.length();
-        while (windowEnd <= host.length()) {
-            String window = host.substring(windowStart, windowEnd);
-            int distance = StringDistances.editDistance(keyword, window);
-
-            if (distance <= config.getIsHostSimilarToKeywordDistanceThreshold()) {
-                return true;
-            }
-
-            windowStart++;
-            windowEnd++;
-        }
-
-        return false;
-    }
-
-    private static boolean startsWith(CharSequence text, char prefixToTest) {
-        return !text.isEmpty() && text.charAt(0) == prefixToTest;
-    }
-
-    private static boolean endsWith(CharSequence text, char suffixToTest) {
-        return !text.isEmpty() && text.charAt(text.length() - 1) == suffixToTest;
-    }
-
-    private static class AnalyseResults {
-        private boolean pingsEveryone;
-        private boolean containsSuspiciousKeyword;
-        private boolean containsDollarSign;
-        private boolean hasUrl;
-        private boolean hasSuspiciousUrl;
-
-        @Override
-        public String toString() {
-            return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
-                .add("pingsEveryone=" + pingsEveryone)
-                .add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
-                .add("containsDollarSign=" + containsDollarSign)
-                .add("hasUrl=" + hasUrl)
-                .add("hasSuspiciousUrl=" + hasSuspiciousUrl)
-                .toString();
-        }
+    private boolean isAttachmentSuspicious(Attachment attachment) {
+        return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.fileName());
     }
 }