Skip to content

Commit 6d085af

Browse files
authored
Block CDN attachment scam (#1293)
* Block CDN attachment scam * added another CDN host to whitelist * improved readability by moving stuff around * analyse went into its own TokenAnalyse class * data holder classes were extracted as well * added unit test for non trivial stuff in Attachment class * (minor rename for readability)
1 parent e29eb30 commit 6d085af

File tree

8 files changed

+420
-149
lines changed

8 files changed

+420
-149
lines changed

application/config.json.template

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@
6060
"gradle.org",
6161
"help.gradle.org",
6262
"youtube.com",
63-
"www.youtube.com"
63+
"www.youtube.com",
64+
"cdn.discordapp.com",
65+
"media.discordapp.net"
6466
],
6567
"hostBlacklist": [
6668
"bit.ly",
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package org.togetherjava.tjbot.features.moderation.scam;
2+
3+
import javax.annotation.Nullable;
4+
5+
import java.util.ArrayList;
6+
import java.util.Collection;
7+
import java.util.Objects;
8+
import java.util.StringJoiner;
9+
10+
final class AnalyseResults {
11+
private boolean pingsEveryone;
12+
private boolean containsSuspiciousKeyword;
13+
private boolean containsDollarSign;
14+
private boolean onlyContainsUrls = true;
15+
private final Collection<AnalyseUrlResult> urls = new ArrayList<>();
16+
17+
void addUrlResult(AnalyseUrlResult result) {
18+
urls.add(result);
19+
}
20+
21+
boolean hasUrl() {
22+
return !urls.isEmpty();
23+
}
24+
25+
boolean hasSuspiciousUrl() {
26+
return urls.stream().anyMatch(url -> url.isSuspicious);
27+
}
28+
29+
boolean areAllUrlsWithAttachments() {
30+
return urls.stream().allMatch(url -> url.containedAttachment != null);
31+
}
32+
33+
Collection<Attachment> getUrlAttachments() {
34+
return urls.stream().map(url -> url.containedAttachment).filter(Objects::nonNull).toList();
35+
}
36+
37+
boolean pingsEveryone() {
38+
return pingsEveryone;
39+
}
40+
41+
void markPingsEveryone() {
42+
pingsEveryone = true;
43+
}
44+
45+
boolean containsSuspiciousKeyword() {
46+
return containsSuspiciousKeyword;
47+
}
48+
49+
void markContainsSuspiciousKeyword() {
50+
containsSuspiciousKeyword = true;
51+
}
52+
53+
boolean containsDollarSign() {
54+
return containsDollarSign;
55+
}
56+
57+
void markContainsDollarSign() {
58+
containsDollarSign = true;
59+
}
60+
61+
boolean onlyContainsUrls() {
62+
return onlyContainsUrls;
63+
}
64+
65+
void markNonUrlTokenFound() {
66+
onlyContainsUrls = false;
67+
}
68+
69+
@Override
70+
public String toString() {
71+
return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
72+
.add("pingsEveryone=" + pingsEveryone)
73+
.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
74+
.add("containsDollarSign=" + containsDollarSign)
75+
.add("onlyContainsUrls=" + onlyContainsUrls)
76+
.add("urls=" + urls)
77+
.toString();
78+
}
79+
80+
static final class AnalyseUrlResult {
81+
private boolean isSuspicious;
82+
@Nullable
83+
private Attachment containedAttachment;
84+
85+
@Override
86+
public String toString() {
87+
return new StringJoiner(", ", AnalyseUrlResult.class.getSimpleName() + "[", "]")
88+
.add("isSuspicious=" + isSuspicious)
89+
.add("containedAttachment=" + containedAttachment)
90+
.toString();
91+
}
92+
93+
boolean isSuspicious() {
94+
return isSuspicious;
95+
}
96+
97+
void markSuspicious() {
98+
isSuspicious = true;
99+
}
100+
101+
void setContainedAttachment(Attachment containedAttachment) {
102+
this.containedAttachment = containedAttachment;
103+
}
104+
}
105+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package org.togetherjava.tjbot.features.moderation.scam;
2+
3+
import net.dv8tion.jda.api.entities.Message;
4+
5+
import java.util.Optional;
6+
import java.util.Set;
7+
8+
record Attachment(String fileName) {
9+
private static final Set<String> IMAGE_EXTENSIONS =
10+
Set.of("jpg", "jpeg", "png", "gif", "webp", "tiff", "svg", "apng");
11+
12+
boolean isImage() {
13+
return getFileExtension().map(IMAGE_EXTENSIONS::contains).orElse(false);
14+
}
15+
16+
private Optional<String> getFileExtension() {
17+
int dot = fileName.lastIndexOf('.');
18+
if (dot == -1) {
19+
return Optional.empty();
20+
}
21+
String extension = fileName.substring(dot + 1);
22+
return Optional.of(extension);
23+
}
24+
25+
static Attachment fromDiscord(Message.Attachment attachment) {
26+
return new Attachment(attachment.getFileName());
27+
}
28+
29+
static Attachment fromUrlPath(String urlPath) {
30+
int fileNameStart = urlPath.lastIndexOf('/');
31+
String fileName = fileNameStart == -1 ? "" : urlPath.substring(fileNameStart + 1);
32+
return new Attachment(fileName);
33+
}
34+
}

application/src/main/java/org/togetherjava/tjbot/features/moderation/scam/ScamDetector.java

Lines changed: 19 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,9 @@
66

77
import org.togetherjava.tjbot.config.Config;
88
import org.togetherjava.tjbot.config.ScamBlockerConfig;
9-
import org.togetherjava.tjbot.features.utils.StringDistances;
109

11-
import java.net.URI;
1210
import java.util.Collection;
1311
import java.util.List;
14-
import java.util.Locale;
15-
import java.util.StringJoiner;
1612
import java.util.function.Predicate;
1713
import java.util.regex.Pattern;
1814
import java.util.stream.Stream;
@@ -28,6 +24,7 @@ public final class ScamDetector {
2824
private final ScamBlockerConfig config;
2925
private final Predicate<String> isSuspiciousAttachmentName;
3026
private final Predicate<String> hasTrustedRole;
27+
private final TokenAnalyse tokenAnalyse;
3128

3229
/**
3330
* Creates a new instance with the given configuration
@@ -42,6 +39,8 @@ public ScamDetector(Config config) {
4239
.asMatchPredicate();
4340
hasTrustedRole =
4441
Pattern.compile(this.config.getTrustedUserRolePattern()).asMatchPredicate();
42+
43+
tokenAnalyse = new TokenAnalyse(this.config);
4544
}
4645

4746
/**
@@ -59,10 +58,11 @@ public boolean isScam(Message message) {
5958
}
6059

6160
String content = message.getContentDisplay();
62-
List<Message.Attachment> attachments = message.getAttachments();
61+
List<Attachment> attachments =
62+
message.getAttachments().stream().map(Attachment::fromDiscord).toList();
6363

6464
if (content.isBlank()) {
65-
return areAttachmentsSuspicious(attachments);
65+
return areAttachmentsScam(attachments);
6666
}
6767

6868
return isScam(content);
@@ -76,158 +76,36 @@ public boolean isScam(Message message) {
7676
*/
7777
public boolean isScam(CharSequence message) {
7878
AnalyseResults results = new AnalyseResults();
79-
TOKENIZER.splitAsStream(message).forEach(token -> analyzeToken(token, results));
79+
TOKENIZER.splitAsStream(message).forEach(token -> tokenAnalyse.analyze(token, results));
8080
return isScam(results);
8181
}
8282

8383
private boolean isScam(AnalyseResults results) {
84-
if (results.pingsEveryone && (results.containsSuspiciousKeyword || results.hasUrl
85-
|| results.containsDollarSign)) {
84+
if (results.pingsEveryone() && (results.containsSuspiciousKeyword() || results.hasUrl()
85+
|| results.containsDollarSign())) {
8686
return true;
8787
}
8888

89-
return Stream
90-
.of(results.containsSuspiciousKeyword, results.hasSuspiciousUrl,
91-
results.containsDollarSign)
89+
boolean hasTooManySuspiciousFlags = Stream
90+
.of(results.containsSuspiciousKeyword(), results.hasSuspiciousUrl(),
91+
results.containsDollarSign())
9292
.filter(flag -> flag)
9393
.count() >= 2;
94-
}
95-
96-
private void analyzeToken(String token, AnalyseResults results) {
97-
if (token.isBlank()) {
98-
return;
99-
}
100-
101-
if (!results.pingsEveryone
102-
&& ("@everyone".equalsIgnoreCase(token) || "@here".equalsIgnoreCase(token))) {
103-
results.pingsEveryone = true;
104-
}
105-
106-
if (!results.containsSuspiciousKeyword && containsSuspiciousKeyword(token)) {
107-
results.containsSuspiciousKeyword = true;
108-
}
109-
110-
if (!results.containsDollarSign && (token.contains("$") || "usd".equalsIgnoreCase(token))) {
111-
results.containsDollarSign = true;
112-
}
113-
114-
if (token.startsWith("http")) {
115-
analyzeUrl(token, results);
116-
}
117-
}
118-
119-
private void analyzeUrl(String url, AnalyseResults results) {
120-
String host;
121-
try {
122-
host = URI.create(url).getHost();
123-
} catch (IllegalArgumentException _) {
124-
// Invalid urls are not scam
125-
return;
126-
}
127-
128-
if (host == null) {
129-
return;
130-
}
131-
132-
results.hasUrl = true;
133-
134-
if (config.getHostWhitelist().contains(host)) {
135-
return;
136-
}
137-
138-
if (config.getHostBlacklist().contains(host)) {
139-
results.hasSuspiciousUrl = true;
140-
return;
141-
}
142-
143-
for (String keyword : config.getSuspiciousHostKeywords()) {
144-
if (isHostSimilarToKeyword(host, keyword)) {
145-
results.hasSuspiciousUrl = true;
146-
break;
147-
}
94+
if (hasTooManySuspiciousFlags) {
95+
return true;
14896
}
149-
}
15097

151-
private boolean containsSuspiciousKeyword(String token) {
152-
String preparedToken = token.toLowerCase(Locale.US);
153-
154-
return config.getSuspiciousKeywords()
155-
.stream()
156-
.map(keyword -> keyword.toLowerCase(Locale.US))
157-
.anyMatch(keyword -> {
158-
// Exact match "^foo$"
159-
if (startsWith(keyword, '^') && endsWith(keyword, '$')) {
160-
return preparedToken.equals(keyword.substring(1, keyword.length() - 1));
161-
}
162-
// Simple regex-inspired syntax "^foo"
163-
if (startsWith(keyword, '^')) {
164-
return preparedToken.startsWith(keyword.substring(1));
165-
}
166-
// Simple regex-inspired syntax "foo$"
167-
if (endsWith(keyword, '$')) {
168-
return preparedToken.endsWith(keyword.substring(0, keyword.length() - 1));
169-
}
170-
return preparedToken.contains(keyword);
171-
});
98+
return results.onlyContainsUrls() && results.areAllUrlsWithAttachments()
99+
&& areAttachmentsScam(results.getUrlAttachments());
172100
}
173101

174-
private boolean areAttachmentsSuspicious(Collection<? extends Message.Attachment> attachments) {
102+
private boolean areAttachmentsScam(Collection<Attachment> attachments) {
175103
long suspiciousAttachments =
176104
attachments.stream().filter(this::isAttachmentSuspicious).count();
177105
return suspiciousAttachments >= config.getSuspiciousAttachmentsThreshold();
178106
}
179107

180-
private boolean isAttachmentSuspicious(Message.Attachment attachment) {
181-
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.getFileName());
182-
}
183-
184-
private boolean isHostSimilarToKeyword(String host, String keyword) {
185-
// NOTE This algorithm is far from optimal.
186-
// It is good enough for our purpose though and not that complex.
187-
188-
// Rolling window of keyword-size over host.
189-
// If any window has a small distance, it is similar
190-
int windowStart = 0;
191-
int windowEnd = keyword.length();
192-
while (windowEnd <= host.length()) {
193-
String window = host.substring(windowStart, windowEnd);
194-
int distance = StringDistances.editDistance(keyword, window);
195-
196-
if (distance <= config.getIsHostSimilarToKeywordDistanceThreshold()) {
197-
return true;
198-
}
199-
200-
windowStart++;
201-
windowEnd++;
202-
}
203-
204-
return false;
205-
}
206-
207-
private static boolean startsWith(CharSequence text, char prefixToTest) {
208-
return !text.isEmpty() && text.charAt(0) == prefixToTest;
209-
}
210-
211-
private static boolean endsWith(CharSequence text, char suffixToTest) {
212-
return !text.isEmpty() && text.charAt(text.length() - 1) == suffixToTest;
213-
}
214-
215-
private static class AnalyseResults {
216-
private boolean pingsEveryone;
217-
private boolean containsSuspiciousKeyword;
218-
private boolean containsDollarSign;
219-
private boolean hasUrl;
220-
private boolean hasSuspiciousUrl;
221-
222-
@Override
223-
public String toString() {
224-
return new StringJoiner(", ", AnalyseResults.class.getSimpleName() + "[", "]")
225-
.add("pingsEveryone=" + pingsEveryone)
226-
.add("containsSuspiciousKeyword=" + containsSuspiciousKeyword)
227-
.add("containsDollarSign=" + containsDollarSign)
228-
.add("hasUrl=" + hasUrl)
229-
.add("hasSuspiciousUrl=" + hasSuspiciousUrl)
230-
.toString();
231-
}
108+
private boolean isAttachmentSuspicious(Attachment attachment) {
109+
return attachment.isImage() && isSuspiciousAttachmentName.test(attachment.fileName());
232110
}
233111
}

0 commit comments

Comments
 (0)