From 521450e69602d0972e88ffeed66f96accaed2d8b Mon Sep 17 00:00:00 2001 From: Nixos NIXOS Date: Sun, 16 Nov 2025 16:03:49 +0000 Subject: [PATCH 1/4] add implementation for the match-and-report input stream --- src/main/java/dregex/Regex.java | 34 +++ src/main/java/dregex/impl/DfaAlgorithms.java | 63 +++++ src/main/java/dregex/impl/RegexImpl.java | 6 + src/test/java/dregex/MatchTest.java | 245 +++++++++++++++++++ 4 files changed, 348 insertions(+) diff --git a/src/main/java/dregex/Regex.java b/src/main/java/dregex/Regex.java index dcbbcf5..2f38dc3 100644 --- a/src/main/java/dregex/Regex.java +++ b/src/main/java/dregex/Regex.java @@ -1,6 +1,9 @@ package dregex; import dregex.impl.*; + +import java.io.IOException; +import java.io.InputStream; import java.util.List; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -42,6 +45,20 @@ public boolean matches(CharSequence input) { return matchAndReport(input).matches(); } + /** + * Return whether the input stream is matched by the regular expression (i.e. whether the stream is included in the + * language generated by the expression). As the match is done using a DFA, its complexity is O(n), where n is the + * length of the stream. It is constant with respect to the length of the expression. + * + * @param input the stream to match + * + * @return whether the input matches the regex + * @throws IOException if an I/O error occurs while reading the file + */ + public boolean matches(InputStream input) throws IOException { + return matchAndReport(input).matches(); + } + /** * Return whether the input string is matched by the regular expression (i.e. whether the string is included in the * language generated by the expression). As the match is done using a DFA, its complexity is O(n), where n is the @@ -58,6 +75,23 @@ public MatchResult matchAndReport(CharSequence input) { return regexImpl.matchAndReport(input); } + /** + * Return whether the input stream is matched by the regular expression (i.e. 
whether the stream is included in the + * language generated by the expression). As the match is done using a DFA, its complexity is O(n), where n is the + * length of the stream. It is constant with respect to the length of the expression. + *

+ * This method is similar to method {@link #matches(InputStream)}, except that it also returns how many characters + * were successfully matched in case of failure. + * + * @param input the input stream to match + * + * @return an object with information about the matching attempt + * @throws IOException if an I/O error occurs while reading the stream + */ + public MatchResult matchAndReport(InputStream input) throws IOException { + return regexImpl.matchAndReport(input); + } + /** * Intersect this regular expression with another. The resulting expression will match the strings that are * matched by the operands, and only those. Intersections take O(n⋅m) time, where n and m are the number of states of diff --git a/src/main/java/dregex/impl/DfaAlgorithms.java b/src/main/java/dregex/impl/DfaAlgorithms.java index 52cf68a..9f6ab50 100644 --- a/src/main/java/dregex/impl/DfaAlgorithms.java +++ b/src/main/java/dregex/impl/DfaAlgorithms.java @@ -1,6 +1,11 @@ package dregex.impl; import dregex.MatchResult; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.function.BiPredicate; import java.util.function.Function; @@ -208,6 +213,64 @@ public static MatchResult matchString(Dfa dfa, CharSequence string) { return new MatchResult(dfa.accepting.contains(current), i); } + public static MatchResult matchInputStream(Dfa dfa, InputStream inputStream) throws IOException { + // Start from the initial state of the DFA + State currentState = dfa.initial; + + // Current position in the input stream + int position = 0; + + // Read the input stream character by character + try (InputStreamReader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) { + int ch; + while ((ch = reader.read()) != -1) { + // Read the next Unicode code point (handles surrogate pairs) + int codePoint = readCodePoint(reader, ch); + if (codePoint == -1) break; // End of stream or 
invalid surrogate pair + + // Get the next DFA state based on the current state and input character + State nextState = getNextState(dfa, currentState, codePoint); + + if (nextState == null) { + // If not accepting, return failure at current position + return new MatchResult(false, position); + } + + // Move to the next state and increment position + currentState = nextState; + position++; + } + } + + // After reading the stream, check if the current state is accepting + return new MatchResult(dfa.accepting.contains(currentState), position); + } + + // Reads a Unicode code point from the stream, handling surrogate pairs if needed + private static int readCodePoint(InputStreamReader reader, int firstChar) throws IOException { + char c1 = (char) firstChar; + if (Character.isHighSurrogate(c1)) { + int ch2 = reader.read(); + if (ch2 == -1) return -1; // Incomplete surrogate pair + char c2 = (char) ch2; + return Character.toCodePoint(c1, c2); + } + return c1; + } + + // Retrieves the next DFA state based on the current state and input code point + private static State getNextState(Dfa dfa, State current, int codePoint) { + TreeMap transitions = dfa.defTransitions.get(current); + if (transitions == null) return null; + + // Find the transition whose interval includes the code point + var entry = transitions.floorEntry(new CharInterval(codePoint, codePoint)); + if (entry != null && codePoint <= entry.getKey().to) { + return entry.getValue(); + } + return null; + } + /** * Each DFA is also trivially a NFA, return it. 
*/ diff --git a/src/main/java/dregex/impl/RegexImpl.java b/src/main/java/dregex/impl/RegexImpl.java index 8dafe87..0dd4203 100644 --- a/src/main/java/dregex/impl/RegexImpl.java +++ b/src/main/java/dregex/impl/RegexImpl.java @@ -2,6 +2,8 @@ import dregex.IncompatibleRegexException; import dregex.MatchResult; +import java.io.IOException; +import java.io.InputStream; import java.text.Normalizer; import java.time.Duration; import org.slf4j.Logger; @@ -48,6 +50,10 @@ public MatchResult matchAndReport(CharSequence string) { return DfaAlgorithms.matchString(dfa, builder.toString()); } + public MatchResult matchAndReport(InputStream inputStream) throws IOException { + return DfaAlgorithms.matchInputStream(dfa, inputStream); + } + public RegexImpl intersect(RegexImpl other) { checkUniverse(other); var start = System.nanoTime(); diff --git a/src/test/java/dregex/MatchTest.java b/src/test/java/dregex/MatchTest.java index 403645a..68725db 100644 --- a/src/test/java/dregex/MatchTest.java +++ b/src/test/java/dregex/MatchTest.java @@ -5,6 +5,10 @@ import dregex.impl.RegexImpl; import dregex.impl.Universe; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.regex.Pattern; import org.junit.jupiter.api.Test; @@ -104,6 +108,98 @@ void testCharacterClassesSimple() { assertTrue(r.matches("d")); } } + @Test + void testInputStreamClassesSimple() throws IOException { + + assertFalse(RegexImpl.nullRegex(Universe.Empty).matchesAtLeastOne()); + + { + var r = Regex.compile(""); + assertTrue(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile(" "); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream(" ".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream(" 
".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("."); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("aa".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[a-d]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("aa".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("x".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^a]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^ab]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("c".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[ab-c]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new 
ByteArrayInputStream("c".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("d".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[a-bc]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("c".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("d".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^ab-c]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("c".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("d".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^a-bc]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("c".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("d".getBytes(StandardCharsets.UTF_8)))); + } + } @Test void testCharacterClassesSpecialCharactersInside() { @@ -189,6 +285,90 @@ void testCharacterClassesSpecialCharactersInside() { } } + @Test + void testInputStreamClassesSpecialCharactersInside() throws IOException { + + // Special characters 
inside input stream classes + assertTrue(Regex.compile("[.]").matches(new ByteArrayInputStream(".".getBytes(StandardCharsets.UTF_8)))); + assertTrue(Regex.compile("[(]").matches(new ByteArrayInputStream("(".getBytes(StandardCharsets.UTF_8)))); + assertTrue(Regex.compile("[)]").matches(new ByteArrayInputStream(")".getBytes(StandardCharsets.UTF_8)))); + assertTrue(Regex.compile("[$]").matches(new ByteArrayInputStream("$".getBytes(StandardCharsets.UTF_8)))); + assertTrue(Regex.compile("[[]").matches(new ByteArrayInputStream("[".getBytes(StandardCharsets.UTF_8)))); + assertTrue(Regex.compile("[\\]]").matches(new ByteArrayInputStream("]".getBytes(StandardCharsets.UTF_8)))); + + // Dash is interpreted literally inside character classes when it is the first or the last element + + { + var r = Regex.compile("[-]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("X".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[-a]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("x".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[a-]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("x".getBytes(StandardCharsets.UTF_8)))); + 
} + + { + var r = Regex.compile("[-a-]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("x".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^-]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^-a]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^a-]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("[^-a-]"); + assertTrue(r.matchesAtLeastOne()); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("-".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new 
ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("b".getBytes(StandardCharsets.UTF_8)))); + } + } + @Test void testCharacterClassesShorthand() { @@ -297,6 +477,71 @@ void testCharacterClassesShorthand() { } } + @Test + void testInputStreamClassesShorthand() throws IOException{ + + { + var r = Regex.compile("\\d"); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("0".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("9".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("\\w"); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("0".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("9".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("A".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("_".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream(":".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("\\s"); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream(" ".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("\t".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("\n".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("\r".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("\f".getBytes(StandardCharsets.UTF_8)))); + 
assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("\\D"); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("0".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("9".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("\\W"); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("0".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("9".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("A".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("_".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream(":".getBytes(StandardCharsets.UTF_8)))); + } + + { + var r = Regex.compile("\\S"); + assertFalse(r.matches(new ByteArrayInputStream("".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream(" ".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("\\t".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("\\n".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("\\r".getBytes(StandardCharsets.UTF_8)))); + assertFalse(r.matches(new ByteArrayInputStream("\\f".getBytes(StandardCharsets.UTF_8)))); + assertTrue(r.matches(new ByteArrayInputStream("a".getBytes(StandardCharsets.UTF_8)))); + } + + } + @Test void testQuantifiers() { From 4f1a3e456c06f5b78948da3b308831ccb11f4c0d Mon Sep 17 00:00:00 2001 From: fahkone Date: Mon, 17 Nov 2025 08:40:17 
+0000 Subject: [PATCH 2/4] update readme file --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 1af9c35..1678c81 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,9 @@ Regex lower = all.diff(upper); System.out.println(lower.matches("aaa")); // true System.out.println(lower.matches("Aaa")); // false +System.out.println(lower.matches(new ByteArrayInputStream("aaa".getBytes(StandardCharsets.UTF_8)))); // true +System.out.println(lower.matches(new ByteArrayInputStream("Aaa".getBytes(StandardCharsets.UTF_8)))); // false + ``` The motivating use case was detecting non-intersecting expressions. Once it can be established that a set of expressions do not intersect (that they are disjoint) it becomes possible to short-circuit evaluations. Moreover, they can be tested in any order, allowing for reordering based on matching statistics. This is especially important in performance-critical paths where multiple expressions are matched, such as in load balancers. 
From 5cf5f7a4a2857afe42e225459a1b1ee959248b55 Mon Sep 17 00:00:00 2001 From: fahkone Date: Tue, 18 Nov 2025 11:05:11 +0000 Subject: [PATCH 3/4] add a test Simple FileInputStream --- src/test/java/dregex/MatchTest.java | 15 +++++++++++++++ src/test/resources/log.json | 6 ++++++ 2 files changed, 21 insertions(+) create mode 100644 src/test/resources/log.json diff --git a/src/test/java/dregex/MatchTest.java b/src/test/java/dregex/MatchTest.java index 68725db..ab306d7 100644 --- a/src/test/java/dregex/MatchTest.java +++ b/src/test/java/dregex/MatchTest.java @@ -8,6 +8,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.util.regex.Pattern; import org.junit.jupiter.api.Test; @@ -1246,4 +1247,18 @@ void testLookbehind() { assertTrue(r.matches("c")); } } + + @Test + void testSimpleFileInputStream() throws IOException { + var compiledRegex = Regex.compile("(\\{\"time\"\\s*:\\s*\"(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z)\",\"severityText\"\\s*:\\s*(\"INFO\"|\"ERROR\"|\"WARNING\"),\"service.name\"\\s*:\\s*(\"auth-service\"|\"payment-service\"|\"storage-service\"),\"traceId\"\\s*:\\s*\"[a-z]{3}\\d{3}\",\"spanId\"\\s*:\\s*\"[a-z]{3}\\d{3}\"}\\s*)*"); + + String resourceName = "log.json"; + + // Load resource using the context class loader + try (InputStream inputStream = Thread.currentThread() + .getContextClassLoader() + .getResourceAsStream(resourceName)) { + assertTrue(compiledRegex.matches(inputStream)); + } + } } diff --git a/src/test/resources/log.json b/src/test/resources/log.json new file mode 100644 index 0000000..87358ed --- /dev/null +++ b/src/test/resources/log.json @@ -0,0 +1,6 @@ +{"time":"2025-10-30T14:58:43Z","severityText":"INFO","service.name":"auth-service","traceId":"abc123","spanId":"def456"} +{"time":"2025-10-30T14:59:01Z","severityText":"ERROR","service.name":"payment-service","traceId":"ghi789","spanId":"jkl012"} 
+{"time":"2025-10-30T15:58:43Z","severityText":"INFO","service.name":"auth-service","traceId":"abc124","spanId":"def457"} +{"time":"2025-10-30T15:59:01Z","severityText":"ERROR","service.name":"payment-service","traceId":"ghi799","spanId":"jkl013"} +{"time":"2025-10-30T16:58:43Z","severityText":"INFO","service.name":"auth-service","traceId":"abc125","spanId":"def458"} +{"time":"2025-10-30T16:59:01Z","severityText":"ERROR","service.name":"payment-service","traceId":"ghi889","spanId":"jkl022"} \ No newline at end of file From b34d931c96a273de3a87272d22dea09d2b3d5bf7 Mon Sep 17 00:00:00 2001 From: fahkone Date: Fri, 21 Nov 2025 10:22:14 +0000 Subject: [PATCH 4/4] use log.txt file instead --- src/test/java/dregex/MatchTest.java | 4 ++-- src/test/resources/log.json | 6 ------ src/test/resources/log.txt | 1 + 3 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 src/test/resources/log.json create mode 100644 src/test/resources/log.txt diff --git a/src/test/java/dregex/MatchTest.java b/src/test/java/dregex/MatchTest.java index ab306d7..5f73501 100644 --- a/src/test/java/dregex/MatchTest.java +++ b/src/test/java/dregex/MatchTest.java @@ -1250,9 +1250,9 @@ void testLookbehind() { @Test void testSimpleFileInputStream() throws IOException { - var compiledRegex = Regex.compile("(\\{\"time\"\\s*:\\s*\"(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z)\",\"severityText\"\\s*:\\s*(\"INFO\"|\"ERROR\"|\"WARNING\"),\"service.name\"\\s*:\\s*(\"auth-service\"|\"payment-service\"|\"storage-service\"),\"traceId\"\\s*:\\s*\"[a-z]{3}\\d{3}\",\"spanId\"\\s*:\\s*\"[a-z]{3}\\d{3}\"}\\s*)*"); + var compiledRegex = Regex.compile("(time:\\s(\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z),\\sseverityText:\\s(INFO|ERROR|WARNING),\\sservice.name:\\s(auth-service|payment-service|storage-service),\\straceId:\\s[a-z]{3}\\d{3},\\sspanId\\s*:\\s*[a-z]{3}\\d{3}\\s*)*"); - String resourceName = "log.json"; + String resourceName = "log.txt"; // Load resource using the context class loader try 
(InputStream inputStream = Thread.currentThread() diff --git a/src/test/resources/log.json b/src/test/resources/log.json deleted file mode 100644 index 87358ed..0000000 --- a/src/test/resources/log.json +++ /dev/null @@ -1,6 +0,0 @@ -{"time":"2025-10-30T14:58:43Z","severityText":"INFO","service.name":"auth-service","traceId":"abc123","spanId":"def456"} -{"time":"2025-10-30T14:59:01Z","severityText":"ERROR","service.name":"payment-service","traceId":"ghi789","spanId":"jkl012"} -{"time":"2025-10-30T15:58:43Z","severityText":"INFO","service.name":"auth-service","traceId":"abc124","spanId":"def457"} -{"time":"2025-10-30T15:59:01Z","severityText":"ERROR","service.name":"payment-service","traceId":"ghi799","spanId":"jkl013"} -{"time":"2025-10-30T16:58:43Z","severityText":"INFO","service.name":"auth-service","traceId":"abc125","spanId":"def458"} -{"time":"2025-10-30T16:59:01Z","severityText":"ERROR","service.name":"payment-service","traceId":"ghi889","spanId":"jkl022"} \ No newline at end of file diff --git a/src/test/resources/log.txt b/src/test/resources/log.txt new file mode 100644 index 0000000..a1a7329 --- /dev/null +++ b/src/test/resources/log.txt @@ -0,0 +1 @@ +time: 2025-10-30T14:58:43Z, severityText: INFO, service.name: auth-service, traceId: abc123, spanId: def456time: 2025-10-30T14:59:01Z, severityText: ERROR, service.name: payment-service, traceId: ghi789, spanId:jkl012time: 2025-10-30T15:58:43Z, severityText: INFO, service.name: auth-service, traceId: abc124, spanId: def457time: 2025-10-30T15:59:01Z, severityText: ERROR, service.name: payment-service, traceId: ghi799, spanId: jkl013time: 2025-10-30T16:58:43Z, severityText: INFO, service.name: auth-service, traceId: abc125, spanId: def458time: 2025-10-30T16:59:01Z, severityText: ERROR, service.name: payment-service, traceId: ghi889, spanId: jkl022 \ No newline at end of file