Skip to content

Commit 755edcf

Browse files
committed
Add a test of the leading comma being split
1 parent 1b12faa commit 755edcf

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

test/src/edu/stanford/nlp/process/PTBTokenizerTest.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,7 @@ public class PTBTokenizerTest {
104104
// the space is because some weirdness happens having an
105105
// unmatched surrogate at the end of a text
106106
"half codepoint:" + ((char) 55296) + " ",
107+
"There are ,2 days left",
107108
};
108109

109110
private final String[][] ptbGold = {
@@ -201,6 +202,7 @@ public class PTBTokenizerTest {
201202
{ "What", "do", "you", "suppose", "is", "in", "the", "file", "thicc_antennae", ".", "asdf", "?" },
202203
{ "two", "character", "codepoint", ":", "😸" },
203204
{ "half", "codepoint", ":", },
205+
{ "There", "are", ",", "2", "days", "left", },
204206
};
205207

206208
private final String[][] ptbGoldSplitHyphenated = {
@@ -306,6 +308,7 @@ public class PTBTokenizerTest {
306308
{ "What", "do", "you", "suppose", "is", "in", "the", "file", "thicc_antennae", ".", "asdf", "?" },
307309
{ "two", "character", "codepoint", ":", "😸" },
308310
{ "half", "codepoint", ":", },
311+
{ "There", "are", ",", "2", "days", "left", },
309312
};
310313

311314
@Test

0 commit comments

Comments
 (0)