Skip to content

Commit b3d13a7

Browse files
authored
csv: remove old workaround in UTF8Reader (#546)
1 parent ed6c536 commit b3d13a7

File tree

3 files changed

+39
-34
lines changed

3 files changed

+39
-34
lines changed

csv/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/UTF8Reader.java

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ public final class UTF8Reader
2525

2626
private byte[] _inputBuffer;
2727

28+
/**
29+
* Flag set to indicate {@code inputBuffer} is read-only, and its
30+
* content should not be modified. This is the case when caller
31+
* has passed in a buffer of contents already read, instead of Jackson
32+
* allocating read buffer.
33+
*
34+
* @since 2.19
35+
*/
36+
private final boolean _inputBufferReadOnly;
37+
2838
/**
2939
* Pointer to the next available byte (if any), iff less than
3040
* <code>mByteBufferEnd</code>
@@ -73,7 +83,10 @@ public UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose,
7383
_inputBuffer = buf;
7484
_inputPtr = ptr;
7585
_inputEnd = ptr+len;
76-
_autoClose = autoClose;
86+
_autoClose = autoClose;
87+
// Unmodifiable if there is no stream to actually read from
88+
// (ideally caller should pass explicitly)
89+
_inputBufferReadOnly = (in == null);
7790
}
7891

7992
public UTF8Reader(IOContext ctxt, byte[] buf, int ptr, int len)
@@ -85,6 +98,8 @@ public UTF8Reader(IOContext ctxt, byte[] buf, int ptr, int len)
8598
_inputPtr = ptr;
8699
_inputEnd = ptr+len;
87100
_autoClose = true;
101+
// This is the case when we have a buffer of contents already read
102+
_inputBufferReadOnly = true;
88103
}
89104

90105
public UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose)
@@ -96,15 +111,8 @@ public UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose)
96111
_inputPtr = 0;
97112
_inputEnd = 0;
98113
_autoClose = autoClose;
99-
}
100-
101-
/**
102-
* Method that can be used to see if we can actually modify the
103-
* underlying buffer. This is the case if we are managing the buffer,
104-
* but not if it was just given to us.
105-
*/
106-
protected final boolean canModifyBuffer() {
107-
return (_ioContext != null);
114+
// Buffer allocated above, modifiable as needed
115+
_inputBufferReadOnly = false;
108116
}
109117

110118
/*
@@ -400,27 +408,17 @@ private boolean loadMore(int available) throws IOException
400408
{
401409
_byteCount += (_inputEnd - available);
402410

403-
// Bytes that need to be moved to the beginning of buffer?
404411
if (available > 0) {
412+
// Should we move bytes to the beginning of buffer?
405413
if (_inputPtr > 0) {
406-
if (!canModifyBuffer()) {
407-
// 15-Aug-2022, tatu: Occurs (only) if we have half-decoded UTF-8
408-
// characters; uncovered by:
409-
//
410-
// https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=50036
411-
//
412-
// and need to be reported as IOException
413-
if (_inputSource == null) {
414-
throw new IOException(String.format(
415-
"End-of-input after first %d byte(s) of a UTF-8 character: needed at least one more",
416-
available));
414+
// Can only do so if buffer mutable
415+
if (!_inputBufferReadOnly) {
416+
for (int i = 0; i < available; ++i) {
417+
_inputBuffer[i] = _inputBuffer[_inputPtr+i];
417418
}
419+
_inputPtr = 0;
420+
_inputEnd = available;
418421
}
419-
for (int i = 0; i < available; ++i) {
420-
_inputBuffer[i] = _inputBuffer[_inputPtr+i];
421-
}
422-
_inputPtr = 0;
423-
_inputEnd = available;
424422
}
425423
} else {
426424
// Ok; here we can actually reasonably expect an EOF, so let's do a separate read right away:

csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/FuzzCSVReadTest.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
import org.junit.jupiter.api.Test;
77

88
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
9+
import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;
910

1011
import static org.assertj.core.api.Assertions.fail;
12+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
1113

1214
/**
1315
* Collection of OSS-Fuzz found issues for CSV format module.
1416
*/
15-
public class FuzzCSVReadTest extends StreamingCSVReadTest
17+
public class FuzzCSVReadTest extends ModuleTestBase
1618
{
1719
private final CsvMapper CSV_MAPPER = mapperForCsv();
1820
private final byte[] INPUT = new byte[] { 0x20, (byte) 0xCD };
21+
private final byte[] CLONED = INPUT.clone();
1922

2023
// https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=50036
2124
@Test
@@ -25,8 +28,9 @@ public void testUTF8Decoding50036() throws Exception
2528
CSV_MAPPER.readTree(INPUT);
2629
fail("Should not pass");
2730
} catch (IOException e) {
28-
verifyException(e, "End-of-input after first 1 byte");
29-
verifyException(e, "of a UTF-8 character");
31+
verifyException(e, "Unexpected EOF in the middle of a multi-byte UTF-8 character");
32+
// check input was not modified
33+
assertArrayEquals(CLONED, INPUT);
3034
}
3135
}
3236

@@ -38,6 +42,8 @@ public void testUTF8Decoding50036Stream() throws Exception
3842
fail("Should not pass");
3943
} catch (IOException e) {
4044
verifyException(e, "Unexpected EOF in the middle of a multi-byte UTF-8 character");
45+
// check input was not modified
46+
assertArrayEquals(CLONED, INPUT);
4147
}
4248
}
4349
}

csv/src/test/java/com/fasterxml/jackson/dataformat/csv/tofix/UnicodeCSVRead497Test.java renamed to csv/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/UnicodeCSVRead497Test.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package com.fasterxml.jackson.dataformat.csv.tofix;
1+
package com.fasterxml.jackson.dataformat.csv.deser;
22

33
import java.io.ByteArrayInputStream;
44
import java.nio.charset.StandardCharsets;
@@ -8,7 +8,6 @@
88
import com.fasterxml.jackson.databind.JsonNode;
99
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
1010
import com.fasterxml.jackson.dataformat.csv.ModuleTestBase;
11-
import com.fasterxml.jackson.dataformat.csv.testutil.failure.JacksonTestFailureExpected;
1211

1312
import static org.junit.jupiter.api.Assertions.*;
1413

@@ -18,7 +17,6 @@ public class UnicodeCSVRead497Test extends ModuleTestBase
1817
private final CsvMapper MAPPER = mapperForCsv();
1918

2019
// [dataformats-text#497]
21-
@JacksonTestFailureExpected
2220
@Test
2321
public void testUnicodeAtEnd() throws Exception
2422
{
@@ -35,12 +33,15 @@ public void testUnicodeAtEnd() throws Exception
3533
public void testUnicodeAtEnd2() throws Exception
3634
{
3735
String doc = buildTestString2();
36+
final byte[] bytes = doc.getBytes(StandardCharsets.UTF_8);
3837
JsonNode o = MAPPER.reader() //.with(schema)
39-
.readTree(doc.getBytes(StandardCharsets.UTF_8));
38+
.readTree(bytes);
4039
assertNotNull(o);
4140
assertTrue(o.isArray());
4241
assertEquals(1, o.size());
4342
assertEquals(o.get(0).textValue(), doc);
43+
// check byte array was not modified
44+
assertArrayEquals(doc.getBytes(StandardCharsets.UTF_8), bytes);
4445
}
4546

4647
@Test

0 commit comments

Comments
 (0)