Skip to content

Commit 6bb7884

Browse files
author
Robin Duda
committed
fix some issues with line endings CR/LF vs LF
1 parent 303e85c commit 6bb7884

File tree

3 files changed

+28
-15
lines changed

3 files changed

+28
-15
lines changed

src/main/java/com/codingchili/ApplicationLauncher.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,11 @@ public static void main(String[] args) {
3030

3131
private ApplicationLauncher(String[] args) {
3232
VertxOptions options = new VertxOptions();
33-
options.setMaxEventLoopExecuteTime(options.getMaxEventLoopExecuteTime() * 10)
33+
34+
options.setMaxWorkerExecuteTime(options.getMaxWorkerExecuteTime() * 20) // 20 minutes.
35+
.setMaxEventLoopExecuteTime(options.getMaxEventLoopExecuteTime() * 10) // 20 seconds (default is 2 seconds).
3436
.setBlockedThreadCheckInterval(8000);
37+
3538
vertx = Vertx.vertx();
3639

3740
ImportEventCodec.registerOn(vertx);

src/main/java/com/codingchili/Model/CSVParser.java

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -110,21 +110,23 @@ private void readRowCount() {
110110
reset();
111111

112112
for (long i = 0; i < fileSize; i++) {
113-
if (get() == '\r') {
113+
if (get() == TOKEN_LF) {
114114
rows++;
115115
row = rows;
116116
}
117117
}
118118
}
119119

120120
private void readHeaders() {
121+
AtomicInteger fieldId = new AtomicInteger(0);
121122
reset();
122123

123124
for (long i = 0; i < fileSize; i++) {
124125
byte current = get();
125-
if (current == TOKEN_LF || current == TOKEN_CR) {
126+
if (current == TOKEN_LF) {
126127
Arrays.stream(new String(buffer.array()).split(","))
127128
.map(header -> header.replaceAll("\"", ""))
129+
.map(header -> (header.isEmpty()) ? "header_" + fieldId.incrementAndGet() : header)
128130
.map(String::trim).forEach(header -> {
129131
headers.put(header, "<empty>");
130132
});
@@ -183,17 +185,20 @@ private JsonObject readRow() {
183185
process(columnsRead, json);
184186
done = true;
185187
break;
186-
case TOKEN_LF:
187-
// skip LF characters.
188-
break;
189188
case TOKEN_CR:
190-
// final header is being read and EOL appears.
191-
if (columnsRead.get() == headers.size() - 1) {
192-
process(columnsRead, json);
193-
done = true;
194-
break;
195-
} else {
196-
throw new ColumnsHeadersMismatchException(columnsRead.get(), headers.size() - 1, row + 1);
189+
// skip CR characters.
190+
break;
191+
case TOKEN_LF:
192+
// ignore empty lines.
193+
if (buffer.position() > 0) {
194+
// final header is being read and EOL appears.
195+
if (columnsRead.get() == headers.size() - 1) {
196+
process(columnsRead, json);
197+
done = true;
198+
break;
199+
} else {
200+
throw new ColumnsHeadersMismatchException(columnsRead.get(), headers.size() - 1, row + 1);
201+
}
197202
}
198203
case TOKEN_QUOTE:
199204
// toggle quoted to support commas within quotes.
@@ -223,16 +228,19 @@ private JsonObject readRow() {
223228
return json;
224229
}
225230

231+
232+
private static final Predicate<String> floatPattern = Pattern.compile("^[0-9]+\\.[0-9]+$").asPredicate();
226233
private static final Predicate<String> numberPattern = Pattern.compile("^[0-9]+$").asPredicate();
227234
private static final Predicate<String> boolPattern = Pattern.compile("^(true|false)$").asPredicate();
228235

229236
private Object parseDatatype(byte[] data) {
230237
String line = new String(data).trim();
231238

232-
// skip regex parsing on dry-run.
233239
if (line.length() > 0) {
234240
if (numberPattern.test(line)) {
235241
return Long.parseLong(line);
242+
} else if (floatPattern.test(line)) {
243+
return Double.parseDouble(line);
236244
} else if (boolPattern.test(line)) {
237245
return Boolean.parseBoolean(line);
238246
} else {

src/test/resources/test.csv

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
Column 1, Column 2, Column 3cell 6.1, cell 6.2, cell 6.3cell 7.1, cell 7.2, cell 7.3
1+
Column 1, Column 2, Column 3
2+
cell 6.1, cell 6.2, cell 6.3
3+
cell 7.1, cell 7.2, cell 7.3

0 commit comments

Comments
 (0)