Skip to content

Commit a99aa35

Browse files
authored
Merge pull request #6614 from Goober5000/parse_line_updates
upgrade line parsing and end-of-line checks
2 parents 44d3ea9 + 791c250 commit a99aa35

File tree

2 files changed, with 87 additions and 24 deletions (+87 / -24 lines changed)

code/parse/parselo.cpp

Lines changed: 85 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -279,7 +279,7 @@ int get_line_num()
279279
incomment = false;
280280
}
281281

282-
if (*p++ == EOLN) {
282+
if (*p++ == EOLN) { // in the process of parsing, all line endings are normalized to single-character EOLN
283283
if ( !multiline && incomment )
284284
incomment = false;
285285
count++;
@@ -2137,32 +2137,86 @@ void strip_comments(char *line, bool &in_quote, bool &in_multiline_comment_a, bo
21372137
}
21382138
}
21392139

2140-
int parse_get_line(char *lineout, int max_line_len, const char *start, int max_size, const char *cur)
2140+
// Reads one line of text from the input, returning the number of input chars read. Also sets the line ending type if found;
2141+
// and if there is a mismatch, displays a warning.
2142+
int parse_get_line(char *lineout, int max_line_len, const char *textin, int input_len, int line_num, LineEndingType &file_line_ending_type, bool &warned_for_this_file)
21412143
{
2142-
char * t = lineout;
2143-
int i, num_chars_read=0;
2144-
char c;
2144+
auto found_line_ending = LineEndingType::UNKNOWN;
2145+
char prev_c = '\0';
2146+
int num_chars_written = 0;
21452147

2146-
for ( i = 0; i < max_line_len-1; i++ ) {
2147-
do {
2148-
if ( (cur - start) >= max_size ) {
2149-
*lineout = 0;
2150-
if ( lineout > t ) {
2151-
return num_chars_read;
2152-
} else {
2153-
return 0;
2154-
}
2148+
for (int num_chars_read = 1; num_chars_read <= input_len; ++num_chars_read)
2149+
{
2150+
char c = *textin++;
2151+
2152+
if (c == '\0' || c == EOF) // hard stop
2153+
{
2154+
input_len = num_chars_read;
2155+
break;
2156+
}
2157+
else if (c == EOLN)
2158+
{
2159+
if (prev_c == CARRIAGE_RETURN)
2160+
found_line_ending = LineEndingType::CRLF;
2161+
else
2162+
found_line_ending = LineEndingType::LF;
2163+
}
2164+
else if (c == CARRIAGE_RETURN)
2165+
{
2166+
if (*textin != EOLN)
2167+
found_line_ending = LineEndingType::CR;
2168+
}
2169+
else
2170+
{
2171+
if (num_chars_written == max_line_len)
2172+
{
2173+
num_chars_read--; // back out the character we just read, since we can't write it
2174+
2175+
// terminate the string and return
2176+
*lineout = '\0';
2177+
return num_chars_read;
21552178
}
2156-
c = *cur++;
2157-
num_chars_read++;
2158-
} while ( c == 13 );
21592179

2160-
*lineout++ = c;
2161-
if ( c=='\n' ) break;
2180+
*lineout++ = c;
2181+
num_chars_written++;
2182+
}
2183+
2184+
if (found_line_ending != LineEndingType::UNKNOWN)
2185+
{
2186+
if (file_line_ending_type == LineEndingType::UNKNOWN)
2187+
file_line_ending_type = found_line_ending;
2188+
else if (found_line_ending != file_line_ending_type && !warned_for_this_file)
2189+
{
2190+
// we can't use error_display() here because we're in the middle of reading the file
2191+
Warning(LOCATION, "In %s, an inconsistent line ending was detected on line %d. Please check the file for line ending errors.", Current_filename_sub, line_num);
2192+
warned_for_this_file = true;
2193+
}
2194+
2195+
// ugh, if we're at the max length, we can't write the newline, so back out the newline we read
2196+
if (num_chars_written == max_line_len)
2197+
{
2198+
if (found_line_ending == LineEndingType::CRLF)
2199+
num_chars_read -= 2;
2200+
else
2201+
num_chars_read--;
2202+
}
2203+
else
2204+
{
2205+
*lineout++ = EOLN; // normalize line endings to single-character EOLN
2206+
num_chars_written++;
2207+
}
2208+
2209+
// terminate the string and return
2210+
*lineout = '\0';
2211+
return num_chars_read;
2212+
}
2213+
2214+
prev_c = c;
21622215
}
21632216

2164-
*lineout++ = 0;
2165-
return num_chars_read;
2217+
// we read the entire input without reaching a newline
2218+
*lineout = 0;
2219+
return input_len;
21662220
}
21672221

21682222
// Read mission text, stripping comments.
@@ -2200,7 +2254,8 @@ void read_file_text(const char *filename, int mode, char *processed_text, char *
22002254
void read_file_text_from_default(const default_file& file, char *processed_text, char *raw_text)
22012255
{
22022256
// we have no filename, so copy a substitute
2203-
strcpy_s(Current_filename_sub, "internal default file");
2257+
strcpy_s(Current_filename_sub, "internal default file ");
2258+
strcat_s(Current_filename_sub, file.filename);
22042259

22052260
// if we are paused then processed_text and raw_text must not be NULL!!
22062261
if ( !Bookmarks.empty() && ((processed_text == NULL) || (raw_text == NULL)) ) {
@@ -2447,7 +2502,7 @@ void process_raw_file_text(char* processed_text, char* raw_text)
24472502
bool in_quote = false;
24482503
bool in_multiline_comment_a = false;
24492504
bool in_multiline_comment_b = false;
2450-
int raw_text_len = (int)strlen(raw_text);
2505+
int raw_text_len = static_cast<int>(strlen(raw_text));
24512506

24522507
if (processed_text == NULL)
24532508
processed_text = Parse_text;
@@ -2463,8 +2518,14 @@ void process_raw_file_text(char* processed_text, char* raw_text)
24632518

24642519
// strip comments from raw text, reading into file_text
24652520
int num_chars_read = 0;
2466-
while ((num_chars_read = parse_get_line(outbuf, PARSE_BUF_SIZE, raw_text, raw_text_len, mp_raw)) != 0) {
2521+
int remaining_raw_len = raw_text_len;
2522+
int parsed_line_num = 1;
2523+
auto file_line_ending_type = LineEndingType::UNKNOWN;
2524+
bool warned_for_this_file = false;
2525+
while ((num_chars_read = parse_get_line(outbuf, PARSE_BUF_SIZE-1, mp_raw, remaining_raw_len, parsed_line_num, file_line_ending_type, warned_for_this_file)) != 0) {
24672526
mp_raw += num_chars_read;
2527+
remaining_raw_len -= num_chars_read;
2528+
parsed_line_num++;
24682529

24692530
// stupid hacks to make retail data work with fixed parser, per Mantis #3072
24702531
if (!strcmp(outbuf, parse_exception_1402.c_str())) {

code/parse/parselo.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ extern int Token_found_flag;
3838
#define EOLN (char)0x0a
3939
#define CARRIAGE_RETURN (char)0x0d
4040

41+
enum class LineEndingType { UNKNOWN, CR, CRLF, LF };
42+
4143
#define F_NAME 1
4244
#define F_DATE 2
4345
#define F_NOTES 3

0 commit comments

Comments
 (0)