diff --git a/src/logfmter/formatter.py b/src/logfmter/formatter.py
index f375431..3babcf5 100644
--- a/src/logfmter/formatter.py
+++ b/src/logfmter/formatter.py
@@ -35,6 +35,17 @@
     "threadName",
 )
 
+# C0 control characters plus DEL; none of them may appear raw in the output.
+_CONTROL_CODES = (*range(0x20), 0x7F)
+# Deletion table: translating with it alters a string only when the string
+# holds a space, "=" or a control character, i.e. only when it needs quoting.
+QUOTE_CHARS = str.maketrans("", "", " =" + "".join(map(chr, _CONTROL_CODES)))
+# Control character -> escape sequence; \n, \t and \r use their short forms.
+REPLACEMENTS = {chr(c): f"\\u{c:04x}" for c in _CONTROL_CODES}
+REPLACEMENTS.update({"\n": "\\n", "\t": "\\t", "\r": "\\r"})
+# The same mapping as a translation table, so escaping is a single pass.
+_ESCAPE_TABLE = str.maketrans(REPLACEMENTS)
+
 
 class _DefaultFormatter(logging.Formatter):
     def format(self, record):
@@ -59,13 +70,7 @@ def format_string(cls, value: str) -> str:
         Process the provided string with any necessary quoting and/or escaping.
         """
         needs_dquote_escaping = '"' in value
-        needs_newline_escaping = "\n" in value
-        needs_quoting = (
-            " " in value
-            or "=" in value
-            or needs_dquote_escaping
-            or needs_newline_escaping
-        )
+        needs_quoting = needs_dquote_escaping or value.translate(QUOTE_CHARS) != value
         needs_backslash_escaping = "\\" in value and needs_quoting
 
         if needs_backslash_escaping:
@@ -74,8 +79,8 @@ def format_string(cls, value: str) -> str:
         if needs_dquote_escaping:
             value = value.replace('"', '\\"')
 
-        if needs_newline_escaping:
-            value = value.replace("\n", "\\n")
+        # One pass suffices: no escape sequence contains a control character.
+        value = value.translate(_ESCAPE_TABLE)
 
         if needs_quoting:
             value = '"{}"'.format(value)
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
index f62803b..5a58b03 100644
--- a/tests/test_formatter.py
+++ b/tests/test_formatter.py
@@ -16,13 +16,25 @@
         ("=", '"="'),
         # All double quotes must be escaped.
         ('"', '"\\""'),
+        # Null bytes must be escaped and quoted
+        ("\x00", r'"\u0000"'),
+        # All whitespace chars must be escaped and quoted
+        ("\n", '"\\n"'),
+        ("\r", '"\\r"'),
+        ("\t", '"\\t"'),
+        # All other control chars must be escaped and quoted
+        ("\x07", r'"\u0007"'),
+        ("\x7f", r'"\u007f"'),
+        (
+            "".join(chr(c) for c in range(0x20) if chr(c) not in "\t\n\r"),
+            r'"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000b\u000c\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"',
+        ),
         # If the string requires escaping and quoting, then both
         # operations should be performed.
         (' "', '" \\""'),
         # If the string is empty, then it should be left empty.
         ("", ""),
         # If the string contains a newline, then it should be escaped.
-        ("\n", '"\\n"'),
         ("\n\n", '"\\n\\n"'),
         # If the string contains a backslash and needs to be quoted, then
         # the backslashes need to be escaped.