Remove support for TEXTDECODER=0 (#24700)

sbc100 · web-flow · commit b490c4440bb4 · 2025-07-14T16:53:48.000-07:00
Setting this to zero doesn't provide much value to anyone since the
conditional usage is relatively cheap (compared to the fallback) and
the API is available in all browsers these days.

We still need the fallback for things like audio worklets and JS shell
environments.
diff --git a/ChangeLog.md b/ChangeLog.md
@@ -20,6 +20,10 @@ See docs/process.md for more on how version tagging works.
 
 4.0.12 (in development)
 -----------------------
+- Support for `-sTEXTDECODER=0` was removed, due to widespread support for
+  `TextDecoder`.  The remaining valid values for this setting are `=1`
+  (conditional use of `TextDecoder` with fallback) and `=2` (unconditional use
+  of `TextDecoder`). (#24700)
 
 4.0.11 - 07/14/25
 -----------------
diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst
@@ -2692,10 +2692,11 @@ Default value: 0
 TEXTDECODER
 ===========
 
-If enabled, use the JavaScript TextDecoder API for string marshalling.
-Enabled by default, set this to 0 to disable.
+The default value or 1 means the generated code will use TextDecoder if
+available and fall back to custom decoder code when not available.
 If set to 2, we assume TextDecoder is present and usable, and do not emit
-any JS code to fall back if it is missing.
+any JS code to fall back if it is missing. Setting this to zero to avoid even
+conditional usage of TextDecoder is no longer supported.
 Note: In -Oz builds, the default value of TEXTDECODER is set to 2, to save on
 code size (except when AUDIO_WORKLET is specified, or when `shell` is part
 of ENVIRONMENT since TextDecoder is not available in those environments).
diff --git a/src/lib/libstrings.js b/src/lib/libstrings.js
@@ -4,11 +4,15 @@
  * SPDX-License-Identifier: MIT
  */
 
+#if TEXTDECODER != 1 && TEXTDECODER != 2
+#error "TEXTDECODER must be either 1 or 2"
+#endif
+
 addToLibrary({
   // TextDecoder constructor defaults to UTF-8
 #if TEXTDECODER == 2
   $UTF8Decoder: "new TextDecoder()",
-#elif TEXTDECODER == 1
+#else
   $UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined",
 #endif
 
@@ -35,49 +39,28 @@ addToLibrary({
    * @param {boolean=} ignoreNul - If true, the function will not stop on a NUL character.
    * @return {string}
    */`,
-#if TEXTDECODER
   $UTF8ArrayToString__deps: ['$UTF8Decoder', '$findStringEnd'],
-#endif
   $UTF8ArrayToString: (heapOrArray, idx = 0, maxBytesToRead, ignoreNul) => {
 #if CAN_ADDRESS_2GB
     idx >>>= 0;
 #endif
 
-#if TEXTDECODER
     var endPtr = findStringEnd(heapOrArray, idx, maxBytesToRead, ignoreNul);
-#else
-    var endIdx = idx + maxBytesToRead;
-#endif
 
 #if TEXTDECODER == 2
     return UTF8Decoder.decode(heapOrArray.buffer ? {{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}} : new Uint8Array(heapOrArray.slice(idx, endPtr)));
 #else // TEXTDECODER == 2
-#if TEXTDECODER
     // When using conditional TextDecoder, skip it for short strings as the overhead of the native call is not worth it.
     if (endPtr - idx > 16 && heapOrArray.buffer && UTF8Decoder) {
       return UTF8Decoder.decode({{{ getUnsharedTextDecoderView('heapOrArray', 'idx', 'endPtr') }}});
     }
-#endif // TEXTDECODER
     var str = '';
-#if TEXTDECODER
-    // If building with TextDecoder, we have already computed the string length
-    // above, so test loop end condition against that
     while (idx < endPtr) {
-#else
-    while (!(idx >= endIdx)) {
-#endif
       // For UTF8 byte structure, see:
       // http://en.wikipedia.org/wiki/UTF-8#Description
       // https://www.ietf.org/rfc/rfc2279.txt
       // https://tools.ietf.org/html/rfc3629
       var u0 = heapOrArray[idx++];
-#if !TEXTDECODER
-      // If not building with TextDecoder enabled, we don't know the string
-      // length, so scan for \0 byte.
-      // If building with TextDecoder, we know exactly at what byte index the
-      // string ends, so checking for nulls here would be redundant.
-      if (!u0 && !ignoreNul) return str;
-#endif
       if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
       var u1 = heapOrArray[idx++] & 63;
       if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
@@ -310,32 +293,26 @@ addToLibrary({
 
 #if TEXTDECODER == 2
   $UTF16Decoder: "new TextDecoder('utf-16le');",
-#elif TEXTDECODER == 1
+#else
   $UTF16Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder('utf-16le') : undefined;",
 #endif
 
   // Given a pointer 'ptr' to a null-terminated UTF16LE-encoded string in the
   // emscripten HEAP, returns a copy of that string as a Javascript String
   // object.
-#if TEXTDECODER
   $UTF16ToString__deps: ['$UTF16Decoder', '$findStringEnd'],
-#endif
   $UTF16ToString: (ptr, maxBytesToRead, ignoreNul) => {
 #if ASSERTIONS
     assert(ptr % 2 == 0, 'Pointer passed to UTF16ToString must be aligned to two bytes!');
 #endif
     var idx = {{{ getHeapOffset('ptr', 'u16') }}};
-#if TEXTDECODER
     var endIdx = findStringEnd(HEAPU16, idx, maxBytesToRead / 2, ignoreNul);
 
 #if TEXTDECODER != 2
     // When using conditional TextDecoder, skip it for short strings as the overhead of the native call is not worth it.
     if (endIdx - idx > 16 && UTF16Decoder)
 #endif // TEXTDECODER != 2
       return UTF16Decoder.decode({{{ getUnsharedTextDecoderView('HEAPU16', 'idx', 'endIdx') }}});
-#else
-    var maxIdx = idx + maxBytesToRead / 2;
-#endif // TEXTDECODER
 
 #if TEXTDECODER != 2
     // Fallback: decode without UTF16Decoder
@@ -344,25 +321,8 @@ addToLibrary({
     // If maxBytesToRead is not passed explicitly, it will be undefined, and the
     // for-loop's condition will always evaluate to true. The loop is then
     // terminated on the first null char.
-    for (
-      var i = idx;
-#if TEXTDECODER
-      // If building with TextDecoder, we have already computed the string length
-      // above, so test loop end condition against that
-      i < endIdx;
-#else
-      !(i >= maxIdx);
-#endif
-      ++i
-    ) {
+    for (var i = idx; i < endIdx; ++i) {
       var codeUnit = HEAPU16[i];
-#if !TEXTDECODER
-      // If not building with TextDecoder enabled, we don't know the string
-      // length, so scan for \0 character.
-      // If building with TextDecoder, we know exactly at what index the
-      // string ends, so checking for nulls here would be redundant.
-      if (!codeUnit && !ignoreNul) break;
-#endif
       // fromCharCode constructs a character from a UTF-16 code unit, so we can
       // pass the UTF16 string right through.
       str += String.fromCharCode(codeUnit);
diff --git a/src/settings.js b/src/settings.js
@@ -1769,10 +1769,11 @@ var PTHREADS_DEBUG = false;
 // [link]
 var EVAL_CTORS = 0;
 
-// If enabled, use the JavaScript TextDecoder API for string marshalling.
-// Enabled by default, set this to 0 to disable.
+// The default value or 1 means the generated code will use TextDecoder if
+// available and fall back to custom decoder code when not available.
 // If set to 2, we assume TextDecoder is present and usable, and do not emit
-// any JS code to fall back if it is missing.
+// any JS code to fall back if it is missing. Setting this to zero to avoid even
+// conditional usage of TextDecoder is no longer supported.
 // Note: In -Oz builds, the default value of TEXTDECODER is set to 2, to save on
 // code size (except when AUDIO_WORKLET is specified, or when `shell` is part
 // of ENVIRONMENT since TextDecoder is not available in those environments).
diff --git a/test/other/test_unoptimized_code_size.js.size b/test/other/test_unoptimized_code_size.js.size
@@ -1 +1 @@
-54108
+53970
diff --git a/test/other/test_unoptimized_code_size_no_asserts.js.size b/test/other/test_unoptimized_code_size_no_asserts.js.size
@@ -1 +1 @@
-26909
+26771
diff --git a/test/other/test_unoptimized_code_size_strict.js.size b/test/other/test_unoptimized_code_size_strict.js.size
@@ -1 +1 @@
-52158
+52020
diff --git a/test/test_core.py b/test/test_core.py
@@ -391,7 +391,7 @@ def decorated(self, textdecoder, *args, **kwargs):
     self.set_setting('TEXTDECODER', textdecoder)
     f(self, *args, **kwargs)
 
-  parameterize(decorated, {'': (0,), 'textdecoder': (2,)})
+  parameterize(decorated, {'': (1,), 'force_textdecoder': (2,)})
 
   return decorated
 
diff --git a/test/test_other.py b/test/test_other.py
@@ -16364,10 +16364,6 @@ def test_install(self):
   def test_TextDecoder(self, args1, args2):
     self.cflags += args1 + args2
 
-    self.do_runf('hello_world.c', cflags=['-sTEXTDECODER=0'])
-    just_fallback = os.path.getsize('hello_world.js')
-    print('just_fallback:\t%s' % just_fallback)
-
     self.do_runf('hello_world.c')
     td_with_fallback = os.path.getsize('hello_world.js')
     print('td_with_fallback:\t%s' % td_with_fallback)
@@ -16378,7 +16374,10 @@ def test_TextDecoder(self, args1, args2):
 
     # td_with_fallback should always be largest of all three in terms of code side
     self.assertGreater(td_with_fallback, td_without_fallback)
-    self.assertGreater(td_with_fallback, just_fallback)
 
-    # the fallback is also expected to be larger in code size than using td
-    self.assertGreater(just_fallback, td_without_fallback)
+  def test_TextDecoder_invalid(self):
+    err = self.expect_fail([EMCC, test_file('hello_world.c'), '-sTEXTDECODER=0'])
+    self.assertContained('#error "TEXTDECODER must be either 1 or 2"', err)
+
+    err = self.expect_fail([EMCC, test_file('hello_world.c'), '-sTEXTDECODER=3'])
+    self.assertContained('#error "TEXTDECODER must be either 1 or 2"', err)