From 020e1539b4f525e19a70712042d4843b14de0d1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 3 Nov 2025 15:20:09 +0200 Subject: [PATCH 1/9] Add test for all byte pairs for binary encoding. --- test/test_other.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/test_other.py b/test/test_other.py index 8806d7ff3ed33..bc3ebdcbdcf54 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -17,6 +17,7 @@ import select import shlex import shutil +import struct import subprocess import sys import tarfile @@ -89,6 +90,7 @@ from tools import building, cache, response_file, shared, utils, webassembly from tools.building import get_building_env +from tools.link import binary_encode from tools.settings import settings from tools.shared import ( CLANG_CC, @@ -15158,3 +15160,16 @@ def test_linkable_relocatable(self): # These setting is due for removal: # https://github.com/emscripten-core/emscripten/issues/25262 self.do_run_in_out_file_test('hello_world.c', cflags=['-Wno-deprecated', '-sLINKABLE', '-sRELOCATABLE']) + + # Tests encoding of all byte pairs for binary encoding in SINGLE_FILE mode. + def test_binary_encode(self): + # Encode values 0 .. 65535 into test data + test_data = bytearray(struct.pack('<' + 'H'*65536, *range(65536))) + open('data.tmp', 'wb').write(test_data) + binary_encoded = binary_encode('data.tmp') + test_js = '''var u16 = new Uint16Array(binaryDecode(src).buffer); +for(var i = 0; i < 65536; ++i) + if (u16[i] != i) throw i; +console.log('OK');''' + open('test.js', 'w').write(open(path_from_root('src', 'binaryDecode.js')).read() + '\nvar src = ' + binary_encoded + ';\n' + test_js) + self.assertContained('OK', self.run_js('test.js')) From b77b9dfbcc52457808fea6c12f350f2ea8f2efd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 3 Nov 2025 15:41:26 +0200 Subject: [PATCH 2/9] Improve documentation on SINGLE_FILE requiring UTF-8 encoding. 
--- .../docs/tools_reference/settings_reference.rst | 16 ++++++++++++++++ src/settings.js | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index a80c2a73563ee..85c027583bac3 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2837,6 +2837,9 @@ child-src directive to allow blob:. If you aren't using Content Security Policy, or your CSP header doesn't include either script-src or child-src, then you can safely ignore this warning. +Note that SINGLE_FILE with binary encoding requires the HTML/JS files to be +served with UTF-8 encoding. See the details on SINGLE_FILE_BINARY_ENCODE. + Default value: false .. _single_file_binary_encode: @@ -2851,6 +2854,19 @@ issues with the binary encoding. (and please let us know of any such issues) If no issues arise, this option will permanently become the default in the future. +NOTE: Binary encoding requires that the HTML/JS files are served with UTF-8 +encoding, and will not work with the default legacy Windows-1252 encoding +that browsers might use on Windows. To enable UTF-8 encoding in a +hand-crafted index.html file, apply any of: +1. Add inside the section of HTML, or +2. Add + inside , or +3. Add + inside (if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or +4. pass the header `Content-Type: text/html; charset=utf-8` and/or header + `Content-Type: application/javascript; charset=utf-8` when serving the + relevant files that contain binary encoded content. + Default value: true .. _auto_js_libraries: diff --git a/src/settings.js b/src/settings.js index 642570cab4c43..1da25b8348390 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1852,6 +1852,9 @@ var WASMFS = false; // child-src directive to allow blob:. 
If you aren't using Content Security // Policy, or your CSP header doesn't include either script-src or child-src, // then you can safely ignore this warning. +// +// Note that SINGLE_FILE with binary encoding requires the HTML/JS files to be +// served with UTF-8 encoding. See the details on SINGLE_FILE_BINARY_ENCODE. // [link] var SINGLE_FILE = false; @@ -1861,6 +1864,19 @@ var SINGLE_FILE = false; // issues with the binary encoding. (and please let us know of any such issues) // If no issues arise, this option will permanently become the default in the // future. +// +// NOTE: Binary encoding requires that the HTML/JS files are served with UTF-8 +// encoding, and will not work with the default legacy Windows-1252 encoding +// that browsers might use on Windows. To enable UTF-8 encoding in a +// hand-crafted index.html file, apply any of: +// 1. Add inside the section of HTML, or +// 2. Add +// inside , or +// 3. Add +// inside (if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or +// 4. pass the header `Content-Type: text/html; charset=utf-8` and/or header +// `Content-Type: application/javascript; charset=utf-8` when serving the +// relevant files that contain binary encoded content. // [link] var SINGLE_FILE_BINARY_ENCODE = true; From bb28db1ab865f9680efd383e14cc953391eeec0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 3 Nov 2025 15:46:55 +0200 Subject: [PATCH 3/9] Update ChangeLog and add note --- ChangeLog.md | 4 ++++ site/source/docs/tools_reference/settings_reference.rst | 2 ++ src/settings.js | 2 ++ 3 files changed, 8 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 5049d358fb06d..2bedad7726165 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -39,6 +39,10 @@ See docs/process.md for more on how version tagging works. are used via `--use-port=emdawnwebgpu`. See 4.0.10 release notes for details. 
- A new `CROSS_ORIGIN` setting was added in order to work around issues hosting emscripten programs across different origins (#25581) +- The binary data encoding for `SINGLE_FILE` mode was changed from base64 to + directly embed binary data into UTF-8 string. Users who use the SINGLE_FILE + mode along with a custom HTML file should declare the files to have UTF-8 + encoding. See `src/settings.js` docs on `SINGLE_FILE` (#25599) 4.0.17 - 10/17/25 ----------------- diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index 85c027583bac3..d810104142fb1 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2866,6 +2866,8 @@ hand-crafted index.html file, apply any of: 4. pass the header `Content-Type: text/html; charset=utf-8` and/or header `Content-Type: application/javascript; charset=utf-8` when serving the relevant files that contain binary encoded content. +If none of these are possible, disable binary encoding with +-sSINGLE_FILE_BINARY_ENCODE=0 to fall back to base64 encoding. Default value: true diff --git a/src/settings.js b/src/settings.js index 1da25b8348390..2259067a8cb9b 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1877,6 +1877,8 @@ var SINGLE_FILE = false; // 4. pass the header `Content-Type: text/html; charset=utf-8` and/or header // `Content-Type: application/javascript; charset=utf-8` when serving the // relevant files that contain binary encoded content. +// If none of these are possible, disable binary encoding with +// -sSINGLE_FILE_BINARY_ENCODE=0 to fall back to base64 encoding. 
// [link] var SINGLE_FILE_BINARY_ENCODE = true; From bf37aba554161f02346438c385a8d386f4eaef35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 3 Nov 2025 16:11:21 +0200 Subject: [PATCH 4/9] ruff --- test/test_other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_other.py b/test/test_other.py index bc3ebdcbdcf54..27999209ecf89 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -15164,7 +15164,7 @@ def test_linkable_relocatable(self): # Tests encoding of all byte pairs for binary encoding in SINGLE_FILE mode. def test_binary_encode(self): # Encode values 0 .. 65535 into test data - test_data = bytearray(struct.pack('<' + 'H'*65536, *range(65536))) + test_data = bytearray(struct.pack('<' + 'H' * 65536, *range(65536))) open('data.tmp', 'wb').write(test_data) binary_encoded = binary_encode('data.tmp') test_js = '''var u16 = new Uint16Array(binaryDecode(src).buffer); From 6eade722c8c1063a43c2cf749e5d2e60c3ef6966 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 3 Nov 2025 16:13:12 +0200 Subject: [PATCH 5/9] docs lint --- .../docs/tools_reference/settings_reference.rst | 13 ++++++------- src/settings.js | 13 ++++++------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index d810104142fb1..4bb4d688ab798 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2858,14 +2858,13 @@ NOTE: Binary encoding requires that the HTML/JS files are served with UTF-8 encoding, and will not work with the default legacy Windows-1252 encoding that browsers might use on Windows. To enable UTF-8 encoding in a hand-crafted index.html file, apply any of: -1. Add inside the section of HTML, or -2. Add - inside , or -3. Add - inside (if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or +1. 
Add `` inside the section of HTML, or +2. Add ``` inside , or +3. Add `` inside +(if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or 4. pass the header `Content-Type: text/html; charset=utf-8` and/or header - `Content-Type: application/javascript; charset=utf-8` when serving the - relevant files that contain binary encoded content. +`Content-Type: application/javascript; charset=utf-8` when serving the +relevant files that contain binary encoded content. If none of these are possible, disable binary encoding with -sSINGLE_FILE_BINARY_ENCODE=0 to fall back to base64 encoding. diff --git a/src/settings.js b/src/settings.js index 2259067a8cb9b..bf93984630e75 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1869,14 +1869,13 @@ var SINGLE_FILE = false; // encoding, and will not work with the default legacy Windows-1252 encoding // that browsers might use on Windows. To enable UTF-8 encoding in a // hand-crafted index.html file, apply any of: -// 1. Add inside the section of HTML, or -// 2. Add -// inside , or -// 3. Add -// inside (if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or +// 1. Add `` inside the section of HTML, or +// 2. Add ``` inside , or +// 3. Add `` inside +// (if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or // 4. pass the header `Content-Type: text/html; charset=utf-8` and/or header -// `Content-Type: application/javascript; charset=utf-8` when serving the -// relevant files that contain binary encoded content. +// `Content-Type: application/javascript; charset=utf-8` when serving the +// relevant files that contain binary encoded content. // If none of these are possible, disable binary encoding with // -sSINGLE_FILE_BINARY_ENCODE=0 to fall back to base64 encoding. 
// [link] From c8e4e924d40fc95657a6e0a1caac17a0e4f4a491 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 5 Nov 2025 16:02:05 +0200 Subject: [PATCH 6/9] Review --- ChangeLog.md | 5 +++-- test/test_other.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 2bedad7726165..e6147f901a567 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -40,9 +40,10 @@ See docs/process.md for more on how version tagging works. - A new `CROSS_ORIGIN` setting was added in order to work around issues hosting emscripten programs across different origins (#25581) - The binary data encoding for `SINGLE_FILE` mode was changed from base64 to - directly embed binary data into UTF-8 string. Users who use the SINGLE_FILE + directly embed binary data into a UTF-8 string. Users who use the `SINGLE_FILE` mode along with a custom HTML file should declare the files to have UTF-8 - encoding. See `src/settings.js` docs on `SINGLE_FILE` (#25599) + encoding. See `src/settings.js` docs on `SINGLE_FILE`. Use the option + `-sSINGLE_FILE_BINARY_ENCODE=0` to fall back to base64 encoding. (#25599) 4.0.17 - 10/17/25 ----------------- diff --git a/test/test_other.py b/test/test_other.py index 27999209ecf89..b23e0678fae86 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -108,7 +108,7 @@ config, ) from tools.system_libs import DETERMINISTIC_PREFIX -from tools.utils import MACOS, WINDOWS, delete_file, read_binary, read_file, write_file +from tools.utils import MACOS, WINDOWS, delete_file, read_binary, read_file, write_file, write_binary emmake = utils.bat_suffix(path_from_root('emmake')) emconfig = utils.bat_suffix(path_from_root('em-config')) @@ -15165,11 +15165,11 @@ def test_linkable_relocatable(self): def test_binary_encode(self): # Encode values 0 ..
65535 into test data test_data = bytearray(struct.pack('<' + 'H' * 65536, *range(65536))) - open('data.tmp', 'wb').write(test_data) + write_binary('data.tmp', test_data) binary_encoded = binary_encode('data.tmp') test_js = '''var u16 = new Uint16Array(binaryDecode(src).buffer); for(var i = 0; i < 65536; ++i) if (u16[i] != i) throw i; console.log('OK');''' - open('test.js', 'w').write(open(path_from_root('src', 'binaryDecode.js')).read() + '\nvar src = ' + binary_encoded + ';\n' + test_js) + write_file('test.js', open(path_from_root('src', 'binaryDecode.js')).read() + '\nvar src = ' + binary_encoded + ';\n' + test_js) self.assertContained('OK', self.run_js('test.js')) From 882b959e8c10f9b9c37113ec2e849fb6b3dc3b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 5 Nov 2025 19:12:33 +0200 Subject: [PATCH 7/9] ruff --- test/test_other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_other.py b/test/test_other.py index b23e0678fae86..b028f2250f18b 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -108,7 +108,7 @@ config, ) from tools.system_libs import DETERMINISTIC_PREFIX -from tools.utils import MACOS, WINDOWS, delete_file, read_binary, read_file, write_file, write_binary +from tools.utils import MACOS, WINDOWS, delete_file, read_binary, read_file, write_binary, write_file emmake = utils.bat_suffix(path_from_root('emmake')) emconfig = utils.bat_suffix(path_from_root('em-config')) From 27996f46f2478ed0a5ea75f503b934297ad9e1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 5 Nov 2025 19:59:34 +0200 Subject: [PATCH 8/9] review --- test/test_other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_other.py b/test/test_other.py index b028f2250f18b..310c7c201485e 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -15171,5 +15171,5 @@ def test_binary_encode(self): for(var i = 0; i < 65536; ++i) if (u16[i] != i) throw i; console.log('OK');''' - 
write_file('test.js', open(path_from_root('src', 'binaryDecode.js')).read() + '\nvar src = ' + binary_encoded + ';\n' + test_js) + write_file('test.js', read_file(path_from_root('src/binaryDecode.js')) + '\nvar src = ' + binary_encoded + ';\n' + test_js) self.assertContained('OK', self.run_js('test.js')) From 4783e8a705f91abb5664981811b1d9323a03b776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 5 Nov 2025 23:29:13 +0200 Subject: [PATCH 9/9] ruff --- test/test_other.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/test_other.py b/test/test_other.py index 310c7c201485e..de50719185cb4 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -108,7 +108,15 @@ config, ) from tools.system_libs import DETERMINISTIC_PREFIX -from tools.utils import MACOS, WINDOWS, delete_file, read_binary, read_file, write_binary, write_file +from tools.utils import ( + MACOS, + WINDOWS, + delete_file, + read_binary, + read_file, + write_binary, + write_file, +) emmake = utils.bat_suffix(path_from_root('emmake')) emconfig = utils.bat_suffix(path_from_root('em-config'))