diff --git a/ChangeLog.md b/ChangeLog.md index 5049d358fb06d..e6147f901a567 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -39,6 +39,11 @@ See docs/process.md for more on how version tagging works. are used via `--use-port=emdawnwebgpu`. See 4.0.10 release notes for details. - A new `CROSS_ORIGIN` setting was added in order to work around issues hosting emscripten programs across different origins (#25581) +- The binary data encoding for `SINGLE_FILE` mode was changed from base64 to + embedding the binary data directly in a UTF-8 string. Users who use the `SINGLE_FILE` + mode along with a custom HTML file should declare the files to have UTF-8 + encoding. See `src/settings.js` docs on `SINGLE_FILE`. Use the option + `-sSINGLE_FILE_BINARY_ENCODE=0` to fall back to base64 encoding. (#25599) 4.0.17 - 10/17/25 ----------------- diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index a80c2a73563ee..4bb4d688ab798 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2837,6 +2837,9 @@ child-src directive to allow blob:. If you aren't using Content Security Policy, or your CSP header doesn't include either script-src or child-src, then you can safely ignore this warning. +Note that SINGLE_FILE with binary encoding requires the HTML/JS files to be +served with UTF-8 encoding. See the details on SINGLE_FILE_BINARY_ENCODE. + Default value: false .. _single_file_binary_encode: @@ -2851,6 +2854,20 @@ issues with the binary encoding. (and please let us know of any such issues) If no issues arise, this option will permanently become the default in the future. +NOTE: Binary encoding requires that the HTML/JS files are served with UTF-8 +encoding, and will not work with the default legacy Windows-1252 encoding +that browsers might use on Windows. To enable UTF-8 encoding in a +hand-crafted index.html file, apply any of: +1. 
Add `<meta charset="utf-8">` inside the <head> section of HTML, or +2. Add `<meta http-equiv="content-type" content="text/html; charset=UTF-8" />` inside <head>, or +3. Add `<script src="foo.js" charset="utf-8"></script>` inside <body> +(if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or +4. pass the header `Content-Type: text/html; charset=utf-8` and/or header +`Content-Type: application/javascript; charset=utf-8` when serving the +relevant files that contain binary encoded content. +If none of these are possible, disable binary encoding with +-sSINGLE_FILE_BINARY_ENCODE=0 to fall back to base64 encoding. + Default value: true .. _auto_js_libraries: diff --git a/src/settings.js b/src/settings.js index 642570cab4c43..bf93984630e75 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1852,6 +1852,9 @@ var WASMFS = false; // child-src directive to allow blob:. If you aren't using Content Security // Policy, or your CSP header doesn't include either script-src or child-src, // then you can safely ignore this warning. +// +// Note that SINGLE_FILE with binary encoding requires the HTML/JS files to be +// served with UTF-8 encoding. See the details on SINGLE_FILE_BINARY_ENCODE. // [link] var SINGLE_FILE = false; @@ -1861,6 +1864,20 @@ var SINGLE_FILE = false; // issues with the binary encoding. (and please let us know of any such issues) // If no issues arise, this option will permanently become the default in the // future. +// +// NOTE: Binary encoding requires that the HTML/JS files are served with UTF-8 +// encoding, and will not work with the default legacy Windows-1252 encoding +// that browsers might use on Windows. To enable UTF-8 encoding in a +// hand-crafted index.html file, apply any of: +// 1. Add `<meta charset="utf-8">` inside the <head> section of HTML, or +// 2. Add `<meta http-equiv="content-type" content="text/html; charset=UTF-8" />` inside <head>, or +// 3. Add `<script src="foo.js" charset="utf-8"></script>` inside <body> +// (if using -o foo.js with SINGLE_FILE mode to build HTML+JS), or +// 4. pass the header `Content-Type: text/html; charset=utf-8` and/or header +// `Content-Type: application/javascript; charset=utf-8` when serving the +// relevant files that contain binary encoded content. 
+// If none of these are possible, disable binary encoding with +// -sSINGLE_FILE_BINARY_ENCODE=0 to fall back to base64 encoding. // [link] var SINGLE_FILE_BINARY_ENCODE = true; diff --git a/test/test_other.py b/test/test_other.py index 8806d7ff3ed33..de50719185cb4 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -17,6 +17,7 @@ import select import shlex import shutil +import struct import subprocess import sys import tarfile @@ -89,6 +90,7 @@ from tools import building, cache, response_file, shared, utils, webassembly from tools.building import get_building_env +from tools.link import binary_encode from tools.settings import settings from tools.shared import ( CLANG_CC, @@ -106,7 +108,15 @@ config, ) from tools.system_libs import DETERMINISTIC_PREFIX -from tools.utils import MACOS, WINDOWS, delete_file, read_binary, read_file, write_file +from tools.utils import ( + MACOS, + WINDOWS, + delete_file, + read_binary, + read_file, + write_binary, + write_file, +) emmake = utils.bat_suffix(path_from_root('emmake')) emconfig = utils.bat_suffix(path_from_root('em-config')) @@ -15158,3 +15168,16 @@ def test_linkable_relocatable(self): # These setting is due for removal: # https://github.com/emscripten-core/emscripten/issues/25262 self.do_run_in_out_file_test('hello_world.c', cflags=['-Wno-deprecated', '-sLINKABLE', '-sRELOCATABLE']) + + # Round-trips every 16-bit value (i.e. every pair of adjacent bytes) through binary_encode() and the binaryDecode() helper from src/binaryDecode.js, covering the binary encoding used in SINGLE_FILE mode. + def test_binary_encode(self): + # Encode values 0 .. 65535 as little-endian uint16 ('<H') into test data + test_data = bytearray(struct.pack('<' + 'H' * 65536, *range(65536))) + write_binary('data.tmp', test_data) + binary_encoded = binary_encode('data.tmp') + test_js = '''var u16 = new Uint16Array(binaryDecode(src).buffer); +for(var i = 0; i < 65536; ++i) + if (u16[i] != i) throw i; +console.log('OK');''' + write_file('test.js', read_file(path_from_root('src/binaryDecode.js')) + '\nvar src = ' + binary_encoded + ';\n' + test_js) + self.assertContained('OK', self.run_js('test.js'))