microsoft · YehezkelShB · Mar 5, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/jest.config.jsdom.js b/jest.config.jsdom.js
@@ -7,5 +7,6 @@ module.exports = {
     "node_modules",
     "src"
   ],
-  "testEnvironment": "jsdom"
+  "testEnvironment": "jsdom",
+  "setupFilesAfterEnv": ["<rootDir>/jest.setup.jsdom.js"]
 }
diff --git a/jest.setup.jsdom.js b/jest.setup.jsdom.js
@@ -0,0 +1,4 @@
+import { TextDecoder, TextEncoder } from 'util';
+
+global.TextEncoder = TextEncoder; // Expose the TextEncoder imported from 'util' as a global for the jsdom test run (presumably the jsdom environment lacks it — TODO confirm).
+global.TextDecoder = TextDecoder; // Same for TextDecoder, which the XML decoding helpers construct via `new TextDecoder(...)`.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -42,16 +42,13 @@
   "homepage": "https://github.com/microsoft/connected-workbooks#readme",
   "dependencies": {
     "@xmldom/xmldom": "~0.8.4",
-    "base64-js": "^1.5.1",
-    "buffer": "^6.0.3",
     "jszip": "^3.5.0",
     "xmldom-qsa": "^1.1.3"
   },
   "devDependencies": {
     "@babel/core": "^7.14.6",
     "@babel/preset-env": "^7.14.7",
     "@babel/preset-typescript": "^7.14.5",
-    "@types/base64-js": "^1.3.0",
     "@types/jest": "^26.0.24",
     "@types/jszip": "^3.4.1",
     "@types/webpack-bundle-analyzer": "^4.6.0",
@@ -69,7 +66,7 @@
     "terser-webpack-plugin": "^5.3.9",
     "ts-loader": "^9.4.3",
     "ts-node": "^9.0.0",
-    "typescript": "^4.9.5",
+    "typescript": "^5.9.2",
     "webpack": "^5.86.0",
     "webpack-bundle-analyzer": "^4.9.0",
     "webpack-cli": "^5.1.4"

diff --git a/src/utils/arrayUtils.ts b/src/utils/arrayUtils.ts
@@ -1,33 +1,6 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT license.
 
-export class ArrayReader {
-    private _array: ArrayBuffer;
-    private _position: number;
-
-    constructor(array: ArrayBuffer) {
-        this._array = array;
-        this._position = 0;
-    }
-
-    public getInt32(): number {
-        const retVal = new DataView(this._array, this._position, 4).getInt32(0, true);
-        this._position += 4;
-
-        return retVal;
-    }
-
-    getBytes(bytes?: number): Uint8Array {
-        const retVal = this._array.slice(this._position, bytes ? bytes! + this._position : bytes);
-        this._position += retVal.byteLength;
-        return new Uint8Array(retVal);
-    }
-
-    reset(): void {
-        this._position = 0;
-    }
-}
-
 function getInt32Buffer(val: number): Uint8Array {
     const packageSizeBuffer = new ArrayBuffer(4);
     new DataView(packageSizeBuffer).setInt32(0, val, true);
@@ -47,8 +20,191 @@ function concatArrays(...args: Uint8Array[]): Uint8Array {
     return retVal;
 }
 
+const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // Base64 alphabet: a character's index in this string is its 6-bit value.
+
+function base64ToUint8Array(base64: string): [Uint8Array,DataView] { // Decodes a base64 string; returns the decoded bytes and a DataView over those same bytes.
+  // Strip every whitespace character from the input before decoding.
+  base64 = base64.replace(/\s/g, "");
+
+  // Count the trailing "=" padding characters (0, 1 or 2).
+  const len = base64.length;
+  let padding = 0;
+  if (base64.endsWith("==")) {
+    padding = 2;
+  } else if (base64.endsWith("=")) {
+    padding = 1;
+  }
+
+  // Every 4 input characters yield 3 bytes, minus one byte per padding character.
+  const outputLength = (len * 3) / 4 - padding; // NOTE(review): only an integer when len is a multiple of 4 — confirm callers always pass padded base64.
+  const output = new Uint8Array(outputLength);
+
+  let outIndex = 0;
+  // Walk the input in groups of 4 characters (24 bits).
+  for (let i = 0; i < len; i += 4) {
+    // Map each character of the group to its 6-bit value (its position in base64Chars).
+    const c1 = base64Chars.indexOf(base64.charAt(i)); // NOTE(review): indexOf yields -1 for a character outside the alphabet; that case is not checked here.
+    const c2 = base64Chars.indexOf(base64.charAt(i + 1));
+    // A "=" in the third or fourth position is padding and contributes 0 bits.
+    const c3 = base64.charAt(i + 2) === '=' ? 0 : base64Chars.indexOf(base64.charAt(i + 2));
+    const c4 = base64.charAt(i + 3) === '=' ? 0 : base64Chars.indexOf(base64.charAt(i + 3));
+
+    // Pack the four 6-bit values into one 24-bit integer, first character in the highest bits.
+    const triple = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
+
+    // Emit 1, 2 or 3 bytes depending on where the padding starts in this group.
+    if (base64.charAt(i + 2) === '=') {
+      // Third character is padding: only the top byte carries data.
+      output[outIndex++] = (triple >> 16) & 0xFF;
+    } else if (base64.charAt(i + 3) === '=') {
+      // Only the fourth character is padding: the top two bytes carry data.
+      output[outIndex++] = (triple >> 16) & 0xFF;
+      output[outIndex++] = (triple >> 8) & 0xFF;
+    } else {
+      // No padding in this group: all three bytes carry data.
+      output[outIndex++] = (triple >> 16) & 0xFF;
+      output[outIndex++] = (triple >> 8) & 0xFF;
+      output[outIndex++] = triple & 0xFF;
+    }
+  }
+
+  const dataView = new DataView(output.buffer, output.byteOffset, output.byteLength); // View over exactly the bytes of `output` (same buffer, offset and length).
+
+  return [output, dataView];
+}
+
+function uint8ArrayToBase64(data: Uint8Array): string { // Encodes bytes as a "="-padded base64 string; empty input yields "".
+  let base64 = "";
+
+  // Walk the input in groups of 3 bytes; each group becomes 4 output characters.
+  for (let i = 0; i < data.length; i += 3) {
+    // Read up to three bytes; positions past the end of the data are treated as 0.
+    const byte1 = data[i];
+    const byte2 = i + 1 < data.length ? data[i + 1] : 0;
+    const byte3 = i + 2 < data.length ? data[i + 2] : 0;
+
+    // Pack the three bytes into one 24-bit integer, first byte in the highest bits.
+    const triple = (byte1 << 16) | (byte2 << 8) | byte3;
+
+    // Slice the 24 bits into four 6-bit alphabet indices, highest bits first.
+    const index1 = (triple >> 18) & 0x3F;
+    const index2 = (triple >> 12) & 0x3F;
+    const index3 = (triple >> 6)  & 0x3F;
+    const index4 = triple & 0x3F;
+
+    // The first two characters are always emitted, since byte1 always exists inside the loop.
+    base64 += base64Chars.charAt(index1);
+    base64 += base64Chars.charAt(index2);
+
+    // Third character: real data only if this group has a second byte, otherwise "=" padding.
+    if (i + 1 < data.length) {
+      base64 += base64Chars.charAt(index3);
+    } else {
+      base64 += "=";
+    }
+
+    // Fourth character: real data only if this group has a third byte, otherwise "=" padding.
+    if (i + 2 < data.length) {
+      base64 += base64Chars.charAt(index4);
+    } else {
+      base64 += "=";
+    }
+  }
+
+  return base64;
+}
+
+function encodeStringToUCS2(str: string): Uint8Array { // Encodes each UTF-16 code unit of `str` as two bytes in little-endian order; no BOM is written.
+  const byteLength = str.length * 2; // Two bytes per code unit.
+  const buffer = new Uint8Array(byteLength);
+  for (let i = 0; i < str.length; i++) {
+    const code = str.charCodeAt(i); // 16-bit code unit at index i.
+    // Little-endian layout: the low byte goes first, the high byte second.
+    buffer[i * 2] = code & 0xff;
+    buffer[i * 2 + 1] = code >> 8;
+  }
+  return buffer;
+}
+
+/**
+ * Detects the encoding of an XML byte array from its leading byte-order mark (BOM).
+ *
+ * @param xmlBytes - The XML content as a Uint8Array.
+ * @returns "utf-8", "utf-16le" or "utf-16be" when the matching BOM is present; "utf-8" when
+ *          non-empty input has no recognized BOM; null when xmlBytes is missing or empty.
+ */
+function detectEncoding(xmlBytes: Uint8Array): string | null {
+  if (!xmlBytes || xmlBytes.length === 0) {
+    return null; // Nothing to inspect: the caller decides how to handle this.
+  }
+
+  // UTF-8 BOM: EF BB BF
+  if (
+    xmlBytes.length >= 3 &&
+    xmlBytes[0] === 0xEF &&
+    xmlBytes[1] === 0xBB &&
+    xmlBytes[2] === 0xBF
+  ) {
+    return "utf-8";
+  }
+
+  // UTF-16LE BOM: FF FE
+  if (xmlBytes.length >= 2 && xmlBytes[0] === 0xFF && xmlBytes[1] === 0xFE) { // NOTE(review): FF FE also begins the UTF-32LE BOM (FF FE 00 00), which is not distinguished here.
+    return "utf-16le";
+  }
+
+  // UTF-16BE BOM: FE FF
+  if (xmlBytes.length >= 2 && xmlBytes[0] === 0xFE && xmlBytes[1] === 0xFF) {
+    return "utf-16be";
+  }
+
+  // No recognized BOM on non-empty input: fall back to UTF-8.
+  return "utf-8";
+}
+
+/**
+ * Decodes a Uint8Array containing XML data into a string, using the
+ * encoding that detectEncoding reports for its leading BOM.
+ *
+ * @param xmlBytes - The XML content as a Uint8Array.
+ * @returns The decoded XML string with any leading BOM removed.
+ * @throws Error if no encoding is detected (missing or empty input) or if UTF-16BE input has an odd byte length.
+ */
+function decodeXml(xmlBytes: Uint8Array): string {
+  const encoding = detectEncoding(xmlBytes);
+  if (!encoding) {
+    throw new Error("Failed to detect xml encoding");
+  }
+
+  let xmlString: string;
+
+  // UTF-16BE is decoded by swapping each byte pair and decoding as UTF-16LE (the original note assumes TextDecoder lacks utf-16be support — TODO confirm for the target runtimes).
+  if (encoding.toLowerCase() === "utf-16be") {
+    if (xmlBytes.length % 2 !== 0) {
+      throw new Error("Invalid UTF-16BE byte array (should be even length)");
+    }
+    // Build a copy with the two bytes of every 16-bit unit exchanged; the input is left untouched.
+    const swappedBytes = new Uint8Array(xmlBytes.length);
+    for (let i = 0; i < xmlBytes.length; i += 2) {
+      swappedBytes[i] = xmlBytes[i + 1];
+      swappedBytes[i + 1] = xmlBytes[i];
+    }
+    // The swapped bytes are now little-endian, so decode them as UTF-16LE.
+    xmlString = new TextDecoder("utf-16le").decode(swappedBytes);
+  } else {
+    // "utf-8" and "utf-16le" are passed to TextDecoder as-is.
+    xmlString = new TextDecoder(encoding as string).decode(xmlBytes);
+  }
+
+  // Drop one leading U+FEFF (BOM) if it is still present after decoding.
+  return xmlString.replace(/^\ufeff/, "");
+}
+
 export default {
-    ArrayReader,
+    decodeXml,
+    encodeStringToUCS2,
+    uint8ArrayToBase64,
+    base64ToUint8Array,
     getInt32Buffer,
     concatArrays,
 };