diff --git a/jest.config.jsdom.js b/jest.config.jsdom.js index 63bc96b..7af39f9 100644 --- a/jest.config.jsdom.js +++ b/jest.config.jsdom.js @@ -7,5 +7,6 @@ module.exports = { "node_modules", "src" ], - "testEnvironment": "jsdom" + "testEnvironment": "jsdom", + "setupFilesAfterEnv": ["/jest.setup.jsdom.js"] } \ No newline at end of file diff --git a/jest.setup.jsdom.js b/jest.setup.jsdom.js new file mode 100644 index 0000000..388f76e --- /dev/null +++ b/jest.setup.jsdom.js @@ -0,0 +1,4 @@ +import { TextDecoder, TextEncoder } from 'util'; + +global.TextEncoder = TextEncoder; +global.TextDecoder = TextDecoder; \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index e0b63bc..501bc95 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,8 +10,6 @@ "license": "MIT", "dependencies": { "@xmldom/xmldom": "~0.8.4", - "base64-js": "^1.5.1", - "buffer": "^6.0.3", "jszip": "^3.5.0", "xmldom-qsa": "^1.1.3" }, @@ -19,7 +17,6 @@ "@babel/core": "^7.14.6", "@babel/preset-env": "^7.14.7", "@babel/preset-typescript": "^7.14.5", - "@types/base64-js": "^1.3.0", "@types/jest": "^26.0.24", "@types/jszip": "^3.4.1", "@types/webpack-bundle-analyzer": "^4.6.0", @@ -37,7 +34,7 @@ "terser-webpack-plugin": "^5.3.9", "ts-loader": "^9.4.3", "ts-node": "^9.0.0", - "typescript": "^4.9.5", + "typescript": "^5.9.2", "webpack": "^5.86.0", "webpack-bundle-analyzer": "^4.9.0", "webpack-cli": "^5.1.4" @@ -3827,12 +3824,6 @@ "@babel/types": "^7.20.7" } }, - "node_modules/@types/base64-js": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/@types/base64-js/-/base64-js-1.3.2.tgz", - "integrity": "sha512-Q2Xn2/vQHRGLRXhQ5+BSLwhHkR3JVflxVKywH0Q6fVoAiUE8fFYL2pE5/l2ZiOiBDfA8qUqRnSxln4G/NFz1Sg==", - "dev": true - }, "node_modules/@types/estree": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", @@ -4905,6 +4896,7 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "dev": true, "funding": [ { "type": "github", @@ -5100,6 +5092,7 @@ "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "dev": true, "funding": [ { "type": "github", @@ -7403,6 +7396,7 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "dev": true, "funding": [ { "type": "github", @@ -14894,16 +14888,16 @@ } }, "node_modules/typescript": { - "version": "4.9.5", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz", - "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", + "version": "5.9.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", + "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", "dev": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" }, "engines": { - "node": ">=4.2.0" + "node": ">=14.17" } }, "node_modules/undici-types": { diff --git a/package.json b/package.json index 533165a..5ae0bc3 100644 --- a/package.json +++ b/package.json @@ -42,8 +42,6 @@ "homepage": "https://github.com/microsoft/connected-workbooks#readme", "dependencies": { "@xmldom/xmldom": "~0.8.4", - "base64-js": "^1.5.1", - "buffer": "^6.0.3", "jszip": "^3.5.0", "xmldom-qsa": "^1.1.3" }, @@ -51,7 +49,6 @@ "@babel/core": "^7.14.6", "@babel/preset-env": "^7.14.7", "@babel/preset-typescript": "^7.14.5", - "@types/base64-js": "^1.3.0", "@types/jest": "^26.0.24", "@types/jszip": "^3.4.1", "@types/webpack-bundle-analyzer": "^4.6.0", @@ -69,7 +66,7 @@ "terser-webpack-plugin": "^5.3.9", "ts-loader": "^9.4.3", "ts-node": "^9.0.0", - "typescript": "^4.9.5", + "typescript": "^5.9.2", "webpack": "^5.86.0", "webpack-bundle-analyzer": "^4.9.0", "webpack-cli": "^5.1.4" diff --git a/src/utils/arrayUtils.ts b/src/utils/arrayUtils.ts index acac0b0..b2e00ca 100644 --- a/src/utils/arrayUtils.ts +++ b/src/utils/arrayUtils.ts @@ -1,33 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -export class ArrayReader { - private _array: ArrayBuffer; - private _position: number; - - constructor(array: ArrayBuffer) { - this._array = array; - this._position = 0; - } - - public getInt32(): number { - const retVal = new DataView(this._array, this._position, 4).getInt32(0, true); - this._position += 4; - - return retVal; - } - - getBytes(bytes?: number): Uint8Array { - const retVal = this._array.slice(this._position, bytes ? bytes! + this._position : bytes); - this._position += retVal.byteLength; - return new Uint8Array(retVal); - } - - reset(): void { - this._position = 0; - } -} - function getInt32Buffer(val: number): Uint8Array { const packageSizeBuffer = new ArrayBuffer(4); new DataView(packageSizeBuffer).setInt32(0, val, true); @@ -47,8 +20,191 @@ function concatArrays(...args: Uint8Array[]): Uint8Array { return retVal; } +const base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +function base64ToUint8Array(base64: string): [Uint8Array,DataView] { + // Remove any whitespace that might have snuck into the string. + base64 = base64.replace(/\s/g, ""); + + // Determine the number of padding characters. + const len = base64.length; + let padding = 0; + if (base64.endsWith("==")) { + padding = 2; + } else if (base64.endsWith("=")) { + padding = 1; + } + + // Calculate the length of the output. + const outputLength = (len * 3) / 4 - padding; + const output = new Uint8Array(outputLength); + + let outIndex = 0; + // Process 4 characters (24 bits) at a time. + for (let i = 0; i < len; i += 4) { + // For each 4-character group, map each char to its 6-bit value. + const c1 = base64Chars.indexOf(base64.charAt(i)); + const c2 = base64Chars.indexOf(base64.charAt(i + 1)); + // If the character is "=" it means that portion is padded; so use 0. + const c3 = base64.charAt(i + 2) === '=' ? 0 : base64Chars.indexOf(base64.charAt(i + 2)); + const c4 = base64.charAt(i + 3) === '=' ? 0 : base64Chars.indexOf(base64.charAt(i + 3)); + + // Combine the four 6-bit groups into one 24-bit number. + const triple = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4; + + // Depending on padding, extract the bytes. + if (base64.charAt(i + 2) === '=') { + // Only one byte of output. + output[outIndex++] = (triple >> 16) & 0xFF; + } else if (base64.charAt(i + 3) === '=') { + // Two bytes of output. + output[outIndex++] = (triple >> 16) & 0xFF; + output[outIndex++] = (triple >> 8) & 0xFF; + } else { + // Three bytes of output. + output[outIndex++] = (triple >> 16) & 0xFF; + output[outIndex++] = (triple >> 8) & 0xFF; + output[outIndex++] = triple & 0xFF; + } + } + + const dataView = new DataView(output.buffer, output.byteOffset, output.byteLength); + + return [output, dataView]; +} + +function uint8ArrayToBase64(data: Uint8Array): string { + let base64 = ""; + + // Process every 3 bytes, turning them into 4 base64 characters. + for (let i = 0; i < data.length; i += 3) { + // Read bytes; if not enough bytes remain, substitute 0. + const byte1 = data[i]; + const byte2 = i + 1 < data.length ? data[i + 1] : 0; + const byte3 = i + 2 < data.length ? data[i + 2] : 0; + + // Combine the three bytes into a 24-bit number. + const triple = (byte1 << 16) | (byte2 << 8) | byte3; + + // Split the 24-bit number into four 6-bit numbers. + const index1 = (triple >> 18) & 0x3F; + const index2 = (triple >> 12) & 0x3F; + const index3 = (triple >> 6) & 0x3F; + const index4 = triple & 0x3F; + + // Always add the first two characters. + base64 += base64Chars.charAt(index1); + base64 += base64Chars.charAt(index2); + + // For the third character, determine if we had a valid byte2. + if (i + 1 < data.length) { + base64 += base64Chars.charAt(index3); + } else { + base64 += "="; + } + + // For the fourth character, determine if we had a valid byte3. + if (i + 2 < data.length) { + base64 += base64Chars.charAt(index4); + } else { + base64 += "="; + } + } + + return base64; +} + +function encodeStringToUCS2(str: string): Uint8Array { + const byteLength = str.length * 2; + const buffer = new Uint8Array(byteLength); + for (let i = 0; i < str.length; i++) { + const code = str.charCodeAt(i); + // Store in little-endian order: lower byte first, then the high byte. + buffer[i * 2] = code & 0xff; + buffer[i * 2 + 1] = code >> 8; + } + return buffer; +} + +/** + * Detects the encoding of a given XML byte array based on its BOM. + * + * @param xmlBytes - The XML content as a Uint8Array. + * @returns "utf-8", "utf-16le" or "utf-16be" if a BOM is detected; + * otherwise, returns "utf-8" as a default. + */ +function detectEncoding(xmlBytes: Uint8Array): string | null { + if (!xmlBytes || xmlBytes.length === 0) { + return null; + } + + // Check for UTF-8 BOM: EF BB BF + if ( + xmlBytes.length >= 3 && + xmlBytes[0] === 0xEF && + xmlBytes[1] === 0xBB && + xmlBytes[2] === 0xBF + ) { + return "utf-8"; + } + + // Check for UTF-16LE BOM: FF FE + if (xmlBytes.length >= 2 && xmlBytes[0] === 0xFF && xmlBytes[1] === 0xFE) { + return "utf-16le"; + } + + // Check for UTF-16BE BOM: FE FF + if (xmlBytes.length >= 2 && xmlBytes[0] === 0xFE && xmlBytes[1] === 0xFF) { + return "utf-16be"; + } + + // Default to UTF‑8 if no BOM is present. + return "utf-8"; +} + +/** + * Decodes a Uint8Array containing XML data into a string according + * to its detected encoding. + * + * @param xmlBytes - The XML content as a Uint8Array. + * @returns The decoded XML string with any leading BOM removed. + * @throws Error if no encoding can be detected. + */ +function decodeXml(xmlBytes: Uint8Array): string { + const encoding = detectEncoding(xmlBytes); + if (!encoding) { + throw new Error("Failed to detect xml encoding"); + } + + let xmlString: string; + + // For UTF-16BE, swap bytes because TextDecoder does not natively support it. + if (encoding.toLowerCase() === "utf-16be") { + if (xmlBytes.length % 2 !== 0) { + throw new Error("Invalid UTF-16BE byte array (should be even length)"); + } + // Create a new Uint8Array with swapped bytes. + const swappedBytes = new Uint8Array(xmlBytes.length); + for (let i = 0; i < xmlBytes.length; i += 2) { + swappedBytes[i] = xmlBytes[i + 1]; + swappedBytes[i + 1] = xmlBytes[i]; + } + // Now decode as UTF-16LE. + xmlString = new TextDecoder("utf-16le").decode(swappedBytes); + } else { + // For "utf-8" or "utf-16le", decode directly. + xmlString = new TextDecoder(encoding as string).decode(xmlBytes); + } + + // Remove the BOM if present. + return xmlString.replace(/^\ufeff/, ""); +} + export default { - ArrayReader, + decodeXml, + encodeStringToUCS2, + uint8ArrayToBase64, + base64ToUint8Array, getInt32Buffer, concatArrays, }; diff --git a/src/utils/mashupDocumentParser.ts b/src/utils/mashupDocumentParser.ts index 9f84a0c..92d6d72 100644 --- a/src/utils/mashupDocumentParser.ts +++ b/src/utils/mashupDocumentParser.ts @@ -1,11 +1,9 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -import * as base64 from "base64-js"; import JSZip from "jszip"; import { section1mPath, - defaults, uint8ArrayType, emptyValue, textResultType, @@ -19,7 +17,6 @@ import { } from "./constants"; import { arrayUtils } from "."; import { Metadata } from "../types"; -import { ArrayReader } from "./arrayUtils"; import { DOMParser, XMLSerializer } from "xmldom-qsa"; export const replaceSingleQuery = async (base64Str: string, queryName: string, queryMashupDoc: string): Promise => { @@ -40,7 +37,7 @@ export const replaceSingleQuery = async (base64Str: string, queryName: string, q endBuffer ); - return base64.fromByteArray(newMashup); + return arrayUtils.uint8ArrayToBase64(newMashup); }; type PackageComponents = { @@ -53,16 +50,19 @@ type PackageComponents = { }; export const getPackageComponents = (base64Str: string): PackageComponents => { - const buffer: ArrayBufferLike = base64.toByteArray(base64Str).buffer; - const mashupArray: ArrayReader = new arrayUtils.ArrayReader(buffer); - const version: Uint8Array = mashupArray.getBytes(4); - const packageSize: number = mashupArray.getInt32(); - const packageOPC: Uint8Array = mashupArray.getBytes(packageSize); - const permissionsSize: number = mashupArray.getInt32(); - const permissions: Uint8Array = mashupArray.getBytes(permissionsSize); - const metadataSize: number = mashupArray.getInt32(); - const metadata: Uint8Array = mashupArray.getBytes(metadataSize); - const endBuffer: Uint8Array = mashupArray.getBytes(); + const [buffer, dataView] = arrayUtils.base64ToUint8Array(base64Str); + const version = buffer.subarray(0,4); + + const packageSize = dataView.getInt32(4, true); + const packageOPC = new Uint8Array(buffer.subarray(8, 8 + packageSize)); + + const permissionsSize = dataView.getInt32(8 + packageSize, true); + const permissions = new Uint8Array(buffer.subarray(12 + packageSize, 12 + packageSize + permissionsSize)); + + const metadataSize = dataView.getInt32(12 + packageSize + permissionsSize, true); + const metadata = new Uint8Array(buffer.subarray(16 + packageSize + permissionsSize, 16 + packageSize + permissionsSize + metadataSize)); + + const endBuffer = new Uint8Array(buffer.subarray(16 + packageSize + permissionsSize + metadataSize)) return { version, @@ -74,7 +74,7 @@ export const getPackageComponents = (base64Str: string): PackageComponents => { }; }; -const editSingleQueryPackage = async (packageOPC: ArrayBuffer, queryMashupDoc: string): Promise => { +const editSingleQueryPackage = async (packageOPC: Uint8Array, queryMashupDoc: string): Promise => { const packageZip: JSZip = await JSZip.loadAsync(packageOPC); setSection1m(queryMashupDoc, packageZip); @@ -93,12 +93,12 @@ const setSection1m = (queryMashupDoc: string, zip: JSZip): void => { }; export const editSingleQueryMetadata = (metadataArray: Uint8Array, metadata: Metadata): Uint8Array => { - //extract metadataXml - const mashupArray: ArrayReader = new arrayUtils.ArrayReader(metadataArray.buffer); - const metadataVersion: Uint8Array = mashupArray.getBytes(4); - const metadataXmlSize: number = mashupArray.getInt32(); - const metadataXml: Uint8Array = mashupArray.getBytes(metadataXmlSize); - const endBuffer: Uint8Array = mashupArray.getBytes(); + + const dataView = new DataView(metadataArray.buffer, metadataArray.byteOffset, metadataArray.byteLength); + const metadataVersion = metadataArray.subarray(0, 4); + const metadataXmlSize = dataView.getInt32(4, true); + const metadataXml: Uint8Array = metadataArray.subarray(8, 8 + metadataXmlSize); + const endBuffer: Uint8Array = metadataArray.subarray(8+metadataXmlSize); //parse metdataXml const textDecoder: TextDecoder = new TextDecoder(); diff --git a/src/utils/pqUtils.ts b/src/utils/pqUtils.ts index f029fad..4610033 100644 --- a/src/utils/pqUtils.ts +++ b/src/utils/pqUtils.ts @@ -4,8 +4,8 @@ import JSZip from "jszip"; import { maxQueryLength, URLS, BOM, Errors } from "./constants"; import { generateMashupXMLTemplate, generateCustomXmlFilePath } from "../generators"; -import { Buffer } from "buffer"; import { DOMParser } from "xmldom-qsa"; +import arrayUtils from "./arrayUtils"; type CustomXmlFile = { found: boolean; @@ -21,7 +21,7 @@ const getBase64 = async (zip: JSZip): Promise => { const setBase64 = async (zip: JSZip, base64: string): Promise => { const newXml = generateMashupXMLTemplate(base64); - const encoded = Buffer.from(BOM + newXml, "ucs2"); + const encoded = arrayUtils.encodeStringToUCS2(BOM + newXml); const mashup = await getDataMashupFile(zip); zip.file(mashup?.path, encoded); }; @@ -64,16 +64,8 @@ const getCustomXmlFile = async (zip: JSZip, url: string): Promise if (xmlValue === undefined) { break; } + xmlString = arrayUtils.decodeXml(xmlValue); - const buffer: Buffer = Buffer.from(xmlValue); - const encoding: string | null = detectEncoding(xmlValue); - if (!encoding){ - throw new Error("Failed to detect xml encoding") - } - - xmlString = buffer - .toString(encoding as BufferEncoding) - .replace(/^\ufeff/, ""); const doc: Document = parser.parseFromString(xmlString, "text/xml"); found = doc?.documentElement?.namespaceURI === url; @@ -118,26 +110,6 @@ const validateQueryName = (queryName: string): void => { } }; -const detectEncoding = (xmlBytes: Uint8Array): string | null => { - if (!xmlBytes){ - return null; - } - - if (xmlBytes.length >= 3 && xmlBytes[0] === 0xEF && xmlBytes[1] === 0xBB && xmlBytes[2] === 0xBF) { - return 'utf-8'; - } - if (xmlBytes.length >= 3 && xmlBytes[0] === 0xFF && xmlBytes[1] === 0xFE) { - return 'utf-16le'; - } - - if (xmlBytes.length >= 3 && xmlBytes[0] === 0xFE && xmlBytes[1] === 0xFF) { - return 'utf-16be'; - } - - // Default to utf-8, that not required a BOM for encoding. - return 'utf-8'; -} - export default { getBase64, setBase64, diff --git a/tests/arrayUtils.test.ts b/tests/arrayUtils.test.ts index 0c878da..e3588db 100644 --- a/tests/arrayUtils.test.ts +++ b/tests/arrayUtils.test.ts @@ -3,32 +3,7 @@ import { describe, test, expect } from '@jest/globals'; import { arrayUtils } from "../src/utils/"; -import * as base64 from "base64-js"; -describe("ArrayReader tests", () => { - const buffer = base64.toByteArray("UHJhaXNlIFRoZSBTdW4h").buffer; - const arrReader = new arrayUtils.ArrayReader(buffer); - - test("getInt32 test", () => { - const int32 = arrReader.getInt32(); - - expect(int32).toEqual(1767993936); - expect((arrReader as any)._position).toEqual(4); - }); - - test("getBytes test", () => { - const bytes = arrReader.getBytes(4); - - expect(bytes).toEqual(new Uint8Array([115, 101, 32, 84])); - expect((arrReader as any)._position).toEqual(8); - }); - - test("reset test", () => { - arrReader.reset(); - - expect((arrReader as any)._position).toEqual(0); - }); -}); test("getInt32Buffer test", () => { const size = 4; @@ -55,3 +30,344 @@ test("concatArrays test", () => { expect(actual).toStrictEqual(expected); }); + +describe("base64ToUint8Array tests", () => { + test("basic base64 decoding", () => { + const base64 = "SGVsbG8gV29ybGQ="; // "Hello World" + const [result, dataView] = arrayUtils.base64ToUint8Array(base64); + const expected = new Uint8Array([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100]); + + expect(result).toStrictEqual(expected); + expect(dataView).toBeInstanceOf(DataView); + expect(dataView.byteLength).toBe(expected.length); + }); + + test("base64 with single padding", () => { + const base64 = "SGVsbG8="; // "Hello" + const [result] = arrayUtils.base64ToUint8Array(base64); + const expected = new Uint8Array([72, 101, 108, 108, 111]); + + expect(result).toStrictEqual(expected); + }); + + test("base64 with double padding", () => { + const base64 = "QQ=="; // Single character "A" + const [result] = arrayUtils.base64ToUint8Array(base64); + const expected = new Uint8Array([65]); // "A" = 65 + + expect(result).toStrictEqual(expected); + }); + + test("base64 with no padding", () => { + const base64 = "SGVsbG8h"; // "Hello!" + const [result] = arrayUtils.base64ToUint8Array(base64); + const expected = new Uint8Array([72, 101, 108, 108, 111, 33]); + + expect(result).toStrictEqual(expected); + }); + + test("empty base64 string", () => { + const base64 = ""; + const [result] = arrayUtils.base64ToUint8Array(base64); + + expect(result).toStrictEqual(new Uint8Array(0)); + }); + + test("base64 with whitespace", () => { + const base64 = "SGVs bG8g V29y bGQ="; // "Hello World" with spaces + const [result] = arrayUtils.base64ToUint8Array(base64); + const expected = new Uint8Array([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100]); + + expect(result).toStrictEqual(expected); + }); +}); + +describe("uint8ArrayToBase64 tests", () => { + test("basic uint8array to base64", () => { + const input = new Uint8Array([72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100]); // "Hello World" + const result = arrayUtils.uint8ArrayToBase64(input); + + expect(result).toBe("SGVsbG8gV29ybGQ="); + }); + + test("uint8array requiring single padding", () => { + const input = new Uint8Array([72, 101, 108, 108, 111]); // "Hello" + const result = arrayUtils.uint8ArrayToBase64(input); + + expect(result).toBe("SGVsbG8="); + }); + + test("uint8array requiring double padding", () => { + const input = new Uint8Array([65]); // Single byte "A" + const result = arrayUtils.uint8ArrayToBase64(input); + + expect(result).toBe("QQ=="); + }); + + test("uint8array with two bytes (single padding)", () => { + const input = new Uint8Array([72, 105]); // "Hi" + const result = arrayUtils.uint8ArrayToBase64(input); + + expect(result).toBe("SGk="); + }); + + test("uint8array with no padding needed", () => { + const input = new Uint8Array([72, 101, 108, 108, 111, 33]); // "Hello!" + const result = arrayUtils.uint8ArrayToBase64(input); + + expect(result).toBe("SGVsbG8h"); + }); + + test("empty uint8array", () => { + const input = new Uint8Array(0); + const result = arrayUtils.uint8ArrayToBase64(input); + + expect(result).toBe(""); + }); + + test("basic ASCII string", () => { + const input = "Hello"; + const result = arrayUtils.encodeStringToUCS2(input); + // H=72, e=101, l=108, l=108, o=111 in little-endian UCS-2 + const expected = new Uint8Array([72, 0, 101, 0, 108, 0, 108, 0, 111, 0]); + + expect(result).toStrictEqual(expected); + }); + + test("empty string", () => { + const input = ""; + const result = arrayUtils.encodeStringToUCS2(input); + + expect(result).toStrictEqual(new Uint8Array(0)); + }); + + test("string with unicode characters", () => { + const input = "A€"; // A=65, Euro=8364 + const result = arrayUtils.encodeStringToUCS2(input); + // A=65 (0x41), €=8364 (0x20AC) in little-endian + const expected = new Uint8Array([65, 0, 172, 32]); + + expect(result).toStrictEqual(expected); + }); + + test("string with high unicode character", () => { + const input = "🙂"; // Emoji - this will be encoded as surrogate pair + const result = arrayUtils.encodeStringToUCS2(input); + + expect(result).toHaveLength(4); // 2 characters * 2 bytes each (surrogate pair) + }); + + test("single character", () => { + const input = "X"; + const result = arrayUtils.encodeStringToUCS2(input); + const expected = new Uint8Array([88, 0]); // X=88 + + expect(result).toStrictEqual(expected); + }); +}); + +describe("base64 and uint8Array roundtrip tests", () => { + test("empty data roundtrip", () => { + const original = new Uint8Array(0); + const base64 = arrayUtils.uint8ArrayToBase64(original); + const [decoded] = arrayUtils.base64ToUint8Array(base64); + + expect(decoded).toStrictEqual(original); + expect(base64).toBe(""); + }); + + test("single byte roundtrip", () => { + const original = new Uint8Array([42]); + const base64 = arrayUtils.uint8ArrayToBase64(original); + const [decoded] = arrayUtils.base64ToUint8Array(base64); + + expect(decoded).toStrictEqual(original); + expect(base64.endsWith("==")).toBe(true); // Should have double padding + }); + + test("two bytes roundtrip", () => { + const original = new Uint8Array([42, 123]); + const base64 = arrayUtils.uint8ArrayToBase64(original); + const [decoded] = arrayUtils.base64ToUint8Array(base64); + + expect(decoded).toStrictEqual(original); + expect(base64.endsWith("=")).toBe(true); // Should have single padding + expect(base64.endsWith("==")).toBe(false); + }); + + test("three bytes roundtrip (no padding)", () => { + const original = new Uint8Array([42, 123, 200]); + const base64 = arrayUtils.uint8ArrayToBase64(original); + const [decoded] = arrayUtils.base64ToUint8Array(base64); + + expect(decoded).toStrictEqual(original); + expect(base64.includes("=")).toBe(false); // No padding needed + }); + + test("random data roundtrip", () => { + for (let size = 0; size < 100; size++) { + const original = new Uint8Array(size); + for (let i = 0; i < size; i++) { + original[i] = Math.floor(Math.random() * 256); + } + + const base64 = arrayUtils.uint8ArrayToBase64(original); + const [decoded, dataView] = arrayUtils.base64ToUint8Array(base64); + + // Verify roundtrip + expect(decoded).toStrictEqual(original); + // Verify DataView is properly constructed + expect(dataView.byteLength).toBe(original.length); + } + }); + + test("base64 with whitespace handling", () => { + const original = new Uint8Array([1, 2, 3, 4, 5]); + const base64Clean = arrayUtils.uint8ArrayToBase64(original); + const base64WithWhitespace = base64Clean.split('').join(' '); // Add spaces + + const [decoded] = arrayUtils.base64ToUint8Array(base64WithWhitespace); + expect(decoded).toStrictEqual(original); + }); + + test("base64 properties", () => { + const testCases = [ + new Uint8Array([]), + new Uint8Array([1]), + new Uint8Array([1, 2]), + new Uint8Array([1, 2, 3]), + new Uint8Array([1, 2, 3, 4]), + new Uint8Array([255, 255, 255]) + ]; + + testCases.forEach(original => { + const base64 = arrayUtils.uint8ArrayToBase64(original); + + // Base64 should only contain valid characters + expect(base64).toMatch(/^[A-Za-z0-9+/]*={0,2}$/); + + // Length should be correct (padded to multiple of 4) + expect(base64.length % 4).toBe(0); + + // Roundtrip should work + const [decoded] = arrayUtils.base64ToUint8Array(base64); + expect(decoded).toStrictEqual(original); + }); + }); +}); + +describe("encodeStringToUCS2 tests", () => { + test("empty string", () => { + const result = arrayUtils.encodeStringToUCS2(""); + expect(result).toStrictEqual(new Uint8Array(0)); + }); + + test("string length matches byte length", () => { + const testStrings = ["A", "AB", "Hello", "Hello World"]; + + testStrings.forEach(str => { + const result = arrayUtils.encodeStringToUCS2(str); + expect(result.length).toBe(str.length * 2); + }); + }); + + test("ASCII characters are encoded correctly", () => { + // Test that we can decode what we encode using built-in TextDecoder + const testStrings = ["A", "Hello", "Test123"]; + + testStrings.forEach(str => { + const encoded = arrayUtils.encodeStringToUCS2(str); + const decoded = new TextDecoder('utf-16le').decode(encoded); + expect(decoded).toBe(str); + }); + }); + + test("unicode characters are handled", () => { + const testStrings = ["€", "🙂", "café"]; + + testStrings.forEach(str => { + const encoded = arrayUtils.encodeStringToUCS2(str); + const decoded = new TextDecoder('utf-16le').decode(encoded); + expect(decoded).toBe(str); + }); + }); + + test("little-endian byte order", () => { + const result = arrayUtils.encodeStringToUCS2("A"); // A = 65 = 0x41 + expect(result).toStrictEqual(new Uint8Array([0x41, 0x00])); // Little-endian + }); +}); + +describe("decodeXml tests", () => { + const sampleXml = 'test'; + + test("UTF-8 without BOM", () => { + const xmlBytes = new TextEncoder().encode(sampleXml); + const result = arrayUtils.decodeXml(xmlBytes); + expect(result).toBe(sampleXml); + }); + + test("UTF-8 with BOM is stripped", () => { + const xmlBytes = new Uint8Array([0xEF, 0xBB, 0xBF, ...new TextEncoder().encode(sampleXml)]); + const result = arrayUtils.decodeXml(xmlBytes); + expect(result).toBe(sampleXml); + expect(result.charCodeAt(0)).not.toBe(0xFEFF); // BOM should be removed + }); + + test("UTF-16LE with BOM", () => { + // Create UTF-16LE with BOM manually + const utf16Bytes = new Uint8Array(2 + sampleXml.length * 2); + utf16Bytes[0] = 0xFF; // BOM + utf16Bytes[1] = 0xFE; + + // Encode each character as little-endian UTF-16 + for (let i = 0; i < sampleXml.length; i++) { + const code = sampleXml.charCodeAt(i); + utf16Bytes[2 + i * 2] = code & 0xff; + utf16Bytes[2 + i * 2 + 1] = code >> 8; + } + + const result = arrayUtils.decodeXml(utf16Bytes); + expect(result).toBe(sampleXml); + }); + + test("UTF-16BE with BOM", () => { + // Create UTF-16BE with BOM manually + const utf16Bytes = new Uint8Array(2 + sampleXml.length * 2); + utf16Bytes[0] = 0xFE; // BOM + utf16Bytes[1] = 0xFF; + + // Encode each character as big-endian UTF-16 + for (let i = 0; i < sampleXml.length; i++) { + const code = sampleXml.charCodeAt(i); + utf16Bytes[2 + i * 2] = code >> 8; + utf16Bytes[2 + i * 2 + 1] = code & 0xff; + } + + const result = arrayUtils.decodeXml(utf16Bytes); + expect(result).toBe(sampleXml); + }); + + test("empty bytes throws error", () => { + expect(() => arrayUtils.decodeXml(new Uint8Array(0))).toThrow("Failed to detect xml encoding"); + }); + + test("invalid UTF-16BE throws error", () => { + const invalidBytes = new Uint8Array([0xFE, 0xFF, 0x00]); // BOM + odd length + expect(() => arrayUtils.decodeXml(invalidBytes)).toThrow("Invalid UTF-16BE byte array"); + }); + + test("encoding detection works correctly", () => { + // Test that different BOMs are detected and handled + const testCases = [ + { bytes: new Uint8Array([0xEF, 0xBB, 0xBF, 65]), name: "UTF-8 BOM" }, + { bytes: new Uint8Array([0xFF, 0xFE, 65, 0]), name: "UTF-16LE BOM" }, + { bytes: new Uint8Array([0xFE, 0xFF, 0, 65]), name: "UTF-16BE BOM" }, + { bytes: new Uint8Array([65]), name: "No BOM (defaults to UTF-8)" } + ]; + + testCases.forEach(({ bytes }) => { + expect(() => arrayUtils.decodeXml(bytes)).not.toThrow(); + }); + }); +}); \ No newline at end of file diff --git a/tests/mashupDocumentParser.test.ts b/tests/mashupDocumentParser.test.ts index b36d7f1..e7d7d6f 100644 --- a/tests/mashupDocumentParser.test.ts +++ b/tests/mashupDocumentParser.test.ts @@ -5,7 +5,6 @@ import { TextDecoder, TextEncoder } from "util"; import { replaceSingleQuery, getPackageComponents, editSingleQueryMetadata } from "../src/utils/mashupDocumentParser"; import { arrayUtils, pqUtils } from "../src/utils"; import { section1mNewQueryNameSimpleMock, pqMetadataXmlMockPart1, pqMetadataXmlMockPart2 } from "./mocks"; -import base64 from "base64-js"; import JSZip from "jszip"; import { SIMPLE_QUERY_WORKBOOK_TEMPLATE } from "../src/workbookTemplate"; import { section1mPath } from "../src/utils/constants"; @@ -23,11 +22,10 @@ describe("Mashup Document Parser tests", () => { if (originalBase64Str) { const replacedQueryBase64Str = await replaceSingleQuery(originalBase64Str, "newQueryName", section1mNewQueryNameSimpleMock); - const buffer = base64.toByteArray(replacedQueryBase64Str).buffer; - const mashupArray = new arrayUtils.ArrayReader(buffer); - const startArray = mashupArray.getBytes(4); - const packageSize = mashupArray.getInt32(); - const packageOPC = mashupArray.getBytes(packageSize); + const [buffer,dataView] = arrayUtils.base64ToUint8Array(replacedQueryBase64Str); + + const packageSize = dataView.getInt32(4, true); + const packageOPC = buffer.subarray(8, 8 + packageSize); const zip = await JSZip.loadAsync(packageOPC); const section1m = await zip.file(section1mPath)?.async("text"); if (section1m) { @@ -42,7 +40,7 @@ describe("Mashup Document Parser tests", () => { const originalBase64Str = await pqUtils.getBase64(defaultZipFile); if (originalBase64Str) { const { metadata } = getPackageComponents(originalBase64Str); - const newMetadataArray = editSingleQueryMetadata(metadata, { queryName: "newQueryName" }); + const newMetadataArray = editSingleQueryMetadata(metadata as Uint8Array, { queryName: "newQueryName" }); const metadataString = new util.TextDecoder("utf-8").decode(newMetadataArray); expect(metadataString.replace(/ /g, "")).toContain(pqMetadataXmlMockPart1.replace(/ /g, "")); expect(metadataString.replace(/ /g, "")).toContain(pqMetadataXmlMockPart2.replace(/ /g, ""));