/**
 * Base91 alphabet - the 91 printable ASCII characters used by basE91.
 * A character's position in this string is its digit value (0-90).
 */
const BASE91_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!#$%&()*+,./:;<=>?@[]^_`{|}~\"";

/**
 * Decode table for Base91.
 * Maps a character code in the range 0-255 to its digit value, or -1 when the
 * character is not part of the alphabet.
 */
const BASE91_DECODE_TABLE = new Array(256).fill(-1);
for (let i = 0; i < BASE91_ALPHABET.length; i++) {
    BASE91_DECODE_TABLE[BASE91_ALPHABET.charCodeAt(i)] = i;
}

/**
 * Encode bytes to Base91
 *
 * Bytes are pushed least-significant-bit first into a bit accumulator. Whenever
 * more than 13 bits are queued, a group of 13 bits (or 14 bits, when the low
 * 13 bits are 88 or less) is removed and written as two alphabet characters.
 *
 * @param {Uint8Array} data - Input byte array
 * @returns {string} Base91 encoded string
 * @modified-by Izai Alejandro Zalles Merino (ialejandrozalles)
 */
export function encodeBase91(data) {
    let accumulator = 0;
    let accumulatorBits = 0;
    let output = "";

    for (let i = 0; i < data.length; i++) {
        accumulator |= data[i] << accumulatorBits;
        accumulatorBits += 8;

        if (accumulatorBits > 13) {
            let value = accumulator & 8191;

            if (value > 88) {
                accumulator >>= 13;
                accumulatorBits -= 13;
            } else {
                // Low 13 bits are small enough that a 14th bit still fits in two digits
                value = accumulator & 16383;
                accumulator >>= 14;
                accumulatorBits -= 14;
            }

            output += BASE91_ALPHABET[value % 91] + BASE91_ALPHABET[Math.floor(value / 91)];
        }
    }

    // Flush whatever is left: one character, or two when the remainder needs them
    if (accumulatorBits > 0) {
        output += BASE91_ALPHABET[accumulator % 91];

        if (accumulatorBits > 7 || accumulator > 90) {
            output += BASE91_ALPHABET[Math.floor(accumulator / 91)];
        }
    }

    return output;
}

/**
 * Decode Base91 string to bytes
 *
 * Characters are consumed in pairs; each pair carries 13 or 14 bits. The width
 * is chosen with the same rule the encoder uses: 13 bits when the low 13 bits
 * of the pair value exceed 88, otherwise 14 bits. A trailing unpaired character
 * contributes one final byte.
 *
 * @param {string} str - Base91 encoded string
 * @returns {Uint8Array} Decoded byte array
 * @throws {OperationError} if str contains a character outside the Base91
 *     alphabet (this includes whitespace and any non-Latin-1 character)
 * @modified-by Izai Alejandro Zalles Merino (ialejandrozalles)
 */
export function decodeBase91(str) {
    let accumulator = 0;
    let accumulatorBits = 0;
    let value = -1;
    const output = [];

    for (let i = 0; i < str.length; i++) {
        const charCode = str.charCodeAt(i);
        // The table only covers codes 0-255; anything above is never a Base91 digit
        const decodeValue = charCode < BASE91_DECODE_TABLE.length ? BASE91_DECODE_TABLE[charCode] : -1;

        if (decodeValue === -1) {
            throw new OperationError(`Invalid Base91 character: ${str[i]}`);
        }

        if (value === -1) {
            // First character of a pair: remember it and wait for the second
            value = decodeValue;
            continue;
        }

        value += decodeValue * 91;
        accumulator |= (value << accumulatorBits);
        // Mirror the encoder: the group width depends on the LOW 13 bits only.
        // Pair values of 8192 and above are 14-bit groups even though they exceed 88.
        accumulatorBits += (value & 8191) > 88 ? 13 : 14;
        value = -1;

        while (accumulatorBits > 7) {
            output.push(accumulator & 255);
            accumulator >>= 8;
            accumulatorBits -= 8;
        }
    }

    if (value !== -1) {
        accumulator |= value << accumulatorBits;
        output.push(accumulator & 255);
    }

    return new Uint8Array(output);
}
/**
 * Dancing Men Decode operation
 *
 * Turns textual Dancing Men tokens of the form char(97)..char(122) back into
 * the letters a-z. A token directly followed by '!' carries a flag, which can
 * optionally be read as a word separator.
 */
class DancingMenDecode extends Operation {

    /**
     * DancingMenDecode constructor
     */
    constructor() {
        super();

        this.name = "Dancing Men Decode";
        this.module = "Ciphers";
        this.description = "Decode Dancing Men token format (char(97)..char(122), optional ! for flags) back to text.";
        this.infoURL = "https://www.dcode.fr/dancing-men-cipher";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Flags indicate spaces",
                type: "boolean",
                value: false
            }
        ];
        // Magic detection: the whole input is 3 or more char(ddd) tokens, each optionally followed by '!'
        this.checks = [
            {
                pattern: "^(?:\\s*char\\(\\d{2,3}\\)!?\\s*){3,}$",
                args: [false],
                useful: true
            }
        ];
    }

    /**
     * Replaces every char(NN)/char(NNN) token, together with its optional '!'
     * flag, by the letter it stands for. Text between tokens is kept unchanged.
     *
     * @param {string} input
     * @param {Object[]} args
     * @returns {string}
     */
    run(input, args) {
        const flagsAsSpaces = args[0];

        return input.replace(/char\((\d{2,3})\)(!?)/g, (token, digits, flag) => {
            const code = parseInt(digits, 10);
            const isLower = code >= 97 && code <= 122;
            const isUpper = code >= 65 && code <= 90;

            // Upper-case codes are folded to lower case; any other code yields nothing
            let decoded = "";
            if (isLower) {
                decoded = String.fromCharCode(code);
            } else if (isUpper) {
                decoded = String.fromCharCode(code + 32);
            }

            // A flagged token is followed by a space only when the option is enabled
            const wordBreak = flagsAsSpaces && flag === "!" ? " " : "";
            return decoded + wordBreak;
        });
    }
}

export default DancingMenDecode;
/**
 * Dancing Men Encode operation
 *
 * Encodes the ASCII letters a-z (case-insensitively) into textual Dancing Men
 * tokens of the form char(97)..char(122). Optionally, a space can be represented
 * by a flag marker ("!") appended to the preceding token, to mimic the
 * word-separator flag described in Conan Doyle's short story.
 */
class DancingMenEncode extends Operation {

    /**
     * DancingMenEncode constructor
     */
    constructor() {
        super();

        this.name = "Dancing Men Encode";
        this.module = "Ciphers";
        this.description = "Encode plaintext to Dancing Men token format using tokens like char(97)..char(122). Optionally mark word boundaries with a flag (!).";
        this.infoURL = "https://www.dcode.fr/dancing-men-cipher";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Use flags as word separators",
                type: "boolean",
                value: false
            },
            {
                name: "Separator between tokens",
                type: "option",
                value: ["Space", "None"],
                defaultIndex: 0
            }
        ];
    }

    /**
     * Converts each ASCII letter to a char(NNN) token (NNN is the code of the
     * lower-case letter) and passes every other character through unchanged.
     *
     * Space handling: when flags are enabled and the nearest preceding piece is
     * a letter token that has not been flagged yet, "!" is appended to that
     * token instead of emitting the space. In every other case the space is
     * emitted literally, so a token never receives more than one flag.
     *
     * @param {string} input
     * @param {Object[]} args - [useFlags, separator choice ("Space" or "None")]
     * @returns {string}
     */
    run(input, args) {
        const [useFlags, sepChoice] = args;
        const sep = sepChoice === "None" ? "" : " ";
        const tokenRe = /^char\(\d{2,3}\)!?$/;

        const out = [];
        // Index in `out` of a token that may still receive a flag, or -1
        let flaggableIdx = -1;

        for (let i = 0; i < input.length; i++) {
            const ch = input[i];

            // Test the character itself: lower-casing first would let characters such as
            // U+0130 or U+212A (whose lower-case form starts with an ASCII letter) be
            // silently encoded as "i" / "k".
            if (/^[A-Za-z]$/.test(ch)) {
                out.push(`char(${ch.toLowerCase().charCodeAt(0)})`);
                flaggableIdx = out.length - 1;
            } else if (ch === " " && useFlags && flaggableIdx >= 0) {
                // Append a flag marker to the previous token to denote a word boundary.
                // Clear the index so a run of spaces cannot produce "char(97)!!".
                out[flaggableIdx] += "!";
                flaggableIdx = -1;
            } else {
                // Literal spaces, line breaks, tabs and all other characters pass through as-is
                out.push(ch);
                flaggableIdx = -1;
            }
        }

        // Insert the separator only between two directly adjacent char(...) tokens
        // (with or without a flag); pass-through characters are never padded.
        let result = "";
        for (let i = 0; i < out.length; i++) {
            result += out[i];
            if (sep && i + 1 < out.length && tokenRe.test(out[i]) && tokenRe.test(out[i + 1])) {
                result += sep;
            }
        }
        return result;
    }
}

export default DancingMenEncode;
binary-to-text encoding scheme that uses 91 printable ASCII characters. It provides better space efficiency than Base64 while maintaining readability. This operation decodes Base91-encoded text back to its original binary data."; + this.infoURL = "https://en.wikipedia.org/wiki/Binary-to-text_encoding#Encoding_standards"; + this.inputType = "string"; + this.outputType = "ArrayBuffer"; + /* eslint-disable no-useless-escape */ + this.checks = [ + { + pattern: + "^" + + "[\\s0-9A-Za-z!#$%&()*+,./:;<=>?@\\\[\\\]\\^_`{|}~\"]*" + + "[0-9A-Za-z!#$%&()*+,./:;<=>?@\\\[\\\]\\^_`{|}~\"]{15}" + + "[\\s0-9A-Za-z!#$%&()*+,./:;<=>?@\\\[\\\]\\^_`{|}~\"]*" + + "$", + args: [] + } + /* eslint-enable no-useless-escape */ + ]; + } + + /** + * @param {string} input + * @param {Object[]} args + * @returns {ArrayBuffer} + */ + run(input, args) { + const decoded = decodeBase91(input); + return decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength); + } +} + +export default FromBase91; diff --git a/src/core/operations/FromMorseCode.mjs b/src/core/operations/FromMorseCode.mjs index b0aa4ef289..8dacfb4aae 100644 --- a/src/core/operations/FromMorseCode.mjs +++ b/src/core/operations/FromMorseCode.mjs @@ -42,8 +42,20 @@ class FromMorseCode extends Operation { pattern: "(?:^[-. \\n]{5,}$|^[_. \\n]{5,}$|^(?:dash|dot| |\\n){5,}$)", flags: "i", args: ["Space", "Line feed"] - } - ]; + }, + { + pattern: "^(?=.*/)[-./ \n]{5,}$", + args: ["Space", "Forward slash"] + }, + { + pattern: "^(?=.*/)[_. 
/\n]{5,}$", + args: ["Space", "Forward slash"] + }, + { + pattern: "^(?=.*/)(?:dash|dot| |/|\n){5,}$", + flags: "i", + args: ["Space", "Forward slash"] + }]; } /** diff --git a/src/core/operations/OpticalCharacterRecognition.mjs b/src/core/operations/OpticalCharacterRecognition.mjs index dfcff96546..4be7d41cf7 100644 --- a/src/core/operations/OpticalCharacterRecognition.mjs +++ b/src/core/operations/OpticalCharacterRecognition.mjs @@ -15,6 +15,7 @@ import { isWorkerEnvironment } from "../Utils.mjs"; import { createWorker } from "tesseract.js"; const OEM_MODES = ["Tesseract only", "LSTM only", "Tesseract/LSTM Combined"]; +const OCR_DEFAULT_WHITELIST = ""; /** * Optical Character Recognition operation @@ -34,6 +35,11 @@ class OpticalCharacterRecognition extends Operation { this.inputType = "ArrayBuffer"; this.outputType = "string"; this.args = [ + { + name: "Character whitelist (optional)", + type: "string", + value: OCR_DEFAULT_WHITELIST + }, { name: "Show confidence", type: "boolean", @@ -44,6 +50,12 @@ class OpticalCharacterRecognition extends Operation { type: "option", value: OEM_MODES, defaultIndex: 1 + }, + // New option appended to avoid breaking existing saved recipes + { + name: "Remove line breaks", + type: "boolean", + value: false } ]; } @@ -54,7 +66,7 @@ class OpticalCharacterRecognition extends Operation { * @returns {string} */ async run(input, args) { - const [showConfidence, oemChoice] = args; + const [showConfidence, oemChoice, removeLineBreaks = false] = args; if (!isWorkerEnvironment()) throw new OperationError("This operation only works in a browser"); @@ -75,17 +87,25 @@ class OpticalCharacterRecognition extends Operation { corePath: `${assetDir}tesseract/tesseract-core.wasm.js`, logger: progress => { if (isWorkerEnvironment()) { - self.sendStatusMessage(`Status: ${progress.status}${progress.status === "recognizing text" ? 
` - ${(parseFloat(progress.progress)*100).toFixed(2)}%`: "" }`); + self.sendStatusMessage(`Status: ${progress.status}${progress.status === "recognizing text" ? ` - ${(parseFloat(progress.progress) * 100).toFixed(2)}%` : ""}`); } } }); + self.sendStatusMessage("Configuring OCR parameters..."); + if (whitelist && whitelist.length) { + await worker.setParameters({ /* eslint-disable camelcase */ tessedit_char_whitelist: whitelist /* eslint-enable camelcase */ }); + } self.sendStatusMessage("Finding text..."); const result = await worker.recognize(image); + let text = result?.data?.text ?? ""; + if (removeLineBreaks) { + text = text.replace(/\r?\n/g, ""); + } if (showConfidence) { - return `Confidence: ${result.data.confidence}%\n\n${result.data.text}`; + return `Confidence: ${result.data.confidence}%\n\n${text}`; } else { - return result.data.text; + return text; } } catch (err) { throw new OperationError(`Error performing OCR on image. (${err})`); diff --git a/src/core/operations/RenderImage.mjs b/src/core/operations/RenderImage.mjs index 5dee6d3c88..61b57de61c 100644 --- a/src/core/operations/RenderImage.mjs +++ b/src/core/operations/RenderImage.mjs @@ -8,7 +8,7 @@ import { fromBase64, toBase64 } from "../lib/Base64.mjs"; import { fromHex } from "../lib/Hex.mjs"; import Operation from "../Operation.mjs"; import OperationError from "../errors/OperationError.mjs"; -import Utils from "../Utils.mjs"; +import Utils, { isWorkerEnvironment } from "../Utils.mjs"; import {isImage} from "../lib/FileType.mjs"; /** @@ -104,7 +104,12 @@ class RenderImage extends Operation { // Add image data to URI dataURI += "base64," + toBase64(data); - return ""; + let html = ""; + if (isWorkerEnvironment()) { + const ocrLink = "#recipe=Optical_Character_Recognition('Show confidence',true,'OCR Engine Mode','LSTM only')"; + html = "
" + html + "
"; + } + return html; } } diff --git a/src/core/operations/ToBase91.mjs b/src/core/operations/ToBase91.mjs new file mode 100644 index 0000000000..88fa5cfb5b --- /dev/null +++ b/src/core/operations/ToBase91.mjs @@ -0,0 +1,39 @@ +/** + * @author Izai Alejandro Zalles Merino (ialejandrozalles) + * @copyright © 2025 Izai Alejandro Zalles Merino + * @license Apache-2.0 + */ + +import { encodeBase91 } from "../lib/Base91.mjs"; +import Operation from "../Operation.mjs"; + +/** + * To Base91 operation + */ +class ToBase91 extends Operation { + /** + * ToBase91 constructor + */ + constructor() { + super(); + + this.name = "To Base91"; + this.module = "Default"; + this.description = "Base91 is a binary-to-text encoding scheme that uses 91 printable ASCII characters. It provides better space efficiency than Base64 while maintaining readability. Base91 encodes arbitrary binary data using characters A-Z, a-z, 0-9, and various symbols (excluding hyphen, backslash, and single quote)."; + this.infoURL = "https://en.wikipedia.org/wiki/Binary-to-text_encoding#Encoding_standards"; + this.inputType = "ArrayBuffer"; + this.outputType = "string"; + } + + /** + * @param {ArrayBuffer} input + * @param {Object[]} args + * @returns {string} + */ + run(input, args) { + const data = new Uint8Array(input); + return encodeBase91(data); + } +} + +export default ToBase91; diff --git a/src/web/waiters/InputWaiter.mjs b/src/web/waiters/InputWaiter.mjs index d32ed9d150..179c7c6565 100644 --- a/src/web/waiters/InputWaiter.mjs +++ b/src/web/waiters/InputWaiter.mjs @@ -1665,7 +1665,12 @@ class InputWaiter { */ handlePostMessage(e) { log.debug(e); - if ("data" in e && "id" in e.data && "value" in e.data) { + // Guard against non-object events (e.g., HMR messages may set e.data to a string like 'webpackHotUpdate...') + if ( + e && typeof e === "object" && + "data" in e && e.data && typeof e.data === "object" && + "id" in e.data && "value" in e.data + ) { if (e.data.id === "setInput") { 
this.setInput(e.data.value); } diff --git a/tests/node/tests/nodeApi.mjs b/tests/node/tests/nodeApi.mjs index 29a47ffc8d..2aa1c5fec0 100644 --- a/tests/node/tests/nodeApi.mjs +++ b/tests/node/tests/nodeApi.mjs @@ -136,7 +136,7 @@ TestRegister.addApiTests([ it("chef.help: returns multiple results", () => { const result = chef.help("base 64"); - assert.strictEqual(result.length, 13); + assert.strictEqual(result.length, 15); }), it("chef.help: looks in description for matches too", () => { diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs index f147e9e7c7..9b25fb2f3f 100644 --- a/tests/operations/index.mjs +++ b/tests/operations/index.mjs @@ -24,6 +24,7 @@ import "./tests/Base58.mjs"; import "./tests/Base62.mjs"; import "./tests/Base64.mjs"; import "./tests/Base85.mjs"; +import "./tests/Base91.mjs"; import "./tests/Base92.mjs"; import "./tests/BCD.mjs"; import "./tests/BitwiseOp.mjs"; diff --git a/tests/operations/tests/Base91.mjs b/tests/operations/tests/Base91.mjs new file mode 100644 index 0000000000..f75d2b96ac --- /dev/null +++ b/tests/operations/tests/Base91.mjs @@ -0,0 +1,35 @@ +/** + * Base91 tests + * + * @author ialejandrozalles + * @license Apache-2.0 + */ +import TestRegister from "../../lib/TestRegister.mjs"; + +TestRegister.addTests([ + { + name: "To Base91", + input: "hello", + expectedOutput: "TPwJh>A", + recipeConfig: [ + { op: "To Base91", args: [] } + ] + }, + { + name: "From Base91", + input: "TPwJh>A", + expectedOutput: "hello", + recipeConfig: [ + { op: "From Base91", args: [] } + ] + }, + { + name: "To/From Base91 Roundtrip", + input: "The quick brown fox jumps over the lazy dog", + expectedOutput: "The quick brown fox jumps over the lazy dog", + recipeConfig: [ + { op: "To Base91", args: [] }, + { op: "From Base91", args: [] } + ] + } +]); diff --git a/tests/operations/tests/Magic.mjs b/tests/operations/tests/Magic.mjs index 90401dc19c..b8aa5e2c0e 100644 --- a/tests/operations/tests/Magic.mjs +++ 
b/tests/operations/tests/Magic.mjs @@ -152,5 +152,16 @@ TestRegister.addTests([ args: [1, false, false] } ] + }, + { + name: "Magic Chain: Base91", + input: "xD7ghoHB4!#/Tm_ogr$J9[JTrUJ*|jgSr!=EWoFB", + expectedMatch: /From_Base91\(\)/, + recipeConfig: [ + { + op: "Magic", + args: [3, false, false] + } + ], } ]);