From 3e837715df395286ea564d4ccde60c537a4956d1 Mon Sep 17 00:00:00 2001 From: "Ashlynn \"Void Voxel\" Juniper" <160202125+voidvoxel@users.noreply.github.com> Date: Fri, 19 Apr 2024 20:21:17 -0400 Subject: [PATCH 01/14] Port the AE from `@voidvoxel/auto-encoder` --- src/auto-encoder.ts | 571 ++++++++++++++++++++++++++++++++++++++++++++ src/index.ts | 2 + 2 files changed, 573 insertions(+) create mode 100644 src/auto-encoder.ts diff --git a/src/auto-encoder.ts b/src/auto-encoder.ts new file mode 100644 index 00000000..78d60f21 --- /dev/null +++ b/src/auto-encoder.ts @@ -0,0 +1,571 @@ +import { INeuralNetworkData, INeuralNetworkTrainOptions } from "./neural-network"; +import { INeuralNetworkGPUOptions, NeuralNetworkGPU } from "./neural-network-gpu"; + +import { ITrainingDatum } from "./lookup"; +import { IFeedForwardTrainingData } from "./feed-forward"; + + +/** + * @typedef {Object} AETrainOptions + * @property {number} errorThresh + * Once the training error reaches `errorThresh`, training will be complete. + * @property {number} iterations + * Once the training epoch count reaches `iterations`, training will be + * complete. + * @property {number} learningRate + * The rate at which values will be changed. + */ + +interface AETrainOptions extends INeuralNetworkTrainOptions {}; + +/** + * @typedef {import('brain.js/dist/lookup').ITrainingDatum[]} ITrainingData + */ + +type ITrainingData = ITrainingDatum[]; + +/** + * + * @param {string} word + * The word to convert into a vector. + * @param {number} wordLength + * The maximum possible length of a word. 
+ * @returns {Float32Array} + */ +function word2vec ( + word: string, + wordLength: number = 16 +) { + if (wordLength) { + word = word.padEnd(wordLength); + } + + const byteLength = wordLength * 4; + const bitLength = byteLength * 8; + + const vec = new Float32Array(bitLength); + + let index = 0; + + for (let char of word) { + let byte = char.charCodeAt(0); + + vec[index++] = byte & 0b0000_0001; + vec[index++] = (byte & 0b0000_0010) >> 1; + vec[index++] = (byte & 0b0000_0100) >> 2; + vec[index++] = (byte & 0b0000_1000) >> 3; + vec[index++] = (byte & 0b0001_0000) >> 4; + vec[index++] = (byte & 0b0010_0000) >> 5; + vec[index++] = (byte & 0b0100_0000) >> 6; + vec[index++] = (byte & 0b1000_0000) >> 7; + } + + return vec; +} + +/** + * Convert a vector of bits into a word. + * @param {number[]} vec The vector of bits to convert into a word. + * @returns {string} The decoded word. + */ +function vec2word ( + vec: number[] +) { + const bytes = []; + + for ( + let vecIndex = 0; + vecIndex < vec.length; + vecIndex += 8 + ) { + let byte = 0x00; + + for ( + let localBitIndex = 0; + localBitIndex < 8; + localBitIndex++ + ) { + const bitIndex = vecIndex + localBitIndex; + const predictedBit = vec[bitIndex]; + + const bit = Math.round(predictedBit); + + byte |= bit << localBitIndex; + } + + bytes.push(byte); + } + + let word = String.fromCharCode(...bytes).trim(); + + return word; +} + +/** + * @typedef {DataType[] | string} AutoDecodedData + */ + +type AutoDecodedData + = DataType[] + | boolean[] + | number[] + | string + ; + +/** + * @typedef {Float32Array} AutoEncodedData + */ + +type AutoEncodedData = Float32Array; + +/** + * @typedef {"boolean"|"number"|"string"} DataType + */ + +type DataType = boolean | number | string; + +/** + * @typedef {Object} AE + */ + +/** + * An Auto Encoder (AE) is a type of neural network consisting of two + * subnetworks: an encoder, and a decoder. 
+ * The encoder is responsible for converting the input into a smaller + * representation via feature extraction. + * The decoder is responsible for reconstructing the original input from a + * vector of extracted features. + * + * Example usage: + * ``` + * const ae = new AE(10, 1, 'string'); + * + * ae.train(["this", "is", "an", "example"]); + * + * const encoded = ae.encode("example"); + * const decoded = ae.decode(encoded); + * + * console.log(encoded, '->', decoded); + * ``` + */ +export class AE< + InputType extends INeuralNetworkData, + OutputType extends INeuralNetworkData +> { + _dataType: DataType; + _encodedDataSize: number; + _transcodedDataSize: number; + _decodedDataSize: number; + encoder: NeuralNetworkGPU; + decoder: NeuralNetworkGPU; + + /** + * Create a new auto encoder. + * @param {number} decodedDataSize + * The size of the data prior to encoding, and after decoding. + * @param {number} encodedDataSize + * The size of the data after encoding, and prior to decoding. + * @param {DataType} dataType + * The type of data to encode. 
+ */ + constructor ( + decodedDataSize: number, + encodedDataSize: number, + dataType: DataType = 'number' + ) { + const transcodedDataSize = Math.round( + (encodedDataSize + decodedDataSize) * 0.5 + ); + + /** + * @type {DataType} + */ + this._dataType = dataType; + + /** + * @type {number} + */ + this._encodedDataSize = encodedDataSize; + + /** + * @type {number} + */ + this._transcodedDataSize = transcodedDataSize; + + /** + * @type {number} + */ + this._decodedDataSize = decodedDataSize; + + /** + * @type {NeuralNetworkGPU} + */ + this.encoder = new NeuralNetworkGPU( + { + hiddenLayers: [ + this._getTranscodedDataSize(), + this._getEncodedDataSize(), + this._getTranscodedDataSize() + ], + inputSize: this._getDecodedDataSize(), + outputSize: this._getDecodedDataSize() + } + ); + + /** + * @type {NeuralNetworkGPU} + */ + this.decoder = new NeuralNetworkGPU( + { + hiddenLayers: [ this._getTranscodedDataSize() ], + inputSize: this._getEncodedDataSize(), + outputSize: this._getDecodedDataSize() + } + ); + } + + /** + * Parse a stringified `AE`. + * @param {string} jsonString + * A JSON string containing a stringified `AE`. + * @returns + */ + static parse ( + jsonString: string + ) { + const json = JSON.parse(jsonString); + + const autoEncoder = new AE( + json.decodedDataSize, + json.encodedDataSize, + json.dataType + ); + + autoEncoder.fromJSON(json); + + return autoEncoder; + } + + _accuracy ( + input: Array + ) { + const encoded = this.encode(input); + const decoded = this.decode(encoded); + + let accuracy = 0; + + for ( + let i = 0; + i < decoded.length; + i++ + ) { + const inputValue = input[i]; + // TODO: Support types other than 'number' here. 
+ const decodedValue = Math.round(decoded[i] as number); + + const isCorrect = inputValue === decodedValue; + + if (isCorrect) { + accuracy += 1; + } + } + + accuracy /= decoded.length; + + return accuracy; + } + + accuracy ( + trainingData: DataType[] | DataType[][] + ) { + if ( + !trainingData.hasOwnProperty('length') || + typeof trainingData[0] !== 'object' + ) { + return this._accuracy(trainingData as DataType[]); + } + + trainingData = trainingData as DataType[][]; + + let accuracy = 0; + + for (let input of trainingData) { + accuracy += this._accuracy(input); + } + + accuracy /= trainingData.length; + + return accuracy; + } + + /** + * Decode encoded data. + * @param {Float32Array} encodedData The encoded data to decode. + * @returns {boolean[]|number[]|string} The decoded data. + */ + decode (encodedData: Float32Array) { + let decodedDataObject = this.decoder.run(encodedData as InputType); + + let decodedData: DataType[] | string = []; + + for (let extract in decodedDataObject) { + const i = extract as unknown as number; + decodedData[i] = (decodedDataObject as number[])[i]; + + if (this._dataType === 'boolean') { + decodedData[i] = (decodedData[i] as number) >= 0.5; + } + } + + if (this._dataType === 'string') { + decodedData = vec2word(decodedData as number[]); + decodedData = decodedData.substring(0, decodedData.indexOf(' ')); + } + + return decodedData; + } + + /** + * Encode data. + * @param {AutoDecodedData} data + * The data to encode. 
+ * @returns {AutoEncodedData} + */ + encode (data: AutoDecodedData) { + let encoderInput: Float32Array | AutoDecodedData = data; + + if (this._dataType === 'string') { + const dataString = data as string; + + if (dataString.length < this._getWordSize()) { + dataString.padEnd(this._getWordSize()); + } + + encoderInput = word2vec( + dataString, + this._getWordSize() + ); + } + + this.encoder.run(encoderInput as InputType); + + const encodedDataLayer = this.encoder.outputs[2]; + + let encodedData = encodedDataLayer.toArray(); + + return encodedData; + } + + /** + * Load this `AE`'s data from JSON. + * @param {any} json JSON representation of an `AE`. + */ + fromJSON (json: any) { + if (typeof json === 'string') json = JSON.parse(json); + + this._decodedDataSize = json.decodedDataSize; + this._transcodedDataSize = json.transcodedDataSize; + this._encodedDataSize = json.encodedDataSize; + + this.encoder.fromJSON(json.encoder); + this.decoder.fromJSON(json.decoder); + } + + /** + * Predict the decoded output of a given input data. + * @param {AutoDecodedData} input + * The input to predict the decoded output of. + * @returns + */ + run (input: AutoDecodedData) { + return this.decode(this.encode(input)); + } + + /** + * Stringify this `AE`. + * @returns {string} + * A JSON `string` containing this `AE`. + */ + stringify () { + return JSON.stringify(this.toJSON()); + } + + /** + * + * @returns {object} + * An object suitable for passing to `JSON.stringify()`. + */ + toJSON () { + return { + encoder: this.encoder.toJSON(), + decoder: this.decoder.toJSON() + }; + } + + /** + * Train the auto encoder on a training data set. + * @param {ITrainingData} data + * The data set to train the neural networks on. + * @param {AETrainOptions} options + * The options to pass to the neural network trainers. 
+ */ + train ( + data: ITrainingData, + options: Partial = {} + ) { + this._trainEncoder(data, options); + this._trainDecoder(data, options); + } + + /** + * Validate input by asserting that decoding the output of the encoder + * reproduces the original input. + * @param {AutoDecodedData} input + * The input to validate. + * @returns + */ + validate (input: AutoDecodedData) { + const output = this.run(input); + if (typeof output === 'string') return output === input; + else throw new Error(`\`validate()\` not yet implemented for data type '${this._dataType}'.`); + } + + _getDecodedDataSize () { + let size = this._decodedDataSize; + + if (this._dataType === 'string') { + size *= 8; + } + + return size; + } + + _getEncodedDataSize () { + let size = this._encodedDataSize; + + if (this._dataType === 'string') { + size *= 8; + } + + return Math.round(size); + } + + _getTranscodedDataSize () { + let size + = ( + this._getEncodedDataSize() + + this._getDecodedDataSize() + ) + * 0.5 + ; + + return Math.round(size); + } + + _getVecSize () { + return this._getWordSize() * 8; + } + + _getWordSize () { + return this._getDecodedDataSize() / 8; + } + + _trainDecoder ( + data: ITrainingData | string[], + options: Partial + ) { + const trainingData = []; + + for (let output of data) { + let finalOutput: ITrainingDatum | Float32Array; + if (this._dataType === 'string') { + output = (output as string).padEnd(this._getWordSize()); + } + + if (typeof output === 'string') { + finalOutput = word2vec( + output as string, + this._getWordSize() + ); + + this._dataType = 'string'; + } else { + finalOutput = output; + } + + const input = this.encode(finalOutput as unknown as AutoDecodedData); + + const entry = { + input, + output + }; + + trainingData.push(entry); + } + + this.decoder.train( + trainingData as unknown as Array< + IFeedForwardTrainingData< + InputType, + OutputType + > + >, + options + ); + } + + _trainEncoder ( + data: ITrainingData | string[], + options: Partial + ) { + 
const trainingData: ITrainingData = []; + + for (let input of data) { + let finalInput: ITrainingDatum | Float32Array; + + if (this._dataType === 'string') { + input = (input as string).padEnd(this._getWordSize()); + } + + if (typeof input === 'string') { + finalInput = word2vec( + input, + this._getWordSize() + ); + + this._dataType = 'string'; + } else { + finalInput = input; + } + + let output = input; + + let finalOutput: ITrainingDatum | Float32Array; + + if (typeof output === 'string') { + output = output.padEnd(this._getWordSize()); + + finalOutput = word2vec( + output, + this._getWordSize() + ); + + this._dataType = 'string'; + } else { + finalOutput = output; + } + + const entry = { + input: finalInput, + output: finalOutput + }; + + trainingData.push(entry); + } + + this.encoder.train( + trainingData, + options + ); + } +} diff --git a/src/index.ts b/src/index.ts index edbd05da..f2e13a39 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,5 @@ import * as activation from './activation'; +import { AE } from './auto-encoder'; import CrossValidate from './cross-validate'; import { FeedForward } from './feed-forward'; import * as layer from './layer'; @@ -53,6 +54,7 @@ const utilities = { export { activation, + AE, CrossValidate, likely, layer, From d2153e6bfcb008153424a92a97d1ad5840a6e016 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sun, 26 May 2024 18:41:13 -0400 Subject: [PATCH 02/14] Rewrite class `AutoEncoder` from scratch --- src/auto-encoder.ts | 650 +++++++++----------------------------------- src/index.ts | 4 +- 2 files changed, 123 insertions(+), 531 deletions(-) diff --git a/src/auto-encoder.ts b/src/auto-encoder.ts index 78d60f21..ecb90b11 100644 --- a/src/auto-encoder.ts +++ b/src/auto-encoder.ts @@ -1,571 +1,163 @@ -import { INeuralNetworkData, INeuralNetworkTrainOptions } from "./neural-network"; +import { Texture } from "gpu.js"; +import { IJSONLayer, INeuralNetworkData, INeuralNetworkDatum, 
INeuralNetworkTrainOptions } from "./neural-network"; import { INeuralNetworkGPUOptions, NeuralNetworkGPU } from "./neural-network-gpu"; +import { INeuralNetworkState } from "./neural-network-types"; -import { ITrainingDatum } from "./lookup"; -import { IFeedForwardTrainingData } from "./feed-forward"; - - -/** - * @typedef {Object} AETrainOptions - * @property {number} errorThresh - * Once the training error reaches `errorThresh`, training will be complete. - * @property {number} iterations - * Once the training epoch count reaches `iterations`, training will be - * complete. - * @property {number} learningRate - * The rate at which values will be changed. - */ - -interface AETrainOptions extends INeuralNetworkTrainOptions {}; - -/** - * @typedef {import('brain.js/dist/lookup').ITrainingDatum[]} ITrainingData - */ - -type ITrainingData = ITrainingDatum[]; - -/** - * - * @param {string} word - * The word to convert into a vector. - * @param {number} wordLength - * The maximum possible length of a word. - * @returns {Float32Array} - */ -function word2vec ( - word: string, - wordLength: number = 16 -) { - if (wordLength) { - word = word.padEnd(wordLength); - } - - const byteLength = wordLength * 4; - const bitLength = byteLength * 8; - - const vec = new Float32Array(bitLength); - - let index = 0; - - for (let char of word) { - let byte = char.charCodeAt(0); - - vec[index++] = byte & 0b0000_0001; - vec[index++] = (byte & 0b0000_0010) >> 1; - vec[index++] = (byte & 0b0000_0100) >> 2; - vec[index++] = (byte & 0b0000_1000) >> 3; - vec[index++] = (byte & 0b0001_0000) >> 4; - vec[index++] = (byte & 0b0010_0000) >> 5; - vec[index++] = (byte & 0b0100_0000) >> 6; - vec[index++] = (byte & 0b1000_0000) >> 7; - } - - return vec; -} - -/** - * Convert a vector of bits into a word. - * @param {number[]} vec The vector of bits to convert into a word. - * @returns {string} The decoded word. 
- */ -function vec2word ( - vec: number[] -) { - const bytes = []; - - for ( - let vecIndex = 0; - vecIndex < vec.length; - vecIndex += 8 - ) { - let byte = 0x00; - - for ( - let localBitIndex = 0; - localBitIndex < 8; - localBitIndex++ - ) { - const bitIndex = vecIndex + localBitIndex; - const predictedBit = vec[bitIndex]; - - const bit = Math.round(predictedBit); - - byte |= bit << localBitIndex; - } - - bytes.push(byte); - } - - let word = String.fromCharCode(...bytes).trim(); - - return word; +export interface IAutoEncoderOptions { + binaryThresh?: number; + decodedSize?: number; + hiddenLayers?: number[]; } /** - * @typedef {DataType[] | string} AutoDecodedData + * An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation. */ +export class AutoEncoder { + #decoder?: NeuralNetworkGPU; + #denoiser: NeuralNetworkGPU; + #encoder?: NeuralNetworkGPU; + + constructor ( + options?: Partial + ) { + options ??= {}; -type AutoDecodedData - = DataType[] - | boolean[] - | number[] - | string - ; + const denoiserOptions: Partial = {}; -/** - * @typedef {Float32Array} AutoEncodedData - */ + denoiserOptions.binaryThresh = options.binaryThresh; + denoiserOptions.hiddenLayers = options.hiddenLayers; -type AutoEncodedData = Float32Array; + denoiserOptions.inputSize = denoiserOptions.outputSize = options.decodedSize; -/** - * @typedef {"boolean"|"number"|"string"} DataType - */ + this.#denoiser = new NeuralNetworkGPU(options); + } + + /** + * Denoise input data, removing any anomalies from the data. + * @param {DecodedData} input + * @returns {DecodedData} + */ + denoise(input: DecodedData): DecodedData { + return this.#denoiser.run(input); + } -type DataType = boolean | number | string; + /** + * Test a data sample for anomalies. 
+ * + * @param {DecodedData} input + * @returns {boolean} + */ + includesAnomalies(input: DecodedData, anomalyThreshold: number = 0.2): boolean { + const anomalies: number[] = []; -/** - * @typedef {Object} AE - */ + const denoised = this.denoise(input); -/** - * An Auto Encoder (AE) is a type of neural network consisting of two - * subnetworks: an encoder, and a decoder. - * The encoder is responsible for converting the input into a smaller - * representation via feature extraction. - * The decoder is responsible for reconstructing the original input from a - * vector of extracted features. - * - * Example usage: - * ``` - * const ae = new AE(10, 1, 'string'); - * - * ae.train(["this", "is", "an", "example"]); - * - * const encoded = ae.encode("example"); - * const decoded = ae.decode(encoded); - * - * console.log(encoded, '->', decoded); - * ``` - */ -export class AE< - InputType extends INeuralNetworkData, - OutputType extends INeuralNetworkData -> { - _dataType: DataType; - _encodedDataSize: number; - _transcodedDataSize: number; - _decodedDataSize: number; - encoder: NeuralNetworkGPU; - decoder: NeuralNetworkGPU; - - /** - * Create a new auto encoder. - * @param {number} decodedDataSize - * The size of the data prior to encoding, and after decoding. - * @param {number} encodedDataSize - * The size of the data after encoding, and prior to decoding. - * @param {DataType} dataType - * The type of data to encode. 
- */ - constructor ( - decodedDataSize: number, - encodedDataSize: number, - dataType: DataType = 'number' - ) { - const transcodedDataSize = Math.round( - (encodedDataSize + decodedDataSize) * 0.5 - ); - - /** - * @type {DataType} - */ - this._dataType = dataType; - - /** - * @type {number} - */ - this._encodedDataSize = encodedDataSize; - - /** - * @type {number} - */ - this._transcodedDataSize = transcodedDataSize; - - /** - * @type {number} - */ - this._decodedDataSize = decodedDataSize; - - /** - * @type {NeuralNetworkGPU} - */ - this.encoder = new NeuralNetworkGPU( - { - hiddenLayers: [ - this._getTranscodedDataSize(), - this._getEncodedDataSize(), - this._getTranscodedDataSize() - ], - inputSize: this._getDecodedDataSize(), - outputSize: this._getDecodedDataSize() - } - ); - - /** - * @type {NeuralNetworkGPU} - */ - this.decoder = new NeuralNetworkGPU( - { - hiddenLayers: [ this._getTranscodedDataSize() ], - inputSize: this._getEncodedDataSize(), - outputSize: this._getDecodedDataSize() - } - ); + for (let i = 0; i < (input.length ?? 0); i++) { + anomalies[i] = Math.abs((input as number[])[i] - (denoised as number[])[i]); } - /** - * Parse a stringified `AE`. - * @param {string} jsonString - * A JSON string containing a stringified `AE`. - * @returns - */ - static parse ( - jsonString: string - ) { - const json = JSON.parse(jsonString); - - const autoEncoder = new AE( - json.decodedDataSize, - json.encodedDataSize, - json.dataType - ); - - autoEncoder.fromJSON(json); - - return autoEncoder; - } + const sum = anomalies.reduce( + (previousValue, value) => previousValue + value + ); - _accuracy ( - input: Array - ) { - const encoded = this.encode(input); - const decoded = this.decode(encoded); + const mean = sum / (input as number[]).length; - let accuracy = 0; + return mean > anomalyThreshold; + } - for ( - let i = 0; - i < decoded.length; - i++ - ) { - const inputValue = input[i]; - // TODO: Support types other than 'number' here. 
- const decodedValue = Math.round(decoded[i] as number); + /** + * Decode `EncodedData` into an approximation of its original form. + * + * @param {EncodedData} input + * @returns {DecodedData} + */ + decode(input: EncodedData): DecodedData { + // Decode the encoded input. + let output = this.#decoder?.run(input); - const isCorrect = inputValue === decodedValue; + if (!output) throw new Error("Cannot decode data before training the auto encoder."); - if (isCorrect) { - accuracy += 1; - } - } + return output as DecodedData; + } - accuracy /= decoded.length; + /** + * Encode data to extract features, reduce dimensionality, etc. + * + * @param {DecodedData} input + * @returns {EncodedData} + */ + encode(input: DecodedData): EncodedData { + // Process the input. + this.#denoiser.run(input); - return accuracy; - } + // Calculate the index of the auto-encoded layer. + const index = this.decodedLayerIndex; - accuracy ( - trainingData: DataType[] | DataType[][] - ) { - if ( - !trainingData.hasOwnProperty('length') || - typeof trainingData[0] !== 'object' - ) { - return this._accuracy(trainingData as DataType[]); - } + // Get the auto-encoded input. + let encodedInput = ((globalThis as any)["structuredClone"] as any)(this.#denoiser.outputs[index]); - trainingData = trainingData as DataType[][]; + console.log(this.#denoiser.outputs); - let accuracy = 0; + // If the encoded input is a `Texture`, convert it into an `Array`. + if (encodedInput instanceof Texture) encodedInput = encodedInput.toArray(); - for (let input of trainingData) { - accuracy += this._accuracy(input); - } + // Return the encoded input. + return encodedInput as EncodedData; + } - accuracy /= trainingData.length; + /** + * Train the auto encoder. 
+ * + * @param {DecodedData[]} data + * @param {Partial} options + * @returns {INeuralNetworkState} + */ + train(data: DecodedData[], options?: Partial): INeuralNetworkState { + const preprocessedData: INeuralNetworkDatum, Partial>[] = []; - return accuracy; + for (let datum of data) { + preprocessedData.push( { input: datum, output: datum } ); } - /** - * Decode encoded data. - * @param {Float32Array} encodedData The encoded data to decode. - * @returns {boolean[]|number[]|string} The decoded data. - */ - decode (encodedData: Float32Array) { - let decodedDataObject = this.decoder.run(encodedData as InputType); + const results = this.#denoiser.train(preprocessedData, options); - let decodedData: DataType[] | string = []; + this.#decoder = this.createDecoder(); - for (let extract in decodedDataObject) { - const i = extract as unknown as number; - decodedData[i] = (decodedDataObject as number[])[i]; + return results; + } - if (this._dataType === 'boolean') { - decodedData[i] = (decodedData[i] as number) >= 0.5; - } - } + /** + * Create a new decoder from the trained denoiser. + * + * @returns {NeuralNetworkGPU} + */ + private createDecoder() { + const json = this.#denoiser.toJSON(); - if (this._dataType === 'string') { - decodedData = vec2word(decodedData as number[]); - decodedData = decodedData.substring(0, decodedData.indexOf(' ')); - } + const layers: IJSONLayer[] = []; + const sizes: number[] = []; - return decodedData; + for (let i = this.decodedLayerIndex; i < this.#denoiser.sizes.length; i++) { + layers.push(json.layers[i]); + sizes.push(json.sizes[i]); } - /** - * Encode data. - * @param {AutoDecodedData} data - * The data to encode. 
- * @returns {AutoEncodedData} - */ - encode (data: AutoDecodedData) { - let encoderInput: Float32Array | AutoDecodedData = data; - - if (this._dataType === 'string') { - const dataString = data as string; - - if (dataString.length < this._getWordSize()) { - dataString.padEnd(this._getWordSize()); - } + json.layers = layers; + json.sizes = sizes; - encoderInput = word2vec( - dataString, - this._getWordSize() - ); - } - - this.encoder.run(encoderInput as InputType); - - const encodedDataLayer = this.encoder.outputs[2]; - - let encodedData = encodedDataLayer.toArray(); - - return encodedData; - } + json.options.inputSize = json.sizes[0]; - /** - * Load this `AE`'s data from JSON. - * @param {any} json JSON representation of an `AE`. - */ - fromJSON (json: any) { - if (typeof json === 'string') json = JSON.parse(json); + const decoder = new NeuralNetworkGPU().fromJSON(json); - this._decodedDataSize = json.decodedDataSize; - this._transcodedDataSize = json.transcodedDataSize; - this._encodedDataSize = json.encodedDataSize; + return decoder as unknown as NeuralNetworkGPU; + } - this.encoder.fromJSON(json.encoder); - this.decoder.fromJSON(json.decoder); - } - - /** - * Predict the decoded output of a given input data. - * @param {AutoDecodedData} input - * The input to predict the decoded output of. - * @returns - */ - run (input: AutoDecodedData) { - return this.decode(this.encode(input)); - } - - /** - * Stringify this `AE`. - * @returns {string} - * A JSON `string` containing this `AE`. - */ - stringify () { - return JSON.stringify(this.toJSON()); - } - - /** - * - * @returns {object} - * An object suitable for passing to `JSON.stringify()`. - */ - toJSON () { - return { - encoder: this.encoder.toJSON(), - decoder: this.decoder.toJSON() - }; - } - - /** - * Train the auto encoder on a training data set. - * @param {ITrainingData} data - * The data set to train the neural networks on. 
- * @param {AETrainOptions} options - * The options to pass to the neural network trainers. - */ - train ( - data: ITrainingData, - options: Partial = {} - ) { - this._trainEncoder(data, options); - this._trainDecoder(data, options); - } - - /** - * Validate input by asserting that decoding the output of the encoder - * reproduces the original input. - * @param {AutoDecodedData} input - * The input to validate. - * @returns - */ - validate (input: AutoDecodedData) { - const output = this.run(input); - if (typeof output === 'string') return output === input; - else throw new Error(`\`validate()\` not yet implemented for data type '${this._dataType}'.`); - } - - _getDecodedDataSize () { - let size = this._decodedDataSize; - - if (this._dataType === 'string') { - size *= 8; - } - - return size; - } - - _getEncodedDataSize () { - let size = this._encodedDataSize; - - if (this._dataType === 'string') { - size *= 8; - } - - return Math.round(size); - } - - _getTranscodedDataSize () { - let size - = ( - this._getEncodedDataSize() - + this._getDecodedDataSize() - ) - * 0.5 - ; - - return Math.round(size); - } - - _getVecSize () { - return this._getWordSize() * 8; - } - - _getWordSize () { - return this._getDecodedDataSize() / 8; - } - - _trainDecoder ( - data: ITrainingData | string[], - options: Partial - ) { - const trainingData = []; - - for (let output of data) { - let finalOutput: ITrainingDatum | Float32Array; - if (this._dataType === 'string') { - output = (output as string).padEnd(this._getWordSize()); - } - - if (typeof output === 'string') { - finalOutput = word2vec( - output as string, - this._getWordSize() - ); - - this._dataType = 'string'; - } else { - finalOutput = output; - } - - const input = this.encode(finalOutput as unknown as AutoDecodedData); - - const entry = { - input, - output - }; - - trainingData.push(entry); - } - - this.decoder.train( - trainingData as unknown as Array< - IFeedForwardTrainingData< - InputType, - OutputType - > - >, - options - 
); - } - - _trainEncoder ( - data: ITrainingData | string[], - options: Partial - ) { - const trainingData: ITrainingData = []; - - for (let input of data) { - let finalInput: ITrainingDatum | Float32Array; - - if (this._dataType === 'string') { - input = (input as string).padEnd(this._getWordSize()); - } - - if (typeof input === 'string') { - finalInput = word2vec( - input, - this._getWordSize() - ); - - this._dataType = 'string'; - } else { - finalInput = input; - } - - let output = input; - - let finalOutput: ITrainingDatum | Float32Array; - - if (typeof output === 'string') { - output = output.padEnd(this._getWordSize()); - - finalOutput = word2vec( - output, - this._getWordSize() - ); - - this._dataType = 'string'; - } else { - finalOutput = output; - } - - const entry = { - input: finalInput, - output: finalOutput - }; - - trainingData.push(entry); - } - - this.encoder.train( - trainingData, - options - ); - } + /** + * Get the offset of the decoded layer. + */ + private get decodedLayerIndex(): number { + return Math.round(this.#denoiser.outputs.length * 0.5) - 1; + } } + +export default AutoEncoder; diff --git a/src/index.ts b/src/index.ts index f2e13a39..77b41d26 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ import * as activation from './activation'; -import { AE } from './auto-encoder'; +import { AutoEncoder } from './auto-encoder'; import CrossValidate from './cross-validate'; import { FeedForward } from './feed-forward'; import * as layer from './layer'; @@ -54,7 +54,7 @@ const utilities = { export { activation, - AE, + AutoEncoder, CrossValidate, likely, layer, From d1bab11fcdc359b0a1019ed00da151b94f17dcfc Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sun, 26 May 2024 20:15:09 -0400 Subject: [PATCH 03/14] Remove unused property --- src/auto-encoder.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/auto-encoder.ts b/src/auto-encoder.ts index ecb90b11..0055de24 100644 --- 
a/src/auto-encoder.ts +++ b/src/auto-encoder.ts @@ -15,7 +15,6 @@ export interface IAutoEncoderOptions { export class AutoEncoder { #decoder?: NeuralNetworkGPU; #denoiser: NeuralNetworkGPU; - #encoder?: NeuralNetworkGPU; constructor ( options?: Partial From 3eccd36a60d5dd7b4f77f2ce9206850901895db7 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sun, 26 May 2024 20:59:45 -0400 Subject: [PATCH 04/14] Use shallow clone --- src/auto-encoder.ts | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/auto-encoder.ts b/src/auto-encoder.ts index 0055de24..1e6ec454 100644 --- a/src/auto-encoder.ts +++ b/src/auto-encoder.ts @@ -1,8 +1,16 @@ -import { Texture } from "gpu.js"; +import { KernelOutput, Texture, TextureArrayOutput } from "gpu.js"; import { IJSONLayer, INeuralNetworkData, INeuralNetworkDatum, INeuralNetworkTrainOptions } from "./neural-network"; import { INeuralNetworkGPUOptions, NeuralNetworkGPU } from "./neural-network-gpu"; import { INeuralNetworkState } from "./neural-network-types"; +function shallowClone(value: TextureArrayOutput): TextureArrayOutput { + const clone: TextureArrayOutput = []; + + for (let i = 0; i < value.length; i++) clone[i] = value[i]; + + return clone; +} + export interface IAutoEncoderOptions { binaryThresh?: number; decodedSize?: number; @@ -89,19 +97,14 @@ export class AutoEncoder; } + private get decodedLayer(): KernelOutput { + return this.#denoiser.outputs[this.decodedLayerIndex]; + } + /** * Get the offset of the decoded layer. 
*/ From 98b47ce11b219358bd9907fb20a71586025bfd82 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sun, 26 May 2024 20:59:53 -0400 Subject: [PATCH 05/14] Add unit tests --- src/auto-encoder.test.ts | 79 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 src/auto-encoder.test.ts diff --git a/src/auto-encoder.test.ts b/src/auto-encoder.test.ts new file mode 100644 index 00000000..0f956953 --- /dev/null +++ b/src/auto-encoder.test.ts @@ -0,0 +1,79 @@ +import AutoEncoder from "./auto-encoder"; + +const trainingData = [ + [0, 0, 0], + [0, 1, 1], + [1, 0, 1], + [1, 1, 0] +]; + +const xornet = new AutoEncoder( + { + decodedSize: 3, + hiddenLayers: [ 5, 2, 5 ] + } +); + +const errorThresh = 0.011; + +const result = xornet.train( + trainingData, { + iterations: 100000, + errorThresh + } +); + +test( + "denoise a data sample", + async () => { + expect(result.error).toBeLessThanOrEqual(errorThresh); + + function xor(...args: number[]) { + return Math.round(xornet.denoise(args)[2]); + } + + const run1 = xor(0, 0, 0); + const run2 = xor(0, 1, 1); + const run3 = xor(1, 0, 1); + const run4 = xor(1, 1, 0); + + expect(run1).toBe(0); + expect(run2).toBe(1); + expect(run3).toBe(1); + expect(run4).toBe(0); + } +); + +test( + "encode and decode a data sample", + async () => { + expect(result.error).toBeLessThanOrEqual(errorThresh); + + const run1$input = [0, 0, 0]; + const run1$encoded = xornet.encode(run1$input); + const run1$decoded = xornet.decode(run1$encoded); + + const run2$input = [0, 1, 1]; + const run2$encoded = xornet.encode(run2$input); + const run2$decoded = xornet.decode(run2$encoded); + + for (let i = 0; i < 3; i++) expect(Math.round(run1$decoded[i])).toBe(run1$input[i]); + for (let i = 0; i < 3; i++) expect(Math.round(run2$decoded[i])).toBe(run2$input[i]); + } +); + +test( + "test a data sample for anomalies", + async () => { + expect(result.error).toBeLessThanOrEqual(errorThresh); + + 
function includesAnomalies(...args: number[]) { + expect(xornet.includesAnomalies(args)).toBe(false); + } + + includesAnomalies(0, 0, 0); + includesAnomalies(0, 1, 1); + includesAnomalies(1, 0, 1); + includesAnomalies(1, 1, 0); + } +); From 6719978886fae7abb8062060891557d4ee2c1641 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sun, 26 May 2024 21:22:14 -0400 Subject: [PATCH 06/14] Replace `shallowClone` with `deepClone` --- src/auto-encoder.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/auto-encoder.ts b/src/auto-encoder.ts index 1e6ec454..6ec64f8e 100644 --- a/src/auto-encoder.ts +++ b/src/auto-encoder.ts @@ -3,10 +3,13 @@ import { IJSONLayer, INeuralNetworkData, INeuralNetworkDatum, INeuralNetworkTrai import { INeuralNetworkGPUOptions, NeuralNetworkGPU } from "./neural-network-gpu"; import { INeuralNetworkState } from "./neural-network-types"; -function shallowClone(value: TextureArrayOutput): TextureArrayOutput { +function deepClone(value: TextureArrayOutput): TextureArrayOutput { const clone: TextureArrayOutput = []; - for (let i = 0; i < value.length; i++) clone[i] = value[i]; + for (let i = 0; i < value.length; i++) { + if (typeof value[i] === "object") (clone[i] as any) = deepClone(value[i] as any); + else clone[i] = value[i]; + } return clone; } @@ -104,7 +107,7 @@ export class AutoEncoder Date: Sun, 26 May 2024 21:40:09 -0400 Subject: [PATCH 07/14] Rename private properties --- src/auto-encoder.ts | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/auto-encoder.ts b/src/auto-encoder.ts index 6ec64f8e..558f92fc 100644 --- a/src/auto-encoder.ts +++ b/src/auto-encoder.ts @@ -101,7 +101,7 @@ export class AutoEncoder; } - private get decodedLayer(): KernelOutput { - return this.#denoiser.outputs[this.decodedLayerIndex]; + /** + * Get the layer containing the encoded representation. 
+ */ + private get encodedLayer(): KernelOutput { + return this.#denoiser.outputs[this.encodedLayerIndex]; } /** - * Get the offset of the decoded layer. + * Get the offset of the encoded layer. */ - private get decodedLayerIndex(): number { + private get encodedLayerIndex(): number { return Math.round(this.#denoiser.outputs.length * 0.5) - 1; } } From 2408360f3081c90f32b3c8d8982ff9005ba98e32 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sun, 26 May 2024 21:50:27 -0400 Subject: [PATCH 08/14] Remove the space in the word "autoencoder" [Wikipedia said "Autoencoder" and not "Auto encoder" or "Auto-encoder"](https://en.wikipedia.org/wiki/Autoencoder) --- src/{auto-encoder.test.ts => autoencoder.test.ts} | 4 ++-- src/{auto-encoder.ts => autoencoder.ts} | 8 ++++---- src/index.ts | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) rename src/{auto-encoder.test.ts => autoencoder.test.ts} (94%) rename src/{auto-encoder.ts => autoencoder.ts} (96%) diff --git a/src/auto-encoder.test.ts b/src/autoencoder.test.ts similarity index 94% rename from src/auto-encoder.test.ts rename to src/autoencoder.test.ts index 0f956953..2b7caf09 100644 --- a/src/auto-encoder.test.ts +++ b/src/autoencoder.test.ts @@ -1,4 +1,4 @@ -import AutoEncoder from "./auto-encoder"; +import Autoencoder from "./autoencoder"; const trainingData = [ [0, 0, 0], @@ -7,7 +7,7 @@ const trainingData = [ [1, 1, 0] ]; -const xornet = new AutoEncoder( +const xornet = new Autoencoder( { decodedSize: 3, hiddenLayers: [ 5, 2, 5 ] diff --git a/src/auto-encoder.ts b/src/autoencoder.ts similarity index 96% rename from src/auto-encoder.ts rename to src/autoencoder.ts index 558f92fc..50b9d676 100644 --- a/src/auto-encoder.ts +++ b/src/autoencoder.ts @@ -14,7 +14,7 @@ function deepClone(value: TextureArrayOutput): TextureArrayOutput { return clone; } -export interface IAutoEncoderOptions { +export interface IAutoencoderOptions { binaryThresh?: number; decodedSize?: number; 
hiddenLayers?: number[]; @@ -23,12 +23,12 @@ export interface IAutoEncoderOptions { /** * An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation. */ -export class AutoEncoder { +export class Autoencoder { #decoder?: NeuralNetworkGPU; #denoiser: NeuralNetworkGPU; constructor ( - options?: Partial + options?: Partial ) { options ??= {}; @@ -172,4 +172,4 @@ export class AutoEncoder Date: Sun, 26 May 2024 22:18:50 -0400 Subject: [PATCH 09/14] Rename class `Autoencoder` to `AE` The other classes in this library use their respective acronyms with the exceptions of `FeedForward`, `NeuralNetwork`, and `NeuralNetworkGPU`. Furthermore, `AE` variants of existing classes will likely be made, so an acronym equivalent would be desirable. I'm considering the naming conventions for classes such as `LSTMTimeStep`. For example maybe a future `VAE` class could be made to represent variational autoencoders. --- src/autoencoder.test.ts | 4 ++-- src/autoencoder.ts | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/autoencoder.test.ts b/src/autoencoder.test.ts index 2b7caf09..a14512d8 100644 --- a/src/autoencoder.test.ts +++ b/src/autoencoder.test.ts @@ -1,4 +1,4 @@ -import Autoencoder from "./autoencoder"; +import AE from "./autoencoder"; const trainingData = [ [0, 0, 0], @@ -7,7 +7,7 @@ const trainingData = [ [1, 1, 0] ]; -const xornet = new Autoencoder( +const xornet = new AE( { decodedSize: 3, hiddenLayers: [ 5, 2, 5 ] diff --git a/src/autoencoder.ts b/src/autoencoder.ts index 50b9d676..76b49273 100644 --- a/src/autoencoder.ts +++ b/src/autoencoder.ts @@ -23,7 +23,7 @@ export interface IAutoencoderOptions { /** * An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation. 
*/ -export class Autoencoder { +export class AE { #decoder?: NeuralNetworkGPU; #denoiser: NeuralNetworkGPU; @@ -37,7 +37,7 @@ export class Autoencoder(options); } @@ -172,4 +172,4 @@ export class Autoencoder Date: Sun, 26 May 2024 23:01:47 -0400 Subject: [PATCH 10/14] Add `AE` usage to README.md --- README.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e12f0413..2d07e04c 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ GPU accelerated Neural networks in JavaScript for Browsers and Node.js ![CI](https://github.com/BrainJS/brain.js/workflows/CI/badge.svg) [![codecov](https://codecov.io/gh/BrainJS/brain.js/branch/master/graph/badge.svg?token=3SJIBJ1679)](https://codecov.io/gh/BrainJS/brain.js) Twitter - + [![NPM](https://nodei.co/npm/brain.js.png?compact=true)](https://nodei.co/npm/brain.js/)

@@ -43,6 +43,7 @@ GPU accelerated Neural networks in JavaScript for Browsers and Node.js - [For training with NeuralNetwork](#for-training-with-neuralnetwork) - [For training with `RNNTimeStep`, `LSTMTimeStep` and `GRUTimeStep`](#for-training-with-rnntimestep-lstmtimestep-and-grutimestep) - [For training with `RNN`, `LSTM` and `GRU`](#for-training-with-rnn-lstm-and-gru) + - [For training with `AE`](#for-training-with-ae) - [Training Options](#training-options) - [Async Training](#async-training) - [Cross Validation](#cross-validation) @@ -317,6 +318,54 @@ net.train([ const output = net.run('I feel great about the world!'); // 'happy' ``` +#### For training with `AE` + +Each training pattern can either: + +- Be an array of numbers +- Be an array of arrays of numbers + +Training an autoencoder to compress the values of an XOR calculation: + +```javascript +const net = new brain.AE( + { + hiddenLayers: [ 5, 2, 5 ] + } +); + +net.train([ + [ 0, 0, 0 ], + [ 0, 1, 1 ], + [ 1, 0, 1 ], + [ 1, 1, 0 ] +]); +``` + +Encoding/decoding: + +```javascript +const input = [ 0, 1, 1 ]; + +const encoded = net.encode(input); +const decoded = net.decode(encoded); +``` + +Denoise noisy data: + +```javascript +const noisyData = [ 0, 1, 0 ]; + +const data = net.denoise(noisyData); +``` + +Test for anomalies in data samples: + +```javascript +const shouldBeFalse = net.includesAnomalies([0, 1, 1]); +const shouldBeTrue = net.includesAnomalies([0, 1, 0]); +``` + ### Training Options `train()` takes a hash of options as its second argument: @@ -595,6 +644,7 @@ The user interface used: - [`brain.NeuralNetwork`](src/neural-network.ts) - [Feedforward Neural Network](https://en.wikipedia.org/wiki/Feedforward_neural_network) with backpropagation - [`brain.NeuralNetworkGPU`](src/neural-network-gpu.ts) - [Feedforward Neural Network](https://en.wikipedia.org/wiki/Feedforward_neural_network) with backpropagation, GPU version +- [`brain.AE`](src/autoencoder.ts) - [Autoencoder or 
"AE"](https://en.wikipedia.org/wiki/Autoencoder) with backpropagation and GPU support - [`brain.recurrent.RNNTimeStep`](src/recurrent/rnn-time-step.ts) - [Time Step Recurrent Neural Network or "RNN"](https://en.wikipedia.org/wiki/Recurrent_neural_network) - [`brain.recurrent.LSTMTimeStep`](src/recurrent/lstm-time-step.ts) - [Time Step Long Short Term Memory Neural Network or "LSTM"](https://en.wikipedia.org/wiki/Long_short-term_memory) - [`brain.recurrent.GRUTimeStep`](src/recurrent/gru-time-step.ts) - [Time Step Gated Recurrent Unit or "GRU"](https://en.wikipedia.org/wiki/Gated_recurrent_unit) From 8572cc81a892068b3621514a6f154636d89c1df3 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Mon, 27 May 2024 01:31:31 -0400 Subject: [PATCH 11/14] Update references to `AE` --- src/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index c027196a..1d410f76 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ import * as activation from './activation'; -import { Autoencoder } from './autoencoder'; +import { AE } from './autoencoder'; import CrossValidate from './cross-validate'; import { FeedForward } from './feed-forward'; import * as layer from './layer'; @@ -54,7 +54,7 @@ const utilities = { export { activation, - Autoencoder, + AE, CrossValidate, likely, layer, From 03def5893e3ff3e33ea63f1bffb21cba6d14d785 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Mon, 27 May 2024 01:33:15 -0400 Subject: [PATCH 12/14] Use Partial instead of nullable properties --- src/autoencoder.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/autoencoder.ts b/src/autoencoder.ts index 76b49273..7b9dcc33 100644 --- a/src/autoencoder.ts +++ b/src/autoencoder.ts @@ -15,9 +15,9 @@ function deepClone(value: TextureArrayOutput): TextureArrayOutput { } export interface IAutoencoderOptions { - binaryThresh?: number; - 
decodedSize?: number; - hiddenLayers?: number[]; + binaryThresh: number; + decodedSize: number; + hiddenLayers: number[]; } /** From e3c4c6d6b88e288da3bd3e8f8871b7bc178490f5 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Tue, 28 May 2024 02:23:58 -0400 Subject: [PATCH 13/14] Update autoencoder.ts --- src/autoencoder.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/autoencoder.ts b/src/autoencoder.ts index 7b9dcc33..71debc8a 100644 --- a/src/autoencoder.ts +++ b/src/autoencoder.ts @@ -14,7 +14,7 @@ function deepClone(value: TextureArrayOutput): TextureArrayOutput { return clone; } -export interface IAutoencoderOptions { +export interface IAEOptions { binaryThresh: number; decodedSize: number; hiddenLayers: number[]; @@ -28,7 +28,7 @@ export class AE; constructor ( - options?: Partial + options?: Partial ) { options ??= {}; From a4257fd6e676ba03b9f1cc3a2e9c47364134b590 Mon Sep 17 00:00:00 2001 From: voidvoxel <160202125+voidvoxel@users.noreply.github.com> Date: Sat, 8 Jun 2024 07:58:31 -0400 Subject: [PATCH 14/14] Minor improvements * Fix "@rollup/plugin-typescript TS2807" * Choose a more accurate name for `includesAnomalies` (`likelyIncludesAnomalies`, as it makes no guarantees that anomalies are truly present and only provides an intuitive guess) --- src/autoencoder.test.ts | 2 +- src/autoencoder.ts | 118 +++++++++++-------- src/errors/untrained-neural-network-error.ts | 7 ++ 3 files changed, 74 insertions(+), 53 deletions(-) create mode 100644 src/errors/untrained-neural-network-error.ts diff --git a/src/autoencoder.test.ts b/src/autoencoder.test.ts index a14512d8..7838fe1e 100644 --- a/src/autoencoder.test.ts +++ b/src/autoencoder.test.ts @@ -68,7 +68,7 @@ test( expect(result.error).toBeLessThanOrEqual(errorThresh); function includesAnomalies(...args: number[]) { - expect(xornet.includesAnomalies(args)).toBe(false); + expect(xornet.likelyIncludesAnomalies(args)).toBe(false); } 
includesAnomalies(0, 0, 0); diff --git a/src/autoencoder.ts b/src/autoencoder.ts index 71debc8a..e799b042 100644 --- a/src/autoencoder.ts +++ b/src/autoencoder.ts @@ -2,17 +2,7 @@ import { KernelOutput, Texture, TextureArrayOutput } from "gpu.js"; import { IJSONLayer, INeuralNetworkData, INeuralNetworkDatum, INeuralNetworkTrainOptions } from "./neural-network"; import { INeuralNetworkGPUOptions, NeuralNetworkGPU } from "./neural-network-gpu"; import { INeuralNetworkState } from "./neural-network-types"; - -function deepClone(value: TextureArrayOutput): TextureArrayOutput { - const clone: TextureArrayOutput = []; - - for (let i = 0; i < value.length; i++) { - if (typeof value[i] === "object") (clone[i] as any) = deepClone(value[i] as any); - else clone[i] = value[i]; - } - - return clone; -} +import { UntrainedNeuralNetworkError } from "./errors/untrained-neural-network-error"; export interface IAEOptions { binaryThresh: number; @@ -24,22 +14,28 @@ export interface IAEOptions { * An autoencoder learns to compress input data down to relevant features and reconstruct input data from its compressed representation. */ export class AE { - #decoder?: NeuralNetworkGPU; - #denoiser: NeuralNetworkGPU; + private decoder?: NeuralNetworkGPU; + private denoiser: NeuralNetworkGPU; constructor ( options?: Partial ) { + // Create default options for the autoencoder. options ??= {}; + // Create default options for the autoencoder's denoiser subnet. const denoiserOptions: Partial = {}; + // Inherit the binary threshold of the parent autoencoder. denoiserOptions.binaryThresh = options.binaryThresh; + // Inherit the hidden layers of the parent autoencoder. denoiserOptions.hiddenLayers = options.hiddenLayers; + // Define the denoiser subnet's input and output sizes. if (options.decodedSize) denoiserOptions.inputSize = denoiserOptions.outputSize = options.decodedSize; - this.#denoiser = new NeuralNetworkGPU(options); + // Create the denoiser subnet of the autoencoder. 
+ this.denoiser = new NeuralNetworkGPU(options); } /** @@ -48,31 +44,14 @@ export class AE previousValue + value - ); - - const mean = sum / (input as number[]).length; - - return mean > anomalyThreshold; + // Run the input through the generic denoiser. + // This isn't the best denoiser implementation, but it's efficient. + // Efficiency is important here because training should focus on + // optimizing for feature extraction as quickly as possible rather than + // denoising and anomaly detection; there are other specialized topologies + // better suited for these tasks anyways, many of which can be implemented + // by using an autoencoder. + return this.denoiser.run(input); } /** @@ -82,12 +61,11 @@ export class AE previousValue + value + ); + + // Calculate the mean anomaly. + const mean = sum / (input as number[]).length; + + // Return whether or not the mean anomaly rate is greater than the anomaly threshold. + return mean > anomalyThreshold; } /** @@ -124,9 +138,9 @@ export class AE} */ private createDecoder() { - const json = this.#denoiser.toJSON(); + const json = this.denoiser.toJSON(); const layers: IJSONLayer[] = []; const sizes: number[] = []; - for (let i = this.encodedLayerIndex; i < this.#denoiser.sizes.length; i++) { + for (let i = this.encodedLayerIndex; i < this.denoiser.sizes.length; i++) { layers.push(json.layers[i]); sizes.push(json.sizes[i]); } @@ -161,14 +175,14 @@ export class AE