From bd9f840574ba1564919b27685f2427de4c688ab2 Mon Sep 17 00:00:00 2001
From: TaylorHawkes
Date: Fri, 15 Nov 2024 21:19:15 +0000
Subject: [PATCH] Added Conv2D & MaxPool2D layers

---
 src/index.ts  |   8 +-
 src/layers.ts |  74 ++++++++++++++++-
 src/tensor.ts | 217 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 296 insertions(+), 3 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index b54c36f..774c5c1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -41,7 +41,9 @@ import {
   CrossEntropyLoss,
   MSELoss,
   save,
-  load
+  load,
+  Conv2D,
+  MaxPool2D
 } from "./layers.js";
 import { Adam } from "./optim.js";
 import { getShape } from "./utils.js";
@@ -59,7 +61,9 @@ const nn = {
   Dropout,
   LayerNorm,
   CrossEntropyLoss,
-  MSELoss
+  MSELoss,
+  Conv2D,
+  MaxPool2D
 };
 
 const optim = { Adam };
 
diff --git a/src/layers.ts b/src/layers.ts
index 6c1f9c4..616a3c0 100644
--- a/src/layers.ts
+++ b/src/layers.ts
@@ -574,6 +574,78 @@ export class MSELoss extends Module {
   }
 }
 
+
+export class Conv2D extends Module {
+  public W: Tensor;
+  public b: Tensor | null;
+  public has_bias: boolean;
+  public stride: [number, number];
+  public padding: [number, number];
+  public dilation: [number, number];
+  public groups: number;
+
+  constructor(
+    in_channels: number,
+    out_channels: number,
+    kernel_size: number | [number, number],
+    stride: number | [number, number] = 1,
+    padding: number | [number, number] | "same" = "same",
+    dilation: number | [number, number] = 1,
+    groups = 1,
+    bias = true,
+    device = "cpu"
+  ) {
+    super();
+
+    const [kh, kw] = Array.isArray(kernel_size) ? kernel_size : [kernel_size, kernel_size];
+    const [sh, sw] = Array.isArray(stride) ? stride : [stride, stride];
+    const [dh, dw] = Array.isArray(dilation) ? dilation : [dilation, dilation];
+
+    let ph: number, pw: number;
+    if (padding === "same") {
+      ph = Math.floor(((kh - 1) * dh + 1 - sh) / 2);
+      pw = Math.floor(((kw - 1) * dw + 1 - sw) / 2);
+    } else if (Array.isArray(padding)) {
+      [ph, pw] = padding;
+    } else {
+      ph = pw = padding;
+    }
+
+    const weight_shape = [out_channels, Math.floor(in_channels / groups), kh, kw];
+    this.W = randn(weight_shape, true, device, false);
+    this.b = bias ? zeros([out_channels], true) : null;
+    this.has_bias = bias;
+
+    this.stride = [sh, sw];
+    this.padding = [ph, pw];
+    this.dilation = [dh, dw];
+    this.groups = groups;
+  }
+
+  forward(x: Tensor): Tensor {
+    const [kernel_height, kernel_width] = [this.W.shape[2], this.W.shape[3]];
+    const [batch, out_channels] = [x.shape[0], this.W.shape[0]];
+    const out_height = Math.floor((x.shape[2] + 2 * this.padding[0] - kernel_height) / this.stride[0]) + 1;
+    const out_width = Math.floor((x.shape[3] + 2 * this.padding[1] - kernel_width) / this.stride[1]) + 1;
+
+    // Unfold input patches into rows: [batch * out_height * out_width, channels * kh * kw].
+    x = x.img2col(kernel_height, kernel_width, this.stride, this.padding);
+
+    // Flatten the kernels into a [channels * kh * kw, out_channels] matrix.
+    const reshaped_weights = this.W.reshape([this.W.shape[0], this.W.shape[1] * kernel_height * kernel_width]).transpose(0, 1);
+
+    x = x.matmul(reshaped_weights);
+
+    // Fold the matmul result back into the NCHW output layout.
+    x = x.reshape([batch, out_channels, out_height, out_width]);
+
+    if (this.has_bias && this.b) {
+      x = x.add(this.b); // TODO: verify bias broadcasting over [batch, channels, H, W]
+    }
+
+    return x;
+  }
+}
+
+export class MaxPool2D extends Module {
+  public kernel_size: [number, number];
+  public stride: [number, number];
+
+  constructor(kernel_size: number | [number, number], stride?: number | [number, number]) {
+    super();
+    this.kernel_size = Array.isArray(kernel_size) ? kernel_size : [kernel_size, kernel_size];
+    // As in PyTorch, the stride defaults to the kernel size when omitted.
+    this.stride = stride == null ? this.kernel_size : Array.isArray(stride) ? stride : [stride, stride];
+  }
+
+  forward(x: Tensor): Tensor {
+    return x.maxpool(this.kernel_size, this.stride);
+  }
+}
+
 /**
  * Saves the model to a JSON file.
  * @param {Module} model - Model to be saved in JSON file.
@@ -629,4 +701,4 @@ function loadParameters(source: Module, target: Module) {
       }
     }
   }
-}
\ No newline at end of file
+}
diff --git a/src/tensor.ts b/src/tensor.ts
index c82d9b5..efd146b 100644
--- a/src/tensor.ts
+++ b/src/tensor.ts
@@ -418,6 +418,16 @@ export class Tensor {
     const operation = new Reshape();
     return operation.forward(this, shape);
   }
+
+  img2col(kernel_height: number, kernel_width: number, stride: [number, number], padding: [number, number]): Tensor {
+    const operation = new Img2Col();
+    return operation.forward(this, kernel_height, kernel_width, stride, padding);
+  }
+
+  maxpool(kernel_size: [number, number], stride: [number, number]): Tensor {
+    const operation = new MaxPool();
+    return operation.forward(this, kernel_size, stride);
+  }
 }
 
 // <<< Parameter class, tensor that always tracks gradients >>> //
@@ -1237,6 +1247,213 @@ export class Reshape {
   }
 }
 
+export class MaxPool {
+  cache: any;
+
+  forward(a: Tensor, kernel_size: [number, number], stride: [number, number]): Tensor {
+    const [batch, channels, height, width] = a.shape;
+    const [kh, kw] = kernel_size;
+    const [sh, sw] = stride;
+
+    const out_height = Math.floor((height - kh) / sh + 1);
+    const out_width = Math.floor((width - kw) / sw + 1);
+    const outputData = new Array(batch).fill(0).map(() =>
+      new Array(channels).fill(0).map(() =>
+        new Array(out_height).fill(0).map(() => new Array(out_width).fill(0))
+      )
+    );
+
+    // Store max indices for backpropagation:
+    const maxIndices = new Array(batch).fill(0).map(() =>
+      new Array(channels).fill(0).map(() =>
+        new Array(out_height).fill(0).map(() => new Array(out_width).fill([0, 0]))
+      )
+    );
+
+    // Perform the max pooling operation using plain arrays:
+    for (let b = 0; b < batch; b++) {
+      for (let c = 0; c < channels; c++) {
+        for (let i = 0; i < out_height; i++) {
+          for (let j = 0; j < out_width; j++) {
+            const h_start = i * sh;
+            const w_start = j * sw;
+            const h_end = h_start + kh;
+            const w_end = w_start + kw;
+
+            // Scan the pooling window for its maximum value:
+            let max_val = -Infinity;
+            let max_idx = [0, 0];
+            for (let ki = h_start; ki < h_end; ki++) {
+              for (let kj = w_start; kj < w_end; kj++) {
+                if (ki >= 0 && ki < height && kj >= 0 && kj < width) {
+                  const val = a.data[b][c][ki][kj];
+                  if (val > max_val) {
+                    max_val = val;
+                    max_idx = [ki - h_start, kj - w_start]; // Store indices relative to the window
+                  }
+                }
+              }
+            }
+
+            outputData[b][c][i][j] = max_val;
+            maxIndices[b][c][i][j] = max_idx;
+          }
+        }
+      }
+    }
+
+    // Create the output tensor and wire it into the autograd graph:
+    this.cache = { x: a, maxIndices, stride };
+
+    const z = new Tensor(outputData, requiresGrad(a));
+    if (a instanceof Tensor && requiresGrad(a)) {
+      z.parents.push(a);
+      a.children.push(z);
+    }
+
+    z.operation = this;
+
+    return z;
+  }
+
+  backward(dz: Tensor, z: Tensor) {
+    const { x, maxIndices, stride } = this.cache;
+    const [sh, sw] = stride;
+    const [batch, channels, out_height, out_width] = dz.shape;
+
+    // Initialize the gradient tensor for the input:
+    const dx = new Array(batch).fill(0).map(() =>
+      new Array(channels).fill(0).map(() =>
+        new Array(x.shape[2]).fill(0).map(() => new Array(x.shape[3]).fill(0))
+      )
+    );
+
+    // Propagate gradients based on the stored max indices:
+    for (let b = 0; b < batch; b++) {
+      for (let c = 0; c < channels; c++) {
+        for (let i = 0; i < out_height; i++) {
+          for (let j = 0; j < out_width; j++) {
+            const [h_idx, w_idx] = maxIndices[b][c][i][j];
+            const h_start = i * sh;
+            const w_start = j * sw;
+
+            // Assign the gradient to the max index position:
+            dx[b][c][h_start + h_idx][w_start + w_idx] += dz.data[b][c][i][j];
+          }
+        }
+      }
+    }
+
+    // Use the `backward()` call to propagate gradients further:
+    if (x.requires_grad) {
+      const dxTensor = new Tensor(dx);
+      x.backward(dxTensor, z);
+    }
+  }
+}
+
+export class Img2Col {
+  cache: any;
+
+  forward(a: Tensor, kernel_height: number, kernel_width: number, stride: [number, number], padding: [number, number]): Tensor {
+    this.cache = [a, kernel_height, kernel_width, stride, padding]; // Cache all relevant data
+
+    const [batch, channels, height, width] = a.shape;
+    const out_height = Math.floor((height + 2 * padding[0] - kernel_height) / stride[0]) + 1;
+    const out_width = Math.floor((width + 2 * padding[1] - kernel_width) / stride[1]) + 1;
+
+    // One row per output position, holding the flattened input patch:
+    const col_data = [];
+
+    for (let b = 0; b < batch; b++) {
+      for (let i = 0; i < out_height; i++) {
+        for (let j = 0; j < out_width; j++) {
+          const patch = [];
+          for (let c = 0; c < channels; c++) {
+            for (let kh = 0; kh < kernel_height; kh++) {
+              for (let kw = 0; kw < kernel_width; kw++) {
+                const h_idx = i * stride[0] - padding[0] + kh;
+                const w_idx = j * stride[1] - padding[1] + kw;
+                if (h_idx >= 0 && h_idx < height && w_idx >= 0 && w_idx < width) {
+                  patch.push(a.data[b][c][h_idx][w_idx]);
+                } else {
+                  patch.push(0); // Zero-padding
+                }
+              }
+            }
+          }
+          col_data.push(patch);
+        }
+      }
+    }
+
+    const z = new Tensor(col_data, requiresGrad(a));
+    if (a instanceof Tensor && requiresGrad(a)) {
+      z.parents.push(a);
+      a.children.push(z);
+    }
+
+    z.operation = this;
+
+    return z;
+  }
+
+  backward(dz: Tensor, z: Tensor) {
+    const [a, kernel_height, kernel_width, stride, padding] = this.cache;
+    const [batch, channels, height, width] = a.shape;
+    const out_height = Math.floor((height + 2 * padding[0] - kernel_height) / stride[0]) + 1;
+    const out_width = Math.floor((width + 2 * padding[1] - kernel_width) / stride[1]) + 1;
+
+    // Initialize the gradient tensor dx with the same shape as the input a:
+    const dx = new Tensor(new Array(batch).fill(0).map(() =>
+      new Array(channels).fill(0).map(() =>
+        new Array(height).fill(0).map(() => new Array(width).fill(0))
+      )
+    ));
+
+    let col_index = 0;
+    for (let b = 0; b < batch; b++) {
+      for (let i = 0; i < out_height; i++) {
+        for (let j = 0; j < out_width; j++) {
+          // Extract the gradient row for this output position:
+          const gradient_patch = dz.data[col_index];
+          let patch_index = 0; // Index to iterate through the patch values
+
+          for (let c = 0; c < channels; c++) {
+            for (let kh = 0; kh < kernel_height; kh++) {
+              for (let kw = 0; kw < kernel_width; kw++) {
+                const h_idx = i * stride[0] - padding[0] + kh;
+                const w_idx = j * stride[1] - padding[1] + kw;
+
+                if (h_idx >= 0 && h_idx < height && w_idx >= 0 && w_idx < width) {
+                  // Accumulate the gradient from the current patch position
+                  // (padded positions simply drop their gradient):
+                  dx.data[b][c][h_idx][w_idx] += gradient_patch[patch_index];
+                }
+                patch_index++;
+              }
+            }
+          }
+
+          col_index++;
+        }
+      }
+    }
+
+    if (a.requires_grad) {
+      a.backward(dx, z);
+    }
+  }
+}
+
 // <<< Tensor Operation Aliases >>> //
 
 /**
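
As a quick sanity check for reviewers, here is a minimal usage sketch of the two new layers. It assumes the `nn` namespace and a `randn` tensor factory can be imported from `src/index.ts` as the first hunk wires them up; the import path and the `randn` re-export are assumptions for illustration, not something this patch guarantees.

```ts
// Hypothetical usage sketch, not part of this patch. The import path is
// illustrative; adjust it to however the package is actually consumed.
import { nn, randn } from "./src/index.js";

// 3 input channels -> 8 output channels, 3x3 kernel, stride 1, "same" padding.
const conv = new nn.Conv2D(3, 8, 3, 1, "same");
// 2x2 window; the stride defaults to the kernel size, halving H and W.
const pool = new nn.MaxPool2D(2);

// One RGB image in the NCHW layout used by Conv2D: [batch, channels, height, width].
let x = randn([1, 3, 32, 32]);
x = conv.forward(x); // "same" padding keeps H and W: [1, 8, 32, 32]
x = pool.forward(x); // 2x2 pooling halves H and W:   [1, 8, 16, 16]
```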
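The output sizes in `Conv2D.forward`, `Img2Col.forward`, and `Img2Col.backward` all follow the standard convolution arithmetic `floor((size + 2 * pad - kernel) / stride) + 1`. A standalone restatement with one worked example, to make the shapes in the im2col-based forward pass concrete (the helper name `convOutputSize` is illustrative, not library code):

```ts
// Restates the size arithmetic used throughout the patch.
function convOutputSize(size: number, kernel: number, stride: number, pad: number): number {
  return Math.floor((size + 2 * pad - kernel) / stride) + 1;
}

// A 4x4 single-channel input, 3x3 kernel, stride 1, padding 1:
const outH = convOutputSize(4, 3, 1, 1); // 4
const outW = convOutputSize(4, 3, 1, 1); // 4
// Img2Col emits one row per output position per batch element, so the
// unfolded matrix is [batch * outH * outW, channels * 3 * 3] = [16, 9],
// and the reshaped kernels form a [9, out_channels] matrix: the whole
// convolution then reduces to a single matmul.
```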
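`MaxPool.backward` routes each output gradient to the single input position that produced the window maximum, using the window-relative indices cached during `forward`; every other position in the window receives zero gradient. A single-channel sketch of that routing on plain arrays (the function below is illustrative, not part of the patch):

```ts
// Minimal sketch of max-pool gradient routing for one channel, mirroring
// the maxIndices bookkeeping in MaxPool.forward/backward above.
function maxPoolBackward2D(
  dz: number[][],               // gradient w.r.t. the pooled output
  maxIdx: [number, number][][], // window-relative argmax per output cell
  inH: number,
  inW: number,
  stride: [number, number]
): number[][] {
  const dx = Array.from({ length: inH }, () => new Array(inW).fill(0));
  for (let i = 0; i < dz.length; i++) {
    for (let j = 0; j < dz[i].length; j++) {
      const [hi, wi] = maxIdx[i][j];
      // Only the argmax position receives gradient, exactly as the patch
      // accumulates dz at dx[h_start + h_idx][w_start + w_idx].
      dx[i * stride[0] + hi][j * stride[1] + wi] += dz[i][j];
    }
  }
  return dx;
}
```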