diff --git a/.gitignore b/.gitignore
index 92459a9d2f..78d62bebcc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,4 +129,6 @@ dmypy.json
 .vscode/
 # no tracking mypy config file
-mypy.ini
\ No newline at end of file
+mypy.ini
+recognition/XUE4645768/README.md
+recognition/ISICs_UNet/README.md
diff --git a/recognition/ISICs_UNet/README.md b/recognition/ISICs_UNet/README.md
index 788ea17b79..f2c009212e 100644
--- a/recognition/ISICs_UNet/README.md
+++ b/recognition/ISICs_UNet/README.md
@@ -1,101 +1,52 @@
-# Segment the ISICs data set with the U-net
+# Segmenting ISICs with U-Net
-## Project Overview
-This project aim to solve the segmentation of skin lesian (ISIC2018 data set) using the U-net, with all labels having a minimum Dice similarity coefficient of 0.7 on the test set[Task 3].
+COMP3710 Report recognition problem 3 (Segmenting ISICs data set with U-Net) solved in TensorFlow
-## ISIC2018
-![ISIC example](imgs/example.jpg)
+Created by Christopher Bailey (45576430)
-Skin Lesion Analysis towards Melanoma Detection
+## The problem and algorithm
+The problem solved by this program is binary segmentation of the ISICs skin lesion data set. Segmentation is a way to label pixels in an image according to some grouping, in this case lesion or non-lesion. This translates images of skin into masks representing areas of concern for skin lesions.
-Task found in https://challenge2018.isic-archive.com/
+U-Net is a form of autoencoder in which the downsampling path is expected to learn the features of the image and the upsampling path learns how to recreate the masks. Long skip connections between the downsampling and upsampling layers are used to overcome the bottleneck of traditional autoencoders, allowing feature representations to be recreated.
+## How it works
+A four-layer padded U-Net is used, preserving skin features and mask resolution. The implementation uses Adam as the optimizer and Dice distance as the loss function, as this appeared to give quicker convergence than other methods (e.g. binary cross-entropy).
-## U-net
-![UNet](imgs/uent.png)
+The metric used is a Dice coefficient implementation. My initial implementation appeared faulty and was replaced with a third-party implementation which appears correct. Three epochs were generally sufficient to reach Dice coefficients of 0.8+ on test data sets, but occasional non-convergence was observed and could be curbed by increasing the number of epochs. Visualisation of predictions is also implemented and shows reasonable correspondence. Orange bandaids represent an interesting challenge for the implementation as presented.
-U-net is one of the popular image segmentation architectures used mostly in biomedical purposes. The name UNet is because it’s architecture contains a compressive path and an expansive path which can be viewed as a U shape. This architecture is built in such a way that it could generate better results even for a less number of training data sets.
+### Training, validation and testing split
+Training, validation and testing use a 60:20:20 split, a commonly assumed starting point suggested by course staff. U-Net in particular was developed to work "with very few training images" (Ronneberger et al., 2015). The input data for this problem consists of 2594 images and masks. This split appears to provide satisfactory results.
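+
+For reference, a minimal sketch of a Dice coefficient metric and Dice-distance loss of the kind described above (the function names and smoothing constant are illustrative, not necessarily the exact code used):
+
+```python
+import tensorflow as tf
+
+def dice_coef(y_true, y_pred, smooth=1.0):
+    # flatten the masks and measure overlap between truth and prediction
+    y_true_f = tf.reshape(y_true, [-1])
+    y_pred_f = tf.reshape(y_pred, [-1])
+    intersection = tf.reduce_sum(y_true_f * y_pred_f)
+    return (2.0 * intersection + smooth) / (
+        tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
+
+def dice_loss(y_true, y_pred):
+    # Dice distance: 1 - Dice coefficient
+    return 1.0 - dice_coef(y_true, y_pred)
+
+# model.compile(optimizer="adam", loss=dice_loss, metrics=[dice_coef])
+```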
-## Data Set Structure
+## Using the model
+### Dependencies required
+* Python3 (tested with 3.8)
+* TensorFlow 2.x (tested with 2.3)
+* glob (used to load filenames)
+* matplotlib (used for visualisations, tested with 3.3)
-data set folder need to be stored in same directory with structure same as below
-```bash
-ISIC2018
- |_ ISIC2018_Task1-2_Training_Input_x2
- |_ ISIC_0000000
- |_ ISIC_0000001
- |_ ...
- |_ ISIC2018_Task1_Training_GroundTruth_x2
- |_ ISIC_0000000_segmentation
- |_ ISIC_0000001_segmentation
- |_ ...
-```
+### Parameter tuning
+The model was developed on a GTX 1660 Ti (6GB VRAM) and certain values (notably batch size and image resolution) were set lower than might otherwise be ideal on more capable hardware. This is commented in the relevant code.
-## Dice Coefficient
+### Running the model
+The model is executed via the main.py script.
-The Sørensen–Dice coefficient is a statistic used to gauge the similarity of two samples.
+### Example output
+Given a batch size of 1 and 3 epochs the following output was observed on a single run:
+Stage | Loss | Dice coefficient
+----- | ---- | ----------------
+Epoch 1 | 0.7433 | 0.2567
+Epoch 2 | 0.3197 | 0.6803
+Epoch 3 | 0.2657 | 0.7343
+Testing | 0.1820 | 0.8180
-Further information in https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
-## Dependencies
+### Figure 1 - example visualisation plot
+Skin images in the left column, true masks in the middle, predicted masks in the right column
+![Visualisation of predictions](visual.png)
-- python 3
-- tensorflow 2.1.0
-- pandas 1.1.4
-- numpy 1.19.2
-- matplotlib 3.3.2
-- scikit-learn 0.23.2
-- pillow 8.0.1
-
-
-## Usages
-
-- Run `train.py` for training the UNet on ISIC data.
-- Run `evaluation.py` for evaluation and case present.
-
-## Advance
-
-- Modify `setting.py` for custom setting, such as different batch size.
-- Modify `unet.py` for custom UNet, such as different kernel size.
-
-## Algorithm
-
-- data set:
-  - The data set we used is the training set of ISIC 2018 challenge data which has segmentation labels.
-  - Training: Validation: Test = 1660: 415: 519 = 0.64: 0.16 : 0.2 (Training: Test = 4: 1 and in Training, further split 4: 1 for Training: Validation)
-  - Training data augmentations: rescale, rotate, shift, zoom, grayscale
-- model:
-  - Original UNet with padding which can keep the shape of input and output same.
-  - The first convolutional layers has 16 output channels.
-  - The activation function of all convolutional layers is ELU.
-  - Without batch normalization layers.
-  - The inputs is (384, 512, 1)
-  - The output is (384, 512, 1) after sigmoid activation.
-  - Optimizer: Adam, lr = 1e-4
-  - Loss: dice coefficient loss
-  - Metrics: accuracy & dice coefficient
-
-## Results
-
-Evaluation dice coefficient is 0.805256724357605.
-
-plot of train/valid Dice coefficient:
-
-![img](imgs/train_and_valid_dice_coef.png)
-
-case present:
-
-![case](imgs/case%20present.png)
-
-## Reference
-Manna, S. (2020). K-Fold Cross Validation for Deep Learning using Keras. [online] Medium. Available at: https://medium.com/the-owl/k-fold-cross-validation-in-keras-3ec4a3a00538 [Accessed 24 Nov. 2020].
-
-zhixuhao (2020). zhixuhao/unet. [online] GitHub. Available at: https://github.com/zhixuhao/unet.
-
-GitHub. (n.d.). NifTK/NiftyNet. [online] Available at: https://github.com/NifTK/NiftyNet/blob/a383ba342e3e38a7ad7eed7538bfb34960f80c8d/niftynet/layer/loss_segmentation.py [Accessed 24 Nov. 2020].
-
-Team, K. (n.d.). Keras documentation: Losses. [online] keras.io.
-Available at: https://keras.io/api/losses/#creating-custom-losses [Accessed 24 Nov. 2020].
-
-262588213843476 (n.d.). unet.py. [online] Gist. Available at: https://gist.github.com/abhinavsagar/fe0c900133cafe93194c069fe655ef6e [Accessed 24 Nov. 2020].
-
-Stack Overflow. (n.d.). python - Disable Tensorflow debugging information. [online] Available at: https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information [Accessed 24 Nov. 2020].
+## References
+Segments of code in this assignment were used from or based on the following sources:
+1. COMP3710-demo-code.ipynb from Guest Lecture
+1. https://www.tensorflow.org/tutorials/load_data/images
+1. https://www.tensorflow.org/guide/gpu
+1. Karan Jakhar (2019) https://medium.com/@karan_jakhar/100-days-of-code-day-7-84e4918cb72c
diff --git a/recognition/XUE4645768/README.md b/recognition/XUE4645768/README.md
index 36250adaa3..94bc1848c0 100644
--- a/recognition/XUE4645768/README.md
+++ b/recognition/XUE4645768/README.md
@@ -53,6 +53,52 @@ python gcn.py
 Warning: Please pay attention to whether the data path is correct when you run the gcn.py.
+# Training
+
+Learning rate = 0.01
+Weight decay = 0.005
+
+For 200 epochs:
+```
+Epoch 000: Loss 0.2894, TrainAcc 0.9126, ValAcc 0.8954
+Epoch 001: Loss 0.2880, TrainAcc 0.9126, ValAcc 0.895
+Epoch 002: Loss 0.2866, TrainAcc 0.9126, ValAcc 0.8961
+Epoch 003: Loss 0.2853, TrainAcc 0.9132, ValAcc 0.8961
+Epoch 004: Loss 0.2839, TrainAcc 0.9137, ValAcc 0.8961
+Epoch 005: Loss 0.2826, TrainAcc 0.9141, ValAcc 0.8963
+Epoch 006: Loss 0.2813, TrainAcc 0.9146, ValAcc 0.8956
+Epoch 007: Loss 0.2800, TrainAcc 0.9146, ValAcc 0.8956
+Epoch 008: Loss 0.2788, TrainAcc 0.9146, ValAcc 0.8959
+Epoch 009: Loss 0.2775, TrainAcc 0.9146, ValAcc 0.8970
+Epoch 010: Loss 0.2763, TrainAcc 0.915, ValAcc 0.8974
+Epoch 011: Loss 0.2751, TrainAcc 0.915, ValAcc 0.8972
+Epoch 012: Loss 0.2739, TrainAcc 0.915, ValAcc 0.8976
+Epoch 013: Loss 0.2727, TrainAcc 0.9157, ValAcc 0.8979
+Epoch 014: Loss 0.2716, TrainAcc 0.9157, ValAcc 0.8983
+Epoch 015: Loss 0.2704, TrainAcc 0.9161, ValAcc 0.8990
+Epoch 016: Loss 0.2693, TrainAcc 0.9168, ValAcc 0.8988
+Epoch 017: Loss 0.2682, TrainAcc 0.9181, ValAcc 0.8990
+Epoch 018: Loss 0.2671, TrainAcc 0.9179, ValAcc 0.8990
+Epoch 019: Loss 0.2660, TrainAcc 0.9179, ValAcc 0.8992
+Epoch 020: Loss 0.2650, TrainAcc 0.9188, ValAcc 0.8996
+......
+Epoch 190: Loss 0.1623, TrainAcc 0.9553, ValAcc 0.9134
+Epoch 191: Loss 0.1619, TrainAcc 0.9555, ValAcc 0.9134
+Epoch 192: Loss 0.1615, TrainAcc 0.9555, ValAcc 0.9132
+Epoch 193: Loss 0.1611, TrainAcc 0.9557, ValAcc 0.9130
+Epoch 194: Loss 0.1607, TrainAcc 0.9562, ValAcc 0.9130
+Epoch 195: Loss 0.1603, TrainAcc 0.9559, ValAcc 0.9130
+Epoch 196: Loss 0.1599, TrainAcc 0.9562, ValAcc 0.9126
+Epoch 197: Loss 0.1595, TrainAcc 0.9562, ValAcc 0.9123
+Epoch 198: Loss 0.1591, TrainAcc 0.9562, ValAcc 0.9123
+Epoch 199: Loss 0.1587, TrainAcc 0.9562, ValAcc 0.9123
+```
+
+Test accuracy: around 0.9
+
+# TSNE
+For the test: iterations = 500, reducing the embedding dimension to 2
+
+
+
 ```python
diff --git a/recognition/s4640439_siamese_network/README.MD b/recognition/s4640439_siamese_network/README.MD
new file mode 100644
index 0000000000..d13fdc1a32
--- /dev/null
+++ b/recognition/s4640439_siamese_network/README.MD
@@ -0,0 +1,56 @@
+# Siamese Networks for Alzheimer's Disease Classification Using MRI Images
+
+## Description and Problem
+This project aims to use ADNI brain MRI images to classify Alzheimer's disease.
+Using raw images, the algorithm outputs a prediction of whether or not the pictured brain has Alzheimer's disease. This is done using a combination of a Siamese Neural Network and a Binary Classifier Neural Network.
+
+## How the Algorithm Works
+Siamese Networks are essentially a slightly modified version of a CNN. You pass two image samples into the model, one after the other. The model transforms these images into vector embeddings. Then, a distance is computed between these vectors, using one of potentially many distance metrics. Ideally, two images of the same class are very close in distance and images of different classes are very far apart. The loss function and optimiser then work to update the weightings to move towards this ideal behaviour.
+
+After the Siamese Network is trained, you have a transformer which converts images to vector embeddings, keeping similar images close together.
+
+You then use these embeddings to train a dense-layered Binary Classifier.
+
+With luck, your Binary Classifier can then be used to accurately predict images by first converting test images to embeddings, and then classifying them into either a positive or negative class.
+
+## Results
+Unfortunately, as of the current version, this implementation has failed to construct a suitable classifier.
+
+All attempts at training a Binary Classifier using my Siamese Model to generate embeddings led to the Classifier getting stuck at a 51% accuracy (the ratio of negative to positive samples).
+
+I tried many different model structures and tweaked various hyperparameters, but was not able to get the Siamese Model to generate satisfactory embeddings with which to classify.
+
+This leads to the Binary Classifier quickly getting stuck in a local minimum, unable to differentiate between the classes from the embeddings alone.
+
+Running principal component analysis revealed that the issue was with the siamese model. Taking the two principal components with the highest variance and plotting them for each embedding resulted in the following scatter plot:
+![PCA graph for first two principal components of data embeddings](images/PCA.png)
+
+As can be seen, just considering the two components with the highest variance, there is a major overlap between the two classes. Thus, it is no wonder that the binary classifier was unable to assess the data sufficiently.
+
+I attempted many different tweaks to my Siamese Model in order to try to improve the embeddings.
+
+Techniques attempted include:
+* Trying various batch sizes in the range (32, 128)
+* Trying various epoch counts in the range (30, 100)
+* Changing the structure of the model - size of convolutions, number of convolutions, strides, max pooling
+* Changing the margin in the loss function in the range (0.1, 0.5)
+
+If more time were available, I would try to train the Siamese Model using various other loss functions instead, for example Triplet Loss (a sketch follows below). Additionally, I would have liked to try different distance metrics for the embeddings.
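+
+For illustration, a minimal sketch of the Triplet Loss alternative mentioned above, assuming squared L2 distances over batches of anchor/positive/negative embeddings (this is not part of the current implementation):
+
+```python
+import tensorflow as tf
+
+def triplet_loss(anchor, positive, negative, margin=0.2):
+    # squared L2 distances for anchor-positive and anchor-negative pairs
+    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
+    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
+    # penalise triplets where the negative is not at least `margin` further away
+    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + margin, 0.0))
+```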
+
+## Running the Code
+### Dependencies
+* Python 3.9 or above (for type hinting)
+* TensorFlow 2.8.2 or above
+* NumPy 1.21.3 or above
+* Matplotlib 3.4.3 or above
+* pandas 1.3.2 or above
+* scikit-learn 1.0.1 or above
+
+### Dataset and Pre-processing
+Original dataset sourced from: [ADNI dataset for Alzheimer's disease](http://adni.loni.usc.edu/)
+Pre-processed dataset (used in this project) available from: [UQ Blackboard](https://cloudstor.aarnet.edu.au/plus/s/L6bbssKhUoUdTSI)
+
+### Instructions
+1. Run dataset.py - being sure to adjust the path constants to match your personal setup
+2. Run modules.py - adjusting image size and embedding shape as necessary
+3. Run train.py - being sure to change the model save directory constant to where you would like to save your models
+4. Run predict.py - being sure to change the path names for the test data
diff --git a/recognition/s4640439_siamese_network/dataset.py b/recognition/s4640439_siamese_network/dataset.py
new file mode 100644
index 0000000000..93892a2663
--- /dev/null
+++ b/recognition/s4640439_siamese_network/dataset.py
@@ -0,0 +1,80 @@
+import numpy as np
+from PIL import Image
+import os
+import time
+
+# Data has already been separated into training and test data
+AD_TEST_PATH = "E:/ADNI/AD_NC/test/AD/"
+AD_TRAIN_PATH = "E:/ADNI/AD_NC/train/AD/"
+NC_TEST_PATH = "E:/ADNI/AD_NC/test/NC/"
+NC_TRAIN_PATH = "E:/ADNI/AD_NC/train/NC/"
+
+PRE_PROC_DATA_SAVE_LOC = "E:/ADNI/Processed"
+
+# image constants
+WIDTH = 256
+HEIGHT = 240
+CHANNELS = 1
+
+def load_data(directory_path: str, prefix: str) -> np.ndarray:
+    """
+    Processes and saves image data as a numpy array.
+
+    Attempts to find pre-processed data and load it from a save.
+    If a save cannot be found, processes the data.
+
+    Parameters:
+    - directory_path: Path to folder containing images to process
+    - prefix: String representing data type. Used for save filename
+
+    Returns:
+    - processed image dataset as numpy array.
+    """
+    save_path = os.path.join(PRE_PROC_DATA_SAVE_LOC, f"{prefix}_preprocessed.npy")
+
+    if not os.path.isfile(save_path):
+        # save cannot be found
+        start = time.time()
+        print("Processing data for file", save_path)
+
+        data = []
+
+        # loop through and process images
+        for filename in os.listdir(directory_path):
+            path = os.path.join(directory_path, filename)
+
+            img = Image.open(path)
+            img_arr = np.asarray(img).astype(np.float32)
+
+            # normalise pixel values to [-1, 1]
+            img_arr = img_arr / 127.5 - 1
+            data.append(img_arr)
+
+        data = np.reshape(data, (-1, HEIGHT, WIDTH, CHANNELS))
+
+        print("Saving data")
+        np.save(save_path, data)
+
+        elapsed = time.time() - start
+        print(f"Image preprocess time: {elapsed}")
+
+    else:
+        # save found
+        print("Loading preprocessed data")
+        data = np.load(save_path)
+
+    return data
+
+def main():
+    """
+    Performs the first loading and pre-processing of the data.
+
+    The load_data() function saves the data to avoid these computations needing to be re-computed.
+    """
+    training_data_positive = load_data(AD_TRAIN_PATH, "ad_train")
+    training_data_negative = load_data(NC_TRAIN_PATH, "nc_train")
+    # test data comes from the test directories, not the training ones
+    test_data_positive = load_data(AD_TEST_PATH, "ad_test")
+    test_data_negative = load_data(NC_TEST_PATH, "nc_test")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/recognition/s4640439_siamese_network/images/PCA.png b/recognition/s4640439_siamese_network/images/PCA.png
new file mode 100644
index 0000000000..aa33ac7a66
Binary files /dev/null and b/recognition/s4640439_siamese_network/images/PCA.png differ
diff --git a/recognition/s4640439_siamese_network/modules.py b/recognition/s4640439_siamese_network/modules.py
new file mode 100644
index 0000000000..07bb1756eb
--- /dev/null
+++ b/recognition/s4640439_siamese_network/modules.py
@@ -0,0 +1,72 @@
+import tensorflow as tf
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Conv2D, LeakyReLU, Flatten, MaxPooling2D
+from tensorflow.keras.layers import BatchNormalization, Dropout, Dense
+
+IMAGE_SIZE = (240, 256, 1)
+ALPHA = 0.2
+
+# must match the flattened output size of build_siamese() for the given IMAGE_SIZE
+SIAMESE_OUTPUT_SHAPE = (512,)
+
+def build_siamese():
+    """
+    Generate Siamese model
+    This model needs to be a CNN that reduces an image to a vector
+    """
+    model = Sequential()
+
+    model.add(Conv2D(32, kernel_size=3, strides=2, input_shape=IMAGE_SIZE,
+                     padding="same"))
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    model.add(Dropout(0.25))
+    model.add(Conv2D(32, kernel_size=3, strides=2, padding="same"))
+    model.add(BatchNormalization(momentum=0.8))
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    model.add(MaxPooling2D((2, 2)))
+
+    model.add(Dropout(0.25))
+    model.add(Conv2D(64, kernel_size=3, strides=2, padding="same"))
+    model.add(BatchNormalization(momentum=0.8))
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    model.add(Dropout(0.25))
+    model.add(Conv2D(64, kernel_size=3, strides=2, padding="same"))
+    model.add(BatchNormalization(momentum=0.8))
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    model.add(MaxPooling2D((2, 2)))
+
+    model.add(Dropout(0.25))
+    model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
+    model.add(BatchNormalization(momentum=0.8))
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    model.add(Dropout(0.25))
+    model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
+    model.add(BatchNormalization(momentum=0.8))
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    model.add(Dropout(0.25))
+    model.add(Flatten())
+    model.add(LeakyReLU(alpha=ALPHA))
+
+    return model
+
+def build_binary():
+    """
+    Generate binary classifier
+    This model needs to be a binary classifier that takes an output embedding from the
+    siamese model and converts it into a single value in the range [0,1] for classification
+    """
+    model = Sequential()
+
+    model.add(Dense(64, input_shape=SIAMESE_OUTPUT_SHAPE, activation="relu"))
+    model.add(Dense(16, activation="relu"))
+    model.add(Dense(4, activation="relu"))
+    model.add(Dense(1, activation="sigmoid"))
+
+    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
+
+    return model
\ No newline at end of file
diff --git a/recognition/s4640439_siamese_network/predict.py b/recognition/s4640439_siamese_network/predict.py
new file mode 100644
index 0000000000..5d4e1fca14
--- /dev/null
+++ b/recognition/s4640439_siamese_network/predict.py
@@ -0,0 +1,38 @@
+import tensorflow as tf
+from train import *
+from dataset import *
+
+TEST_DATA_POSITIVE_LOC = "ad_test"
+TEST_DATA_NEGATIVE_LOC = "nc_test"
+
+def main():
+    """
+    Used to load pre-trained models and then evaluate them using previously unseen test data
+    """
+
+    # load testing data (from the test paths, not the training paths)
+    test_data_positive = load_data(AD_TEST_PATH, TEST_DATA_POSITIVE_LOC)
+    test_data_negative = load_data(NC_TEST_PATH, TEST_DATA_NEGATIVE_LOC)
+
+    # load models
+    siamese_model = tf.keras.models.load_model(os.path.join(MODEL_SAVE_DIR, "siamese_model.h5"))
+    binary_model = tf.keras.models.load_model(os.path.join(MODEL_SAVE_DIR, "binary_model.h5"))
+
+    # generate labels - 1: positive, 0: negative
+    pos_labels = np.ones(test_data_positive.shape[0])
+    neg_labels = np.zeros(test_data_negative.shape[0])
+
+    # convert image data to embeddings
+    pos_embeddings = siamese_model.predict(test_data_positive)
+    neg_embeddings = siamese_model.predict(test_data_negative)
+
+    # merge positive and negative datasets
+    embeddings = np.concatenate((pos_embeddings, neg_embeddings))
+    labels = np.concatenate((pos_labels, neg_labels))
+
+    results = binary_model.evaluate(embeddings, labels)
+
+    print(results)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/recognition/s4640439_siamese_network/train.py b/recognition/s4640439_siamese_network/train.py
new file mode 100644
index 0000000000..fc3b780981
--- /dev/null
+++ b/recognition/s4640439_siamese_network/train.py
@@ -0,0 +1,255 @@
+import tensorflow as tf
+
+from modules import *
+from dataset import *
+import time
+import os
+from sklearn.decomposition import PCA
+import pandas as pd
+import matplotlib.pyplot as plt
+
+EPOCHS = 100
+BATCH_SIZE = 64
+BUFFER_SIZE = 20000
+MARGIN = 0.2
+
+MODEL_SAVE_DIR = "E:/ADNI/models"
+
+
+def siamese_loss(x0, x1, label: int, margin: float):
+    """
+    Custom loss function for siamese network.
+
+    Based on contrastive loss.
+
+    Takes two vectors, then calculates their distance.
+
+    Vectors of the same class are rewarded for being close and punished for being far away.
+    Vectors of different classes are punished for being close and rewarded for being far away.
+
+    Parameters:
+    - x0, x1 -- tensor batch of vectors. Shape: (batch size, embedding size)
+    - label -- whether or not the two vectors are from the same class. 1 = yes, 0 = no
+
+    Returns:
+    - loss value
+    """
+    dist = tf.reduce_sum(tf.square(x0 - x1), 1)
+    dist_sqrt = tf.sqrt(dist)
+
+    # same-class pairs (label = 1) are penalised by their squared distance;
+    # different-class pairs (label = 0) are penalised when closer than the margin
+    loss = label * dist + (1 - label) * tf.square(tf.maximum(0., margin - dist_sqrt))
+    loss = 0.5 * tf.reduce_mean(loss)
+
+    return loss
+
+
+@tf.function
+def train_step(siamese, siamese_optimiser, images1, images2, same_class: bool):
+    """
+    Executes one step of training the siamese model.
+    Backpropagates to update weightings.
+
+    Parameters:
+    - siamese -- the siamese network
+    - siamese_optimiser -- the optimiser which will be used for backprop
+    - images1, images2 -- batch of image data which is either positive or negative
+      shape: (batch size, width, height, number of channels)
+    - same_class -- bool flag representing whether the two sets of images are of the same class
+
+    Returns:
+    - loss value from this training step
+    """
+    with tf.GradientTape() as siamese_tape:
+
+        # convert images to embeddings
+        x0 = siamese(images1, training=True)
+        x1 = siamese(images2, training=True)
+        label = int(same_class)
+
+        loss = siamese_loss(x0, x1, label, MARGIN)
+
+    siamese_gradients = siamese_tape.gradient(
+        loss, siamese.trainable_variables)
+
+    siamese_optimiser.apply_gradients(zip(
+        siamese_gradients, siamese.trainable_variables))
+
+    return loss
+
+
+def train_siamese_model(model, optimiser, pos_dataset, neg_dataset, epochs) -> None:
+    """
+    Trains the siamese model.
+
+    Alternates between training on images of the same class and images of different classes.
+
+    Parameters:
+    - model -- the siamese model to train
+    - optimiser -- the optimiser used for backpropagation
+    - pos_dataset, neg_dataset -- pre-batched TensorFlow datasets
+    - epochs -- number of epochs to train for
+    """
+
+    start = time.time()
+    print("Beginning Siamese Network Training")
+
+    for epoch in range(epochs):
+        epoch_start = time.time()
+
+        i = 1
+        for pos_batch, neg_batch in zip(pos_dataset, neg_dataset):
+            if i % 20 == 0:
+                print("-----------------------")
+                print("Batch number", i, "complete")
+                print(f"{i} batches completed in {time.time() - epoch_start}")
+                print(f"Avg batch time: {(time.time() - epoch_start) / i}")
+
+            # alternate between same-same training and same-diff training
+            if i % 2 == 0:
+                # same training
+                same_class = True
+
+                # split batches
+                pos1, pos2 = tf.split(pos_batch, num_or_size_splits=2)
+                neg1, neg2 = tf.split(neg_batch, num_or_size_splits=2)
+
+                pos_loss = train_step(model, optimiser, pos1, pos2, same_class)
+                neg_loss = train_step(model, optimiser, neg1, neg2, same_class)
+
+            else:
+                # diff training
+                same_class = False
+                diff_loss = train_step(model, optimiser, pos_batch, neg_batch, same_class)
+
+            i += 1
+
+        epoch_elapsed = time.time() - epoch_start
+        print(f"Epoch {epoch} - training time: {epoch_elapsed}")
+
+    elapsed = time.time() - start
+    print(f"Siamese Network Training Completed in {elapsed}")
+
+
+def train_binary_classifier(model, siamese_model, training_data_positive, training_data_negative):
+    """
+    Trains the binary classifier used to classify the images into one of the two classes.
+
+    Converts raw data to embeddings then fits the model.
+
+    Parameters:
+    - model -- the binary classification model to train
+    - siamese_model -- the pre-trained siamese model used to generate embeddings
+    - training_data_positive, training_data_negative -- raw image data
+
+    Returns:
+    - Keras History object from fitting the classifier
+    """
+    start = time.time()
+    print("Beginning Binary Classifier Training")
+
+    # generate labels - 1: positive, 0: negative
+    pos_labels = np.ones(training_data_positive.shape[0])
+    neg_labels = np.zeros(training_data_negative.shape[0])
+
+    # convert image data to embeddings
+    pos_embeddings = siamese_model.predict(training_data_positive)
+    neg_embeddings = siamese_model.predict(training_data_negative)
+
+    # merge positive and negative datasets
+    embeddings = np.concatenate((pos_embeddings, neg_embeddings))
+    labels = np.concatenate((pos_labels, neg_labels))
+
+    history = model.fit(embeddings, labels, epochs=EPOCHS, batch_size=BATCH_SIZE)
+
+    elapsed = time.time() - start
+    print(f"Binary Classifier Training Completed in {elapsed}")
+
+    return history
+
+def run_pca(siamese_model, training_data_positive, training_data_negative):
+    """
+    Run Principal Component Analysis on the Siamese Model embeddings and plot the
+    two components with the highest variance.
+
+    Code adapted from https://towardsdatascience.com/pca-using-python-scikit-learn-e653f8989e60
+
+    Parameters:
+    - siamese_model -- The model with which to generate the embeddings to perform PCA on
+    - training_data_positive, training_data_negative -- raw image data as numpy arrays
+    """
+
+    pos_labels = np.ones(training_data_positive.shape[0])
+    neg_labels = np.zeros(training_data_negative.shape[0])
+
+    pos_embeddings = siamese_model.predict(training_data_positive)
+    neg_embeddings = siamese_model.predict(training_data_negative)
+
+    embeddings = np.concatenate((pos_embeddings, neg_embeddings))
+    labels = np.concatenate((pos_labels, neg_labels))
+
+    pca = PCA(n_components=2)
+    principal_components = pca.fit_transform(embeddings)
+
+    principal_df = pd.DataFrame(data=principal_components,
+                                columns=["principal component 1", "principal component 2"])
+    labels_df = pd.DataFrame(labels, columns=["label"])
+    final_df = pd.concat([principal_df, labels_df], axis=1)
+
+    # plot first two principal components
+    fig = plt.figure(figsize=(8, 8))
+    ax = fig.add_subplot(1, 1, 1)
+    ax.set_xlabel("Principal Component 1", fontsize=15)
+    ax.set_ylabel("Principal Component 2", fontsize=15)
+    ax.set_title("2 component PCA", fontsize=20)
+    targets = [1.0, 0.0]
+    colors = ["r", "g"]
+    for target, color in zip(targets, colors):
+        indices_to_keep = final_df["label"] == target
+        ax.scatter(final_df.loc[indices_to_keep, "principal component 1"],
+                   final_df.loc[indices_to_keep, "principal component 2"],
+                   c=color,
+                   s=50)
+    ax.legend(targets)
+    ax.grid()
+    # display the figure (otherwise nothing is shown when run as a script)
+    plt.show()
+
+def main():
+    """
+    Trains the models
+
+    Loads training data using dataset.py
+    Generates the models using modules.py
+    Uses functions defined above to train the models
+    Saves the models for later prediction
+    """
+
+    # get training data
+    training_data_positive = load_data(AD_TRAIN_PATH, "ad_train")
+    training_data_negative = load_data(NC_TRAIN_PATH, "nc_train")
+
+    # convert to tensors for siamese training
+    train_data_pos = tf.data.Dataset.from_tensor_slices(training_data_positive
+        ).shuffle(BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE, drop_remainder=True)
+    train_data_neg = tf.data.Dataset.from_tensor_slices(training_data_negative
+        ).shuffle(BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE, drop_remainder=True)
+
+    # build models
+    siamese_model = build_siamese()
+    binary_classifier = build_binary()
+
+    # create optimiser for siamese model
+    siamese_optimiser = tf.keras.optimizers.Adam(0.05)
+
+    # train the models
+    train_siamese_model(siamese_model, siamese_optimiser, train_data_pos, train_data_neg, EPOCHS)
+
+    # optionally, run principal component analysis on siamese model to assess embeddings
+    run_pca(siamese_model, training_data_positive, training_data_negative)
+
+    train_binary_classifier(binary_classifier, siamese_model, training_data_positive, training_data_negative)
+
+    # save the models
+    siamese_model.save(os.path.join(MODEL_SAVE_DIR, "siamese_model.h5"))
+    binary_classifier.save(os.path.join(MODEL_SAVE_DIR, "binary_model.h5"))
+
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
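
A small complement to run_pca above: scikit-learn's PCA also exposes explained_variance_ratio_, which indicates how much of the embedding variance the two plotted components actually capture. A minimal sketch (the helper name is illustrative and not part of the submitted scripts):

```python
from sklearn.decomposition import PCA
import numpy as np

def report_pca_variance(embeddings: np.ndarray) -> None:
    # fit a 2-component PCA and report how much variance the 2D plot retains;
    # a low total suggests the scatter plot may understate class separation
    pca = PCA(n_components=2)
    pca.fit(embeddings)
    print("Explained variance ratio:", pca.explained_variance_ratio_)
    print("Total variance captured:", pca.explained_variance_ratio_.sum())
```

If the two components capture only a small fraction of the total variance, overlap in the 2D plot does not by itself prove the embeddings are inseparable.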