From f01aa9fe0d908a8f3d60fad8c48cfb41a74b838f Mon Sep 17 00:00:00 2001
From: iamaayushrivastava <iamaayushrivastava@gmail.com>
Date: Sun, 5 Jan 2025 17:03:13 +0530
Subject: [PATCH 1/3] Add scripts

---
 scripts/create-ll-files.sh                    |  57 ++++
 scripts/models/histogram-mlp-model.py         | 125 +++++++
 scripts/models/ir2vec-classifier.py           | 315 ++++++++++++++++++
 scripts/models/milepost-mlp-model.py          | 159 +++++++++
 scripts/preprocessing/copy-folders.py         |  20 ++
 .../preprocessing/copy-profiled-ll-files.py   |  44 +++
 scripts/preprocessing/delete-subfolders.py    |  32 ++
 scripts/preprocessing/folder-count.py         |  40 +++
 scripts/preprocessing/folder-preprocessing.py |  50 +++
 .../generate-ir2vec-embeddings.py             |  84 +++++
 scripts/preprocessing/ir2vec-preprocess.py    | 132 ++++++++
 scripts/preprocessing/merge-directories.py    |  46 +++
 scripts/preprocessing/rename-folders.py       |  39 +++
 scripts/preprocessing/split-dataset.py        | 206 ++++++++++++
 .../preprocessing/train-test-val-to-csv.py    |  98 ++++++
 .../generate-input-folder-with-input-files.py |  82 +++++
 ...led-ll-files-with-testcases-using-cores.sh | 118 +++++++
 .../profiling/profiling-without-parallel.sh   | 123 +++++++
 .../profiling/profiling-without-testcases.sh  | 101 ++++++
 scripts/read-npz.files.py                     |   7 +
 20 files changed, 1878 insertions(+)
 create mode 100644 scripts/create-ll-files.sh
 create mode 100644 scripts/models/histogram-mlp-model.py
 create mode 100644 scripts/models/ir2vec-classifier.py
 create mode 100644 scripts/models/milepost-mlp-model.py
 create mode 100644 scripts/preprocessing/copy-folders.py
 create mode 100644 scripts/preprocessing/copy-profiled-ll-files.py
 create mode 100644 scripts/preprocessing/delete-subfolders.py
 create mode 100644 scripts/preprocessing/folder-count.py
 create mode 100644 scripts/preprocessing/folder-preprocessing.py
 create mode 100644 scripts/preprocessing/generate-ir2vec-embeddings.py
 create mode 100644 scripts/preprocessing/ir2vec-preprocess.py
 create mode 100644 scripts/preprocessing/merge-directories.py
 create mode 100644 scripts/preprocessing/rename-folders.py
 create mode 100644 scripts/preprocessing/split-dataset.py
 create mode 100644 scripts/preprocessing/train-test-val-to-csv.py
 create mode 100644 scripts/profiling/generate-input-folder-with-input-files.py
 create mode 100644 scripts/profiling/generate-profiled-ll-files-with-testcases-using-cores.sh
 create mode 100644 scripts/profiling/profiling-without-parallel.sh
 create mode 100644 scripts/profiling/profiling-without-testcases.sh
 create mode 100644 scripts/read-npz.files.py

diff --git a/scripts/create-ll-files.sh b/scripts/create-ll-files.sh
new file mode 100644
index 0000000..6b9f2f0
--- /dev/null
+++ b/scripts/create-ll-files.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+CLANG=/usr/lib/llvm-17/bin/clang-17
+SRC_DIR=/Pramana/IR2Vec/datasets/Codeforces-Src-Files
+ll_FD=/Pramana/IR2Vec/dataset-opt-levels/codeforces/O0
+
+mkdir -p "${ll_FD}"
+
+# Determine the range of numeric subfolder names
+FIRST=$(find "${SRC_DIR}" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | grep -E '^[0-9]+$' | sort -n | head -1)
+LAST=$(find "${SRC_DIR}" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | grep -E '^[0-9]+$' | sort -n | tail -1)
+
+echo "First: $FIRST"
+echo "Last: $LAST"
+
+# Validate that FIRST and LAST are numeric
+if ! [[ "$FIRST" =~ ^[0-9]+$ && "$LAST" =~ ^[0-9]+$ ]]; then
+    echo "Error: Subfolder names must be numeric. Check the directory structure."
+    exit 1
+fi
+
+# Throttle: block until fewer than MAX_CORES background compile jobs are running
+MAX_CORES=40
+semaphore() {
+    while [ $(jobs -r | wc -l) -ge $MAX_CORES ]; do
+        sleep 1
+    done
+}
+
+# Loop through the dynamically calculated range of subfolders
+for dir in $(seq $FIRST $LAST); do
+    DIR=${dir}
+    FULL_DIR="${SRC_DIR}/${DIR}"
+    echo "${DIR} ${FULL_DIR}"
+
+    # Check if the directory exists
+    if [ -d "$FULL_DIR" ]; then
+        mkdir -p "${ll_FD}/${DIR}"
+
+        # Read from process substitution, not a pipe: the loop then runs in this shell, so the jobs it starts are seen by semaphore() and by the final wait
+        while IFS= read -r -d '' line; do
+            semaphore # Wait if too many jobs are running
+            (
+                filename=$(basename "$line")
+                filename=${filename%.*}
+                ${CLANG} -O0 -S -emit-llvm -I "$FULL_DIR" "$line" -o "${ll_FD}/${DIR}/${filename}.ll"
+                # ${CLANG} -Xclang -disable-O0-optnone -S -emit-llvm -I $dir "$line" -o ${ll_FD}/${DIR}/"${filename}.ll"
+            ) &
+        done < <(find "$FULL_DIR" -regex '.*\.\(c\|cc\|cpp\)' -print0)
+    else
+        echo "Directory ${FULL_DIR} does not exist. Skipping."
+    fi
+done
+
+wait
+
+echo "Done"
\ No newline at end of file
diff --git a/scripts/models/histogram-mlp-model.py b/scripts/models/histogram-mlp-model.py
new file mode 100644
index 0000000..d523624
--- /dev/null
+++ b/scripts/models/histogram-mlp-model.py
@@ -0,0 +1,125 @@
+import os
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.models import Sequential
+
+# Model definition
+def getModel(input_dim, output_dim):
+    """Build, compile and return the MLP classifier; the layer summary is printed as a side effect."""
+    def glorot():
+        # Each Dense layer gets its own unseeded Glorot-normal initializer.
+        return keras.initializers.glorot_normal(seed=None)
+
+    network = Sequential()
+
+    # Hidden stages: Dense -> BatchNorm -> ReLU -> Dropout; only the first declares the input shape.
+    for stage, units in enumerate((650, 600, 500)):
+        if stage == 0:
+            network.add(Dense(units, input_shape=(input_dim,), kernel_initializer=glorot()))
+        else:
+            network.add(Dense(units, kernel_initializer=glorot()))
+        network.add(BatchNormalization())
+        network.add(Activation('relu'))
+        network.add(Dropout(0.25))
+
+    # Output head: one unit per class, batch-normalized before the softmax.
+    network.add(Dense(output_dim, kernel_initializer=glorot()))
+    network.add(BatchNormalization())
+    network.add(Activation('softmax'))
+
+    adam = keras.optimizers.Adam(learning_rate=0.001)
+    network.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
+    network.summary()
+    return network
+
+# Load data from directory
+def load_data_from_directory(directory):
+    """Load every "values" array under <directory>/<class>/*.npz; return (data, labels) as numpy arrays."""
+    # Only sub-directories are classes, so a stray file can no longer consume a label index.
+    classes = sorted(c for c in os.listdir(directory) if os.path.isdir(os.path.join(directory, c)))
+    class_to_label = {cls: idx for idx, cls in enumerate(classes)}
+    data, labels = [], []
+    for cls in classes:
+        class_path = os.path.join(directory, cls)
+        for file_name in os.listdir(class_path):
+            if not file_name.endswith(".npz"):
+                continue
+            file_path = os.path.join(class_path, file_name)
+            try:
+                with np.load(file_path) as archive:  # close the archive handle promptly
+                    data.append(archive["values"].flatten())
+                labels.append(class_to_label[cls])
+            except Exception as e:  # best-effort: report the unreadable file and keep going
+                print(f"Failed to load {file_path}: {e}")
+
+    return np.array(data), np.array(labels)
+
+# Prepare train and test data
+def prepare_data(train_dir, test_dir):
+    """Return (X_train, y_train, X_test, y_test) loaded from the two directory trees."""
+    train_features, train_labels = load_data_from_directory(train_dir)
+    test_features, test_labels = load_data_from_directory(test_dir)
+    return train_features, train_labels, test_features, test_labels
+
+# Main function
+def main():
+    """Train the MLP on the training directory, print test-set metrics, save the model and return it."""
+    # Paths to the train and test directories
+    # NOTE(review): both point at the milepost embeddings although this is the histogram script — confirm.
+    train_dir = "/home/aayusphere/Program-Classification/yali/Volume/Embeddings/milepost/codeforcestrainO0"
+    test_dir = "/home/aayusphere/Program-Classification/yali/Volume/Embeddings/milepost/codeforcestestO0"
+
+    # Prepare data
+    X_train, y_train, X_test, y_test = prepare_data(train_dir, test_dir)
+
+    # Check data shapes
+    print(f"Training data shape: {X_train.shape}")
+    print(f"Training labels shape: {y_train.shape}")
+    print(f"Testing data shape: {X_test.shape}")
+    print(f"Testing labels shape: {y_test.shape}")
+
+    # One-hot encode labels. The width is the highest training label + 1 rather than the number of
+    # distinct labels, so a gap in the label ids cannot index past the encoding.
+    num_classes = int(y_train.max()) + 1
+    y_train = to_categorical(y_train, num_classes)
+    y_test = to_categorical(y_test, num_classes)
+
+    # No train-test split for validation, using all X_train and y_train for training
+    model = getModel(X_train.shape[1], num_classes)
+
+    # NOTE(review): an integer save_freq counts batches, not epochs — confirm 500 batches is intended.
+    mc = keras.callbacks.ModelCheckpoint(
+        filepath='/home/aayusphere/Program-Classification/milepost/weights_epoch_{epoch:08d}.weights.h5',
+        save_weights_only=True,
+        save_freq=500)
+
+    # Train the model
+    model.fit(X_train, y_train, batch_size=128, epochs=2000,
+              verbose=1, callbacks=[mc])
+
+    # Evaluate model
+    y_pred = np.argmax(model.predict(X_test), axis=1)
+    y_true = np.argmax(y_test, axis=1)
+    print("Classification Report:")
+    print(classification_report(y_true, y_pred))
+    print("Confusion Matrix:")
+    print(confusion_matrix(y_true, y_pred))
+    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
+
+    # Save the trained model and report the path that was actually written
+    model_path = "codeforces-milepost-ir2vec-model.h5"
+    model.save(model_path)
+    print(f"Saved model to disk as '{model_path}'.")
+
+    return model
+
+# Execute the script
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/models/ir2vec-classifier.py b/scripts/models/ir2vec-classifier.py
new file mode 100644
index 0000000..fa87090
--- /dev/null
+++ b/scripts/models/ir2vec-classifier.py
@@ -0,0 +1,315 @@
+# IITH-Compilers - Rohit Aggarwal, VenkataKeerthy
+
+# First run preprocess.py to split the data into training, testing and validation sets if the split data is not already present.
+
+# Usage Instructions
+# python IR2Vec_classifier.py [options]
+# --train: Path of the training data file
+# --test: Path of the testing data file
+# --val: Path of the validation data file
+# --epochs: Number of Epochs
+# --batch_size: Size of the batch
+# --model: Path of the trained Model
+
+# Structure of the Input data
+# label<\t>vector_dim1<\t>vector_dim2<\t>.......<\t>vector_dimN
+
+# For Training:
+# python IR2Vec_classifier.py --train <PATH of the train data file> [--test <Path of the test data file>] [--val <Path of the val data file>] [--epochs XX] [--batch_size YY]
+# While training, a snapshot of the model is saved every 500 epochs.
+# A dictionary.pkl file is produced, which holds the data needed for testing.
+
+# For retraining the model further from saved checkpoint of the model:
+# python IR2Vec_classifier.py --train <PATH of the data file> [--test <Path of the test data file>] [--val <Path of the val data file>] [--epochs XX] [--batch_size YY] --model <saved model path>
+
+# For Testing:
+# python IR2Vec_classifier.py --test <Path of the test data file>  --model <saved model path>
+#---------------------------------------------------------------------------------------------------
+
+# import numpy as np
+# import pandas as pd
+# from sklearn.decomposition import IncrementalPCA
+# from sklearn.metrics import accuracy_score
+# from sklearn.model_selection import train_test_split
+
+# import keras
+# from keras import optimizers
+# from keras.layers import (Activation, Dense, Dropout)
+# from keras.layers.normalization import BatchNormalization
+# from keras.models import Sequential
+# import argparse
+# import pickle
+
+import numpy as np
+import pandas as pd
+from sklearn.decomposition import IncrementalPCA
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.models import Sequential
+import argparse
+import pickle
+
+# Create the model
+def getModel(input_dim, output_dim):
+    """Build and compile the MLP classifier: three Dense/BatchNorm/ReLU/Dropout stages and a softmax head."""
+    model = Sequential()
+    model.add(
+        Dense(650,
+              input_shape=(input_dim, ),
+              kernel_initializer=keras.initializers.glorot_normal(seed=None))) # Initializes weights using Glorot normal initializer
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Dropout(0.25))
+
+    model.add(
+        Dense(600,
+              kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Dropout(0.25))
+
+    model.add(
+        Dense(500,
+              kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    model.add(BatchNormalization())
+    model.add(Activation('relu'))
+    model.add(Dropout(0.25))
+
+    model.add(
+        Dense(output_dim,
+              kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    model.add(BatchNormalization())
+    model.add(Activation('softmax'))
+    # `learning_rate` replaces the deprecated `lr`; `decay=0.0` was a no-op that newer optimizers reject.
+    opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
+    model.compile(loss=keras.losses.categorical_crossentropy,
+                  optimizer=opt, metrics=['accuracy'])
+    model.summary()
+    return model
+
+# train the model on the  given data
+def train(x_train, y_train, x_test, y_test,x_val, y_val, options, model):
+    """Fit the classifier on the training split, then save the model and its preprocessing state.
+
+    :param x_train, y_train: training features and integer labels (shifted down by one before
+        one-hot encoding, so presumably 1-based — confirm against the data files).
+    :param x_test, y_test: optional test split, evaluated after training; None skips it.
+    :param x_val, y_val: optional validation split handed to fit(); None skips it.
+    :param options: parsed arguments; only batch_size and epochs are read.
+    :param model: an already trained model to continue from, or None to build a new one.
+
+    Side effects: writes periodic checkpoints, the final .h5 model and dictionary.pkl.
+    """
+    # Training-split minimum/maximum, reused to scale the validation and test splits
+    X_min = x_train.min()
+    X_max = x_train.max()
+
+    # Ensure the correct number of classes is derived
+    num_classes = np.unique(y_train).shape[0]
+    print(f" Number of classes: {num_classes}")
+
+    # The width actually used comes from the largest raw label, taken before the labels are
+    # shifted down below (adjustment originally added for the Code Jam dataset).
+    num_classes = np.max(y_train) + 1
+    print(f"Adjusted number of classes: {num_classes}")
+
+    # Normalize and preprocess data
+    x_train = (x_train - X_min) / (X_max - X_min)
+    x_train = np.array(x_train)
+    y_train = np.array(y_train)
+
+    # Shift the labels down by one so they can serve as one-hot indices
+    y_train = y_train - 1
+    print(f" After subtracting -1 from labels: {y_train}")
+    print(f" After subtracting -1 from labels: {np.unique(y_train).shape[0]}")
+
+    # Print the unique values in y_train to check the range of labels
+    print("Unique values in y_train:", np.unique(y_train))
+
+    y_train = keras.utils.to_categorical(y_train, num_classes)
+
+    print(y_train)
+
+    # PCA transformation (fitted on the training split only; other splits are only transformed)
+    ipca = IncrementalPCA(n_components=300)
+    ipca.fit(x_train)
+    x_train = ipca.transform(x_train)
+
+    # Handle validation data similarly
+    val_tuple = None
+    if x_val is not None:
+        x_val = (x_val - X_min) / (X_max - X_min)
+        x_val = np.array(x_val)
+        y_val = np.array(y_val)
+        y_val = y_val - 1
+        y_val = keras.utils.to_categorical(y_val, num_classes)
+        x_val = ipca.transform(x_val)
+        val_tuple = (x_val, y_val)
+
+    # Setup model and training parameters
+    batch_size = options.batch_size
+    epochs = options.epochs
+
+    # Checkpoint the full model every 500 epochs. The deprecated `period` argument is replaced by
+    # an integer save_freq; that value counts batches, so it is scaled by the batches per epoch.
+    steps_per_epoch = int(np.ceil(x_train.shape[0] / batch_size))
+    mc = keras.callbacks.ModelCheckpoint(filepath='/home/cs24mtech02001/Aayush-IR2Vec/program-classification-model-weights/ir2vec/fa/codejam-ir2vec-fa-model/weights{epoch:08d}.h5', save_weights_only=False, save_freq=500 * steps_per_epoch)
+
+    # Build a fresh network unless a trained one was passed in for further training
+    if model is None:
+        model = getModel(x_train.shape[1], num_classes)
+
+    # Train; validation_data stays None when no validation split was given
+    model.fit(x_train,
+              y_train,
+              batch_size=batch_size,
+              epochs=epochs,
+              verbose=1,
+              validation_data=val_tuple, callbacks=[mc])
+
+    # The final model is saved under a fixed name in the current working directory
+    model.save("codejam-137-ir2vec-fa-model.h5")
+    print("Saved model to disk")
+
+    # Optional test evaluation with the same scaling, label shift and PCA as the training data
+    if x_test is not None:
+        x_test = (x_test - X_min) / (X_max - X_min)
+        x_test = np.array(x_test)
+        y_test = np.array(y_test)
+        y_test = y_test - 1
+        y_test = keras.utils.to_categorical(y_test, num_classes)
+        x_test = ipca.transform(x_test)
+        score = model.evaluate(x_test, y_test, verbose=0)
+        print('Test Accuracy : {acc:.3f}%'.format(acc=score[1]*100))
+
+    # Persist what test() reads back, in this order: class count, scaling bounds, fitted PCA
+    with open('dictionary.pkl', 'wb') as f:
+        pickle.dump(num_classes, f)
+        pickle.dump(X_min, f)
+        pickle.dump(X_max, f)
+        pickle.dump(ipca, f)
+
+
+# test the learnt model on the data
+def test(X, targetLabel, model):
+    """Evaluate `model` on (X, targetLabel) using the preprocessing state stored in dictionary.pkl."""
+    # Four objects are read back one after another: class count, min, max and the fitted PCA.
+    with open('dictionary.pkl', 'rb') as state_file:
+        num_classes, X_min, X_max, ipca = (pickle.load(state_file) for _ in range(4))
+
+    # Min-max scale with the stored bounds, then project with the stored PCA.
+    features = ipca.transform(np.array((X - X_min) / (X_max - X_min)))
+
+    # Labels are shifted down by one before one-hot encoding.
+    one_hot = keras.utils.to_categorical(np.array(targetLabel) - 1, num_classes)
+
+    # The second entry of the evaluation result is reported as the accuracy.
+    evaluation = model.evaluate(features, one_hot, verbose=0)
+    accuracy_pct = evaluation[1]*100
+    print('Test accuracy : {acc:.3f}%'.format(acc=accuracy_pct))
+
+# Entry Point of the program
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-tr', '--train', dest='train', metavar='FILE', help='Path Of the Data/embedding file having training data', default=None)
+    parser.add_argument('-t', '--test', dest='test', metavar='FILE', help='Path Of the Data/embedding file having testing data', default=None)
+    parser.add_argument('-v', '--val', dest='val', metavar='FILE', help='Path Of the Data/embedding file having validation data', default=None)
+
+
+    # parser.add_argument('-t', '--test', dest='test', action="store_true")
+    parser.add_argument('-e', '--epochs', dest='epochs', required=False, type=int, help='Number of Epoches', default=100)
+    parser.add_argument('-bs', '--batch_size', dest='batch_size', required=False, type=int, help='Tune the batch size', default=32)
+    parser.add_argument('-m', '--model', dest='model', metavar='FILE', help='Path Of the file with learnt weights.', required=False, default=None)
+    args = parser.parse_args()
+
+    # trained/Learnt model is required for the testing phase.
+    if args.test is None and  args.train is None:
+        print("Enter training or testing data")
+        exit()
+
+    X_test = None
+    y_test = None
+    # if args.test is not None:
+    #     X_test = pd.read_csv(args.test, sep='\t', header=None)
+    #     y_test = X_test.loc[:,0]
+    #     X_test =  X_test.loc[:,1:]
+    #     X_test.columns = range(X_test.shape[1])
+
+    # if args.train is not None:
+    #     X = pd.read_csv(args.train, sep='\t',header=None)
+    #     Y = X.loc[:,0]
+    #     X = X.loc[:,1:]
+    #     X.columns = range(X.shape[1])
+
+    #     X_val = None
+    #     y_val = None
+    #     if args.val is not None:
+    #         X_val = pd.read_csv(args.val, sep='\t', header=None)
+    #         y_val = X_val.loc[:,0]
+    #         X_val = X_val.loc[:,1:]
+    #         X_val.columns =range(X_val.shape[1])
+
+    #     model = None
+    #     if args.model is not None:
+    #         print('============================The trained weight to initialize the NN=========================================')
+    #         model = keras.models.load_model(args.model)
+    #         model.summary()
+
+    #     train(X, Y, X_test, y_test,X_val, y_val, args, model)
+    # --test is optional when training: only read it when given, otherwise X_test/y_test stay None
+    if args.test is not None:
+        X_test = pd.read_csv(args.test, sep='\t', header=None)
+        y_test = X_test.loc[:,0]
+        X_test = X_test.loc[:,1:]
+        X_test.columns = range(X_test.shape[1])
+
+        print("Test Set:")
+        print(f"X_test shape: {X_test.shape}")
+        print(f"y_test unique counts: \n{y_test.value_counts()}")
+
+    if args.train is not None:
+        X = pd.read_csv(args.train, sep='\t', header=None)
+        Y = X.loc[:,0]
+        X = X.loc[:,1:]
+        X.columns = range(X.shape[1])
+
+        print("Train Set:")
+        print(f"X_train shape: {X.shape}")
+        print(f"y_train unique counts: \n{Y.value_counts()}")
+
+        X_val = None
+        y_val = None
+        if args.val is not None:
+            X_val = pd.read_csv(args.val, sep='\t', header=None)
+            y_val = X_val.loc[:,0]
+            X_val = X_val.loc[:,1:]
+            X_val.columns = range(X_val.shape[1])
+
+            print("Validation Set:")
+            print(f"X_val shape: {X_val.shape}")
+            print(f"y_val unique counts: \n{y_val.value_counts()}")
+
+        model = None
+        if args.model is not None:
+            print('============================The trained weight to initialize the NN=========================================')
+            model = keras.models.load_model(args.model)
+            model.summary()
+
+        train(X, Y, X_test, y_test, X_val, y_val, args, model)
+
+    elif args.test is not None:
+
+        if args.model is None:
+            print('***********************Model is not passed in the testing**************')
+            exit()
+
+        model = keras.models.load_model(args.model)
+        model.summary()
+
+        test(X_test, y_test, model)
\ No newline at end of file
diff --git a/scripts/models/milepost-mlp-model.py b/scripts/models/milepost-mlp-model.py
new file mode 100644
index 0000000..8f69ab0
--- /dev/null
+++ b/scripts/models/milepost-mlp-model.py
@@ -0,0 +1,159 @@
+import os
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+# Import TensorFlow Keras
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.models import Sequential
+
+# Model definition
+def getModel(input_dim, output_dim):
+    """Build, compile and return the MLP classifier; the layer summary is printed as a side effect."""
+    def glorot():
+        # Each Dense layer gets its own unseeded Glorot-normal initializer.
+        return keras.initializers.glorot_normal(seed=None)
+
+    network = Sequential()
+
+    # Hidden stages: Dense -> BatchNorm -> ReLU -> Dropout; only the first declares the input shape.
+    for stage, units in enumerate((650, 600, 500)):
+        if stage == 0:
+            network.add(Dense(units, input_shape=(input_dim,), kernel_initializer=glorot()))
+        else:
+            network.add(Dense(units, kernel_initializer=glorot()))
+        network.add(BatchNormalization())
+        network.add(Activation('relu'))
+        network.add(Dropout(0.25))
+
+    # Output head: one unit per class, batch-normalized before the softmax.
+    network.add(Dense(output_dim, kernel_initializer=glorot()))
+    network.add(BatchNormalization())
+    network.add(Activation('softmax'))
+
+    adam = keras.optimizers.Adam(learning_rate=0.001)
+    network.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
+    network.summary()
+    return network
+
+# Load data from directory
+# def load_data_from_directory(directory):
+#     data = []
+#     labels = []
+#     classes = sorted(os.listdir(directory))  # Ensure consistent label mapping
+#     class_to_label = {cls: idx for idx, cls in enumerate(classes)}
+
+#     for cls in classes:
+#         class_path = os.path.join(directory, cls)
+#         if os.path.isdir(class_path):
+#             for file_name in os.listdir(class_path):
+#                 if file_name.endswith(".npz"):
+#                     file_path = os.path.join(class_path, file_name)
+#                     try:
+#                         loaded = np.load(file_path)["values"]
+#                         data.append(loaded.flatten())
+#                         labels.append(class_to_label[cls])
+#                     except Exception as e:
+#                         print(f"Failed to load {file_path}: {e}")
+
+#     return np.array(data), np.array(labels)
+
+def load_data_from_directory(directory, max_length=56):
+    """Load each "values" array under <directory>/<class>/*.npz as a fixed-length feature vector.
+
+    Vectors are flattened, then zero-padded or truncated to `max_length`; empty arrays become all
+    zeros. Returns (data, labels) as numpy arrays; labels index the sorted class directory names.
+    """
+    # Only sub-directories are classes, so a stray file can no longer consume a label index.
+    classes = sorted(c for c in os.listdir(directory) if os.path.isdir(os.path.join(directory, c)))
+    class_to_label = {cls: idx for idx, cls in enumerate(classes)}
+    data, labels = [], []
+    for cls in classes:
+        class_path = os.path.join(directory, cls)
+        for file_name in os.listdir(class_path):
+            if not file_name.endswith(".npz"):
+                continue
+            file_path = os.path.join(class_path, file_name)
+            try:
+                with np.load(file_path) as archive:  # close the archive handle promptly
+                    flattened = archive["values"].flatten()
+                if len(flattened) == 0:
+                    print(f"Replacing zero-length data in {file_path}")
+                    flattened = np.zeros(max_length)
+                # Zero-pad short vectors on the right; cut long ones down to max_length.
+                if len(flattened) < max_length:
+                    padded = np.pad(flattened, (0, max_length - len(flattened)), 'constant')
+                else:
+                    padded = flattened[:max_length]
+                data.append(padded)
+                labels.append(class_to_label[cls])
+            except Exception as e:  # best-effort: report the unreadable file and keep going
+                print(f"Failed to load {file_path}: {e}")
+
+    return np.array(data), np.array(labels)
+
+# Prepare train and test data
+def prepare_data(train_dir, test_dir):
+    """Return (X_train, y_train, X_test, y_test) loaded from the two directory trees."""
+    train_features, train_labels = load_data_from_directory(train_dir)
+    test_features, test_labels = load_data_from_directory(test_dir)
+    return train_features, train_labels, test_features, test_labels
+
+# Main function
+def main():
+    """Train the MLP on the training directory, print test-set metrics, save the model and return it."""
+    # Paths to the train and test directories
+    train_dir = "/home/cs24mtech02001/Program-Classification/yali/Volume/Embeddings/milepost/codejam-trainO0"
+    test_dir = "/home/cs24mtech02001/Program-Classification/yali/Volume/Embeddings/milepost/codejam-testO0"
+
+    # Prepare data
+    X_train, y_train, X_test, y_test = prepare_data(train_dir, test_dir)
+
+    # Check data shapes
+    print(f"Training data shape: {X_train.shape}")
+    print(f"Training labels shape: {y_train.shape}")
+    print(f"Testing data shape: {X_test.shape}")
+    print(f"Testing labels shape: {y_test.shape}")
+
+    # One-hot encode labels. The width is the highest training label + 1 rather than the number of
+    # distinct labels, so a gap in the label ids cannot index past the encoding.
+    num_classes = int(y_train.max()) + 1
+    y_train = to_categorical(y_train, num_classes)
+    y_test = to_categorical(y_test, num_classes)
+
+    # No train-test split for validation, using all X_train and y_train for training
+    model = getModel(X_train.shape[1], num_classes)
+
+    # NOTE(review): an integer save_freq counts batches, not epochs — confirm 500 batches is intended.
+    mc = keras.callbacks.ModelCheckpoint(
+        filepath='/Pramana/IR2Vec/pc-embeddings-model-weight/codejam-milepost/weights_epoch_{epoch:08d}.weights.h5',
+        save_weights_only=True,
+        save_freq=500)
+
+    # Train the model
+    model.fit(X_train, y_train, batch_size=128, epochs=2000,
+              verbose=1, callbacks=[mc])
+
+    # Evaluate model
+    y_pred = np.argmax(model.predict(X_test), axis=1)
+    y_true = np.argmax(y_test, axis=1)
+    print("Classification Report:")
+    print(classification_report(y_true, y_pred))
+    print("Confusion Matrix:")
+    print(confusion_matrix(y_true, y_pred))
+    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
+
+    # Save the trained model and report the path that was actually written
+    model_path = "milepost-ir2vec-codejam-model.h5"
+    model.save(model_path)
+    print(f"Saved model to disk as '{model_path}'.")
+
+    return model
+
+# Execute the script
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/preprocessing/copy-folders.py b/scripts/preprocessing/copy-folders.py
new file mode 100644
index 0000000..47dc9a5
--- /dev/null
+++ b/scripts/preprocessing/copy-folders.py
@@ -0,0 +1,20 @@
+import os
+import shutil
+
+# Source tree whose immediate sub-folders are copied, and the tree that receives them
+source_dir = "/home/cs24mtech02001/Aayush-IR2Vec/CodeJam-data/srcfiles"
+destination_dir = "/home/cs24mtech02001/Aayush-IR2Vec/CodeJam-data/data"
+
+# The destination root must exist before copytree() fills it
+os.makedirs(destination_dir, exist_ok=True)
+
+# Copy each immediate sub-folder, merging into a destination folder that already exists
+for entry_name in os.listdir(source_dir):
+    origin = os.path.join(source_dir, entry_name)
+    if not os.path.isdir(origin):
+        # Plain files at the top level are left behind
+        continue
+    target = os.path.join(destination_dir, entry_name)
+    shutil.copytree(origin, target, dirs_exist_ok=True)
+
+print("All folders copied successfully to", destination_dir)
\ No newline at end of file
diff --git a/scripts/preprocessing/copy-profiled-ll-files.py b/scripts/preprocessing/copy-profiled-ll-files.py
new file mode 100644
index 0000000..cf8d9a5
--- /dev/null
+++ b/scripts/preprocessing/copy-profiled-ll-files.py
@@ -0,0 +1,44 @@
+import os
+import shutil
+
+def copy_profiled_ll_files(source_top_level, target_top_level):
+    """
+    Collect the regular files of every `profiled-ll-files` directory found below the source
+    tree into `<target_top_level>/<name of the directory containing it>`.
+
+    :param source_top_level: Root of the tree searched for `profiled-ll-files` directories.
+    :param target_top_level: Root receiving one sub-directory per match (created if missing).
+    """
+    marker = 'profiled-ll-files'
+
+    # The target root is created up front, even when nothing ends up being copied
+    os.makedirs(target_top_level, exist_ok=True)
+
+    for current_dir, child_dirs, _ in os.walk(source_top_level):
+        # Skip every directory that does not directly contain the marker folder
+        if marker not in child_dirs:
+            continue
+
+        # The target sub-directory is named after the directory that owns the marker folder
+        owner_name = os.path.basename(current_dir)
+        profiled_dir = os.path.join(current_dir, marker)
+        destination = os.path.join(target_top_level, owner_name)
+        os.makedirs(destination, exist_ok=True)
+
+        # Copy regular files only; sub-directories inside the marker folder are skipped
+        for entry in os.listdir(profiled_dir):
+            candidate = os.path.join(profiled_dir, entry)
+            if not os.path.isfile(candidate):
+                continue
+
+            shutil.copy2(candidate, os.path.join(destination, entry))
+
+if __name__ == "__main__":
+    # Hard-coded source and target roots; edit these two paths before running the script
+    source_top_level = "/Pramana/IR2Vec/cofo"
+    target_top_level = "/Pramana/IR2Vec/COFO-profiled-ll-files-17.x"
+
+    copy_profiled_ll_files(source_top_level, target_top_level)
+    print(f"Files copied successfully to {target_top_level}")
+
+# /Pramana/IR2Vec/Aayush-IR2Vec-Brahmaputra
\ No newline at end of file
diff --git a/scripts/preprocessing/delete-subfolders.py b/scripts/preprocessing/delete-subfolders.py
new file mode 100644
index 0000000..e7518d1
--- /dev/null
+++ b/scripts/preprocessing/delete-subfolders.py
@@ -0,0 +1,32 @@
+import os
+import shutil
+
+def delete_small_subfolders(top_level_dir, file_threshold=200):
+    """Delete immediate subfolders of top_level_dir that hold too few files.
+
+    Only regular files directly inside a subfolder are counted; a subfolder
+    with fewer than file_threshold of them is removed recursively. Prints a
+    message and returns when top_level_dir is not an existing directory.
+    """
+    if not os.path.isdir(top_level_dir):
+        print(f"The provided directory '{top_level_dir}' does not exist.")
+        return
+
+    for subfolder in os.listdir(top_level_dir):
+        subfolder_path = os.path.join(top_level_dir, subfolder)
+        if not os.path.isdir(subfolder_path):
+            continue
+        # Count only regular files; nested directories do not count
+        file_count = sum(1 for f in os.listdir(subfolder_path) if os.path.isfile(os.path.join(subfolder_path, f)))
+        if file_count < file_threshold:
+            try:
+                shutil.rmtree(subfolder_path)
+                print(f"Deleted '{subfolder}' as it contains fewer than {file_threshold} files.")
+            except OSError as e:  # rmtree failures are OSError subclasses
+                print(f"Failed to delete '{subfolder}': {e}")
+
+if __name__ == "__main__":
+    # Placeholder root; point it at the real dataset directory before running
+    root_dir = "/path/to/top_level_directory"  # Replace with your directory path
+    min_files = 200
+    delete_small_subfolders(root_dir, min_files)
\ No newline at end of file
diff --git a/scripts/preprocessing/folder-count.py b/scripts/preprocessing/folder-count.py
new file mode 100644
index 0000000..ee1a662
--- /dev/null
+++ b/scripts/preprocessing/folder-count.py
@@ -0,0 +1,40 @@
+import os
+import matplotlib.pyplot as plt
+
+# Report how many regular files each numerically named folder under base_path holds
+base_path = '/Pramana/IR2Vec/Program-Classification/datasets-profiled-llvm-17.x/poj-104-profiled-ll-files'
+
+# Folder names, their file counts, and the subset of folders that are empty
+folder_names = []
+file_counts = []
+empty_folders = []
+
+# Only direct subdirectories are inspected; only regular files are counted
+for folder in os.listdir(base_path):
+    folder_path = os.path.join(base_path, folder)
+    if os.path.isdir(folder_path):
+        num_files = sum(1 for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)))
+        folder_names.append(folder)
+        file_counts.append(num_files)
+        if num_files == 0:
+            empty_folders.append(folder)
+
+# Sort numerically by folder name. Unpacking zip(*sorted(...)) raises
+# ValueError when there are no folders, so sort the pairs and split them.
+pairs = sorted(zip(folder_names, file_counts), key=lambda x: int(x[0]))
+folder_names = tuple(name for name, _ in pairs)
+file_counts = tuple(count for _, count in pairs)
+
+print("Folders and their file counts:")
+for folder, count in pairs:
+    print(f"Folder name: {folder:>5}, No. of files: {count:>5}")
+
+if empty_folders:
+    print("\nFolders with zero files:")
+    for folder in sorted(empty_folders, key=int):
+        print(f"Folder name: {folder}")
+else:
+    print("\nNo folders with zero files.")
+
+total_folders = len(folder_names)
+print(f"\nTotal number of folders: {total_folders}")
\ No newline at end of file
diff --git a/scripts/preprocessing/folder-preprocessing.py b/scripts/preprocessing/folder-preprocessing.py
new file mode 100644
index 0000000..491d3f2
--- /dev/null
+++ b/scripts/preprocessing/folder-preprocessing.py
@@ -0,0 +1,50 @@
+import os
+import shutil
+
+# Remove empty folders under base_path, then report the folders that remain
+# together with their file counts.
+base_path = '/Pramana/IR2Vec/codeforces-profiled-ll-files-llvm17'
+
+
+def _count_files_per_folder(root):
+    """Count the regular files in each direct subdirectory of root.
+
+    Returns a list of (folder_name, file_count) tuples in os.listdir order.
+    """
+    counts = []
+    for folder in os.listdir(root):
+        folder_path = os.path.join(root, folder)
+        if os.path.isdir(folder_path):
+            num_files = sum(1 for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)))
+            counts.append((folder, num_files))
+    return counts
+
+
+# Mark only folders with zero files for deletion. An earlier variant of this
+# script also removed folders with fewer than 20 files; that rule is
+# disabled here.
+folders_to_delete = [
+    os.path.join(base_path, folder)
+    for folder, num_files in _count_files_per_folder(base_path)
+    if num_files == 0
+]
+
+# Delete the marked folders
+for folder_path in folders_to_delete:
+    try:
+        shutil.rmtree(folder_path)  # Delete the folder and all its contents
+        print(f"Deleted folder: {folder_path}")
+    except OSError as e:
+        print(f"Error deleting folder {folder_path}: {e}")
+
+# Recount after deletion and sort numerically by folder name. The pairs are
+# sorted directly because unpacking zip(*sorted(...)) raises ValueError when
+# no folders remain.
+remaining = sorted(_count_files_per_folder(base_path), key=lambda x: int(x[0]))
+folder_names = tuple(name for name, _ in remaining)
+file_counts = tuple(count for _, count in remaining)
+
+# Print the remaining folders and their file counts
+print("\nRemaining folders and their file counts:")
+for folder, count in remaining:
+    print(f"Folder name: {folder:>5}, No. of files: {count:>5}")
\ No newline at end of file
diff --git a/scripts/preprocessing/generate-ir2vec-embeddings.py b/scripts/preprocessing/generate-ir2vec-embeddings.py
new file mode 100644
index 0000000..72c6f72
--- /dev/null
+++ b/scripts/preprocessing/generate-ir2vec-embeddings.py
@@ -0,0 +1,84 @@
+import os
+import ir2vec
+import concurrent.futures
+import gc
+
+def process_file(file_path, folder_name, encoding_type="sym", level="p", dim=300):
+    """
+    Build the output line `label<TAB>v1<TAB>...<TAB>vN` for a single .ll file.
+
+    folder_name is used as the label. Any exception is printed and turned
+    into a None return value.
+    """
+    try:
+        # Initialise IR2Vec for this file with the requested settings
+        embedding = ir2vec.initEmbedding(file_path, encoding_type, level, dim)
+        # Program-level vector for the whole file
+        vector = embedding.getProgramVector()
+        fields = [str(component) for component in vector]
+        row = f"{folder_name}\t" + "\t".join(fields) + "\n"
+        # Drop the embedding object explicitly before returning
+        del embedding
+        return row
+    except Exception as e:
+        print(f"Error processing file {file_path}: {e}")
+        return None
+
+def process_folder_parallel(folder_path, folder_name, encoding_type="sym", level="p", dim=300, max_workers=30):
+    """
+    Run process_file on every .ll file in folder_path using worker processes.
+
+    Returns the non-empty result lines in completion order (not directory
+    order). max_workers caps the size of the process pool; no pool is
+    started when the folder contains no .ll files.
+    """
+    file_paths = [
+        os.path.join(folder_path, filename)
+        for filename in os.listdir(folder_path)
+        if filename.endswith(".ll")
+    ]
+    if not file_paths:
+        return []
+
+    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
+        futures = [
+            executor.submit(process_file, file_path, folder_name, encoding_type, level, dim)
+            for file_path in file_paths
+        ]
+        results = [future.result() for future in concurrent.futures.as_completed(futures)]
+    return [line for line in results if line]
+
+def generate_embeddings(input_folder, output_txt_path, encoding_type="sym", level="p", dim=300, max_label=342):
+    """
+    Write embedding lines for the numbered class folders under input_folder.
+
+    Folders named "1" .. str(max_label) are visited in numeric order, one at
+    a time; missing folders are skipped. Each folder's lines come from
+    process_folder_parallel and are appended to output_txt_path, which is
+    truncated first. max_label defaults to the previously hard-coded 342.
+    """
+    with open(output_txt_path, 'w') as output_file:
+        for label in range(1, max_label + 1):
+            folder_name = str(label)
+            folder_path = os.path.join(input_folder, folder_name)
+            if not os.path.isdir(folder_path):
+                continue
+
+            print(f"Processing folder {folder_name}...")
+            lines = process_folder_parallel(folder_path, folder_name, encoding_type, level, dim)
+            output_file.writelines(lines)
+
+            # Force garbage collection to free memory after processing a folder
+            gc.collect()
+
+    print(f"Embeddings for all files saved to {output_txt_path}.")
+
+# The guard is required because worker processes may re-import this module
+# (spawn start method); unguarded, each worker would rerun the whole job.
+if __name__ == "__main__":
+    input_folder = "/home/cs24mtech02001/Aayush-IR2Vec/datasets-17.x/codeforces/test"
+    output_txt_path = "/home/cs24mtech02001/Aayush-IR2Vec/datasets-17.x/codeforces/sym/codeforces-sym-test.txt"
+    encoding_type = "sym"
+    level = "p"
+    dim = 300
+    generate_embeddings(input_folder, output_txt_path, encoding_type, level, dim)
\ No newline at end of file
diff --git a/scripts/preprocessing/ir2vec-preprocess.py b/scripts/preprocessing/ir2vec-preprocess.py
new file mode 100644
index 0000000..ed67023
--- /dev/null
+++ b/scripts/preprocessing/ir2vec-preprocess.py
@@ -0,0 +1,132 @@
+# IITH-Compilers - Rohit Aggarwal, VenkataKeerthy
+# 
+# Usage Instructions
+# python preprocess.py [options]
+# --data: Path of the data file
+#
+# Structure of the Input data
+# label<\t>vector_dim1<\t>vector_dim2<\t>.......<\t>vector_dimN
+#
+# For splitting the data:
+# python preprocess.py --data <PATH of the data file> 
+# 
+#------------------------------------------------------------------------------------------#
+import argparse
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+import os
+
+# Load the data file
+def load_data(filepath):
+    """Load a tab-separated embedding file into features and labels.
+
+    Each data line is `label<TAB>v1<TAB>...<TAB>vN`. The first line of the
+    file is always skipped. Returns (X, targetLabel): a DataFrame with one
+    row of floats per data line and the list of integer labels.
+    """
+    rep = []
+    targetLabel = []
+    # The context manager closes the file handle the original code leaked
+    with open(filepath) as data_file:
+        next(data_file, None)  # skip the first line, as before
+        for line in data_file:
+            line = line.strip('\n\t')
+            if not line:
+                continue  # tolerate blank lines, e.g. a trailing empty line
+            r = line.split('\t')
+            targetLabel.append(int(r[0]))
+            rep.append([float(val) for val in r[1:]])
+    return pd.DataFrame(rep), targetLabel
+
+# Save the data to the file
+def saveToFile(X,Y,filepath):
+    """Write Y and X side by side as headerless, index-free tab-separated rows."""
+    # Labels go in column 0, the feature columns follow
+    combined = pd.concat([pd.DataFrame(Y), pd.DataFrame(X)], axis=1)
+    combined.columns = range(combined.shape[1])
+    combined.to_csv(filepath, header=None, index=False, sep='\t')
+
+# def splitData(X, Y, args):
+#     from collections import Counter  
+#     X = np.array(X)
+#     Y = np.array(Y)
+    
+#     # Check if stratified splitting is feasible
+#     class_counts = Counter(Y)
+#     min_class_count = min(class_counts.values())
+#     if min_class_count < 2:
+#         print(f"Warning: Some classes have fewer than 2 samples. Skipping stratified splitting.")
+#         stratify = None
+#     else:
+#         stratify = Y
+
+#     x_train, x_test, y_train, y_test = train_test_split(
+#         X, Y, train_size=0.6, test_size=0.4, random_state=123, stratify=stratify)
+
+#     x_test, x_val, y_test, y_val = train_test_split(
+#         x_test, y_test, train_size=0.5, test_size=0.5, random_state=123, stratify=y_test if stratify is not None else None)
+
+#     dirname = os.path.basename(args.data).replace(".txt", "")
+#     if not os.path.exists(dirname):
+#         os.makedirs(dirname)
+
+#     train_file_path = os.path.join(dirname, "training.csv")
+#     saveToFile(x_train, y_train, train_file_path)
+#     print(f'Training data created =====> {train_file_path}.')
+    
+#     test_file_path = os.path.join(dirname, "testing.csv")
+#     saveToFile(x_test, y_test, test_file_path)
+#     print(f'Testing data created =====> {test_file_path}.')
+    
+#     val_file_path = os.path.join(dirname, "val.csv")
+#     saveToFile(x_val, y_val, val_file_path)
+#     print(f'Validation data created =====> {val_file_path}.')
+
+
+# Split the data into train, test and val
+def splitData(X, Y, args):
+    """
+    Split X/Y 60/20/20 into train/test/val (stratified) and save each part.
+
+    Files training.csv, testing.csv and val.csv are written to a directory
+    named after the basename of args.data with ".txt" removed, created
+    relative to the current working directory.
+    """
+    features = np.array(X)
+    labels = np.array(Y)
+    # First cut: 60% train, 40% held out; stratified on the labels
+    x_train, x_rest, y_train, y_rest = train_test_split(
+        features, labels, train_size=0.6, test_size=0.4, random_state=123, stratify=labels)
+
+    # Second cut: halve the held-out part into test and validation
+    x_test, x_val, y_test, y_val = train_test_split(
+        x_rest, y_rest, train_size=0.5, test_size=0.5, random_state=123, stratify=y_rest)
+
+    dirname = os.path.basename(args.data).replace(".txt","")
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+
+    # (message template, file name, features, labels) for each output file
+    outputs = (
+        ('Training data created =====> {}.', "training.csv", x_train, y_train),
+        ('Testing data created =====> {}.', "testing.csv", x_test, y_test),
+        ('validation data created =====> {}.', "val.csv", x_val, y_val),
+    )
+    for message, csv_name, part_x, part_y in outputs:
+        out_path = os.path.join(dirname, csv_name)
+        saveToFile(part_x, part_y, out_path)
+        print(message.format(out_path))
+
+
+if __name__ == '__main__':
+    # Single required option: the tab-separated embedding file to split
+    cli = argparse.ArgumentParser()
+    cli.add_argument('-d', '--data', dest='data', metavar='FILE', help='Path Of the Data/embedding file', required=True)
+    args = cli.parse_args()
+
+    X, Y = load_data(args.data)
+    print('Data loaded. Start the Splitting of the data....')
+    # len() is the row count, despite the "shape" wording in the message
+    print(f"Loaded data: X.shape={len(X)}, Y.shape={len(Y)}")
+    splitData(X, Y, args)
\ No newline at end of file
diff --git a/scripts/preprocessing/merge-directories.py b/scripts/preprocessing/merge-directories.py
new file mode 100644
index 0000000..c80d6c3
--- /dev/null
+++ b/scripts/preprocessing/merge-directories.py
@@ -0,0 +1,46 @@
+import os
+import shutil
+
+def merge_directories(source_dirs, destination_dir):
+    """
+    Merge the subdirectories of several source trees into one tree.
+
+    Every regular file `<source_dir>/<subdir>/<name>` is copied to
+    `<destination_dir>/<subdir>/<source_dir basename>_<name>`; the prefix
+    keeps files with the same name from different sources apart.
+
+    Parameters:
+    - source_dirs: List of source directories to merge
+    - destination_dir: Path to the destination directory
+    """
+    os.makedirs(destination_dir, exist_ok=True)
+
+    for source_dir in source_dirs:
+        # normpath drops a trailing slash, which would make basename() empty
+        source_tag = os.path.basename(os.path.normpath(source_dir))
+        for subdir in os.listdir(source_dir):
+            source_subdir = os.path.join(source_dir, subdir)
+            # Skip stray files at the top level; previously an empty
+            # destination directory was created for each of them
+            if not os.path.isdir(source_subdir):
+                continue
+
+            destination_subdir = os.path.join(destination_dir, subdir)
+            os.makedirs(destination_subdir, exist_ok=True)
+            for filename in os.listdir(source_subdir):
+                src_file = os.path.join(source_subdir, filename)
+                if os.path.isfile(src_file):
+                    # Prefix with the source name so file names stay unique
+                    dst_file = os.path.join(destination_subdir, f"{source_tag}_{filename}")
+                    shutil.copy(src_file, dst_file)
+
+# Usage example; guarded so the copy only runs when executed as a script
+if __name__ == "__main__":
+    source_directories = [
+        '/Pramana/IR2Vec/datasets/CodeJam-data/code-jam-00-ll-files',
+        '/Pramana/IR2Vec/datasets/CodeJam-data/code-jam-01-ll-files',
+        '/Pramana/IR2Vec/datasets/CodeJam-data/code-jam-02-ll-files',
+        '/Pramana/IR2Vec/datasets/CodeJam-data/code-jam-03-ll-files'
+    ]
+    destination_directory = '/Pramana/IR2Vec/datasets/CodeJam-data/llvm17-ll-files'
+    merge_directories(source_directories, destination_directory)
\ No newline at end of file
diff --git a/scripts/preprocessing/rename-folders.py b/scripts/preprocessing/rename-folders.py
new file mode 100644
index 0000000..bb8c756
--- /dev/null
+++ b/scripts/preprocessing/rename-folders.py
@@ -0,0 +1,39 @@
+import os
+
+def rename_folders_sequentially(directory_path):
+    """Rename the subfolders of directory_path to "1", "2", ... in sorted order.
+
+    Names are sorted as strings, so "10" sorts before "2". Renaming goes
+    through temporary names so that a folder already called e.g. "2" is not
+    overwritten. Errors are printed, not raised; a failure part-way through
+    can leave folders under their temporary names.
+    """
+    try:
+        entries = os.listdir(directory_path)
+        subfolders = sorted(name for name in entries if os.path.isdir(os.path.join(directory_path, name)))
+        print(f"Total number of subfolders: {len(subfolders)}")
+
+        # Pick a temporary prefix no existing entry starts with; a fixed
+        # "temp_" could clash with a folder that is literally named "temp_1"
+        prefix = "temp_"
+        while any(name.startswith(prefix) for name in entries):
+            prefix = "_" + prefix
+
+        # Step 1: move every folder to a temporary name to avoid conflicts
+        staged = []
+        for index, folder in enumerate(subfolders, start=1):
+            temp_path = os.path.join(directory_path, f"{prefix}{index}")
+            os.rename(os.path.join(directory_path, folder), temp_path)
+            staged.append((temp_path, folder))
+
+        # Step 2: move the temporary names to sequential numbers
+        for index, (temp_path, folder) in enumerate(staged, start=1):
+            os.rename(temp_path, os.path.join(directory_path, str(index)))
+            print(f"Renamed: {folder} -> {index}")
+        print("Renaming completed successfully.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+if __name__ == "__main__":  # guard: do not rename anything on import
+    directory_path = "/Pramana/IR2Vec/yali/codeforces/test"  # Replace with the actual top-level directory
+    rename_folders_sequentially(directory_path)
\ No newline at end of file
diff --git a/scripts/preprocessing/split-dataset.py b/scripts/preprocessing/split-dataset.py
new file mode 100644
index 0000000..0bc053d
--- /dev/null
+++ b/scripts/preprocessing/split-dataset.py
@@ -0,0 +1,206 @@
+# import os
+# import shutil
+# from sklearn.model_selection import train_test_split
+
+# def split_dataset(source_dir, train_dir, test_dir, test_size=0.4):
+#     """
+#     Splits a dataset into training and testing sets.
+    
+#     :param source_dir: Path to the source directory containing class subfolders.
+#     :param train_dir: Path to the training directory to be created.
+#     :param test_dir: Path to the testing directory to be created.
+#     :param test_size: Proportion of the dataset to include in the test split.
+#     """
+#     # Ensure the output directories are empty
+#     if os.path.exists(train_dir):
+#         shutil.rmtree(train_dir)
+#     if os.path.exists(test_dir):
+#         shutil.rmtree(test_dir)
+#     os.makedirs(train_dir)
+#     os.makedirs(test_dir)
+
+#     # Iterate through each class directory
+#     for class_name in os.listdir(source_dir):
+#         class_path = os.path.join(source_dir, class_name)
+#         if os.path.isdir(class_path):
+#             # Collect all files in the class directory
+#             files = [os.path.join(class_path, file) for file in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, file))]
+            
+#             # Split files into training and testing sets
+#             train_files, test_files = train_test_split(files, test_size=test_size, random_state=42)
+            
+#             # Create class subdirectories in train and test directories
+#             train_class_dir = os.path.join(train_dir, class_name)
+#             test_class_dir = os.path.join(test_dir, class_name)
+#             os.makedirs(train_class_dir, exist_ok=True)
+#             os.makedirs(test_class_dir, exist_ok=True)
+            
+#             # Copy files to respective directories
+#             for file in train_files:
+#                 shutil.copy(file, train_class_dir)
+#             for file in test_files:
+#                 shutil.copy(file, test_class_dir)
+    
+#     print(f"Dataset split completed. Training data in '{train_dir}', testing data in '{test_dir}'.")
+
+# def split_dataset(source_directory, train_directory, test_directory, test_size=0.4):
+#     # Iterate over subdirectories (classes) in the source directory
+#     for sub_dir in os.listdir(source_directory):
+#         sub_dir_path = os.path.join(source_directory, sub_dir)
+
+#         # Ensure it is a directory
+#         if not os.path.isdir(sub_dir_path):
+#             continue
+
+#         # Get the list of files in the subdirectory
+#         files = [f for f in os.listdir(sub_dir_path) if os.path.isfile(os.path.join(sub_dir_path, f))]
+#         num_files = len(files)
+
+#         # Skip subdirectories with no files
+#         if num_files == 0:
+#             print(f"Skipping empty subdirectory: {sub_dir_path}")
+#             continue
+
+#         print(f"Processing subdirectory: {sub_dir_path}")
+
+#         # Handle case with only one file
+#         if num_files == 1:
+#             print(f"Only one file found in {sub_dir_path}. Copying the file to both train and test directories.")
+#             train_sub_dir = os.path.join(train_directory, sub_dir)
+#             test_sub_dir = os.path.join(test_directory, sub_dir)
+
+#             os.makedirs(train_sub_dir, exist_ok=True)
+#             os.makedirs(test_sub_dir, exist_ok=True)
+
+#             file = files[0]
+#             shutil.copy(os.path.join(sub_dir_path, file), os.path.join(train_sub_dir, file))
+#             shutil.copy(os.path.join(sub_dir_path, file), os.path.join(test_sub_dir, file))
+#             print(f"Copied {file} to both train and test directories.")
+#             continue
+
+#         # Handle case with more than one file
+#         train_files, test_files = train_test_split(files, test_size=test_size, random_state=42)
+
+#         # Create destination subdirectories if they don't exist
+#         train_sub_dir = os.path.join(train_directory, sub_dir)
+#         test_sub_dir = os.path.join(test_directory, sub_dir)
+
+#         os.makedirs(train_sub_dir, exist_ok=True)
+#         os.makedirs(test_sub_dir, exist_ok=True)
+
+#         # Move files to the respective directories
+#         for file in train_files:
+#             shutil.move(os.path.join(sub_dir_path, file), os.path.join(train_sub_dir, file))
+
+#         for file in test_files:
+#             shutil.move(os.path.join(sub_dir_path, file), os.path.join(test_sub_dir, file))
+
+#         print(f"Dataset split completed for {sub_dir_path}. {len(train_files)} files in train, {len(test_files)} files in test.")
+
+# if __name__ == "__main__":
+#     # Hard-code the directories here
+#     # source_directory = "/path/to/source_directory"  # Replace with actual path
+#     # train_directory = "/path/to/train_directory"    # Replace with actual path
+#     # test_directory = "/path/to/test_directory"      # Replace with actual path
+
+#     # split_dataset(source_directory, train_directory, test_directory, test_size=0.4)
+#     source_directory = "/Pramana/IR2Vec/Program-Classification/datasets-profiled-llvm-17.x/codejam-profiled-ll-files"
+#     train_directory = "/Pramana/IR2Vec/train-test-split-datasets/codejam/train"
+#     test_directory = "/Pramana/IR2Vec/train-test-split-datasets/codejam/test"
+#     split_dataset(source_directory, train_directory, test_directory, test_size=0.4)
+
+# # # Example usage
+# # source_directory = "/Pramana/IR2Vec/Program-Classification/datasets-profiled-llvm-17.x/codejam-profiled-ll-files"
+# # train_directory = "/Pramana/IR2Vec/train-test-split-datasets/codejam/train"
+# # test_directory = "/Pramana/IR2Vec/train-test-split-datasets/codejam/test"
+# # split_dataset(source_directory, train_directory, test_directory, test_size=0.4)
+
+import os
+import shutil
+from sklearn.model_selection import train_test_split
+
+def split_dataset(source_directory, train_directory, test_directory, val_directory, train_ratio=0.6, test_ratio=0.2, min_files=200):
+    """
+    Splits the dataset in the source directory into train, test, and validation sets.
+
+    Each class subdirectory is split independently and its files are copied
+    (the source is left untouched) into a same-named subdirectory of the
+    three output directories. The validation share is whatever remains:
+    1 - train_ratio - test_ratio.
+
+    Parameters:
+        source_directory: Path to the source directory containing class subdirectories.
+        train_directory: Path to the directory where the training set will be stored.
+        test_directory: Path to the directory where the test set will be stored.
+        val_directory: Path to the directory where the validation set will be stored.
+        train_ratio: Proportion of data to allocate to the training set.
+        test_ratio: Proportion of data to allocate to the test set.
+        min_files: Class subdirectories with fewer files than this are skipped
+            (defaults to the previously hard-coded 200).
+
+    Returns:
+        None. Progress is reported with print().
+
+    Note:
+        Both splits use random_state=42, so the selection is repeatable for
+        an identical file listing.
+    """
+    for sub_dir in os.listdir(source_directory):
+        sub_dir_path = os.path.join(source_directory, sub_dir)
+
+        # Ensure it is a directory
+        if not os.path.isdir(sub_dir_path):
+            continue
+
+        # Get the list of files in the subdirectory
+        files = [f for f in os.listdir(sub_dir_path) if os.path.isfile(os.path.join(sub_dir_path, f))]
+        num_files = len(files)
+
+        # Skip subdirectories that are too small to split
+        if num_files < min_files:
+            print(f"Skipping subdirectory with less than {min_files} files: {sub_dir_path} ({num_files} files)")
+            continue
+
+        print(f"Processing subdirectory: {sub_dir_path}")
+
+        # Split files into train and temp (test + validation)
+        train_files, temp_files = train_test_split(files, test_size=(1 - train_ratio), random_state=42)
+
+        # Split temp into test and validation. train_test_split returns
+        # (first part, second part) and test_size sizes the SECOND part, so
+        # the test share must be passed as train_size. Passing it as
+        # test_size swapped the test and validation sizes whenever the two
+        # were not equal.
+        test_files, val_files = train_test_split(temp_files, train_size=(test_ratio / (1 - train_ratio)), random_state=42)
+
+        # Create destination subdirectories if they don't exist
+        train_sub_dir = os.path.join(train_directory, sub_dir)
+        test_sub_dir = os.path.join(test_directory, sub_dir)
+        val_sub_dir = os.path.join(val_directory, sub_dir)
+
+        os.makedirs(train_sub_dir, exist_ok=True)
+        os.makedirs(test_sub_dir, exist_ok=True)
+        os.makedirs(val_sub_dir, exist_ok=True)
+
+        # Copy (not move) files to the respective directories
+        for file in train_files:
+            shutil.copy(os.path.join(sub_dir_path, file), os.path.join(train_sub_dir, file))
+
+        for file in test_files:
+            shutil.copy(os.path.join(sub_dir_path, file), os.path.join(test_sub_dir, file))
+
+        for file in val_files:
+            shutil.copy(os.path.join(sub_dir_path, file), os.path.join(val_sub_dir, file))
+
+        print(f"Dataset split completed for {sub_dir_path}. "
+              f"{len(train_files)} files in train, {len(test_files)} files in test, {len(val_files)} files in validation.")
+
+if __name__ == "__main__":
+    # Input class folders (alternative kept for quick tests) and output roots
+    src = "/Pramana/IR2Vec/dataset-opt-levels/codejam/O0"
+    # src = "/Pramana/IR2Vec/test"
+    train_out = "/home/cs24mtech02001/Aayush-IR2Vec/datasets-17.x/codejam/train"
+    test_out = "/home/cs24mtech02001/Aayush-IR2Vec/datasets-17.x/codejam/test"
+    val_out = "/home/cs24mtech02001/Aayush-IR2Vec/datasets-17.x/codejam/val"
+
+    split_dataset(src, train_out, test_out, val_out, train_ratio=0.6, test_ratio=0.2)
\ No newline at end of file
diff --git a/scripts/preprocessing/train-test-val-to-csv.py b/scripts/preprocessing/train-test-val-to-csv.py
new file mode 100644
index 0000000..92d88c4
--- /dev/null
+++ b/scripts/preprocessing/train-test-val-to-csv.py
@@ -0,0 +1,98 @@
+# Updated Script for Handling Separate Folders for Train, Test, and Val
+import argparse
+import pandas as pd
+import numpy as np
+import os
+from sklearn.model_selection import train_test_split
+from collections import Counter
+
+def load_data(filepath):
+    """Read `label<TAB>v1<TAB>...<TAB>vN` lines; return (DataFrame, labels).
+
+    The first line is always skipped; labels are ints, components floats.
+    """
+    rep, targetLabel = [], []
+    with open(filepath) as data_file:  # closes the handle the old code leaked
+        next(data_file, None)  # skip the first line, as before
+        for line in data_file:
+            r = line.strip('\n\t').split('\t')
+            if r == ['']:
+                continue  # blank line, e.g. a trailing empty line
+            targetLabel.append(int(r[0]))
+            rep.append([float(val) for val in r[1:]])
+    return pd.DataFrame(rep), targetLabel
+
+def save_to_file(X, Y, filepath):
+    """Write Y and X side by side as headerless, index-free tab-separated rows."""
+    # Labels go in column 0, the feature columns follow
+    combined = pd.concat([pd.DataFrame(Y), pd.DataFrame(X)], axis=1)
+    combined.columns = range(combined.shape[1])
+    combined.to_csv(filepath, header=None, index=False, sep='\t')
+
+# def process_and_save(folder_path, output_path, split_name):
+#     if not os.path.exists(folder_path):
+#         print(f"Warning: {split_name} folder does not exist: {folder_path}")
+#         return
+
+#     # Load data from the folder
+#     input_file = os.path.join(folder_path, "data.txt")
+#     if not os.path.isfile(input_file):
+#         print(f"Warning: Data file not found in {folder_path}: {input_file}")
+#         return
+
+#     X, Y = load_data(input_file)
+#     output_file = os.path.join(output_path, f"{split_name}.csv")
+#     save_to_file(X, Y, output_file)
+#     print(f"{split_name.capitalize()} data saved to {output_file}.")
+
+def process_and_save(data_path, output_path, filename):
+    """Load data_path and save it as output_path/filename; warn if the input is missing."""
+    if os.path.exists(data_path):
+        X, Y = load_data(data_path)
+        print(f"Loaded data from {data_path}: X.shape={len(X)}, Y.shape={len(Y)}")
+        save_to_file(X, Y, os.path.join(output_path, filename))
+        print(f"Data saved to {os.path.join(output_path, filename)}")
+    else:
+        print(f"Warning: Data file not found at {data_path}")
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    # All four options are required: three input files and one output directory
+    parser.add_argument('--train', required=True, help='Path to the training data file')
+    parser.add_argument('--test', required=True, help='Path to the testing data file')
+    parser.add_argument('--val', required=True, help='Path to the validation data file')
+    parser.add_argument('--output', required=True, help='Output directory for processed CSV files')
+    args = parser.parse_args()
+
+    if not os.path.exists(args.output):
+        os.makedirs(args.output)
+
+    # Output file names are fixed; only the directory is configurable
+    for source, csv_name in ((args.train, 'training.csv'), (args.test, 'testing.csv'), (args.val, 'val.csv')):
+        process_and_save(source, args.output, csv_name)
+
+
+# if __name__ == '__main__':
+#     parser = argparse.ArgumentParser()
+#     parser.add_argument('--train', dest='train', metavar='TRAIN', help='Path to the training folder')
+#     parser.add_argument('--test', dest='test', metavar='TEST', help='Path to the testing folder')
+#     parser.add_argument('--val', dest='val', metavar='VAL', help='Path to the validation folder')
+#     parser.add_argument('--output', dest='output', metavar='OUTPUT', required=True, help='Path to save the output CSV files')
+
+#     args = parser.parse_args()
+
+#     # Ensure the output directory exists
+#     if not os.path.exists(args.output):
+#         os.makedirs(args.output)
+
+#     # Process each folder separately
+#     if args.train:
+#         process_and_save(args.train, args.output, "training")
+
+#     if args.test:
+#         process_and_save(args.test, args.output, "testing")
+
+#     if args.val:
+#         process_and_save(args.val, args.output, "validation")
+
+# python preprocess.py --train path/to/train --test path/to/test --val path/to/val --output path/to/output
\ No newline at end of file
diff --git a/scripts/profiling/generate-input-folder-with-input-files.py b/scripts/profiling/generate-input-folder-with-input-files.py
new file mode 100644
index 0000000..af5b51d
--- /dev/null
+++ b/scripts/profiling/generate-input-folder-with-input-files.py
@@ -0,0 +1,82 @@
+import subprocess
+import os
+import re
+
+def parse_testcases(testcases_path):
+    """Parse test cases from testcases.txt."""
+    print(f"\nEntering into parse_testcases function\n")
+    with open(testcases_path, 'r') as file:
+        content = file.read()
+
+    testcases = []
+    tests = re.split(r"Test: #[0-9]+,", content)
+    for test in tests[1:]:
+        input_match = re.search(r"Input\n([\s\S]*?)Output", test)
+        output_match = re.search(r"Output\n([\s\S]*?)Answer", test)
+
+        if input_match and output_match:
+            test_input = input_match.group(1).strip()
+            expected_output = output_match.group(1).strip()
+            testcases.append((test_input, expected_output))
+    return testcases
+
+def create_input_files(folder_path, testcases):
+    print(f"\nEntering into create_input_files function\n")
+    """Create input files for each test case."""
+    testcases_folder = os.path.join(folder_path, "testcases")
+    os.makedirs(testcases_folder, exist_ok=True)
+
+    input_files = []
+    for i, (test_input, _) in enumerate(testcases, start=1):
+        input_file = os.path.join(testcases_folder, f"input{i}.txt")
+        with open(input_file, 'w') as f:
+            f.write(test_input)
+        input_files.append(input_file)
+    
+    return input_files
+
+def process_subfolder(folder_path):
+    """Processes a single folder to generate input files."""
+    # Locate all C/C++ files and testcases.txt
+    testcases_file = None
+    file_count=0
+    for filename in os.listdir(folder_path):
+        if filename.endswith(".c") or filename.endswith(".cpp"):
+            # print(f"c/cpp filename: {filename}\n")
+            file_count+=1
+        elif filename == "testcases.txt":
+            testcases_file = os.path.join(folder_path, filename)
+
+    if not testcases_file:
+        print(f"testcases.txt not found in {folder_path}.")
+        return
+    print("-"*20)
+    print(f"\nTotal number of files in the current folder --> {folder_path} is {file_count}\n")
+    print("-" * 20)
+
+    # Parse test cases
+    testcases = parse_testcases(testcases_file)
+
+    # Create input files for the test cases
+    input_files = create_input_files(folder_path, testcases)
+
+def main(top_level_directory):
+    """Main function to process all subdirectories."""
+    for root, dirs, files in os.walk(top_level_directory):
+        for subdir in dirs:
+            # print(subdir)
+            if subdir == 'testcases':
+                return
+            folder_path = os.path.join(root, subdir)
+            print(f"Subdirectory Path: {folder_path}\n")
+            print(f"Entering into process_folder function\n")
+            process_subfolder(folder_path)
+            print("\n")
+            print ("-" * 20)
+            print(f"Processed subdirectory --> {subdir}")
+            print("-" * 20)
+
+if __name__ == "__main__":
+    # Specify the top-level output directory containing subfolders
+    top_level_dir = "/Pramana/IR2Vec/codeforces-dataset-with-tc"
+    main(top_level_dir)
\ No newline at end of file
diff --git a/scripts/profiling/generate-profiled-ll-files-with-testcases-using-cores.sh b/scripts/profiling/generate-profiled-ll-files-with-testcases-using-cores.sh
new file mode 100644
index 0000000..7c5f373
--- /dev/null
+++ b/scripts/profiling/generate-profiled-ll-files-with-testcases-using-cores.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+# Define the directory containing subfolders with source files and test cases
+BASE_DIR="$1"
+NUM_CORES="$2"  # Number of cores to use for parallelism
+
+# Compiler and flags for coverage instrumentation
+# /home/cs24mtech02001/Aayush-IR2Vec/llvm-project/build-llvm17/bin/clang-17
+# COMPILER="clang++"
+COMPILER=/home/cs24mtech02001/LLVM/llvm-project/build-llvm17/bin/clang++
+# STD="-std=c++17"
+PROFILING_FLAGS="-fprofile-generate"
+OPT=-O1
+OPTIMIZED_FLAGS="-fprofile-instr-use"
+# PROFILER="/home/cs24mtech02001/LLVM/llvm-project/build-llvm17/bin/llvm-profdata"
+
+# Function to handle individual source files
+process_file()
+{
+    SUBDIR="$1"
+    SRC="$2"
+
+    BASE_NAME=$(basename "$SRC" | sed 's/\.[^.]*$//')
+    OUT_DIR="$SUBDIR/executables"
+    PROF_DIR="$SUBDIR/profiles"
+    LL_DIR="$SUBDIR/profiled-ll-files"
+
+    EXECUTABLE="$OUT_DIR/${BASE_NAME}.out"
+
+    echo
+    echo "Compiling $SRC with profiling flags..."
+    # $COMPILER $STD $PROFILING_FLAGS "$SRC" -o "$EXECUTABLE"
+    $COMPILER $OPT $PROFILING_FLAGS "$SRC" -o "$EXECUTABLE"
+
+    if [[ $? -ne 0 ]]; then
+        echo "Compilation failed for $SRC"
+        return
+    fi
+
+    # Create a profile subfolder for the current source file
+    SRC_PROF_DIR="$PROF_DIR/$BASE_NAME"
+    mkdir -p "$SRC_PROF_DIR"
+
+    # Step 2: Run the executable with each test case in the testcases folder
+    TESTCASE_DIR="$SUBDIR/testcases"
+    if [[ -d "$TESTCASE_DIR" ]]; then
+        for INPUT_FILE in "$TESTCASE_DIR"/*.txt; do
+            if [[ -f "$INPUT_FILE" ]]; then
+                echo "Running $EXECUTABLE with input $INPUT_FILE..."
+
+                PROFILE_FILE="$SRC_PROF_DIR/$(basename "$INPUT_FILE" .txt).profraw"
+
+                # Use timeout to enforce a time limit of 5 seconds
+                timeout 5s bash -c "LLVM_PROFILE_FILE=\"$PROFILE_FILE\" \"$EXECUTABLE\" < \"$INPUT_FILE\" > /dev/null 2>&1"
+
+                EXIT_CODE=$?
+                if [[ $EXIT_CODE -eq 124 ]]; then
+                    echo "Skipping input file $INPUT_FILE (execution time exceeded 5 seconds)."
+                    continue
+                elif [[ $EXIT_CODE -ne 0 ]]; then
+                    echo "Execution failed for $EXECUTABLE with input $INPUT_FILE"
+                    continue
+                fi
+            fi
+        done
+    else
+        echo "No testcases folder found in $SUBDIR"
+    fi
+
+    # Step 3: Merge raw profiles into a single profile data file
+    MERGED_PROFILE="$SRC_PROF_DIR/${BASE_NAME}.profdata"
+    echo
+    echo "Merging raw profiles for $BASE_NAME..."
+    llvm-profdata-17 merge -output="$MERGED_PROFILE" "$SRC_PROF_DIR"/*.profraw
+    # $PROFILER merge -output="$MERGED_PROFILE" "$SRC_PROF_DIR"/*.profraw
+
+    if [[ $? -ne 0 ]]; then
+        echo "Failed to merge profiles for $BASE_NAME."
+        return
+    fi
+
+    # Step 4: Generate profiled LLVM IR files
+    PROFILED_LL_FILE="$LL_DIR/${BASE_NAME}.ll"
+    echo "Generating profiled LLVM IR for $BASE_NAME..."
+    # $COMPILER $STD $OPT $OPTIMIZED_FLAGS="$MERGED_PROFILE" "$SRC" -S -emit-llvm -o "$PROFILED_LL_FILE"
+    $COMPILER $OPT $OPTIMIZED_FLAGS="$MERGED_PROFILE" "$SRC" -S -emit-llvm -o "$PROFILED_LL_FILE"
+
+    if [[ $? -ne 0 ]]; then
+        echo "Failed to generate LLVM IR for $BASE_NAME."
+        return
+    fi
+}
+
+export -f process_file  # Export the function for parallel processing
+export COMPILER OPT PROFILING_FLAGS OPTIMIZED_FLAGS  # Export variables for use in subshells
+
+# Step 1: Iterate through subdirectories and process source files in parallel
+for SUBDIR in "$BASE_DIR"/*/; do
+    # With no subdirectories the glob stays literal; skip it rather than create a '*' folder
+    [[ -d "$SUBDIR" ]] || continue
+    echo "*****************************"
+    echo "Processing directory: $SUBDIR"
+    echo "*****************************"
+    # Create directories for outputs and profiles
+    OUT_DIR="$SUBDIR/executables"
+    PROF_DIR="$SUBDIR/profiles"
+    LL_DIR="$SUBDIR/profiled-ll-files"
+
+    # Delete the existing subdirectories if already present
+    rm -rf "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+    mkdir -p "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+
+    # Find source files and process them in parallel (one job when NUM_CORES is unset)
+    find "$SUBDIR" -maxdepth 1 -type f \( -name "*.c" -o -name "*.cpp" \) | \
+        parallel -j "${NUM_CORES:-1}" process_file "$SUBDIR" {}
+done
+
+echo "All operations completed successfully."
\ No newline at end of file
diff --git a/scripts/profiling/profiling-without-parallel.sh b/scripts/profiling/profiling-without-parallel.sh
new file mode 100644
index 0000000..1124d88
--- /dev/null
+++ b/scripts/profiling/profiling-without-parallel.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+# Usage: <this script> BASE_DIR [NUM_CORES]; BASE_DIR holds one subfolder per problem
+BASE_DIR="$1"
+NUM_CORES="${2:-1}"  # Number of cores to use for parallelism (default: 1)
+# The main loop runs 'rm -rf' below BASE_DIR; an empty value would make it scan /*/.
+[[ -n "$BASE_DIR" && -d "$BASE_DIR" ]] || { echo "Usage: $0 BASE_DIR [NUM_CORES]" >&2; exit 1; }
+# Compiler and flags: instrumented build first, then a profile-guided IR build
+COMPILER="/home/cs24mtech02001/Aayush-IR2Vec/llvm-project/build-llvm17/bin/clang++"
+OPT="-O1"
+PROFILING_FLAGS="-fprofile-generate"
+OPTIMIZED_FLAGS="-fprofile-instr-use"
+
+# Function to handle individual source files
+process_file() {
+    SUBDIR="$1"
+    SRC="$2"
+
+    BASE_NAME=$(basename "$SRC" | sed 's/\.[^.]*$//')
+    OUT_DIR="$SUBDIR/executables"
+    PROF_DIR="$SUBDIR/profiles"
+    LL_DIR="$SUBDIR/profiled-ll-files"
+
+    EXECUTABLE="$OUT_DIR/${BASE_NAME}.out"
+
+    echo
+    echo "Compiling $SRC with profiling flags..."
+    $COMPILER $OPT $PROFILING_FLAGS "$SRC" -o "$EXECUTABLE"
+    if [[ $? -ne 0 ]]; then
+        echo "Compilation failed for $SRC"
+        return
+    fi
+
+    # Create a profile subfolder for the current source file
+    SRC_PROF_DIR="$PROF_DIR/$BASE_NAME"
+    mkdir -p "$SRC_PROF_DIR"
+
+    # Step 2: Run the executable with each test case in the testcases folder
+    TESTCASE_DIR="$SUBDIR/testcases"
+    if [[ -d "$TESTCASE_DIR" ]]; then
+        for INPUT_FILE in "$TESTCASE_DIR"/*.txt; do
+            if [[ -f "$INPUT_FILE" ]]; then
+                echo "Running $EXECUTABLE with input $INPUT_FILE..."
+
+                PROFILE_FILE="$SRC_PROF_DIR/$(basename "$INPUT_FILE" .txt).profraw"
+
+                # Use timeout to enforce a time limit of 3 seconds
+                timeout 3s bash -c "LLVM_PROFILE_FILE=\"$PROFILE_FILE\" \"$EXECUTABLE\" < \"$INPUT_FILE\" > /dev/null 2>&1"
+
+                EXIT_CODE=$?
+                if [[ $EXIT_CODE -eq 124 ]]; then
+                    echo "Skipping input file $INPUT_FILE (execution time exceeded 3 seconds)."
+                    continue
+                elif [[ $EXIT_CODE -ne 0 ]]; then
+                    echo "Execution failed for $EXECUTABLE with input $INPUT_FILE"
+                    continue
+                fi
+            fi
+        done
+    else
+        echo "No testcases folder found in $SUBDIR"
+    fi
+
+    # Step 3: Merge raw profiles into a single profile data file
+    MERGED_PROFILE="$SRC_PROF_DIR/${BASE_NAME}.profdata"
+    echo
+    echo "Merging raw profiles for $BASE_NAME..."
+    llvm-profdata merge -output="$MERGED_PROFILE" "$SRC_PROF_DIR"/*.profraw
+    if [[ $? -ne 0 ]]; then
+        echo "Failed to merge profiles for $BASE_NAME."
+        return
+    fi
+
+    # Step 4: Generate profiled LLVM IR files
+    PROFILED_LL_FILE="$LL_DIR/${BASE_NAME}.ll"
+    echo "Generating profiled LLVM IR for $BASE_NAME..."
+    $COMPILER $OPT $OPTIMIZED_FLAGS="$MERGED_PROFILE" "$SRC" -S -emit-llvm -o "$PROFILED_LL_FILE"
+    if [[ $? -ne 0 ]]; then
+        echo "Failed to generate LLVM IR for $BASE_NAME."
+        return
+    fi
+}
+
+export -f process_file  # Export the function for subshells
+export COMPILER OPT PROFILING_FLAGS OPTIMIZED_FLAGS  # Export variables for use in subshells
+
+# Step 1: Iterate through subdirectories and process source files
+for SUBDIR in "$BASE_DIR"/*/; do
+    # With no subdirectories the glob stays literal; skip it rather than create a '*' folder
+    [[ -d "$SUBDIR" ]] || continue
+    echo "*****************************"
+    echo "Processing directory: $SUBDIR"
+    echo "*****************************"
+
+    # Create directories for outputs and profiles
+    OUT_DIR="$SUBDIR/executables"
+    PROF_DIR="$SUBDIR/profiles"
+    LL_DIR="$SUBDIR/profiled-ll-files"
+
+    # Delete the existing subdirectories if already present
+    rm -rf "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+    mkdir -p "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+
+    # Number of background jobs started in the current batch
+    job_count=0
+    # Find source files and process them (SUBDIR already ends with '/')
+    for SRC in "$SUBDIR"*.c "$SUBDIR"*.cpp; do
+        if [[ -f "$SRC" ]]; then
+            process_file "$SUBDIR" "$SRC" &  # Run in background
+            ((job_count++))
+            # Wait once the batch reaches NUM_CORES jobs (1 when NUM_CORES is unset,
+            # since an empty value would break the arithmetic test below)
+            if ((job_count >= ${NUM_CORES:-1})); then
+                wait
+                job_count=0
+            fi
+        fi
+    done
+
+    # Wait for remaining background jobs to complete
+    wait
+done
+echo "All operations completed successfully."
\ No newline at end of file
diff --git a/scripts/profiling/profiling-without-testcases.sh b/scripts/profiling/profiling-without-testcases.sh
new file mode 100644
index 0000000..be0b2cc
--- /dev/null
+++ b/scripts/profiling/profiling-without-testcases.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+# Usage: <this script> BASE_DIR [CORE_COUNT]; BASE_DIR holds one subfolder of sources per problem
+BASE_DIR="$1"
+CORE_COUNT="${2:-1}"  # Number of parallel jobs (default: 1)
+# The main loop runs 'rm -rf' below BASE_DIR; an empty value would make it scan /*/.
+[[ -n "$BASE_DIR" && -d "$BASE_DIR" ]] || { echo "Usage: $0 BASE_DIR [CORE_COUNT]" >&2; exit 1; }
+# Compiler and flags for instrumentation
+COMPILER=/home/cs24mtech02001/LLVM/llvm-project/build-llvm17/bin/clang++
+PROFILING_FLAGS="-fprofile-generate"
+OPT="-O1"
+OPTIMIZED_FLAGS="-fprofile-instr-use"
+
+# Function to handle individual source files
+process_source_file()
+{
+    local SUBDIR="$1"
+    local SRC="$2"
+    local OUT_DIR="$3"
+    local PROF_DIR="$4"
+    local LL_DIR="$5"
+
+    BASE_NAME=$(basename "$SRC" | sed 's/\.[^.]*$//')
+    EXECUTABLE="$OUT_DIR/${BASE_NAME}.out"
+
+    echo
+    echo "Compiling $SRC with profiling flags..."
+    # $COMPILER $STD $PROFILING_FLAGS "$SRC" -o "$EXECUTABLE"
+    $COMPILER $OPT $PROFILING_FLAGS "$SRC" -o "$EXECUTABLE"
+    if [[ $? -ne 0 ]]; then
+        echo "Compilation failed for $SRC"
+        return
+    fi
+
+    # Run the executable to generate the .profraw file
+    SRC_PROF_DIR="$PROF_DIR/$BASE_NAME"
+    mkdir -p "$SRC_PROF_DIR"
+    PROFILE_FILE="$SRC_PROF_DIR/${BASE_NAME}.profraw"
+
+    echo
+    echo "Running $EXECUTABLE to generate profile data..."
+    timeout 5s bash -c "LLVM_PROFILE_FILE=\"$PROFILE_FILE\" \"$EXECUTABLE\" > /dev/null 2>&1"
+
+    EXIT_CODE=$?
+    if [[ $EXIT_CODE -eq 124 ]]; then
+        echo "Skipping the file (execution time exceeded 5 seconds)."
+        return
+    elif [[ $EXIT_CODE -ne 0 ]]; then
+        echo "Execution failed for $EXECUTABLE with input $INPUT_FILE"
+        return
+    fi
+
+    # Merge the raw profile into a single profile data file
+    MERGED_PROFILE="$SRC_PROF_DIR/${BASE_NAME}.profdata"
+    echo
+    echo "Merging raw profile for $BASE_NAME..."
+    llvm-profdata-17 merge -output="$MERGED_PROFILE" "$SRC_PROF_DIR"/*.profraw
+    if [[ $? -ne 0 ]]; then
+        echo "Failed to merge profiles for $BASE_NAME."
+        return
+    fi
+
+    # Generate profiled LLVM IR files
+    PROFILED_LL_FILE="$LL_DIR/${BASE_NAME}.ll"
+    echo "Generating profiled LLVM IR for $BASE_NAME..."
+    # $COMPILER $STD $OPT $OPTIMIZED_FLAGS="$MERGED_PROFILE" "$SRC" -S -emit-llvm -o "$PROFILED_LL_FILE"
+    $COMPILER $OPT $OPTIMIZED_FLAGS="$MERGED_PROFILE" "$SRC" -S -emit-llvm -o "$PROFILED_LL_FILE"
+    if [[ $? -ne 0 ]]; then
+        echo "Failed to generate LLVM IR for $BASE_NAME."
+        return
+    fi
+}
+
+# Export the function and the settings it reads for parallel execution
+export -f process_source_file
+export COMPILER OPT PROFILING_FLAGS OPTIMIZED_FLAGS
+
+# Step 1: Iterate through subdirectories and process source files in parallel
+for SUBDIR in "$BASE_DIR"/*/; do
+    # With no subdirectories the glob stays literal; skip it rather than create a '*' folder
+    [[ -d "$SUBDIR" ]] || continue
+    echo "*****************************"
+    echo "Processing directory: $SUBDIR"
+    echo "*****************************"
+
+    # Create directories for outputs and profiles
+    OUT_DIR="$SUBDIR/executables"
+    PROF_DIR="$SUBDIR/profiles"
+    LL_DIR="$SUBDIR/profiled-ll-files"
+
+    # Delete the existing subdirectories if already present
+    rm -rf "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+    mkdir -p "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+
+    # Find regular source files and process them in parallel (one job when CORE_COUNT is unset)
+    find "$SUBDIR" -maxdepth 1 -type f \( -name "*.c" -o -name "*.cpp" \) | \
+        parallel -j "${CORE_COUNT:-1}" process_source_file "$SUBDIR" {} "$OUT_DIR" "$PROF_DIR" "$LL_DIR"
+
+done
+
+echo "All operations completed successfully."
\ No newline at end of file
diff --git a/scripts/read-npz.files.py b/scripts/read-npz.files.py
new file mode 100644
index 0000000..db5fa71
--- /dev/null
+++ b/scripts/read-npz.files.py
@@ -0,0 +1,7 @@
+from numpy import load
+
+data = load('/home/aayusphere/Embeddings/poj/milepost/trainO0/1/1-14.npz')
+lst = data.files
+for item in lst:
+    print(item)
+    print(data[item])
\ No newline at end of file

From a6e408a08a468fe7986e0a188f2d8e5318bf7f6c Mon Sep 17 00:00:00 2001
From: iamaayushrivastava <iamaayushrivastava@gmail.com>
Date: Fri, 10 Jan 2025 17:45:34 +0530
Subject: [PATCH 2/3] FlowAware.cpp

---
 code/FlowAware.cpp | 2320 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2320 insertions(+)
 create mode 100755 code/FlowAware.cpp

diff --git a/code/FlowAware.cpp b/code/FlowAware.cpp
new file mode 100755
index 0000000..fdd9f14
--- /dev/null
+++ b/code/FlowAware.cpp
@@ -0,0 +1,2320 @@
+#include "FlowAware.h"
+#include "VectorSolver.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CallGraph.h"
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassPlugin.h"
+#include "llvm/InitializePasses.h"
+// #include "llvm/Support/BranchProbability.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ADT/MapVector.h"
+
+
+#include <algorithm> // for transform
+
+#include <functional>
+#include <regex>
+#include <iostream>
+
+using namespace llvm;
+using namespace std;
+using namespace IR2Vec;
+
+// Returns the branch-probability analysis of F. The first request runs the
+// analysis through FAM and caches its address in bpiMap; later requests are
+// answered from the cache.
+// NOTE(review): the cached pointer refers to a result owned by FAM, so it is
+// presumably valid only while that analysis manager is alive - confirm.
+BranchProbabilityInfo *IR2Vec_FA::getBPI(Function *F, FunctionAnalysisManager &FAM) {
+  auto Cached = bpiMap.find(F);
+  if (Cached != bpiMap.end())
+    return Cached->second;
+
+  BranchProbabilityInfo *Fresh = &FAM.getResult<BranchProbabilityAnalysis>(*F);
+  bpiMap[F] = Fresh;
+  return Fresh;
+}
+
+// Multiplies every component of vec by factor, in place.
+void IR2Vec_FA::scaleVector(SmallVector<double, DIM> &vec, float factor) {
+  for (double &component : vec) {
+    component = component * factor;
+  }
+}
+
+// Overwrites I's vector with val, marks I as not live and propagates the
+// update along I's memory-access operand chain. A null I is ignored.
+void IR2Vec_FA::killAndUpdate(Instruction *I, SmallVector<double, DIM> val) {
+  if (!I)
+    return;
+  auto VecEntry = instVecMap.find(I);
+  assert(VecEntry != instVecMap.end() && "Instruction should be defined in map");
+  VecEntry->second = val;
+
+  auto LiveEntry = livelinessMap.find(I);
+  assert(LiveEntry != livelinessMap.end() &&
+         "Instruction should be in livelinessMap");
+  LiveEntry->second = false;
+
+  transitiveKillAndUpdate(I, val, false);
+}
+
+// Walks up the chain of memory-access operands starting at I and pushes val
+// into every instruction met on the way, marking each one as not live.
+//   avg == false: the parent's vector is replaced by val.
+//   avg == true:  val is added to the parent's vector and the sum is scaled
+//                 by WT. avg is switched on as soon as a getelementptr parent
+//                 is met and stays on for the rest of the chain.
+// The walk ends at the first instruction that is not a memory access (per
+// memAccessOps) or whose selected operand is not an Instruction.
+void IR2Vec_FA::transitiveKillAndUpdate(Instruction *I,
+                                        SmallVector<double, DIM> val,
+                                        bool avg) {
+  assert(I != nullptr);
+
+  // Loop form of the tail recursion: Cur is the instruction whose parent is
+  // updated in the current step.
+  for (Instruction *Cur = I;;) {
+    unsigned operandNum;
+    if (!isMemOp(Cur->getOpcodeName(), operandNum, memAccessOps))
+      return;
+
+    auto *Parent = dyn_cast<Instruction>(Cur->getOperand(operandNum));
+    if (!Parent)
+      return;
+
+    if (strcmp(Parent->getOpcodeName(), "getelementptr") == 0)
+      avg = true;
+
+    auto VecEntry = instVecMap.find(Parent);
+    assert(VecEntry != instVecMap.end() &&
+           "Instruction should be defined in map");
+    auto &ParentVec = VecEntry->second;
+    if (avg) {
+      std::transform(ParentVec.begin(), ParentVec.end(), val.begin(),
+                     ParentVec.begin(), std::plus<double>());
+      scaleVector(ParentVec, WT);
+    } else {
+      ParentVec = val;
+    }
+
+    auto LiveEntry = livelinessMap.find(Parent);
+    assert(LiveEntry != livelinessMap.end() &&
+           "Instruction should be in livelinessMap");
+    LiveEntry->second = false;
+
+    Cur = Parent;
+  }
+}
+
+// void IR2Vec_FA::collectData() {
+//   static bool wasExecuted = false;
+//   if (!wasExecuted) {
+//     errs() << "Reading from " + fname + "\n";
+//     std::ifstream i(fname);
+//     std::string delimiter = ":";
+//     for (std::string line; getline(i, line);) {
+//       std::string token = line.substr(0, line.find(delimiter));
+//       SmallVector<double, DIM> rep;
+//       std::string vec = line.substr(line.find(delimiter) + 1, line.length());
+//       std::string val = vec.substr(vec.find("[") + 1, vec.find(", ") - 1);
+//       rep.push_back(stod(val));
+//       int pos = vec.find(", ");
+//       vec = vec.substr(pos + 1);
+//       for (int i = 1; i < DIM - 1; i++) {
+//         val = vec.substr(1, vec.find(", ") - 1);
+//         rep.push_back(stod(val));
+//         pos = vec.find(", ");
+//         vec = vec.substr(pos + 1);
+//       }
+//       val = vec.substr(1, vec.find("]") - 1);
+//       rep.push_back(stod(val));
+//       opcMap[token] = rep;
+//     }
+//     wasExecuted = true;
+//   }
+// }
+
+// Records, under writeDefsMap[root], every memory-write instruction that is
+// reached from def by following users through memory-access instructions.
+// visitedList collects the instructions already visited so that none is
+// expanded twice; toAppend is accepted but not used by this function.
+void IR2Vec_FA::getTransitiveUse(
+    const Instruction *root, const Instruction *def,
+    SmallVector<const Instruction *, 100> &visitedList,
+    SmallVector<const Instruction *, 10> toAppend) {
+  unsigned operandNum = 0;
+  visitedList.push_back(def);
+  for (auto U : def->users()) {
+    auto use = dyn_cast<Instruction>(U);
+    if (!use)
+      continue;
+    if (std::find(visitedList.begin(), visitedList.end(), use) !=
+        visitedList.end())
+      continue; // already visited
+    IR2VEC_DEBUG(outs() << "\nDef " << /* def << */ " ";
+                 def->print(outs(), true); outs() << "\n";);
+    IR2VEC_DEBUG(outs() << "Use " << /* use << */ " ";
+                 use->print(outs(), true); outs() << "\n";);
+    if (isMemOp(use->getOpcodeName(), operandNum, memWriteOps) &&
+        use->getOperand(operandNum) == def) {
+      writeDefsMap[root].push_back(use);
+    } else if (isMemOp(use->getOpcodeName(), operandNum, memAccessOps) &&
+               use->getOperand(operandNum) == def) {
+      // A memory access on def: keep following its users.
+      getTransitiveUse(root, use, visitedList, toAppend);
+    }
+  }
+}
+// For every defined function in M (after removing its unreachable blocks),
+// finds the instruction operand that each memory access, memory write or
+// alloca works on and, once per such parent, collects the writes reachable
+// from it via getTransitiveUse (which stores them in writeDefsMap).
+void IR2Vec_FA::collectWriteDefsMap(Module &M) {
+  SmallVector<const Instruction *, 100> visitedList;
+  auto seen = [&visitedList](const Instruction *Inst) {
+    return std::find(visitedList.begin(), visitedList.end(), Inst) !=
+           visitedList.end();
+  };
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    EliminateUnreachableBlocks(F);
+    for (auto &BB : F) {
+      for (auto &I : BB) {
+        unsigned operandNum = 0;
+        bool isCandidate =
+            isMemOp(I.getOpcodeName(), operandNum, memAccessOps) ||
+            isMemOp(I.getOpcodeName(), operandNum, memWriteOps) ||
+            strcmp(I.getOpcodeName(), "alloca") == 0;
+        if (!isCandidate || seen(&I) || I.getNumOperands() == 0)
+          continue;
+        auto parent = dyn_cast<Instruction>(I.getOperand(operandNum));
+        if (parent && !seen(parent)) {
+          visitedList.push_back(parent);
+          getTransitiveUse(parent, parent, visitedList);
+        }
+      }
+    }
+  }
+}
+
+// Looks up the vector stored in opcMap for key. When the key is missing a
+// default-constructed Vector is returned and dataMissCounter is incremented.
+Vector IR2Vec_FA::getValue(std::string key) {
+  Vector vec;
+  auto Entry = opcMap.find(key);
+  if (Entry != opcMap.end()) {
+    vec = Entry->second;
+  } else {
+    IR2VEC_DEBUG(errs() << "cannot find key in map : " << key << "\n");
+    dataMissCounter++;
+  }
+  return vec;
+}
+
+// Adds to function's vector the sum of its callees' vectors scaled by WA.
+// Functions without an entry in funcCallMap are left untouched.
+void IR2Vec_FA::updateFuncVecMapWithCallee(const llvm::Function *function) {
+  auto CallEntry = funcCallMap.find(function);
+  if (CallEntry == funcCallMap.end())
+    return;
+  // Component-wise sum of the vectors of all callees
+  Vector calleeVector(DIM, 0);
+  for (auto callee : CallEntry->second) {
+    auto calleeVec = funcVecMap[callee];
+    std::transform(calleeVec.begin(), calleeVec.end(), calleeVector.begin(),
+                   calleeVector.begin(), std::plus<double>());
+  }
+  scaleVector(calleeVector, WA);
+
+  auto parentVec = funcVecMap[function];
+  std::transform(calleeVector.begin(), calleeVector.end(), parentVec.begin(),
+                 parentVec.begin(), std::plus<double>());
+  funcVecMap[function] = parentVec;
+}
+
+// Computes the flow-aware encoding of every defined function in M and
+// writes the requested output.
+//
+// Steps:
+//   1. Build a FunctionAnalysisManager and cache the branch probabilities
+//      of every defined function in bpiMap (via getBPI).
+//   2. Encode each defined function with func2Vec and store the vector in
+//      funcVecMap.
+//   3. Append one entry per function to res when level is 'f', and add every
+//      function vector to pgmVector.
+//   4. When level is 'p', append the program vector to res, prefixed with
+//      cls unless cls is -1.
+//
+// Parameters (each may be null, in which case that output is skipped):
+//   o           - receives res: one updatedRes(...) line per function for
+//                 level 'f', or a single line of tab-separated values for
+//                 level 'p'.
+//   missCount   - receives "<source file name>\t<dataMissCounter>\n".
+//   cyclicCount - receives "<source file name>\t<cyclicCounter>\n".
+//
+// Side effects: fills bpiMap and funcVecMap, accumulates into pgmVector and
+// appends to res.
+//
+// NOTE(review): bpiMap keeps pointers to results owned by the local FAM, so
+// they presumably dangle once this function returns - confirm that no later
+// caller (e.g. updateFuncVecMap) dereferences them.
+void IR2Vec_FA::generateFlowAwareEncodings(std::ostream *o,
+                                           std::ostream *missCount,
+                                           std::ostream *cyclicCount) {
+  // No trace output is written to std::cout here: o may itself be std::cout,
+  // and diagnostics would then be interleaved with the encodings.
+
+  // Only function-level analyses are registered; getBPI requests
+  // BranchProbabilityAnalysis from this manager.
+  llvm::FunctionAnalysisManager FAM;
+  llvm::PassBuilder PB;
+  PB.registerFunctionAnalyses(FAM);
+
+  // Step 1: compute the branch probabilities of all defined functions up
+  // front, so that every later getBPI call is answered from bpiMap.
+  for (auto &f : M) {
+    if (!f.isDeclaration()) {
+      getBPI(&f, FAM);
+    }
+  }
+
+  // Step 2: encode every defined function.
+  for (auto &f : M) {
+    if (!f.isDeclaration()) {
+      SmallVector<Function *, 15> funcStack;
+      // Keep the temporary: func2Vec is finished before funcVecMap is
+      // indexed, in case the call itself updates the map.
+      auto tmp = func2Vec(f, funcStack, getBPI(&f, FAM));
+      funcVecMap[&f] = tmp;
+    }
+  }
+
+  // Callee vectors are not folded in on this path (updateFuncVecMapWithCallee
+  // is not called here).
+
+  // Step 3: emit the per-function entries and accumulate the program vector.
+  for (auto &f : M) {
+    if (!f.isDeclaration()) {
+      Vector tmp = funcVecMap[&f];
+
+      if (level == 'f') {
+        res += updatedRes(tmp, &f, &M);
+        res += "\n";
+      }
+
+      // Accumulated for every level, although only level 'p' prints it.
+      std::transform(pgmVector.begin(), pgmVector.end(), tmp.begin(),
+                     pgmVector.begin(), std::plus<double>());
+    }
+  }
+
+  // Step 4: program-level output - the optional class label followed by the
+  // tab-separated components of pgmVector.
+  if (level == 'p') {
+    if (cls != -1)
+      res += std::to_string(cls) + "\t";
+
+    for (auto i : pgmVector) {
+      // Flush components with a magnitude of at most 1e-4 to zero.
+      if ((i <= 0.0001 && i > 0) || (i < 0 && i >= -0.0001)) {
+        i = 0;
+      }
+      res += std::to_string(i) + "\t";
+    }
+    res += "\n";
+  }
+
+  // Write the accumulated text.
+  if (o)
+    *o << res;
+
+  // One line per module: source file name and dataMissCounter (incremented
+  // by getValue on every opcMap lookup miss).
+  if (missCount) {
+    std::string missEntry =
+        (M.getSourceFileName() + "\t" + std::to_string(dataMissCounter) + "\n");
+    *missCount << missEntry;
+  }
+
+  // One line per module: source file name and cyclicCounter.
+  if (cyclicCount)
+    *cyclicCount << (M.getSourceFileName() + "\t" +
+                     std::to_string(cyclicCounter) + "\n");
+}
+
+// Re-encodes function with func2Vec, stores the vector in funcVecMap and
+// then does the same, depth-first, for every defined callee recorded in
+// funcCallMap. visitedFunctions prevents a function from being processed
+// twice.
+// NOTE(review): bpiMap[function] presumably yields a null pointer when no
+// branch-probability entry was cached for function - confirm func2Vec copes.
+void IR2Vec_FA::updateFuncVecMap(
+    llvm::Function *function,
+    llvm::SmallSet<const llvm::Function *, 16> &visitedFunctions) {
+  visitedFunctions.insert(function);
+  SmallVector<Function *, 15> funcStack;
+  auto encoded = func2Vec(*function, funcStack, bpiMap[function]);
+  funcVecMap[function] = encoded;
+
+  auto callees = funcCallMap[function];
+  for (auto &callee : callees) {
+    if (!callee || callee->isDeclaration() ||
+        visitedFunctions.count(callee) != 0)
+      continue;
+    // funcCallMap holds const pointers; the recursion needs a mutable one.
+    updateFuncVecMap(const_cast<Function *>(callee), visitedFunctions);
+  }
+}
+
+// Emits the flow-aware encoding of the function(s) of M whose actual name
+// (per getActualName) equals name.
+//   1. Each matching definition and, recursively, the defined functions it
+//      calls are (re)encoded into funcVecMap via updateFuncVecMap.
+//   2. Every function now present in funcVecMap gets its callees' vectors
+//      folded in, visiting functions in module order.
+//   3. With level 'f', each matching definition's entry is appended to res.
+// o, missCount and cyclicCount may be null; a non-null stream receives res,
+// the data-miss count and the cyclic count respectively.
+void IR2Vec_FA::generateFlowAwareEncodingsForFunction(
+    std::ostream *o, std::string name, std::ostream *missCount,
+    std::ostream *cyclicCount) {
+  // Step 1: encode the requested function together with its call tree.
+  for (auto &f : M) {
+    auto actualName = getActualName(&f);
+    if (f.isDeclaration() || actualName != name)
+      continue;
+    llvm::SmallSet<const Function *, 16> visitedFunctions;
+    updateFuncVecMap(&f, visitedFunctions);
+  }
+
+  // Step 2: walk the module (not funcVecMap) to preserve order.
+  for (auto &f : M) {
+    if (funcVecMap.find(&f) == funcVecMap.end())
+      continue;
+    updateFuncVecMapWithCallee(const_cast<const Function *>(&f));
+  }
+
+  // Step 3: collect the textual result of the requested function.
+  for (auto &f : M) {
+    auto actualName = getActualName(&f);
+    if (f.isDeclaration() || actualName != name)
+      continue;
+    Vector tmp = funcVecMap[&f];
+    if (level == 'f') {
+      res += updatedRes(tmp, &f, &M);
+      res += "\n";
+    }
+  }
+
+  if (o)
+    *o << res;
+  if (missCount) {
+    std::string missEntry =
+        (M.getSourceFileName() + "\t" + std::to_string(dataMissCounter) + "\n");
+    *missCount << missEntry;
+  }
+
+  if (cyclicCount)
+    *cyclicCount << (M.getSourceFileName() + "\t" +
+                     std::to_string(cyclicCounter) + "\n");
+}
+
+// Depth-first visit of vertex in SCCAdjList: marks it visited, recurses into
+// each unvisited neighbour and finally appends vertex to visitStack, so a
+// vertex is recorded only after everything reachable from it.
+void IR2Vec_FA::topoDFS(int vertex, std::vector<bool> &Visited,
+                        std::vector<int> &visitStack) {
+  Visited[vertex] = true;
+
+  const auto neighbours = SCCAdjList[vertex];
+  for (auto next : neighbours) {
+    if (!Visited[next])
+      topoDFS(next, Visited, visitStack);
+  }
+  visitStack.push_back(vertex);
+}
+
+// Returns a DFS post-order of the vertices reachable from the keys of
+// SCCAdjList. `size` is the number of vertices the visited bitmap has to
+// cover.
+std::vector<int> IR2Vec_FA::topoOrder(int size) {
+  std::vector<int> order;
+  std::vector<bool> seen(size, false);
+
+  // Launch a DFS from every key that an earlier DFS has not reached yet.
+  for (auto &entry : SCCAdjList) {
+    if (seen[entry.first])
+      continue;
+    topoDFS(entry.first, seen, order);
+  }
+
+  return order;
+}
+
+// Follows the chain of memory-access instructions starting at `Inst`: for
+// each instruction that isMemOp() recognises in memAccessOps, the operand at
+// the index it reports is inspected. Every instruction on that chain that
+// lives in `ParentBB` is appended to `Killlist`. The walk stops at the first
+// instruction that is not a memory access or whose followed operand is not an
+// instruction.
+void IR2Vec_FA::TransitiveReads(SmallVector<Instruction *, 16> &Killlist,
+                                Instruction *Inst, BasicBlock *ParentBB) {
+  assert(Inst != nullptr);
+
+  Instruction *Cur = Inst;
+  unsigned followIdx;
+  while (isMemOp(Cur->getOpcodeName(), followIdx, memAccessOps)) {
+    auto *Def = dyn_cast<Instruction>(Cur->getOperand(followIdx));
+    if (!Def)
+      break;
+    if (Def->getParent() == ParentBB)
+      Killlist.push_back(Def);
+    Cur = Def;
+  }
+}
+
+// Builds the kill list for `writeInst`.
+//
+// First gathers, in use-list order, every user of `Arg` that isMemOp()
+// recognises in memWriteOps and whose reported operand is `Arg` itself. That
+// list is then walked backwards until `writeInst` is met; the instructions
+// seen on the way that sit in writeInst's basic block are returned.
+SmallVector<Instruction *, 16>
+IR2Vec_FA::createKilllist(Instruction *Arg, Instruction *writeInst) {
+  BasicBlock *HomeBB = writeInst->getParent();
+
+  SmallVector<Instruction *, 16> Writers;
+  unsigned opIdx;
+  for (User *U : Arg->users()) {
+    auto *Candidate = dyn_cast<Instruction>(U);
+    if (!Candidate)
+      continue;
+    if (!isMemOp(Candidate->getOpcodeName(), opIdx, memWriteOps))
+      continue;
+    if (Candidate->getOperand(opIdx) == Arg)
+      Writers.push_back(Candidate);
+  }
+
+  SmallVector<Instruction *, 16> KillList;
+  for (auto It = Writers.rbegin(), End = Writers.rend(); It != End; ++It) {
+    Instruction *Writer = *It;
+    if (Writer == writeInst)
+      break;
+    if (Writer->getParent() == HomeBB)
+      KillList.push_back(Writer);
+  }
+
+  return KillList;
+}
+
+// Computes the embedding of function F and memoizes it in funcVecMap.
+//
+//   F         - function to embed.
+//   funcStack - functions currently being embedded; F is pushed for the
+//               duration of the call and popped before returning.
+//   bpi       - optional branch probabilities for F. When non-null, every
+//               basic-block vector is scaled by the block's cumulative score
+//               (see below) before being added to the function vector; when
+//               null the block vectors are summed unweighted.
+//
+// Returns the function vector. If F already has an entry in funcVecMap that
+// entry is returned and nothing is recomputed.
+Vector IR2Vec_FA::func2Vec(Function &F,
+                           SmallVector<Function *, 15> &funcStack,
+                           BranchProbabilityInfo *bpi) {
+  auto It = funcVecMap.find(&F);
+  if (It != funcVecMap.end()) {
+    return It->second;
+  }
+
+  funcStack.push_back(&F);
+
+  Vector funcVector(DIM, 0); // Initialize zero vector
+
+  // succMap[b][s]      : probability of the edge b -> s.
+  // cumulativeScore[b] : score pushed into b from its predecessors, with the
+  //                      first block of the traversal seeded to 1.
+  MapVector<const BasicBlock *, MapVector<BasicBlock *, double>> succMap;
+  MapVector<const BasicBlock *, double> cumulativeScore;
+
+  if (bpi) {
+    for (auto &b : F) {
+      MapVector<BasicBlock *, double> succs;
+      for (auto it = succ_begin(&b), et = succ_end(&b); it != et; ++it) {
+        BasicBlock *t = *it;
+        auto bp = bpi->getEdgeProbability(&b, t);
+        double prob = double(bp.getNumerator()) / double(bp.getDenominator());
+        // Debug-only trace; this used to be printed unconditionally on
+        // std::cout for every CFG edge.
+        IR2VEC_DEBUG(outs() << "Probability : " << prob << "\n");
+        succs[t] = prob;
+      }
+      succMap[&b] = succs;
+      cumulativeScore[&b] = 0;
+    }
+  }
+
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+
+  if (bpi) {
+    // Push scores forward in reverse post-order: each block hands
+    // score * edge-probability to each of its successors.
+    bool isHeader = true;
+    for (auto *b : RPOT) {
+      if (isHeader)
+        cumulativeScore[b] = 1;
+      isHeader = false;
+
+      auto succIt = succMap.find(b);
+      if (succIt == succMap.end())
+        continue;
+      for (auto &element : succIt->second) {
+        // Re-read the block's own score for every edge, as before, so a
+        // self-edge influences the edges processed after it.
+        const double score = cumulativeScore[b];
+        cumulativeScore[element.first] += score * element.second;
+      }
+    }
+  }
+
+  // The reaching-definition / SCC based solving that used to run here (kill
+  // lists, instReachingDefsMap, getAllSCC, topoOrder, solveSingleComponent,
+  // solveInsts) is disabled in this version. The maps read below are
+  // presumably filled by bb2Vec() - confirm against its definition.
+
+  for (auto *b : RPOT) {
+    bb2Vec(*b, funcStack);
+    Vector bbVector(DIM, 0);
+    for (auto &I : *b) {
+      // Only instructions flagged in livelinessMap contribute. An
+      // instruction without an entry is skipped instead of dereferencing
+      // end().
+      auto liveIt = livelinessMap.find(&I);
+      if (liveIt == livelinessMap.end() || !liveIt->second)
+        continue;
+
+      // Same guard for the instruction vector itself.
+      auto vecIt = instVecMap.find(&I);
+      if (vecIt == instVecMap.end())
+        continue;
+
+      const auto &vec = vecIt->second;
+      std::transform(bbVector.begin(), bbVector.end(), vec.begin(),
+                     bbVector.begin(), std::plus<double>());
+    }
+
+    // NOTE(review): a loop used to follow here that assigned 0 to by-value
+    // copies of the bbVector elements with magnitude <= 0.0001, so it never
+    // modified bbVector. It is removed to keep results unchanged; iterate by
+    // reference if the clamp is actually wanted.
+
+    if (bpi) {
+      // Weight the block vector by the block's cumulative score. bbVector is
+      // local to this iteration, so it is scaled in place.
+      const double weight = cumulativeScore[b];
+      for (auto &v : bbVector)
+        v *= weight;
+    }
+
+    std::transform(funcVector.begin(), funcVector.end(), bbVector.begin(),
+                   funcVector.begin(), std::plus<double>());
+  }
+
+  funcStack.pop_back();
+  funcVecMap[&F] = funcVector;
+  return funcVector;
+}
+
+// LoopInfo maps a basic block to its innermost loop. Starting from that loop,
+// climb the parent chain and return the outermost loop containing BB, or
+// nullptr when BB is not inside any loop.
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
+  const Loop *Current = LI->getLoopFor(BB);
+  if (!Current)
+    return nullptr;
+
+  for (const Loop *Up = Current->getParentLoop(); Up;
+       Up = Up->getParentLoop())
+    Current = Up;
+  return Current;
+}
+
+// Computes a probability for control flowing from src's basic block to tgt's
+// basic block.
+//
+//   src, tgt - instructions whose parent blocks are the start and the goal.
+//   writeSet - instructions whose parent blocks must not be traversed (the
+//              goal block itself is still accepted).
+//
+// The CFG is explored depth-first from src's block and every successor edge
+// of a terminator is followed at most once. Each time an edge into the goal
+// block is found, `prob` is multiplied by the probabilities of all edges on
+// the current DFS path including that last edge; the search then continues
+// from the goal block. Returns 1 when both instructions share a block or when
+// no path is found.
+double IR2Vec_FA::getRDProb(const Instruction *src, const Instruction *tgt,
+                            llvm::SmallVector<const Instruction *, 10> writeSet) {
+  const BasicBlock *srcParent = src->getParent();
+  const BasicBlock *tgtParent = tgt->getParent();
+
+  // Same block: the probability is taken to be 1.
+  if (srcParent == tgtParent)
+    return 1;
+
+  SmallPtrSet<const BasicBlock *, 20> writingBB;
+  for (const Instruction *W : writeSet)
+    writingBB.insert(W->getParent());
+
+  SmallVector<const BasicBlock *, 20> stack;
+  SmallPtrSet<const BasicBlock *, 16> visited;
+  // Index of the next successor to try for each terminator.
+  SmallMapVector<const Instruction *, unsigned, 16> last_seen;
+  double prob = 1;
+
+  // Multiplies into `prob` the probability of every edge along the current
+  // DFS stack, followed by the edge from the top of the stack to `last`.
+  // The stack is never empty when this is called.
+  auto accumulatePath = [&](BranchProbabilityInfo *bpi,
+                            const BasicBlock *last) {
+    assert(bpi && "branch probability info is required to weight a path");
+    auto mulEdge = [&](const BasicBlock *from, const BasicBlock *to) {
+      auto bp = bpi->getEdgeProbability(from, to);
+      prob = prob * double(bp.getNumerator()) / double(bp.getDenominator());
+    };
+    const BasicBlock *prev = nullptr;
+    for (const BasicBlock *BB : stack) {
+      if (prev)
+        mulEdge(prev, BB);
+      prev = BB;
+    }
+    mulEdge(prev, last);
+  };
+
+  const BasicBlock *curNode = srcParent;
+  const Instruction *curTerm = curNode->getTerminator();
+  // true  : curNode was just entered and has to be pushed on the stack;
+  // false : all successors of the stack top were tried, so backtrack.
+  bool descend = true;
+  do {
+    visited.insert(curNode);
+    if (descend) {
+      stack.push_back(curNode);
+    } else {
+      stack.pop_back();
+      if (stack.empty())
+        break;
+      curNode = stack.back();
+      curTerm = curNode->getTerminator();
+    }
+    descend = false;
+
+    for (unsigned i = last_seen[curTerm], e = curTerm->getNumSuccessors();
+         i < e; i++) {
+      last_seen[curTerm]++;
+      const BasicBlock *succ = curTerm->getSuccessor(i);
+
+      if (succ == tgtParent) {
+        Function *parent =
+            const_cast<BasicBlock *>(stack.front())->getParent();
+        auto it = bpiMap.find(parent);
+        // A null entry is treated like a missing one; it used to be
+        // dereferenced.
+        if (it != bpiMap.end() && it->second) {
+          accumulatePath(it->second, succ);
+        } else {
+          // No usable cached BPI: compute one on the spot. The analysis
+          // manager has to outlive every use of the pointer it hands out, so
+          // the path is accumulated inside this scope and the pointer is not
+          // stored in bpiMap.
+          // NOTE(review): assumes getBPI() returns a result owned by FAM -
+          // confirm. If so, the previous code used the pointer after FAM had
+          // been destroyed and also cached it for later calls.
+          llvm::FunctionAnalysisManager FAM;
+          llvm::PassBuilder PB;
+          PB.registerFunctionAnalyses(FAM);
+          accumulatePath(getBPI(parent, FAM), succ);
+        }
+      } else if (visited.count(succ) || writingBB.count(succ)) {
+        // Already explored, or blocked by a write: try the next successor.
+        continue;
+      }
+
+      // Descend into succ (this includes the goal block once it is found).
+      curNode = succ;
+      curTerm = curNode->getTerminator();
+      descend = true;
+      break;
+    }
+  } while (!stack.empty());
+
+  // Debug-only trace; the unconditional cout / errs() output was removed.
+  IR2VEC_DEBUG(outs() << "Computed Probability: " << prob << "\n");
+  return prob;
+}
+
+// Worklist-driven CFG search. Returns true when StopBB may be reachable from
+// one of the blocks initially in Worklist without entering a block of
+// ExclusionSet, and false once every path has been ruled out.
+//
+//   Worklist     - start blocks; consumed by the search and popped before it
+//                  is tested for emptiness, so it must not be empty on entry.
+//   StopBB       - block whose reachability is asked for.
+//   ExclusionSet - optional set of blocks that paths may not pass through.
+//   DT, LI       - optional dominator tree / loop info used as shortcuts.
+//
+// After a fixed number of visited blocks the search gives up and
+// conservatively answers true.
+bool isPotentiallyReachableFromMany(
+    SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
+    const SmallPtrSetImpl<const BasicBlock *> *ExclusionSet,
+    const DominatorTree *DT, const LoopInfo *LI) {
+  const bool HasExclusions = ExclusionSet && !ExclusionSet->empty();
+
+  // The dominance shortcut is dropped in two situations: an unreachable stop
+  // block is dominated from everywhere whether or not a path exists, and an
+  // excluded block may sit between a dominating block and the stop block.
+  if (DT && (!DT->isReachableFromEntry(StopBB) || HasExclusions))
+    DT = nullptr;
+
+  // Blocks of one loop normally reach each other, but an excluded block can
+  // split a loop body. Remember the outermost loops that contain one.
+  SmallPtrSet<const Loop *, 8> LoopsWithHoles;
+  if (LI && ExclusionSet) {
+    for (auto Excluded : *ExclusionSet)
+      if (const Loop *Hole = getOutermostLoop(LI, Excluded))
+        LoopsWithHoles.insert(Hole);
+  }
+
+  const Loop *StopLoop = nullptr;
+  if (LI)
+    StopLoop = getOutermostLoop(LI, StopBB);
+
+  // Cap on the number of blocks expanded, chosen arbitrarily, to bound the
+  // cost on large CFGs.
+  unsigned Budget = 32;
+
+  SmallPtrSet<const BasicBlock *, 32> Seen;
+  do {
+    BasicBlock *Cur = Worklist.pop_back_val();
+    if (!Seen.insert(Cur).second)
+      continue;
+    if (Cur == StopBB)
+      return true;
+    if (ExclusionSet && ExclusionSet->count(Cur))
+      continue;
+    if (DT && DT->dominates(Cur, StopBB))
+      return true;
+
+    // In a loop with a hole we cannot jump straight to the loop exits, since
+    // reaching an exit might require passing through an excluded block; drop
+    // the loop so Cur's own successors are expanded instead.
+    const Loop *Outer = LI ? getOutermostLoop(LI, Cur) : nullptr;
+    if (Outer && LoopsWithHoles.count(Outer))
+      Outer = nullptr;
+    if (StopLoop && Outer == StopLoop)
+      return true;
+
+    // Out of budget: nothing was proven, so answer "maybe reachable".
+    if (--Budget == 0)
+      return true;
+
+    if (Outer) {
+      // Every block of the loop reaches every other one, so continue from
+      // the loop's exit blocks and ignore the rest of its body.
+      Outer->getExitBlocks(Worklist);
+    } else {
+      Worklist.append(succ_begin(Cur), succ_end(Cur));
+    }
+  } while (!Worklist.empty());
+
+  // All paths were exhausted without meeting StopBB.
+  return false;
+}
+
+// Returns true when instruction B may be reachable from instruction A inside
+// their common function, optionally forbidding paths through the blocks of
+// ExclusionSet. DT and LI are optional and only used as shortcuts. Both
+// instructions must belong to the same function.
+bool isPotentiallyReachable(
+    const Instruction *A, const Instruction *B,
+    const SmallPtrSetImpl<const BasicBlock *> *ExclusionSet,
+    const DominatorTree *DT, const LoopInfo *LI) {
+  const BasicBlock *SrcBB = A->getParent();
+  const BasicBlock *DstBB = B->getParent();
+  assert(SrcBB->getParent() == DstBB->getParent() &&
+         "This analysis is function-local!");
+  const BasicBlock *EntryBB = &SrcBB->getParent()->getEntryBlock();
+
+  SmallVector<BasicBlock *, 32> Pending;
+
+  if (SrcBB != DstBB) {
+    Pending.push_back(const_cast<BasicBlock *>(SrcBB));
+  } else {
+    // Only in the same-block case does instruction order inside a block
+    // matter; everywhere else reachability is decided between whole blocks.
+    BasicBlock *BB = const_cast<BasicBlock *>(SrcBB);
+
+    // Inside a loop any instruction of the block can be reached again by
+    // going around a backedge.
+    if (LI && LI->getLoopFor(BB))
+      return true;
+
+    // Scan forward from A: is B met before the end of the block?
+    for (BasicBlock::const_iterator Pos = A->getIterator(), End = BB->end();
+         Pos != End; ++Pos) {
+      if (&*Pos == B)
+        return true;
+    }
+
+    // The entry block may not have predecessors, so it cannot be re-entered.
+    if (BB == EntryBB)
+      return false;
+
+    // Otherwise fall back to the block-level walk from BB's successors; with
+    // no successors there is no path.
+    Pending.append(succ_begin(BB), succ_end(BB));
+    if (Pending.empty())
+      return false;
+  }
+
+  if (DT) {
+    const bool SrcLive = DT->isReachableFromEntry(SrcBB);
+    const bool DstLive = DT->isReachableFromEntry(DstBB);
+    if (SrcLive && !DstLive)
+      return false;
+    if (!ExclusionSet || ExclusionSet->empty()) {
+      if (SrcBB == EntryBB && DstLive)
+        return true;
+      if (DstBB == EntryBB && SrcLive)
+        return false;
+    }
+  }
+
+  return isPotentiallyReachableFromMany(
+      Pending, const_cast<BasicBlock *>(DstBB), ExclusionSet, DT, LI);
+}
+
+// Returns the definitions that may reach operand `loc` of instruction `I`.
+//
+// The operand must itself be an instruction (called `parent` below);
+// otherwise an empty list is returned. The candidates are `parent` plus the
+// instructions recorded for it in writeDefsMap. Outline:
+//   1. No recorded writes            -> the result is just `parent`.
+//   2. Keep the recorded writes (other than I) from which I is potentially
+//      reachable, add `parent`, and group the candidates by basic block.
+//   3. If a candidate precedes I inside I's own block, the last such
+//      candidate is the only result.
+//   4. Otherwise keep one candidate per block and return those from which I
+//      is still potentially reachable when the other candidate blocks are
+//      excluded from the search.
+SmallVector<const Instruction *, 10>
+IR2Vec_FA::getReachingDefs(const Instruction *I, unsigned loc) {
+  IR2VEC_DEBUG(
+      outs()
+      << "Call to getReachingDefs Started****************************\n");
+  auto parent = dyn_cast<Instruction>(I->getOperand(loc));
+  if (!parent)
+    return {};
+  SmallVector<const Instruction *, 10> RD;
+  SmallVector<const Instruction *, 10> probableRD;
+  IR2VEC_DEBUG(outs() << "Inside RD for : ");
+  IR2VEC_DEBUG(I->print(outs()); outs() << "\n");
+
+  // NOTE(review): writeDefsMap is indexed with operator[] throughout, which
+  // may insert an empty entry for `parent` depending on the map type -
+  // confirm against its declaration.
+  if (writeDefsMap[parent].empty()) {
+    RD.push_back(parent);
+    return RD;
+  }
+
+  // NOTE(review): the list is known to be non-empty here, so this condition
+  // always holds and the llvm_unreachable at the end is never executed.
+  if (writeDefsMap[parent].size() >= 1) {
+    SmallMapVector<const BasicBlock *, SmallVector<const Instruction *, 10>, 16>
+        bbInstMap;
+    // Drop the recorded writes from which I cannot be reached (and I itself).
+    for (auto it : writeDefsMap[parent]) {
+      if (it != I && isPotentiallyReachable(it, I)) {
+
+        probableRD.push_back(it);
+      }
+    }
+    // `parent` is always a candidate and is listed after the writes.
+    probableRD.push_back(parent);
+    IR2VEC_DEBUG(outs() << "----PROBABLE RD---"
+                        << "\n");
+    // Group the candidates by the basic block that contains them.
+    for (auto i : probableRD) {
+      IR2VEC_DEBUG(i->print(outs()); outs() << "\n");
+      bbInstMap[i->getParent()].push_back(i);
+    }
+
+    IR2VEC_DEBUG(outs() << "contents of bbinstmap:\n"; for (auto i
+                                                            : bbInstMap) {
+      for (auto j : i.second) {
+        j->print(outs());
+        outs() << "\n";
+      }
+      outs() << "+++++++++++++++++++++++++\n";
+    });
+
+    // If a candidate sits in I's own basic block ahead of I, only that
+    // definition is returned. When there are several, the one closest before
+    // I wins.
+    if (!bbInstMap[I->getParent()].empty()) {
+      IR2VEC_DEBUG(outs() << "--------Within BB--------\n");
+      IR2VEC_DEBUG(I->print(outs()); outs() << "\n");
+      auto orderedVec = bbInstMap[I->getParent()];
+      // NOTE(review): this pointer shadows the outer `probableRD` vector for
+      // the rest of this scope.
+      const Instruction *probableRD = nullptr;
+      // Walk the block in program order up to I, remembering the latest
+      // candidate seen.
+      for (auto &i : *(I->getParent())) {
+        if (&i == I)
+          break;
+        else {
+          if (std::find(orderedVec.begin(), orderedVec.end(), &i) !=
+              orderedVec.end())
+            probableRD = &i;
+        }
+      }
+
+      if (probableRD != nullptr) {
+        IR2VEC_DEBUG(outs() << "Returning: ");
+        IR2VEC_DEBUG(probableRD->print(outs()); outs() << "\n");
+        RD.push_back(probableRD);
+        return RD;
+      }
+    }
+
+    IR2VEC_DEBUG(outs() << "--------Across BB--------\n");
+    // For every block keep only the first candidate in bbInstMap order; all
+    // later candidates of the same block are collected for removal.
+    SmallVector<const Instruction *, 10> toDelete;
+    for (auto it : bbInstMap) {
+      IR2VEC_DEBUG(outs() << "--------INSTMAP BEGIN--------\n";
+                   it.first->print(outs()); outs() << "\n");
+      bool first = true;
+      for (auto it1 : bbInstMap[it.first]) {
+        if (first) {
+          first = false;
+          continue;
+        }
+        toDelete.push_back(it1);
+        IR2VEC_DEBUG(it1->print(outs()); outs() << "\n");
+      }
+      IR2VEC_DEBUG(outs() << "--------INSTMAP END--------\n");
+    }
+    // Rebuild probableRD without the removed candidates.
+    auto tmp = probableRD;
+    probableRD = {};
+    for (auto i : tmp) {
+      if (std::find(toDelete.begin(), toDelete.end(), i) == toDelete.end())
+        probableRD.push_back(i);
+    }
+
+    IR2VEC_DEBUG(I->print(outs()); outs() << "\n"; outs() << "probableRD: \n";
+                 for (auto i
+                      : probableRD) i->print(outs());
+                 outs() << "\n"; outs() << "-----------------\n");
+
+    // bbSet holds the candidate blocks; refBBInstMap maps each of them to its
+    // remaining candidate.
+    SmallPtrSet<const BasicBlock *, 10> bbSet;
+    SmallMapVector<const BasicBlock *, const Instruction *, 16> refBBInstMap;
+
+    for (auto i : probableRD) {
+      bbSet.insert(i->getParent());
+      refBBInstMap[i->getParent()] = i;
+      IR2VEC_DEBUG(outs() << i->getParent()->getName().str() << "\n");
+    }
+    // A candidate is a reaching definition only if I can still be reached
+    // from it when all other candidate blocks are excluded from the search.
+    // The order of RD follows the iteration order of bbSet.
+    for (auto i : bbSet) {
+      IR2VEC_DEBUG(i->print(outs()); outs() << "\n");
+      auto exclusionSet = bbSet;
+      exclusionSet.erase(i);
+      if (isPotentiallyReachable(refBBInstMap[i], I, &exclusionSet, nullptr,
+                                 nullptr)) {
+        RD.push_back(refBBInstMap[i]);
+        IR2VEC_DEBUG(outs() << "refBBInstMap : ";
+                     refBBInstMap[i]->print(outs()); outs() << "\n");
+      }
+    }
+    IR2VEC_DEBUG(
+        outs() << "****************************\n";
+        outs() << "Reaching defn for "; I->print(outs()); outs() << "\n";
+        for (auto i
+             : RD) i->print(outs());
+        outs() << "\n";
+        outs()
+        << "Call to getReachingDefs Ended****************************\n");
+    return RD;
+  }
+
+  llvm_unreachable("unreachable");
+  return {};
+}
+
+// Looks `opcode` up in `map`. On a hit the mapped value is stored in
+// `operand` and true is returned; on a miss false is returned and `operand`
+// is left untouched.
+bool IR2Vec_FA::isMemOp(StringRef opcode, unsigned &operand,
+                        SmallDenseMap<StringRef, unsigned> map) {
+  auto Found = map.find(opcode);
+  if (Found == map.end())
+    return false;
+
+  operand = Found->second;
+  return true;
+}
+
+/*----------------------------------------------------------------------------------
+  Builds the partial vector of an instruction: the value looked up for its
+  opcode plus the WT-scaled value looked up for its result type. The vector is
+  stored in partialInstValMap. Nothing is done when the instruction already
+  has an entry in instVecMap.
+  ----------------------------------------------------------------------------------
+*/
+void IR2Vec_FA::getPartialVec(
+    const Instruction &I,
+    SmallMapVector<const Instruction *, Vector, 16> &partialInstValMap) {
+
+  const bool alreadySolved = instVecMap.find(&I) != instVecMap.end();
+  if (alreadySolved) {
+    IR2VEC_DEBUG(outs() << "Returning from inst2Vec() I found in Map\n");
+    return;
+  }
+
+  // Opcode contribution.
+  Vector partial(DIM, 0);
+  const std::string opcodeKey = I.getOpcodeName();
+  auto vec = getValue(opcodeKey);
+  IR2VEC_DEBUG(I.print(outs()); outs() << "\n");
+  std::transform(partial.begin(), partial.end(), vec.begin(), partial.begin(),
+                 std::plus<double>());
+  partialInstValMap[&I] = partial;
+
+  IR2VEC_DEBUG(outs() << "contents of partialInstValMap:\n";
+               for (auto i
+                    : partialInstValMap) {
+                 i.first->print(outs());
+                 outs() << "\n";
+               });
+
+  // Pick the lookup key for the result type; the predicates are tried in the
+  // same order as before and the first match wins.
+  auto type = I.getType();
+  const char *typeKey = "unknownTy";
+  if (type->isVoidTy())
+    typeKey = "voidTy";
+  else if (type->isFloatingPointTy())
+    typeKey = "floatTy";
+  else if (type->isIntegerTy())
+    typeKey = "integerTy";
+  else if (type->isFunctionTy())
+    typeKey = "functionTy";
+  else if (type->isStructTy())
+    typeKey = "structTy";
+  else if (type->isArrayTy())
+    typeKey = "arrayTy";
+  else if (type->isPointerTy())
+    typeKey = "pointerTy";
+  else if (type->isVectorTy())
+    typeKey = "vectorTy";
+  else if (type->isEmptyTy())
+    typeKey = "emptyTy";
+  else if (type->isLabelTy())
+    typeKey = "labelTy";
+  else if (type->isTokenTy())
+    typeKey = "tokenTy";
+  else if (type->isMetadataTy())
+    typeKey = "metadataTy";
+
+  // Type contribution, scaled by WT, added on top of the opcode part.
+  vec = getValue(typeKey);
+  scaleVector(vec, WT);
+  std::transform(partial.begin(), partial.end(), vec.begin(), partial.begin(),
+                 std::plus<double>());
+
+  partialInstValMap[&I] = partial;
+}
+/*----------------------------------------------------------------------------------
+  Function to solve circular dependencies in Instructions
+  ----------------------------------------------------------------------------------
+*/
+void IR2Vec_FA::solveInsts(
+    llvm::SmallMapVector<const llvm::Instruction *, IR2Vec::Vector, 16>
+        &partialInstValMap, SmallVector<Function *, 15> &funcStack) {
+  std::map<unsigned, const Instruction *> xI;
+  std::map<const Instruction *, unsigned> Ix;
+  std::vector<std::vector<double>> A, B;
+  SmallMapVector<const Instruction *,
+                 SmallMapVector<const Instruction *, double, 16>, 16>
+      RDValMap;
+  unsigned pos = 0;
+  for (auto It : partialInstValMap) {
+    auto inst = It.first;
+    if (instVecMap.find(inst) == instVecMap.end()) {
+      Ix[inst] = pos;
+      xI[pos++] = inst;
+      std::vector<double> tmp;
+      for (auto i : It.second) {
+        tmp.push_back((int)(i * 10) / 10.0);
+      }
+      B.push_back(tmp);
+      for (unsigned i = 0; i < inst->getNumOperands(); i++) {
+        if (isa<Function>(inst->getOperand(i))) {
+          auto f = getValue("function");
+          if (isa<CallInst>(inst)) {
+            auto ci = dyn_cast<CallInst>(inst);
+            Function *func = ci->getCalledFunction();
+            if (func) {
+              if (!func->isDeclaration() && std::find(funcStack.begin(), funcStack.end(), func) ==
+                        funcStack.end()) {
+                // Will be dealt with later
+                // change might be needed here, don't know for sure
+                Vector tempCall(DIM, 0);
+                // f = tempCall;
+                f = func2Vec(*func, funcStack, bpiMap[func]);
+              }
+            }
+          }
+          auto svtmp = f;
+          scaleVector(svtmp, WA);
+          std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+          std::vector<double> vec = B.back();
+          IR2VEC_DEBUG(outs() << vec.back() << "\n");
+          IR2VEC_DEBUG(outs() << vtmp.back() << "\n");
+          B.pop_back();
+          std::transform(vtmp.begin(), vtmp.end(), vec.begin(), vec.begin(),
+                         std::plus<double>());
+          IR2VEC_DEBUG(outs() << vec.back() << "\n");
+          B.push_back(vec);
+        } else if (isa<Constant>(inst->getOperand(i)) &&
+                   !isa<PointerType>(inst->getOperand(i)->getType())) {
+          auto c = getValue("constant");
+          auto svtmp = c;
+          scaleVector(svtmp, WA);
+          std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+          std::vector<double> vec = B.back();
+          IR2VEC_DEBUG(outs() << vec.back() << "\n");
+          IR2VEC_DEBUG(outs() << vtmp.back() << "\n");
+          B.pop_back();
+          std::transform(vtmp.begin(), vtmp.end(), vec.begin(), vec.begin(),
+                         std::plus<double>());
+          IR2VEC_DEBUG(outs() << vec.back() << "\n");
+          B.push_back(vec);
+        } else if (isa<BasicBlock>(inst->getOperand(i))) {
+          auto l = getValue("label");
+          auto svtmp = l;
+          scaleVector(svtmp, WA);
+          std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+          std::vector<double> vec = B.back();
+          IR2VEC_DEBUG(outs() << vec.back() << "\n");
+          IR2VEC_DEBUG(outs() << vtmp.back() << "\n");
+          B.pop_back();
+          std::transform(vtmp.begin(), vtmp.end(), vec.begin(), vec.begin(),
+                         std::plus<double>());
+          IR2VEC_DEBUG(outs() << vec.back() << "\n");
+          B.push_back(vec);
+        } else {
+          /*
+          if (isa<Instruction>(inst->getOperand(i))) {
+            auto RD = getReachingDefs(inst, i);
+            for (auto i : RD) {
+              // Check if value of RD is precomputed
+              if (instVecMap.find(i) == instVecMap.end()) {
+                if (partialInstValMap.find(i) == partialInstValMap.end()) {
+                  assert(partialInstValMap.find(i) != partialInstValMap.end() &&
+                         "Should not reach");
+                }
+                if (RDValMap.find(inst) == RDValMap.end()) {
+                  SmallMapVector<const Instruction *, double, 16> tmp;
+                  // change needed over here
+                  tmp[i] = WA;
+                  RDValMap[inst] = tmp;
+                } else {
+                  RDValMap[inst][i] = WA;
+                }
+              } else {
+                auto svtmp = instVecMap[i];
+                scaleVector(svtmp, WA);
+                std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+                std::vector<double> vec = B.back();
+                IR2VEC_DEBUG(outs() << vec.back() << "\n");
+                IR2VEC_DEBUG(outs() << vtmp.back() << "\n");
+                B.pop_back();
+                std::transform(vtmp.begin(), vtmp.end(), vec.begin(),
+                               vec.begin(), std::plus<double>());
+                IR2VEC_DEBUG(outs() << vec.back() << "\n");
+                B.push_back(vec);
+              }
+            }
+          } else if (isa<PointerType>(inst->getOperand(i)->getType())) {
+            auto l = getValue("pointer");
+            auto svtmp = l;
+            scaleVector(svtmp, WA);
+            std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+            std::vector<double> vec = B.back();
+            IR2VEC_DEBUG(outs() << vec.back() << "\n");
+            IR2VEC_DEBUG(outs() << vtmp.back() << "\n");
+            B.pop_back();
+            std::transform(vtmp.begin(), vtmp.end(), vec.begin(), vec.begin(),
+                           std::plus<double>());
+            IR2VEC_DEBUG(outs() << vec.back() << "\n");
+            B.push_back(vec);
+          } else {
+            auto l = getValue("variable");
+            auto svtmp = l;
+            scaleVector(svtmp, WA);
+            std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+            std::vector<double> vec = B.back();
+            IR2VEC_DEBUG(outs() << vec.back() << "\n");
+            IR2VEC_DEBUG(outs() << vtmp.back() << "\n");
+            B.pop_back();
+            std::transform(vtmp.begin(), vtmp.end(), vec.begin(), vec.begin(),
+                           std::plus<double>());
+            IR2VEC_DEBUG(outs() << vec.back() << "\n");
+            B.push_back(vec);
+          }
+
+          */
+          auto RD = getReachingDefs(inst, i);
+          for (auto i : RD) {
+            // Check if value of RD is precomputed
+            if (instVecMap.find(i) == instVecMap.end()) {
+              if (partialInstValMap.find(i) == partialInstValMap.end()) {
+                llvm_unreachable("Should not reach");
+              }
+              if (RDValMap.find(inst) == RDValMap.end()) {
+                // SmallDenseMap<const Instruction *, double> tmp;
+                SmallMapVector<const Instruction *, double, 16> tmp;
+                tmp[i] = WA * getRDProb(i, inst, RD);
+                RDValMap[inst] = tmp;
+              } else {
+                RDValMap[inst][i] = WA * getRDProb(i, inst, RD);
+              }
+            } else {
+              auto prob = getRDProb(i, inst, RD);
+              auto svtmp = instVecMap[i];
+              scaleVector(svtmp, prob * WA);
+              std::vector<double> vtmp(svtmp.begin(), svtmp.end());
+              std::vector<double> vec = B.back();
+              // LLVM_DEBUG(dbgs() << vec.back() << "\n");
+              // LLVM_DEBUG(dbgs() << vtmp.back() << "\n");
+              B.pop_back();
+              std::transform(vtmp.begin(), vtmp.end(), vec.begin(),
+                              vec.begin(), std::plus<double>());
+              // LLVM_DEBUG(dbgs() << vec.back() << "\n");
+              B.push_back(vec);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for (unsigned i = 0; i < xI.size(); i++) {
+    std::vector<double> tmp(xI.size(), 0);
+    A.push_back(tmp);
+  }
+
+  for (unsigned i = 0; i < xI.size(); i++) {
+    A[i][i] = 1;
+    auto tmp = A[i];
+    auto instRDVal = RDValMap[xI[i]];
+    for (auto j : instRDVal) {
+      A[i][Ix[j.first]] = (int)((A[i][Ix[j.first]] - j.second) * 10) / 10.0;
+    }
+  }
+
+  for (unsigned i = 0; i < B.size(); i++) {
+    auto Bvec = B[i];
+    for (unsigned j = 0; j < B[i].size(); j++) {
+      B[i][j] = (int)(B[i][j] * 10) / 10.0;
+    }
+  }
+
+  auto C = solve(A, B);
+  SmallMapVector<const BasicBlock *, SmallVector<const Instruction *, 10>, 16>
+      bbInstMap;
+
+  for (unsigned i = 0; i < C.size(); i++) {
+    Vector tmp(C[i].begin(), C[i].end());
+    IR2VEC_DEBUG(outs() << "inst:"
+                        << "\t";
+                 xI[i]->print(outs()); outs() << "\nVAL: " << tmp[0] << "\n");
+
+    instVecMap[xI[i]] = tmp;
+    livelinessMap.try_emplace(xI[i], true);
+
+    instSolvedBySolver.push_back(xI[i]);
+    bbInstMap[xI[i]->getParent()].push_back(xI[i]);
+  }
+
+  for (auto BB : bbInstMap) {
+    unsigned opnum;
+    auto orderedInstVec = BB.second;
+    for (auto I : orderedInstVec) {
+      if (killMap.find(I) != killMap.end()) {
+        auto list = killMap[I];
+        for (auto defs : list) {
+          auto It2 = livelinessMap.find(defs);
+          if (It2 == livelinessMap.end())
+            livelinessMap.try_emplace(defs, false);
+          else
+            It2->second = false;
+        }
+      }
+    }
+  }
+}
+
+/*----------------------------------------------------------------------------------
+  Function to solve a single instruction usually forming a SCC
+  ----------------------------------------------------------------------------------
+*/
+
+// Computes the embedding of instruction I, starting from the partial vector
+// stored for it in partialInstValMap, and records the result in instVecMap.
+//
+// Parameters:
+//   I                 - instruction to solve; nothing is done if it already
+//                       has an entry in instVecMap.
+//   partialInstValMap - partial vectors of instructions that are in progress;
+//                       the entry for &I is read as the starting vector, and
+//                       entries are temporarily added/erased while recursing
+//                       into unsolved reaching definitions.
+//   funcStack         - functions that must not be embedded again; a callee
+//                       found in this stack is not passed to func2Vec().
+void IR2Vec_FA::solveSingleComponent(
+    const Instruction &I,
+    SmallMapVector<const Instruction *, Vector, 16> &partialInstValMap, SmallVector<Function *, 15> &funcStack) {
+
+  // Already solved: nothing to do.
+  if (instVecMap.find(&I) != instVecMap.end()) {
+    IR2VEC_DEBUG(outs() << "Returning from inst2Vec() I found in Map\n");
+    return;
+  }
+
+  Vector instVector(DIM, 0);
+  StringRef opcodeName = I.getOpcodeName();
+
+  // Start from whatever partial vector was recorded for this instruction.
+  // operator[] is used, so a missing entry is default-constructed here.
+  instVector = partialInstValMap[&I];
+
+  unsigned operandNum;
+  // NOTE(review): isMemWrite is computed but never read in this function.
+  bool isMemWrite = isMemOp(opcodeName, operandNum, memWriteOps);
+  bool isCyclic = false;
+  Vector VecArgs(DIM, 0);
+
+  // NOTE(review): RDList is filled below but never read in this function.
+  SmallVector<const Instruction *, 10> RDList;
+  RDList.clear();
+
+  // NOTE: the brace that used to close this loop is commented out further
+  // down ("// }"), so the loop body extends to the assert at the end of the
+  // function. The store into instVecMap therefore happens once per operand.
+  // NOTE(review): an instruction with zero operands never enters the loop and
+  // is therefore never written to instVecMap by this function - confirm this
+  // is intended.
+  for (unsigned i = 0; i < I.getNumOperands() /*&& !isCyclic*/; i++) {
+    // Contribution of the current operand.
+    Vector vecOp(DIM, 0);
+    if (isa<Function>(I.getOperand(i))) {
+      vecOp = getValue("function");
+      if (isa<CallInst>(I)) {
+        auto ci = dyn_cast<CallInst>(&I);
+        Function *func = ci->getCalledFunction();
+        if (func) {
+          // Callees with a body that are not already on funcStack are
+          // embedded via func2Vec(); otherwise the generic "function" vector
+          // assigned above is kept.
+          if (!func->isDeclaration() && std::find(funcStack.begin(), funcStack.end(), func) ==
+                  funcStack.end()) {
+            // Will be dealt with later
+            // probably over here as well change ?
+            // NOTE(review): tempCall is never used.
+            Vector tempCall(DIM, 0);
+            // vecOp = tempCall;
+            vecOp = func2Vec(*func, funcStack, bpiMap[func]);
+
+          }
+        }
+      }
+    }
+    // Checking that the argument is not of pointer type because some
+    // non-numeric/alphabetic constants are also caught as pointer types
+    else if (isa<Constant>(I.getOperand(i)) &&
+             !isa<PointerType>(I.getOperand(i)->getType())) {
+      vecOp = getValue("constant");
+    } else if (isa<BasicBlock>(I.getOperand(i))) {
+      vecOp = getValue("label");
+    } else {
+      if (isa<Instruction>(I.getOperand(i))) {
+        auto RD = getReachingDefs(&I, i);
+
+        if (!RD.empty()) {
+          // The operand vector becomes the sum of the reaching definitions'
+          // vectors, each scaled by getRDProb().
+          vecOp = SmallVector<double, DIM>(DIM, 0);
+          // Note: this `i` shadows the operand index of the outer loop.
+          for (auto i : RD) {
+            // Check if value of RD is precomputed
+            if (instVecMap.find(i) == instVecMap.end()) {
+              if (partialInstValMap.find(i) == partialInstValMap.end()) {
+                // Not solved and not in progress: solve it recursively. The
+                // placeholder entry is erased again once inst2Vec() returns.
+                partialInstValMap[i] = {};
+                inst2Vec(*i, funcStack, partialInstValMap);
+                partialInstValMap.erase(i);
+
+                // If the recursion recorded I in instSolvedBySolver, it has
+                // been solved already and there is nothing left to do.
+                if (std::find(instSolvedBySolver.begin(),
+                              instSolvedBySolver.end(),
+                              &I) != instSolvedBySolver.end())
+                  return;
+
+                auto prob = getRDProb(i, &I, RD);
+                auto tmp = instVecMap[i];
+                scaleVector(tmp, prob);
+                std::transform(tmp.begin(), tmp.end(), vecOp.begin(), vecOp.begin(),
+                               std::plus<double>());
+
+              } else {
+                // The reaching definition is itself still in progress, i.e.
+                // the dependency is cyclic. This only leaves the inner loop
+                // over RD; the outer operand loop keeps running.
+                isCyclic = true;
+                break;
+              }
+            } else {
+              auto prob = getRDProb(i, &I, RD);
+              auto tmp = instVecMap[i];
+              scaleVector(tmp, prob);
+              std::transform(tmp.begin(), tmp.end(), vecOp.begin(), vecOp.begin(),
+                             std::plus<double>());
+            }
+          }
+        }
+
+        RDList.insert(RDList.end(), RD.begin(), RD.end());
+      } else if (isa<PointerType>(I.getOperand(i)->getType())) {
+        vecOp = getValue("pointer");
+      } else
+        vecOp = getValue("variable");
+    }
+
+    // Accumulate this operand into the running sum of operand vectors.
+    std::transform(VecArgs.begin(), VecArgs.end(), vecOp.begin(),
+                   VecArgs.begin(), std::plus<double>());
+  // }
+
+  // Always a zero vector here: the code that used to fill it is commented
+  // out below, so adding it to VecArgs has no effect.
+  Vector vecInst = Vector(DIM, 0);
+
+  // if (!RDList.empty()) {
+  //   for (auto i : RDList) {
+  //     // Check if value of RD is precomputed
+  //     if (instVecMap.find(i) == instVecMap.end()) {
+
+  //       /*Some phi instructions reach themselves and hence may not be in
+  //       the instVecMap but should be in the partialInstValMap*/
+
+  //       if (partialInstValMap.find(i) == partialInstValMap.end()) {
+  //         assert(partialInstValMap.find(i) != partialInstValMap.end() &&
+  //                "Should have been in instvecmap or partialmap");
+  //       }
+  //     } else {
+  //       std::transform(instVecMap[i].begin(), instVecMap[i].end(),
+  //                      vecInst.begin(), vecInst.begin(), std::plus<double>());
+  //     }
+  //   }
+  // }
+
+  if (!isCyclic) {
+    std::transform(VecArgs.begin(), VecArgs.end(), vecInst.begin(),
+                   VecArgs.begin(), std::plus<double>());
+
+    IR2VEC_DEBUG(outs() << VecArgs[0]);
+
+    // NOTE(review): VecArgs is scaled by WA here but is not added to
+    // instVector afterwards; the unscaled vecOp of the current operand is
+    // added instead. Confirm whether the WA scaling was meant to apply.
+    scaleVector(VecArgs, WA);
+    IR2VEC_DEBUG(outs() << VecArgs.front());
+    // std::transform(instVector.begin(), instVector.end(), VecArgs.begin(),
+    //                instVector.begin(), std::plus<double>());
+    std::transform(instVector.begin(), instVector.end(), vecOp.begin(),
+                     instVector.begin(), std::plus<double>());
+    IR2VEC_DEBUG(outs() << instVector.front());
+
+    // Publish the vector accumulated so far (overwritten on each operand)
+    // and mark the instruction as live.
+    instVecMap[&I] = instVector;
+    livelinessMap.try_emplace(&I, true);
+
+    // Every definition listed in killMap for this instruction is marked as
+    // not live.
+    if (killMap.find(&I) != killMap.end()) {
+      auto list = killMap[&I];
+      for (auto defs : list) {
+        auto It2 = livelinessMap.find(defs);
+        if (It2 == livelinessMap.end())
+          livelinessMap.try_emplace(defs, false);
+        else
+          It2->second = false;
+      }
+    }
+  }
+  // Checked once per operand because it sits inside the operand loop.
+  assert(isCyclic == false && "A Single Component should not have a cycle!");
+    }
+}
+
+/*----------------------------------------------------------------------------------
+  Function to solve left over instructions after all dependencies are solved
+  ----------------------------------------------------------------------------------
+*/
+
+// Computes the embedding of instruction I and records it in instVecMap,
+// recursing into reaching definitions that have not been computed yet.
+//
+// The vector is built as: opcode embedding, plus the type embedding scaled by
+// WT, plus one contribution per operand scaled by WA. For operands that are
+// neither functions, non-pointer constants nor basic blocks and that have
+// reaching definitions, the contribution is the sum of the reaching
+// definitions' vectors weighted by getRDProb().
+//
+// If a reaching definition is itself still in progress (it has an entry in
+// partialInstValMap but none in instVecMap), the dependency is cyclic. In
+// that case all unsolved instructions in partialInstValMap are solved
+// together as the linear system handed to solve(A, B).
+//
+// Parameters:
+//   I                 - instruction to embed; returns immediately if it
+//                       already has an entry in instVecMap.
+//   funcStack         - functions that must not be embedded again; a callee
+//                       found in this stack is not passed to func2Vec().
+//   partialInstValMap - partial vectors of the instructions currently being
+//                       computed; updated for &I as the vector is built.
+void IR2Vec_FA::inst2Vec(
+    const Instruction &I, SmallVector<Function *, 15> &funcStack,
+    SmallMapVector<const Instruction *, Vector, 16> &partialInstValMap) {
+
+  if (instVecMap.find(&I) != instVecMap.end()) {
+    IR2VEC_DEBUG(outs() << "Returning from inst2Vec() I found in Map\n");
+    return;
+  }
+
+  // Opcode contribution.
+  Vector instVector(DIM, 0);
+  StringRef opcodeName = I.getOpcodeName();
+  auto vec = getValue(opcodeName.str());
+  IR2VEC_DEBUG(I.print(outs()); outs() << "\n");
+  std::transform(instVector.begin(), instVector.end(), vec.begin(),
+                 instVector.begin(), std::plus<double>());
+  partialInstValMap[&I] = instVector;
+
+  IR2VEC_DEBUG(outs() << "contents of partialInstValMap:\n";
+               for (auto i
+                    : partialInstValMap) {
+                 i.first->print(outs());
+                 outs() << "\n";
+               });
+
+  // Type contribution, scaled by WT.
+  auto type = I.getType();
+
+  if (type->isVoidTy()) {
+    vec = getValue("voidTy");
+  } else if (type->isFloatingPointTy()) {
+    vec = getValue("floatTy");
+  } else if (type->isIntegerTy()) {
+    vec = getValue("integerTy");
+  } else if (type->isFunctionTy()) {
+    vec = getValue("functionTy");
+  } else if (type->isStructTy()) {
+    vec = getValue("structTy");
+  } else if (type->isArrayTy()) {
+    vec = getValue("arrayTy");
+  } else if (type->isPointerTy()) {
+    vec = getValue("pointerTy");
+  } else if (type->isVectorTy()) {
+    vec = getValue("vectorTy");
+  } else if (type->isEmptyTy()) {
+    vec = getValue("emptyTy");
+  } else if (type->isLabelTy()) {
+    vec = getValue("labelTy");
+  } else if (type->isTokenTy()) {
+    vec = getValue("tokenTy");
+  } else if (type->isMetadataTy()) {
+    vec = getValue("metadataTy");
+  } else {
+    vec = getValue("unknownTy");
+  }
+  scaleVector(vec, WT);
+  std::transform(instVector.begin(), instVector.end(), vec.begin(),
+                 instVector.begin(), std::plus<double>());
+  partialInstValMap[&I] = instVector;
+
+  unsigned operandNum;
+  bool isMemWrite = isMemOp(opcodeName, operandNum, memWriteOps);
+  bool isCyclic = false;
+
+  // Operand contributions, each scaled by WA.
+  for (unsigned i = 0; i < I.getNumOperands(); i++) {
+    if (isa<Function>(I.getOperand(i))) {
+      // The function contribution must land in `vec`, which is what gets
+      // scaled and added below; otherwise the previous operand's (already
+      // scaled) vector would be added a second time.
+      vec = getValue("function");
+      if (auto *ci = dyn_cast<CallInst>(&I)) {
+        Function *func = ci->getCalledFunction();
+        // Only callees with a body that are not already on funcStack are
+        // embedded; anything else keeps the generic "function" vector.
+        if (func && !func->isDeclaration() &&
+            std::find(funcStack.begin(), funcStack.end(), func) ==
+                funcStack.end()) {
+          IR2VEC_DEBUG(outs() << "NOT ABLE TO FIND FUNC SOMEHOW ?" << "\n");
+          vec = func2Vec(*func, funcStack, bpiMap[func]);
+        }
+      }
+    }
+    // Checking that the argument is not of pointer type because some
+    // non-numeric/alphabetic constants are also caught as pointer types
+    else if (isa<Constant>(I.getOperand(i)) &&
+             !isa<PointerType>(I.getOperand(i)->getType())) {
+      vec = getValue("constant");
+    } else if (isa<BasicBlock>(I.getOperand(i))) {
+      vec = getValue("label");
+    } else {
+      if (isa<PointerType>(I.getOperand(i)->getType()))
+        vec = getValue("pointer");
+      else
+        vec = getValue("variable");
+      if (isa<Instruction>(I.getOperand(i))) {
+        auto RD = getReachingDefs(&I, i);
+        // For every RD, get its contribution to the final vector
+        if (!RD.empty()) {
+          vec = SmallVector<double, DIM>(DIM, 0);
+          for (auto def : RD) {
+            // Check if value of RD is precomputed
+            if (instVecMap.find(def) == instVecMap.end()) {
+              if (partialInstValMap.find(def) != partialInstValMap.end()) {
+                // The definition is itself in progress: cyclic dependency.
+                // Only the loop over RD is left; remaining operands are
+                // still visited but no longer added to instVector.
+                isCyclic = true;
+                break;
+              }
+              // Not solved and not in progress: solve it recursively. The
+              // placeholder entry is erased once the recursion returns.
+              partialInstValMap[def] = {};
+              inst2Vec(*def, funcStack, partialInstValMap);
+              partialInstValMap.erase(def);
+
+              // The recursion may have hit a cycle containing I and solved
+              // it through the linear system; nothing is left to do then.
+              if (std::find(instSolvedBySolver.begin(),
+                            instSolvedBySolver.end(),
+                            &I) != instSolvedBySolver.end())
+                return;
+            }
+            auto prob = getRDProb(def, &I, RD);
+            auto tmp = instVecMap[def];
+            scaleVector(tmp, prob);
+            std::transform(tmp.begin(), tmp.end(), vec.begin(), vec.begin(),
+                           std::plus<double>());
+          }
+        }
+      }
+    }
+
+    if (!isCyclic) {
+      scaleVector(vec, WA);
+      std::transform(instVector.begin(), instVector.end(), vec.begin(),
+                     instVector.begin(), std::plus<double>());
+      partialInstValMap[&I] = instVector;
+    }
+  }
+
+  if (!isCyclic) {
+    instVecMap.try_emplace(&I, instVector);
+    livelinessMap.try_emplace(&I, true);
+
+    // kill and update
+    if (isMemWrite) {
+      if (auto *killed = dyn_cast<Instruction>(I.getOperand(operandNum)))
+        killAndUpdate(killed, instVector);
+    }
+    return;
+  }
+
+  // ---- Cyclic dependency: solve all in-progress instructions together ----
+  cyclicCounter++;
+
+  // There is a chance that all operands of an instruction has not been
+  // processed. In such a case for a cyclic dependencies, process all unseen
+  // operands now. Iterate over a snapshot because the recursive calls below
+  // add to and erase from partialInstValMap.
+  const auto snapshot = partialInstValMap;
+  for (const auto &entry : snapshot) {
+    auto inst = entry.first;
+    for (unsigned i = 0; i < inst->getNumOperands(); i++) {
+      if (isa<Constant>(inst->getOperand(i)) ||
+          isa<BasicBlock>(inst->getOperand(i)) ||
+          isa<Function>(inst->getOperand(i)))
+        continue;
+
+      auto RD = getReachingDefs(inst, i);
+      for (auto def : RD) {
+        // Skip definitions that are already solved or already in progress.
+        if (instVecMap.find(def) != instVecMap.end() ||
+            partialInstValMap.find(def) != partialInstValMap.end())
+          continue;
+
+        partialInstValMap[def] = {};
+        inst2Vec(*def, funcStack, partialInstValMap);
+        partialInstValMap.erase(def);
+
+        if (std::find(instSolvedBySolver.begin(), instSolvedBySolver.end(),
+                      &I) != instSolvedBySolver.end())
+          return;
+      }
+    }
+  }
+
+  // Build the system A * X = B with one row per unsolved instruction:
+  //   xI / Ix  - row index <-> instruction
+  //   B        - constant part of each row (partial vector plus every
+  //              contribution that is already known)
+  //   RDValMap - per row, the coefficient of each still-unsolved reaching
+  //              definition
+  std::map<unsigned, const Instruction *> xI;
+  std::map<const Instruction *, unsigned> Ix;
+  std::vector<std::vector<double>> A, B;
+  SmallMapVector<const Instruction *,
+                 SmallMapVector<const Instruction *, double, 16>, 16>
+      RDValMap;
+  unsigned pos = 0;
+
+  // Adds `contribution`, scaled by `scale`, to the most recently added row
+  // of B.
+  auto addToLastRow = [&](Vector contribution, double scale) {
+    scaleVector(contribution, scale);
+    std::vector<double> &row = B.back();
+    std::transform(contribution.begin(), contribution.end(), row.begin(),
+                   row.begin(), std::plus<double>());
+  };
+
+  for (const auto &entry : partialInstValMap) {
+    auto inst = entry.first;
+    if (instVecMap.find(inst) != instVecMap.end())
+      continue;
+
+    Ix[inst] = pos;
+    xI[pos++] = inst;
+
+    // Values are truncated to one decimal place before entering the system.
+    std::vector<double> row;
+    for (auto v : entry.second)
+      row.push_back((int)(v * 10) / 10.0);
+    B.push_back(row);
+
+    for (unsigned i = 0; i < inst->getNumOperands(); i++) {
+      if (isa<Function>(inst->getOperand(i))) {
+        auto f = getValue("function");
+        if (auto *ci = dyn_cast<CallInst>(inst)) {
+          Function *func = ci->getCalledFunction();
+          if (func && !func->isDeclaration() &&
+              std::find(funcStack.begin(), funcStack.end(), func) ==
+                  funcStack.end()) {
+            IR2VEC_DEBUG(outs()
+                         << "SECOND TIME IN INST2VEC, SOMEHOW FUNC IS EMPTY"
+                         << "\n");
+            f = func2Vec(*func, funcStack, bpiMap[func]);
+          }
+        }
+        addToLastRow(f, WA);
+      } else if (isa<Constant>(inst->getOperand(i))) {
+        addToLastRow(getValue("constant"), WA);
+      } else if (isa<BasicBlock>(inst->getOperand(i))) {
+        addToLastRow(getValue("label"), WA);
+      } else {
+        auto RD = getReachingDefs(inst, i);
+        for (auto def : RD) {
+          // Check if value of RD is precomputed
+          if (instVecMap.find(def) == instVecMap.end()) {
+            if (partialInstValMap.find(def) == partialInstValMap.end()) {
+              llvm_unreachable("Should not reach");
+            }
+            // Unknown of the system: remember its coefficient.
+            RDValMap[inst][def] = WA * getRDProb(def, inst, RD);
+          } else {
+            // Already solved: fold it into the constant part of the row.
+            addToLastRow(instVecMap[def], getRDProb(def, inst, RD) * WA);
+          }
+        }
+      }
+    }
+  }
+
+  A.assign(xI.size(), std::vector<double>(xI.size(), 0));
+
+  for (unsigned i = 0; i < xI.size(); i++) {
+    A[i][i] = 1;
+    auto instRDVal = RDValMap[xI[i]];
+    for (auto j : instRDVal) {
+      // To-Do: If j.first not found in Ix?
+      A[i][Ix[j.first]] = (int)((A[i][Ix[j.first]] - j.second) * 10) / 10.0;
+    }
+  }
+
+  for (auto &row : B) {
+    for (auto &v : row) {
+      v = (int)(v * 10) / 10.0;
+    }
+  }
+
+  auto C = solve(A, B);
+
+  // Publish the solved vectors and group the solved instructions by their
+  // parent basic block.
+  SmallMapVector<const BasicBlock *, SmallVector<const Instruction *, 10>, 16>
+      bbInstMap;
+  for (unsigned i = 0; i < C.size(); i++) {
+    SmallVector<double, DIM> tmp(C[i].begin(), C[i].end());
+
+    instVecMap.try_emplace(xI[i], tmp);
+    livelinessMap.try_emplace(xI[i], true);
+
+    instSolvedBySolver.push_back(xI[i]);
+    bbInstMap[xI[i]->getParent()].push_back(xI[i]);
+  }
+
+  // Apply kill-and-update for every solved memory-write instruction whose
+  // written operand is itself an instruction.
+  for (const auto &BB : bbInstMap) {
+    unsigned opnum;
+    // Sorting not needed?
+    for (auto solved : BB.second) {
+      if (isMemOp(solved->getOpcodeName(), opnum, memWriteOps) &&
+          dyn_cast<Instruction>(solved->getOperand(opnum))) {
+        killAndUpdate(dyn_cast<Instruction>(solved->getOperand(opnum)),
+                      instVecMap[solved]);
+      }
+    }
+  }
+}
+
+/*----------------------------------------------------------------------------------
+  Utility function : Traverses Reaching definitions
+  ----------------------------------------------------------------------------------
+*/
+
+// Depth-first walk over instReachingDefsMap starting at `inst`.
+//
+// Marks every instruction it reaches in `Visited` and appends each one to
+// `timeStack` after all of its not-yet-visited reaching definitions have been
+// appended (post-order).
+void IR2Vec_FA::traverseRD(
+    const llvm::Instruction *inst,
+    std::unordered_map<const llvm::Instruction *, bool> &Visited,
+    llvm::SmallVector<const llvm::Instruction *, 10> &timeStack) {
+
+  // Mark before descending so that a definition reaching back to `inst`
+  // does not recurse forever.
+  Visited[inst] = true;
+
+  auto entry = instReachingDefsMap.find(inst);
+  if (entry != instReachingDefsMap.end()) {
+    auto reachingDefs = entry->second;
+    for (auto def : reachingDefs) {
+      if (Visited.count(def) == 0)
+        traverseRD(def, Visited, timeStack);
+    }
+  }
+
+  // All the children (RDs) of the current node are done; push to timeStack.
+  timeStack.push_back(inst);
+}
+
+void IR2Vec_FA::DFSUtil(
+    const llvm::Instruction *inst,
+    std::unordered_map<const llvm::Instruction *, bool> &Visited,
+    llvm::SmallVector<const llvm::Instruction *, 10> &set) {
+  // Depth-first walk over reverseReachingDefsMap starting at `inst`.
+  Visited[inst] = true;
+  auto RD = reverseReachingDefsMap[inst]; // copied before recursing
+  // `inst` itself is not appended here; only newly discovered nodes are.
+  for (auto defs : RD) {
+    if (Visited.find(defs) == Visited.end()) {
+      set.push_back(defs);
+      DFSUtil(defs, Visited, set);
+    }
+  }
+}
+
+/*----------------------------------------------------------------------------------
+  Utility function : Creates and returns all SCCs
+  ----------------------------------------------------------------------------------
+*/
+
+void IR2Vec_FA::getAllSCC() {
+  // Two DFS passes. NOTE(review): this has the shape of Kosaraju's SCC
+  // algorithm provided reverseReachingDefsMap (walked by DFSUtil) holds the
+  // reversed edges of instReachingDefsMap - confirm where the maps are built.
+  std::unordered_map<const llvm::Instruction *, bool> seen;
+  llvm::SmallVector<const llvm::Instruction *, 10> finishOrder;
+
+  // First pass: record instructions in DFS post-order.
+  for (auto &entry : instReachingDefsMap) {
+    if (seen.count(entry.first) == 0)
+      traverseRD(entry.first, seen, finishOrder);
+  }
+  IR2VEC_DEBUG(for (auto &defs : finishOrder) { outs() << defs << "\n"; });
+  seen.clear();
+
+  // Second pass: pop in reverse post-order; each unseen instruction seeds one
+  // component, which DFSUtil extends with everything it reaches.
+  while (!finishOrder.empty()) {
+    const llvm::Instruction *root = finishOrder.back();
+    finishOrder.pop_back();
+    if (seen.count(root) != 0)
+      continue;
+    llvm::SmallVector<const llvm::Instruction *, 10> component;
+    component.push_back(root);
+    DFSUtil(root, seen, component);
+    // `component` holds at least `root`, so no emptiness check is needed.
+    allSCCs.push_back(component);
+  }
+}
+
+void IR2Vec_FA::bb2Vec(BasicBlock &B, SmallVector<Function *, 15> &funcStack) {
+  SmallMapVector<const Instruction *, Vector, 16> partialInstValMap;
+  // Run inst2Vec on every instruction of B, in iteration order.
+  for (auto &I : B) {
+    // I is inserted into the map before inst2Vec runs and erased right after.
+    partialInstValMap[&I] = {};
+    IR2VEC_DEBUG(outs() << "XX------------ Call from bb2vec function "
+                           "Started---------------------XX\n");
+    inst2Vec(I, funcStack, partialInstValMap);
+    IR2VEC_DEBUG(outs() << "YY------------Call from bb2vec function "
+                           "Ended---------------------YY\n");
+    partialInstValMap.erase(&I);
+  }
+}
+
+// INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+
+// void IR2Vec_FA::getAnalysisUsage(AnalysisUsage &AU) const{
+//   AU.addRequired<LoopInfoWrapperPass>();
+//   AU.addRequired<BranchProbabilityInfoWrapperPass>();
+//   AU.addRequired<DominatorTreeWrapperPass>();
+//   AU.setPreservesAll();
+// }
+
+// extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() {
+//     return {
+//         LLVM_PLUGIN_API_VERSION, "IR2Vec_FA", LLVM_VERSION_STRING,
+//         [](PassBuilder &PB) {
+//             PB.registerPipelineParsingCallback(
+//                 [](StringRef Name, ModulePassManager &MPM, ArrayRef<PassBuilder::PipelineElement>) {
+//                     if (Name == "IR2Vec_FA") {
+//                         // FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass(true,true)));
+//                         MPM.addPass(IR2Vec_FA());
+                         
+//                         return true;
+//                     }
+//                     return false;
+//                 });
+//         }
+
+
+//     };
+// }
+
+// extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() {
+//   return {
+//     LLVM_PLUGIN_API_VERSION, "MyPassPlugin", LLVM_VERSION_STRING,
+//     [](PassBuilder &PB) {
+//       // Register your analysis pass
+//       PB.registerFunctionAnalyses([](FunctionAnalysisManager &FAM) {
+//         FAM.registerPass([] { return BranchProbabilityAnalysis(); });
+//       });
+
+//       // If you have a new pass, register it like this
+//       // PB.registerPipelineParsingCallback(
+//       //   [](StringRef Name, FunctionPassManager &FPM, ArrayRef<PassBuilder::PipelineElement>) {
+//       //     if (Name == "my-new-pass") {
+//       //       FPM.addPass(MyNewPass());
+//       //       return true;
+//       //     }
+//       //     return false;
+//       //   });
+//     }
+//   };
+// }
+
+

From 75dd8b64713bba487c416a39326d3e78e0d9a045 Mon Sep 17 00:00:00 2001
From: iamaayushrivastava <iamaayushrivastava@gmail.com>
Date: Tue, 25 Feb 2025 16:26:20 +0530
Subject: [PATCH 3/3] Added models for hyperparameter tuning and inference

---
 .../hypertuning/mlp_model.py                  |  44 +++
 .../hypertuning/model_tuner.py                | 345 ++++++++++++++++++
 .../models/histogram_model.py                 | 228 ++++++++++++
 .../models/ir2vec_fa_model.py                 | 273 ++++++++++++++
 .../models/ir2vec_static_model.py             | 270 ++++++++++++++
 .../models/ir2vec_sym_model.py                | 267 ++++++++++++++
 .../models/milepost_model.py                  | 311 ++++++++++++++++
 7 files changed, 1738 insertions(+)
 create mode 100644 hyperparameter-tuning/hypertuning/mlp_model.py
 create mode 100644 hyperparameter-tuning/hypertuning/model_tuner.py
 create mode 100644 hyperparameter-tuning/models/histogram_model.py
 create mode 100644 hyperparameter-tuning/models/ir2vec_fa_model.py
 create mode 100644 hyperparameter-tuning/models/ir2vec_static_model.py
 create mode 100644 hyperparameter-tuning/models/ir2vec_sym_model.py
 create mode 100644 hyperparameter-tuning/models/milepost_model.py

diff --git a/hyperparameter-tuning/hypertuning/mlp_model.py b/hyperparameter-tuning/hypertuning/mlp_model.py
new file mode 100644
index 0000000..309c4dc
--- /dev/null
+++ b/hyperparameter-tuning/hypertuning/mlp_model.py
@@ -0,0 +1,44 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset, Dataset
+import ray
+from ray import tune
+from ray.tune.schedulers import ASHAScheduler
+from ray.tune.search.optuna import OptunaSearch
+import pandas as pd
+import logging
+import numpy as np
+
+
+logging.basicConfig(  # runs at import time and configures the root logger
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S"
+)
+logger = logging.getLogger(__name__)  # module-level logger used by MLP
+
+class MLP(nn.Module):
+    def __init__(self, input_dim, num_classes, num_layers, units_per_layer, dropout, normalize_input, activation):
+        super(MLP, self).__init__()
+        
+        logger.info("Initializing MLP model...")
+        
+        layers = []
+        for i in range(num_layers):
+            in_features = input_dim if i == 0 else units_per_layer[i - 1]
+            out_features = units_per_layer[i]
+            layers.append(nn.Linear(in_features, out_features))
+            layers.append(nn.BatchNorm1d(out_features))  # Always use BatchNorm
+            layers.append(activation)
+            if dropout > 0:
+                layers.append(nn.Dropout(dropout))
+        layers.append(nn.Linear(units_per_layer[-1], num_classes))
+        self.net = nn.Sequential(*layers)
+        self.normalize_input = normalize_input
+        logger.info("MLP model initialized.")
+
+    def forward(self, x):
+        if self.normalize_input:
+            x = nn.functional.normalize(x, p=2, dim=1)  # L2 Normalization
+        return self.net(x)
\ No newline at end of file
diff --git a/hyperparameter-tuning/hypertuning/model_tuner.py b/hyperparameter-tuning/hypertuning/model_tuner.py
new file mode 100644
index 0000000..5838856
--- /dev/null
+++ b/hyperparameter-tuning/hypertuning/model_tuner.py
@@ -0,0 +1,345 @@
+import ray.train
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset, Dataset
+import ray
+from ray import tune
+from ray.tune.schedulers import ASHAScheduler
+from ray.tune.search.optuna import OptunaSearch
+import pandas as pd
+import logging
+import json
+import os
+import numpy as np
+import random
+import tempfile
+from ray import train, tune
+import sys
+# sys.path.append("/home/intern24009/IR2Vec-Classification/tune-ir2vec/")
+sys.path.append("/home/cs24mtech02001/Program-Classification/ir2vec-model-tuning/")
+from mlp_model import MLP
+from datetime import datetime
+
+logging.basicConfig(  # runs at import time and configures the root logger
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S"
+)
+logger = logging.getLogger(__name__)  # module-level logger used by train_model and main
+
+class CSVDataset(Dataset):
+    def __init__(self, file_path):
+        print(f"Loading dataset from: {file_path}")
+        
+        try:
+            self.data = pd.read_csv(file_path, delimiter='\t', header=None)
+            # print(f"First 5 rows of the dataset:\n{self.data.head()}")
+        except Exception as e:
+            print(f"Error reading CSV: {e}")
+            return
+
+        try:
+            self.labels = torch.tensor(self.data.iloc[:, 0].values, dtype=torch.long)
+            self.features = torch.tensor(self.data.iloc[:, 1:].values, dtype=torch.float32)
+        except Exception as e:
+            print(f"Error processing data: {e}")
+            return
+
+        # print(f"Column data types:\n{self.data.dtypes}")
+        
+        if not pd.api.types.is_numeric_dtype(self.data.iloc[:, 0]):
+            print("Error: Non-numeric labels detected in the first column.")
+            return
+        
+        # Adjust labels to be 0-based (subtract 1 for 1-based labels)
+        self.labels = self.labels - 1  # Make labels 0-based
+        
+        print("Dataset loaded successfully.")
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        return self.features[idx], self.labels[idx]
+
+
+# Define the MLP model
+# class MLP(nn.Module):
+#     def __init__(self, input_dim, num_classes, num_layers, units_per_layer, dropout, normalize_input, activation):
+#         super(MLP, self).__init__()
+        
+#         logger.info("Initializing MLP model...")
+        
+#         layers = []
+#         for i in range(num_layers):
+#             in_features = input_dim if i == 0 else units_per_layer
+#             layers.append(nn.Linear(in_features, units_per_layer))
+#             layers.append(nn.BatchNorm1d(units_per_layer))  # Always use BatchNorm
+#             layers.append(activation)
+#             if dropout > 0:
+#                 layers.append(nn.Dropout(dropout))
+#         layers.append(nn.Linear(units_per_layer, num_classes))
+#         self.net = nn.Sequential(*layers)
+#         self.normalize_input = normalize_input
+#         logger.info("MLP model initialized.")
+
+#     def forward(self, x):
+#         if self.normalize_input:
+#             x = nn.functional.normalize(x, p=2, dim=1)  # L2 Normalization
+#         return self.net(x)
+
+# Training function
+def train_model(config, checkpoint_dir=None):
+    # Simulated dataset (replace with your dataset)
+    logger.info(f"Trial Config: num_layers={config['num_layers']}, units_per_layer={config['units_per_layer']}")
+
+    logger.info("Starting training process...")
+    input_dim = 300 # For IR2Vec, DIM=300
+    num_classes = 342
+    
+    train_dataset_path="/Pramana/IR2Vec/Codeforces-Profiled-Dataset/profile-aware-embeddings/O0/training.csv"
+    test_dataset_path="/Pramana/IR2Vec/Codeforces-Profiled-Dataset/profile-aware-embeddings/O0/testing.csv"
+    val_dataset_path="/Pramana/IR2Vec/Codeforces-Profiled-Dataset/profile-aware-embeddings/O0/val.csv"
+    
+    train_dataset = CSVDataset(train_dataset_path)
+    val_dataset = CSVDataset(val_dataset_path)
+    test_dataset = CSVDataset(test_dataset_path)
+
+    train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False)
+
+    logger.info("Datasets and DataLoaders prepared for codeforces-ir2vec-fa-dynamic-O0-model, gpu cuda:0")
+    
+    # Initialize model
+    model = MLP(
+        input_dim=input_dim,
+        num_classes=num_classes,
+        num_layers=config["num_layers"],
+        units_per_layer=config["units_per_layer"],
+        dropout=config["dropout"],
+        normalize_input=config["normalize_input"],
+        activation=config["activation"]
+    )
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # print(f"Using device: {device}")
+    logger.info("This is cuda:0")
+
+    model.to(device)
+    # print(f"Model moved to {device}")
+    
+    # Define loss and optimizer
+    criterion = nn.CrossEntropyLoss()
+    optimizer = getattr(optim, config["optimizer"])(
+        model.parameters(), lr=config["lr"]
+    )
+    
+    best_val_accuracy = 0.0
+
+    # Training loop
+    logger.info("Starting training loop...")
+    for epoch in range(config["epochs"]):
+        model.train()
+        running_loss = 0.0
+        correct_train = 0
+        total_train = 0
+
+        # Train the model
+        for batch in train_loader:
+            inputs, labels = batch
+            inputs, labels = inputs.to(device), labels.to(device)
+            
+            # logger.info(f"Labels range: min={labels.min()}, max={labels.max()}")
+
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item()
+
+            # Calculate train accuracy
+            _, predicted = torch.max(outputs, 1)
+            total_train += labels.size(0)
+            correct_train += (predicted == labels).sum().item()
+
+        train_loss = running_loss / len(train_loader)
+        train_accuracy = correct_train / total_train
+
+        # Evaluate on validation data
+        model.eval()
+        running_val_loss = 0.0
+        correct_val = 0
+        total_val = 0
+
+        with torch.no_grad():
+            for batch in val_loader:
+                inputs, labels = batch
+                inputs, labels = inputs.to(device), labels.to(device)
+
+                outputs = model(inputs)
+                loss = criterion(outputs, labels)
+                running_val_loss += loss.item()
+
+                # Calculate validation accuracy
+                _, predicted = torch.max(outputs, 1)
+                total_val += labels.size(0)
+                correct_val += (predicted == labels).sum().item()
+
+        val_loss = running_val_loss / len(val_loader)
+        val_accuracy = correct_val / total_val
+
+        logger.info(f"Epoch [{epoch+1}/{config['epochs']}]: Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
+                    f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")
+        
+        # if val_accuracy>best_val_accuracy:
+        #     best_val_accuracy = val_accuracy
+        with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
+            model_path = os.path.join(checkpoint_dir, "model_checkpoint.model")
+            torch.save(model, model_path)
+            print(f"Model checkpoint saved at {model_path}")
+
+        tune.report(train_loss=train_loss, val_loss=val_loss, train_accuracy=train_accuracy, val_accuracy=val_accuracy)
+
+def custom_serializer(obj):
+    if isinstance(obj, torch.Tensor):
+        return obj.tolist()
+    return str(obj)
+
+# Main function to run Ray Tune
+def main():
+    input_dim = 300  # Example input dimension
+    num_classes = 342  # Example number of classes # POJ-104
+    epochs = 2000
+    # # Hyperparameter search space
+    # config = {
+    #     "input_dim": input_dim,
+    #     "num_classes": num_classes,
+    #     "num_layers": tune.randint(1, 5),
+    #     "units_per_layer": tune.choice([64, 128, 256, 512]),
+    #     "dropout": tune.uniform(0.0, 0.2),
+    #     "normalize_input": tune.choice([True, False]),
+    #     "activation": tune.choice([nn.ReLU(), nn.LeakyReLU(), nn.Tanh(), nn.SiLU()]),
+    #     "optimizer": tune.choice(["Adam", "SGD"]),
+    #     "lr": tune.loguniform(1e-4, 1e-1),
+    #     "batch_size": tune.choice([16, 32, 64, 128, 256, 512, 1024]),
+    #     "epochs": 5000,
+    # }
+    
+    config = {
+        "input_dim": input_dim,
+        "num_classes": num_classes,
+        "num_layers": tune.randint(3, 8),
+        # "units_per_layer": tune.choice([64, 128, 256, 512]),
+        # "units_per_layer": tune.sample_from(lambda spec : np.random.randint(64, high=2048, size=spec.config.num_layers)),
+        "units_per_layer": tune.sample_from(lambda spec: [ random.choice([64, 128, 256, 512]) for _ in range(spec.config["num_layers"])]),
+        # "dropout": tune.sample_from(lambda spec : np.random.uniform(0, high=0.3, size=spec.config.num_layers)),
+        # "units_per_layer": tune.sample_from(lambda spec: generate_units_per_layer({"num_layers": spec.config["num_layers"]})), 
+        # "units_per_layer": tune.sample_from(lambda spec: [random.choice([64, 128, 256, 512]) for _ in range(4)]),    
+        "dropout": tune.uniform(0.0, 0.3),
+        "normalize_input": tune.choice([True, False]),
+        "activation": tune.choice([nn.ReLU(), nn.LeakyReLU(), nn.Tanh(), nn.SiLU()]),
+        "optimizer": tune.choice(["Adam"]), #tune.choice(["Adam", "SGD"]),
+        "lr": tune.loguniform(1e-4, 1e-2),
+        "batch_size": tune.choice([32, 64, 128, 256, 512, 1024]),
+        "epochs": epochs,
+    }
+
+    # Define scheduler and search algorithm
+    scheduler = ASHAScheduler(
+        # metric="val_accuracy",  # Use validation loss for early stopping
+        # mode="max",
+        max_t=epochs,
+        grace_period=25,
+        reduction_factor=2
+    )
+
+    # search_alg = OptunaSearch(metric="val_accuracy", mode="max")
+
+    # # Run Ray Tune
+    # ray.init()
+    # analysis = tune.run(
+    #     train_model,
+    #     config=config,
+    #     metric="val_accuracy",
+    #     mode="max",
+    #     scheduler=scheduler,
+    #     search_alg=search_alg,
+    #     num_samples=1000,
+    #     max_concurrent_trials=4,
+    #     resources_per_trial={"cpu": 10, "gpu": 0.25}
+    # )
+    ray.init(_temp_dir="/Pramana/IR2Vec/ir2vec_tuned_models")
+    analysis = tune.run(
+        train_model,
+        config=config,
+        metric="val_accuracy",
+        mode="max",
+        keep_checkpoints_num=5,
+        # checkpoint_score_attr="val_accuracy",
+        scheduler=scheduler,
+        # search_alg=search_alg,
+        num_samples=1000,
+        max_concurrent_trials=4,
+        resources_per_trial={"cpu": 10, "gpu": 0.125},
+        local_dir="/Pramana/IR2Vec/ir2vec_tuned_models/tmp/ray_results"
+    )
+    
+    best_trial = analysis.get_best_trial(metric="val_accuracy", mode="max", scope="all")
+    best_checkpoint = analysis.get_best_checkpoint(best_trial, metric="val_accuracy", mode="max")
+    print(f"Best checkpoint saved at: {best_checkpoint}")
+    
+    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
+    # Print the best result
+    # logger.info("Best hyperparameters found were:")
+    # logger.info(analysis.best_config)
+    
+    best_config = analysis.best_config
+    logger.info("Best hyperparameters found were:")
+    logger.info(best_config)
+    
+    best_trial = analysis.get_best_trial(metric="val_accuracy", mode="max", scope="all")
+    best_results = best_trial.last_result
+    logger.info(f"Best results: {best_results}")
+    
+    results = {
+        "best_config": best_config,
+        "best_results": best_results,
+        "input_csv_paths": {
+            "train": "/Pramana/IR2Vec/Codeforces-Profiled-Dataset/profile-aware-embeddings/O0/training.csv",
+            "val": "/Pramana/IR2Vec/Codeforces-Profiled-Dataset/profile-aware-embeddings/O0/val.csv",
+            "test": "/Pramana/IR2Vec/Codeforces-Profiled-Dataset/profile-aware-embeddings/O0/testing.csv",
+        },
+    }
+    trials_data = []
+    for trial in analysis.trials:
+        trial_data = trial.config
+        trial_data.update(trial.last_result)
+        trials_data.append(trial_data)
+
+    trials_df = pd.DataFrame(trials_data)
+
+    trials_table_path = os.path.join("results", f"{timestamp}_ir2vec_O0_dynamic_codeforces_hyperparameter_tuning_results_sample_1000_epoch_2000.csv")
+    os.makedirs("results", exist_ok=True)    
+
+    trials_df.to_csv(trials_table_path, index=False)
+
+    results["all_trials"] = trials_data
+
+    output_dir = "results"
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Save the results to a JSON file
+    result_file_path = os.path.join(output_dir, f"{timestamp}_ir2vec_O0_dynamic_codeforces_tune_results_sample_1000_epoch_2000.json")
+    with open(result_file_path, "w") as f:
+        json.dump(results, f, indent=4, default=custom_serializer)
+
+    logger.info(f"Results saved to {result_file_path}")
+    logger.info(f"Trials table saved to {trials_table_path}")
+
+if __name__ == "__main__":  # run the search only when executed as a script
+    main()
\ No newline at end of file
diff --git a/hyperparameter-tuning/models/histogram_model.py b/hyperparameter-tuning/models/histogram_model.py
new file mode 100644
index 0000000..458cfd4
--- /dev/null
+++ b/hyperparameter-tuning/models/histogram_model.py
@@ -0,0 +1,228 @@
+import os
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.activations import swish as SiLU
+from tensorflow.keras.activations import relu
+from tensorflow.keras.models import load_model
+import argparse
+import pickle
+
+# Model definition
+
+# config': {'input_dim': 65, 'num_classes': 98, 'num_layers': 4, 'units_per_layer': [512, 128, 512, 256], 'dropout': 0.27774903408254686, 'normalize_input': False, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.0009554640387111394, 'batch_size': 1024, 'epochs': 2000}
+
+# Histogram-O0
+# def getModel(input_dim, output_dim):
+#     model = Sequential()
+
+#     model.add(Dense(512, input_shape=(input_dim,), kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     # model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.27774903408254686))
+
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     # model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.27774903408254686))
+
+#     model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     # model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.27774903408254686))
+
+#     model.add(Dense(256, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     # model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.27774903408254686))
+
+#     model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation('softmax'))
+
+#     opt = keras.optimizers.Adam(learning_rate=0.0009554640387111394)
+#     model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
+#     model.summary()
+
+#     return model
+
+# config': {'input_dim': 65, 'num_classes': 98, 'num_layers': 4, 'units_per_layer': [512, 512, 256, 256], 'dropout': 0.22272317313484666, 'normalize_input': False, 'activation': ReLU(), 'optimizer': 'Adam', 'lr': 0.0004475656736901494, 'batch_size': 128, 'epochs': 2000}
+
+# Histogram-O3
+def getModel(input_dim, output_dim):
+    model = Sequential()
+
+    model.add(Dense(512, input_shape=(input_dim,), kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    # model.add(BatchNormalization())
+    model.add(Activation(relu))
+    model.add(Dropout(0.22272317313484666))
+
+    model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    # model.add(BatchNormalization())
+    model.add(Activation(relu))
+    model.add(Dropout(0.22272317313484666))
+
+    model.add(Dense(256, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    # model.add(BatchNormalization())
+    model.add(Activation(relu))
+    model.add(Dropout(0.22272317313484666))
+
+    model.add(Dense(256, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    # model.add(BatchNormalization())
+    model.add(Activation(relu))
+    model.add(Dropout(0.22272317313484666))
+
+    model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    model.add(BatchNormalization())
+    model.add(Activation('softmax'))
+
+    opt = keras.optimizers.Adam(learning_rate=0.0004475656736901494)
+    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
+    model.summary()
+
+    return model
+
+# Load data from directory
+def load_data_from_directory(directory):
+    data = []
+    labels = []
+    classes = sorted(os.listdir(directory))  # Ensure consistent label mapping
+    class_to_label = {cls: idx for idx, cls in enumerate(classes)}
+
+    for cls in classes:
+        class_path = os.path.join(directory, cls)
+        if os.path.isdir(class_path):
+            for file_name in os.listdir(class_path):
+                if file_name.endswith(".npz"):
+                    file_path = os.path.join(class_path, file_name)
+                    try:
+                        loaded = np.load(file_path)["values"]
+                        data.append(loaded.flatten())
+                        labels.append(class_to_label[cls])
+                    except Exception as e:
+                        print(f"Failed to load {file_path}: {e}")
+
+    return np.array(data), np.array(labels)
+
+# Prepare train and test data
+# def prepare_data(train_dir, test_dir):
+#     X_train, y_train = load_data_from_directory(train_dir)
+#     X_test, y_test = load_data_from_directory(test_dir)
+
+#     return X_train, y_train, X_test, y_test
+
+def prepare_data(train_dir, test_dir, val_dir=None):
+    X_train, y_train = load_data_from_directory(train_dir)
+    X_test, y_test = load_data_from_directory(test_dir)
+    X_val, y_val = None, None
+
+    if val_dir:
+        X_val, y_val = load_data_from_directory(val_dir)
+
+    return X_train, y_train, X_test, y_test, X_val, y_val
+
+# Main function
+def main():
+    # Paths to the train and test directories
+    train_dir = "/Pramana/IR2Vec/Yali-Embeddings/histogram/O3/codeforces/train/codeforcestrainO3"
+    test_dir = "/Pramana/IR2Vec/Yali-Embeddings/histogram/O3/codeforces/test/codeforcestestO3"
+    val_dir="/Pramana/IR2Vec/Yali-Embeddings/histogram/O3/codeforces/val/codeforcesvalO3"
+
+    # train_dir = "/Pramana/IR2Vec/Yali-Embeddings/histogram/O0/codeforces/train/codeforcestrainO0"
+    # test_dir = "/Pramana/IR2Vec/Yali-Embeddings/histogram/O0/codeforces/test/codeforcestestO0"
+    # val_dir="/Pramana/IR2Vec/Yali-Embeddings/histogram/O0/codeforces/val/codeforcesvalO0"
+
+    # train_dir = "/Pramana/IR2Vec/Yali-Embeddings/histogram/O3/codejam/codejamtrainO3"
+    # test_dir = "/Pramana/IR2Vec/Yali-Embeddings/histogram/O3/codejam/codejamtestO3"
+    # val_dir="/Pramana/IR2Vec/Yali-Embeddings/histogram/O3/codejam/codejamvalO3"
+
+    # # Prepare data
+    # X_train, y_train, X_test, y_test = prepare_data(train_dir, test_dir)
+
+    # # Check data shapes
+    # print(f"Training data shape: {X_train.shape}")
+    # print(f"Training labels shape: {y_train.shape}")
+    # print(f"Testing data shape: {X_test.shape}")
+    # print(f"Testing labels shape: {y_test.shape}")
+
+    # # One-hot encode labels
+    # num_classes = len(np.unique(y_train))
+    # y_train = to_categorical(y_train, num_classes)
+    # y_test = to_categorical(y_test, num_classes)
+
+    # # No train-test split for validation, using all X_train and y_train for training
+    # model = getModel(X_train.shape[1], num_classes)
+
+    # Prepare data
+    X_train, y_train, X_test, y_test, X_val, y_val = prepare_data(train_dir, test_dir, val_dir)
+
+    # Check data shapes
+    print(f"Training data shape: {X_train.shape}")
+    print(f"Training labels shape: {y_train.shape}")
+    print(f"Testing data shape: {X_test.shape}")
+    print(f"Testing labels shape: {y_test.shape}")
+    if X_val is not None and y_val is not None:
+        print(f"Validation data shape: {X_val.shape}")
+        print(f"Validation labels shape: {y_val.shape}")
+
+    # One-hot encode labels
+    num_classes = len(np.unique(y_train))
+    y_train = to_categorical(y_train, num_classes)
+    y_test = to_categorical(y_test, num_classes)
+    if X_val is not None and y_val is not None:
+        y_val = to_categorical(y_val, num_classes)
+
+    # No train-test split for validation, using X_val and y_val for validation
+    model = getModel(X_train.shape[1], num_classes)
+
+    # mc = keras.callbacks.ModelCheckpoint(
+    #     filepath='/home/cs24mtech02001/IR2Vec-Classification/weights/milepost-O0/codeforces/weights_epoch_{epoch:08d}.weights.keras',
+    #     save_weights_only=True,
+    #     save_freq=500
+    # )
+
+    # mc = keras.callbacks.ModelCheckpoint(
+    # filepath='/home/cs24mtech02001/Program-Classification/ir2vec-model-weights-O3/codeforces/histogram/weights_epoch_{epoch:08d}.weights.h5', 
+    # save_weights_only=True, 
+    # save_freq=500)
+
+    mc = keras.callbacks.ModelCheckpoint(
+    filepath="/home/cs24mtech02001/IR2Vec-Classification/weights/histogram-O3/codeforces/weights_epoch_{epoch:08d}.weights.h5",
+    save_weights_only=True,
+    save_best_only=True,
+    monitor="val_loss",
+    mode="min"
+    )
+
+    # Train the model with validation data
+    model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_val, y_val) if X_val is not None and y_val is not None else None,
+        batch_size=128,
+        epochs=2000,
+        verbose=1,
+        callbacks=[mc]
+    )
+
+    # Evaluate model
+    y_pred = np.argmax(model.predict(X_test), axis=1)
+    y_true = np.argmax(y_test, axis=1)
+    print(f"Accuracy: {accuracy_score(y_true, y_pred):.13f}")
+
+    # Save the trained model
+    model.save("codeforces-O3-histogram-ir2vec-hypertuned-model.h5")
+    print("Saved model to disk as 'Kodanda-codeforces-O3-histogram-ir2vec-hypertuned-model.keras'.")
+
+    return model
+
+# Execute the script
+if __name__ == "__main__":  # train and evaluate only when executed as a script
+    main()
\ No newline at end of file
diff --git a/hyperparameter-tuning/models/ir2vec_fa_model.py b/hyperparameter-tuning/models/ir2vec_fa_model.py
new file mode 100644
index 0000000..5982b39
--- /dev/null
+++ b/hyperparameter-tuning/models/ir2vec_fa_model.py
@@ -0,0 +1,273 @@
+import numpy as np
+import pandas as pd
+from sklearn.decomposition import IncrementalPCA
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.activations import swish as SiLU
+from tensorflow.keras.activations import tanh as Tanh
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.models import load_model
+import argparse
+import pickle
+
+# Flowaware - O0
+
+# {'input_dim': 300, 'num_classes': 98, 'num_layers': 5, 'units_per_layer': [512, 128, 512, 128, 512], 'dropout': 0.28877129358258796, 'normalize_input': True, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.00014769411188154336, 'batch_size': 32, 'epochs': 5000}
+
+
+# def getModel(input_dim, output_dim):
+#     model = Sequential()
+
+#     # Input Layer
+#     model.add(Dense(512, input_shape=(input_dim,), kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.28877129358258796))
+    
+#     # Hidden Layer 2
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.28877129358258796))
+    
+#     # Hidden Layer 3
+#     model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.28877129358258796))
+
+#     # Hidden Layer 4
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.28877129358258796))
+
+#     # Hidden Layer 5
+#     model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.28877129358258796))
+    
+#     # Output Layer
+#     model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation('softmax'))
+    
+#     # Optimizer
+#     opt = keras.optimizers.Adam(learning_rate=0.00014769411188154336)
+#     model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
+    
+#     model.summary()
+#     return model
+
+# {'input_dim': 300, 'num_classes': 98, 'num_layers': 3, 'units_per_layer': [128, 256, 512], 'dropout': 0.21644468951221385, 'normalize_input': True, 'activation': Tanh(), 'optimizer': 'Adam', 'lr': 0.0001302138918461736, 'batch_size': 64, 'epochs': 5000}
+
+# Flowaware - O3
+def getModel(input_dim, output_dim):
+    """Build and compile the flow-aware (O3) MLP classifier.
+
+    Three hidden stages (128, 256, 512 units) each apply
+    Dense -> BatchNormalization -> tanh -> Dropout; the head is
+    Dense(output_dim) -> BatchNormalization -> softmax.
+
+    Args:
+        input_dim: Number of features per sample.
+        output_dim: Width of the softmax output (one unit per class).
+
+    Returns:
+        The compiled Sequential model (Adam, categorical cross-entropy).
+    """
+    hidden_units = (128, 256, 512)
+    dropout_rate = 0.21644468951221385
+    net = Sequential()
+    for position, width in enumerate(hidden_units):
+        # Only the first Dense layer declares the input shape.
+        extra = {"input_shape": (input_dim,)} if position == 0 else {}
+        net.add(Dense(width, kernel_initializer=keras.initializers.glorot_normal(seed=None), **extra))
+        net.add(BatchNormalization())
+        net.add(Activation(Tanh))
+        net.add(Dropout(dropout_rate))
+    # Output head: the logits are batch-normalised before the softmax.
+    net.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    net.add(BatchNormalization())
+    net.add(Activation('softmax'))
+    optimizer = keras.optimizers.Adam(learning_rate=0.0001302138918461736)
+    net.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])
+    net.summary()
+    return net
+
+# train the model on the  given data
+def train(x_train, y_train, x_test, y_test, x_val, y_val, epochs, batch_size, model):
+    """Fit the classifier on min-max scaled, PCA-projected features.
+
+    Labels are shifted down by one and one-hot encoded. The class count,
+    scaling bounds and fitted PCA are pickled to 'dictionary.pkl' before
+    training starts, so the per-epoch checkpoints remain usable with the
+    matching preprocessing even if the run is interrupted.
+
+    Args:
+        x_train, y_train: Training features and labels.
+        x_test, y_test: Optional test data; evaluated when x_test is not None.
+        x_val, y_val: Optional validation data forwarded to model.fit.
+        epochs, batch_size: Forwarded to model.fit.
+        model: Model to keep training, or None to build one with getModel.
+    """
+    X_min = x_train.min()
+    X_max = x_train.max()
+    # NOTE(review): assumes labels are exactly 1..num_classes -- confirm.
+    num_classes = np.unique(y_train).shape[0]
+    print(f" Number of classes: {num_classes}")
+
+    x_train = np.array((x_train - X_min) / (X_max - X_min))
+    y_train = np.array(y_train) - 1
+    print(f"\nLabels after subtracting 1: {y_train}")
+    print(f"\nDistinct labels after subtracting 1: {np.unique(y_train).shape[0]}")
+    y_train = keras.utils.to_categorical(y_train, num_classes)
+    print(y_train)
+
+    # PCA transformation
+    ipca = IncrementalPCA(n_components=300)
+    ipca.fit(x_train)
+    x_train = ipca.transform(x_train)
+
+    # Persist the preprocessing state now, in the order test() reads it back.
+    with open('dictionary.pkl', 'wb') as f:
+        for item in (num_classes, X_min, X_max, ipca):
+            pickle.dump(item, f)
+
+    val_tuple = None
+    if x_val is not None:
+        x_val = ipca.transform(np.array((x_val - X_min) / (X_max - X_min)))
+        y_val = keras.utils.to_categorical(np.array(y_val) - 1, num_classes)
+        val_tuple = (x_val, y_val)
+
+    # Checkpoint the full model (not just the weights) after every epoch.
+    mc = keras.callbacks.ModelCheckpoint(filepath='/home/cs24mtech02001/IR2Vec-Classification/weights/fa-O3/codejam-O3/weights{epoch:08d}.keras', save_weights_only=False, save_freq='epoch')
+    if model is None:
+        model = getModel(x_train.shape[1], num_classes)
+
+    model.fit(x_train,
+              y_train,
+              batch_size=batch_size,
+              epochs=epochs,
+              verbose=1,
+              validation_data=val_tuple, callbacks=[mc])
+
+    saved_path = "codejam-O3-fa-hypertuned-ir2vec-model.keras"
+    model.save(saved_path)
+    print(f"Saved model to disk --> {saved_path}")
+
+    if x_test is not None:
+        x_test = ipca.transform(np.array((x_test - X_min) / (X_max - X_min)))
+        y_test = keras.utils.to_categorical(np.array(y_test) - 1, num_classes)
+        score = model.evaluate(x_test, y_test, verbose=0)
+        print('Test Accuracy (After Training) : {acc:.13f}%'.format(acc=score[1]*100))
+
+
+# test the learnt model on the data
+def test(X, targetLabel, model):
+    """Evaluate `model` on X with the preprocessing state saved by train().
+
+    Loads num_classes, X_min, X_max and the fitted PCA (in that order) from
+    'dictionary.pkl', applies the same scaling, label shift and projection,
+    and prints the second value returned by model.evaluate as the accuracy.
+    NOTE: unpickling can run arbitrary code; only load a file you produced.
+    """
+    with open('dictionary.pkl', 'rb') as state_file:
+        num_classes, X_min, X_max, ipca = (pickle.load(state_file) for _ in range(4))
+
+    features = ipca.transform(np.array((X - X_min) / (X_max - X_min)))
+    one_hot = keras.utils.to_categorical(np.array(targetLabel) - 1, num_classes)
+
+    score = model.evaluate(features, one_hot, verbose=0)
+    print('Test accuracy : {acc:.13f}%'.format(acc=score[1]*100))
+
+# Entry Point of the program
+if __name__ == '__main__':
+    # Inputs: headerless, tab-separated CSVs with the label in column 0. Commented-out paths are alternative datasets.
+    # train_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codeforces/embeddings/fa/training.csv'
+
+    # test_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codeforces/embeddings/fa/testing.csv'
+
+    # val_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codeforces/embeddings/fa/val.csv'
+
+    # train_file = '/Pramana/IR2Vec/IR2Vec-ProgramClassification/datasets-17.x-O0/codejam/fa/training.csv'
+
+    # test_file = '/Pramana/IR2Vec/IR2Vec-ProgramClassification/datasets-17.x-O0/codejam/fa/testing.csv'
+
+    # val_file = '/Pramana/IR2Vec/IR2Vec-ProgramClassification/datasets-17.x-O0/codejam/fa/val.csv'
+
+    train_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codejam/embeddings/fa/training.csv'
+
+    test_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codejam/embeddings/fa/testing.csv'
+
+    val_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codejam/embeddings/fa/val.csv'
+
+    epochs = 2000
+    batch_size = 64
+
+    model = None  # No pre-trained model is being loaded
+
+    # At least one of train_file / test_file must be configured; exit with a failure status otherwise.
+    if test_file is None and train_file is None:
+        print("Enter training or testing data")
+        raise SystemExit(1)
+
+    X_test = None
+    y_test = None
+    if test_file is not None:
+        X_test = pd.read_csv(test_file, sep='\t', header=None)
+        y_test = X_test.loc[:, 0]
+        X_test = X_test.loc[:, 1:]
+        X_test.columns = range(X_test.shape[1])
+
+        print("Test Set:")
+        print(f"X_test shape: {X_test.shape}")
+        print(f"y_test unique counts: \n{y_test.value_counts()}")
+
+    if train_file is not None:
+        X = pd.read_csv(train_file, sep='\t', header=None)
+        Y = X.loc[:, 0]
+        X = X.loc[:, 1:]
+        X.columns = range(X.shape[1])
+
+        print("Train Set:")
+        print(f"X_train shape: {X.shape}")
+        print(f"y_train unique counts: \n{Y.value_counts()}")
+
+        X_val = None
+        y_val = None
+        if val_file is not None:
+            X_val = pd.read_csv(val_file, sep='\t', header=None)
+            y_val = X_val.loc[:, 0]
+            X_val = X_val.loc[:, 1:]
+            X_val.columns = range(X_val.shape[1])
+
+            print("Validation Set:")
+            print(f"X_val shape: {X_val.shape}")
+            print(f"y_val unique counts: \n{y_val.value_counts()}")
+
+        train(X, Y, X_test, y_test, X_val, y_val, epochs, batch_size, model)
+
+        # Load the model checkpoint
+        # model_checkpoint_path = "/home/intern24009/tune-ir2vec/hypertuned-models/O3/fa/codeforces/weights00000878.keras"
+        # model = load_model(model_checkpoint_path, custom_objects={'swish': SiLU})  # Include custom_objects if using custom activation functions
+
+        # Continue training from the checkpoint
+        # train(X, Y, X_test, y_test, X_val, y_val, epochs, batch_size, model)
+
+    elif test_file is not None:
+
+        if model is None:
+            print('***********************Model is not passed in the testing**************')
+            raise SystemExit(1)
+
+        # No training data was given but a model was: evaluate it on the
+        # test split. test() reloads the preprocessing state that train()
+        # pickled to 'dictionary.pkl'.
+        test(X_test, y_test, model)
\ No newline at end of file
diff --git a/hyperparameter-tuning/models/ir2vec_static_model.py b/hyperparameter-tuning/models/ir2vec_static_model.py
new file mode 100644
index 0000000..b64db35
--- /dev/null
+++ b/hyperparameter-tuning/models/ir2vec_static_model.py
@@ -0,0 +1,270 @@
+import numpy as np
+import pandas as pd
+from sklearn.decomposition import IncrementalPCA
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.activations import swish as SiLU
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.models import load_model
+import argparse
+import pickle
+
+# Static - O0
+# def getModel(input_dim, output_dim):
+#     model = Sequential()
+
+#     # Input Layer
+#     model.add(Dense(128, input_shape=(input_dim,), kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.14414245564202546))
+    
+#     # Hidden Layer 2
+#     model.add(Dense(256, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.14414245564202546))
+    
+#     # Hidden Layer 3
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.14414245564202546))
+
+#     # Hidden Layer 4
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.14414245564202546))
+
+#     # Hidden Layer 5
+#     model.add(Dense(256, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.14414245564202546))
+    
+#     # Output Layer
+#     model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation('softmax'))
+    
+#     # Optimizer
+#     opt = keras.optimizers.Adam(learning_rate=0.0002935566846936451)
+#     model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
+    
+#     model.summary()
+#     return model
+
+# 'input_dim': 300, 'num_classes': 98, 'num_layers': 3, 'units_per_layer': [512, 128, 512], 'dropout': 0.26164844577753404, 'normalize_input': True, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.00021151571457278203, 'batch_size': 1024, 'epochs': 2000
+
+# Static - O3
+def getModel(input_dim, output_dim):
+    """Build and compile the static (O3) MLP classifier.
+
+    Three hidden stages (512, 128, 512 units) each apply
+    Dense -> BatchNormalization -> swish (imported as SiLU) -> Dropout;
+    the head is Dense(output_dim) -> BatchNormalization -> softmax.
+
+    Args:
+        input_dim: Number of features per sample.
+        output_dim: Width of the softmax output (one unit per class).
+
+    Returns:
+        The compiled Sequential model (Adam, categorical cross-entropy).
+    """
+    hidden_units = (512, 128, 512)
+    dropout_rate = 0.26164844577753404
+    net = Sequential()
+    for position, width in enumerate(hidden_units):
+        # Only the first Dense layer declares the input shape.
+        extra = {"input_shape": (input_dim,)} if position == 0 else {}
+        net.add(Dense(width, kernel_initializer=keras.initializers.glorot_normal(seed=None), **extra))
+        net.add(BatchNormalization())
+        net.add(Activation(SiLU))
+        net.add(Dropout(dropout_rate))
+    # Output head: the logits are batch-normalised before the softmax.
+    net.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    net.add(BatchNormalization())
+    net.add(Activation('softmax'))
+    optimizer = keras.optimizers.Adam(learning_rate=0.00021151571457278203)
+    net.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])
+    net.summary()
+    return net
+
+# train the model on the  given data
+def train(x_train, y_train, x_test, y_test, x_val, y_val, epochs, batch_size, model):
+    """Fit the classifier on min-max scaled, PCA-projected features.
+
+    Labels are shifted down by one and one-hot encoded. The class count,
+    scaling bounds and fitted PCA are pickled to 'dictionary.pkl' before
+    training starts, so the per-epoch checkpoints remain usable with the
+    matching preprocessing even if the run is interrupted.
+
+    Args:
+        x_train, y_train: Training features and labels.
+        x_test, y_test: Optional test data; evaluated when x_test is not None.
+        x_val, y_val: Optional validation data forwarded to model.fit.
+        epochs, batch_size: Forwarded to model.fit.
+        model: Model to keep training, or None to build one with getModel.
+    """
+    X_min = x_train.min()
+    X_max = x_train.max()
+    # NOTE(review): assumes labels are exactly 1..num_classes -- confirm.
+    num_classes = np.unique(y_train).shape[0]
+    print(f" Number of classes: {num_classes}")
+
+    x_train = np.array((x_train - X_min) / (X_max - X_min))
+    y_train = np.array(y_train) - 1
+    print(f"\nLabels after subtracting 1: {y_train}")
+    print(f"\nDistinct labels after subtracting 1: {np.unique(y_train).shape[0]}")
+    y_train = keras.utils.to_categorical(y_train, num_classes)
+    print(y_train)
+
+    # PCA transformation
+    ipca = IncrementalPCA(n_components=300)
+    ipca.fit(x_train)
+    x_train = ipca.transform(x_train)
+
+    # Persist the preprocessing state now, in the order test() reads it back.
+    with open('dictionary.pkl', 'wb') as f:
+        for item in (num_classes, X_min, X_max, ipca):
+            pickle.dump(item, f)
+
+    val_tuple = None
+    if x_val is not None:
+        x_val = ipca.transform(np.array((x_val - X_min) / (X_max - X_min)))
+        y_val = keras.utils.to_categorical(np.array(y_val) - 1, num_classes)
+        val_tuple = (x_val, y_val)
+
+    # Checkpoint the full model (not just the weights) after every epoch.
+    mc = keras.callbacks.ModelCheckpoint(filepath='/home/cs24mtech02001/IR2Vec-Classification/weights/static-O3/codejam-O3/weights{epoch:08d}.keras', save_weights_only=False, save_freq='epoch')
+    if model is None:
+        model = getModel(x_train.shape[1], num_classes)
+
+    model.fit(x_train,
+              y_train,
+              batch_size=batch_size,
+              epochs=epochs,
+              verbose=1,
+              validation_data=val_tuple, callbacks=[mc])
+
+    model.save("/home/cs24mtech02001/IR2Vec-Classification/codejam-O3-fa-static-hypertuned-ir2vec-model.h5")
+    print("Saved model to disk")
+
+    if x_test is not None:
+        x_test = ipca.transform(np.array((x_test - X_min) / (X_max - X_min)))
+        y_test = keras.utils.to_categorical(np.array(y_test) - 1, num_classes)
+        score = model.evaluate(x_test, y_test, verbose=0)
+        print('Test Accuracy (After Training) : {acc:.13f}%'.format(acc=score[1]*100))
+
+
+# test the learnt model on the data
+def test(X, targetLabel, model):
+    """Evaluate `model` on X with the preprocessing state saved by train().
+
+    Loads num_classes, X_min, X_max and the fitted PCA (in that order) from
+    'dictionary.pkl', applies the same scaling, label shift and projection,
+    and prints the second value returned by model.evaluate as the accuracy.
+    NOTE: unpickling can run arbitrary code; only load a file you produced.
+    """
+    with open('dictionary.pkl', 'rb') as state_file:
+        num_classes, X_min, X_max, ipca = (pickle.load(state_file) for _ in range(4))
+
+    features = ipca.transform(np.array((X - X_min) / (X_max - X_min)))
+    one_hot = keras.utils.to_categorical(np.array(targetLabel) - 1, num_classes)
+
+    score = model.evaluate(features, one_hot, verbose=0)
+    print('Test accuracy : {acc:.13f}%'.format(acc=score[1]*100))
+
+# Entry Point of the program
+if __name__ == '__main__':
+    # Inputs: headerless, tab-separated CSVs with the label in column 0. Commented-out paths are alternative datasets.
+    # train_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O0/Codeforces/csv/training.csv'
+
+    # test_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O0/Codeforces/csv/testing.csv'
+
+    # val_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O0/Codeforces/csv/val.csv'
+
+    # train_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O3/Codeforces/csv/training.csv'
+
+    # test_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O3/Codeforces/csv/testing.csv'
+
+    # val_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O3/Codeforces/csv/val.csv'
+
+    train_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O3/Codejam/csv/training.csv'
+
+    test_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O3/Codejam/csv/testing.csv'
+
+    val_file = '/Pramana/IR2Vec/IR2Vec-Embeddings/O3/Codejam/csv/val.csv'
+
+    epochs = 2000
+    # batch_size = 32
+    batch_size = 1024
+
+    # `model` must be a loaded Keras model (e.g. from load_model), not a path string;
+    # None makes train() build a fresh network with getModel.
+    model = None
+
+    # At least one of train_file / test_file must be configured; exit with a failure status otherwise.
+    if test_file is None and train_file is None:
+        print("Enter training or testing data")
+        raise SystemExit(1)
+
+    X_test = None
+    y_test = None
+    if test_file is not None:
+        X_test = pd.read_csv(test_file, sep='\t', header=None)
+        y_test = X_test.loc[:, 0]
+        X_test = X_test.loc[:, 1:]
+        X_test.columns = range(X_test.shape[1])
+
+        print("Test Set:")
+        print(f"X_test shape: {X_test.shape}")
+        print(f"y_test unique counts: \n{y_test.value_counts()}")
+
+    if train_file is not None:
+        X = pd.read_csv(train_file, sep='\t', header=None)
+        Y = X.loc[:, 0]
+        X = X.loc[:, 1:]
+        X.columns = range(X.shape[1])
+
+        print("Train Set:")
+        print(f"X_train shape: {X.shape}")
+        print(f"y_train unique counts: \n{Y.value_counts()}")
+
+        X_val = None
+        y_val = None
+        if val_file is not None:
+            X_val = pd.read_csv(val_file, sep='\t', header=None)
+            y_val = X_val.loc[:, 0]
+            X_val = X_val.loc[:, 1:]
+            X_val.columns = range(X_val.shape[1])
+
+            print("Validation Set:")
+            print(f"X_val shape: {X_val.shape}")
+            print(f"y_val unique counts: \n{y_val.value_counts()}")
+
+        train(X, Y, X_test, y_test, X_val, y_val, epochs, batch_size, model)
+
+        # model_checkpoint_path = "/home/intern24009/tune-ir2vec/hypertuned-models/O0/fa/codeforces/weights00001973.keras"
+
+        # model = load_model(model_checkpoint_path, custom_objects={'swish': SiLU})  # Include custom_objects if using custom activation functions
+
+        # # Continue training from the checkpoint
+        # train(X, Y, X_test, y_test, X_val, y_val, epochs, batch_size, model)
+
+    elif test_file is not None:
+
+        if model is None:
+            print('***********************Model is not passed in the testing**************')
+            raise SystemExit(1)
+
+        # No training data was given but a model was: evaluate it on the
+        # test split. test() reloads the preprocessing state that train()
+        # pickled to 'dictionary.pkl'.
+        test(X_test, y_test, model)
\ No newline at end of file
diff --git a/hyperparameter-tuning/models/ir2vec_sym_model.py b/hyperparameter-tuning/models/ir2vec_sym_model.py
new file mode 100644
index 0000000..b92c52c
--- /dev/null
+++ b/hyperparameter-tuning/models/ir2vec_sym_model.py
@@ -0,0 +1,267 @@
+import numpy as np
+import pandas as pd
+from sklearn.decomposition import IncrementalPCA
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.activations import swish as SiLU
+from tensorflow.keras.models import Sequential
+import argparse
+import pickle
+
+# {'input_dim': 300, 'num_classes': 98, 'num_layers': 3, 'units_per_layer': [256, 512, 128], 'dropout': 0.26394223847024845, 'normalize_input': True, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.00012758233973417935, 'batch_size': 64, 'epochs': 5000}
+
+# Symbolic - O0
+# def getModel(input_dim, output_dim):
+#     model = Sequential()
+
+#     # Input Layer
+#     model.add(Dense(256, input_shape=(input_dim,), kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26394223847024845))
+    
+#     # Hidden Layer 2
+#     model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26394223847024845))
+    
+#     # Hidden Layer 3
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26394223847024845))
+    
+#     # Output Layer
+#     model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation('softmax'))
+    
+#     # Optimizer
+#     opt = keras.optimizers.Adam(learning_rate=0.00012758233973417935)
+#     model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
+    
+#     model.summary()
+#     return model
+
+# Symbolic - O3
+
+# {'input_dim': 300, 'num_classes': 98, 'num_layers': 5, 'units_per_layer': [256, 512, 256, 128, 512], 'dropout': 0.1743626588566297, 'normalize_input': True, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.0001028291067528109, 'batch_size': 32, 'epochs': 5000}
+
+def getModel(input_dim, output_dim):
+    """Build and compile the symbolic (O3) MLP classifier.
+
+    Five hidden stages each apply Dense -> BatchNormalization -> swish
+    (imported as SiLU) -> Dropout; the head is
+    Dense(output_dim) -> BatchNormalization -> softmax.
+
+    Args:
+        input_dim: Number of features per sample.
+        output_dim: Width of the softmax output (one unit per class).
+
+    Returns:
+        The compiled Sequential model (Adam, categorical cross-entropy).
+    """
+    # Widths of the hidden Dense layers, in order.
+    hidden_units = (
+        256,
+        512,
+        256,
+        128,
+        512,
+    )
+    dropout_rate = 0.1743626588566297
+    learning_rate = 0.0001028291067528109
+    def _glorot():
+        # A fresh initializer object per layer, as in a hand-written stack.
+        return keras.initializers.glorot_normal(seed=None)
+
+    net = Sequential()
+    for position, width in enumerate(hidden_units):
+        # Only the first Dense layer declares the input shape.
+        extra = {"input_shape": (input_dim,)} if position == 0 else {}
+        net.add(Dense(width, kernel_initializer=_glorot(), **extra))
+        net.add(BatchNormalization())
+        net.add(Activation(SiLU))
+        net.add(Dropout(dropout_rate))
+
+    # Output head: the logits are batch-normalised before the softmax.
+    net.add(Dense(output_dim, kernel_initializer=_glorot()))
+    net.add(BatchNormalization())
+    net.add(Activation('softmax'))
+    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
+    net.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])
+    net.summary()
+    return net
+
+# train the model on the  given data
+def train(x_train, y_train, x_test, y_test, x_val, y_val, epochs, batch_size, model):
+    """Fit the classifier on min-max scaled, PCA-projected features.
+
+    Labels are shifted down by one and one-hot encoded. The class count,
+    scaling bounds and fitted PCA are pickled to 'dictionary.pkl' before
+    training starts, so the saved checkpoints remain usable with the
+    matching preprocessing even if the run is interrupted.
+
+    Args:
+        x_train, y_train: Training features and labels.
+        x_test, y_test: Optional test data; evaluated when x_test is not None.
+        x_val, y_val: Optional validation data forwarded to model.fit.
+        epochs, batch_size: Forwarded to model.fit.
+        model: Model to keep training, or None to build one with getModel.
+    """
+    X_min = x_train.min()
+    X_max = x_train.max()
+    # NOTE(review): assumes labels are exactly 1..num_classes -- confirm.
+    num_classes = np.unique(y_train).shape[0]
+    print(f" Number of classes: {num_classes}")
+
+    x_train = np.array((x_train - X_min) / (X_max - X_min))
+    y_train = np.array(y_train) - 1
+    print(f"\nLabels after subtracting 1: {y_train}")
+    print(f"\nDistinct labels after subtracting 1: {np.unique(y_train).shape[0]}")
+    y_train = keras.utils.to_categorical(y_train, num_classes)
+    print(y_train)
+
+    # PCA transformation
+    ipca = IncrementalPCA(n_components=300)
+    ipca.fit(x_train)
+    x_train = ipca.transform(x_train)
+
+    # Persist the preprocessing state now, in the order test() reads it back.
+    with open('dictionary.pkl', 'wb') as f:
+        for item in (num_classes, X_min, X_max, ipca):
+            pickle.dump(item, f)
+
+    val_tuple = None
+    if x_val is not None:
+        x_val = ipca.transform(np.array((x_val - X_min) / (X_max - X_min)))
+        y_val = keras.utils.to_categorical(np.array(y_val) - 1, num_classes)
+        val_tuple = (x_val, y_val)
+
+    # Keep only the best weights. Without validation data there is no
+    # 'val_loss' metric, so monitor the training loss instead; otherwise the
+    # callback skips saving on every epoch.
+    mc = keras.callbacks.ModelCheckpoint(
+        filepath='/home/cs24mtech02001/IR2Vec-Classification/weights/sym-O3/codejam/weights{epoch:08d}.weights.h5',
+        save_weights_only=True,
+        save_best_only=True,
+        monitor="val_loss" if val_tuple is not None else "loss",
+        mode="min"
+    )
+    if model is None:
+        model = getModel(x_train.shape[1], num_classes)
+
+    model.fit(x_train,
+              y_train,
+              batch_size=batch_size,
+              epochs=epochs,
+              verbose=1,
+              validation_data=val_tuple, callbacks=[mc])
+
+    saved_path = "codejam-O3-sym-hypertuned-ir2vec-model.h5"
+    model.save(saved_path)
+    print(f"Saved model to disk --> {saved_path}")
+
+    if x_test is not None:
+        x_test = ipca.transform(np.array((x_test - X_min) / (X_max - X_min)))
+        y_test = keras.utils.to_categorical(np.array(y_test) - 1, num_classes)
+        score = model.evaluate(x_test, y_test, verbose=0)
+        print('Test Accuracy (After Training) : {acc:.13f}%'.format(acc=score[1]*100))
+
+
+# test the learnt model on the data
+def test(X, targetLabel, model):
+    """Evaluate `model` on X with the preprocessing state saved by train().
+
+    Loads num_classes, X_min, X_max and the fitted PCA (in that order) from
+    'dictionary.pkl', applies the same scaling, label shift and projection,
+    and prints the second value returned by model.evaluate as the accuracy.
+    NOTE: unpickling can run arbitrary code; only load a file you produced.
+    """
+    with open('dictionary.pkl', 'rb') as state_file:
+        num_classes, X_min, X_max, ipca = (pickle.load(state_file) for _ in range(4))
+
+    features = ipca.transform(np.array((X - X_min) / (X_max - X_min)))
+    one_hot = keras.utils.to_categorical(np.array(targetLabel) - 1, num_classes)
+
+    score = model.evaluate(features, one_hot, verbose=0)
+    print('Test accuracy : {acc:.13f}%'.format(acc=score[1]*100))
+
+# Entry Point of the program
+if __name__ == '__main__':
+    # Inputs: headerless, tab-separated CSVs with the label in column 0. Commented-out paths are alternative datasets.
+    # Codejam-O0
+    # train_file = '/Pramana/IR2Vec/IR2Vec-ProgramClassification/datasets-17.x-O0/codejam/sym/training.csv'
+
+    # test_file = '/Pramana/IR2Vec/IR2Vec-ProgramClassification/datasets-17.x-O0/codejam/sym/testing.csv'
+
+    # val_file = '/Pramana/IR2Vec/IR2Vec-ProgramClassification/datasets-17.x-O0/codejam/sym/val.csv'
+
+    # Codejam-O3
+    train_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codejam/embeddings/sym/training.csv'
+
+    test_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codejam/embeddings/sym/testing.csv'
+
+    val_file = '/Pramana/IR2Vec/O3/A-ir2vec-17.x/codejam/embeddings/sym/val.csv'
+
+    epochs = 2000
+    batch_size = 32
+
+    model = None  # No pre-trained model is being loaded
+
+    # At least one of train_file / test_file must be configured; exit with a failure status otherwise.
+    if test_file is None and train_file is None:
+        print("Enter training or testing data")
+        raise SystemExit(1)
+
+    X_test = None
+    y_test = None
+    if test_file is not None:
+        X_test = pd.read_csv(test_file, sep='\t', header=None)
+        y_test = X_test.loc[:, 0]
+        X_test = X_test.loc[:, 1:]
+        X_test.columns = range(X_test.shape[1])
+
+        print("Test Set:")
+        print(f"X_test shape: {X_test.shape}")
+        print(f"y_test unique counts: \n{y_test.value_counts()}")
+
+    if train_file is not None:
+        X = pd.read_csv(train_file, sep='\t', header=None)
+        Y = X.loc[:, 0]
+        X = X.loc[:, 1:]
+        X.columns = range(X.shape[1])
+
+        print("Train Set:")
+        print(f"X_train shape: {X.shape}")
+        print(f"y_train unique counts: \n{Y.value_counts()}")
+
+        X_val = None
+        y_val = None
+        if val_file is not None:
+            X_val = pd.read_csv(val_file, sep='\t', header=None)
+            y_val = X_val.loc[:, 0]
+            X_val = X_val.loc[:, 1:]
+            X_val.columns = range(X_val.shape[1])
+
+            print("Validation Set:")
+            print(f"X_val shape: {X_val.shape}")
+            print(f"y_val unique counts: \n{y_val.value_counts()}")
+
+        train(X, Y, X_test, y_test, X_val, y_val, epochs, batch_size, model)
+
+    elif test_file is not None:
+
+        if model is None:
+            print('***********************Model is not passed in the testing**************')
+            raise SystemExit(1)
+
+        # No training data was given but a model was: evaluate it on the
+        # test split. test() reloads the preprocessing state that train()
+        # pickled to 'dictionary.pkl'.
+        test(X_test, y_test, model)
\ No newline at end of file
diff --git a/hyperparameter-tuning/models/milepost_model.py b/hyperparameter-tuning/models/milepost_model.py
new file mode 100644
index 0000000..c10ba39
--- /dev/null
+++ b/hyperparameter-tuning/models/milepost_model.py
@@ -0,0 +1,311 @@
+import os
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+# Import TensorFlow Keras
+from tensorflow import keras
+from tensorflow.keras import optimizers
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import (Activation, Dense, Dropout, BatchNormalization)
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.activations import swish as SiLU
+from tensorflow.keras.models import load_model
+import argparse
+import pickle
+
+# Model definition
+
+# 'config': {'input_dim': 56, 'num_classes': 98, 'num_layers': 4, 'units_per_layer': [256, 128, 512, 512], 'dropout': 0.26338369031159503, 'normalize_input': True, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.0001271463116097739, 'batch_size': 128, 'epochs': 2000}
+
+# Milepost-O0
+# def getModel(input_dim, output_dim):
+#     model = Sequential()
+
+#     # Input Layer
+#     model.add(Dense(256, input_shape=(input_dim,), kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26338369031159503))
+
+#     # Hidden Layer 2
+#     model.add(Dense(128, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26338369031159503))
+
+#     # Hidden Layer 3
+#     model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26338369031159503))
+
+#     # Hidden Layer 4
+#     model.add(Dense(512, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation(SiLU))
+#     model.add(Dropout(0.26338369031159503))
+
+#     model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+#     model.add(BatchNormalization())
+#     model.add(Activation('softmax'))
+
+#     opt = keras.optimizers.Adam(learning_rate=0.0001271463116097739)
+#     model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
+#     model.summary()
+
+#     return model
+
+# 'config': {'input_dim': 56, 'num_classes': 98, 'num_layers': 5, 'units_per_layer': [256, 256, 512, 128, 256], 'dropout': 0.20077533375677442, 'normalize_input': True, 'activation': SiLU(), 'optimizer': 'Adam', 'lr': 0.0009488463996149118, 'batch_size': 32, 'epochs': 5000}
+
+# Milepost-O3
+def getModel(input_dim, output_dim, hidden_units=(256, 256, 512, 128, 256),
+             dropout=0.20077533375677442, learning_rate=0.0009488463996149118):
+    """Build, compile, and print a summary of the Milepost MLP classifier.
+
+    Every hidden block is Dense -> BatchNormalization -> SiLU -> Dropout; the
+    head is Dense(output_dim) -> BatchNormalization -> softmax. The defaults
+    reproduce the tuned Milepost-O3 configuration. For the tuned Milepost-O0
+    one, pass hidden_units=(256, 128, 512, 512), dropout=0.26338369031159503,
+    learning_rate=0.0001271463116097739.
+
+    Args:
+        input_dim: Number of input features per sample.
+        output_dim: Number of output classes.
+        hidden_units: Width of each hidden Dense layer, in order (non-empty).
+        dropout: Dropout rate applied after every hidden block.
+        learning_rate: Learning rate passed to the Adam optimizer.
+
+    Returns the model compiled with Adam and categorical cross-entropy.
+    Raises ValueError if ``hidden_units`` is empty.
+    """
+    if not hidden_units:
+        raise ValueError("hidden_units must contain at least one layer size")
+
+    model = Sequential()
+    for index, units in enumerate(hidden_units):
+        # Only the first layer declares the input shape.
+        shape_kwargs = {"input_shape": (input_dim,)} if index == 0 else {}
+        model.add(Dense(units, kernel_initializer=keras.initializers.glorot_normal(seed=None), **shape_kwargs))
+        model.add(BatchNormalization())
+        model.add(Activation(SiLU))
+        model.add(Dropout(dropout))
+
+    # Classification head
+    model.add(Dense(output_dim, kernel_initializer=keras.initializers.glorot_normal(seed=None)))
+    model.add(BatchNormalization())
+    model.add(Activation('softmax'))
+
+    opt = keras.optimizers.Adam(learning_rate=learning_rate)
+    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])
+    model.summary()
+
+    return model
+
+# Load data from directory
+# def load_data_from_directory(directory):
+#     data = []
+#     labels = []
+#     classes = sorted(os.listdir(directory))  # Ensure consistent label mapping
+#     class_to_label = {cls: idx for idx, cls in enumerate(classes)}
+
+#     for cls in classes:
+#         class_path = os.path.join(directory, cls)
+#         if os.path.isdir(class_path):
+#             for file_name in os.listdir(class_path):
+#                 if file_name.endswith(".npz"):
+#                     file_path = os.path.join(class_path, file_name)
+#                     try:
+#                         loaded = np.load(file_path)["values"]
+#                         data.append(loaded.flatten())
+#                         labels.append(class_to_label[cls])
+#                     except Exception as e:
+#                         print(f"Failed to load {file_path}: {e}")
+
+#     return np.array(data), np.array(labels)
+
+def load_data_from_directory(directory, num_features=56):
+    """Load flattened ``values`` arrays from ``<directory>/<class>/*.npz``.
+
+    Each sub-directory is one class, labelled by its index among the sorted
+    sub-directory names; plain files beside them are ignored so they cannot
+    shift the numbering. Samples are truncated or zero-padded to
+    ``num_features``. Files that fail to load are reported and skipped.
+    Returns ``(data, labels)`` arrays; ``data`` is always two-dimensional.
+    """
+    classes = sorted(c for c in os.listdir(directory) if os.path.isdir(os.path.join(directory, c)))
+    data, labels = [], []
+    for label, cls in enumerate(classes):
+        class_path = os.path.join(directory, cls)
+        for file_name in sorted(os.listdir(class_path)):  # reproducible sample order
+            if not file_name.endswith(".npz"):
+                continue
+            file_path = os.path.join(class_path, file_name)
+            try:
+                with np.load(file_path) as archive:  # release the file handle promptly
+                    sample = archive["values"].flatten()
+            except Exception as e:  # best effort: report the bad file and move on
+                print(f"Failed to load {file_path}: {e}")
+                continue
+            if len(sample) == 0:
+                sample = np.zeros((num_features,))  # empty vector becomes all zeros
+            missing = max(num_features - len(sample), 0)
+            data.append(np.pad(sample[:num_features], (0, missing), 'constant'))
+            labels.append(label)
+
+    # Debugging information
+    unique_lengths = set(len(x) for x in data)
+    print(f"Total unique data shapes after fix: {len(unique_lengths)}")
+    print(f"Unique lengths: {unique_lengths}")
+    return np.array(data).reshape(-1, num_features), np.array(labels)
+
+# Prepare train and test data
+# def prepare_data(train_dir, test_dir):
+#     X_train, y_train = load_data_from_directory(train_dir)
+#     X_test, y_test = load_data_from_directory(test_dir)
+
+#     return X_train, y_train, X_test, y_test
+
+# # Main function
+# def main():
+#     # Paths to the train and test directories
+#     train_dir = "/Pramana/IR2Vec/Yali-Embeddings/milepost/O3/codeforces/codeforcestrainO3"
+#     test_dir = "/Pramana/IR2Vec/Yali-Embeddings/milepost/O3/codeforces/codeforcestestO3"
+
+#     # Prepare data
+#     X_train, y_train, X_test, y_test = prepare_data(train_dir, test_dir)
+
+#     # Check data shapes
+#     print(f"Training data shape: {X_train.shape}")
+#     print(f"Training labels shape: {y_train.shape}")
+#     print(f"Testing data shape: {X_test.shape}")
+#     print(f"Testing labels shape: {y_test.shape}")
+
+#     # One-hot encode labels
+#     num_classes = len(np.unique(y_train))
+#     y_train = to_categorical(y_train, num_classes)
+#     y_test = to_categorical(y_test, num_classes)
+
+#     # No train-test split for validation, using all X_train and y_train for training
+#     model = getModel(X_train.shape[1], num_classes)
+
+#     mc = keras.callbacks.ModelCheckpoint(
+#     filepath='/home/cs24mtech02001/IR2Vec-Classification/weights/milepost-O0/codeforces/weights_epoch_{epoch:08d}.weights.keras', 
+#     save_weights_only=True, 
+#     save_freq=500)
+
+
+#     # Train the model
+#     model.fit(X_train,
+#               y_train,
+#               batch_size=128,
+#               epochs=2000,
+#               verbose=1, 
+#               callbacks=[mc])
+
+#     # Evaluate model
+#     y_pred = np.argmax(model.predict(X_test), axis=1)
+#     y_true = np.argmax(y_test, axis=1)
+#     # print("Classification Report:")
+#     # print(classification_report(y_true, y_pred))
+#     # print("Confusion Matrix:")
+#     # print(confusion_matrix(y_true, y_pred))
+#     print(f"Accuracy: {accuracy_score(y_true, y_pred):.13f}")
+
+#     # Save the trained model
+#     model.save("codeforces-O0-milepost-ir2vec-hypertuned-model.h5")
+#     print("Saved model to disk as 'codeforces-O0-milepost-ir2vec-model.keras'.")
+
+#     return model
+
+# # Execute the script
+# if __name__ == "__main__":
+#     main()
+
+# Prepare train, test, and validation data
+def prepare_data(train_dir, test_dir, val_dir=None):
+    """Load the train and test splits, plus validation when ``val_dir`` is set.
+
+    Returns ``(X_train, y_train, X_test, y_test, X_val, y_val)``; the
+    validation pair is ``(None, None)`` when ``val_dir`` is falsy.
+    """
+    splits = [load_data_from_directory(path) for path in (train_dir, test_dir)]
+    splits.append(load_data_from_directory(val_dir) if val_dir else (None, None))
+    return tuple(item for split in splits for item in split)
+
+# Main function
+def main():
+    """Train the Milepost-O3 Codeforces classifier and report its test accuracy.
+
+    Loads the train/test/validation directories, one-hot encodes the labels,
+    fits the network built by ``getModel`` while checkpointing the best weights,
+    prints the test accuracy, saves the full model, and returns it.
+    """
+    # Paths to the train, test, and validation directories. Alternative dataset
+    # with the same layout: /Pramana/IR2Vec/Milepost/O0/codejam{train,test,val}O0
+    train_dir = "/Pramana/IR2Vec/Milepost/O3/codeforcestrainO3"
+    test_dir = "/Pramana/IR2Vec/Milepost/O3/codeforcestestO3"
+    val_dir = "/Pramana/IR2Vec/Milepost/O3/codeforcesvalO3"  # Replace with your validation data path
+    # Single source of truth so the saved file and the log message cannot drift.
+    model_path = "new-codeforces-O3-milepost-ir2vec-hypertuned-model.h5"
+
+    # Prepare data
+    X_train, y_train, X_test, y_test, X_val, y_val = prepare_data(train_dir, test_dir, val_dir)
+    has_val = X_val is not None and y_val is not None
+
+    # Check data shapes
+    print(f"Training data shape: {X_train.shape}")
+    print(f"Training labels shape: {y_train.shape}")
+    print(f"Testing data shape: {X_test.shape}")
+    print(f"Testing labels shape: {y_test.shape}")
+    if has_val:
+        print(f"Validation data shape: {X_val.shape}")
+        print(f"Validation labels shape: {y_val.shape}")
+
+    # One-hot encode labels. Size the encoding by the largest label id rather
+    # than the number of distinct training labels, so a class that is missing
+    # from the training split does not make to_categorical fail.
+    label_arrays = [y for y in (y_train, y_test, y_val) if y is not None and len(y)]
+    num_classes = int(max(y.max() for y in label_arrays)) + 1
+    y_train = to_categorical(y_train, num_classes)
+    y_test = to_categorical(y_test, num_classes)
+    if has_val:
+        y_val = to_categorical(y_val, num_classes)
+
+    model = getModel(X_train.shape[1], num_classes)
+
+    # Keep only the best weights; without a validation split Keras reports no
+    # "val_loss", so fall back to the training loss.
+    mc = keras.callbacks.ModelCheckpoint(
+        filepath="/home/cs24mtech02001/IR2Vec-Classification/weights/milepost-O3/codeforces/weights_epoch_{epoch:08d}.weights.h5",
+        save_weights_only=True,
+        save_best_only=True,
+        monitor="val_loss" if has_val else "loss",
+        mode="min",
+    )
+
+    # Train the model with validation data
+    model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_val, y_val) if has_val else None,
+        batch_size=32,
+        epochs=2000,
+        verbose=1,
+        callbacks=[mc],
+    )
+
+    # Evaluate model
+    y_pred = np.argmax(model.predict(X_test), axis=1)
+    y_true = np.argmax(y_test, axis=1)
+    print(f"Accuracy: {accuracy_score(y_true, y_pred):.13f}")
+
+    # Save the trained model
+    model.save(model_path)
+    print(f"Saved model to disk as '{model_path}'.")
+
+    return model
+
+# Script entry point: run training only when executed directly, not on import.
+if __name__ == "__main__":
+    main()
\ No newline at end of file