From 2c6f0496c907a29711f91a04779d7ad1200f0d96 Mon Sep 17 00:00:00 2001 From: siddheshtv Date: Sat, 30 Dec 2023 14:00:28 +0530 Subject: [PATCH] minor fixes. update helper, ae, main --- autoencoder.py | 4 ++-- helper.py | 2 +- main.py | 11 +++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/autoencoder.py b/autoencoder.py index 40a49d6..10d455d 100644 --- a/autoencoder.py +++ b/autoencoder.py @@ -51,8 +51,8 @@ def __init__(self, num_features, verbose=True, mse_threshold = 0.5, archi="U15,D def accuracy(self, y_true, y_pred): - mse = K.mean(K.square((y_true - y_pred)), axis = 1) - temp = K.ones(K.shape(mse)) + mse = K.mean(K.square((y_true - y_pred)), axis=1) + temp = K.ones_like(mse) # Resolve out-of-scope error return K.mean(K.equal(temp, K.cast(mse < self.mse_threshold, temp.dtype))) def loss(self, y_true,y_pred): diff --git a/helper.py b/helper.py index 4735073..c351cf5 100644 --- a/helper.py +++ b/helper.py @@ -20,7 +20,7 @@ def dataframe_drop_correlated_columns(df, threshold=0.95, verbose=False): corr_matrix = df.corr().abs() # Select upper triangle of correlation matrix - upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool)) + upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)) # np.bool is deprecated; use the builtin bool # Find index of feature columns with correlation greater than 0.95 to_drop = [column for column in upper.columns if any(upper[column] > threshold)] diff --git a/main.py b/main.py index db3ffd7..14fb490 100644 --- a/main.py +++ b/main.py @@ -42,7 +42,10 @@ def evaluate(model, valid_X, attack_path, output_file): label_encoder_1 = preprocessing.LabelEncoder() label_encoder_2 = preprocessing.LabelEncoder() label_encoder_3 = preprocessing.LabelEncoder() -one_hot_encoder = preprocessing.OneHotEncoder(categorical_features = [1,2,3]) +# one_hot_encoder = preprocessing.OneHotEncoder(categorical_features = [1,2,3]) + + + def read_kdd_dataset(path): global label_encoder_1, 
label_encoder_2, label_encoder_3, one_hot_encoder @@ -59,7 +62,11 @@ def read_kdd_dataset(path): dataset[:, 1] = label_encoder_1.fit_transform(dataset[:, 1]) dataset[:, 2] = label_encoder_2.fit_transform(dataset[:, 2]) dataset[:, 3] = label_encoder_3.fit_transform(dataset[:, 3]) - dataset_features = one_hot_encoder.fit_transform(dataset[:, :-2]).toarray() + # dataset_features = one_hot_encoder.fit_transform(dataset[:, :-2]).toarray() + categorical_cols = [1, 2, 3] # Specify the categorical columns here + # Ensure categorical columns are properly encoded using OneHotEncoder + one_hot_encoder = preprocessing.OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore') + one_hot_encoded_cols = one_hot_encoder.fit_transform(dataset[:, categorical_cols]) else: dataset[:, 1] = label_encoder_1.transform(dataset[:, 1]) dataset[:, 2] = label_encoder_2.transform(dataset[:, 2])