From 44414267f5b6b40c951652de995e4df097fbdb01 Mon Sep 17 00:00:00 2001 From: Patrick Jiang Date: Sat, 24 Aug 2024 13:13:52 -0700 Subject: [PATCH 1/6] Update __init__.py --- contrastive/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 6e00d28..039797e 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -24,8 +24,13 @@ def get_data(self): return self.data def get_bg(self): return self.bg + def get_bg_cov(self): + return self.bg_cov def get_fg(self): return self.fg + def get_fg_cov(self): + return self.fg_cov + def get_active_labels(self): return self.active_labels def get_pca_directions(self): From a83b5207fa49050f2bd9e47bb88e76ed08dfccad Mon Sep 17 00:00:00 2001 From: Patrick Jiang Date: Sat, 24 Aug 2024 16:15:45 -0700 Subject: [PATCH 2/6] Update __init__.py Added displayed ratio of variance as well as raw variance amounts for each axis --- contrastive/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 039797e..b39e2e5 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -51,6 +51,11 @@ def __init__(self, n_components=2, standardize=True, verbose=False): self.n_components = n_components self.verbose = verbose self.fitted = False + self.rawvar = 0 + self.pervar = 0 + + self.rawvars = [] + self.pervars = [] """ Finds the covariance matrices of the foreground and background datasets, @@ -226,6 +231,8 @@ def update(value): for i, l in enumerate(np.sort(np.unique(self.active_labels))): idx = np.where(self.active_labels==l) plt.scatter(fg[idx,0],fg[idx,1], color=self.colors[i%len(self.colors)], alpha=0.6, label='Class '+str(i)) + plt.xlabel(f"PC1: %var = {round(self.pervars[j][0], 2)}, raw = {round(self.rawvars[j][0], 2)}") + plt.ylabel(f"PC2: %var = {round(self.pervars[j][1], 2)}, raw = {round(self.rawvars[j][1], 2)}") plt.title('Alpha='+str(np.round(best_alphas[j],2))) if len(np.unique(self.active_labels))>1: plt.legend() @@ -237,6 +244,8 @@ def update(value): idx = np.where(self.active_labels==l) plt.scatter(fg[idx,0],fg[idx,1], color=self.colors[i%len(self.colors)], alpha=0.6, label='Class '+str(i)) plt.title('Alpha=' + str(alpha_value)) + plt.xlabel(f"PC1: %var = {round(self.pervar[0], 2)}, raw = {round(self.rawvar[0], 2)}") + plt.ylabel(f"PC2: %var = {round(self.pervar[1], 2)}, raw = {round(self.rawvar[1], 2)}") plt.legend() plt.show() @@ -270,9 +279,13 @@ def automated_cpca(self, dataset, n_alphas_to_return, n_alphas, max_log_alpha): best_alphas, all_alphas, _, _ = self.find_spectral_alphas(n_alphas, max_log_alpha, n_alphas_to_return) best_alphas = np.concatenate(([0], best_alphas)) #one of the alphas is always alpha=0 data_to_plot = [] + raw = [] + per = [] for alpha in best_alphas: transformed_dataset = self.cpca_alpha(dataset=dataset, alpha=alpha) data_to_plot.append(transformed_dataset) + self.rawvars.append(self.rawvar) + self.pervars.append(self.pervar) return data_to_plot, best_alphas """ @@ -286,6 +299,8 @@ def all_cpca(self, dataset, n_alphas, max_log_alpha): for alpha in alphas: transformed_dataset = self.cpca_alpha(dataset=dataset, alpha=alpha) data_to_plot.append(transformed_dataset) + self.rawvars.append(self.rawvar) + self.pervars.append(self.pervar) return data_to_plot, alphas """ @@ -298,6 +313,8 @@ def cpca_alpha(self, dataset, alpha=1): w, v = LA.eig(sigma) eig_idx = np.argpartition(w, -n_components)[-n_components:] eig_idx = eig_idx[np.argsort(-w[eig_idx])] + self.rawvar = w[eig_idx] + self.pervar = [i/np.sum(self.rawvar) for i in self.rawvar] v_top = v[:,eig_idx] reduced_dataset = dataset.dot(v_top) reduced_dataset[:,0] = reduced_dataset[:,0]*np.sign(reduced_dataset[0,0]) From 054cb1248b11c864d6b644876ccf5511c178cc96 Mon Sep 17 00:00:00 2001 From: Patrick Jiang Date: Sat, 24 Aug 2024 16:43:46 -0700 Subject: [PATCH 3/6] Update __init__.py --- contrastive/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index b39e2e5..97bb48a 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -236,6 +236,7 @@ def update(value): plt.title('Alpha='+str(np.round(best_alphas[j],2))) if len(np.unique(self.active_labels))>1: plt.legend() + plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.4, hspace=0.4) plt.show() elif (alpha_selection=='manual'): fg = self.cpca_alpha(dataset, alpha_value) From 5ee7bbdbab8ac2721085c6ce86599f26bd01301c Mon Sep 17 00:00:00 2001 From: Patrick Jiang Date: Sat, 24 Aug 2024 18:37:24 -0700 Subject: [PATCH 4/6] Update __init__.py --- contrastive/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 97bb48a..6795c30 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -315,7 +315,7 @@ def cpca_alpha(self, dataset, alpha=1): eig_idx = np.argpartition(w, -n_components)[-n_components:] eig_idx = eig_idx[np.argsort(-w[eig_idx])] self.rawvar = w[eig_idx] - self.pervar = [i/np.sum(self.rawvar) for i in self.rawvar] + self.pervar = [i/np.sum(w) for i in self.rawvar] v_top = v[:,eig_idx] reduced_dataset = dataset.dot(v_top) reduced_dataset[:,0] = reduced_dataset[:,0]*np.sign(reduced_dataset[0,0]) From 5641b501045594641789086fd826d23fe51ea408 Mon Sep 17 00:00:00 2001 From: Patrick Jiang Date: Sat, 24 Aug 2024 19:42:53 -0700 Subject: [PATCH 5/6] Update __init__.py Added functionality to display scree plots --- contrastive/__init__.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 6795c30..fa5d41c 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -46,16 +46,19 @@ def standardize_array(self, array): return np.nan_to_num(standardized_array) #stores - def __init__(self, n_components=2, standardize=True, verbose=False): + def __init__(self, n_components=2, standardize=True, verbose=False, show_scree = True): self.standardize = standardize self.n_components = n_components self.verbose = verbose self.fitted = False + self.show = show_scree self.rawvar = 0 self.pervar = 0 + self.pcs = [] self.rawvars = [] self.pervars = [] + self.full_pc = [] """ Finds the covariance matrices of the foreground and background datasets, @@ -225,17 +228,35 @@ def update(value): raise ImportError("Something wrong while loading matplotlib.pyplot! You probably don't have plotting libraries installed.") if (alpha_selection=='auto'): transformed_data, best_alphas = self.automated_cpca(dataset, n_alphas_to_return, n_alphas, max_log_alpha) - plt.figure(figsize=[14,3]) + if self.show: + plt.figure(figsize=[8,14]) + else: + plt.figure(figsize=[14,3]) + for j, fg in enumerate(transformed_data): - plt.subplot(1,4,j+1) + if self.show: + plt.subplot(4,2,2*(j+1) - 1) + else: + plt.subplot(1,4,j+1) for i, l in enumerate(np.sort(np.unique(self.active_labels))): idx = np.where(self.active_labels==l) plt.scatter(fg[idx,0],fg[idx,1], color=self.colors[i%len(self.colors)], alpha=0.6, label='Class '+str(i)) plt.xlabel(f"PC1: %var = {round(self.pervars[j][0], 2)}, raw = {round(self.rawvars[j][0], 2)}") plt.ylabel(f"PC2: %var = {round(self.pervars[j][1], 2)}, raw = {round(self.rawvars[j][1], 2)}") plt.title('Alpha='+str(np.round(best_alphas[j],2))) - if len(np.unique(self.active_labels))>1: - plt.legend() + if len(np.unique(self.active_labels))>1: + plt.legend() + + if self.show: + plt.subplot(4,2,2*(j+1)) + plt.plot(np.arange(1,(len(self.full_pc[j]) + 1)), self.full_pc[j], 'o-c', label = 'screeplot') + plt.xlabel("Index of Principal Component") + plt.ylabel("Raw Explained Variance") + plt.title('Scree Plot of Eigenvalues') + + + + plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.4, hspace=0.4) plt.show() elif (alpha_selection=='manual'): @@ -287,6 +308,7 @@ def automated_cpca(self, dataset, n_alphas_to_return, n_alphas, max_log_alpha): data_to_plot.append(transformed_dataset) self.rawvars.append(self.rawvar) self.pervars.append(self.pervar) + self.full_pc.append(self.pcs) return data_to_plot, best_alphas """ @@ -302,6 +324,7 @@ def all_cpca(self, dataset, n_alphas, max_log_alpha): data_to_plot.append(transformed_dataset) self.rawvars.append(self.rawvar) self.pervars.append(self.pervar) + self.full_pc.append(self.pcs) return data_to_plot, alphas """ @@ -314,6 +337,7 @@ def cpca_alpha(self, dataset, alpha=1): w, v = LA.eig(sigma) eig_idx = np.argpartition(w, -n_components)[-n_components:] eig_idx = eig_idx[np.argsort(-w[eig_idx])] + self.pcs = -1*np.sort(-w) self.rawvar = w[eig_idx] self.pervar = [i/np.sum(w) for i in self.rawvar] v_top = v[:,eig_idx] From ea8d4a7d4aa78f06741c3a88e52fca311dc04ea3 Mon Sep 17 00:00:00 2001 From: Patrick Jiang Date: Thu, 29 Aug 2024 14:49:36 -0700 Subject: [PATCH 6/6] Update __init__.py --- contrastive/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index fa5d41c..531a164 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -66,8 +66,8 @@ def __init__(self, n_components=2, standardize=True, verbose=False, show_scree = Parameters: see self.fit() and self.transform() for parameter description """ - def fit_transform(self, foreground, background, plot=False, gui=False, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): - self.fit(foreground, background) + def fit_transform(self, foreground, background, preprocess_with_pca_dim=None, plot=False, gui=False, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): + self.fit(foreground, background, preprocess_with_pca_dim) return self.transform(dataset=self.fg, alpha_selection=alpha_selection, n_alphas=n_alphas, max_log_alpha=max_log_alpha, n_alphas_to_return=n_alphas_to_return, plot=plot, gui=gui, active_labels=active_labels, colors=colors, legend=legend, alpha_value=alpha_value, return_alphas=return_alphas) """