Skip to content

Unable to reproduce the same ARI score in MouseBrain dataset #2

@locokun

Description

@locokun

Thank you for your great job!
I want to reproduce your results in the paper, but failed in the spatial ATAC-RNA seq MouseBrain dataset. It's so strange that I use exactly the same tutorial you shared on the github, and checked every paramater to stay the same. But I still can't get ari=0.63 which is post in the paper, I can only get ari=0.43.
I wonder if there's something wrong with your code, or maybe I missed something. I hope you can help me with it.

Here's my code and result.
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib
import matplotlib.pyplot as plt
from umap import UMAP
import sklearn
import seaborn as sns
from COSMOS import cosmos
from COSMOS.pyWNN import pyWNN
import h5py
import warnings
warnings.filterwarnings('ignore')
random_seed = 20
data_mat = h5py.File('./ATAC_RNA_Seq_MouseBrain_RNA_ATAC.h5', 'r')
df_data_RNA = np.array(data_mat['X_RNA']).astype('float64') # gene count matrix
df_data_ATAC= np.array(data_mat['X_ATAC']).astype('float64') # protein count matrix
loc = np.array(data_mat['Pos']).astype('float64')
LayerName = list(data_mat['LayerName'])
LayerName = [item.decode("utf-8") for item in LayerName]

adata1 = sc.AnnData(df_data_RNA, dtype="float64")
adata1.obsm['spatial'] = np.array(loc)
adata1.obs['LayerName'] = LayerName
adata1.obs['x_pos'] = np.array(loc)[:,0]
adata1.obs['y_pos'] = np.array(loc)[:,1]

adata2 = sc.AnnData(df_data_ATAC, dtype="float64")
adata2.obsm['spatial'] = np.array(loc)
adata2.obs['LayerName'] = LayerName
adata2.obs['x_pos'] = np.array(loc)[:,0]
adata2.obs['y_pos'] = np.array(loc)[:,1]
adata_new = adata1.copy()
adata_new.obs["LayerName"]=adata_new.obs["LayerName"].astype('category')

matplotlib.rcParams['font.size'] = 12.0
fig, axes = plt.subplots(1, 1, figsize=(3.5,3))
sz = 20
plot_color=['#D1D1D1','#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6',
'#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#ffd8b1', '#800000', '#aaffc3', '#808000', '#000075', '#000000', '#808080', '#ffffff', '#fffac8']

domains="LayerName"
num_celltype=len(adata_new.obs[domains].unique())
adata_new.uns[domains+"_colors"]=list(plot_color[:num_celltype])
titles = 'Annotation '
ax=sc.pl.scatter(adata_new,alpha=1,x="x_pos",y="y_pos",color=domains,title=titles ,color_map=plot_color,show=False,size=sz,ax = axes)
ax.axis('off')

COSMOS training

cosmos_comb = cosmos.Cosmos(adata1=adata1,adata2=adata2)
cosmos_comb.preprocessing_data(n_neighbors = 10)
cosmos_comb.train(spatial_regularization_strength=0.01, z_dim=50,
lr=1e-3, wnn_epoch = 500, total_epoch=1000, max_patience_bef=10, max_patience_aft=30, min_stop=200,
random_seed=random_seed, gpu=0, regularization_acceleration=True, edge_subset_sz=1000000)
weights = cosmos_comb.weights
df_embedding = pd.DataFrame(cosmos_comb.embedding)
adata_new = adata1.copy()
embedding_adata = sc.AnnData(df_embedding)
sc.pp.neighbors(embedding_adata, n_neighbors=50, use_rep='X')

Manualy setting resolution for clustering

res = 1.0
sc.tl.louvain(embedding_adata, resolution=res)
adata_new.obs['Cluster_cosmos'] = list(embedding_adata.obs["louvain"].cat.codes)
adata_new.obs["Cluster_cosmos"]=adata_new.obs["Cluster_cosmos"].astype('category')

Relabeling clusters with layer names

digit_labels = list(adata_new.obs['Cluster_cosmos'])
for i in range(len(digit_labels)):
if digit_labels[i] not in [0,1,2,4,5,6,7,8,11]:
digit_labels[i] = 100
adata_new.obs['Cluster_cosmos'] = digit_labels
adata_new.obs["Cluster_cosmos"]=adata_new.obs["Cluster_cosmos"].astype('category')
adata_new.obs['Cluster_cosmos'].cat.rename_categories({0: 'CP2',
1: 'L5',
2: 'CP1',
4: 'L4',
5: 'ccg/aco',
6: 'ACB',
7: 'L1-L3',
8: 'L6a/b',
11: 'VL',
100: '1_others'
}, inplace=True)

ordered_cluster = np.unique(list(adata_new.obs['Cluster_cosmos']))
adata_new.obs['Cluster_cosmos'] = adata_new.obs['Cluster_cosmos'].cat.reorder_categories(ordered_cluster, ordered=True)

Calculating ARI

opt_cluster_cosmos = list(adata_new.obs['Cluster_cosmos'])
opt_ari_cosmos = sklearn.metrics.adjusted_rand_score(LayerName, opt_cluster_cosmos)
opt_ari_cosmos = round(opt_ari_cosmos, 2)

Ploting figures

matplotlib.rcParams['font.size'] = 8.0
fig, axes = plt.subplots(1, 2, figsize=(6,2))
sz = 10
plot_color=['#D1D1D1','#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6',
'#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#ffd8b1', '#800000', '#aaffc3', '#808000', '#000075', '#000000', '#808080', '#ffffff', '#fffac8']
domains="LayerName"
num_celltype=len(adata_new.obs[domains].unique())
adata_new.uns[domains+"_colors"]=list(plot_color[:num_celltype])
titles = 'Annotation'
ax=sc.pl.scatter(adata_new,alpha=1,x="x_pos",y="y_pos",color=domains,title=titles ,color_map=plot_color,show=False,size=sz,ax = axes[0])
ax.axis('off')

plot_color_1=['#D1D1D1','#e6194b', '#3cb44b', '#bcf60c','#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6',
'#fabebe', '#008080', '#e6beff', '#9a6324', '#ffd8b1', '#800000', '#aaffc3', '#808000', '#000075', '#000000', '#808080', '#ffffff', '#fffac8']
domains="Cluster_cosmos"
num_celltype=len(adata_new.obs[domains].unique())
adata_new.uns[domains+"_colors"]=list(plot_color_1[:num_celltype])
titles = 'COSMOS, ARI = ' + str(opt_ari_cosmos)
ax=sc.pl.scatter(adata_new,alpha=1,x="x_pos",y="y_pos",color=domains,title=titles ,color_map=plot_color_1,show=False,size=sz,ax = axes[1])
ax.axis('off')
plt.tight_layout()

Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions