Clustering Comparison

Preamble

import pandas as pd
import numpy as np
import scanpy as sc
from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score
from sklearn.metrics import homogeneity_score, completeness_score, fowlkes_mallows_score, silhouette_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix
from src.utils import sankey_plot
from sklearn.decomposition import PCA
import kaleido
from sklearn.preprocessing import StandardScaler
import plotly.io as pio
import matplotlib.pyplot as plt
import seaborn as sns
# Root folder of the per-dataset inputs and outputs.
DIR = 'Data/'
DATASET_NAMES = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
# Clustering tools whose labels are compared.
TOOLS = ['monocle', 'scanpy', 'scvi-tools', 'seurat', 'COTAN']
PARAMS_TUNING = ['default', 'celltypist', 'antibody']
# Threshold handed to drop_small_clusters as `min_size`.
min_size_cluster = 10

# Start from the COTAN labels of PBMC3 (default parameters), indexed by cell.
labels_df = pd.read_csv(
    f'{DIR}PBMC3/COTAN/default/clustering_labels.csv',
    index_col=0,
    usecols=["cell", "cluster"],
).rename(columns={"cluster": "cluster_COTAN"})
print(labels_df.shape)

# Inner-join the labels of every other tool, one `cluster_<tool>` column each.
for tool in (name for name in TOOLS if name != 'COTAN'):
    tool_labels_df = pd.read_csv(
        f'{DIR}PBMC3/{tool}/default/clustering_labels.csv',
        index_col=0,
    )
    labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell').rename(
        columns={"cluster": f"cluster_{tool}"}
    )
(10944, 1)
# Number of rows (cells) assigned to each COTAN cluster.
labels_df['cluster_COTAN'].value_counts()
cluster_COTAN
3     1689
9      688
20     609
4      433
8      432
5      390
10     362
27     331
47     324
13     305
52     302
19     282
46     260
32     253
43     242
45     228
54     219
23     190
2      189
12     177
35     166
39     162
42     159
17     151
55     149
14     146
18     145
6      143
28     132
53     130
7      120
40     111
51     109
16     100
50      99
44      98
36      79
30      78
41      76
11      74
37      67
22      58
57      52
1       49
56      40
48      39
34      37
31      35
38      30
49      29
15      29
25      28
26      27
29      27
33      24
21      21
24      20
Name: count, dtype: int64
# (rows, columns) of the label table.
labels_df.shape
(3405, 5)
# Remove the cells that sit in clusters with fewer than `min_size_cluster`
# members, checking every label column in turn.
labels_df = drop_small_clusters(df = labels_df,min_size = min_size_cluster)
'cluster_COTAN - cluster_COTAN\n12    809\n16    462\n7     392\n18    272\n8     254\n4     177\n10    152\n11    145\n3     122\n19    117\n20    106\n5      97\n13     94\n6      77\n2      75\n21     55\n15     42\n17     35\n1      35\n14     28\n9      27\n23     22\n22     15\nName: count, dtype: int64'
"Index([15, 17, 1, 14, 9, 23, 22], dtype='int64', name='cluster_COTAN')"
'cluster_monocle - cluster_monocle\n1    1913\n2    1195\n3     298\nName: count, dtype: int64'
"Index([], dtype='int64', name='cluster_monocle')"
'cluster_scanpy - cluster_scanpy\n3     323\n1     315\n2     309\n4     286\n7     268\n6     261\n8     236\n5     233\n9     200\n10    182\n11    158\n12    144\n13    141\n14    105\n15     91\n16     77\n17     76\n18      1\nName: count, dtype: int64'
"Index([18], dtype='int64', name='cluster_scanpy')"
'cluster_scvi-tools - cluster_scvi-tools\n1     635\n2     467\n3     421\n4     390\n5     364\n6     283\n7     277\n8     149\n9     146\n10    101\n11     92\n12     80\nName: count, dtype: int64'
"Index([], dtype='int64', name='cluster_scvi-tools')"
'cluster_seurat - cluster_seurat\n1     879\n3     581\n2     504\n4     408\n5     273\n6     260\n7     153\n8     144\n9     129\n10     74\nName: count, dtype: int64'
"Index([], dtype='int64', name='cluster_seurat')"
# NOTE(review): `dataset` is not assigned in any of the visible cells; confirm
# it is defined before this cell is executed.

# CellTypist annotation: read it, trim the cell identifiers and join it to the
# tool labels.
celltypist_df = pd.read_csv(
    f'{DIR}{dataset}/celltypist/celltypist_labels.csv',
    index_col=0,
)
# The last two characters of each identifier are removed so that they match
# the index of labels_df (presumably a barcode suffix; verify against the file).
celltypist_df.index = celltypist_df.index.str[:-2]
celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell').rename(
    columns={"cluster.ids": "cluster_celltypist"}
)
celltypist_mapping_df = pd.read_csv(
    f'{DIR}{dataset}/celltypist/celltypist_mapping.csv',
    index_col=0,
)
celltypist_df = drop_small_clusters(df=celltypist_df, min_size=min_size_cluster)

# Surface-protein (antibody) annotation: same join, no identifier trimming.
antibody_df = pd.read_csv(
    f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv',
    index_col=0,
)
antibody_df = labels_df.merge(antibody_df, how='inner', on='cell').rename(
    columns={"cluster.ids": "cluster_antibody"}
)
antibody_mapping_df = pd.read_csv(
    f'{DIR}{dataset}/antibody_annotation/antibody_mapping.csv',
    index_col=1,
    encoding='latin1',
)
antibody_df = drop_small_clusters(df=antibody_df, min_size=min_size_cluster)
def drop_small_clusters(df, min_size):
    """Remove the rows that belong to under-populated clusters.

    Every column of ``df`` is treated as a cluster labelling. Columns are
    processed in order and the table is filtered after each one, so the sizes
    used for a later column are counted on the rows that survived the earlier
    columns.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell, one column per labelling.
    min_size : int
        Clusters with strictly fewer rows than this are discarded.

    Returns
    -------
    pandas.DataFrame
        The filtered table; the input object is not modified.
    """
    for column in df.columns:
        sizes = df[column].value_counts()
        undersized = sizes.loc[sizes < min_size].index
        in_large_cluster = ~df[column].isin(undersized)
        df = df.loc[in_large_cluster]
    return df


def compute_scores(dir, dataset, labels_df, labels_matched, ground_truth_labels):
    """Score each tool's clustering against a reference labelling and export the table.

    For every tool in ``TOOLS`` the column ``cluster_<tool>`` of ``labels_df``
    is compared with ``cluster_<ground_truth_labels>`` using NMI, ARI,
    homogeneity, completeness, Fowlkes-Mallows and pair-counting precision and
    recall. The resulting table (tools as rows, metrics as columns) is written
    to ``<dir><dataset>/scores_<labels_matched>_<ground_truth_labels>`` as
    ``.csv`` and ``.tex`` and then displayed.

    Parameters
    ----------
    dir : str
        Path prefix of the output files.
    dataset : str
        Dataset folder name appended to ``dir``.
    labels_df : pandas.DataFrame
        Table holding the ``cluster_*`` columns.
    labels_matched : str
        Tag used in the output file names.
    ground_truth_labels : str
        Name of the reference labelling (suffix of its ``cluster_`` column).
    """
    reference_column = f'cluster_{ground_truth_labels}'
    metric_names = (
        'NMI',
        'ARI',
        'homogeneity',
        'completeness',
        'fowlkes_mallows',
        'precision',
        'recall',
    )
    scores = {metric: {} for metric in metric_names}

    for tool in TOOLS:
        predicted = labels_df['cluster_' + tool]
        reference = labels_df[reference_column]

        scores['NMI'][tool] = normalized_mutual_info_score(
            labels_true=reference, labels_pred=predicted, average_method='arithmetic'
        )
        scores['ARI'][tool] = adjusted_rand_score(labels_true=reference, labels_pred=predicted)
        scores['homogeneity'][tool] = homogeneity_score(labels_true=reference, labels_pred=predicted)
        scores['completeness'][tool] = completeness_score(labels_true=reference, labels_pred=predicted)
        scores['fowlkes_mallows'][tool] = fowlkes_mallows_score(labels_true=reference, labels_pred=predicted)

        # Pair-counting precision and recall from the 2x2 pair confusion matrix.
        pair_counts = pair_confusion_matrix(labels_true=reference, labels_pred=predicted)
        true_pos = pair_counts[1, 1]
        false_pos = pair_counts[0, 1]
        false_neg = pair_counts[1, 0]
        scores['precision'][tool] = true_pos / (true_pos + false_pos)
        scores['recall'][tool] = true_pos / (true_pos + false_neg)

    scores_df = pd.DataFrame(scores)
    output_stem = f'{dir}{dataset}/scores_{labels_matched}_{ground_truth_labels}'
    scores_df.to_csv(output_stem + '.csv')
    scores_df.to_latex(output_stem + '.tex')
    display(scores_df)


def _internal_validation_scores(features, cells, cluster_df, reference=None):
    """Compute silhouette, Calinski-Harabasz and Davies-Bouldin scores per labelling.

    Parameters
    ----------
    features : array-like
        Feature matrix with one row per cell.
    cells : pandas.Index
        Cell identifier of every row of ``features``, in row order.
    cluster_df : pandas.DataFrame
        Table indexed by cell holding the ``cluster_<name>`` columns.
    reference : str or None
        Name of an extra reference labelling to score next to the tools.

    Returns
    -------
    tuple of dict
        ``(silhouette, calinski_harabasz, davies_bouldin)``, each keyed by
        labelling name.
    """
    # The scikit-learn scores pair the i-th label with the i-th feature row, so
    # the labels are reordered to follow the rows of the feature matrix.
    aligned_df = cluster_df.loc[cells]
    feature_matrix = np.asarray(features)

    silhouette = {}
    calinski_harabasz = {}
    davies_bouldin = {}

    def score_labelling(name):
        cluster_labels = aligned_df[f'cluster_{name}']
        silhouette[name] = silhouette_score(feature_matrix, cluster_labels)
        calinski_harabasz[name] = calinski_harabasz_score(feature_matrix, cluster_labels)
        davies_bouldin[name] = davies_bouldin_score(feature_matrix, cluster_labels)

    for position, tool in enumerate(TOOLS):
        score_labelling(tool)
        # The reference labelling is scored once, right after the first tool,
        # which keeps the column order of the exported tables unchanged.
        if position == 0 and reference is not None:
            score_labelling(reference)

    return silhouette, calinski_harabasz, davies_bouldin


def _export_score_table(scores, path_stem, caption):
    """Write a one-row score table to ``<path_stem>.csv`` / ``.tex`` and display it."""
    scores_df = pd.DataFrame(scores, index=[0])
    scores_df.to_csv(f'{path_stem}.csv')
    scores_df.to_latex(f'{path_stem}.tex')
    display(caption)
    display(scores_df)


def _compare_with_reference(dataset, tuning, reference_df, mapping_df, reference, title):
    """Export the external scores and the sankey plot for one reference labelling."""
    compute_scores(DIR, dataset, reference_df, tuning, reference)
    labels = [reference_df[f'cluster_{tool}'].to_list() for tool in TOOLS]
    labels_titles = list(TOOLS)
    # Reference cluster ids are replaced through the 'go' column of the mapping table.
    labels.append(reference_df[f'cluster_{reference}'].map(mapping_df['go'].to_dict()).to_list())
    labels_titles.append(reference)
    sankey_plot(labels=labels, labels_titles=labels_titles, title=title, path=f'{DIR}{dataset}/{tuning}_{reference}.html')


def print_scores(dataset,tuning):
    """Compute, export and display the clustering scores of one dataset.

    The labels of every tool in ``TOOLS`` are joined by cell, small clusters
    are removed, and the result is joined with the CellTypist and antibody
    annotations. Internal validation scores are computed twice: on a scaled
    20-component PCA of the expression matrix and on a scaled 20-component PCA
    of the CellTypist probability matrix. External scores and sankey plots
    against the reference annotations are produced last.

    Parameters
    ----------
    dataset : str
        Dataset folder name under ``DIR``.
    tuning : str
        Parameter setting of the clusterings: ``'celltypist'`` and
        ``'antibody'`` select that annotation as reference; any other value is
        handled as the default setting (no reference in the internal scores).
    """
    # concat tools labels
    labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
    for tool in [t for t in TOOLS if t != 'COTAN']:
        tool_labels_df = pd.read_csv(f'{DIR}{dataset}/{tool}/{tuning}/clustering_labels.csv', index_col=0)
        labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
        labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)

    labels_df = drop_small_clusters(df = labels_df,min_size = min_size_cluster)

    # load and concat celltypist labels
    celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
    # Drop the last two characters of the cell identifiers before joining.
    celltypist_df.index = celltypist_df.index.str[:-2]
    celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
    celltypist_df.rename(columns={"cluster.ids": "cluster_celltypist"}, inplace=True)
    celltypist_mapping_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_mapping.csv', index_col=0)

    celltypist_df = drop_small_clusters(df = celltypist_df, min_size = min_size_cluster)

    # load and concat protein surface labels
    antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
    antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
    antibody_df.rename(columns={"cluster.ids": "cluster_antibody"}, inplace=True)
    antibody_mapping_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_mapping.csv', index_col=1, encoding='latin1')

    antibody_df = drop_small_clusters(df = antibody_df, min_size = min_size_cluster )

    # Table whose cells and labels feed the internal scores, plus the reference
    # labelling (if any) that is scored together with the tools.
    if tuning == 'celltypist':
        scored_df, reference = celltypist_df, 'celltypist'
    elif tuning == 'antibody':
        scored_df, reference = antibody_df, 'antibody'
    else:
        scored_df, reference = labels_df, None

    # read dataset
    adata = sc.read_10x_mtx(
        f'{DIR}{dataset}/filtered/10X/',
        var_names='gene_symbols',
        cache=False
    )
    adata.var_names_make_unique()
    # keep only labelled cells; copy so that the columns added to `obs` below
    # are not written into a view
    adata = adata[adata.obs_names.isin(scored_df.index), :].copy()

    mito_genes = adata.var_names.str.startswith('MT-')
    # for each cell compute fraction of counts in mito genes vs. all genes
    # the `.A1` is only necessary as X is sparse (to transform to a dense array after summing)
    adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1).A1 / np.sum(adata.X, axis=1).A1
    # add the total counts per cell as observations-annotation to adata
    adata.obs['n_counts'] = adata.X.sum(axis=1).A1

    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_mean=0.00125, max_mean=3, min_disp=0.5)
    adata.raw = adata
    adata = adata[:, adata.var.highly_variable]
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, svd_solver='arpack',n_comps=20)
    scaled_pca_matrix = StandardScaler().fit_transform(adata.obsm['X_pca'])

    # Number of clusters found by each tool
    df = {}
    for tool in TOOLS:
        df[tool] = labels_df[f'cluster_{tool}'].unique().shape[0]
    df_size = pd.DataFrame(df, index=[0])
    display(f'{dataset} - number of clusters')
    display(df_size)

    # compute silhouette, Calinski_Harabasz and davies_bouldin scores with scaled PCA
    silhouette, Calinski_Harabasz, davies_bouldin = _internal_validation_scores(
        scaled_pca_matrix, adata.obs_names, scored_df, reference
    )
    _export_score_table(silhouette, f'{DIR}{dataset}/{tuning}_silhouette',
                        f'{dataset} - Silhuette (higher is better)')
    _export_score_table(Calinski_Harabasz, f'{DIR}{dataset}/{tuning}_Calinski_Harabasz',
                        f'{dataset} - Calinski_Harabasz (higher is better)')
    _export_score_table(davies_bouldin, f'{DIR}{dataset}/{tuning}_davies_bouldin',
                        f'{dataset} - davies_bouldin (lower is better)')

    # compute silhouette, Calinski_Harabasz and davies_bouldin scores with cellTypist probability
    celltypist_prob_df = pd.read_csv(f'{DIR}{dataset}/celltypist/Immune_All_Low_probability_matrix.csv', index_col=0)
    celltypist_prob_df.index = celltypist_prob_df.index.str[:-2]
    celltypist_prob_df = celltypist_prob_df[celltypist_prob_df.index.isin(scored_df.index)]

    pca = PCA(n_components=20,svd_solver='arpack')
    scaled_pca_data = StandardScaler().fit_transform(pca.fit_transform(celltypist_prob_df))

    silhouette, Calinski_Harabasz, davies_bouldin = _internal_validation_scores(
        scaled_pca_data, celltypist_prob_df.index, scored_df, reference
    )
    _export_score_table(silhouette, f'{DIR}{dataset}/{tuning}_silhouette_fromProb',
                        f'{dataset} - Silhuette from Prob. (higher is better)')
    _export_score_table(Calinski_Harabasz, f'{DIR}{dataset}/{tuning}_Calinski_Harabasz_fromProb',
                        f'{dataset} - Calinski_Harabasz from Prob. (higher is better)')
    _export_score_table(davies_bouldin, f'{DIR}{dataset}/{tuning}_davies_bouldin_fromProb',
                        f'{dataset} - davies_bouldin  from Prob. (lower is better)')

    title = f'{dataset} - matching {tuning} labels' if tuning != 'default' else f'{dataset} - default labels'
    display(title)

    # compute scores comparing each tool labels with celltypist labels
    if tuning == 'celltypist' or tuning == 'default':
        _compare_with_reference(dataset, tuning, celltypist_df, celltypist_mapping_df, 'celltypist', title)

    # compute scores comparing each tool labels with protein labels
    if tuning == 'antibody' or tuning == 'default':
        _compare_with_reference(dataset, tuning, antibody_df, antibody_mapping_df, 'antibody', title)
def _display_contingency_tables(dataset, annotated_df, reference_column, reference_caption):
    """Display one contingency table per tool (rows: reference clusters, columns: tool clusters)."""
    reference_labels = np.unique(annotated_df[reference_column])
    for tool in TOOLS:
        tool_labels = np.unique(annotated_df[f'cluster_{tool}'])
        cm = contingency_matrix(annotated_df[reference_column], annotated_df[f'cluster_{tool}'])
        cm = pd.DataFrame(cm, index=reference_labels, columns=tool_labels)
        display(f'{dataset} - contingency_matrix (rows: {reference_caption} - cols: {tool})')
        display(cm)


def print_clustering_data(dataset,tuning):
    """Display summary information about the clusterings of one dataset.

    Shows the number of clusters found by every tool before any filtering,
    then, after joining the labels by cell and removing small clusters, the
    contingency tables between each tool and the CellTypist and/or antibody
    annotation.

    Parameters
    ----------
    dataset : str
        Dataset folder name under ``DIR``.
    tuning : str
        Parameter setting of the clusterings. ``'celltypist'`` and
        ``'antibody'`` select the matching annotation; ``'default'`` shows both.
    """
    # concat tools labels
    labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
    display('Initial COTAN cluster number:')
    display(labels_df.cluster_COTAN.unique().shape[0])
    for tool in [t for t in TOOLS if t != 'COTAN']:
        tool_labels_df = pd.read_csv(f'{DIR}{dataset}/{tool}/{tuning}/clustering_labels.csv', index_col=0)
        display(f'Initial {tool} cluster number:')
        # Count the clusters of the tool that was just loaded; the last column
        # of the merged table still belongs to the previously merged tool.
        display(tool_labels_df['cluster'].unique().shape[0])
        labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
        labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)

    labels_df = drop_small_clusters(df = labels_df,min_size = min_size_cluster)

    if tuning == 'celltypist' or tuning == 'default':
        # load and concat celltypist labels
        celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
        # Drop the last two characters of the cell identifiers before joining.
        celltypist_df.index = celltypist_df.index.str[:-2]
        celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
        celltypist_df.rename(columns={"cluster.ids": "cluster_celltypist"}, inplace=True)

        celltypist_df = drop_small_clusters(df = celltypist_df, min_size = min_size_cluster)

        _display_contingency_tables(dataset, celltypist_df, "cluster_celltypist", 'cellTypist')

    if tuning == 'antibody' or tuning == 'default':
        # load and concat protein surface labels
        antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
        display("Initial antibody cell/cluster table:")
        display(antibody_df["cluster.ids"].value_counts())
        antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
        antibody_df.rename(columns={"cluster.ids": "cluster_antibody"}, inplace=True)

        antibody_df = drop_small_clusters(df = antibody_df, min_size = min_size_cluster )

        _display_contingency_tables(dataset, antibody_df, "cluster_antibody", 'antibody')
        

Data summary information

Default parameters

# Cluster counts and contingency tables for PBMC1 with default parameters.
print_clustering_data(tuning = 'default',dataset="PBMC1")
'Initial COTAN cluster number:'
23
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
3
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
13
'PBMC1 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3
1 8 970 1
2 943 0 0
3 47 0 0
4 0 78 0
5 309 0 0
6 0 0 142
7 82 0 0
8 278 0 1
9 81 0 0
10 0 171 0
11 70 0 0
12 240 0 0
13 0 28 0
14 0 0 155
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 0 246 0 267 0 263 0 200 0 1 0 2 0 0 0 0 0
2 88 321 0 281 1 0 0 241 0 1 0 1 0 1 0 8 0 0
3 45 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0
4 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 76 0
5 250 5 0 0 0 0 0 2 0 0 0 52 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 4 0 138 0 0 0 0 0
7 8 0 0 0 0 0 0 0 0 19 0 37 0 18 0 0 0 0
8 0 0 0 0 0 263 0 0 0 16 0 0 0 0 0 0 0 0
9 2 2 0 5 0 0 0 5 0 0 0 0 0 1 0 66 0 0
10 0 0 75 0 0 0 5 0 0 0 0 0 0 0 91 0 0 0
11 0 0 0 0 0 5 0 0 0 65 0 0 0 0 0 0 0 0
12 6 0 0 0 0 0 0 4 0 82 0 59 0 86 0 3 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 0 0 0 0 0 0 0 0 0 154 0 1 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 659 0 0 0 289 0 1 0 0 30 0 0 0
2 0 485 48 402 0 2 0 0 0 0 0 6 0
3 0 1 41 5 0 0 0 0 0 0 0 0 0
4 0 0 0 0 6 0 0 0 0 72 0 0 0
5 0 5 288 12 0 4 0 0 0 0 0 0 0
6 0 0 0 0 0 0 1 2 139 0 0 0 0
7 0 0 58 0 0 23 1 0 0 0 0 0 0
8 0 0 0 0 0 0 279 0 0 0 0 0 0
9 0 4 0 2 0 2 0 0 0 0 0 73 0
10 1 0 0 0 78 0 0 0 0 0 92 0 0
11 0 0 0 0 0 67 3 0 0 0 0 0 0
12 0 1 48 1 0 189 0 0 0 0 0 1 0
13 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 0 0 0 0 1 0 147 7 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 0 0 616 361 1 0 0 0 1 0 0
2 798 145 0 0 0 0 0 0 0 0 0
3 1 46 0 0 0 0 0 0 0 0 0
4 0 0 0 4 0 0 0 0 0 74 0
5 0 309 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 142 0 0 0
7 0 55 0 0 0 27 0 0 0 0 0
8 0 0 0 0 274 5 0 0 0 0 0
9 78 3 0 0 0 0 0 0 0 0 0
10 0 0 0 43 0 0 0 0 128 0 0
11 0 0 0 0 5 65 0 0 0 0 0
12 7 69 0 0 0 164 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 1 27
14 0 0 0 0 0 0 153 2 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 14 15 16 17 18 19 20 21 22 23
1 35 0 0 175 97 75 389 207 0 0 ... 0 0 0 0 0 1 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 20 24 56 29 0 0 0 0 3 0
3 0 0 0 0 0 0 0 0 0 0 ... 0 1 9 1 0 0 0 6 7 21
4 0 73 0 0 0 0 0 5 0 0 ... 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 ... 7 8 290 3 0 0 0 0 0 1
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 ... 0 0 56 0 25 1 0 0 0 0
8 0 0 0 0 0 0 0 0 0 0 ... 0 3 1 0 4 115 104 47 5 0
9 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
10 0 1 122 0 0 2 3 42 0 0 ... 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 67 0 2 1 0 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 5 50 1 175 0 0 1 0 0
13 0 1 0 0 0 0 0 0 27 0 ... 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 152 ... 0 0 0 0 0 0 0 0 0 0

14 rows × 23 columns

'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3
1 161 0 0
2 0 43 3
3 600 0 0
4 262 1 0
5 158 0 0
6 1 86 0
7 10 1115 0
8 812 1 1
9 1 0 294
10 44 0 0
12 10 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 7 6 0 4 0 0 0 9 0 23 0 7 0 32 0 73 0 0
2 0 0 3 0 0 0 0 0 0 0 2 0 3 0 38 0 0 0
3 366 33 0 5 0 0 0 28 0 43 0 122 0 3 0 0 0 0
4 0 0 0 0 1 249 0 0 0 13 0 0 0 0 0 0 0 0
5 9 0 0 0 0 0 0 2 0 64 0 15 0 68 0 0 0 0
6 1 3 0 0 0 0 0 0 0 0 0 0 0 0 1 0 56 26
7 1 0 319 0 267 0 267 1 198 0 0 0 0 0 52 0 19 1
8 18 288 0 277 0 1 0 214 0 3 1 5 0 2 0 4 0 1
9 0 0 0 0 0 1 0 0 0 1 156 0 137 0 0 0 0 0
10 0 0 0 0 0 7 0 0 0 36 0 0 0 1 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 0 6 12 5 0 62 0 0 0 0 0 76 0
2 2 0 0 0 3 0 0 1 1 0 39 0 0
3 0 49 441 66 0 44 0 0 0 0 0 0 0
4 1 0 0 0 0 0 262 0 0 0 0 0 0
5 0 2 12 1 0 141 0 0 0 0 0 2 0
6 0 1 0 0 0 3 0 0 0 56 1 0 26
7 655 3 0 0 368 0 1 0 0 45 52 0 1
8 0 438 17 349 0 4 3 0 0 0 0 2 1
9 0 0 0 0 0 0 3 148 144 0 0 0 0
10 0 0 1 0 0 37 6 0 0 0 0 0 0
12 0 0 0 0 0 0 10 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 17 0 0 0 51 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 23 540 0 0 0 37 0 0 0 0 0
4 0 0 1 0 260 2 0 0 0 0 0
5 1 28 0 0 0 129 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 611 402 1 0 0 0 89 18 1
8 766 41 0 0 1 4 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 14 15 16 17 18 19 20 21 22 23
1 0 0 0 0 0 0 0 0 0 0 ... 0 2 10 0 54 0 0 1 0 0
2 1 1 38 0 0 1 1 2 0 1 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 ... 26 23 417 25 39 0 0 6 10 22
4 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 113 99 45 4 0
5 0 0 0 0 0 0 0 0 0 0 ... 0 1 20 0 135 0 0 0 0 0
6 0 56 1 1 0 0 0 1 25 0 ... 1 1 0 1 0 0 0 0 0 0
7 33 17 83 175 96 76 389 251 1 0 ... 0 1 0 0 0 1 0 0 0 0
8 0 0 0 0 0 0 0 0 1 1 ... 1 12 14 9 4 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 150 ... 0 0 0 0 0 1 0 1 0 0
10 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 38 0 3 2 1 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 2 1 0 1 2 4 0 0 0

11 rows × 23 columns

# Cluster counts and contingency tables for PBMC2 with default parameters.
print_clustering_data(tuning = 'default',dataset="PBMC2")
'Initial COTAN cluster number:'
31
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
2
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
20
'PBMC2 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2
1 230 1
2 427 0
3 2139 3
4 700 7
5 316 0
6 0 93
7 0 567
8 674 0
9 0 186
10 52 0
11 0 228
12 0 204
13 0 48
14 0 14
15 80 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 0 0 2 0 0 1 22 1 2 5 50 0 148 0 0 0 0
2 0 91 0 273 0 0 0 3 2 0 56 1 0 1 0 0 0 0
3 942 508 21 183 0 0 0 21 295 2 100 42 0 26 1 0 0 1
4 0 0 0 1 0 463 8 2 0 230 0 0 0 2 1 0 0 0
5 0 0 0 2 0 0 0 266 0 0 42 0 0 6 0 0 0 0
6 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 88 0 0
7 0 0 0 0 466 0 0 0 0 0 0 0 15 0 86 0 0 0
8 2 1 558 0 0 0 0 3 0 0 0 110 0 0 0 0 0 0
9 0 0 0 0 12 0 0 0 0 0 0 0 174 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 50 2 0 0 0 0 0 0 0
11 0 0 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 13
15 0 25 0 46 0 0 0 1 2 1 2 2 0 1 0 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 0 0 0 0 0 0 222 1 2 0 0 4 1 1 0 0 0 0 0 0
2 331 4 1 0 0 0 7 10 53 0 0 1 0 10 4 0 6 0 0 0
3 391 733 2 1 19 1 41 304 158 185 0 2 43 90 75 0 71 0 0 26
4 1 0 675 1 0 8 3 0 0 0 0 0 0 0 0 0 0 19 0 0
5 7 0 1 0 0 0 11 9 66 0 0 151 71 0 0 0 0 0 0 0
6 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 78 0 0 3 0
7 0 0 0 564 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0
8 0 2 0 1 561 0 103 1 0 2 0 1 3 0 0 0 0 0 0 0
9 0 0 0 11 0 0 0 0 0 0 175 0 0 0 0 0 0 0 0 0
10 2 0 4 0 0 0 0 0 0 0 0 0 2 0 0 0 0 44 0 0
11 0 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0
15 58 0 1 0 0 0 5 5 1 5 0 0 1 3 0 0 1 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
1 0 4 6 0 0 0 219 0 1 0 0 1 0 0
2 0 400 23 0 0 0 1 0 3 0 0 0 0 0
3 1151 352 550 0 2 15 69 1 0 0 0 2 0 0
4 0 0 2 635 0 0 3 7 0 0 1 59 0 0
5 0 42 0 0 0 0 7 0 267 0 0 0 0 0
6 0 0 0 0 11 0 0 0 0 0 0 0 82 0
7 0 0 0 0 567 0 0 0 0 0 0 0 0 0
8 8 4 2 0 0 541 119 0 0 0 0 0 0 0
9 0 0 0 0 14 0 0 0 0 172 0 0 0 0
10 0 0 0 3 0 0 0 0 0 0 0 49 0 0
11 0 0 0 0 0 0 0 83 0 0 145 0 0 0
12 0 0 0 0 0 0 0 201 1 0 2 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 48
14 0 0 0 0 0 0 0 0 0 0 0 14 0 0
15 0 0 77 0 0 0 2 0 0 0 0 1 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 22 23 24 25 26 27 28 29 30 31
1 0 0 0 0 0 0 1 25 54 3 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 1 0 2 ... 0 0 0 0 0 0 0 0 0 0
3 2 0 0 0 1172 14 56 7 10 3 ... 0 0 0 0 0 0 0 0 0 1
4 56 141 383 115 0 0 0 0 0 0 ... 0 0 0 0 0 2 1 0 2 2
5 0 0 0 0 0 0 1 1 4 270 ... 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 7 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 1 0 ... 36 111 211 150 0 0 0 0 0 0
8 0 0 0 0 6 562 2 78 17 0 ... 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 ... 0 1 10 0 0 0 0 0 0 1
10 51 0 0 1 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 1 0 141 72 13
12 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 8 47 2 50 96
13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 48 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 10 0 0 0 3
15 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

15 rows × 31 columns

'Initial antibody cell/cluster table:'
cluster.ids
4     1510
11    1130
8      695
12     570
6      424
13     275
5      197
2      150
10     122
3       84
7       76
Name: count, dtype: int64
'PBMC2 - contingency_matrix (rows: antibody - cols: monocle)'
1 2
2 0 145
3 60 19
4 1480 5
5 196 0
6 416 1
7 68 5
8 680 3
10 0 115
11 1115 7
12 566 2
13 0 262
'PBMC2 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 16
2 0 0 0 0 4 0 0 0 0 0 0 0 141 0 0
3 0 1 1 0 0 3 0 0 0 54 0 0 18 2 0
4 89 588 0 478 0 0 0 56 63 0 194 10 0 3 4
5 7 1 14 2 0 0 0 118 2 2 4 23 0 23 0
6 0 2 9 13 0 0 1 128 1 0 3 124 0 136 0
7 1 3 27 3 0 0 0 6 1 0 1 23 0 3 5
8 0 0 0 0 0 459 2 0 0 220 0 0 2 0 0
10 0 0 0 0 106 0 0 0 0 0 0 0 3 0 6
11 843 26 5 5 0 0 5 4 229 0 1 2 0 0 2
12 2 0 522 0 0 0 2 1 1 2 0 21 0 17 0
13 0 0 0 0 0 0 262 0 0 0 0 0 0 0 0
'PBMC2 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20
2 0 0 0 3 0 0 0 0 0 0 142 0 0 0 0 0 0 0 0
3 0 0 55 0 1 0 0 1 0 0 18 0 0 0 0 0 0 4 0
4 758 143 0 0 0 0 30 125 259 51 0 5 4 54 26 4 23 0 3
5 4 8 1 0 5 0 27 4 6 0 0 36 105 0 0 0 0 0 0
6 4 0 0 0 17 0 270 2 9 0 0 112 3 0 0 0 0 0 0
7 0 1 0 1 31 0 27 2 1 1 0 5 0 0 0 4 0 0 0
8 0 0 622 0 0 2 0 0 0 0 2 0 1 0 0 0 0 56 0
10 0 0 0 107 0 0 0 0 0 0 4 0 0 0 0 4 0 0 0
11 14 581 0 0 5 5 2 191 5 138 0 0 1 50 51 2 54 0 23
12 0 2 1 0 520 2 34 1 0 1 0 0 6 0 0 0 1 0 0
13 0 0 0 0 0 262 0 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC2 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13
2 0 0 0 0 6 0 0 0 0 139 0 0 0
3 0 0 1 9 1 1 1 0 0 18 0 48 0
4 65 756 607 0 0 0 10 0 42 0 0 1 4
5 6 6 2 0 0 12 65 0 104 0 0 1 0
6 0 22 3 0 0 7 267 0 118 0 0 0 0
7 2 7 5 0 0 21 30 0 3 0 0 0 5
8 0 0 0 627 0 0 0 2 0 2 0 52 0
10 0 0 0 0 109 0 0 0 0 3 0 0 3
11 1073 4 35 0 0 1 1 4 1 0 1 0 2
12 9 0 0 0 0 512 44 1 0 0 1 1 0
13 0 0 0 0 0 0 0 159 1 0 102 0 0
'PBMC2 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 19 20 21 23 24 25 28 29 30 31
2 0 0 0 0 0 0 0 0 0 0 ... 4 136 0 1 4 0 0 0 0 0
3 49 0 4 4 0 1 0 0 0 0 ... 17 1 0 0 0 0 0 0 0 0
4 0 0 0 0 76 0 1 5 6 44 ... 0 0 4 0 0 0 0 0 0 0
5 0 0 0 0 9 10 37 3 4 105 ... 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 11 11 81 64 120 ... 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 1 31 0 14 1 4 ... 0 0 5 0 0 0 0 0 0 0
8 50 140 378 112 0 0 0 0 0 0 ... 2 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 0 0 0 ... 0 3 5 26 59 22 0 0 0 0
11 0 0 0 0 1081 1 0 0 0 1 ... 0 0 2 0 0 0 1 1 2 1
12 1 0 0 0 6 520 11 9 9 1 ... 0 0 0 0 0 0 0 0 1 0
13 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 31 102 59 69

11 rows × 26 columns

# PBMC3, default tool parameters: display each tool's initial cluster count,
# then its contingency matrices against the cellTypist and antibody clusterings.
print_clustering_data(tuning='default', dataset="PBMC3")
'Initial COTAN cluster number:'
57
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
3
'Initial scvi-tools cluster number:'
22
'Initial seurat cluster number:'
17
'PBMC3 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3
1 3021 0 0
2 1 1471 0
3 6 1 655
4 1100 0 0
5 1183 26 33
6 0 156 0
7 1112 1 0
8 484 0 0
9 0 0 396
10 0 408 0
11 430 0 0
13 233 0 0
14 111 0 0
15 4 16 0
16 0 11 0
18 0 57 0
19 0 12 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 13 14 15 16 17 18 19 20 21 22
1 1401 0 153 0 0 29 26 534 121 429 ... 1 0 100 0 0 0 0 0 28 0
2 0 0 0 0 816 0 0 0 0 0 ... 0 2 0 227 0 5 0 35 0 0
3 0 0 0 543 0 0 0 0 5 0 ... 0 0 0 0 111 0 0 2 0 0
4 26 0 0 0 0 806 12 5 29 6 ... 0 0 155 0 0 0 0 0 1 0
5 0 961 0 0 0 0 3 0 0 0 ... 216 0 0 29 33 0 0 0 0 0
6 0 0 0 0 8 0 0 0 0 0 ... 0 0 0 0 0 147 0 0 0 0
7 0 0 683 0 0 0 7 128 236 15 ... 0 0 6 1 0 0 0 0 0 0
8 0 0 0 0 0 0 423 0 32 0 ... 4 0 6 0 0 0 0 0 0 0
9 0 0 0 395 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
10 0 0 0 0 90 0 0 0 0 0 ... 0 311 0 1 0 1 0 1 0 0
11 0 2 0 0 0 0 281 2 54 0 ... 4 0 42 0 0 0 0 0 0 0
13 12 0 116 0 0 0 1 52 9 11 ... 0 0 1 0 0 0 0 0 0 0
14 0 5 1 0 0 0 5 0 3 0 ... 97 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 11 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 57 0 0 0
19 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 12 0 0 0 0 0

17 rows × 22 columns

'PBMC3 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 740 1603 2 1 1 40 86 23 248 1 197 79 0 0 0 0 0
2 0 0 1460 0 1 0 0 0 0 3 0 0 8 0 0 0 0
3 0 0 4 2 655 0 1 0 0 0 0 0 0 0 0 0 0
4 6 11 0 1 1 912 162 2 1 0 4 0 0 0 0 0 0
5 0 0 29 1165 31 0 7 1 0 0 0 0 0 8 0 1 0
6 0 0 10 0 0 0 0 0 0 0 0 0 146 0 0 0 0
7 883 14 1 0 0 1 25 29 80 0 70 10 0 0 0 0 0
8 24 0 0 6 0 1 5 313 0 0 0 135 0 0 0 0 0
9 0 0 0 0 396 0 0 0 0 0 0 0 0 0 0 0 0
10 0 0 97 0 0 0 0 0 0 311 0 0 0 0 0 0 0
11 10 0 0 2 0 2 408 2 0 0 1 4 0 1 0 0 0
13 174 26 0 0 0 0 3 0 25 0 5 0 0 0 0 0 0
14 0 0 1 19 0 0 1 0 0 0 0 0 0 90 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
18 0 0 1 0 1 0 0 0 0 0 0 0 0 0 55 0 0
19 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1740 415 0 0 681 20 162 0 3 0 0 0 0 0 0 0 0 0
2 0 0 1013 0 0 0 0 0 0 0 0 227 0 229 3 0 0 0
3 0 0 0 0 0 0 2 535 0 2 0 1 2 0 0 120 0 0
4 16 6 0 0 3 886 188 0 0 0 0 0 0 0 0 1 0 0
5 0 1 4 1043 0 0 2 1 1 0 0 24 136 0 0 30 0 0
6 0 0 7 0 0 0 0 0 0 0 0 0 0 0 149 0 0 0
7 0 980 0 0 108 0 18 0 7 0 0 0 0 0 0 0 0 0
8 0 18 0 3 0 0 8 0 454 0 0 0 1 0 0 0 0 0
9 0 0 0 0 0 0 0 57 0 336 0 0 0 0 0 3 0 0
10 0 0 84 0 0 0 0 0 0 0 319 1 0 4 0 0 0 0
11 0 14 0 0 0 2 411 0 3 0 0 0 0 0 0 0 0 0
13 13 5 0 0 208 0 7 0 0 0 0 0 0 0 0 0 0 0
14 0 0 0 2 0 0 0 0 0 0 0 0 109 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 52 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12
'PBMC3 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 48 49 50 51 52 53 54 55 56 57
1 8 131 1674 5 9 2 47 40 87 291 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
3 0 1 0 0 0 0 0 1 1 0 ... 32 25 94 108 268 104 2 7 15 0
4 40 52 10 428 380 0 0 2 7 5 ... 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 1 0 0 ... 0 3 2 0 0 25 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 4 4 384 571 53 ... 0 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 16 1 ... 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 34 1 217 142 1 0
10 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
11 1 3 0 0 1 0 0 0 4 11 ... 0 0 0 0 0 0 0 0 0 0
13 0 2 5 0 0 137 69 4 1 1 ... 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 16 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 52
19 0 0 0 0 0 0 0 0 0 0 ... 1 1 0 0 0 0 0 0 8 0

17 rows × 57 columns

'Initial antibody cell/cluster table:'
cluster.ids
9     2220
10    1635
7     1271
13    1067
5     1010
12     909
6      744
2      271
4      214
14     168
3      149
23     133
22      71
Name: count, dtype: int64
'PBMC3 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3
2 2 264 0
3 132 12 3
4 209 2 1
5 993 11 1
6 730 0 0
7 4 1223 0
9 2201 9 1
10 1614 8 3
12 902 2 1
13 2 41 1002
14 140 20 1
22 69 0 0
23 130 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 21
2 0 1 0 0 17 0 0 0 0 0 0 0 1 223 0 24 0 0 0 0
3 0 14 2 0 0 1 5 0 3 0 0 1 106 12 0 0 3 0 0 0
4 4 0 5 0 0 50 25 10 28 15 0 13 1 0 57 0 1 2 0 1
5 0 911 0 0 0 0 2 0 0 0 0 0 80 10 0 1 1 0 0 0
6 1 0 8 0 0 9 422 11 75 1 0 48 3 0 151 1 0 0 0 0
7 0 0 1 0 735 0 0 1 0 0 275 0 0 20 0 167 0 28 0 0
9 92 0 808 1 0 1 26 593 323 196 0 136 0 3 19 1 0 5 0 7
10 1252 2 4 0 0 15 3 18 4 205 0 82 3 1 5 3 4 4 0 20
12 25 1 0 0 1 737 32 0 8 2 0 52 5 0 40 1 1 0 0 0
13 0 0 0 873 1 0 1 0 1 0 1 1 0 6 0 1 128 2 30 0
14 0 17 0 0 0 0 3 0 2 1 0 0 116 20 0 1 1 0 0 0
22 10 0 9 0 0 0 1 18 6 19 0 4 0 0 1 0 0 0 0 1
23 0 0 90 0 0 0 0 18 1 2 0 19 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14
2 0 0 41 2 0 0 0 0 0 223 0 0 0 0
3 3 0 1 48 2 0 6 1 0 12 0 0 0 74
4 18 8 1 0 2 76 80 18 0 0 7 0 2 0
5 0 0 1 979 1 0 0 0 0 10 0 0 0 14
6 14 1 1 1 0 23 457 221 0 0 6 3 0 3
7 2 0 1181 0 0 0 0 0 0 18 0 0 26 0
9 1524 275 1 0 2 1 26 47 171 3 134 22 5 0
10 52 1281 1 6 4 13 4 2 149 2 101 4 6 0
12 0 5 2 4 1 816 64 5 2 0 1 5 0 0
13 0 0 31 1 994 0 1 1 8 7 0 0 2 0
14 0 0 1 129 1 0 0 3 0 20 0 0 0 7
22 39 14 0 0 0 0 0 4 4 0 6 2 0 0
23 111 2 0 0 0 0 0 0 13 0 4 0 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
2 0 0 18 1 0 0 0 0 0 0 226 20 1 0 0 0
3 0 3 0 25 0 1 1 0 2 0 12 0 101 0 0 2
4 11 28 0 0 12 48 97 0 12 0 0 0 1 0 2 1
5 0 0 0 979 0 0 0 0 0 0 10 1 14 0 0 1
6 1 22 0 0 6 16 461 0 221 0 0 1 2 0 0 0
7 0 2 873 0 0 0 0 0 0 0 22 139 0 164 27 0
9 129 1302 0 0 716 0 31 1 23 0 3 1 0 0 5 0
10 1543 8 1 2 46 3 6 0 2 0 0 3 3 0 4 4
12 6 0 1 0 3 816 69 0 4 0 0 1 4 0 0 1
13 0 0 1 1 0 0 0 549 1 327 7 30 0 1 2 126
14 0 1 0 14 1 0 0 0 2 0 20 1 121 0 0 1
22 12 19 0 0 34 0 3 0 1 0 0 0 0 0 0 0
23 0 4 0 0 126 0 0 0 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 47 48 49 50 51 52 53 54 55 56
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 1 0 0 0 1 3 ... 0 0 1 0 0 0 1 0 0 0
4 1 5 14 46 8 1 4 10 16 11 ... 0 0 0 0 0 0 1 0 0 0
5 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
6 2 5 2 7 2 1 0 14 9 6 ... 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 1 1 ... 255 0 0 0 0 0 0 0 0 0
9 0 25 135 0 0 28 69 384 639 293 ... 0 0 0 0 0 1 0 0 0 0
10 2 89 1461 2 1 0 19 6 2 1 ... 0 0 0 0 0 0 3 0 0 0
12 33 47 1 373 368 0 0 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
13 0 1 0 0 0 0 0 0 1 0 ... 0 36 26 90 98 276 104 212 144 14
14 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0 1 0 0 0
22 0 0 18 0 0 1 3 5 6 3 ... 0 0 0 0 0 0 0 0 0 0
23 0 0 0 0 0 108 14 4 0 3 ... 0 0 0 0 0 0 0 0 0 0

13 rows × 54 columns

# PBMC4, default tool parameters: display each tool's initial cluster count,
# then its contingency matrices against the cellTypist and antibody clusterings.
print_clustering_data(tuning='default', dataset="PBMC4")
'Initial COTAN cluster number:'
34
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
3
'Initial scvi-tools cluster number:'
22
'Initial seurat cluster number:'
16
'PBMC4 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3
1 407 0 0
2 11 0 797
3 1330 1 0
4 108 0 0
5 9 2178 13
6 308 0 0
7 77 0 0
8 538 0 0
9 358 1 0
10 0 307 0
11 1 1 222
12 0 28 0
13 8 3 2
14 106 0 0
15 0 92 1
16 0 59 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 13 14 15 16 17 18 19 20 21 22
1 10 0 0 0 0 0 0 384 1 0 ... 0 6 0 0 0 1 5 0 0 0
2 0 673 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 34
3 300 0 0 0 496 385 0 1 0 0 ... 136 10 3 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 6 10 0 ... 0 7 0 0 0 0 85 0 0 0
5 1 0 596 456 0 0 427 0 0 281 ... 0 0 0 169 76 145 0 48 0 0
6 7 0 0 0 4 3 0 0 0 0 ... 8 99 187 0 0 0 0 0 0 0
7 3 0 0 0 0 0 0 1 0 0 ... 0 67 0 0 0 0 6 0 0 0
8 462 0 0 0 8 46 0 1 0 0 ... 19 2 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 5 348 0 ... 1 0 0 0 0 0 5 0 0 0
10 0 0 0 74 0 0 1 0 0 2 ... 0 0 0 10 2 1 0 1 0 0
11 0 46 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 5
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 28 0 0
13 7 0 0 0 0 0 0 0 0 0 ... 6 0 0 0 0 0 0 0 0 0
14 42 0 0 0 1 13 0 1 0 0 ... 47 2 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 2 88 2 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 59 0

16 rows × 22 columns

'PBMC4 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
1 0 0 4 1 0 0 1 0 315 0 0 0 86 0 0 0
2 800 0 2 1 0 0 2 0 1 0 1 0 1 0 0 0
3 0 939 357 2 0 0 20 0 1 0 0 0 0 0 0 12
4 0 0 0 0 0 0 105 2 0 0 0 0 1 0 0 0
5 4 0 1 805 746 413 0 4 2 1 167 15 1 41 0 0
6 0 8 8 0 0 0 291 0 1 0 0 0 0 0 0 0
7 0 0 3 0 0 0 67 1 6 0 0 0 0 0 0 0
8 0 31 483 0 0 0 3 0 6 0 0 0 0 0 0 15
9 0 0 1 2 0 0 0 355 0 0 0 0 1 0 0 0
10 0 0 0 14 0 94 1 0 0 194 4 0 0 0 0 0
11 217 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0 0
13 0 1 1 0 0 0 0 6 0 0 0 0 0 0 5 0
14 1 9 89 0 0 0 4 0 1 0 0 0 0 0 0 2
15 0 0 0 2 0 3 0 0 0 0 1 87 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 1 0 0 58 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 5 0 0 0 0 0 398 0 0 4 0 0 0 0 0 0 0 0 0
2 0 0 0 0 632 0 1 0 122 1 0 0 0 0 0 0 0 52 0
3 356 854 0 0 0 0 0 0 0 2 116 0 3 0 0 0 0 0 0
4 0 0 0 0 0 0 0 1 0 107 0 0 0 0 0 0 0 0 0
5 2 0 780 723 0 447 2 0 2 0 1 1 0 136 28 35 0 0 43
6 31 15 0 0 0 0 0 0 0 68 5 0 189 0 0 0 0 0 0
7 3 0 0 0 0 0 3 0 0 69 2 0 0 0 0 0 0 0 0
8 525 0 0 0 0 0 1 0 0 2 10 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 3 354 1 0 1 0 0 0 0 0 0 0 0
10 0 0 0 7 0 86 0 0 0 0 0 213 0 1 0 0 0 0 0
11 0 0 0 0 1 0 0 0 223 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0 0 0
13 10 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0
14 4 0 0 0 0 0 0 0 0 1 101 0 0 0 0 0 0 0 0
15 0 0 0 1 0 1 0 0 0 0 0 0 0 0 90 0 0 0 1
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 25 26 27 28 29 30 31 32 33 34
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 3 1
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
3 0 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 41 4 958 5 321 1
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 3
5 40 131 59 247 199 293 724 448 1 27 ... 0 0 0 0 0 0 0 0 1 0
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 4 0 7 202 26 69
7 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 2 0 0 0 2 66
8 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 7 0 528 2
9 0 0 0 0 0 0 0 0 0 0 ... 55 36 118 147 0 1 0 0 1 0
10 0 1 65 18 0 2 6 0 215 0 ... 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 1 ... 8 0 0 0 0 0 1 0 1 0
14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 30 63 1 0 11 1
15 1 0 0 1 0 0 1 0 0 90 ... 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

16 rows × 34 columns

'Initial antibody cell/cluster table:'
cluster.ids
3     2280
1     1367
10    1018
9      488
2      351
14     348
4      242
5      224
24     194
26      64
22      43
12      41
Name: count, dtype: int64
'PBMC4 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3
1 1341 4 0
2 334 3 0
3 8 2153 0
4 241 0 0
5 16 195 0
9 473 1 0
10 12 39 931
12 38 0 0
14 343 1 0
22 42 0 0
24 192 0 0
26 63 0 0
'PBMC4 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 ... 12 13 14 15 16 17 18 19 20 22
1 744 0 0 0 195 247 0 7 1 0 ... 0 139 6 0 0 3 1 2 0 0
2 10 0 0 0 0 0 0 4 317 0 ... 0 0 0 0 0 3 0 3 0 0
3 1 0 573 514 1 0 405 0 0 273 ... 29 2 0 0 178 26 118 0 41 0
4 3 0 0 0 3 2 0 1 2 0 ... 0 8 30 174 0 0 0 18 0 0
5 0 0 1 0 0 0 1 0 14 0 ... 178 0 0 0 0 3 11 2 1 0
9 10 0 0 0 1 3 0 283 0 0 ... 0 2 130 8 0 1 0 36 0 0
10 1 657 1 0 0 0 1 0 5 1 ... 1 0 0 0 0 40 0 0 3 37
12 0 0 0 0 0 0 0 2 3 0 ... 0 0 2 0 0 0 0 31 0 0
14 10 0 0 0 205 105 0 0 0 0 ... 0 23 0 0 0 1 0 0 0 0
22 6 0 0 0 2 2 0 2 0 0 ... 0 0 24 6 0 0 0 0 0 0
24 9 0 0 0 97 75 0 0 1 0 ... 0 9 0 1 0 0 0 0 0 0
26 27 0 0 0 1 12 0 1 0 0 ... 0 22 0 0 0 0 0 0 0 0

12 rows × 21 columns

'PBMC4 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 14 16
1 0 454 839 0 1 1 8 1 12 0 0 2 0 27
2 0 1 4 1 0 0 0 328 0 0 0 3 0 0
3 0 2 0 719 730 476 1 0 2 15 168 14 34 0
4 0 4 2 0 0 0 230 3 2 0 0 0 0 0
5 0 0 0 10 1 7 0 19 0 170 0 3 1 0
9 0 2 8 0 0 0 175 0 288 0 0 1 0 0
10 921 0 3 36 4 1 0 5 0 2 8 0 2 0
12 0 0 0 0 0 0 38 0 0 0 0 0 0 0
14 0 324 17 0 0 0 0 0 0 0 0 1 0 2
22 0 3 4 0 0 0 27 0 8 0 0 0 0 0
24 0 185 4 0 0 0 2 1 0 0 0 0 0 0
26 1 4 58 0 0 0 0 0 0 0 0 0 0 0
'PBMC4 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 19
1 852 323 0 0 0 0 9 1 0 5 151 0 0 1 3 0 0 0
2 9 0 0 0 0 0 2 323 0 0 0 0 0 0 3 0 0 0
3 4 1 750 693 0 519 1 0 0 0 1 25 0 112 26 29 0 0
4 1 11 0 0 0 0 1 2 0 46 2 0 178 0 0 0 0 0
5 0 0 1 2 0 2 1 15 0 0 0 177 0 9 3 1 0 0
9 28 5 0 0 0 0 285 0 0 140 9 0 6 0 1 0 0 0
10 1 0 3 3 598 0 0 4 304 0 0 1 0 0 0 2 29 37
12 0 0 0 0 0 0 0 1 0 36 1 0 0 0 0 0 0 0
14 10 326 0 0 0 0 0 0 0 0 6 0 1 0 1 0 0 0
22 12 11 0 0 0 0 4 0 0 9 0 0 6 0 0 0 0 0
24 3 186 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0
26 3 0 0 0 0 0 0 0 0 0 60 0 0 0 0 0 0 0
'PBMC4 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 25 26 27 28 29 30 31 32 33 34
1 0 1 0 0 0 0 0 0 0 3 ... 0 0 1 0 60 15 431 0 819 4
2 0 0 0 0 0 0 0 0 0 3 ... 56 29 102 142 0 0 0 0 4 0
3 0 109 119 254 195 292 687 427 26 25 ... 0 0 0 0 0 0 2 0 2 0
4 0 0 0 0 0 0 0 0 0 0 ... 1 0 1 0 1 0 4 187 2 26
5 0 9 0 0 0 1 2 1 178 3 ... 0 4 10 1 0 0 0 0 1 0
9 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 5 1 3 9 27 102
10 33 1 1 1 1 0 4 2 1 0 ... 1 1 1 2 0 0 1 0 1 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 1
14 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 1 332 1 9 0
22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 8 9 11 8
24 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 186 1 4 0
26 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 6 51 1 0 5 0

12 rows × 31 columns

Against the cellTypist cluster number (tuning = 'celltypist')

# PBMC1, 'celltypist' tuning: display each tool's initial cluster count and its
# contingency matrix against the cellTypist clustering.
# NOTE(review): presumably the tools were tuned to approximate the cellTypist
# cluster count for this run — confirm against print_clustering_data.
print_clustering_data(tuning='celltypist', dataset="PBMC1")
'Initial COTAN cluster number:'
18
'Initial monocle cluster number:'
18
'Initial scanpy cluster number:'
18
'Initial scvi-tools cluster number:'
17
'Initial seurat cluster number:'
20
'PBMC1 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 1 3 1 273 0 0 0 237 227 147 0 6 6 0 77 1 0
2 66 0 0 0 0 230 228 218 0 0 0 145 25 0 31 0 0 0
3 3 0 0 0 0 0 2 0 0 0 0 0 36 0 6 0 0 0
4 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 73 0
5 200 0 0 7 0 1 0 0 0 0 0 1 65 0 35 0 0 0
6 0 142 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 36 0 0 40 0 0 0 0 0 0 0 0 6 0 0 0 0 0
8 0 1 267 10 0 0 0 0 0 0 0 0 1 0 0 0 0 0
9 2 0 0 0 0 22 15 20 0 0 0 21 0 0 1 0 0 0
10 0 0 0 0 1 0 0 0 0 2 37 0 0 131 0 0 0 0
11 0 0 21 49 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 27 0 0 179 0 3 2 1 0 0 0 4 14 0 10 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 155 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 0 0 369 243 292 0 0 0 0 1 0 0 0 0 0 74 0
2 474 89 0 0 0 258 0 2 0 0 0 111 0 8 0 1 0
3 0 45 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 2 0 0 0 0 0 0 0 0 0 0 76 0 0
5 2 260 0 0 0 2 0 42 0 0 0 3 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 4 138 0 0 0 0 0 0
7 0 8 0 0 0 0 0 55 19 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 263 0 16 0 0 0 0 0 0 0 0
9 5 2 0 0 0 5 0 1 0 0 0 2 0 66 0 0 0
10 0 0 0 78 2 0 0 0 0 0 0 0 91 0 0 0 0
11 0 0 0 0 0 0 5 0 65 0 0 0 0 0 0 0 0
12 0 11 0 0 0 6 0 143 78 0 0 0 0 2 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
14 0 0 0 0 0 0 0 0 0 154 1 0 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 353 261 1 2 0 1 183 0 4 0 152 0 1 19 1 0 0 0 0
2 43 0 0 278 0 227 187 0 180 0 0 0 0 0 0 0 9 16 0 3
3 4 0 0 9 1 0 0 0 2 0 0 0 0 0 0 0 0 31 0 0
4 0 0 5 0 0 0 0 0 0 0 0 0 0 0 73 0 0 0 0 0
5 279 0 0 11 4 9 2 0 3 0 0 0 0 0 0 0 0 1 0 0
6 0 0 0 0 0 0 0 0 0 0 142 0 0 0 0 0 0 0 0 0
7 55 0 0 0 26 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
8 0 0 0 0 1 0 0 0 0 152 1 0 0 125 0 0 0 0 0 0
9 1 0 0 1 1 3 1 0 0 0 0 0 0 0 0 0 72 0 0 2
10 0 0 86 0 0 0 0 4 0 0 0 0 0 0 0 81 0 0 0 0
11 0 0 0 0 65 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0
12 45 0 0 1 173 1 0 0 2 0 0 0 0 0 0 0 0 1 0 17
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0
14 0 0 0 0 0 0 0 0 0 0 9 0 145 0 0 0 0 0 0 1
'PBMC1 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 ... 12 13 14 15 16 17 18 19 20 21
1 0 0 351 267 0 229 0 0 0 0 ... 0 3 0 1 88 0 0 40 0 0
2 423 119 0 0 238 0 154 0 0 0 ... 0 0 0 0 0 0 7 0 1 0
3 1 15 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 31 0
4 0 0 0 4 0 0 0 0 0 0 ... 0 0 0 0 0 74 0 0 0 0
5 0 258 0 0 0 0 1 0 1 0 ... 0 0 0 0 0 0 0 0 2 0
6 0 0 0 0 0 0 0 0 0 0 ... 142 0 0 0 0 0 0 0 0 0
7 0 4 0 0 0 0 0 0 13 0 ... 0 0 12 1 0 0 0 0 0 0
8 0 0 0 0 0 0 0 151 0 0 ... 0 0 5 123 0 0 0 0 0 0
9 10 2 0 0 11 0 11 0 1 0 ... 0 0 0 0 0 0 46 0 0 0
10 0 0 0 38 0 0 0 0 0 0 ... 0 130 0 0 3 0 0 0 0 0
11 0 0 0 0 0 0 0 4 0 0 ... 0 0 66 0 0 0 0 0 0 0
12 1 3 0 0 4 0 0 0 138 0 ... 0 0 44 0 0 0 2 0 1 0
13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 27
14 0 0 0 0 0 0 0 0 0 153 ... 2 0 0 0 0 0 0 0 0 0

14 rows × 21 columns

'PBMC1 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 35 0 0 272 464 207 0 0 0 0 0 0 0 0 0 1 0 0
2 0 0 0 0 0 0 0 0 0 800 31 24 85 3 0 0 0 0
3 0 0 0 0 0 0 0 0 0 2 0 22 10 7 0 0 0 6
4 0 73 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 7 9 293 0 0 0 0 0
6 0 0 0 0 0 0 0 0 142 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 0 0 56 0 25 1 0 0
8 0 0 0 0 0 0 0 0 0 0 0 3 1 5 4 115 104 47
9 0 0 0 0 0 0 0 0 0 3 77 0 0 0 1 0 0 0
10 0 1 122 0 5 42 0 0 0 0 1 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 67 0 2 1
12 0 0 0 0 0 0 0 0 0 4 4 5 51 0 175 0 0 1
13 0 1 0 0 0 0 27 0 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 152 3 0 0 0 0 0 0 0 0 0
# PBMC2, 'celltypist' tuning: display each tool's initial cluster count and its
# contingency matrix against the cellTypist clustering.
print_clustering_data(tuning='celltypist', dataset="PBMC2")
'Initial COTAN cluster number:'
17
'Initial monocle cluster number:'
17
'Initial scanpy cluster number:'
18
'Initial scvi-tools cluster number:'
20
'Initial seurat cluster number:'
19
'PBMC2 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1 0 0 1 0 5 0 55 0 0 12 38 6 90 1 0 0 22
2 0 0 0 0 0 279 0 6 22 0 53 22 24 1 19 0 0 1
3 0 1 332 1 407 130 338 27 297 36 202 95 99 38 132 0 0 7
4 577 1 0 6 0 0 0 2 0 0 0 2 2 42 0 0 0 75
5 1 0 0 0 0 15 0 258 0 0 9 9 2 13 0 0 0 9
6 0 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 77 0
7 0 558 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
8 0 0 142 0 32 4 69 4 11 292 24 59 4 2 31 0 0 0
9 0 13 0 0 0 0 0 0 0 0 0 0 0 0 0 173 0 0
10 43 0 0 0 0 0 0 1 0 0 0 0 0 5 0 0 0 3
11 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 0
15 0 0 0 0 0 4 0 1 3 0 0 0 66 3 3 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 0 0 0 0 0 1 0 2 1 0 15 2 57 5 0 148 0 0 0 0
2 0 0 0 0 0 0 131 200 35 2 2 0 1 55 0 1 0 0 0 0
3 21 575 490 0 0 0 230 107 248 286 10 2 48 98 0 26 0 0 0 1
4 0 0 0 0 471 8 0 1 0 0 2 222 0 0 0 2 1 0 0 0
5 0 0 0 0 0 0 1 1 0 0 253 0 3 52 0 6 0 0 0 0
6 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 88 0 0
7 0 0 0 464 0 0 0 0 0 0 0 0 0 0 6 0 88 0 0 0
8 558 2 0 0 0 0 1 0 0 0 0 0 112 1 0 0 0 0 0 0
9 0 0 0 12 0 0 0 0 0 0 0 0 0 0 174 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 0 50 0 2 0 0 0 0 0 0
11 0 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 13
15 0 0 0 0 0 0 10 44 17 1 1 1 2 3 0 1 0 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 0 0 0 1 0 217 0 0 0 7 0 3 0 3 0 0 0
2 348 2 0 0 0 4 7 0 2 3 60 0 1 0 0 0 0 0
3 469 4 1 15 1 412 69 356 324 322 165 0 4 0 0 0 0 0
4 1 667 1 0 8 0 6 0 0 0 0 0 0 0 1 23 0 0
5 10 2 0 0 0 0 7 0 1 0 65 0 159 0 72 0 0 0
6 0 0 9 0 0 0 0 0 0 0 0 0 0 80 0 0 4 0
7 0 0 556 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
8 1 0 1 561 0 2 99 1 2 2 5 0 0 0 0 0 0 0
9 0 0 13 0 0 0 0 0 0 0 0 173 0 0 0 0 0 0
10 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 47 0 0
11 0 0 0 0 228 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 204 0 0 0 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14
15 61 0 0 0 0 3 2 1 1 3 9 0 0 0 0 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 2 0 0 0 0 0 0 0 7 1 0 1 41 178 0 0 1 0 0 0
2 389 0 0 0 0 0 0 1 10 3 0 23 1 0 0 0 0 0 0 0
3 259 0 578 568 15 0 1 290 188 0 2 169 62 8 0 0 2 0 0 0
4 0 635 0 0 0 0 7 0 2 0 0 0 0 3 0 1 59 0 0 0
5 8 0 0 0 0 0 0 0 1 268 0 32 1 6 0 0 0 0 0 0
6 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 82 0 0
7 0 0 0 0 0 305 0 0 0 0 253 0 0 0 0 0 0 0 0 0
8 1 0 8 0 541 0 0 0 1 0 0 3 119 1 0 0 0 0 0 0
9 0 0 0 0 0 8 0 0 0 0 6 0 0 0 172 0 0 0 0 0
10 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 50 0 0 0
11 0 0 0 0 0 0 83 0 0 0 0 0 0 0 0 145 0 0 0 0
12 0 0 0 0 0 0 201 0 0 1 0 0 0 0 0 2 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14
15 1 0 0 0 0 0 0 0 76 0 0 0 0 2 0 0 1 0 0 0
'PBMC2 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 0 0 0 3 4 220 3 0 0 0 1 0 0 0 0 0 0
2 0 0 0 416 9 0 2 0 0 0 0 0 0 0 0 0 0
3 0 2 1186 847 31 72 3 0 0 0 0 0 0 0 0 0 1
4 639 56 0 0 2 3 0 0 0 0 0 0 0 0 1 5 1
5 0 0 0 39 0 7 270 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 86 0 7 0 0 0 0 0
7 1 0 0 0 0 0 0 0 0 0 81 300 174 0 0 2 0
8 0 0 568 9 1 96 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 154 24 0 0 8 0 0 0 0 0
10 1 51 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 145 69 14
12 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 22 180
13 0 0 0 0 0 0 0 0 0 0 0 0 0 48 0 0 0
14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 1
15 0 1 0 6 72 1 0 0 0 0 0 0 0 0 0 0 0
# PBMC3, 'celltypist' tuning: display each tool's initial cluster count and its
# contingency matrix against the cellTypist clustering.
print_clustering_data(tuning='celltypist', dataset="PBMC3")
'Initial COTAN cluster number:'
23
'Initial monocle cluster number:'
23
'Initial scanpy cluster number:'
17
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
20
'PBMC3 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1 860 0 0 0 38 559 313 338 231 324 158 0 195 0 0 5 0
2 0 1041 0 0 0 0 1 0 0 0 0 390 0 2 7 0 31
3 0 0 0 654 0 0 2 2 1 0 0 0 1 0 0 0 2
4 400 0 0 0 12 309 29 47 12 157 78 0 56 0 0 0 0
5 0 22 1036 33 22 0 0 0 0 0 3 2 1 0 2 121 0
6 0 11 0 0 0 0 0 0 0 0 0 1 0 0 144 0 0
7 0 0 0 0 134 1 247 305 248 0 135 1 42 0 0 0 0
8 0 0 3 0 435 0 3 7 0 0 12 0 20 0 0 4 0
9 0 0 0 396 0 0 0 0 0 0 0 0 0 0 0 0 0
10 0 73 0 0 0 0 0 0 0 0 0 4 0 328 2 0 1
11 0 0 8 0 277 2 21 26 1 0 47 0 46 0 0 2 0
13 11 0 0 0 0 8 157 9 19 1 7 0 21 0 0 0 0
14 0 0 79 0 12 0 0 0 0 0 0 0 0 0 1 19 0
15 1 0 0 0 0 0 0 3 0 0 0 0 0 0 16 0 0
16 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 57 0 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1670 0 0 212 0 32 234 621 23 228 0 0 1 0 0 0 0 0
2 0 1228 0 0 0 0 0 0 0 0 2 202 0 0 5 0 35 0
3 0 0 0 0 543 0 5 0 0 1 0 0 0 111 0 0 2 0
4 29 0 0 0 0 811 192 5 4 59 0 0 0 0 0 0 0 0
5 0 0 1028 0 0 0 0 0 7 0 0 29 145 33 0 0 0 0
6 0 7 0 0 0 0 0 0 0 0 0 0 0 0 149 0 0 0
7 1 0 0 702 0 0 230 99 7 73 0 1 0 0 0 0 0 0
8 0 0 3 0 0 0 47 0 433 0 0 0 1 0 0 0 0 0
9 0 0 0 0 395 0 0 0 0 0 0 0 0 1 0 0 0 0
10 0 94 0 0 0 0 0 0 0 0 311 1 0 0 1 0 1 0
11 0 0 2 0 0 0 116 0 290 21 0 0 1 0 0 0 0 0
13 16 0 0 44 0 0 10 55 1 107 0 0 0 0 0 0 0 0
14 0 0 11 0 0 0 4 0 5 1 0 0 90 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 57 0 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 12 0 0 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 2 657 1415 2 2 43 614 91 5 0 64 0 14 49 0 0 32 31 0
2 1463 0 0 0 0 0 0 0 0 1 0 8 0 0 0 0 0 0 0
3 4 0 1 0 654 0 2 1 0 0 0 0 0 0 0 0 0 0 0
4 0 6 13 1 0 856 4 168 0 0 0 0 0 52 0 0 0 0 0
5 29 0 0 1172 32 0 0 2 0 0 1 0 0 0 5 0 0 0 1
6 9 0 0 0 0 0 0 0 0 0 0 147 0 0 0 0 0 0 0
7 1 827 5 0 0 1 187 35 22 0 0 0 24 8 0 0 1 2 0
8 0 30 0 2 0 1 3 12 305 0 131 0 0 0 0 0 0 0 0
9 0 0 0 0 396 0 0 0 0 0 0 0 0 0 0 0 0 0 0
10 83 0 0 0 0 0 0 0 0 324 0 1 0 0 0 0 0 0 0
11 0 15 0 2 1 3 1 399 3 0 5 0 0 0 1 0 0 0 0
13 0 49 21 0 0 0 22 6 0 0 0 0 131 2 0 0 2 0 0
14 0 0 0 31 0 0 0 0 0 0 0 0 0 0 80 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 0 0
18 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 52 0 0 0
19 0 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0
'PBMC3 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 1744 412 0 0 681 20 161 0 3 0 0 0 0 0 0 0 0 0
2 0 0 1011 0 0 0 0 0 0 0 0 227 0 231 3 0 0 0
3 0 0 0 0 0 0 2 535 0 2 0 1 2 0 0 120 0 0
4 16 6 0 0 3 886 188 0 0 0 0 0 0 0 0 1 0 0
5 0 1 4 1043 0 0 2 1 1 0 0 24 136 0 0 30 0 0
6 0 0 7 0 0 0 0 0 0 0 0 0 0 0 149 0 0 0
7 0 995 0 0 93 0 18 0 7 0 0 0 0 0 0 0 0 0
8 0 18 0 3 0 0 8 0 454 0 0 0 1 0 0 0 0 0
9 0 0 0 0 0 0 0 57 0 336 0 0 0 0 0 3 0 0
10 0 0 89 0 0 0 0 0 0 0 314 1 0 4 0 0 0 0
11 0 14 0 0 0 2 411 0 3 0 0 0 0 0 0 0 0 0
13 13 8 0 0 205 0 7 0 0 0 0 0 0 0 0 0 0 0
14 0 0 0 2 0 0 0 0 0 0 0 0 109 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 52 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12
'PBMC3 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 ... 14 15 16 17 18 19 20 21 22 23
1 1822 80 633 127 291 53 12 0 0 3 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 1 0 0 0 0 1 1 0 ... 68 517 404 324 0 0 0 1 0 0
3 1 0 2 2 0 2 0 0 0 0 ... 0 0 0 0 9 136 387 108 15 0
4 482 430 156 9 5 17 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
5 0 0 0 1 0 138 2 28 281 755 ... 0 1 0 0 0 25 5 0 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 5 1 4 0 0 0 0 0 0 0
7 0 0 87 955 53 11 7 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
8 0 0 1 16 1 8 454 0 3 1 ... 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 359 2 34 0 1 0
10 0 0 0 0 0 0 0 0 0 0 ... 82 31 91 0 0 0 0 0 0 0
11 5 1 86 4 11 29 255 0 24 15 ... 0 0 0 0 0 0 0 0 0 0
13 7 0 210 5 1 10 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
14 0 0 0 1 0 10 0 0 100 0 ... 0 0 0 0 0 0 0 0 0 0
15 0 0 3 0 0 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 16 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 11 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 52
19 0 0 0 0 0 1 0 0 0 1 ... 0 0 0 0 0 1 1 0 8 0

17 rows × 23 columns

# PBMC4, 'celltypist' tuning: display each tool's initial cluster count and its
# contingency matrix against the cellTypist clustering.
print_clustering_data(tuning='celltypist', dataset="PBMC4")
'Initial COTAN cluster number:'
15
'Initial monocle cluster number:'
21
'Initial scanpy cluster number:'
16
'Initial scvi-tools cluster number:'
18
'Initial seurat cluster number:'
18
'PBMC4 - contingency_matrix (rows: cellTypist - cols: monocle)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
1 0 0 9 0 0 0 0 12 0 381 1 0 0 0 0 0
2 755 2 1 0 0 0 0 2 0 3 3 0 0 0 0 41
3 0 705 149 0 0 0 379 0 3 0 92 0 0 0 0 0
4 0 0 1 0 0 0 0 97 0 8 1 0 0 0 0 0
5 5 2 2 647 637 405 0 1 419 3 2 3 34 0 29 0
6 0 196 83 0 0 0 19 1 0 0 9 0 0 0 0 0
7 0 0 51 0 0 0 0 9 0 15 2 0 0 0 0 0
8 0 2 417 0 0 0 77 0 0 5 37 0 0 0 0 0
9 0 0 0 0 0 0 0 351 1 3 4 0 0 0 0 0
10 0 0 0 2 3 76 0 0 5 0 0 219 2 0 0 0
11 220 0 0 0 0 0 0 0 1 0 1 0 0 0 0 2
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0
13 1 2 2 0 0 0 0 0 0 4 0 0 0 4 0 0
14 0 4 4 0 0 0 1 0 0 0 97 0 0 0 0 0
15 0 0 0 11 5 0 0 0 1 0 0 1 72 3 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 0 10 0 0 0 0 381 5 1 0 0 0 0 1 5 0 0 0
2 0 0 0 672 0 0 0 0 1 134 0 0 0 0 0 0 0 0
3 0 346 0 0 508 386 1 10 0 0 0 0 0 0 0 77 0 0
4 0 0 0 0 0 0 6 7 10 0 0 0 0 0 84 0 0 0
5 924 1 818 0 0 0 0 0 0 0 1 181 77 140 0 0 47 0
6 0 7 0 0 5 2 0 286 0 0 0 0 0 0 0 8 0 0
7 0 3 0 0 0 0 1 67 0 0 0 0 0 0 6 0 0 0
8 0 483 0 0 8 43 1 2 0 0 0 0 0 0 0 1 0 0
9 0 1 0 0 0 0 5 0 348 0 0 0 0 0 5 0 0 0
10 74 0 2 0 0 0 0 0 0 0 216 10 2 2 0 0 1 0
11 0 0 0 46 0 0 0 0 0 177 0 0 1 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0
13 0 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
14 0 68 0 0 1 22 1 2 0 0 0 0 0 0 0 12 0 0
15 0 0 0 0 0 0 0 0 0 0 0 2 89 2 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59
'PBMC4 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 3 0 0 0 0 0 0 0 311 0 0 1 0 0 86 2 0 0
2 1 1 0 0 686 0 1 1 10 0 104 2 0 0 0 1 0 0
3 438 838 1 0 0 0 0 21 0 0 0 0 30 0 0 0 0 0
4 0 0 0 0 0 0 7 1 0 0 0 99 0 0 0 0 0 0
5 1 1 772 696 2 506 15 0 0 65 2 1 0 23 2 83 0 20
6 10 8 0 0 0 0 0 287 1 0 0 1 1 0 0 0 0 0
7 5 0 0 0 0 0 0 33 5 0 0 34 0 0 0 0 0 0
8 507 15 0 0 0 0 1 3 2 0 0 0 10 0 0 0 0 0
9 0 0 0 0 0 0 358 0 0 0 0 0 1 0 0 0 0 0
10 0 0 13 1 0 27 0 0 0 265 0 0 0 0 0 1 0 0
11 0 0 0 0 5 0 0 0 0 0 219 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
13 6 0 0 0 0 0 0 6 1 0 0 0 0 0 0 0 0 0
14 16 5 0 0 0 0 0 0 0 0 0 4 81 0 0 0 0 0
15 0 0 2 0 0 1 1 0 0 0 0 0 0 89 0 0 0 0
16 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 57 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
1 5 0 0 0 0 0 394 0 0 4 0 0 0 0 0 0 0 0 0
2 0 0 0 0 631 0 1 0 122 1 0 0 0 0 0 0 0 52 0
3 361 841 0 0 0 0 0 0 0 2 121 0 3 0 0 0 0 0 0
4 0 0 0 0 0 0 0 1 0 106 0 0 0 0 0 0 0 0 0
5 2 0 770 728 0 445 2 0 2 0 1 1 0 133 27 35 0 0 43
6 31 15 0 0 0 0 0 0 0 68 5 0 189 0 0 0 0 0 0
7 3 0 0 0 0 0 3 0 0 69 2 0 0 0 0 0 0 0 0
8 525 0 0 0 0 0 1 0 0 2 10 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 3 354 1 0 1 0 0 0 0 0 0 0 0
10 0 0 0 7 0 86 0 0 0 0 0 213 0 1 0 0 0 0 0
11 0 0 0 0 1 0 0 0 223 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 0 0 0
13 10 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0
14 4 0 0 0 0 0 0 0 0 1 101 0 0 0 0 0 0 0 0
15 0 0 0 1 0 1 0 0 0 0 0 0 0 0 90 0 0 0 1
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0 0
'PBMC4 - contingency_matrix (rows: cellTypist - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1 0 0 0 0 0 0 4 0 1 398 0 0 0 0 0
2 0 0 0 0 0 0 0 1 1 1 47 344 413 0 0
3 0 0 0 0 0 0 366 961 1 0 0 0 0 0 0
4 0 0 0 0 0 0 3 0 104 0 0 0 0 0 0
5 129 719 530 739 66 1 1 0 0 2 0 1 1 0 0
6 0 0 0 0 0 0 99 209 0 0 0 0 0 0 0
7 0 0 0 0 0 0 70 0 3 4 0 0 0 0 0
8 0 0 0 0 0 0 530 7 0 1 0 0 0 0 0
9 0 0 0 0 0 0 2 0 356 1 0 0 0 0 0
10 1 6 83 2 0 215 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 24 199 1 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28
13 0 0 0 0 1 0 1 1 8 0 1 1 0 0 0
14 0 0 0 0 0 0 105 1 0 0 0 0 0 0 0
15 0 1 1 0 91 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 59 0

Against the antibody number of clusters

print_clustering_data(tuning = 'antibody',dataset="PBMC1")
'Initial COTAN cluster number:'
13
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
9
'Initial scvi-tools cluster number:'
11
'Initial seurat cluster number:'
10
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3 4 5 6 7 8 9
1 94 0 17 50 0 0 0 0 0
2 0 4 0 0 3 0 38 1 0
3 35 0 500 65 0 0 0 0 0
4 0 1 0 262 0 0 0 0 0
5 2 0 29 127 0 0 0 0 0
6 0 1 0 0 0 1 1 58 26
7 2 731 0 4 0 275 95 17 1
8 776 0 30 6 1 0 0 0 1
9 0 0 0 1 294 0 0 0 0
10 0 0 0 44 0 0 0 0 0
12 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11
1 22 0 7 60 0 0 0 0 0 0 72
2 0 0 0 0 5 0 2 1 0 38 0
3 67 0 408 125 0 0 0 0 0 0 0
4 0 1 0 13 0 249 0 0 0 0 0
5 2 0 10 146 0 0 0 0 0 0 0
6 3 0 1 0 0 0 0 0 82 1 0
7 2 702 1 0 348 0 0 0 20 52 0
8 780 0 21 6 0 1 1 0 1 0 4
9 0 0 0 1 0 1 156 137 0 0 0
10 0 0 0 37 0 7 0 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10
1 28 0 0 22 0 1 36 0 74 0
2 0 3 42 0 1 0 0 0 0 0
3 123 0 0 434 0 0 43 0 0 0
4 0 1 0 0 0 259 3 0 0 0
5 0 0 0 17 0 0 138 0 3 0
6 0 1 1 0 0 0 0 56 3 26
7 1 644 451 0 0 1 0 27 0 1
8 786 0 0 19 1 2 3 0 2 1
9 0 0 0 0 294 1 0 0 0 0
10 0 0 0 1 0 9 34 0 0 0
12 0 0 0 0 0 10 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 14 0 0 0 54 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 22 536 0 0 0 42 0 0 0 0 0
4 0 0 1 0 261 1 0 0 0 0 0
5 1 18 0 0 0 139 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 524 488 2 0 0 0 89 18 1
8 766 40 0 0 1 5 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 0 0 0 0 0 0 0 0 54 1 2 94 10
2 1 1 38 0 4 0 1 1 0 0 0 0 0
3 0 0 0 0 0 0 0 0 39 6 55 58 442
4 1 0 0 0 0 0 0 0 1 257 4 0 0
5 0 0 0 0 0 0 0 0 135 0 1 2 20
6 0 56 1 1 1 25 0 0 0 0 1 1 1
7 33 17 83 271 716 1 0 0 0 1 1 2 0
8 0 0 0 0 0 1 1 0 4 0 12 773 23
9 0 0 0 0 0 0 150 143 0 2 0 0 0
10 0 0 0 0 0 0 0 0 38 5 1 0 0
12 0 0 0 0 0 0 0 0 1 6 2 0 1
# NOTE(review): exact repeat of the preceding antibody/PBMC1 call (the output
# below is identical as well); presumably another dataset, e.g. PBMC2, was
# intended here -- confirm and re-run.
print_clustering_data(tuning = 'antibody',dataset="PBMC1")
'Initial COTAN cluster number:'
13
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
9
'Initial scvi-tools cluster number:'
11
'Initial seurat cluster number:'
10
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3 4 5 6 7 8 9
1 94 0 17 50 0 0 0 0 0
2 0 4 0 0 3 0 38 1 0
3 35 0 500 65 0 0 0 0 0
4 0 1 0 262 0 0 0 0 0
5 2 0 29 127 0 0 0 0 0
6 0 1 0 0 0 1 1 58 26
7 2 731 0 4 0 275 95 17 1
8 776 0 30 6 1 0 0 0 1
9 0 0 0 1 294 0 0 0 0
10 0 0 0 44 0 0 0 0 0
12 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11
1 22 0 7 60 0 0 0 0 0 0 72
2 0 0 0 0 5 0 2 1 0 38 0
3 67 0 408 125 0 0 0 0 0 0 0
4 0 1 0 13 0 249 0 0 0 0 0
5 2 0 10 146 0 0 0 0 0 0 0
6 3 0 1 0 0 0 0 0 82 1 0
7 2 702 1 0 348 0 0 0 20 52 0
8 780 0 21 6 0 1 1 0 1 0 4
9 0 0 0 1 0 1 156 137 0 0 0
10 0 0 0 37 0 7 0 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10
1 28 0 0 22 0 1 36 0 74 0
2 0 3 42 0 1 0 0 0 0 0
3 123 0 0 434 0 0 43 0 0 0
4 0 1 0 0 0 259 3 0 0 0
5 0 0 0 17 0 0 138 0 3 0
6 0 1 1 0 0 0 0 56 3 26
7 1 644 451 0 0 1 0 27 0 1
8 786 0 0 19 1 2 3 0 2 1
9 0 0 0 0 294 1 0 0 0 0
10 0 0 0 1 0 9 34 0 0 0
12 0 0 0 0 0 10 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 14 0 0 0 54 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 22 536 0 0 0 42 0 0 0 0 0
4 0 0 1 0 261 1 0 0 0 0 0
5 1 18 0 0 0 139 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 524 488 2 0 0 0 89 18 1
8 766 40 0 0 1 5 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 0 0 0 0 0 0 0 0 54 1 2 94 10
2 1 1 38 0 4 0 1 1 0 0 0 0 0
3 0 0 0 0 0 0 0 0 39 6 55 58 442
4 1 0 0 0 0 0 0 0 1 257 4 0 0
5 0 0 0 0 0 0 0 0 135 0 1 2 20
6 0 56 1 1 1 25 0 0 0 0 1 1 1
7 33 17 83 271 716 1 0 0 0 1 1 2 0
8 0 0 0 0 0 1 1 0 4 0 12 773 23
9 0 0 0 0 0 0 150 143 0 2 0 0 0
10 0 0 0 0 0 0 0 0 38 5 1 0 0
12 0 0 0 0 0 0 0 0 1 6 2 0 1
# NOTE(review): third identical antibody/PBMC1 call in this section (same
# output as the first one); presumably a different dataset was intended
# here -- confirm and re-run.
print_clustering_data(tuning = 'antibody',dataset="PBMC1")
'Initial COTAN cluster number:'
13
'Initial monocle cluster number:'
1
'Initial scanpy cluster number:'
9
'Initial scvi-tools cluster number:'
11
'Initial seurat cluster number:'
10
'Initial antibody cell/cluster table:'
cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64
'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'
1 2 3 4 5 6 7 8 9
1 94 0 17 50 0 0 0 0 0
2 0 4 0 0 3 0 38 1 0
3 35 0 500 65 0 0 0 0 0
4 0 1 0 262 0 0 0 0 0
5 2 0 29 127 0 0 0 0 0
6 0 1 0 0 0 1 1 58 26
7 2 731 0 4 0 275 95 17 1
8 776 0 30 6 1 0 0 0 1
9 0 0 0 1 294 0 0 0 0
10 0 0 0 44 0 0 0 0 0
12 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'
1 2 3 4 5 6 7 8 9 10 11
1 22 0 7 60 0 0 0 0 0 0 72
2 0 0 0 0 5 0 2 1 0 38 0
3 67 0 408 125 0 0 0 0 0 0 0
4 0 1 0 13 0 249 0 0 0 0 0
5 2 0 10 146 0 0 0 0 0 0 0
6 3 0 1 0 0 0 0 0 82 1 0
7 2 702 1 0 348 0 0 0 20 52 0
8 780 0 21 6 0 1 1 0 1 0 4
9 0 0 0 1 0 1 156 137 0 0 0
10 0 0 0 37 0 7 0 0 0 0 0
12 0 0 0 0 0 10 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'
1 2 3 4 5 6 7 8 9 10
1 28 0 0 22 0 1 36 0 74 0
2 0 3 42 0 1 0 0 0 0 0
3 123 0 0 434 0 0 43 0 0 0
4 0 1 0 0 0 259 3 0 0 0
5 0 0 0 17 0 0 138 0 3 0
6 0 1 1 0 0 0 0 56 3 26
7 1 644 451 0 0 1 0 27 0 1
8 786 0 0 19 1 2 3 0 2 1
9 0 0 0 0 294 1 0 0 0 0
10 0 0 0 1 0 9 34 0 0 0
12 0 0 0 0 0 10 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'
1 2 3 4 5 6 7 8 9 10 11
1 93 14 0 0 0 54 0 0 0 0 0
2 0 0 1 4 0 0 1 1 39 0 0
3 22 536 0 0 0 42 0 0 0 0 0
4 0 0 1 0 261 1 0 0 0 0 0
5 1 18 0 0 0 139 0 0 0 0 0
6 3 1 0 1 0 0 0 0 1 56 25
7 2 1 524 488 2 0 0 0 89 18 1
8 766 40 0 0 1 5 1 0 0 0 1
9 0 0 0 0 2 0 151 142 0 0 0
10 0 0 0 0 8 36 0 0 0 0 0
12 0 0 0 0 8 2 0 0 0 0 0
'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'
1 2 3 4 5 6 7 8 9 10 11 12 13
1 0 0 0 0 0 0 0 0 54 1 2 94 10
2 1 1 38 0 4 0 1 1 0 0 0 0 0
3 0 0 0 0 0 0 0 0 39 6 55 58 442
4 1 0 0 0 0 0 0 0 1 257 4 0 0
5 0 0 0 0 0 0 0 0 135 0 1 2 20
6 0 56 1 1 1 25 0 0 0 0 1 1 1
7 33 17 83 271 716 1 0 0 0 1 1 2 0
8 0 0 0 0 0 1 1 0 4 0 12 773 23
9 0 0 0 0 0 0 150 143 0 2 0 0 0
10 0 0 0 0 0 0 0 0 38 5 1 0 0
12 0 0 0 0 0 0 0 0 1 6 2 0 1

Default parameters

print_scores(tuning = 'default',dataset="PBMC1")
'PBMC1 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 3 18 13 11 23
'PBMC1 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.106025 0.062012 0.087622 0.168956 0.1122
'PBMC1 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 194.710746 159.568549 193.151278 235.185139 166.209735
'PBMC1 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.046493 2.534291 2.547137 1.695538 2.199121
'PBMC1 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.182454 -0.006178 0.148873 0.235918 0.139282
'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 187.272904 160.02968 199.227613 213.845901 133.985915
'PBMC1 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 2.582264 2.79635 2.842419 1.973727 3.407326
'PBMC1 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.580616 0.385715 0.412315 0.981082 0.603445 0.367190 0.991712
scanpy 0.722333 0.405339 0.828504 0.640283 0.509402 0.791080 0.328020
scvi-tools 0.777869 0.600555 0.813260 0.745430 0.667316 0.813991 0.547070
seurat 0.795611 0.651612 0.787916 0.803458 0.707763 0.751355 0.666699
COTAN 0.747622 0.582668 0.839063 0.674153 0.655776 0.845150 0.508836
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.622344 0.439255 0.458549 0.968180 0.645589 0.421345 0.989178
scanpy 0.662480 0.389320 0.814398 0.558329 0.511739 0.851844 0.307424
scvi-tools 0.718265 0.557951 0.800919 0.651075 0.643426 0.842101 0.491625
seurat 0.747924 0.647338 0.787235 0.712353 0.712527 0.810669 0.626267
COTAN 0.684979 0.529470 0.823524 0.586337 0.623261 0.862630 0.450315
print_scores(tuning = 'default',dataset="PBMC2")
'PBMC2 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 2 18 20 14 31
'PBMC2 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.237524 0.077322 0.018324 0.134064 0.112282
'PBMC2 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 298.25227 270.502074 223.039427 367.295749 222.980901
'PBMC2 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.89379 2.581588 3.703433 1.958013 2.8615
'PBMC2 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.283181 0.1987 0.064022 0.358299 0.19162
'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 284.036162 259.900464 223.875031 377.870193 213.170805
'PBMC2 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.514194 2.322847 5.400931 1.992412 4.535166
'PBMC2 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.393439 0.206935 0.246216 0.978567 0.521545 0.273004 0.996358
scanpy 0.719426 0.457389 0.805848 0.649745 0.557061 0.815952 0.380312
scvi-tools 0.699891 0.424655 0.787025 0.630128 0.525155 0.763216 0.361349
seurat 0.776310 0.562412 0.820809 0.736387 0.640230 0.816710 0.501886
COTAN 0.724561 0.492226 0.862550 0.624634 0.593654 0.891369 0.395375
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.266290 0.092131 0.156254 0.900271 0.459354 0.212893 0.991137
scanpy 0.693344 0.526719 0.779700 0.624210 0.610277 0.815918 0.456465
scvi-tools 0.661538 0.488989 0.757752 0.587004 0.576896 0.781492 0.425864
seurat 0.757858 0.683647 0.801432 0.718778 0.738063 0.851801 0.639512
COTAN 0.693767 0.569696 0.814741 0.604074 0.647577 0.850076 0.493316
print_scores(tuning = 'default',dataset="PBMC3")
'PBMC3 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 3 22 17 18 57
'PBMC3 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.173831 0.017764 0.066172 0.12701 0.043145
'PBMC3 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 565.456442 389.223708 568.006153 568.200931 269.332151
'PBMC3 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.238128 3.245809 2.168128 2.441035 2.894026
'PBMC3 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.214085 0.065185 0.226855 0.282982 -0.001861
'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 531.480656 382.798915 537.03678 586.377699 255.33551
'PBMC3 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 2.634444 3.87979 2.321242 2.318734 4.285919
'PBMC3 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.501257 0.233289 0.339140 0.960303 0.500276 0.252654 0.990586
scanpy 0.685942 0.462727 0.765047 0.621663 0.541439 0.751303 0.390196
scvi-tools 0.738810 0.579719 0.758792 0.719853 0.635430 0.710503 0.568289
seurat 0.771188 0.585275 0.823308 0.725274 0.644396 0.790547 0.525264
COTAN 0.684907 0.420422 0.880300 0.560498 0.531259 0.917456 0.307629
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.473374 0.197340 0.318745 0.919383 0.474384 0.228968 0.982845
scanpy 0.678005 0.546455 0.758365 0.613045 0.613385 0.808137 0.465567
scvi-tools 0.725083 0.668897 0.739961 0.710792 0.711791 0.755603 0.670519
seurat 0.752260 0.669108 0.800562 0.709455 0.714356 0.824890 0.618633
COTAN 0.642410 0.427885 0.825485 0.525799 0.523267 0.834295 0.328191
print_scores(tuning = 'default',dataset="PBMC4")
'PBMC4 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 3 22 16 19 34
'PBMC4 - Silhuette (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.081399 0.063742 0.075337 0.12954 0.120257
'PBMC4 - Calinski_Harabasz (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 364.985136 267.681245 341.396665 364.393784 285.369852
'PBMC4 - davies_bouldin (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 3.354088 2.496024 2.226 2.224448 2.372892
'PBMC4 - Silhuette from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 0.193766 0.025023 0.077663 0.187532 0.062563
'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'
monocle scanpy scvi-tools seurat COTAN
0 353.871309 254.540593 284.048471 347.979408 270.2767
'PBMC4 - davies_bouldin  from Prob. (lower is better)'
monocle scanpy scvi-tools seurat COTAN
0 2.775993 3.467425 2.808762 2.299231 3.452692
'PBMC4 - default labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.617025 0.470070 0.453383 0.965513 0.647279 0.425154 0.985455
scanpy 0.701228 0.380357 0.819943 0.612541 0.487560 0.777350 0.305802
scvi-tools 0.739299 0.504966 0.788229 0.696088 0.584900 0.745208 0.459077
seurat 0.760207 0.494746 0.847372 0.689301 0.583823 0.820228 0.415555
COTAN 0.726404 0.422436 0.881515 0.617712 0.528853 0.837081 0.334120
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.639609 0.527106 0.485153 0.938343 0.690625 0.486090 0.981225
scanpy 0.645222 0.369607 0.792297 0.544201 0.492852 0.824066 0.294762
scvi-tools 0.701912 0.483655 0.772259 0.643310 0.578775 0.767482 0.436467
seurat 0.693570 0.445739 0.804501 0.609523 0.551688 0.803926 0.378591
COTAN 0.644162 0.338121 0.813229 0.533293 0.457870 0.743146 0.282105

Matching the cellTypist number of clusters

print_scores(tuning = 'celltypist',dataset="PBMC1")
'PBMC1 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 18 17 20 21 18
'PBMC1 - Silhuette (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 0.018958 0.099632 0.064412 0.073234 0.097248 0.119959
'PBMC1 - Calinski_Harabasz (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 130.295857 189.243925 161.219024 164.798269 187.055701 181.752646
'PBMC1 - davies_bouldin (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 2.822402 1.766278 2.822667 2.83544 2.107907 2.206314
'PBMC1 - Silhuette from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 -0.045945 0.425583 0.035198 -0.003457 0.038689 0.17801
'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 105.849177 258.209764 166.895151 148.432443 163.330165 155.720126
'PBMC1 - davies_bouldin  from Prob. (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 5.911548 1.299157 4.052006 4.249784 3.73374 2.738509
'PBMC1 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.658735 0.341968 0.759474 0.581592 0.448922 0.715120 0.281815
scanpy 0.736780 0.460617 0.825488 0.665288 0.554390 0.798681 0.384820
scvi-tools 0.700899 0.375385 0.811930 0.616582 0.480324 0.750160 0.307549
seurat 0.730959 0.423158 0.851564 0.640279 0.527781 0.824168 0.337980
COTAN 0.760567 0.614586 0.823060 0.706893 0.680190 0.835363 0.553840
print_scores(tuning = 'celltypist',dataset="PBMC2")
'PBMC2 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 18 20 18 20 17
'PBMC2 - Silhuette (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 -0.027381 0.142246 0.03787 0.039284 0.074705 0.129948
'PBMC2 - Calinski_Harabasz (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 172.476071 369.182347 250.226682 275.332265 313.66872 297.21664
'PBMC2 - davies_bouldin (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 3.456318 1.519571 2.755944 3.916041 2.073804 2.590932
'PBMC2 - Silhuette from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 0.016222 0.428361 0.111552 0.14981 0.180354 0.269737
'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 166.040634 405.143887 239.361751 274.501619 315.911839 295.576497
'PBMC2 - davies_bouldin  from Prob. (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 3.973793 1.205375 3.43661 3.947089 2.428992 2.216364
'PBMC2 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.605793 0.310821 0.700581 0.533598 0.424516 0.695894 0.258968
scanpy 0.699382 0.378427 0.812919 0.613673 0.493978 0.814699 0.299515
scvi-tools 0.709756 0.399001 0.791766 0.643141 0.501208 0.730913 0.343692
seurat 0.737794 0.418471 0.850711 0.651340 0.528956 0.837754 0.333981
COTAN 0.731121 0.473101 0.747806 0.715164 0.563052 0.591871 0.535636
print_scores(tuning = 'celltypist',dataset="PBMC3")
'PBMC3 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 17 18 19 18 23
'PBMC3 - Silhuette (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 -0.023054 0.150798 0.055521 0.018785 0.131058 0.057865
'PBMC3 - Calinski_Harabasz (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 297.808015 700.217364 456.227491 496.1459 574.480162 400.155157
'PBMC3 - davies_bouldin (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 3.985684 1.442211 2.995741 2.362272 2.426385 2.598886
'PBMC3 - Silhuette from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 -0.029642 0.377306 0.25164 0.174206 0.275693 0.088576
'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 262.360318 739.44889 472.946903 516.505211 586.79879 354.586581
'PBMC3 - davies_bouldin  from Prob. (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 11.904188 1.372576 2.656352 2.572155 2.308715 2.895497
'PBMC3 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.593633 0.350472 0.645190 0.549707 0.432543 0.575952 0.324842
scanpy 0.712471 0.545890 0.759378 0.671021 0.609493 0.757648 0.490309
scvi-tools 0.734566 0.564923 0.767509 0.704334 0.623340 0.727403 0.534165
seurat 0.771725 0.587107 0.823703 0.725918 0.645977 0.791597 0.527144
COTAN 0.670438 0.459746 0.737597 0.614488 0.530538 0.653153 0.430941
print_scores(tuning = 'celltypist',dataset="PBMC4")
'PBMC4 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 16 18 18 19 15
'PBMC4 - Silhuette (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 0.039137 0.094551 0.065347 0.129055 0.131231 0.099607
'PBMC4 - Calinski_Harabasz (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 295.870856 361.837214 295.550588 369.578328 361.987263 327.83418
'PBMC4 - davies_bouldin (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 2.558795 1.645136 2.486482 1.950227 2.225131 2.847014
'PBMC4 - Silhuette from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 0.046714 0.42606 0.093135 0.170808 0.186825 0.062989
'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 248.702625 497.518258 282.658631 374.867341 347.797347 303.69576
'PBMC4 - davies_bouldin  from Prob. (lower is better)'
monocle celltypist scanpy scvi-tools seurat COTAN
0 3.291098 1.09626 3.338571 2.073425 2.297124 2.483086
'PBMC4 - matching celltypist labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.686023 0.421456 0.747151 0.634141 0.512790 0.699270 0.376040
scanpy 0.730083 0.473671 0.809851 0.664619 0.562408 0.777976 0.406571
scvi-tools 0.752863 0.501079 0.830838 0.688268 0.587423 0.808372 0.426864
seurat 0.759689 0.493258 0.846652 0.688926 0.582261 0.817627 0.414649
COTAN 0.724775 0.450018 0.766000 0.687761 0.534712 0.666348 0.429080

Matching the antibody number of clusters

print_scores(tuning = 'antibody',dataset="PBMC1")
'PBMC1 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 9 11 10 11 13
'PBMC1 - Silhuette (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 0.123097 0.069567 0.097602 0.094258 0.171754 0.157886
'PBMC1 - Calinski_Harabasz (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 203.253687 131.570445 193.550388 189.978034 237.429051 201.498385
'PBMC1 - davies_bouldin (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 2.027098 2.515129 1.886001 1.890236 1.677632 1.976209
'PBMC1 - Silhuette from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 0.101586 0.245087 0.297043 0.261342 0.218776 0.162724
'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 184.932102 137.566711 202.80892 206.264825 214.61793 186.472326
'PBMC1 - davies_bouldin  from Prob. (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 2.395779 2.16692 1.863548 1.700282 1.947515 2.059359
'PBMC1 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.724319 0.641765 0.727281 0.721381 0.707943 0.753325 0.665295
scanpy 0.746106 0.652841 0.792721 0.704669 0.717629 0.829289 0.621003
scvi-tools 0.757587 0.658079 0.782127 0.734540 0.721084 0.800236 0.649760
seurat 0.749425 0.642110 0.790860 0.712116 0.708375 0.813318 0.616972
COTAN 0.721299 0.633200 0.772738 0.676282 0.700712 0.798916 0.614579
print_scores(tuning = 'antibody',dataset="PBMC2")
'PBMC2 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 11 9 11 12 11
'PBMC2 - Silhuette (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 -0.037233 0.053999 0.047735 -0.01373 0.107504 0.078974
'PBMC2 - Calinski_Harabasz (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 183.966932 197.645911 256.298442 177.876897 291.531393 203.655796
'PBMC2 - davies_bouldin (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 3.431006 2.999071 2.597576 5.123237 1.843028 3.072363
'PBMC2 - Silhuette from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 0.037684 0.242566 0.260903 0.074589 0.359083 0.234126
'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 187.161715 197.483785 259.897112 187.848746 297.046578 208.763542
'PBMC2 - davies_bouldin  from Prob. (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 6.262231 2.570934 2.234808 4.149191 1.494746 2.238186
'PBMC2 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.592156 0.458950 0.611387 0.574098 0.545023 0.612345 0.485102
scanpy 0.749075 0.650770 0.757603 0.740737 0.708423 0.779752 0.643619
scvi-tools 0.674668 0.578230 0.709325 0.643240 0.647236 0.750492 0.558185
seurat 0.762283 0.762523 0.779418 0.745886 0.802406 0.838975 0.767431
COTAN 0.738004 0.674191 0.688420 0.795286 0.744862 0.649628 0.854057
print_scores(tuning = 'antibody',dataset="PBMC3")
'PBMC3 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 12 14 13 14 12
'PBMC3 - Silhuette (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 -0.040176 0.037871 0.034398 0.001717 0.076119 0.066886
'PBMC3 - Calinski_Harabasz (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 303.562678 309.45087 332.440157 368.948628 434.276887 338.97586
'PBMC3 - davies_bouldin (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 3.604809 3.04294 3.434343 3.282334 2.612535 3.274719
'PBMC3 - Silhuette from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 0.074904 0.205664 0.23307 0.17138 0.305558 0.220006
'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 298.489075 331.435612 382.997054 438.502899 489.196185 393.696152
'PBMC3 - davies_bouldin  from Prob. (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 12.995929 2.780055 2.882473 4.362136 1.884472 2.454523
'PBMC3 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.644484 0.537094 0.639574 0.649469 0.598577 0.600917 0.596245
scanpy 0.729603 0.683244 0.752370 0.708173 0.724410 0.784386 0.669020
scvi-tools 0.726492 0.670063 0.728625 0.724372 0.713239 0.729596 0.697249
seurat 0.764843 0.698339 0.799673 0.732920 0.738860 0.829783 0.657901
COTAN 0.691237 0.607331 0.643954 0.746015 0.676860 0.575699 0.795798
print_scores(tuning = 'antibody',dataset="PBMC4")
'PBMC4 - number of clusters'
monocle scanpy scvi-tools seurat COTAN
0 12 11 10 13 10
'PBMC4 - Silhuette (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 0.002385 -0.036094 0.050455 0.05113 0.077562 0.045683
'PBMC4 - Calinski_Harabasz (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 235.482647 197.341814 270.202541 343.073339 320.138454 229.318527
'PBMC4 - davies_bouldin (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 3.083971 4.423945 2.822422 2.12666 2.473315 3.226592
'PBMC4 - Silhuette from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 0.049133 0.172283 0.157433 0.088787 0.040314 0.088272
'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 200.437686 236.062289 249.06549 280.961153 300.480476 200.115128
'PBMC4 - davies_bouldin  from Prob. (lower is better)'
monocle antibody scanpy scvi-tools seurat COTAN
0 3.494187 3.01011 2.595129 2.489428 2.124028 2.931601
'PBMC4 - matching antibody labels'
NMI ARI homogeneity completeness fowlkes_mallows precision recall
monocle 0.644898 0.463759 0.695399 0.601235 0.559620 0.723970 0.432579
scanpy 0.722095 0.587951 0.761954 0.686198 0.664451 0.799173 0.552440
scvi-tools 0.733942 0.592083 0.764983 0.705322 0.667996 0.803599 0.555275
seurat 0.723924 0.571960 0.786070 0.670884 0.652635 0.815198 0.522489
COTAN 0.677301 0.519220 0.678168 0.676436 0.606874 0.658843 0.559005

Check cellTypist vs Antibody

def compute_clustering_scores(output_dir, dataset):
    """Display agreement scores between the CellTypist and antibody labelings.

    The two label tables of ``dataset`` are read from ``DIR``, aligned on
    their index with an inner join (cells present in only one table are
    dropped) and compared with NMI, ARI, homogeneity, completeness and
    Fowlkes-Mallows. The CellTypist labels are always passed as the first
    argument of each metric and the antibody labels as the second.

    Parameters
    ----------
    output_dir : str
        Currently unused: the scores are only displayed, nothing is written
        to disk.
    dataset : str
        Name of the dataset sub-directory under ``DIR``.

    Returns
    -------
    None
        A one-row DataFrame holding the five scores is shown via ``display``.
    """
    celltypist_labels = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
    # Drop the last two characters of every CellTypist cell identifier before
    # joining (presumably a suffix such as "-1" that the antibody table does
    # not carry -- TODO confirm against the input files).
    celltypist_labels.index = celltypist_labels.index.str[:-2]
    antibody_labels = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)

    paired = celltypist_labels.merge(antibody_labels, how='inner', left_index=True, right_index=True)
    # Exactly one label column per input table is expected; any other column
    # count makes this assignment raise.
    paired.columns = ['cluster_celltypist', 'cluster_antibody']
    reference = paired['cluster_celltypist']
    candidate = paired['cluster_antibody']

    # (column name, metric) pairs, in the column order of the displayed table.
    metrics = (
        ('NMI', lambda a, b: normalized_mutual_info_score(a, b, average_method='arithmetic')),
        ('ARI', adjusted_rand_score),
        ('Homogeneity', homogeneity_score),
        ('Completeness', completeness_score),
        ('Fowlkes_Mallows', fowlkes_mallows_score),
    )
    scores_df = pd.DataFrame([{label: metric(reference, candidate) for label, metric in metrics}])

    display(scores_df)
# Show the CellTypist-vs-antibody agreement scores for every dataset, each
# preceded by a title line naming the dataset.
for dataset in DATASET_NAMES:
    display(f'{dataset} - Clustering Comparison between CellTypist and Antibody')

    # The label tables are read inside compute_clustering_scores itself; DIR is
    # passed only to fill its output_dir parameter.
    compute_clustering_scores(DIR, dataset)
'PBMC1 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.752326 0.731095 0.708308 0.802178 0.78126
'PBMC2 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.659259 0.481537 0.667725 0.651004 0.585734
'PBMC3 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.693433 0.555502 0.693429 0.693436 0.618105
'PBMC4 - Clustering Comparison between CellTypist and Antibody'
NMI ARI Homogeneity Completeness Fowlkes_Mallows
0 0.751294 0.7252 0.728817 0.775201 0.776972

Summary

External measures

def load_scores(tuning, dataset):
    """Read the score table of one dataset/tuning pair.

    The file ``{DIR}{dataset}/scores_{tuning}.csv`` is loaded, the column
    pandas names ``"Unnamed: 0"`` (the header-less first column) is renamed
    to ``tool``, and a constant ``tuning`` column holding *tuning* is added.

    Args:
        tuning: Tuning label; used in the file name and stored in the
            ``tuning`` column.
        dataset: Dataset sub-directory name under ``DIR``.

    Returns:
        The loaded ``pandas.DataFrame`` with the ``tool`` and ``tuning``
        columns described above.
    """
    csv_path = f'{DIR}{dataset}/scores_{tuning}.csv'
    return (
        pd.read_csv(csv_path)
        .rename(columns={"Unnamed: 0": "tool"})
        .assign(tuning=tuning)
    )
datasets = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
# Each entry selects one scores_{tuning}.csv file per dataset.
# NOTE(review): names look like "<clustering tuning>_<reference labels>" — confirm.
tunings = ['default_celltypist', 'default_antibody', 'celltypist_celltypist', 'antibody_antibody']

scores_list = []

# Load one score table per (dataset, tuning) pair and tag it with its dataset
for dataset in datasets:
    for tuning in tunings:
        scores = load_scores(tuning, dataset)
        scores['dataset'] = dataset
        scores_list.append(scores)

all_scores = pd.concat(scores_list)

# Long format for plotting: every non-id column becomes a (score, value) row
all_scores_melted = all_scores.melt(id_vars=['tool', 'tuning', 'dataset'], var_name='score', value_name='value')

sns.set_context("talk")
# Define custom colors (one fixed colour per tool)
custom_palette = {
    "seurat": "#4575B4",
    "monocle": "#DAABE9",
    "scanpy": "#7F9B5C",
    "COTAN": "#F73604",
    "scvi-tools": "#B6A18F"
}

# Explicit x-axis order. Without it every facet derives its own category order
# from its data subset, and seaborn warns that FacetGrid.map with a categorical
# plot "is likely to produce an incorrect plot". Same order as the
# internal-measure figures.
tool_order = ["monocle", "scanpy", "scvi-tools", "seurat", "COTAN"]

# One row of panels per score, one column per tuning; y axes are independent
# because the scores live on different scales.
g = sns.FacetGrid(all_scores_melted, row='score', col='tuning', sharey=False, height=4, aspect=1.3)
g.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=tool_order, capsize=0.2, errwidth=2)

# Set titles and labels
g.set_titles(col_template="{col_name}", row_template="{row_name}")
g.set_axis_labels("Tool", "Score Value")
plt.subplots_adjust(top=1.4)
# Rotate x-axis labels
for ax in g.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

g.savefig(f"{DIR}ClusteringToolsComparison{min_size_cluster}.pdf")
plt.show()

Internal measures

# Load your data (assuming you have CSV files for the scores)
def load_scores(tuning, dataset, score_type):
    """Load one internal-score CSV and return it in long format.

    Reads ``{DIR}{dataset}/{tuning}_{score_type}.csv`` (first line is the
    header), turns every tool column into ``(tool, value)`` rows and tags the
    rows with the tuning, dataset and score type they came from.

    Columns that pandas auto-names ``"Unnamed: N"`` (a saved row index with an
    empty header) are dropped before melting; otherwise the index would show
    up as a spurious tool called ``"Unnamed: 0"``.

    Args:
        tuning: Tuning label; part of the file name, stored in ``tuning``.
        dataset: Dataset sub-directory under ``DIR``; stored in ``dataset``.
        score_type: Score name; part of the file name, stored in
            ``score_type``.

    Returns:
        ``pandas.DataFrame`` with columns ``tool``, ``value``, ``tuning``,
        ``dataset`` and ``score_type``.
    """
    file_path = f'{DIR}{dataset}/{tuning}_{score_type}.csv'
    print(f"Loading {file_path}")
    scores = pd.read_csv(file_path, header=0)
    # Keep only real tool columns; header-less columns are a saved index, not a tool
    tool_columns = [col for col in scores.columns if not str(col).startswith('Unnamed:')]
    scores_melted = scores[tool_columns].melt(var_name='tool', value_name='value')
    scores_melted['tuning'] = tuning
    scores_melted['dataset'] = dataset
    scores_melted['score_type'] = score_type
    return scores_melted

# Datasets, tuning conditions and internal-score names to load; one CSV is read
# for every (dataset, tuning, score_type) combination by load_scores.
datasets = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
tunings = ['default', 'celltypist', 'antibody']
score_types = ['silhouette', 'davies_bouldin','Calinski_Harabasz','silhouette_fromProb', 'davies_bouldin_fromProb','Calinski_Harabasz_fromProb']
scores_list = []

# Collect the long-format table of every combination
for dataset in datasets:
    for tuning in tunings:
        for score_type in score_types:
            scores = load_scores(tuning, dataset, score_type)
            scores_list.append(scores)

# Stack all tables into one DataFrame (each table keeps its own row index,
# so index values repeat across files)
all_scores = pd.concat(scores_list)

# Debug: Check the loaded data
print(all_scores.head())

# Fixed colour for each tool, shared by all internal-measure figures
custom_palette = dict([
    ("seurat", "#4575B4"),
    ("monocle", "#DAABE9"),
    ("scanpy", "#7F9B5C"),
    ("COTAN", "#F73604"),
    ("scvi-tools", "#B6A18F"),
])

# One sub-table per score type, selected on the 'score_type' column
silhouette_scores = all_scores.loc[all_scores['score_type'].eq('silhouette')]
davies_bouldin_scores = all_scores.loc[all_scores['score_type'].eq('davies_bouldin')]
Calinski_Harabasz_scores = all_scores.loc[all_scores['score_type'].eq('Calinski_Harabasz')]
silhouette_scores_fromProb = all_scores.loc[all_scores['score_type'].eq('silhouette_fromProb')]
davies_bouldin_scores_fromProb = all_scores.loc[all_scores['score_type'].eq('davies_bouldin_fromProb')]
Calinski_Harabasz_scores_fromProb = all_scores.loc[all_scores['score_type'].eq('Calinski_Harabasz_fromProb')]

def _plot_score_grid(data, y_label, title, title_y, top):
    """Draw one point-plot panel per tuning for a single score type.

    Args:
        data: Long-format scores with 'tool', 'value' and 'tuning' columns.
        y_label: Label of the y axis.
        title: Figure title.
        title_y: Vertical position passed to ``suptitle``.
        top: ``top`` value passed to ``plt.subplots_adjust``.

    Returns:
        The ``sns.FacetGrid`` that was drawn.
    """
    grid = sns.FacetGrid(data, col='tuning', sharey=False, height=4, aspect=1.8)
    grid.map(
        sns.pointplot,
        'tool',
        'value',
        palette=custom_palette,
        order=["monocle", "scanpy", "scvi-tools", "seurat", "COTAN"],
        capsize=0.2,
        errwidth=2,
    )
    grid.set_titles(col_template="{col_name}")
    grid.set_axis_labels("Tool", y_label)
    grid.fig.suptitle(title, y=title_y)
    plt.subplots_adjust(top=top)
    # Rotate x-axis labels
    for panel in grid.axes.flatten():
        plt.setp(panel.get_xticklabels(), rotation=45)
    return grid


# Scores from the plain score files
g1 = _plot_score_grid(
    silhouette_scores, "Silhouette Score",
    'Silhouette Scores by Tool and Tuning Condition', 1.25, 0.85)
g2 = _plot_score_grid(
    davies_bouldin_scores, "Davies-Bouldin Score",
    'Davies-Bouldin Scores by Tool and Tuning Condition', 1.85, 1.5)
g3 = _plot_score_grid(
    Calinski_Harabasz_scores, "Calinski_Harabasz Score",
    'Calinski Harabasz Scores by Tool and Tuning Condition', 1.85, 1.5)

# Scores from the "_fromProb" score files
g4 = _plot_score_grid(
    silhouette_scores_fromProb, "Silhouette Score",
    'Silhouette Scores From Prob. by Tool and Tuning Condition', 1.25, 0.85)
g5 = _plot_score_grid(
    davies_bouldin_scores_fromProb, "Davies-Bouldin Score",
    'Davies-Bouldin Scores From Prob. by Tool and Tuning Condition', 1.85, 1.5)
g6 = _plot_score_grid(
    Calinski_Harabasz_scores_fromProb, "Calinski_Harabasz Score",
    'Calinski Harabasz Scores From Prob. by Tool and Tuning Condition', 1.85, 1.5)


# Save every grid under the name of the metric it actually shows:
# g1/g4 = Silhouette, g2/g5 = Davies-Bouldin, g3/g6 = Calinski-Harabasz.
# (The Davies-Bouldin and Calinski-Harabasz file names were previously swapped.)
g1.savefig(f"{DIR}Silhouette{min_size_cluster}.pdf")
g2.savefig(f"{DIR}Davies_Bouldin{min_size_cluster}.pdf")
g3.savefig(f"{DIR}Calinski_Harabasz{min_size_cluster}.pdf")

g4.savefig(f"{DIR}SilhouetteFromProb{min_size_cluster}.pdf")
g5.savefig(f"{DIR}Davies_BouldinFromProb{min_size_cluster}.pdf")
g6.savefig(f"{DIR}Calinski_HarabaszFromProb{min_size_cluster}.pdf")

plt.show()
Loading Data/PBMC1/default_silhouette.csv
Loading Data/PBMC1/default_davies_bouldin.csv
Loading Data/PBMC1/default_Calinski_Harabasz.csv
Loading Data/PBMC1/default_silhouette_fromProb.csv
Loading Data/PBMC1/default_davies_bouldin_fromProb.csv
Loading Data/PBMC1/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC1/celltypist_silhouette.csv
Loading Data/PBMC1/celltypist_davies_bouldin.csv
Loading Data/PBMC1/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC1/celltypist_silhouette_fromProb.csv
Loading Data/PBMC1/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC1/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC1/antibody_silhouette.csv
Loading Data/PBMC1/antibody_davies_bouldin.csv
Loading Data/PBMC1/antibody_Calinski_Harabasz.csv
Loading Data/PBMC1/antibody_silhouette_fromProb.csv
Loading Data/PBMC1/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC1/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/default_silhouette.csv
Loading Data/PBMC2/default_davies_bouldin.csv
Loading Data/PBMC2/default_Calinski_Harabasz.csv
Loading Data/PBMC2/default_silhouette_fromProb.csv
Loading Data/PBMC2/default_davies_bouldin_fromProb.csv
Loading Data/PBMC2/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/celltypist_silhouette.csv
Loading Data/PBMC2/celltypist_davies_bouldin.csv
Loading Data/PBMC2/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC2/celltypist_silhouette_fromProb.csv
Loading Data/PBMC2/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC2/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/antibody_silhouette.csv
Loading Data/PBMC2/antibody_davies_bouldin.csv
Loading Data/PBMC2/antibody_Calinski_Harabasz.csv
Loading Data/PBMC2/antibody_silhouette_fromProb.csv
Loading Data/PBMC2/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC2/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/default_silhouette.csv
Loading Data/PBMC3/default_davies_bouldin.csv
Loading Data/PBMC3/default_Calinski_Harabasz.csv
Loading Data/PBMC3/default_silhouette_fromProb.csv
Loading Data/PBMC3/default_davies_bouldin_fromProb.csv
Loading Data/PBMC3/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/celltypist_silhouette.csv
Loading Data/PBMC3/celltypist_davies_bouldin.csv
Loading Data/PBMC3/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC3/celltypist_silhouette_fromProb.csv
Loading Data/PBMC3/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC3/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/antibody_silhouette.csv
Loading Data/PBMC3/antibody_davies_bouldin.csv
Loading Data/PBMC3/antibody_Calinski_Harabasz.csv
Loading Data/PBMC3/antibody_silhouette_fromProb.csv
Loading Data/PBMC3/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC3/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/default_silhouette.csv
Loading Data/PBMC4/default_davies_bouldin.csv
Loading Data/PBMC4/default_Calinski_Harabasz.csv
Loading Data/PBMC4/default_silhouette_fromProb.csv
Loading Data/PBMC4/default_davies_bouldin_fromProb.csv
Loading Data/PBMC4/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/celltypist_silhouette.csv
Loading Data/PBMC4/celltypist_davies_bouldin.csv
Loading Data/PBMC4/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC4/celltypist_silhouette_fromProb.csv
Loading Data/PBMC4/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC4/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/antibody_silhouette.csv
Loading Data/PBMC4/antibody_davies_bouldin.csv
Loading Data/PBMC4/antibody_Calinski_Harabasz.csv
Loading Data/PBMC4/antibody_silhouette_fromProb.csv
Loading Data/PBMC4/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC4/antibody_Calinski_Harabasz_fromProb.csv
         tool     value   tuning dataset  score_type
0  Unnamed: 0  0.000000  default   PBMC1  silhouette
1     monocle  0.106025  default   PBMC1  silhouette
2      scanpy  0.062012  default   PBMC1  silhouette
3  scvi-tools  0.087622  default   PBMC1  silhouette
4      seurat  0.168956  default   PBMC1  silhouette