Clustering Comparison

Preamble

import pandas as pd
import numpy as np
import scanpy as sc
from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score
from sklearn.metrics import homogeneity_score, completeness_score, fowlkes_mallows_score, silhouette_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix
from src.utils import sankey_plot
from sklearn.decomposition import PCA
import kaleido
from sklearn.preprocessing import StandardScaler
import plotly.io as pio
import matplotlib.pyplot as plt
import seaborn as sns

# Root folder of the input/output files. It is concatenated directly in front of
# dataset names in the f-string paths below, so it has to end with '/'.
DIR = 'Data/'
# NOTE(review): not referenced in the visible part of this file; presumably the
# dataset folders available under DIR -- confirm.
DATASET_NAMES = ['PBMC1', 'PBMC2', 'PBMC3','PBMC4']
# Clustering tools being compared. Each one contributes a `cluster_<tool>` column
# to the merged label tables and is read from `<DIR><dataset>/<tool>/<tuning>/`.
TOOLS = ['monocle', 'scanpy', 'scvi-tools', 'seurat', 'COTAN']
# NOTE(review): not referenced in the visible part of this file; the values match
# the `tuning` argument handled by print_scores / print_clustering_data.
PARAMS_TUNING = ['default', 'celltypist', 'antibody']
# Clusters with fewer cells than this are removed by drop_small_clusters.
min_size_cluster = 10

# Build one table holding the default-parameter cluster label of every tool for
# the PBMC3 dataset. Start from the COTAN labels (only the `cell` and `cluster`
# columns are read; `cell` becomes the index).
labels_df = pd.read_csv(f'{DIR}PBMC3/COTAN/default/clustering_labels.csv', index_col=0,usecols=["cell","cluster"])
labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
print(labels_df.shape)
#print(labels_df.shape)
# Inner-join the labels of each remaining tool on `cell`, so only cells labelled
# by every tool are kept, and rename the incoming `cluster` column per tool.
for tool in [t for t in TOOLS if t != 'COTAN']:
    tool_labels_df = pd.read_csv(f'{DIR}PBMC3/{tool}/default/clustering_labels.csv', index_col=0)
    labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
    labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)
#    print("labels_df size"+tool)
#    print(labels_df.shape)

(10944, 1)

labels_df['cluster_COTAN'].value_counts()

cluster_COTAN
3     1689
9      688
20     609
4      433
8      432
5      390
10     362
27     331
47     324
13     305
52     302
19     282
46     260
32     253
43     242
45     228
54     219
23     190
2      189
12     177
35     166
39     162
42     159
17     151
55     149
14     146
18     145
6      143
28     132
53     130
7      120
40     111
51     109
16     100
50      99
44      98
36      79
30      78
41      76
11      74
37      67
22      58
57      52
1       49
56      40
48      39
34      37
31      35
38      30
49      29
15      29
25      28
26      27
29      27
33      24
21      21
24      20
Name: count, dtype: int64

labels_df.shape

(3405, 5)

# Remove the cells that belong to a cluster with fewer than `min_size_cluster`
# members in any of the label columns.
# NOTE(review): `drop_small_clusters` is defined further down in this file; read
# top to bottom this call precedes the definition -- confirm the cell order.
labels_df = drop_small_clusters(df = labels_df,min_size = min_size_cluster)

'cluster_COTAN - cluster_COTAN\n12    809\n16    462\n7     392\n18    272\n8     254\n4     177\n10    152\n11    145\n3     122\n19    117\n20    106\n5      97\n13     94\n6      77\n2      75\n21     55\n15     42\n17     35\n1      35\n14     28\n9      27\n23     22\n22     15\nName: count, dtype: int64'

"Index([15, 17, 1, 14, 9, 23, 22], dtype='int64', name='cluster_COTAN')"

'cluster_monocle - cluster_monocle\n1    1913\n2    1195\n3     298\nName: count, dtype: int64'

"Index([], dtype='int64', name='cluster_monocle')"

'cluster_scanpy - cluster_scanpy\n3     323\n1     315\n2     309\n4     286\n7     268\n6     261\n8     236\n5     233\n9     200\n10    182\n11    158\n12    144\n13    141\n14    105\n15     91\n16     77\n17     76\n18      1\nName: count, dtype: int64'

"Index([18], dtype='int64', name='cluster_scanpy')"

'cluster_scvi-tools - cluster_scvi-tools\n1     635\n2     467\n3     421\n4     390\n5     364\n6     283\n7     277\n8     149\n9     146\n10    101\n11     92\n12     80\nName: count, dtype: int64'

"Index([], dtype='int64', name='cluster_scvi-tools')"

'cluster_seurat - cluster_seurat\n1     879\n3     581\n2     504\n4     408\n5     273\n6     260\n7     153\n8     144\n9     129\n10     74\nName: count, dtype: int64'

"Index([], dtype='int64', name='cluster_seurat')"

# NOTE(review): `dataset` is not assigned at module level in the visible part of
# this file (it is only a parameter of the functions below), and
# `drop_small_clusters` is defined after this point -- confirm how this cell is
# meant to be run.

# load and concat celltypist labels
celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
# Drop the last two characters of every cell identifier before joining.
# NOTE(review): presumably a barcode suffix that the tool label files do not
# carry -- confirm.
celltypist_df.index = celltypist_df.index.str[:-2]
# Keep only the cells present in both tables; the incoming `cluster.ids` column
# becomes `cluster_celltypist`.
celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
celltypist_df.rename(columns={"cluster.ids": f"cluster_celltypist"}, inplace=True)
celltypist_mapping_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_mapping.csv', index_col=0)
#print("celltypist_df size")
#print(celltypist_df.shape)

# Small clusters are removed again because the join above can shrink clusters.
celltypist_df = drop_small_clusters(df = celltypist_df, min_size = min_size_cluster)

# load and concat protein surface labels
antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
antibody_df.rename(columns={"cluster.ids": f"cluster_antibody"}, inplace=True)
# The mapping file is read as latin1 and indexed by its second column.
antibody_mapping_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_mapping.csv', index_col=1, encoding='latin1')
#print("antibody_df size")
#print(antibody_df.shape)

antibody_df = drop_small_clusters(df = antibody_df, min_size = min_size_cluster )

def drop_small_clusters(df, min_size):
    """Remove the rows that fall into an undersized cluster in any column.

    The columns are processed one after the other, in order. For each column
    the cluster sizes are counted on the rows that are still left, so rows
    removed because of an earlier column can push a cluster of a later column
    below the threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per labelling; every value is treated as a cluster label.
    min_size : int
        Clusters with strictly fewer rows than this are removed.

    Returns
    -------
    pandas.DataFrame
        The remaining rows; the input frame is not modified.
    """
    kept = df
    for column in kept.columns:
        # Size of every cluster among the rows that survived so far
        sizes = kept[column].value_counts()
        undersized = sizes[sizes < min_size].index
        # Keep only the rows whose label is not one of the undersized clusters
        in_large_cluster = ~kept[column].isin(undersized)
        kept = kept[in_large_cluster]
    return kept


def compute_scores(dir, dataset, labels_df, labels_matched, ground_truth_labels):
    """Score every tool's clustering against a reference labelling and save the table.

    For each tool in ``TOOLS`` the column ``cluster_<tool>`` of ``labels_df``
    is compared with the column ``cluster_<ground_truth_labels>`` using NMI,
    ARI, homogeneity, completeness, Fowlkes-Mallows and pair-counting
    precision/recall. The resulting table (one row per tool, one column per
    metric) is written as CSV and LaTeX to
    ``<dir><dataset>/scores_<labels_matched>_<ground_truth_labels>.*`` and
    displayed.

    Parameters
    ----------
    dir : str
        Path prefix of the output files (concatenated as-is with ``dataset``).
    dataset : str
        Dataset folder name.
    labels_df : pandas.DataFrame
        Table with the ``cluster_<tool>`` and reference label columns.
    labels_matched : str
        Only used in the output file names.
    ground_truth_labels : str
        Suffix of the reference column, also used in the output file names.
    """
    metric_names = (
        'NMI', 'ARI', 'homogeneity', 'completeness',
        'fowlkes_mallows', 'precision', 'recall',
    )
    scores = {metric: {} for metric in metric_names}

    for tool in TOOLS:
        predicted = labels_df['cluster_'+tool]
        reference = labels_df[f'cluster_{ground_truth_labels}']

        scores['NMI'][tool] = normalized_mutual_info_score(
            labels_true=reference, labels_pred=predicted, average_method='arithmetic'
        )
        scores['ARI'][tool] = adjusted_rand_score(labels_true=reference, labels_pred=predicted)
        scores['homogeneity'][tool] = homogeneity_score(labels_true=reference, labels_pred=predicted)
        scores['completeness'][tool] = completeness_score(labels_true=reference, labels_pred=predicted)
        scores['fowlkes_mallows'][tool] = fowlkes_mallows_score(labels_true=reference, labels_pred=predicted)

        # Pair-counting confusion matrix: entry [1, 1] is read as the true
        # positives, [0, 1] as the false positives and [1, 0] as the false
        # negatives.
        pair_counts = pair_confusion_matrix(labels_true=reference, labels_pred=predicted)
        true_pos = pair_counts[1, 1]
        false_pos = pair_counts[0, 1]
        false_neg = pair_counts[1, 0]
        scores['precision'][tool] = true_pos / (true_pos + false_pos)
        scores['recall'][tool] = true_pos / (true_pos + false_neg)

    scores_df = pd.DataFrame(scores)
    scores_df.to_csv(f'{dir}{dataset}/scores_{labels_matched}_{ground_truth_labels}.csv')
    scores_df.to_latex(f'{dir}{dataset}/scores_{labels_matched}_{ground_truth_labels}.tex')
    display(scores_df)


def _load_tool_labels(dataset, tuning):
    """Return one table with a ``cluster_<tool>`` column per tool, inner-joined on ``cell``."""
    labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
    for tool in [t for t in TOOLS if t != 'COTAN']:
        tool_labels_df = pd.read_csv(f'{DIR}{dataset}/{tool}/{tuning}/clustering_labels.csv', index_col=0)
        labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
        labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)
    return labels_df


def _internal_validity_scores(features, labels_df, tuning):
    """Return (silhouette, Calinski-Harabasz, Davies-Bouldin) dicts keyed by labelling name.

    ``features`` is a DataFrame indexed by cell identifier. Every tool in
    ``TOOLS`` is scored; when ``tuning`` is 'celltypist' or 'antibody' the
    reference column ``cluster_<tuning>`` is scored as well.
    """
    # Look the labels up by cell identifier so that row i of every label vector
    # belongs to row i of the feature matrix. The label tables keep the row
    # order of the merged CSV files, which is not guaranteed to be the row
    # order of `features`.
    aligned = labels_df.loc[features.index]
    matrix = features.to_numpy()

    silhouette = {}
    calinski_harabasz = {}
    davies_bouldin = {}

    def add(name):
        labels = aligned[f'cluster_{name}']
        silhouette[name] = silhouette_score(matrix, labels)
        calinski_harabasz[name] = calinski_harabasz_score(matrix, labels)
        davies_bouldin[name] = davies_bouldin_score(matrix, labels)

    for tool in TOOLS:
        add(tool)
        # The reference labelling does not depend on the tool, so it is scored
        # only once. It is inserted right after the first tool so the column
        # order of the saved tables stays what it has always been.
        if tuning in ('celltypist', 'antibody') and tuning not in silhouette:
            add(tuning)
    return silhouette, calinski_harabasz, davies_bouldin


def _save_and_display_scores(scores, dataset, tuning, file_stem, caption):
    """Write a one-row score table as CSV and LaTeX, then display it under ``caption``."""
    scores_df = pd.DataFrame(scores, index=[0])
    scores_df.to_csv(f'{DIR}{dataset}/{tuning}_{file_stem}.csv')
    scores_df.to_latex(f'{DIR}{dataset}/{tuning}_{file_stem}.tex')
    display(caption)
    display(scores_df)


def _score_and_plot_against(dataset, tuning, truth_df, mapping_df, truth_name):
    """Run compute_scores against one reference labelling and save the Sankey plot."""
    compute_scores(DIR, dataset, truth_df, tuning, truth_name)
    labels = [truth_df[f'cluster_{tool}'].to_list() for tool in TOOLS]
    labels_titles = list(TOOLS)
    # Reference cluster ids are translated through the `go` column of the mapping table
    labels.append(truth_df[f'cluster_{truth_name}'].map(mapping_df['go'].to_dict()).to_list())
    labels_titles.append(truth_name)
    title = f'{dataset} - matching {tuning} labels' if tuning != 'default' else f'{dataset} - default labels'
    sankey_plot(labels=labels, labels_titles=labels_titles, title=title, path=f'{DIR}{dataset}/{tuning}_{truth_name}.html')


def print_scores(dataset,tuning):
    """Compute, save and display all clustering scores of one dataset/tuning pair.

    Steps:
      1. Load the labels of every tool, join them with the celltypist and the
         antibody labels, and drop small clusters after every join.
      2. Internal validity (silhouette, Calinski-Harabasz, Davies-Bouldin) on
         the scaled 20-component PCA of the expression matrix.
      3. The same three scores on the scaled 20-component PCA of the celltypist
         probability matrix.
      4. External scores (compute_scores) and a Sankey plot against celltypist
         and/or antibody labels.

    Parameters
    ----------
    dataset : str
        Dataset folder name under ``DIR``.
    tuning : str
        'celltypist' or 'antibody' select that reference labelling; any other
        value is handled like 'default' for steps 2-3, and only the exact
        value 'default' triggers both comparisons in step 4.

    All tables are written under ``<DIR><dataset>/``; nothing is returned.
    """
    # concat tools labels
    labels_df = drop_small_clusters(df=_load_tool_labels(dataset, tuning), min_size=min_size_cluster)

    # load and concat celltypist labels
    celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
    # The last two characters of the celltypist cell identifiers are dropped
    # before joining. NOTE(review): presumably a barcode suffix -- confirm.
    celltypist_df.index = celltypist_df.index.str[:-2]
    celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
    celltypist_df.rename(columns={"cluster.ids": f"cluster_celltypist"}, inplace=True)
    celltypist_mapping_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_mapping.csv', index_col=0)
    celltypist_df = drop_small_clusters(df=celltypist_df, min_size=min_size_cluster)

    # load and concat protein surface labels
    antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
    antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
    antibody_df.rename(columns={"cluster.ids": f"cluster_antibody"}, inplace=True)
    antibody_mapping_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_mapping.csv', index_col=1, encoding='latin1')
    antibody_df = drop_small_clusters(df=antibody_df, min_size=min_size_cluster)

    # Table whose cells and label columns are used for the internal scores
    scored_df = {'celltypist': celltypist_df, 'antibody': antibody_df}.get(tuning, labels_df)

    # read dataset
    adata = sc.read_10x_mtx(
        f'{DIR}{dataset}/filtered/10X/',
        var_names='gene_symbols',
        cache=False
    )
    adata.var_names_make_unique()
    # keep only labelled cells
    adata = adata[adata.obs_names.isin(scored_df.index), :]

    mito_genes = adata.var_names.str.startswith('MT-')
    # for each cell compute fraction of counts in mito genes vs. all genes
    # the `.A1` is only necessary as X is sparse (to transform to a dense array after summing)
    adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1).A1 / np.sum(adata.X, axis=1).A1
    # add the total counts per cell as observations-annotation to adata
    adata.obs['n_counts'] = adata.X.sum(axis=1).A1

    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_mean=0.00125, max_mean=3, min_disp=0.5)
    adata.raw = adata
    adata = adata[:, adata.var.highly_variable]
    #sc.pp.regress_out(adata, ['n_counts', 'percent_mito'])
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, svd_solver='arpack',n_comps=20)
    # Index the scaled PCA by cell so the labels can be aligned to its rows
    expression_features = pd.DataFrame(
        StandardScaler().fit_transform(adata.obsm['X_pca']),
        index=adata.obs_names,
    )

    # Clusters number
    cluster_numbers = {tool: labels_df[f'cluster_{tool}'].unique().shape[0] for tool in TOOLS}
    df_size = pd.DataFrame(cluster_numbers, index=[0])
    display(f'{dataset} - number of clusters')
    display(df_size)

    # compute silhouette, Calinski_Harabasz and davies_bouldin scores with scaled PCA
    silhouette, Calinski_Harabasz, davies_bouldin = _internal_validity_scores(
        expression_features, scored_df, tuning
    )
    _save_and_display_scores(silhouette, dataset, tuning, 'silhouette',
                             f'{dataset} - Silhuette (higher is better)')
    _save_and_display_scores(Calinski_Harabasz, dataset, tuning, 'Calinski_Harabasz',
                             f'{dataset} - Calinski_Harabasz (higher is better)')
    _save_and_display_scores(davies_bouldin, dataset, tuning, 'davies_bouldin',
                             f'{dataset} - davies_bouldin (lower is better)')

    # compute silhouette, Calinski_Harabasz and davies_bouldin scores with cellTypist probability
    celltypist_prob_df = pd.read_csv(f'{DIR}{dataset}/celltypist/Immune_All_Low_probability_matrix.csv', index_col=0)
    celltypist_prob_df.index = celltypist_prob_df.index.str[:-2]
    celltypist_prob_df = celltypist_prob_df[celltypist_prob_df.index.isin(scored_df.index)]

    pca = PCA(n_components=20,svd_solver='arpack')
    pca_data = pca.fit_transform(celltypist_prob_df)
    probability_features = pd.DataFrame(
        StandardScaler().fit_transform(pca_data),
        index=celltypist_prob_df.index,
    )

    silhouette, Calinski_Harabasz, davies_bouldin = _internal_validity_scores(
        probability_features, scored_df, tuning
    )
    _save_and_display_scores(silhouette, dataset, tuning, 'silhouette_fromProb',
                             f'{dataset} - Silhuette from Prob. (higher is better)')
    _save_and_display_scores(Calinski_Harabasz, dataset, tuning, 'Calinski_Harabasz_fromProb',
                             f'{dataset} - Calinski_Harabasz from Prob. (higher is better)')
    _save_and_display_scores(davies_bouldin, dataset, tuning, 'davies_bouldin_fromProb',
                             f'{dataset} - davies_bouldin  from Prob. (lower is better)')

    display(f'{dataset} - matching {tuning} labels' if tuning != 'default' else f'{dataset} - default labels')

    # compute scores comparing each tool labels with celltypist labels
    if tuning == 'celltypist' or tuning == 'default':
        _score_and_plot_against(dataset, tuning, celltypist_df, celltypist_mapping_df, 'celltypist')

    # compute scores comparing each tool labels with protein labels
    if tuning == 'antibody' or tuning == 'default':
        _score_and_plot_against(dataset, tuning, antibody_df, antibody_mapping_df, 'antibody')

def _display_contingency_tables(dataset, truth_df, truth_name, row_caption):
    """Display, for every tool, the contingency matrix of reference vs. tool clusters."""
    truth_labels = truth_df[f'cluster_{truth_name}']
    # The sorted unique labels are used as row/column names of the matrices
    row_names = np.unique(truth_labels)
    for tool in TOOLS:
        tool_labels = truth_df[f'cluster_{tool}']
        cm = contingency_matrix(truth_labels, tool_labels)
        cm = pd.DataFrame(cm, index=row_names, columns=np.unique(tool_labels))
        display(f'{dataset} - contingency_matrix (rows: {row_caption} - cols: {tool})')
        display(cm)


def print_clustering_data(dataset,tuning):
    """Display cluster counts and reference-vs-tool contingency matrices.

    The labels of every tool are loaded, their initial number of clusters is
    displayed, and the tables are inner-joined on ``cell``. After dropping
    small clusters, the joined table is compared with the celltypist labels
    (``tuning`` 'celltypist' or 'default') and/or the antibody labels
    (``tuning`` 'antibody' or 'default'), one contingency matrix per tool.

    Parameters
    ----------
    dataset : str
        Dataset folder name under ``DIR``.
    tuning : str
        Sub-folder of the tool label files; also selects the comparisons.
    """
    # concat tools labels
    labels_df = pd.read_csv(f'{DIR}{dataset}/COTAN/{tuning}/clustering_labels.csv', index_col=0)
    labels_df.rename(columns={"cluster": "cluster_COTAN"}, inplace=True)
    display(f'Initial COTAN cluster number:')
    display(labels_df.cluster_COTAN.unique().shape[0])
    for tool in [t for t in TOOLS if t != 'COTAN']:
        tool_labels_df = pd.read_csv(f'{DIR}{dataset}/{tool}/{tuning}/clustering_labels.csv', index_col=0)
        display(f'Initial {tool} cluster number:')
        # Count on the tool's own table, before the join. Counting the last
        # column of the merged table reported the previous tool's clusters.
        display(tool_labels_df['cluster'].unique().shape[0])
        labels_df = labels_df.merge(tool_labels_df, how='inner', on='cell')
        labels_df.rename(columns={"cluster": f"cluster_{tool}"}, inplace=True)

    labels_df = drop_small_clusters(df=labels_df, min_size=min_size_cluster)

    if tuning == 'celltypist' or tuning == 'default':
        # load and concat celltypist labels
        celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
        # The last two characters of the celltypist cell identifiers are
        # dropped before joining. NOTE(review): presumably a barcode suffix -- confirm.
        celltypist_df.index = celltypist_df.index.str[:-2]
        celltypist_df = labels_df.merge(celltypist_df, how='inner', on='cell')
        celltypist_df.rename(columns={"cluster.ids": f"cluster_celltypist"}, inplace=True)
        celltypist_df = drop_small_clusters(df=celltypist_df, min_size=min_size_cluster)
        _display_contingency_tables(dataset, celltypist_df, 'celltypist', 'cellTypist')

    if tuning == 'antibody' or tuning == 'default':
        # load and concat protein surface labels
        antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)
        display("Initial antibody cell/cluster table:")
        display(antibody_df["cluster.ids"].value_counts())
        antibody_df = labels_df.merge(antibody_df, how='inner', on='cell')
        antibody_df.rename(columns={"cluster.ids": f"cluster_antibody"}, inplace=True)
        antibody_df = drop_small_clusters(df=antibody_df, min_size=min_size_cluster)
        _display_contingency_tables(dataset, antibody_df, 'antibody', 'antibody')

Data summary information

Default parameters

print_clustering_data(tuning = 'default',dataset="PBMC1")

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC1 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3
1	8	970	1
2	943	0	0
3	47	0	0
4	0	78	0
5	309	0	0
6	0	0	142
7	82	0	0
8	278	0	1
9	81	0	0
10	0	171	0
11	70	0	0
12	240	0	0
13	0	28	0
14	0	0	155

'PBMC1 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	0	0	246	0	267	0	263	0	200	0	1	0	2	0	0	0	0	0
2	88	321	0	281	1	0	0	241	0	1	0	1	0	1	0	8	0	0
3	45	0	0	0	0	0	0	2	0	0	0	0	0	0	0	0	0	0
4	0	0	2	0	0	0	0	0	0	0	0	0	0	0	0	0	76	0
5	250	5	0	0	0	0	0	2	0	0	0	52	0	0	0	0	0	0
6	0	0	0	0	0	0	0	0	0	0	4	0	138	0	0	0	0	0
7	8	0	0	0	0	0	0	0	0	19	0	37	0	18	0	0	0	0
8	0	0	0	0	0	263	0	0	0	16	0	0	0	0	0	0	0	0
9	2	2	0	5	0	0	0	5	0	0	0	0	0	1	0	66	0	0
10	0	0	75	0	0	0	5	0	0	0	0	0	0	0	91	0	0	0
11	0	0	0	0	0	5	0	0	0	65	0	0	0	0	0	0	0	0
12	6	0	0	0	0	0	0	4	0	82	0	59	0	86	0	3	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28
14	0	0	0	0	0	0	0	0	0	0	154	0	1	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13
1	659	0	0	0	289	0	1	0	0	30	0	0	0
2	0	485	48	402	0	2	0	0	0	0	0	6	0
3	0	1	41	5	0	0	0	0	0	0	0	0	0
4	0	0	0	0	6	0	0	0	0	72	0	0	0
5	0	5	288	12	0	4	0	0	0	0	0	0	0
6	0	0	0	0	0	0	1	2	139	0	0	0	0
7	0	0	58	0	0	23	1	0	0	0	0	0	0
8	0	0	0	0	0	0	279	0	0	0	0	0	0
9	0	4	0	2	0	2	0	0	0	0	0	73	0
10	1	0	0	0	78	0	0	0	0	0	92	0	0
11	0	0	0	0	0	67	3	0	0	0	0	0	0
12	0	1	48	1	0	189	0	0	0	0	0	1	0
13	0	0	0	0	0	0	0	0	0	0	0	0	28
14	0	0	0	0	0	1	0	147	7	0	0	0	0

'PBMC1 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11
1	0	0	616	361	1	0	0	0	1	0	0
2	798	145	0	0	0	0	0	0	0	0	0
3	1	46	0	0	0	0	0	0	0	0	0
4	0	0	0	4	0	0	0	0	0	74	0
5	0	309	0	0	0	0	0	0	0	0	0
6	0	0	0	0	0	0	0	142	0	0	0
7	0	55	0	0	0	27	0	0	0	0	0
8	0	0	0	0	274	5	0	0	0	0	0
9	78	3	0	0	0	0	0	0	0	0	0
10	0	0	0	43	0	0	0	0	128	0	0
11	0	0	0	0	5	65	0	0	0	0	0
12	7	69	0	0	0	164	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	1	27
14	0	0	0	0	0	0	153	2	0	0	0

'PBMC1 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	14	15	16	17	18	19	20	21	22	23
1	35	0	0	175	97	75	389	207	0	0	...	0	0	0	0	0	1	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	20	24	56	29	0	0	0	0	3	0
3	0	0	0	0	0	0	0	0	0	0	...	0	1	9	1	0	0	0	6	7	21
4	0	73	0	0	0	0	0	5	0	0	...	0	0	0	0	0	0	0	0	0	0
5	0	0	0	0	0	0	0	0	0	0	...	7	8	290	3	0	0	0	0	0	1
6	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
7	0	0	0	0	0	0	0	0	0	0	...	0	0	56	0	25	1	0	0	0	0
8	0	0	0	0	0	0	0	0	0	0	...	0	3	1	0	4	115	104	47	5	0
9	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
10	0	1	122	0	0	2	3	42	0	0	...	0	0	0	0	0	0	0	0	0	0
11	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	67	0	2	1	0	0
12	0	0	0	0	0	0	0	0	0	0	...	0	5	50	1	175	0	0	1	0	0
13	0	1	0	0	0	0	0	0	27	0	...	0	0	0	0	0	0	0	0	0	0
14	0	0	0	0	0	0	0	0	0	152	...	0	0	0	0	0	0	0	0	0	0

14 rows × 23 columns

'Initial antibody cell/cluster table:'

cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64

'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2	3
1	161	0	0
2	0	43	3
3	600	0	0
4	262	1	0
5	158	0	0
6	1	86	0
7	10	1115	0
8	812	1	1
9	1	0	294
10	44	0	0
12	10	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	7	6	0	4	0	0	0	9	0	23	0	7	0	32	0	73	0	0
2	0	0	3	0	0	0	0	0	0	0	2	0	3	0	38	0	0	0
3	366	33	0	5	0	0	0	28	0	43	0	122	0	3	0	0	0	0
4	0	0	0	0	1	249	0	0	0	13	0	0	0	0	0	0	0	0
5	9	0	0	0	0	0	0	2	0	64	0	15	0	68	0	0	0	0
6	1	3	0	0	0	0	0	0	0	0	0	0	0	0	1	0	56	26
7	1	0	319	0	267	0	267	1	198	0	0	0	0	0	52	0	19	1
8	18	288	0	277	0	1	0	214	0	3	1	5	0	2	0	4	0	1
9	0	0	0	0	0	1	0	0	0	1	156	0	137	0	0	0	0	0
10	0	0	0	0	0	7	0	0	0	36	0	0	0	1	0	0	0	0
12	0	0	0	0	0	10	0	0	0	0	0	0	0	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13
1	0	6	12	5	0	62	0	0	0	0	0	76	0
2	2	0	0	0	3	0	0	1	1	0	39	0	0
3	0	49	441	66	0	44	0	0	0	0	0	0	0
4	1	0	0	0	0	0	262	0	0	0	0	0	0
5	0	2	12	1	0	141	0	0	0	0	0	2	0
6	0	1	0	0	0	3	0	0	0	56	1	0	26
7	655	3	0	0	368	0	1	0	0	45	52	0	1
8	0	438	17	349	0	4	3	0	0	0	0	2	1
9	0	0	0	0	0	0	3	148	144	0	0	0	0
10	0	0	1	0	0	37	6	0	0	0	0	0	0
12	0	0	0	0	0	0	10	0	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11
1	93	17	0	0	0	51	0	0	0	0	0
2	0	0	1	4	0	0	1	1	39	0	0
3	23	540	0	0	0	37	0	0	0	0	0
4	0	0	1	0	260	2	0	0	0	0	0
5	1	28	0	0	0	129	0	0	0	0	0
6	3	1	0	1	0	0	0	0	1	56	25
7	2	1	611	402	1	0	0	0	89	18	1
8	766	41	0	0	1	4	1	0	0	0	1
9	0	0	0	0	2	0	151	142	0	0	0
10	0	0	0	0	8	36	0	0	0	0	0
12	0	0	0	0	8	2	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	14	15	16	17	18	19	20	21	22	23
1	0	0	0	0	0	0	0	0	0	0	...	0	2	10	0	54	0	0	1	0	0
2	1	1	38	0	0	1	1	2	0	1	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	26	23	417	25	39	0	0	6	10	22
4	1	0	0	0	0	0	0	0	0	0	...	0	0	0	0	1	113	99	45	4	0
5	0	0	0	0	0	0	0	0	0	0	...	0	1	20	0	135	0	0	0	0	0
6	0	56	1	1	0	0	0	1	25	0	...	1	1	0	1	0	0	0	0	0	0
7	33	17	83	175	96	76	389	251	1	0	...	0	1	0	0	0	1	0	0	0	0
8	0	0	0	0	0	0	0	0	1	1	...	1	12	14	9	4	0	0	0	0	0
9	0	0	0	0	0	0	0	0	0	150	...	0	0	0	0	0	1	0	1	0	0
10	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	38	0	3	2	1	0
12	0	0	0	0	0	0	0	0	0	0	...	0	2	1	0	1	2	4	0	0	0

11 rows × 23 columns

print_clustering_data(tuning = 'default',dataset="PBMC2")

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC2 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2
1	230	1
2	427	0
3	2139	3
4	700	7
5	316	0
6	0	93
7	0	567
8	674	0
9	0	186
10	52	0
11	0	228
12	0	204
13	0	48
14	0	14
15	80	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	0	0	0	2	0	0	1	22	1	2	5	50	0	148	0	0	0	0
2	0	91	0	273	0	0	0	3	2	0	56	1	0	1	0	0	0	0
3	942	508	21	183	0	0	0	21	295	2	100	42	0	26	1	0	0	1
4	0	0	0	1	0	463	8	2	0	230	0	0	0	2	1	0	0	0
5	0	0	0	2	0	0	0	266	0	0	42	0	0	6	0	0	0	0
6	0	0	0	0	5	0	0	0	0	0	0	0	0	0	0	88	0	0
7	0	0	0	0	466	0	0	0	0	0	0	0	15	0	86	0	0	0
8	2	1	558	0	0	0	0	3	0	0	0	110	0	0	0	0	0	0
9	0	0	0	0	12	0	0	0	0	0	0	0	174	0	0	0	0	0
10	0	0	0	0	0	0	0	0	0	50	2	0	0	0	0	0	0	0
11	0	0	0	0	0	0	228	0	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	0	0	204	0	0	0	0	0	0	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0
14	0	0	0	0	0	0	1	0	0	0	0	0	0	0	0	0	0	13
15	0	25	0	46	0	0	0	1	2	1	2	2	0	1	0	0	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20
1	0	0	0	0	0	0	222	1	2	0	0	4	1	1	0	0	0	0	0	0
2	331	4	1	0	0	0	7	10	53	0	0	1	0	10	4	0	6	0	0	0
3	391	733	2	1	19	1	41	304	158	185	0	2	43	90	75	0	71	0	0	26
4	1	0	675	1	0	8	3	0	0	0	0	0	0	0	0	0	0	19	0	0
5	7	0	1	0	0	0	11	9	66	0	0	151	71	0	0	0	0	0	0	0
6	0	0	0	12	0	0	0	0	0	0	0	0	0	0	0	78	0	0	3	0
7	0	0	0	564	0	0	0	0	0	0	3	0	0	0	0	0	0	0	0	0
8	0	2	0	1	561	0	103	1	0	2	0	1	3	0	0	0	0	0	0	0
9	0	0	0	11	0	0	0	0	0	0	175	0	0	0	0	0	0	0	0	0
10	2	0	4	0	0	0	0	0	0	0	0	0	2	0	0	0	0	44	0	0
11	0	0	0	0	0	228	0	0	0	0	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	0	204	0	0	0	0	0	0	0	0	0	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0
14	0	0	0	0	0	0	0	0	0	14	0	0	0	0	0	0	0	0	0	0
15	58	0	1	0	0	0	5	5	1	5	0	0	1	3	0	0	1	0	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14
1	0	4	6	0	0	0	219	0	1	0	0	1	0	0
2	0	400	23	0	0	0	1	0	3	0	0	0	0	0
3	1151	352	550	0	2	15	69	1	0	0	0	2	0	0
4	0	0	2	635	0	0	3	7	0	0	1	59	0	0
5	0	42	0	0	0	0	7	0	267	0	0	0	0	0
6	0	0	0	0	11	0	0	0	0	0	0	0	82	0
7	0	0	0	0	567	0	0	0	0	0	0	0	0	0
8	8	4	2	0	0	541	119	0	0	0	0	0	0	0
9	0	0	0	0	14	0	0	0	0	172	0	0	0	0
10	0	0	0	3	0	0	0	0	0	0	0	49	0	0
11	0	0	0	0	0	0	0	83	0	0	145	0	0	0
12	0	0	0	0	0	0	0	201	1	0	2	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	48
14	0	0	0	0	0	0	0	0	0	0	0	14	0	0
15	0	0	77	0	0	0	2	0	0	0	0	1	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	22	23	24	25	26	27	28	29	30	31
1	0	0	0	0	0	0	1	25	54	3	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	1	0	2	...	0	0	0	0	0	0	0	0	0	0
3	2	0	0	0	1172	14	56	7	10	3	...	0	0	0	0	0	0	0	0	0	1
4	56	141	383	115	0	0	0	0	0	0	...	0	0	0	0	0	2	1	0	2	2
5	0	0	0	0	0	0	1	1	4	270	...	0	0	0	0	0	0	0	0	0	0
6	0	0	0	0	0	0	0	0	0	0	...	0	0	7	0	0	0	0	0	0	0
7	0	0	0	0	0	0	0	0	1	0	...	36	111	211	150	0	0	0	0	0	0
8	0	0	0	0	6	562	2	78	17	0	...	0	0	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	0	0	0	0	...	0	1	10	0	0	0	0	0	0	1
10	51	0	0	1	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
11	0	0	0	0	0	0	0	1	0	0	...	0	0	0	0	0	1	0	141	72	13
12	0	0	0	0	0	0	0	0	0	1	...	0	0	0	0	0	8	47	2	50	96
13	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	48	0	0	0	0	0
14	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	10	0	0	0	3
15	1	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

15 rows × 31 columns

'Initial antibody cell/cluster table:'

cluster.ids
4     1510
11    1130
8      695
12     570
6      424
13     275
5      197
2      150
10     122
3       84
7       76
Name: count, dtype: int64

'PBMC2 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2
2	0	145
3	60	19
4	1480	5
5	196	0
6	416	1
7	68	5
8	680	3
10	0	115
11	1115	7
12	566	2
13	0	262

'PBMC2 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	16
2	0	0	0	0	4	0	0	0	0	0	0	0	141	0	0
3	0	1	1	0	0	3	0	0	0	54	0	0	18	2	0
4	89	588	0	478	0	0	0	56	63	0	194	10	0	3	4
5	7	1	14	2	0	0	0	118	2	2	4	23	0	23	0
6	0	2	9	13	0	0	1	128	1	0	3	124	0	136	0
7	1	3	27	3	0	0	0	6	1	0	1	23	0	3	5
8	0	0	0	0	0	459	2	0	0	220	0	0	2	0	0
10	0	0	0	0	106	0	0	0	0	0	0	0	3	0	6
11	843	26	5	5	0	0	5	4	229	0	1	2	0	0	2
12	2	0	522	0	0	0	2	1	1	2	0	21	0	17	0
13	0	0	0	0	0	0	262	0	0	0	0	0	0	0	0

'PBMC2 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	20
2	0	0	0	3	0	0	0	0	0	0	142	0	0	0	0	0	0	0	0
3	0	0	55	0	1	0	0	1	0	0	18	0	0	0	0	0	0	4	0
4	758	143	0	0	0	0	30	125	259	51	0	5	4	54	26	4	23	0	3
5	4	8	1	0	5	0	27	4	6	0	0	36	105	0	0	0	0	0	0
6	4	0	0	0	17	0	270	2	9	0	0	112	3	0	0	0	0	0	0
7	0	1	0	1	31	0	27	2	1	1	0	5	0	0	0	4	0	0	0
8	0	0	622	0	0	2	0	0	0	0	2	0	1	0	0	0	0	56	0
10	0	0	0	107	0	0	0	0	0	0	4	0	0	0	0	4	0	0	0
11	14	581	0	0	5	5	2	191	5	138	0	0	1	50	51	2	54	0	23
12	0	2	1	0	520	2	34	1	0	1	0	0	6	0	0	0	1	0	0
13	0	0	0	0	0	262	0	0	0	0	0	0	0	0	0	0	0	0	0

'PBMC2 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13
2	0	0	0	0	6	0	0	0	0	139	0	0	0
3	0	0	1	9	1	1	1	0	0	18	0	48	0
4	65	756	607	0	0	0	10	0	42	0	0	1	4
5	6	6	2	0	0	12	65	0	104	0	0	1	0
6	0	22	3	0	0	7	267	0	118	0	0	0	0
7	2	7	5	0	0	21	30	0	3	0	0	0	5
8	0	0	0	627	0	0	0	2	0	2	0	52	0
10	0	0	0	0	109	0	0	0	0	3	0	0	3
11	1073	4	35	0	0	1	1	4	1	0	1	0	2
12	9	0	0	0	0	512	44	1	0	0	1	1	0
13	0	0	0	0	0	0	0	159	1	0	102	0	0

'PBMC2 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	19	20	21	23	24	25	28	29	30	31
2	0	0	0	0	0	0	0	0	0	0	...	4	136	0	1	4	0	0	0	0	0
3	49	0	4	4	0	1	0	0	0	0	...	17	1	0	0	0	0	0	0	0	0
4	0	0	0	0	76	0	1	5	6	44	...	0	0	4	0	0	0	0	0	0	0
5	0	0	0	0	9	10	37	3	4	105	...	0	0	0	0	0	0	0	0	0	0
6	0	0	0	0	0	11	11	81	64	120	...	0	0	0	0	0	0	0	0	0	0
7	0	0	0	0	1	31	0	14	1	4	...	0	0	5	0	0	0	0	0	0	0
8	50	140	378	112	0	0	0	0	0	0	...	2	0	0	0	0	0	1	0	0	0
10	0	0	0	0	0	0	0	0	0	0	...	0	3	5	26	59	22	0	0	0	0
11	0	0	0	0	1081	1	0	0	0	1	...	0	0	2	0	0	0	1	1	2	1
12	1	0	0	0	6	520	11	9	9	1	...	0	0	0	0	0	0	0	0	1	0
13	0	0	0	0	0	0	0	0	0	1	...	0	0	0	0	0	0	31	102	59	69

11 rows × 26 columns

# PBMC3, 'default' tuning: the cell output lists each tool's contingency
# matrix against the cellTypist labels and against the antibody labels.
print_clustering_data(dataset="PBMC3", tuning='default')

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC3 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3
1	3021	0	0
2	1	1471	0
3	6	1	655
4	1100	0	0
5	1183	26	33
6	0	156	0
7	1112	1	0
8	484	0	0
9	0	0	396
10	0	408	0
11	430	0	0
13	233	0	0
14	111	0	0
15	4	16	0
16	0	11	0
18	0	57	0
19	0	12	0

'PBMC3 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	...	13	14	15	16	17	18	19	20	21	22
1	1401	0	153	0	0	29	26	534	121	429	...	1	0	100	0	0	0	0	0	28	0
2	0	0	0	0	816	0	0	0	0	0	...	0	2	0	227	0	5	0	35	0	0
3	0	0	0	543	0	0	0	0	5	0	...	0	0	0	0	111	0	0	2	0	0
4	26	0	0	0	0	806	12	5	29	6	...	0	0	155	0	0	0	0	0	1	0
5	0	961	0	0	0	0	3	0	0	0	...	216	0	0	29	33	0	0	0	0	0
6	0	0	0	0	8	0	0	0	0	0	...	0	0	0	0	0	147	0	0	0	0
7	0	0	683	0	0	0	7	128	236	15	...	0	0	6	1	0	0	0	0	0	0
8	0	0	0	0	0	0	423	0	32	0	...	4	0	6	0	0	0	0	0	0	0
9	0	0	0	395	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	0
10	0	0	0	0	90	0	0	0	0	0	...	0	311	0	1	0	1	0	1	0	0
11	0	2	0	0	0	0	281	2	54	0	...	4	0	42	0	0	0	0	0	0	0
13	12	0	116	0	0	0	1	52	9	11	...	0	0	1	0	0	0	0	0	0	0
14	0	5	1	0	0	0	5	0	3	0	...	97	0	0	0	0	0	0	0	0	0
15	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	20
16	0	0	0	0	0	0	0	0	0	0	...	0	0	0	11	0	0	0	0	0	0
18	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	57	0	0	0
19	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	12	0	0	0	0	0

17 rows × 22 columns

'PBMC3 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17
1	740	1603	2	1	1	40	86	23	248	1	197	79	0	0	0	0	0
2	0	0	1460	0	1	0	0	0	0	3	0	0	8	0	0	0	0
3	0	0	4	2	655	0	1	0	0	0	0	0	0	0	0	0	0
4	6	11	0	1	1	912	162	2	1	0	4	0	0	0	0	0	0
5	0	0	29	1165	31	0	7	1	0	0	0	0	0	8	0	1	0
6	0	0	10	0	0	0	0	0	0	0	0	0	146	0	0	0	0
7	883	14	1	0	0	1	25	29	80	0	70	10	0	0	0	0	0
8	24	0	0	6	0	1	5	313	0	0	0	135	0	0	0	0	0
9	0	0	0	0	396	0	0	0	0	0	0	0	0	0	0	0	0
10	0	0	97	0	0	0	0	0	0	311	0	0	0	0	0	0	0
11	10	0	0	2	0	2	408	2	0	0	1	4	0	1	0	0	0
13	174	26	0	0	0	0	3	0	25	0	5	0	0	0	0	0	0
14	0	0	1	19	0	0	1	0	0	0	0	0	0	90	0	0	0
15	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	20	0
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11
18	0	0	1	0	1	0	0	0	0	0	0	0	0	0	55	0	0
19	0	0	0	0	0	12	0	0	0	0	0	0	0	0	0	0	0

'PBMC3 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	1740	415	0	0	681	20	162	0	3	0	0	0	0	0	0	0	0	0
2	0	0	1013	0	0	0	0	0	0	0	0	227	0	229	3	0	0	0
3	0	0	0	0	0	0	2	535	0	2	0	1	2	0	0	120	0	0
4	16	6	0	0	3	886	188	0	0	0	0	0	0	0	0	1	0	0
5	0	1	4	1043	0	0	2	1	1	0	0	24	136	0	0	30	0	0
6	0	0	7	0	0	0	0	0	0	0	0	0	0	0	149	0	0	0
7	0	980	0	0	108	0	18	0	7	0	0	0	0	0	0	0	0	0
8	0	18	0	3	0	0	8	0	454	0	0	0	1	0	0	0	0	0
9	0	0	0	0	0	0	0	57	0	336	0	0	0	0	0	3	0	0
10	0	0	84	0	0	0	0	0	0	0	319	1	0	4	0	0	0	0
11	0	14	0	0	0	2	411	0	3	0	0	0	0	0	0	0	0	0
13	13	5	0	0	208	0	7	0	0	0	0	0	0	0	0	0	0	0
14	0	0	0	2	0	0	0	0	0	0	0	0	109	0	0	0	0	0
15	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	20
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11
18	0	0	0	0	0	0	0	0	0	0	0	0	0	0	5	0	52	0
19	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	12

'PBMC3 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	48	49	50	51	52	53	54	55	56	57
1	8	131	1674	5	9	2	47	40	87	291	...	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	1	0	0	0	0	0	0
3	0	1	0	0	0	0	0	1	1	0	...	32	25	94	108	268	104	2	7	15	0
4	40	52	10	428	380	0	0	2	7	5	...	0	0	0	0	0	0	0	0	0	0
5	0	0	0	0	0	0	0	1	0	0	...	0	3	2	0	0	25	0	0	0	0
6	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
7	0	0	0	0	0	4	4	384	571	53	...	0	0	0	0	0	0	0	0	0	0
8	0	0	0	0	0	0	0	0	16	1	...	0	0	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	0	0	0	0	...	1	0	0	0	34	1	217	142	1	0
10	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
11	1	3	0	0	1	0	0	0	4	11	...	0	0	0	0	0	0	0	0	0	0
13	0	2	5	0	0	137	69	4	1	1	...	0	0	0	0	0	0	0	0	0	0
14	0	0	0	0	0	0	0	0	1	0	...	0	0	0	0	0	0	0	0	0	0
15	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	16	0
16	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
18	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	52
19	0	0	0	0	0	0	0	0	0	0	...	1	1	0	0	0	0	0	0	8	0

17 rows × 57 columns

'Initial antibody cell/cluster table:'

cluster.ids
9     2220
10    1635
7     1271
13    1067
5     1010
12     909
6      744
2      271
4      214
14     168
3      149
23     133
22      71
Name: count, dtype: int64

'PBMC3 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2	3
2	2	264	0
3	132	12	3
4	209	2	1
5	993	11	1
6	730	0	0
7	4	1223	0
9	2201	9	1
10	1614	8	3
12	902	2	1
13	2	41	1002
14	140	20	1
22	69	0	0
23	130	0	0

'PBMC3 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	20	21
2	0	1	0	0	17	0	0	0	0	0	0	0	1	223	0	24	0	0	0	0
3	0	14	2	0	0	1	5	0	3	0	0	1	106	12	0	0	3	0	0	0
4	4	0	5	0	0	50	25	10	28	15	0	13	1	0	57	0	1	2	0	1
5	0	911	0	0	0	0	2	0	0	0	0	0	80	10	0	1	1	0	0	0
6	1	0	8	0	0	9	422	11	75	1	0	48	3	0	151	1	0	0	0	0
7	0	0	1	0	735	0	0	1	0	0	275	0	0	20	0	167	0	28	0	0
9	92	0	808	1	0	1	26	593	323	196	0	136	0	3	19	1	0	5	0	7
10	1252	2	4	0	0	15	3	18	4	205	0	82	3	1	5	3	4	4	0	20
12	25	1	0	0	1	737	32	0	8	2	0	52	5	0	40	1	1	0	0	0
13	0	0	0	873	1	0	1	0	1	0	1	1	0	6	0	1	128	2	30	0
14	0	17	0	0	0	0	3	0	2	1	0	0	116	20	0	1	1	0	0	0
22	10	0	9	0	0	0	1	18	6	19	0	4	0	0	1	0	0	0	0	1
23	0	0	90	0	0	0	0	18	1	2	0	19	0	0	0	0	0	0	0	0

'PBMC3 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14
2	0	0	41	2	0	0	0	0	0	223	0	0	0	0
3	3	0	1	48	2	0	6	1	0	12	0	0	0	74
4	18	8	1	0	2	76	80	18	0	0	7	0	2	0
5	0	0	1	979	1	0	0	0	0	10	0	0	0	14
6	14	1	1	1	0	23	457	221	0	0	6	3	0	3
7	2	0	1181	0	0	0	0	0	0	18	0	0	26	0
9	1524	275	1	0	2	1	26	47	171	3	134	22	5	0
10	52	1281	1	6	4	13	4	2	149	2	101	4	6	0
12	0	5	2	4	1	816	64	5	2	0	1	5	0	0
13	0	0	31	1	994	0	1	1	8	7	0	0	2	0
14	0	0	1	129	1	0	0	3	0	20	0	0	0	7
22	39	14	0	0	0	0	0	4	4	0	6	2	0	0
23	111	2	0	0	0	0	0	0	13	0	4	0	0	0

'PBMC3 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16
2	0	0	18	1	0	0	0	0	0	0	226	20	1	0	0	0
3	0	3	0	25	0	1	1	0	2	0	12	0	101	0	0	2
4	11	28	0	0	12	48	97	0	12	0	0	0	1	0	2	1
5	0	0	0	979	0	0	0	0	0	0	10	1	14	0	0	1
6	1	22	0	0	6	16	461	0	221	0	0	1	2	0	0	0
7	0	2	873	0	0	0	0	0	0	0	22	139	0	164	27	0
9	129	1302	0	0	716	0	31	1	23	0	3	1	0	0	5	0
10	1543	8	1	2	46	3	6	0	2	0	0	3	3	0	4	4
12	6	0	1	0	3	816	69	0	4	0	0	1	4	0	0	1
13	0	0	1	1	0	0	0	549	1	327	7	30	0	1	2	126
14	0	1	0	14	1	0	0	0	2	0	20	1	121	0	0	1
22	12	19	0	0	34	0	3	0	1	0	0	0	0	0	0	0
23	0	4	0	0	126	0	0	0	0	0	0	0	0	0	0	0

'PBMC3 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	47	48	49	50	51	52	53	54	55	56
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	1	0	0	0	1	3	...	0	0	1	0	0	0	1	0	0	0
4	1	5	14	46	8	1	4	10	16	11	...	0	0	0	0	0	0	1	0	0	0
5	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	1	0	0	0
6	2	5	2	7	2	1	0	14	9	6	...	0	0	0	0	0	0	0	0	0	0
7	0	0	0	0	0	0	0	0	1	1	...	255	0	0	0	0	0	0	0	0	0
9	0	25	135	0	0	28	69	384	639	293	...	0	0	0	0	0	1	0	0	0	0
10	2	89	1461	2	1	0	19	6	2	1	...	0	0	0	0	0	0	3	0	0	0
12	33	47	1	373	368	0	0	0	1	0	...	0	0	0	0	0	0	1	0	0	0
13	0	1	0	0	0	0	0	0	1	0	...	0	36	26	90	98	276	104	212	144	14
14	0	0	0	0	0	0	0	1	0	0	...	0	0	0	0	0	0	1	0	0	0
22	0	0	18	0	0	1	3	5	6	3	...	0	0	0	0	0	0	0	0	0	0
23	0	0	0	0	0	108	14	4	0	3	...	0	0	0	0	0	0	0	0	0	0

13 rows × 54 columns

# PBMC4, 'default' tuning: the cell output lists each tool's contingency
# matrix against the cellTypist labels and against the antibody labels.
print_clustering_data(dataset="PBMC4", tuning='default')

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC4 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3
1	407	0	0
2	11	0	797
3	1330	1	0
4	108	0	0
5	9	2178	13
6	308	0	0
7	77	0	0
8	538	0	0
9	358	1	0
10	0	307	0
11	1	1	222
12	0	28	0
13	8	3	2
14	106	0	0
15	0	92	1
16	0	59	0

'PBMC4 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	...	13	14	15	16	17	18	19	20	21	22
1	10	0	0	0	0	0	0	384	1	0	...	0	6	0	0	0	1	5	0	0	0
2	0	673	0	0	0	0	0	0	1	0	...	0	0	0	0	0	0	0	0	0	34
3	300	0	0	0	496	385	0	1	0	0	...	136	10	3	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	6	10	0	...	0	7	0	0	0	0	85	0	0	0
5	1	0	596	456	0	0	427	0	0	281	...	0	0	0	169	76	145	0	48	0	0
6	7	0	0	0	4	3	0	0	0	0	...	8	99	187	0	0	0	0	0	0	0
7	3	0	0	0	0	0	0	1	0	0	...	0	67	0	0	0	0	6	0	0	0
8	462	0	0	0	8	46	0	1	0	0	...	19	2	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	0	5	348	0	...	1	0	0	0	0	0	5	0	0	0
10	0	0	0	74	0	0	1	0	0	2	...	0	0	0	10	2	1	0	1	0	0
11	0	46	0	0	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	5
12	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	28	0	0
13	7	0	0	0	0	0	0	0	0	0	...	6	0	0	0	0	0	0	0	0	0
14	42	0	0	0	1	13	0	1	0	0	...	47	2	0	0	0	0	0	0	0	0
15	0	0	0	0	0	0	0	0	0	1	...	0	0	0	2	88	2	0	0	0	0
16	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	59	0

16 rows × 22 columns

'PBMC4 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16
1	0	0	4	1	0	0	1	0	315	0	0	0	86	0	0	0
2	800	0	2	1	0	0	2	0	1	0	1	0	1	0	0	0
3	0	939	357	2	0	0	20	0	1	0	0	0	0	0	0	12
4	0	0	0	0	0	0	105	2	0	0	0	0	1	0	0	0
5	4	0	1	805	746	413	0	4	2	1	167	15	1	41	0	0
6	0	8	8	0	0	0	291	0	1	0	0	0	0	0	0	0
7	0	0	3	0	0	0	67	1	6	0	0	0	0	0	0	0
8	0	31	483	0	0	0	3	0	6	0	0	0	0	0	0	15
9	0	0	1	2	0	0	0	355	0	0	0	0	1	0	0	0
10	0	0	0	14	0	94	1	0	0	194	4	0	0	0	0	0
11	217	0	0	0	0	0	0	0	0	0	7	0	0	0	0	0
12	0	0	0	0	0	0	0	0	0	0	0	0	0	28	0	0
13	0	1	1	0	0	0	0	6	0	0	0	0	0	0	5	0
14	1	9	89	0	0	0	4	0	1	0	0	0	0	0	0	2
15	0	0	0	2	0	3	0	0	0	0	1	87	0	0	0	0
16	0	0	0	0	0	0	0	0	0	0	0	1	0	0	58	0

'PBMC4 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19
1	5	0	0	0	0	0	398	0	0	4	0	0	0	0	0	0	0	0	0
2	0	0	0	0	632	0	1	0	122	1	0	0	0	0	0	0	0	52	0
3	356	854	0	0	0	0	0	0	0	2	116	0	3	0	0	0	0	0	0
4	0	0	0	0	0	0	0	1	0	107	0	0	0	0	0	0	0	0	0
5	2	0	780	723	0	447	2	0	2	0	1	1	0	136	28	35	0	0	43
6	31	15	0	0	0	0	0	0	0	68	5	0	189	0	0	0	0	0	0
7	3	0	0	0	0	0	3	0	0	69	2	0	0	0	0	0	0	0	0
8	525	0	0	0	0	0	1	0	0	2	10	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	3	354	1	0	1	0	0	0	0	0	0	0	0
10	0	0	0	7	0	86	0	0	0	0	0	213	0	1	0	0	0	0	0
11	0	0	0	0	1	0	0	0	223	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28	0	0	0
13	10	0	0	0	0	0	0	1	0	0	0	0	0	0	1	0	0	1	0
14	4	0	0	0	0	0	0	0	0	1	101	0	0	0	0	0	0	0	0
15	0	0	0	1	0	1	0	0	0	0	0	0	0	0	90	0	0	0	1
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	59	0	0

'PBMC4 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	25	26	27	28	29	30	31	32	33	34
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	3	1
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	1	0	0	0
3	0	0	0	0	0	0	0	0	0	0	...	1	0	0	0	41	4	958	5	321	1
4	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	3
5	40	131	59	247	199	293	724	448	1	27	...	0	0	0	0	0	0	0	0	1	0
6	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	4	0	7	202	26	69
7	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	2	0	0	0	2	66
8	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	7	0	528	2
9	0	0	0	0	0	0	0	0	0	0	...	55	36	118	147	0	1	0	0	1	0
10	0	1	65	18	0	2	6	0	215	0	...	0	0	0	0	0	0	0	0	0	0
11	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	1	...	8	0	0	0	0	0	1	0	1	0
14	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	30	63	1	0	11	1
15	1	0	0	1	0	0	1	0	0	90	...	0	0	0	0	0	0	0	0	0	0
16	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

16 rows × 34 columns

'Initial antibody cell/cluster table:'

cluster.ids
3     2280
1     1367
10    1018
9      488
2      351
14     348
4      242
5      224
24     194
26      64
22      43
12      41
Name: count, dtype: int64

'PBMC4 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2	3
1	1341	4	0
2	334	3	0
3	8	2153	0
4	241	0	0
5	16	195	0
9	473	1	0
10	12	39	931
12	38	0	0
14	343	1	0
22	42	0	0
24	192	0	0
26	63	0	0

'PBMC4 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	...	12	13	14	15	16	17	18	19	20	22
1	744	0	0	0	195	247	0	7	1	0	...	0	139	6	0	0	3	1	2	0	0
2	10	0	0	0	0	0	0	4	317	0	...	0	0	0	0	0	3	0	3	0	0
3	1	0	573	514	1	0	405	0	0	273	...	29	2	0	0	178	26	118	0	41	0
4	3	0	0	0	3	2	0	1	2	0	...	0	8	30	174	0	0	0	18	0	0
5	0	0	1	0	0	0	1	0	14	0	...	178	0	0	0	0	3	11	2	1	0
9	10	0	0	0	1	3	0	283	0	0	...	0	2	130	8	0	1	0	36	0	0
10	1	657	1	0	0	0	1	0	5	1	...	1	0	0	0	0	40	0	0	3	37
12	0	0	0	0	0	0	0	2	3	0	...	0	0	2	0	0	0	0	31	0	0
14	10	0	0	0	205	105	0	0	0	0	...	0	23	0	0	0	1	0	0	0	0
22	6	0	0	0	2	2	0	2	0	0	...	0	0	24	6	0	0	0	0	0	0
24	9	0	0	0	97	75	0	0	1	0	...	0	9	0	1	0	0	0	0	0	0
26	27	0	0	0	1	12	0	1	0	0	...	0	22	0	0	0	0	0	0	0	0

12 rows × 21 columns

'PBMC4 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	14	16
1	0	454	839	0	1	1	8	1	12	0	0	2	0	27
2	0	1	4	1	0	0	0	328	0	0	0	3	0	0
3	0	2	0	719	730	476	1	0	2	15	168	14	34	0
4	0	4	2	0	0	0	230	3	2	0	0	0	0	0
5	0	0	0	10	1	7	0	19	0	170	0	3	1	0
9	0	2	8	0	0	0	175	0	288	0	0	1	0	0
10	921	0	3	36	4	1	0	5	0	2	8	0	2	0
12	0	0	0	0	0	0	38	0	0	0	0	0	0	0
14	0	324	17	0	0	0	0	0	0	0	0	1	0	2
22	0	3	4	0	0	0	27	0	8	0	0	0	0	0
24	0	185	4	0	0	0	2	1	0	0	0	0	0	0
26	1	4	58	0	0	0	0	0	0	0	0	0	0	0

'PBMC4 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	18	19
1	852	323	0	0	0	0	9	1	0	5	151	0	0	1	3	0	0	0
2	9	0	0	0	0	0	2	323	0	0	0	0	0	0	3	0	0	0
3	4	1	750	693	0	519	1	0	0	0	1	25	0	112	26	29	0	0
4	1	11	0	0	0	0	1	2	0	46	2	0	178	0	0	0	0	0
5	0	0	1	2	0	2	1	15	0	0	0	177	0	9	3	1	0	0
9	28	5	0	0	0	0	285	0	0	140	9	0	6	0	1	0	0	0
10	1	0	3	3	598	0	0	4	304	0	0	1	0	0	0	2	29	37
12	0	0	0	0	0	0	0	1	0	36	1	0	0	0	0	0	0	0
14	10	326	0	0	0	0	0	0	0	0	6	0	1	0	1	0	0	0
22	12	11	0	0	0	0	4	0	0	9	0	0	6	0	0	0	0	0
24	3	186	0	0	0	0	0	1	0	0	1	0	1	0	0	0	0	0
26	3	0	0	0	0	0	0	0	0	0	60	0	0	0	0	0	0	0

'PBMC4 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	25	26	27	28	29	30	31	32	33	34
1	0	1	0	0	0	0	0	0	0	3	...	0	0	1	0	60	15	431	0	819	4
2	0	0	0	0	0	0	0	0	0	3	...	56	29	102	142	0	0	0	0	4	0
3	0	109	119	254	195	292	687	427	26	25	...	0	0	0	0	0	0	2	0	2	0
4	0	0	0	0	0	0	0	0	0	0	...	1	0	1	0	1	0	4	187	2	26
5	0	9	0	0	0	1	2	1	178	3	...	0	4	10	1	0	0	0	0	1	0
9	0	0	0	0	0	0	0	0	0	1	...	0	0	0	0	5	1	3	9	27	102
10	33	1	1	1	1	0	4	2	1	0	...	1	1	1	2	0	0	1	0	1	0
12	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	1	0	0	0	0	1
14	0	0	0	0	0	0	0	0	0	1	...	0	0	0	0	0	1	332	1	9	0
22	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	8	9	11	8
24	0	0	0	0	0	0	0	0	0	0	...	0	0	1	0	0	0	186	1	4	0
26	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	6	51	1	0	5	0

12 rows × 31 columns

Against cellTypist cluster number

# PBMC1, 'celltypist' tuning: the cell output lists each tool's contingency
# matrix against the cellTypist labels.
print_clustering_data(dataset="PBMC1", tuning='celltypist')

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC1 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	0	1	3	1	273	0	0	0	237	227	147	0	6	6	0	77	1	0
2	66	0	0	0	0	230	228	218	0	0	0	145	25	0	31	0	0	0
3	3	0	0	0	0	0	2	0	0	0	0	0	36	0	6	0	0	0
4	0	0	0	0	0	0	0	0	0	0	5	0	0	0	0	0	73	0
5	200	0	0	7	0	1	0	0	0	0	0	1	65	0	35	0	0	0
6	0	142	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
7	36	0	0	40	0	0	0	0	0	0	0	0	6	0	0	0	0	0
8	0	1	267	10	0	0	0	0	0	0	0	0	1	0	0	0	0	0
9	2	0	0	0	0	22	15	20	0	0	0	21	0	0	1	0	0	0
10	0	0	0	0	1	0	0	0	0	2	37	0	0	131	0	0	0	0
11	0	0	21	49	0	0	0	0	0	0	0	0	0	0	0	0	0	0
12	27	0	0	179	0	3	2	1	0	0	0	4	14	0	10	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28
14	0	155	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17
1	0	0	369	243	292	0	0	0	0	1	0	0	0	0	0	74	0
2	474	89	0	0	0	258	0	2	0	0	0	111	0	8	0	1	0
3	0	45	0	0	0	2	0	0	0	0	0	0	0	0	0	0	0
4	0	0	0	2	0	0	0	0	0	0	0	0	0	0	76	0	0
5	2	260	0	0	0	2	0	42	0	0	0	3	0	0	0	0	0
6	0	0	0	0	0	0	0	0	0	4	138	0	0	0	0	0	0
7	0	8	0	0	0	0	0	55	19	0	0	0	0	0	0	0	0
8	0	0	0	0	0	0	263	0	16	0	0	0	0	0	0	0	0
9	5	2	0	0	0	5	0	1	0	0	0	2	0	66	0	0	0
10	0	0	0	78	2	0	0	0	0	0	0	0	91	0	0	0	0
11	0	0	0	0	0	0	5	0	65	0	0	0	0	0	0	0	0
12	0	11	0	0	0	6	0	143	78	0	0	0	0	2	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28
14	0	0	0	0	0	0	0	0	0	154	1	0	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20
1	1	353	261	1	2	0	1	183	0	4	0	152	0	1	19	1	0	0	0	0
2	43	0	0	278	0	227	187	0	180	0	0	0	0	0	0	0	9	16	0	3
3	4	0	0	9	1	0	0	0	2	0	0	0	0	0	0	0	0	31	0	0
4	0	0	5	0	0	0	0	0	0	0	0	0	0	0	73	0	0	0	0	0
5	279	0	0	11	4	9	2	0	3	0	0	0	0	0	0	0	0	1	0	0
6	0	0	0	0	0	0	0	0	0	0	142	0	0	0	0	0	0	0	0	0
7	55	0	0	0	26	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0
8	0	0	0	0	1	0	0	0	0	152	1	0	0	125	0	0	0	0	0	0
9	1	0	0	1	1	3	1	0	0	0	0	0	0	0	0	0	72	0	0	2
10	0	0	86	0	0	0	0	4	0	0	0	0	0	0	0	81	0	0	0	0
11	0	0	0	0	65	0	0	0	0	5	0	0	0	0	0	0	0	0	0	0
12	45	0	0	1	173	1	0	0	2	0	0	0	0	0	0	0	0	1	0	17
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28	0
14	0	0	0	0	0	0	0	0	0	0	9	0	145	0	0	0	0	0	0	1

'PBMC1 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	...	12	13	14	15	16	17	18	19	20	21
1	0	0	351	267	0	229	0	0	0	0	...	0	3	0	1	88	0	0	40	0	0
2	423	119	0	0	238	0	154	0	0	0	...	0	0	0	0	0	0	7	0	1	0
3	1	15	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	31	0
4	0	0	0	4	0	0	0	0	0	0	...	0	0	0	0	0	74	0	0	0	0
5	0	258	0	0	0	0	1	0	1	0	...	0	0	0	0	0	0	0	0	2	0
6	0	0	0	0	0	0	0	0	0	0	...	142	0	0	0	0	0	0	0	0	0
7	0	4	0	0	0	0	0	0	13	0	...	0	0	12	1	0	0	0	0	0	0
8	0	0	0	0	0	0	0	151	0	0	...	0	0	5	123	0	0	0	0	0	0
9	10	2	0	0	11	0	11	0	1	0	...	0	0	0	0	0	0	46	0	0	0
10	0	0	0	38	0	0	0	0	0	0	...	0	130	0	0	3	0	0	0	0	0
11	0	0	0	0	0	0	0	4	0	0	...	0	0	66	0	0	0	0	0	0	0
12	1	3	0	0	4	0	0	0	138	0	...	0	0	44	0	0	0	2	0	1	0
13	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	1	0	0	0	27
14	0	0	0	0	0	0	0	0	0	153	...	2	0	0	0	0	0	0	0	0	0

14 rows × 21 columns

'PBMC1 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	35	0	0	272	464	207	0	0	0	0	0	0	0	0	0	1	0	0
2	0	0	0	0	0	0	0	0	0	800	31	24	85	3	0	0	0	0
3	0	0	0	0	0	0	0	0	0	2	0	22	10	7	0	0	0	6
4	0	73	0	0	0	5	0	0	0	0	0	0	0	0	0	0	0	0
5	0	0	0	0	0	0	0	0	0	0	7	9	293	0	0	0	0	0
6	0	0	0	0	0	0	0	0	142	0	0	0	0	0	0	0	0	0
7	0	0	0	0	0	0	0	0	0	0	0	0	56	0	25	1	0	0
8	0	0	0	0	0	0	0	0	0	0	0	3	1	5	4	115	104	47
9	0	0	0	0	0	0	0	0	0	3	77	0	0	0	1	0	0	0
10	0	1	122	0	5	42	0	0	0	0	1	0	0	0	0	0	0	0
11	0	0	0	0	0	0	0	0	0	0	0	0	0	0	67	0	2	1
12	0	0	0	0	0	0	0	0	0	4	4	5	51	0	175	0	0	1
13	0	1	0	0	0	0	27	0	0	0	0	0	0	0	0	0	0	0
14	0	0	0	0	0	0	0	152	3	0	0	0	0	0	0	0	0	0

# PBMC2, 'celltypist' tuning: the cell output lists each tool's contingency
# matrix against the cellTypist labels.
print_clustering_data(dataset="PBMC2", tuning='celltypist')

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC2 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	1	0	0	1	0	5	0	55	0	0	12	38	6	90	1	0	0	22
2	0	0	0	0	0	279	0	6	22	0	53	22	24	1	19	0	0	1
3	0	1	332	1	407	130	338	27	297	36	202	95	99	38	132	0	0	7
4	577	1	0	6	0	0	0	2	0	0	0	2	2	42	0	0	0	75
5	1	0	0	0	0	15	0	258	0	0	9	9	2	13	0	0	0	9
6	0	16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	77	0
7	0	558	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
8	0	0	142	0	32	4	69	4	11	292	24	59	4	2	31	0	0	0
9	0	13	0	0	0	0	0	0	0	0	0	0	0	0	0	173	0	0
10	43	0	0	0	0	0	0	1	0	0	0	0	0	5	0	0	0	3
11	0	0	0	228	0	0	0	0	0	0	0	0	0	0	0	0	0	0
12	0	0	0	204	0	0	0	0	0	0	0	0	0	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0
14	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	14	0
15	0	0	0	0	0	4	0	1	3	0	0	0	66	3	3	0	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20
1	0	0	0	0	0	1	0	2	1	0	15	2	57	5	0	148	0	0	0	0
2	0	0	0	0	0	0	131	200	35	2	2	0	1	55	0	1	0	0	0	0
3	21	575	490	0	0	0	230	107	248	286	10	2	48	98	0	26	0	0	0	1
4	0	0	0	0	471	8	0	1	0	0	2	222	0	0	0	2	1	0	0	0
5	0	0	0	0	0	0	1	1	0	0	253	0	3	52	0	6	0	0	0	0
6	0	0	0	5	0	0	0	0	0	0	0	0	0	0	0	0	0	88	0	0
7	0	0	0	464	0	0	0	0	0	0	0	0	0	0	6	0	88	0	0	0
8	558	2	0	0	0	0	1	0	0	0	0	0	112	1	0	0	0	0	0	0
9	0	0	0	12	0	0	0	0	0	0	0	0	0	0	174	0	0	0	0	0
10	0	0	0	0	0	0	0	0	0	0	0	50	0	2	0	0	0	0	0	0
11	0	0	0	0	0	228	0	0	0	0	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	0	204	0	0	0	0	0	0	0	0	0	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0
14	0	0	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	13
15	0	0	0	0	0	0	10	44	17	1	1	1	2	3	0	1	0	0	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	0	0	0	0	1	0	217	0	0	0	7	0	3	0	3	0	0	0
2	348	2	0	0	0	4	7	0	2	3	60	0	1	0	0	0	0	0
3	469	4	1	15	1	412	69	356	324	322	165	0	4	0	0	0	0	0
4	1	667	1	0	8	0	6	0	0	0	0	0	0	0	1	23	0	0
5	10	2	0	0	0	0	7	0	1	0	65	0	159	0	72	0	0	0
6	0	0	9	0	0	0	0	0	0	0	0	0	0	80	0	0	4	0
7	0	0	556	0	0	0	0	0	0	0	0	2	0	0	0	0	0	0
8	1	0	1	561	0	2	99	1	2	2	5	0	0	0	0	0	0	0
9	0	0	13	0	0	0	0	0	0	0	0	173	0	0	0	0	0	0
10	0	5	0	0	0	0	0	0	0	0	0	0	0	0	0	47	0	0
11	0	0	0	0	228	0	0	0	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	204	0	0	0	0	0	0	0	0	0	0	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0
14	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	14
15	61	0	0	0	0	3	2	1	1	3	9	0	0	0	0	0	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20
1	2	0	0	0	0	0	0	0	7	1	0	1	41	178	0	0	1	0	0	0
2	389	0	0	0	0	0	0	1	10	3	0	23	1	0	0	0	0	0	0	0
3	259	0	578	568	15	0	1	290	188	0	2	169	62	8	0	0	2	0	0	0
4	0	635	0	0	0	0	7	0	2	0	0	0	0	3	0	1	59	0	0	0
5	8	0	0	0	0	0	0	0	1	268	0	32	1	6	0	0	0	0	0	0
6	0	0	0	0	0	11	0	0	0	0	0	0	0	0	0	0	0	82	0	0
7	0	0	0	0	0	305	0	0	0	0	253	0	0	0	0	0	0	0	0	0
8	1	0	8	0	541	0	0	0	1	0	0	3	119	1	0	0	0	0	0	0
9	0	0	0	0	0	8	0	0	0	0	6	0	0	0	172	0	0	0	0	0
10	0	2	0	0	0	0	0	0	0	0	0	0	0	0	0	0	50	0	0	0
11	0	0	0	0	0	0	83	0	0	0	0	0	0	0	0	145	0	0	0	0
12	0	0	0	0	0	0	201	0	0	1	0	0	0	0	0	2	0	0	0	0
13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0
14	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	14
15	1	0	0	0	0	0	0	0	76	0	0	0	0	2	0	0	1	0	0	0

'PBMC2 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17
1	0	0	0	3	4	220	3	0	0	0	1	0	0	0	0	0	0
2	0	0	0	416	9	0	2	0	0	0	0	0	0	0	0	0	0
3	0	2	1186	847	31	72	3	0	0	0	0	0	0	0	0	0	1
4	639	56	0	0	2	3	0	0	0	0	0	0	0	0	1	5	1
5	0	0	0	39	0	7	270	0	0	0	0	0	0	0	0	0	0
6	0	0	0	0	0	0	0	0	0	86	0	7	0	0	0	0	0
7	1	0	0	0	0	0	0	0	0	0	81	300	174	0	0	2	0
8	0	0	568	9	1	96	0	0	0	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	0	154	24	0	0	8	0	0	0	0	0
10	1	51	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
11	0	0	0	0	0	0	0	0	0	0	0	0	0	0	145	69	14
12	0	0	0	0	0	0	1	0	0	0	0	0	0	0	1	22	180
13	0	0	0	0	0	0	0	0	0	0	0	0	0	48	0	0	0
14	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	13	1
15	0	1	0	6	72	1	0	0	0	0	0	0	0	0	0	0	0

# PBMC3, 'celltypist' tuning: the cell output lists each tool's contingency
# matrix against the cellTypist labels.
print_clustering_data(dataset="PBMC3", tuning='celltypist')

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC3 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17
1	860	0	0	0	38	559	313	338	231	324	158	0	195	0	0	5	0
2	0	1041	0	0	0	0	1	0	0	0	0	390	0	2	7	0	31
3	0	0	0	654	0	0	2	2	1	0	0	0	1	0	0	0	2
4	400	0	0	0	12	309	29	47	12	157	78	0	56	0	0	0	0
5	0	22	1036	33	22	0	0	0	0	0	3	2	1	0	2	121	0
6	0	11	0	0	0	0	0	0	0	0	0	1	0	0	144	0	0
7	0	0	0	0	134	1	247	305	248	0	135	1	42	0	0	0	0
8	0	0	3	0	435	0	3	7	0	0	12	0	20	0	0	4	0
9	0	0	0	396	0	0	0	0	0	0	0	0	0	0	0	0	0
10	0	73	0	0	0	0	0	0	0	0	0	4	0	328	2	0	1
11	0	0	8	0	277	2	21	26	1	0	47	0	46	0	0	2	0
13	11	0	0	0	0	8	157	9	19	1	7	0	21	0	0	0	0
14	0	0	79	0	12	0	0	0	0	0	0	0	0	0	1	19	0
15	1	0	0	0	0	0	0	3	0	0	0	0	0	0	16	0	0
16	0	0	0	0	0	0	0	0	0	0	0	11	0	0	0	0	0
18	0	0	0	0	0	0	0	0	0	0	0	0	0	0	57	0	0
19	0	0	0	0	0	0	0	0	0	0	0	0	0	0	12	0	0

'PBMC3 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	1670	0	0	212	0	32	234	621	23	228	0	0	1	0	0	0	0	0
2	0	1228	0	0	0	0	0	0	0	0	2	202	0	0	5	0	35	0
3	0	0	0	0	543	0	5	0	0	1	0	0	0	111	0	0	2	0
4	29	0	0	0	0	811	192	5	4	59	0	0	0	0	0	0	0	0
5	0	0	1028	0	0	0	0	0	7	0	0	29	145	33	0	0	0	0
6	0	7	0	0	0	0	0	0	0	0	0	0	0	0	149	0	0	0
7	1	0	0	702	0	0	230	99	7	73	0	1	0	0	0	0	0	0
8	0	0	3	0	0	0	47	0	433	0	0	0	1	0	0	0	0	0
9	0	0	0	0	395	0	0	0	0	0	0	0	0	1	0	0	0	0
10	0	94	0	0	0	0	0	0	0	0	311	1	0	0	1	0	1	0
11	0	0	2	0	0	0	116	0	290	21	0	0	1	0	0	0	0	0
13	16	0	0	44	0	0	10	55	1	107	0	0	0	0	0	0	0	0
14	0	0	11	0	0	0	4	0	5	1	0	0	90	0	0	0	0	0
15	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	20
16	0	0	0	0	0	0	0	0	0	0	0	11	0	0	0	0	0	0
18	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	57	0	0
19	0	0	0	0	0	0	0	0	0	0	0	0	0	12	0	0	0	0

'PBMC3 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19
1	2	657	1415	2	2	43	614	91	5	0	64	0	14	49	0	0	32	31	0
2	1463	0	0	0	0	0	0	0	0	1	0	8	0	0	0	0	0	0	0
3	4	0	1	0	654	0	2	1	0	0	0	0	0	0	0	0	0	0	0
4	0	6	13	1	0	856	4	168	0	0	0	0	0	52	0	0	0	0	0
5	29	0	0	1172	32	0	0	2	0	0	1	0	0	0	5	0	0	0	1
6	9	0	0	0	0	0	0	0	0	0	0	147	0	0	0	0	0	0	0
7	1	827	5	0	0	1	187	35	22	0	0	0	24	8	0	0	1	2	0
8	0	30	0	2	0	1	3	12	305	0	131	0	0	0	0	0	0	0	0
9	0	0	0	0	396	0	0	0	0	0	0	0	0	0	0	0	0	0	0
10	83	0	0	0	0	0	0	0	0	324	0	1	0	0	0	0	0	0	0
11	0	15	0	2	1	3	1	399	3	0	5	0	0	0	1	0	0	0	0
13	0	49	21	0	0	0	22	6	0	0	0	0	131	2	0	0	2	0	0
14	0	0	0	31	0	0	0	0	0	0	0	0	0	0	80	0	0	0	0
15	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	20
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11	0	0	0
18	0	0	0	0	0	0	0	0	0	0	0	5	0	0	0	52	0	0	0
19	0	0	0	0	0	0	0	0	0	0	12	0	0	0	0	0	0	0	0

'PBMC3 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	1744	412	0	0	681	20	161	0	3	0	0	0	0	0	0	0	0	0
2	0	0	1011	0	0	0	0	0	0	0	0	227	0	231	3	0	0	0
3	0	0	0	0	0	0	2	535	0	2	0	1	2	0	0	120	0	0
4	16	6	0	0	3	886	188	0	0	0	0	0	0	0	0	1	0	0
5	0	1	4	1043	0	0	2	1	1	0	0	24	136	0	0	30	0	0
6	0	0	7	0	0	0	0	0	0	0	0	0	0	0	149	0	0	0
7	0	995	0	0	93	0	18	0	7	0	0	0	0	0	0	0	0	0
8	0	18	0	3	0	0	8	0	454	0	0	0	1	0	0	0	0	0
9	0	0	0	0	0	0	0	57	0	336	0	0	0	0	0	3	0	0
10	0	0	89	0	0	0	0	0	0	0	314	1	0	4	0	0	0	0
11	0	14	0	0	0	2	411	0	3	0	0	0	0	0	0	0	0	0
13	13	8	0	0	205	0	7	0	0	0	0	0	0	0	0	0	0	0
14	0	0	0	2	0	0	0	0	0	0	0	0	109	0	0	0	0	0
15	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	20
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	11
18	0	0	0	0	0	0	0	0	0	0	0	0	0	0	5	0	52	0
19	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	12

'PBMC3 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	...	14	15	16	17	18	19	20	21	22	23
1	1822	80	633	127	291	53	12	0	0	3	...	0	0	0	0	0	0	0	0	0	0
2	0	0	1	0	0	0	0	1	1	0	...	68	517	404	324	0	0	0	1	0	0
3	1	0	2	2	0	2	0	0	0	0	...	0	0	0	0	9	136	387	108	15	0
4	482	430	156	9	5	17	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0
5	0	0	0	1	0	138	2	28	281	755	...	0	1	0	0	0	25	5	0	0	0
6	0	0	0	0	0	0	0	0	0	0	...	5	1	4	0	0	0	0	0	0	0
7	0	0	87	955	53	11	7	0	0	0	...	0	0	0	0	0	0	0	0	0	0
8	0	0	1	16	1	8	454	0	3	1	...	0	0	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	359	2	34	0	1	0
10	0	0	0	0	0	0	0	0	0	0	...	82	31	91	0	0	0	0	0	0	0
11	5	1	86	4	11	29	255	0	24	15	...	0	0	0	0	0	0	0	0	0	0
13	7	0	210	5	1	10	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
14	0	0	0	1	0	10	0	0	100	0	...	0	0	0	0	0	0	0	0	0	0
15	0	0	3	0	0	1	0	0	0	0	...	0	0	0	0	0	0	0	0	16	0
16	0	0	0	0	0	0	0	0	0	0	...	0	11	0	0	0	0	0	0	0	0
18	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	52
19	0	0	0	0	0	1	0	0	0	1	...	0	0	0	0	0	1	1	0	8	0

17 rows × 23 columns

# PBMC4, 'celltypist' tuning: the cell output shown below starts with each
# tool's contingency matrix against the cellTypist labels.
print_clustering_data(dataset="PBMC4", tuning='celltypist')

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'PBMC4 - contingency_matrix (rows: cellTypist - cols: monocle)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16
1	0	0	9	0	0	0	0	12	0	381	1	0	0	0	0	0
2	755	2	1	0	0	0	0	2	0	3	3	0	0	0	0	41
3	0	705	149	0	0	0	379	0	3	0	92	0	0	0	0	0
4	0	0	1	0	0	0	0	97	0	8	1	0	0	0	0	0
5	5	2	2	647	637	405	0	1	419	3	2	3	34	0	29	0
6	0	196	83	0	0	0	19	1	0	0	9	0	0	0	0	0
7	0	0	51	0	0	0	0	9	0	15	2	0	0	0	0	0
8	0	2	417	0	0	0	77	0	0	5	37	0	0	0	0	0
9	0	0	0	0	0	0	0	351	1	3	4	0	0	0	0	0
10	0	0	0	2	3	76	0	0	5	0	0	219	2	0	0	0
11	220	0	0	0	0	0	0	0	1	0	1	0	0	0	0	2
12	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28	0
13	1	2	2	0	0	0	0	0	0	4	0	0	0	4	0	0
14	0	4	4	0	0	0	1	0	0	0	97	0	0	0	0	0
15	0	0	0	11	5	0	0	0	1	0	0	1	72	3	0	0
16	0	0	0	0	0	0	0	0	0	0	0	0	0	59	0	0

'PBMC4 - contingency_matrix (rows: cellTypist - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	0	10	0	0	0	0	381	5	1	0	0	0	0	1	5	0	0	0
2	0	0	0	672	0	0	0	0	1	134	0	0	0	0	0	0	0	0
3	0	346	0	0	508	386	1	10	0	0	0	0	0	0	0	77	0	0
4	0	0	0	0	0	0	6	7	10	0	0	0	0	0	84	0	0	0
5	924	1	818	0	0	0	0	0	0	0	1	181	77	140	0	0	47	0
6	0	7	0	0	5	2	0	286	0	0	0	0	0	0	0	8	0	0
7	0	3	0	0	0	0	1	67	0	0	0	0	0	0	6	0	0	0
8	0	483	0	0	8	43	1	2	0	0	0	0	0	0	0	1	0	0
9	0	1	0	0	0	0	5	0	348	0	0	0	0	0	5	0	0	0
10	74	0	2	0	0	0	0	0	0	0	216	10	2	2	0	0	1	0
11	0	0	0	46	0	0	0	0	0	177	0	0	1	0	0	0	0	0
12	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28	0
13	0	13	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
14	0	68	0	0	1	22	1	2	0	0	0	0	0	0	0	12	0	0
15	0	0	0	0	0	0	0	0	0	0	0	2	89	2	0	0	0	0
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	59

'PBMC4 - contingency_matrix (rows: cellTypist - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
1	3	0	0	0	0	0	0	0	311	0	0	1	0	0	86	2	0	0
2	1	1	0	0	686	0	1	1	10	0	104	2	0	0	0	1	0	0
3	438	838	1	0	0	0	0	21	0	0	0	0	30	0	0	0	0	0
4	0	0	0	0	0	0	7	1	0	0	0	99	0	0	0	0	0	0
5	1	1	772	696	2	506	15	0	0	65	2	1	0	23	2	83	0	20
6	10	8	0	0	0	0	0	287	1	0	0	1	1	0	0	0	0	0
7	5	0	0	0	0	0	0	33	5	0	0	34	0	0	0	0	0	0
8	507	15	0	0	0	0	1	3	2	0	0	0	10	0	0	0	0	0
9	0	0	0	0	0	0	358	0	0	0	0	0	1	0	0	0	0	0
10	0	0	13	1	0	27	0	0	0	265	0	0	0	0	0	1	0	0
11	0	0	0	0	5	0	0	0	0	0	219	0	0	0	0	0	0	0
12	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28
13	6	0	0	0	0	0	0	6	1	0	0	0	0	0	0	0	0	0
14	16	5	0	0	0	0	0	0	0	0	0	4	81	0	0	0	0	0
15	0	0	2	0	0	1	1	0	0	0	0	0	0	89	0	0	0	0
16	0	0	0	0	0	1	0	0	0	0	0	0	0	1	0	0	57	0

'PBMC4 - contingency_matrix (rows: cellTypist - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19
1	5	0	0	0	0	0	394	0	0	4	0	0	0	0	0	0	0	0	0
2	0	0	0	0	631	0	1	0	122	1	0	0	0	0	0	0	0	52	0
3	361	841	0	0	0	0	0	0	0	2	121	0	3	0	0	0	0	0	0
4	0	0	0	0	0	0	0	1	0	106	0	0	0	0	0	0	0	0	0
5	2	0	770	728	0	445	2	0	2	0	1	1	0	133	27	35	0	0	43
6	31	15	0	0	0	0	0	0	0	68	5	0	189	0	0	0	0	0	0
7	3	0	0	0	0	0	3	0	0	69	2	0	0	0	0	0	0	0	0
8	525	0	0	0	0	0	1	0	0	2	10	0	0	0	0	0	0	0	0
9	0	0	0	0	0	0	3	354	1	0	1	0	0	0	0	0	0	0	0
10	0	0	0	7	0	86	0	0	0	0	0	213	0	1	0	0	0	0	0
11	0	0	0	0	1	0	0	0	223	0	0	0	0	0	0	0	0	0	0
12	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28	0	0	0
13	10	0	0	0	0	0	0	1	0	0	0	0	0	0	1	0	0	1	0
14	4	0	0	0	0	0	0	0	0	1	101	0	0	0	0	0	0	0	0
15	0	0	0	1	0	1	0	0	0	0	0	0	0	0	90	0	0	0	1
16	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	59	0	0

'PBMC4 - contingency_matrix (rows: cellTypist - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15
1	0	0	0	0	0	0	4	0	1	398	0	0	0	0	0
2	0	0	0	0	0	0	0	1	1	1	47	344	413	0	0
3	0	0	0	0	0	0	366	961	1	0	0	0	0	0	0
4	0	0	0	0	0	0	3	0	104	0	0	0	0	0	0
5	129	719	530	739	66	1	1	0	0	2	0	1	1	0	0
6	0	0	0	0	0	0	99	209	0	0	0	0	0	0	0
7	0	0	0	0	0	0	70	0	3	4	0	0	0	0	0
8	0	0	0	0	0	0	530	7	0	1	0	0	0	0	0
9	0	0	0	0	0	0	2	0	356	1	0	0	0	0	0
10	1	6	83	2	0	215	0	0	0	0	0	0	0	0	0
11	0	0	0	0	0	0	0	0	0	0	24	199	1	0	0
12	0	0	0	0	0	0	0	0	0	0	0	0	0	0	28
13	0	0	0	0	1	0	1	1	8	0	1	1	0	0	0
14	0	0	0	0	0	0	105	1	0	0	0	0	0	0	0
15	0	1	1	0	91	0	0	0	0	0	0	0	0	0	0
16	0	0	0	0	0	0	0	0	0	0	0	0	0	59	0

Against antibody cluster number

print_clustering_data(tuning = 'antibody',dataset="PBMC1")

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'Initial antibody cell/cluster table:'

cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64

'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2	3	4	5	6	7	8	9
1	94	0	17	50	0	0	0	0	0
2	0	4	0	0	3	0	38	1	0
3	35	0	500	65	0	0	0	0	0
4	0	1	0	262	0	0	0	0	0
5	2	0	29	127	0	0	0	0	0
6	0	1	0	0	0	1	1	58	26
7	2	731	0	4	0	275	95	17	1
8	776	0	30	6	1	0	0	0	1
9	0	0	0	1	294	0	0	0	0
10	0	0	0	44	0	0	0	0	0
12	0	0	0	10	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11
1	22	0	7	60	0	0	0	0	0	0	72
2	0	0	0	0	5	0	2	1	0	38	0
3	67	0	408	125	0	0	0	0	0	0	0
4	0	1	0	13	0	249	0	0	0	0	0
5	2	0	10	146	0	0	0	0	0	0	0
6	3	0	1	0	0	0	0	0	82	1	0
7	2	702	1	0	348	0	0	0	20	52	0
8	780	0	21	6	0	1	1	0	1	0	4
9	0	0	0	1	0	1	156	137	0	0	0
10	0	0	0	37	0	7	0	0	0	0	0
12	0	0	0	0	0	10	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10
1	28	0	0	22	0	1	36	0	74	0
2	0	3	42	0	1	0	0	0	0	0
3	123	0	0	434	0	0	43	0	0	0
4	0	1	0	0	0	259	3	0	0	0
5	0	0	0	17	0	0	138	0	3	0
6	0	1	1	0	0	0	0	56	3	26
7	1	644	451	0	0	1	0	27	0	1
8	786	0	0	19	1	2	3	0	2	1
9	0	0	0	0	294	1	0	0	0	0
10	0	0	0	1	0	9	34	0	0	0
12	0	0	0	0	0	10	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11
1	93	14	0	0	0	54	0	0	0	0	0
2	0	0	1	4	0	0	1	1	39	0	0
3	22	536	0	0	0	42	0	0	0	0	0
4	0	0	1	0	261	1	0	0	0	0	0
5	1	18	0	0	0	139	0	0	0	0	0
6	3	1	0	1	0	0	0	0	1	56	25
7	2	1	524	488	2	0	0	0	89	18	1
8	766	40	0	0	1	5	1	0	0	0	1
9	0	0	0	0	2	0	151	142	0	0	0
10	0	0	0	0	8	36	0	0	0	0	0
12	0	0	0	0	8	2	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	11	12	13
1	0	0	0	0	0	0	0	0	54	1	2	94	10
2	1	1	38	0	4	0	1	1	0	0	0	0	0
3	0	0	0	0	0	0	0	0	39	6	55	58	442
4	1	0	0	0	0	0	0	0	1	257	4	0	0
5	0	0	0	0	0	0	0	0	135	0	1	2	20
6	0	56	1	1	1	25	0	0	0	0	1	1	1
7	33	17	83	271	716	1	0	0	0	1	1	2	0
8	0	0	0	0	0	1	1	0	4	0	12	773	23
9	0	0	0	0	0	0	150	143	0	2	0	0	0
10	0	0	0	0	0	0	0	0	38	5	1	0	0
12	0	0	0	0	0	0	0	0	1	6	2	0	1

print_clustering_data(tuning = 'antibody',dataset="PBMC1")

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'Initial antibody cell/cluster table:'

cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64

'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2	3	4	5	6	7	8	9
1	94	0	17	50	0	0	0	0	0
2	0	4	0	0	3	0	38	1	0
3	35	0	500	65	0	0	0	0	0
4	0	1	0	262	0	0	0	0	0
5	2	0	29	127	0	0	0	0	0
6	0	1	0	0	0	1	1	58	26
7	2	731	0	4	0	275	95	17	1
8	776	0	30	6	1	0	0	0	1
9	0	0	0	1	294	0	0	0	0
10	0	0	0	44	0	0	0	0	0
12	0	0	0	10	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11
1	22	0	7	60	0	0	0	0	0	0	72
2	0	0	0	0	5	0	2	1	0	38	0
3	67	0	408	125	0	0	0	0	0	0	0
4	0	1	0	13	0	249	0	0	0	0	0
5	2	0	10	146	0	0	0	0	0	0	0
6	3	0	1	0	0	0	0	0	82	1	0
7	2	702	1	0	348	0	0	0	20	52	0
8	780	0	21	6	0	1	1	0	1	0	4
9	0	0	0	1	0	1	156	137	0	0	0
10	0	0	0	37	0	7	0	0	0	0	0
12	0	0	0	0	0	10	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10
1	28	0	0	22	0	1	36	0	74	0
2	0	3	42	0	1	0	0	0	0	0
3	123	0	0	434	0	0	43	0	0	0
4	0	1	0	0	0	259	3	0	0	0
5	0	0	0	17	0	0	138	0	3	0
6	0	1	1	0	0	0	0	56	3	26
7	1	644	451	0	0	1	0	27	0	1
8	786	0	0	19	1	2	3	0	2	1
9	0	0	0	0	294	1	0	0	0	0
10	0	0	0	1	0	9	34	0	0	0
12	0	0	0	0	0	10	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11
1	93	14	0	0	0	54	0	0	0	0	0
2	0	0	1	4	0	0	1	1	39	0	0
3	22	536	0	0	0	42	0	0	0	0	0
4	0	0	1	0	261	1	0	0	0	0	0
5	1	18	0	0	0	139	0	0	0	0	0
6	3	1	0	1	0	0	0	0	1	56	25
7	2	1	524	488	2	0	0	0	89	18	1
8	766	40	0	0	1	5	1	0	0	0	1
9	0	0	0	0	2	0	151	142	0	0	0
10	0	0	0	0	8	36	0	0	0	0	0
12	0	0	0	0	8	2	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	11	12	13
1	0	0	0	0	0	0	0	0	54	1	2	94	10
2	1	1	38	0	4	0	1	1	0	0	0	0	0
3	0	0	0	0	0	0	0	0	39	6	55	58	442
4	1	0	0	0	0	0	0	0	1	257	4	0	0
5	0	0	0	0	0	0	0	0	135	0	1	2	20
6	0	56	1	1	1	25	0	0	0	0	1	1	1
7	33	17	83	271	716	1	0	0	0	1	1	2	0
8	0	0	0	0	0	1	1	0	4	0	12	773	23
9	0	0	0	0	0	0	150	143	0	2	0	0	0
10	0	0	0	0	0	0	0	0	38	5	1	0	0
12	0	0	0	0	0	0	0	0	1	6	2	0	1

print_clustering_data(tuning = 'antibody',dataset="PBMC1")

'Initial COTAN cluster number:'

'Initial monocle cluster number:'

'Initial scanpy cluster number:'

'Initial scvi-tools cluster number:'

'Initial seurat cluster number:'

'Initial antibody cell/cluster table:'

cluster.ids
7     1338
8      876
3      748
4      341
9      331
5      211
1      202
6      131
2       62
10      51
12      16
Name: count, dtype: int64

'PBMC1 - contingency_matrix (rows: antibody - cols: monocle)'

	1	2	3	4	5	6	7	8	9
1	94	0	17	50	0	0	0	0	0
2	0	4	0	0	3	0	38	1	0
3	35	0	500	65	0	0	0	0	0
4	0	1	0	262	0	0	0	0	0
5	2	0	29	127	0	0	0	0	0
6	0	1	0	0	0	1	1	58	26
7	2	731	0	4	0	275	95	17	1
8	776	0	30	6	1	0	0	0	1
9	0	0	0	1	294	0	0	0	0
10	0	0	0	44	0	0	0	0	0
12	0	0	0	10	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scanpy)'

	1	2	3	4	5	6	7	8	9	10	11
1	22	0	7	60	0	0	0	0	0	0	72
2	0	0	0	0	5	0	2	1	0	38	0
3	67	0	408	125	0	0	0	0	0	0	0
4	0	1	0	13	0	249	0	0	0	0	0
5	2	0	10	146	0	0	0	0	0	0	0
6	3	0	1	0	0	0	0	0	82	1	0
7	2	702	1	0	348	0	0	0	20	52	0
8	780	0	21	6	0	1	1	0	1	0	4
9	0	0	0	1	0	1	156	137	0	0	0
10	0	0	0	37	0	7	0	0	0	0	0
12	0	0	0	0	0	10	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: scvi-tools)'

	1	2	3	4	5	6	7	8	9	10
1	28	0	0	22	0	1	36	0	74	0
2	0	3	42	0	1	0	0	0	0	0
3	123	0	0	434	0	0	43	0	0	0
4	0	1	0	0	0	259	3	0	0	0
5	0	0	0	17	0	0	138	0	3	0
6	0	1	1	0	0	0	0	56	3	26
7	1	644	451	0	0	1	0	27	0	1
8	786	0	0	19	1	2	3	0	2	1
9	0	0	0	0	294	1	0	0	0	0
10	0	0	0	1	0	9	34	0	0	0
12	0	0	0	0	0	10	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: seurat)'

	1	2	3	4	5	6	7	8	9	10	11
1	93	14	0	0	0	54	0	0	0	0	0
2	0	0	1	4	0	0	1	1	39	0	0
3	22	536	0	0	0	42	0	0	0	0	0
4	0	0	1	0	261	1	0	0	0	0	0
5	1	18	0	0	0	139	0	0	0	0	0
6	3	1	0	1	0	0	0	0	1	56	25
7	2	1	524	488	2	0	0	0	89	18	1
8	766	40	0	0	1	5	1	0	0	0	1
9	0	0	0	0	2	0	151	142	0	0	0
10	0	0	0	0	8	36	0	0	0	0	0
12	0	0	0	0	8	2	0	0	0	0	0

'PBMC1 - contingency_matrix (rows: antibody - cols: COTAN)'

	1	2	3	4	5	6	7	8	9	10	11	12	13
1	0	0	0	0	0	0	0	0	54	1	2	94	10
2	1	1	38	0	4	0	1	1	0	0	0	0	0
3	0	0	0	0	0	0	0	0	39	6	55	58	442
4	1	0	0	0	0	0	0	0	1	257	4	0	0
5	0	0	0	0	0	0	0	0	135	0	1	2	20
6	0	56	1	1	1	25	0	0	0	0	1	1	1
7	33	17	83	271	716	1	0	0	0	1	1	2	0
8	0	0	0	0	0	1	1	0	4	0	12	773	23
9	0	0	0	0	0	0	150	143	0	2	0	0	0
10	0	0	0	0	0	0	0	0	38	5	1	0	0
12	0	0	0	0	0	0	0	0	1	6	2	0	1

Default parameters

print_scores(tuning = 'default',dataset="PBMC1")

'PBMC1 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3	18	13	11	23

'PBMC1 - Silhuette (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.106025	0.062012	0.087622	0.168956	0.1122

'PBMC1 - Calinski_Harabasz (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	194.710746	159.568549	193.151278	235.185139	166.209735

'PBMC1 - davies_bouldin (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3.046493	2.534291	2.547137	1.695538	2.199121

'PBMC1 - Silhuette from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.182454	-0.006178	0.148873	0.235918	0.139282

'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	187.272904	160.02968	199.227613	213.845901	133.985915

'PBMC1 - davies_bouldin  from Prob. (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	2.582264	2.79635	2.842419	1.973727	3.407326

'PBMC1 - default labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.580616	0.385715	0.412315	0.981082	0.603445	0.367190	0.991712
scanpy	0.722333	0.405339	0.828504	0.640283	0.509402	0.791080	0.328020
scvi-tools	0.777869	0.600555	0.813260	0.745430	0.667316	0.813991	0.547070
seurat	0.795611	0.651612	0.787916	0.803458	0.707763	0.751355	0.666699
COTAN	0.747622	0.582668	0.839063	0.674153	0.655776	0.845150	0.508836

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.622344	0.439255	0.458549	0.968180	0.645589	0.421345	0.989178
scanpy	0.662480	0.389320	0.814398	0.558329	0.511739	0.851844	0.307424
scvi-tools	0.718265	0.557951	0.800919	0.651075	0.643426	0.842101	0.491625
seurat	0.747924	0.647338	0.787235	0.712353	0.712527	0.810669	0.626267
COTAN	0.684979	0.529470	0.823524	0.586337	0.623261	0.862630	0.450315

print_scores(tuning = 'default',dataset="PBMC2")

'PBMC2 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	2	18	20	14	31

'PBMC2 - Silhuette (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.237524	0.077322	0.018324	0.134064	0.112282

'PBMC2 - Calinski_Harabasz (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	298.25227	270.502074	223.039427	367.295749	222.980901

'PBMC2 - davies_bouldin (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3.89379	2.581588	3.703433	1.958013	2.8615

'PBMC2 - Silhuette from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.283181	0.1987	0.064022	0.358299	0.19162

'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	284.036162	259.900464	223.875031	377.870193	213.170805

'PBMC2 - davies_bouldin  from Prob. (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3.514194	2.322847	5.400931	1.992412	4.535166

'PBMC2 - default labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.393439	0.206935	0.246216	0.978567	0.521545	0.273004	0.996358
scanpy	0.719426	0.457389	0.805848	0.649745	0.557061	0.815952	0.380312
scvi-tools	0.699891	0.424655	0.787025	0.630128	0.525155	0.763216	0.361349
seurat	0.776310	0.562412	0.820809	0.736387	0.640230	0.816710	0.501886
COTAN	0.724561	0.492226	0.862550	0.624634	0.593654	0.891369	0.395375

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.266290	0.092131	0.156254	0.900271	0.459354	0.212893	0.991137
scanpy	0.693344	0.526719	0.779700	0.624210	0.610277	0.815918	0.456465
scvi-tools	0.661538	0.488989	0.757752	0.587004	0.576896	0.781492	0.425864
seurat	0.757858	0.683647	0.801432	0.718778	0.738063	0.851801	0.639512
COTAN	0.693767	0.569696	0.814741	0.604074	0.647577	0.850076	0.493316

print_scores(tuning = 'default',dataset="PBMC3")

'PBMC3 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3	22	17	18	57

'PBMC3 - Silhuette (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.173831	0.017764	0.066172	0.12701	0.043145

'PBMC3 - Calinski_Harabasz (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	565.456442	389.223708	568.006153	568.200931	269.332151

'PBMC3 - davies_bouldin (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3.238128	3.245809	2.168128	2.441035	2.894026

'PBMC3 - Silhuette from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.214085	0.065185	0.226855	0.282982	-0.001861

'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	531.480656	382.798915	537.03678	586.377699	255.33551

'PBMC3 - davies_bouldin  from Prob. (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	2.634444	3.87979	2.321242	2.318734	4.285919

'PBMC3 - default labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.501257	0.233289	0.339140	0.960303	0.500276	0.252654	0.990586
scanpy	0.685942	0.462727	0.765047	0.621663	0.541439	0.751303	0.390196
scvi-tools	0.738810	0.579719	0.758792	0.719853	0.635430	0.710503	0.568289
seurat	0.771188	0.585275	0.823308	0.725274	0.644396	0.790547	0.525264
COTAN	0.684907	0.420422	0.880300	0.560498	0.531259	0.917456	0.307629

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.473374	0.197340	0.318745	0.919383	0.474384	0.228968	0.982845
scanpy	0.678005	0.546455	0.758365	0.613045	0.613385	0.808137	0.465567
scvi-tools	0.725083	0.668897	0.739961	0.710792	0.711791	0.755603	0.670519
seurat	0.752260	0.669108	0.800562	0.709455	0.714356	0.824890	0.618633
COTAN	0.642410	0.427885	0.825485	0.525799	0.523267	0.834295	0.328191

print_scores(tuning = 'default',dataset="PBMC4")

'PBMC4 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3	22	16	19	34

'PBMC4 - Silhuette (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.081399	0.063742	0.075337	0.12954	0.120257

'PBMC4 - Calinski_Harabasz (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	364.985136	267.681245	341.396665	364.393784	285.369852

'PBMC4 - davies_bouldin (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	3.354088	2.496024	2.226	2.224448	2.372892

'PBMC4 - Silhuette from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	0.193766	0.025023	0.077663	0.187532	0.062563

'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	353.871309	254.540593	284.048471	347.979408	270.2767

'PBMC4 - davies_bouldin  from Prob. (lower is better)'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	2.775993	3.467425	2.808762	2.299231	3.452692

'PBMC4 - default labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.617025	0.470070	0.453383	0.965513	0.647279	0.425154	0.985455
scanpy	0.701228	0.380357	0.819943	0.612541	0.487560	0.777350	0.305802
scvi-tools	0.739299	0.504966	0.788229	0.696088	0.584900	0.745208	0.459077
seurat	0.760207	0.494746	0.847372	0.689301	0.583823	0.820228	0.415555
COTAN	0.726404	0.422436	0.881515	0.617712	0.528853	0.837081	0.334120

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.639609	0.527106	0.485153	0.938343	0.690625	0.486090	0.981225
scanpy	0.645222	0.369607	0.792297	0.544201	0.492852	0.824066	0.294762
scvi-tools	0.701912	0.483655	0.772259	0.643310	0.578775	0.767482	0.436467
seurat	0.693570	0.445739	0.804501	0.609523	0.551688	0.803926	0.378591
COTAN	0.644162	0.338121	0.813229	0.533293	0.457870	0.743146	0.282105

Matching the CellTypist cluster number

print_scores(tuning = 'celltypist',dataset="PBMC1")

'PBMC1 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	18	17	20	21	18

'PBMC1 - Silhuette (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	0.018958	0.099632	0.064412	0.073234	0.097248	0.119959

'PBMC1 - Calinski_Harabasz (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	130.295857	189.243925	161.219024	164.798269	187.055701	181.752646

'PBMC1 - davies_bouldin (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	2.822402	1.766278	2.822667	2.83544	2.107907	2.206314

'PBMC1 - Silhuette from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	-0.045945	0.425583	0.035198	-0.003457	0.038689	0.17801

'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	105.849177	258.209764	166.895151	148.432443	163.330165	155.720126

'PBMC1 - davies_bouldin  from Prob. (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	5.911548	1.299157	4.052006	4.249784	3.73374	2.738509

'PBMC1 - matching celltypist labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.658735	0.341968	0.759474	0.581592	0.448922	0.715120	0.281815
scanpy	0.736780	0.460617	0.825488	0.665288	0.554390	0.798681	0.384820
scvi-tools	0.700899	0.375385	0.811930	0.616582	0.480324	0.750160	0.307549
seurat	0.730959	0.423158	0.851564	0.640279	0.527781	0.824168	0.337980
COTAN	0.760567	0.614586	0.823060	0.706893	0.680190	0.835363	0.553840

print_scores(tuning = 'celltypist',dataset="PBMC2")

'PBMC2 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	18	20	18	20	17

'PBMC2 - Silhuette (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	-0.027381	0.142246	0.03787	0.039284	0.074705	0.129948

'PBMC2 - Calinski_Harabasz (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	172.476071	369.182347	250.226682	275.332265	313.66872	297.21664

'PBMC2 - davies_bouldin (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	3.456318	1.519571	2.755944	3.916041	2.073804	2.590932

'PBMC2 - Silhuette from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	0.016222	0.428361	0.111552	0.14981	0.180354	0.269737

'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	166.040634	405.143887	239.361751	274.501619	315.911839	295.576497

'PBMC2 - davies_bouldin  from Prob. (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	3.973793	1.205375	3.43661	3.947089	2.428992	2.216364

'PBMC2 - matching celltypist labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.605793	0.310821	0.700581	0.533598	0.424516	0.695894	0.258968
scanpy	0.699382	0.378427	0.812919	0.613673	0.493978	0.814699	0.299515
scvi-tools	0.709756	0.399001	0.791766	0.643141	0.501208	0.730913	0.343692
seurat	0.737794	0.418471	0.850711	0.651340	0.528956	0.837754	0.333981
COTAN	0.731121	0.473101	0.747806	0.715164	0.563052	0.591871	0.535636

print_scores(tuning = 'celltypist',dataset="PBMC3")

'PBMC3 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	17	18	19	18	23

'PBMC3 - Silhuette (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	-0.023054	0.150798	0.055521	0.018785	0.131058	0.057865

'PBMC3 - Calinski_Harabasz (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	297.808015	700.217364	456.227491	496.1459	574.480162	400.155157

'PBMC3 - davies_bouldin (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	3.985684	1.442211	2.995741	2.362272	2.426385	2.598886

'PBMC3 - Silhuette from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	-0.029642	0.377306	0.25164	0.174206	0.275693	0.088576

'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	262.360318	739.44889	472.946903	516.505211	586.79879	354.586581

'PBMC3 - davies_bouldin  from Prob. (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	11.904188	1.372576	2.656352	2.572155	2.308715	2.895497

'PBMC3 - matching celltypist labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.593633	0.350472	0.645190	0.549707	0.432543	0.575952	0.324842
scanpy	0.712471	0.545890	0.759378	0.671021	0.609493	0.757648	0.490309
scvi-tools	0.734566	0.564923	0.767509	0.704334	0.623340	0.727403	0.534165
seurat	0.771725	0.587107	0.823703	0.725918	0.645977	0.791597	0.527144
COTAN	0.670438	0.459746	0.737597	0.614488	0.530538	0.653153	0.430941

print_scores(tuning = 'celltypist',dataset="PBMC4")

'PBMC4 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	16	18	18	19	15

'PBMC4 - Silhuette (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	0.039137	0.094551	0.065347	0.129055	0.131231	0.099607

'PBMC4 - Calinski_Harabasz (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	295.870856	361.837214	295.550588	369.578328	361.987263	327.83418

'PBMC4 - davies_bouldin (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	2.558795	1.645136	2.486482	1.950227	2.225131	2.847014

'PBMC4 - Silhuette from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	0.046714	0.42606	0.093135	0.170808	0.186825	0.062989

'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	248.702625	497.518258	282.658631	374.867341	347.797347	303.69576

'PBMC4 - davies_bouldin  from Prob. (lower is better)'

	monocle	celltypist	scanpy	scvi-tools	seurat	COTAN
0	3.291098	1.09626	3.338571	2.073425	2.297124	2.483086

'PBMC4 - matching celltypist labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.686023	0.421456	0.747151	0.634141	0.512790	0.699270	0.376040
scanpy	0.730083	0.473671	0.809851	0.664619	0.562408	0.777976	0.406571
scvi-tools	0.752863	0.501079	0.830838	0.688268	0.587423	0.808372	0.426864
seurat	0.759689	0.493258	0.846652	0.688926	0.582261	0.817627	0.414649
COTAN	0.724775	0.450018	0.766000	0.687761	0.534712	0.666348	0.429080

Matching the antibody cluster number

print_scores(tuning = 'antibody',dataset="PBMC1")

'PBMC1 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	9	11	10	11	13

'PBMC1 - Silhuette (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	0.123097	0.069567	0.097602	0.094258	0.171754	0.157886

'PBMC1 - Calinski_Harabasz (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	203.253687	131.570445	193.550388	189.978034	237.429051	201.498385

'PBMC1 - davies_bouldin (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	2.027098	2.515129	1.886001	1.890236	1.677632	1.976209

'PBMC1 - Silhuette from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	0.101586	0.245087	0.297043	0.261342	0.218776	0.162724

'PBMC1 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	184.932102	137.566711	202.80892	206.264825	214.61793	186.472326

'PBMC1 - davies_bouldin  from Prob. (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	2.395779	2.16692	1.863548	1.700282	1.947515	2.059359

'PBMC1 - matching antibody labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.724319	0.641765	0.727281	0.721381	0.707943	0.753325	0.665295
scanpy	0.746106	0.652841	0.792721	0.704669	0.717629	0.829289	0.621003
scvi-tools	0.757587	0.658079	0.782127	0.734540	0.721084	0.800236	0.649760
seurat	0.749425	0.642110	0.790860	0.712116	0.708375	0.813318	0.616972
COTAN	0.721299	0.633200	0.772738	0.676282	0.700712	0.798916	0.614579

print_scores(tuning = 'antibody',dataset="PBMC2")

'PBMC2 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	11	9	11	12	11

'PBMC2 - Silhuette (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	-0.037233	0.053999	0.047735	-0.01373	0.107504	0.078974

'PBMC2 - Calinski_Harabasz (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	183.966932	197.645911	256.298442	177.876897	291.531393	203.655796

'PBMC2 - davies_bouldin (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	3.431006	2.999071	2.597576	5.123237	1.843028	3.072363

'PBMC2 - Silhuette from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	0.037684	0.242566	0.260903	0.074589	0.359083	0.234126

'PBMC2 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	187.161715	197.483785	259.897112	187.848746	297.046578	208.763542

'PBMC2 - davies_bouldin  from Prob. (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	6.262231	2.570934	2.234808	4.149191	1.494746	2.238186

'PBMC2 - matching antibody labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.592156	0.458950	0.611387	0.574098	0.545023	0.612345	0.485102
scanpy	0.749075	0.650770	0.757603	0.740737	0.708423	0.779752	0.643619
scvi-tools	0.674668	0.578230	0.709325	0.643240	0.647236	0.750492	0.558185
seurat	0.762283	0.762523	0.779418	0.745886	0.802406	0.838975	0.767431
COTAN	0.738004	0.674191	0.688420	0.795286	0.744862	0.649628	0.854057

print_scores(tuning = 'antibody',dataset="PBMC3")

'PBMC3 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	12	14	13	14	12

'PBMC3 - Silhuette (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	-0.040176	0.037871	0.034398	0.001717	0.076119	0.066886

'PBMC3 - Calinski_Harabasz (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	303.562678	309.45087	332.440157	368.948628	434.276887	338.97586

'PBMC3 - davies_bouldin (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	3.604809	3.04294	3.434343	3.282334	2.612535	3.274719

'PBMC3 - Silhuette from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	0.074904	0.205664	0.23307	0.17138	0.305558	0.220006

'PBMC3 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	298.489075	331.435612	382.997054	438.502899	489.196185	393.696152

'PBMC3 - davies_bouldin  from Prob. (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	12.995929	2.780055	2.882473	4.362136	1.884472	2.454523

'PBMC3 - matching antibody labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.644484	0.537094	0.639574	0.649469	0.598577	0.600917	0.596245
scanpy	0.729603	0.683244	0.752370	0.708173	0.724410	0.784386	0.669020
scvi-tools	0.726492	0.670063	0.728625	0.724372	0.713239	0.729596	0.697249
seurat	0.764843	0.698339	0.799673	0.732920	0.738860	0.829783	0.657901
COTAN	0.691237	0.607331	0.643954	0.746015	0.676860	0.575699	0.795798

print_scores(tuning = 'antibody',dataset="PBMC4")

'PBMC4 - number of clusters'

	monocle	scanpy	scvi-tools	seurat	COTAN
0	12	11	10	13	10

'PBMC4 - Silhuette (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	0.002385	-0.036094	0.050455	0.05113	0.077562	0.045683

'PBMC4 - Calinski_Harabasz (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	235.482647	197.341814	270.202541	343.073339	320.138454	229.318527

'PBMC4 - davies_bouldin (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	3.083971	4.423945	2.822422	2.12666	2.473315	3.226592

'PBMC4 - Silhuette from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	0.049133	0.172283	0.157433	0.088787	0.040314	0.088272

'PBMC4 - Calinski_Harabasz from Prob. (higher is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	200.437686	236.062289	249.06549	280.961153	300.480476	200.115128

'PBMC4 - davies_bouldin  from Prob. (lower is better)'

	monocle	antibody	scanpy	scvi-tools	seurat	COTAN
0	3.494187	3.01011	2.595129	2.489428	2.124028	2.931601

'PBMC4 - matching antibody labels'

	NMI	ARI	homogeneity	completeness	fowlkes_mallows	precision	recall
monocle	0.644898	0.463759	0.695399	0.601235	0.559620	0.723970	0.432579
scanpy	0.722095	0.587951	0.761954	0.686198	0.664451	0.799173	0.552440
scvi-tools	0.733942	0.592083	0.764983	0.705322	0.667996	0.803599	0.555275
seurat	0.723924	0.571960	0.786070	0.670884	0.652635	0.815198	0.522489
COTAN	0.677301	0.519220	0.678168	0.676436	0.606874	0.658843	0.559005

Check cellTypist vs Antibody

def compute_clustering_scores(output_dir, dataset):
    """Score the agreement between CellTypist and antibody labels for a dataset.

    Reads ``{DIR}{dataset}/celltypist/celltypist_labels.csv`` and
    ``{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv``, keeps
    only the cells present in both files (inner join on the index), computes
    NMI, ARI, homogeneity, completeness and Fowlkes-Mallows with the
    CellTypist labels passed as the first argument of every metric, and
    displays the result as a one-row table.

    Each CSV must contribute exactly one label column besides its index,
    otherwise the column renaming below raises ``ValueError``.

    Parameters
    ----------
    output_dir : str
        Currently unused; kept so existing calls keep working.
    dataset : str
        Name of the dataset sub-directory under ``DIR`` (e.g. ``'PBMC1'``).

    Returns
    -------
    pandas.DataFrame
        One-row frame with the columns ``NMI``, ``ARI``, ``Homogeneity``,
        ``Completeness`` and ``Fowlkes_Mallows`` (the same frame that is
        displayed).
    """
    celltypist_df = pd.read_csv(f'{DIR}{dataset}/celltypist/celltypist_labels.csv', index_col=0)
    # Drop the last two characters of every CellTypist cell id before joining.
    # NOTE(review): presumably a barcode suffix such as "-1" that the antibody
    # index does not carry - confirm, since the trim is unconditional.
    celltypist_df.index = celltypist_df.index.str[:-2]
    antibody_df = pd.read_csv(f'{DIR}{dataset}/antibody_annotation/antibody_labels_postproc.csv', index_col=0)

    # Inner join on the cell index: only cells annotated by both methods are scored.
    merged_df = celltypist_df.merge(antibody_df, how='inner', left_index=True, right_index=True)
    merged_df.columns = ['cluster_celltypist', 'cluster_antibody']

    celltypist_labels = merged_df['cluster_celltypist']
    antibody_labels = merged_df['cluster_antibody']

    scores = {
        'NMI': normalized_mutual_info_score(celltypist_labels, antibody_labels, average_method='arithmetic'),
        'ARI': adjusted_rand_score(celltypist_labels, antibody_labels),
        'Homogeneity': homogeneity_score(celltypist_labels, antibody_labels),
        'Completeness': completeness_score(celltypist_labels, antibody_labels),
        'Fowlkes_Mallows': fowlkes_mallows_score(celltypist_labels, antibody_labels),
    }

    # A single-row frame renders as a compact table in the notebook.
    scores_df = pd.DataFrame([scores])
    display(scores_df)

    # Hand the table back as well so callers can collect or save it.
    return scores_df

for dataset in DATASET_NAMES:
    # Caption displayed before the score table of each dataset.
    display(f'{dataset} - Clustering Comparison between CellTypist and Antibody')

    # compute_clustering_scores reads both label files for the dataset itself.
    compute_clustering_scores(DIR, dataset)

'PBMC1 - Clustering Comparison between CellTypist and Antibody'

	NMI	ARI	Homogeneity	Completeness	Fowlkes_Mallows
0	0.752326	0.731095	0.708308	0.802178	0.78126

'PBMC2 - Clustering Comparison between CellTypist and Antibody'

	NMI	ARI	Homogeneity	Completeness	Fowlkes_Mallows
0	0.659259	0.481537	0.667725	0.651004	0.585734

'PBMC3 - Clustering Comparison between CellTypist and Antibody'

	NMI	ARI	Homogeneity	Completeness	Fowlkes_Mallows
0	0.693433	0.555502	0.693429	0.693436	0.618105

'PBMC4 - Clustering Comparison between CellTypist and Antibody'

	NMI	ARI	Homogeneity	Completeness	Fowlkes_Mallows
0	0.751294	0.7252	0.728817	0.775201	0.776972

Summary

External measures

def load_scores(tuning, dataset):
    """Read the external-measure score table for one dataset/tuning pair.

    The file is expected at ``{DIR}{dataset}/scores_{tuning}.csv``. Its
    unnamed first column (read by pandas as ``"Unnamed: 0"``) is renamed to
    ``tool``, and a ``tuning`` column holding the given tuning name is added.

    Args:
        tuning: Tuning label used both in the file name and as column value.
        dataset: Dataset sub-directory name under ``DIR``.

    Returns:
        The loaded table as a ``pandas.DataFrame``.
    """
    csv_path = f'{DIR}{dataset}/scores_{tuning}.csv'
    table = pd.read_csv(csv_path).rename(columns={"Unnamed: 0": "tool"})
    return table.assign(tuning=tuning)

# Collect the external-measure score tables of every dataset/tuning pair and
# draw one facet per (score, tuning) combination, comparing the tools.
datasets = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
tunings = ['default_celltypist', 'default_antibody', 'celltypist_celltypist', 'antibody_antibody']

scores_list = []

# Concatenate all scores into one DataFrame
for dataset in datasets:
    for tuning in tunings:
        scores = load_scores(tuning, dataset)
        scores['dataset'] = dataset
        scores_list.append(scores)

all_scores = pd.concat(scores_list)

# Prepare data for plotting: one row per (tool, tuning, dataset, score)
all_scores_melted = all_scores.melt(id_vars=['tool', 'tuning', 'dataset'], var_name='score', value_name='value')

sns.set_context("talk")
# Define custom colors
custom_palette = {
    "seurat": "#4575B4",
    "monocle": "#DAABE9",
    "scanpy": "#7F9B5C",
    "COTAN": "#F73604",
    "scvi-tools": "#B6A18F"
}

# Fix the x-axis category order once for all facets. Without an explicit
# `order`, FacetGrid.map lets each facet infer its own order from its subset
# of the data, so the shared x tick labels may not match the plotted points.
# Known tools come first (same order as the internal-measures plots); any
# other tool found in the data is appended so that nothing is silently dropped.
preferred_order = ["monocle", "scanpy", "scvi-tools", "seurat", "COTAN"]
present_tools = list(all_scores_melted['tool'].unique())
tool_order = [t for t in preferred_order if t in present_tools]
tool_order += [t for t in present_tools if t not in preferred_order]

g = sns.FacetGrid(all_scores_melted, row='score', col='tuning', sharey=False, height=4, aspect=1.3)
g.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=tool_order, capsize=0.2, errwidth=2)

# Set titles and labels
g.set_titles(col_template="{col_name}", row_template="{row_name}")
g.set_axis_labels("Tool", "Score Value")
plt.subplots_adjust(top=1.4)
# Rotate x-axis labels
for ax in g.axes.flatten():
    plt.setp(ax.get_xticklabels(), rotation=45)

g.savefig(f"{DIR}ClusteringToolsComparison{min_size_cluster}.pdf")
plt.show()

Internal measures

# Load one internal-measure score table (CSV) and reshape it to long format
def load_scores(tuning, dataset, score_type):
    """Load ``{DIR}{dataset}/{tuning}_{score_type}.csv`` in long format.

    Each remaining column of the CSV is treated as one tool; the table is
    melted so that every row holds a single ``(tool, value)`` pair, and the
    ``tuning``, ``dataset`` and ``score_type`` arguments are attached as
    constant columns.

    Args:
        tuning: Tuning label; first part of the file name.
        dataset: Dataset sub-directory name under ``DIR``.
        score_type: Score name; second part of the file name.

    Returns:
        A ``pandas.DataFrame`` with columns ``tool``, ``value``, ``tuning``,
        ``dataset`` and ``score_type``.
    """
    file_path = f'{DIR}{dataset}/{tuning}_{score_type}.csv'
    print(f"Loading {file_path}")
    scores = pd.read_csv(file_path, header=0)
    # The files carry the saved DataFrame index as an unnamed first column,
    # which pandas reads back as "Unnamed: 0". Left in place it would be
    # melted into a bogus tool called "Unnamed: 0", so drop such columns.
    # Filtering by name (rather than index_col=0) also keeps files that have
    # no index column intact.
    is_index_col = scores.columns.astype(str).str.startswith('Unnamed:')
    scores = scores.loc[:, ~is_index_col]
    scores_melted = scores.melt(var_name='tool', value_name='value')
    scores_melted['tuning'] = tuning
    scores_melted['dataset'] = dataset
    scores_melted['score_type'] = score_type
    return scores_melted


# Load every internal-measure score table, then draw and save one FacetGrid
# (one column per tuning condition) for each of the six score types.
datasets = ['PBMC1', 'PBMC2', 'PBMC3', 'PBMC4']
tunings = ['default', 'celltypist', 'antibody']
score_types = ['silhouette', 'davies_bouldin','Calinski_Harabasz','silhouette_fromProb', 'davies_bouldin_fromProb','Calinski_Harabasz_fromProb']
scores_list = []

# Concatenate all scores into one DataFrame
for dataset in datasets:
    for tuning in tunings:
        for score_type in score_types:
            scores = load_scores(tuning, dataset, score_type)
            scores_list.append(scores)

all_scores = pd.concat(scores_list)

# Debug: Check the loaded data
print(all_scores.head())

# Define custom colors
custom_palette = {
    "seurat": "#4575B4",
    "monocle": "#DAABE9",
    "scanpy": "#7F9B5C",
    "COTAN": "#F73604",
    "scvi-tools": "#B6A18F"
}

# Category order on the x axis, shared by every plot below
tool_order = ["monocle", "scanpy", "scvi-tools", "seurat", "COTAN"]


def _plot_score_grid(score_rows, y_label, title, title_y, top):
    """Draw one point plot per tuning condition and return the FacetGrid."""
    grid = sns.FacetGrid(score_rows, col='tuning', sharey=False, height=4, aspect=1.8)
    grid.map(sns.pointplot, 'tool', 'value', palette=custom_palette, order=tool_order, capsize=0.2, errwidth=2)
    grid.set_titles(col_template="{col_name}")
    grid.set_axis_labels("Tool", y_label)
    grid.fig.suptitle(title, y=title_y)
    # Acts on the current figure, i.e. the one the FacetGrid just created
    plt.subplots_adjust(top=top)
    # Rotate x-axis labels
    for ax in grid.axes.flatten():
        plt.setp(ax.get_xticklabels(), rotation=45)
    return grid


# Split the long table by score type
silhouette_scores = all_scores[all_scores['score_type'] == 'silhouette']
davies_bouldin_scores = all_scores[all_scores['score_type'] == 'davies_bouldin']
Calinski_Harabasz_scores = all_scores[all_scores['score_type'] == 'Calinski_Harabasz']
silhouette_scores_fromProb = all_scores[all_scores['score_type'] == 'silhouette_fromProb']
davies_bouldin_scores_fromProb = all_scores[all_scores['score_type'] == 'davies_bouldin_fromProb']
Calinski_Harabasz_scores_fromProb = all_scores[all_scores['score_type'] == 'Calinski_Harabasz_fromProb']

# Plot Silhouette scores
g1 = _plot_score_grid(silhouette_scores, "Silhouette Score",
                      'Silhouette Scores by Tool and Tuning Condition', 1.25, 0.85)

# Plot Davies-Bouldin scores
g2 = _plot_score_grid(davies_bouldin_scores, "Davies-Bouldin Score",
                      'Davies-Bouldin Scores by Tool and Tuning Condition', 1.85, 1.5)

# Plot Calinski-Harabasz scores
g3 = _plot_score_grid(Calinski_Harabasz_scores, "Calinski_Harabasz Score",
                      'Calinski Harabasz Scores by Tool and Tuning Condition', 1.85, 1.5)

# Plot Silhouette scores (fromProb variant)
g4 = _plot_score_grid(silhouette_scores_fromProb, "Silhouette Score",
                      'Silhouette Scores From Prob. by Tool and Tuning Condition', 1.25, 0.85)

# Plot Davies-Bouldin scores (fromProb variant)
g5 = _plot_score_grid(davies_bouldin_scores_fromProb, "Davies-Bouldin Score",
                      'Davies-Bouldin Scores From Prob. by Tool and Tuning Condition', 1.85, 1.5)

# Plot Calinski-Harabasz scores (fromProb variant)
g6 = _plot_score_grid(Calinski_Harabasz_scores_fromProb, "Calinski_Harabasz Score",
                      'Calinski Harabasz Scores From Prob. by Tool and Tuning Condition', 1.85, 1.5)

# Save each figure under the name of the score it actually shows: g2/g5 are
# the Davies-Bouldin plots and g3/g6 the Calinski-Harabasz plots (these file
# names used to be swapped).
g1.savefig(f"{DIR}Silhouette{min_size_cluster}.pdf")
g2.savefig(f"{DIR}Davies_Bouldin{min_size_cluster}.pdf")
g3.savefig(f"{DIR}Calinski_Harabasz{min_size_cluster}.pdf")

g4.savefig(f"{DIR}SilhouetteFromProb{min_size_cluster}.pdf")
g5.savefig(f"{DIR}Davies_BouldinFromProb{min_size_cluster}.pdf")
g6.savefig(f"{DIR}Calinski_HarabaszFromProb{min_size_cluster}.pdf")

plt.show()

Loading Data/PBMC1/default_silhouette.csv
Loading Data/PBMC1/default_davies_bouldin.csv
Loading Data/PBMC1/default_Calinski_Harabasz.csv
Loading Data/PBMC1/default_silhouette_fromProb.csv
Loading Data/PBMC1/default_davies_bouldin_fromProb.csv
Loading Data/PBMC1/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC1/celltypist_silhouette.csv
Loading Data/PBMC1/celltypist_davies_bouldin.csv
Loading Data/PBMC1/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC1/celltypist_silhouette_fromProb.csv
Loading Data/PBMC1/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC1/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC1/antibody_silhouette.csv
Loading Data/PBMC1/antibody_davies_bouldin.csv
Loading Data/PBMC1/antibody_Calinski_Harabasz.csv
Loading Data/PBMC1/antibody_silhouette_fromProb.csv
Loading Data/PBMC1/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC1/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/default_silhouette.csv
Loading Data/PBMC2/default_davies_bouldin.csv
Loading Data/PBMC2/default_Calinski_Harabasz.csv
Loading Data/PBMC2/default_silhouette_fromProb.csv
Loading Data/PBMC2/default_davies_bouldin_fromProb.csv
Loading Data/PBMC2/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/celltypist_silhouette.csv
Loading Data/PBMC2/celltypist_davies_bouldin.csv
Loading Data/PBMC2/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC2/celltypist_silhouette_fromProb.csv
Loading Data/PBMC2/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC2/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC2/antibody_silhouette.csv
Loading Data/PBMC2/antibody_davies_bouldin.csv
Loading Data/PBMC2/antibody_Calinski_Harabasz.csv
Loading Data/PBMC2/antibody_silhouette_fromProb.csv
Loading Data/PBMC2/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC2/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/default_silhouette.csv
Loading Data/PBMC3/default_davies_bouldin.csv
Loading Data/PBMC3/default_Calinski_Harabasz.csv
Loading Data/PBMC3/default_silhouette_fromProb.csv
Loading Data/PBMC3/default_davies_bouldin_fromProb.csv
Loading Data/PBMC3/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/celltypist_silhouette.csv
Loading Data/PBMC3/celltypist_davies_bouldin.csv
Loading Data/PBMC3/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC3/celltypist_silhouette_fromProb.csv
Loading Data/PBMC3/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC3/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC3/antibody_silhouette.csv
Loading Data/PBMC3/antibody_davies_bouldin.csv
Loading Data/PBMC3/antibody_Calinski_Harabasz.csv
Loading Data/PBMC3/antibody_silhouette_fromProb.csv
Loading Data/PBMC3/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC3/antibody_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/default_silhouette.csv
Loading Data/PBMC4/default_davies_bouldin.csv
Loading Data/PBMC4/default_Calinski_Harabasz.csv
Loading Data/PBMC4/default_silhouette_fromProb.csv
Loading Data/PBMC4/default_davies_bouldin_fromProb.csv
Loading Data/PBMC4/default_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/celltypist_silhouette.csv
Loading Data/PBMC4/celltypist_davies_bouldin.csv
Loading Data/PBMC4/celltypist_Calinski_Harabasz.csv
Loading Data/PBMC4/celltypist_silhouette_fromProb.csv
Loading Data/PBMC4/celltypist_davies_bouldin_fromProb.csv
Loading Data/PBMC4/celltypist_Calinski_Harabasz_fromProb.csv
Loading Data/PBMC4/antibody_silhouette.csv
Loading Data/PBMC4/antibody_davies_bouldin.csv
Loading Data/PBMC4/antibody_Calinski_Harabasz.csv
Loading Data/PBMC4/antibody_silhouette_fromProb.csv
Loading Data/PBMC4/antibody_davies_bouldin_fromProb.csv
Loading Data/PBMC4/antibody_Calinski_Harabasz_fromProb.csv
         tool     value   tuning dataset  score_type
0  Unnamed: 0  0.000000  default   PBMC1  silhouette
1     monocle  0.106025  default   PBMC1  silhouette
2      scanpy  0.062012  default   PBMC1  silhouette
3  scvi-tools  0.087622  default   PBMC1  silhouette
4      seurat  0.168956  default   PBMC1  silhouette