Forebrain La Manno 2021 Clusters GDI

Author

trinetra75 & seriph78

Published

November 22, 2023

Preamble

For the evaluation of GDI sensitivity and also to test type I error and FDR for the differential expressed gene in a cluster we need to have some Coherent Transcript Cell Cluster (cluster formed by cells as similar as possible). So we tested all clusters defined in the original paper for this data set (focusing on E13.5, E15.0, and E17.5).

library(rlang)
library(zeallot)
library(data.table)
library(COTAN)
#devtools::load_all("~/dev/COTAN/COTAN/")

options(parallelly.fork.enable = TRUE)

setLoggingLevel(2)
setLoggingFile(file.path(".", "ClusterizationsGDI_AllForebrainDorsal.log"))

Definition of functions to extract data from clustersList

getClData <- function(objCOTAN, clList, cl) {
  cluster <- names(clList)[[cl]]
  cells <- clList[[cl]]
  res <- checkClusterUniformity(objCOTAN, cluster = cluster, cells = cells,
                                GDIThreshold = 1.4, cores = 6, saveObj = FALSE)
  res <- c(list("cluster" = cluster, "size" = length(cells)), res)
  res <- as.data.frame(res)
  rownames(res) <- cluster
  colnames(res)
  return(res)
}

clDataVal <- list("cluster" = NULL, "size" = NULL, "isUniform" = NULL,
                  "fractionAbove" = NULL, "1stPercentile" = NULL)

Forebrain Dorsal E13.5

Load dataset

fb135Obj <- readRDS(file = file.path("Data/MouseCortexFromLoom/", "e13.5_ForebrainDorsal.cotan.RDS"))

sampleCondition <- getMetadataElement(fb135Obj, datasetTags()[["cond"]])

logThis(sampleCondition)

allClust <- getClusterizations(fb135Obj)

logThis("")
logThis(paste("Number of cells:", getNumCells(fb135Obj)))

logThis("")
logThis("Available clusterizations:")
logThis(paste0(allClust, collapse = ", "))

clSizes <- vapply(allClust, function(x) { nlevels(getClusters(fb135Obj, x))}, integer(1L))
clSizes

            split             merge original.clusters original.subclass 
               35                31               159                40 
   original.class       cell.typist            seurat   seurat.high.res 
               13                13                19                36 
          monocle  monocle.high.res 
               11                27

logThis(paste(names(clSizes), clSizes, sep = ": ", collapse = ", "))

Extract original cluster

originalFB135 <- getClusters(fb135Obj, clName = "original.clusters")

nlevels(originalFB135)

[1] 159

sort(table(originalFB135), decreasing = TRUE)

originalFB135
432 186 187 434 184 437 510 509 183 185 438 433 525 443 446 192 568 188 441 565 
536 423 334 326 292 259 248 239 178 168 155 141 130  82  74  65  65  63  56  53 
428 181 679 440 189 118 507 431 442 445 497 444 193 426 511 439 178 436 508 135 
 52  51  49  46  39  36  35  32  30  29  29  26  25  25  23  22  20  20  20  19 
 68 177 427 435 175 225 120 271  67 173 430 494 524 176 429 493 496 502 566 117 
 17  14  14  13  12  12  11  11  11  10  10  10  10   9   9   9   9   9   9   8 
136 161 194 332 504 112 174 222 228 675 115 158 270 505 670 114 133 159 227 267 
  8   8   8   8   8   7   7   6   6   6   5   5   5   5   5   4   4   4   4   4 
359  41 470 564 162 179 329 447 492 519 535 671 119 130 146 163 168 195 224 226 
  4   4   4   4   3   3   3   3   3   3   3   3   2   2   2   2   2   2   2   2 
251 263 273 326 453 516 520 528 538 569 575 672 676 677 683 691 793 107 113 125 
  2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   1   1   1 
137 138 144 180 182 196 216 254 259 269 333 355 358 374  40 420 454 458 468 471 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
495 498 499 523 549  55 559 658 666 673 678 686 688 689 731 744 760 795  82 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1

# drop too small clusters: those whose size is less than 15
largeClFB135 <- which(table(originalFB135) >= 50)
largeOrigFB135 <- originalFB135[(originalFB135 %in% names(largeClFB135)),
                                drop = TRUE]

largeOrigFB135 <- toClustersList(largeOrigFB135)

Calculate GDI for each cluster in E13.5

print("Length larger clusters")
vapply(largeOrigFB135, length, integer(1))

resDataFB135 <- lapply(seq_along(largeOrigFB135), FUN = getClData,
                       objCOTAN = fb135Obj, clList = largeOrigFB135)

clDataFB135 <- data.table::rbindlist(resDataFB135)
clDataFB135 <- column_to_rownames(clDataFB135, var = "cluster")

saveRDS(clDataFB135,
        file = file.path("Data/MouseCortexFromLoom/ClustersGDI/", "e13.5_ForebrainDorsal_GDI_of_original_clusterization.RDS"))

clDataFB135 <- readRDS("Data/MouseCortexFromLoom/ClustersGDI/e13.5_ForebrainDorsal_GDI_of_original_clusterization.RDS")
clDataFB135[order(clDataFB135$size, decreasing = T),]

    size isUniform fractionAbove X1stPercentile
432  536     FALSE  3.770380e-02       1.484698
186  423     FALSE  4.307131e-02       1.609718
187  334     FALSE  1.303433e-02       1.423035
434  326      TRUE  4.379562e-03       1.376828
184  292     FALSE  1.170610e-02       1.414304
437  259      TRUE  4.410239e-03       1.364885
510  248      TRUE  9.610563e-03       1.397472
509  239      TRUE  4.471580e-03       1.373229
183  178     FALSE  1.458630e-02       1.427795
185  168     FALSE  1.630482e-02       1.428907
438  155      TRUE  1.080901e-03       1.320271
433  141      TRUE  4.997501e-03       1.363220
525  130      TRUE  1.488711e-03       1.329041
443   82      TRUE  2.276508e-03       1.344695
446   74      TRUE  8.701706e-05       1.300874
192   65      TRUE  2.597852e-04       1.310808
568   65      TRUE  0.000000e+00       1.233130
188   63      TRUE  1.147802e-03       1.314789
441   56      TRUE  0.000000e+00       1.277716
565   53      TRUE  0.000000e+00       1.232690
428   52      TRUE  0.000000e+00       1.263561
181   51      TRUE  3.013116e-03       1.330148

So we can use the following clusters (at least 100 cells - cell number in the parenthesis - and GDI lower than 1.5):

cl432 (n.cells 536),
cl187 (334),
cl434 (326),
cl184 (292),
cl437 (259),
cl510 (248)

Forebrain Dorsal E15.0

Load dataset

fb150Obj <- readRDS(file = file.path("Data/MouseCortexFromLoom/", "e15.0_ForebrainDorsal.cotan.RDS"))

sampleCondition <- getMetadataElement(fb150Obj, datasetTags()[["cond"]])

logThis(sampleCondition)

allClust <- getClusterizations(fb150Obj)

logThis("")
logThis(paste("Number of cells:", getNumCells(fb150Obj)))

logThis("")
logThis("Available clusterizations:")
logThis(paste0(allClust, collapse = ", "))

clSizes <- vapply(allClust, function(x) { nlevels(getClusters(fb150Obj, x))}, integer(1L))
clSizes

            split             merge            seurat   seurat.high.res 
               57                50                21                40 
original.clusters original.subclass    original.class           monocle 
              229                47                14                11 
 monocle.high.res       cell.typist 
               46                13

logThis(paste(names(clSizes), clSizes, sep = ": ", collapse = ", "))

Extract original cluster

originalFB150 <- getClusters(fb150Obj, clName = "original.clusters")

nlevels(originalFB150)

[1] 229

sort(table(originalFB150), decreasing = TRUE)

originalFB150
525 432 511 509 510 508 184 428 434 437 507 568 433 504 426 185 187 505 516 565 
826 586 540 402 402 397 322 318 273 258 183 181 176 174 173 172 163 147 137 133 
427 524 679 439 436 493 498 446 438 441 183 497 192 506 188 443 502 494 574 575 
120 108 105 102  93  93  79  66  63  60  54  51  46  46  43  42  42  41  41  41 
492 118 177 519 526 566 182 181 501 573 193 429 445 499 518 440 560 178 442 514 
 38  36  31  31  28  28  27  26  24  24  23  23  23  23  22  21  20  19  19  19 
523 186 569 444 557 271 452 495 520 535 542 677 527 431 135 455 458 496 512 676 
 19  18  18  17  16  15  15  15  15  14  14  14  13  12  11  11  11  11  11  11 
120 159 517 145 161 174 179 558 227 332 457 470 503 739 117 175 176 189 559 564 
 10  10  10   9   9   9   9   9   8   8   8   8   8   8   7   7   7   7   7   7 
180 225 538 549 561 671  68 695 738 747 136 144 222 226 273 500 536 678 112 114 
  6   6   6   6   6   6   6   6   6   6   5   5   5   5   5   5   5   5   4   4 
115 157 267 328 453 456 534 550 570 686 731 737 133 146 147 158 172 173 270 326 
  4   4   4   4   4   4   4   4   4   4   4   4   3   3   3   3   3   3   3   3 
430 513 515 528 533 539 544 571 674 675 732 113 125 130 143 152 162 163 195 259 
  3   3   3   3   3   3   3   3   3   3   3   2   2   2   2   2   2   2   2   2 
435 451 454 531 543 548 552 554 562 670 689 740 119 122 134 137 142 151 156 191 
  2   2   2   2   2   2   2   2   2   2   2   2   1   1   1   1   1   1   1   1 
194 223 224 228 234 240 262 268 269 275 286 296 308 314 329 333 359  41 448 450 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
468 529 530 532 537 540 553 567 572 601 614 634 647 649  67 672 680 681 684 693 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
696 726 734 749 750 751 760 771  88 
  1   1   1   1   1   1   1   1   1

# drop too small clusters: those whose size is less than 15
largeClFB150 <- which(table(originalFB150) >= 50)
largeOrigFB150 <- originalFB150[(originalFB150 %in% names(largeClFB150)),
                                drop = TRUE]

largeOrigFB150 <- toClustersList(largeOrigFB150)

Calculate GDI for each cluster

print("Length larger clusters")
vapply(largeOrigFB150, length, integer(1))

resDataFB150 <- lapply(seq_along(largeOrigFB150), FUN = getClData,
                       objCOTAN = fb150Obj, clList = largeOrigFB150)

clDataFB150 <- data.table::rbindlist(resDataFB150)
clDataFB150 <- column_to_rownames(clDataFB150, var = "cluster")

saveRDS(clDataFB150,
        file = file.path("Data/MouseCortexFromLoom/ClustersGDI/", "e15.0_ForebrainDorsal_GDI_of_original_clusterization.RDS"))

clDataFB150 <- readRDS("Data/MouseCortexFromLoom/ClustersGDI/e15.0_ForebrainDorsal_GDI_of_original_clusterization.RDS")

clDataFB150[order(clDataFB150$size,decreasing = T),]

    size isUniform fractionAbove X1stPercentile
525  826     FALSE  0.3016472479       2.352178
432  586     FALSE  0.0254880232       1.459100
511  540     FALSE  0.1510725315       1.913734
509  402      TRUE  0.0067395414       1.381121
510  402     FALSE  0.0152561426       1.420390
508  397      TRUE  0.0046488461       1.370125
184  322     FALSE  0.0268745853       1.498304
428  318      TRUE  0.0012377332       1.342660
434  273      TRUE  0.0004509380       1.329601
437  258      TRUE  0.0025343004       1.342904
507  183      TRUE  0.0094123856       1.393946
568  181      TRUE  0.0000000000       1.305810
433  176      TRUE  0.0011034483       1.336980
504  174      TRUE  0.0039181957       1.361682
426  173      TRUE  0.0004750143       1.330498
185  172     FALSE  0.0101354303       1.400158
187  163      TRUE  0.0054207120       1.359734
505  147      TRUE  0.0001728011       1.293017
516  137      TRUE  0.0014539856       1.328445
565  133      TRUE  0.0073304826       1.385537
427  120      TRUE  0.0000000000       1.274314
524  108     FALSE  0.0187910535       1.439259
679  105      TRUE  0.0023903726       1.339173
439  102      TRUE  0.0000000000       1.267754
436   93      TRUE  0.0000000000       1.253222
493   93      TRUE  0.0007476325       1.324079
498   79      TRUE  0.0010643016       1.321372
446   66      TRUE  0.0000000000       1.296987
438   63      TRUE  0.0000000000       1.265541
441   60      TRUE  0.0000000000       1.277497
183   54      TRUE  0.0002659810       1.305260
497   51      TRUE  0.0003751993       1.301691

So we can use the following clusters (at least 100 cells - cell number in the parenthesis - and GDI lower than 1.5):

cl432 (586),
cl509 (402),
cl510 (402),
cl508 (397),
cl428 (318),
cl434 (273),
cl437 (258)

Forebrain Dorsal E17.5

Load dataset

fb175Obj <- readRDS(file = file.path("Data/MouseCortexFromLoom/", "e17.5_ForebrainDorsal.cotan.RDS"))

sampleCondition <- getMetadataElement(fb175Obj, datasetTags()[["cond"]])

logThis(sampleCondition)

allClust <- getClusterizations(fb175Obj)

logThis("")
logThis(paste("Number of cells:", getNumCells(fb175Obj)))

logThis("")
logThis("Available clusterizations:")
logThis(paste0(allClust, collapse = ", "))

clSizes <- vapply(allClust, function(x) { nlevels(getClusters(fb175Obj, x))}, integer(1L))
clSizes

            split             merge original.clusters original.subclass 
               25                19               136                27 
   original.class       cell.typist            seurat   seurat.high.res 
                9                10                19                21 
          monocle  monocle.high.res 
                7                24

logThis(paste(names(clSizes), clSizes, sep = ": ", collapse = ", "))

Extract original cluster

originalFB175 <- getClusters(fb175Obj, clName = "original.clusters")

nlevels(originalFB175)

[1] 136

sort(table(originalFB175), decreasing = TRUE)

originalFB175
516 505 515 427 504 514 428 493 506 513 508 517 523 177 436 498 512 507 524 172 
297 203 159 147 135 119 110  80  72  69  58  58  55  38  38  38  37  36  29  28 
568 439 492 495 503 574 575 179 426 526 521 441 433 442 494 497 502 178 501 565 
 27  26  23  22  22  22  20  18  17  17  16  15  13  13  13  13  13  12  12  12 
678 446 518 677 180 536 573 135 159 174 309 564 225 452 453 522 525 676 679 161 
 11  10  10  10   9   8   8   7   7   7   7   7   6   6   6   6   6   6   6   5 
176 184 277 437 458 520 535 539 144 175 182 187 457 499 519 537 569 117 185 262 
  5   5   5   5   5   5   5   5   4   4   4   4   4   4   4   4   4   3   3   3 
355 359 425 509 511 689 142 158 189 307 315 429 432 434 443 448 538 543 690 737 
  3   3   3   3   3   3   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
115 120 124 132 136 151 160 173 183 188 227 245 261 273 286 298 306 308 310 316 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
317 438 440 455 500 510 557 560 562 570 572 671 673 688 740 796 
  1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1

# drop too small clusters: those whose size is less than 15
largeClFB175 <- which(table(originalFB175) >= 50)
largeOrigFB175 <- originalFB175[(originalFB175 %in% names(largeClFB175)),
                                drop = TRUE]

largeOrigFB175 <- toClustersList(largeOrigFB175)

Calculate GDI for each cluster

print("Length larger clusters")
vapply(largeOrigFB175, length, integer(1))

resDataFB175 <- lapply(seq_along(largeOrigFB175), FUN = getClData,
                       objCOTAN = fb175Obj, clList = largeOrigFB175)

clDataFB175 <- data.table::rbindlist(resDataFB175)
clDataFB175 <- column_to_rownames(clDataFB175, var = "cluster")

saveRDS(clDataFB175,
        file = file.path("Data/MouseCortexFromLoom/ClustersGDI/", "e17.5_ForebrainDorsal_GDI_of_original_clusterization.RDS"))

clDataFB175 <- readRDS("Data/MouseCortexFromLoom/ClustersGDI/e17.5_ForebrainDorsal_GDI_of_original_clusterization.RDS")
clDataFB175[order(clDataFB175$size,decreasing = T),]

    size isUniform fractionAbove X1stPercentile
516  297      TRUE  5.787234e-03       1.380640
505  203      TRUE  7.261505e-04       1.330011
515  159     FALSE  8.098331e-02       1.736907
427  147      TRUE  0.000000e+00       1.275390
504  135      TRUE  0.000000e+00       1.265854
514  119     FALSE  3.665886e-02       1.517923
428  110      TRUE  0.000000e+00       1.263149
493   80      TRUE  1.512224e-03       1.331605
506   72      TRUE  8.827684e-05       1.285308
513   69     FALSE  7.475535e-02       1.649405
508   58      TRUE  0.000000e+00       1.255277
517   58      TRUE  8.074475e-03       1.383811
523   55     FALSE  1.395007e-02       1.416125

So we can use the following clusters (at least 100 cells - cell number in the parenthesis - and GDI lower than 1.5):

cl516 (297),
cl505 (203)

sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] COTAN_2.3.0       data.table_1.14.8 zeallot_0.1.0     rlang_1.1.1      

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3        rstudioapi_0.15.0        
  [3] jsonlite_1.8.7            shape_1.4.6              
  [5] umap_0.2.10.0             magrittr_2.0.3           
  [7] spatstat.utils_3.0-3      rmarkdown_2.24           
  [9] GlobalOptions_0.1.2       vctrs_0.6.3              
 [11] ROCR_1.0-11               spatstat.explore_3.2-1   
 [13] DelayedMatrixStats_1.22.5 askpass_1.2.0            
 [15] htmltools_0.5.5           S4Arrays_1.2.0           
 [17] sctransform_0.4.1         parallelly_1.36.0        
 [19] KernSmooth_2.23-22        htmlwidgets_1.6.2        
 [21] ica_1.0-3                 plyr_1.8.8               
 [23] plotly_4.10.2             zoo_1.8-12               
 [25] igraph_1.5.1              mime_0.12                
 [27] lifecycle_1.0.3           iterators_1.0.14         
 [29] pkgconfig_2.0.3           rsvd_1.0.5               
 [31] Matrix_1.6-2              R6_2.5.1                 
 [33] fastmap_1.1.1             MatrixGenerics_1.12.3    
 [35] fitdistrplus_1.1-11       future_1.33.0            
 [37] shiny_1.7.5               clue_0.3-64              
 [39] digest_0.6.33             colorspace_2.1-0         
 [41] patchwork_1.1.2           S4Vectors_0.38.1         
 [43] tensor_1.5                Seurat_5.0.0             
 [45] dqrng_0.3.0               RSpectra_0.16-1          
 [47] irlba_2.3.5.1             beachmat_2.16.0          
 [49] PCAtools_2.14.0           progressr_0.14.0         
 [51] RcppZiggurat_0.1.6        spatstat.sparse_3.0-2    
 [53] fansi_1.0.4               polyclip_1.10-4          
 [55] httr_1.4.6                abind_1.4-5              
 [57] compiler_4.3.2            withr_2.5.0              
 [59] doParallel_1.0.17         BiocParallel_1.34.2      
 [61] viridis_0.6.4             fastDummies_1.7.3        
 [63] dendextend_1.17.1         MASS_7.3-60              
 [65] openssl_2.1.0             DelayedArray_0.26.7      
 [67] rjson_0.2.21              tools_4.3.2              
 [69] lmtest_0.9-40             httpuv_1.6.11            
 [71] future.apply_1.11.0       goftest_1.2-3            
 [73] glue_1.6.2                nlme_3.1-163             
 [75] promises_1.2.0.1          grid_4.3.2               
 [77] Rtsne_0.16                cluster_2.1.4            
 [79] reshape2_1.4.4            generics_0.1.3           
 [81] spatstat.data_3.0-1       gtable_0.3.3             
 [83] tidyr_1.3.0               BiocSingular_1.16.0      
 [85] ScaledMatrix_1.8.1        sp_2.1-1                 
 [87] utf8_1.2.3                spatstat.geom_3.2-4      
 [89] BiocGenerics_0.46.0       RcppAnnoy_0.0.21         
 [91] ggrepel_0.9.3             RANN_2.6.1               
 [93] foreach_1.5.2             pillar_1.9.0             
 [95] stringr_1.5.0             spam_2.10-0              
 [97] RcppHNSW_0.5.0            later_1.3.1              
 [99] circlize_0.4.15           splines_4.3.2            
[101] dplyr_1.1.2               lattice_0.22-5           
[103] deldir_1.0-9              survival_3.5-7           
[105] tidyselect_1.2.0          ComplexHeatmap_2.16.0    
[107] miniUI_0.1.1.1            pbapply_1.7-2            
[109] knitr_1.43                gridExtra_2.3            
[111] IRanges_2.34.1            scattermore_1.2          
[113] stats4_4.3.2              xfun_0.39                
[115] matrixStats_1.1.0         stringi_1.8.1            
[117] lazyeval_0.2.2            yaml_2.3.7               
[119] evaluate_0.21             codetools_0.2-19         
[121] tibble_3.2.1              cli_3.6.1                
[123] uwot_0.1.16               RcppParallel_5.1.7       
[125] xtable_1.8-4              reticulate_1.34.0        
[127] munsell_0.5.0             Rcpp_1.0.11              
[129] spatstat.random_3.2-1     globals_0.16.2           
[131] png_0.1-8                 parallel_4.3.2           
[133] Rfast_2.1.0               ellipsis_0.3.2           
[135] assertthat_0.2.1          ggplot2_3.4.2            
[137] dotCall64_1.1-0           parallelDist_0.2.6       
[139] sparseMatrixStats_1.12.2  listenv_0.9.0            
[141] ggthemes_4.2.4            viridisLite_0.4.2        
[143] scales_1.2.1              ggridges_0.5.4           
[145] SeuratObject_5.0.0        leiden_0.4.3             
[147] purrr_1.0.1               crayon_1.5.2             
[149] GetoptLong_1.0.5          cowplot_1.1.1