library(rlang)
library(zeallot)
library(data.table)
library(COTAN)
#devtools::load_all("~/dev/COTAN/COTAN/")
options(parallelly.fork.enable = TRUE)
setLoggingLevel(2)
setLoggingFile(file.path(".", "ClusterizationsGDI_AllForebrainDorsal.log"))
Forebrain La Manno 2021 Clusters GDI
Preamble
For the evaluation of GDI sensitivity and also to test type I error and FDR for the differential expressed gene in a cluster we need to have some Coherent Transcript Cell Cluster (cluster formed by cells as similar as possible). So we tested all clusters defined in the original paper for this data set (focusing on E13.5, E15.0, and E17.5).
Definition of functions to extract data from clustersList
<- function(objCOTAN, clList, cl) {
getClData <- names(clList)[[cl]]
cluster <- clList[[cl]]
cells <- checkClusterUniformity(objCOTAN, cluster = cluster, cells = cells,
res GDIThreshold = 1.4, cores = 6, saveObj = FALSE)
<- c(list("cluster" = cluster, "size" = length(cells)), res)
res <- as.data.frame(res)
res rownames(res) <- cluster
colnames(res)
return(res)
}
<- list("cluster" = NULL, "size" = NULL, "isUniform" = NULL,
clDataVal "fractionAbove" = NULL, "1stPercentile" = NULL)
Forebrain Dorsal E13.5
Load dataset
<- readRDS(file = file.path("Data/MouseCortexFromLoom/", "e13.5_ForebrainDorsal.cotan.RDS"))
fb135Obj
<- getMetadataElement(fb135Obj, datasetTags()[["cond"]])
sampleCondition
logThis(sampleCondition)
<- getClusterizations(fb135Obj)
allClust
logThis("")
logThis(paste("Number of cells:", getNumCells(fb135Obj)))
logThis("")
logThis("Available clusterizations:")
logThis(paste0(allClust, collapse = ", "))
<- vapply(allClust, function(x) { nlevels(getClusters(fb135Obj, x))}, integer(1L))
clSizes clSizes
split merge original.clusters original.subclass
35 31 159 40
original.class cell.typist seurat seurat.high.res
13 13 19 36
monocle monocle.high.res
11 27
logThis(paste(names(clSizes), clSizes, sep = ": ", collapse = ", "))
Extract original cluster
<- getClusters(fb135Obj, clName = "original.clusters")
originalFB135
nlevels(originalFB135)
[1] 159
sort(table(originalFB135), decreasing = TRUE)
originalFB135
432 186 187 434 184 437 510 509 183 185 438 433 525 443 446 192 568 188 441 565
536 423 334 326 292 259 248 239 178 168 155 141 130 82 74 65 65 63 56 53
428 181 679 440 189 118 507 431 442 445 497 444 193 426 511 439 178 436 508 135
52 51 49 46 39 36 35 32 30 29 29 26 25 25 23 22 20 20 20 19
68 177 427 435 175 225 120 271 67 173 430 494 524 176 429 493 496 502 566 117
17 14 14 13 12 12 11 11 11 10 10 10 10 9 9 9 9 9 9 8
136 161 194 332 504 112 174 222 228 675 115 158 270 505 670 114 133 159 227 267
8 8 8 8 8 7 7 6 6 6 5 5 5 5 5 4 4 4 4 4
359 41 470 564 162 179 329 447 492 519 535 671 119 130 146 163 168 195 224 226
4 4 4 4 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2
251 263 273 326 453 516 520 528 538 569 575 672 676 677 683 691 793 107 113 125
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1
137 138 144 180 182 196 216 254 259 269 333 355 358 374 40 420 454 458 468 471
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
495 498 499 523 549 55 559 658 666 673 678 686 688 689 731 744 760 795 82
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# drop too small clusters: those whose size is less than 15
<- which(table(originalFB135) >= 50)
largeClFB135 <- originalFB135[(originalFB135 %in% names(largeClFB135)),
largeOrigFB135 = TRUE]
drop
<- toClustersList(largeOrigFB135) largeOrigFB135
Calculate GDI for each cluster in E13.5
print("Length larger clusters")
vapply(largeOrigFB135, length, integer(1))
<- lapply(seq_along(largeOrigFB135), FUN = getClData,
resDataFB135 objCOTAN = fb135Obj, clList = largeOrigFB135)
<- data.table::rbindlist(resDataFB135)
clDataFB135 <- column_to_rownames(clDataFB135, var = "cluster") clDataFB135
saveRDS(clDataFB135,
file = file.path("Data/MouseCortexFromLoom/ClustersGDI/", "e13.5_ForebrainDorsal_GDI_of_original_clusterization.RDS"))
<- readRDS("Data/MouseCortexFromLoom/ClustersGDI/e13.5_ForebrainDorsal_GDI_of_original_clusterization.RDS")
clDataFB135 order(clDataFB135$size, decreasing = T),] clDataFB135[
size isUniform fractionAbove X1stPercentile
432 536 FALSE 3.770380e-02 1.484698
186 423 FALSE 4.307131e-02 1.609718
187 334 FALSE 1.303433e-02 1.423035
434 326 TRUE 4.379562e-03 1.376828
184 292 FALSE 1.170610e-02 1.414304
437 259 TRUE 4.410239e-03 1.364885
510 248 TRUE 9.610563e-03 1.397472
509 239 TRUE 4.471580e-03 1.373229
183 178 FALSE 1.458630e-02 1.427795
185 168 FALSE 1.630482e-02 1.428907
438 155 TRUE 1.080901e-03 1.320271
433 141 TRUE 4.997501e-03 1.363220
525 130 TRUE 1.488711e-03 1.329041
443 82 TRUE 2.276508e-03 1.344695
446 74 TRUE 8.701706e-05 1.300874
192 65 TRUE 2.597852e-04 1.310808
568 65 TRUE 0.000000e+00 1.233130
188 63 TRUE 1.147802e-03 1.314789
441 56 TRUE 0.000000e+00 1.277716
565 53 TRUE 0.000000e+00 1.232690
428 52 TRUE 0.000000e+00 1.263561
181 51 TRUE 3.013116e-03 1.330148
So we can use the following clusters (at least 100 cells - cell number in the parenthesis - and GDI lower than 1.5):
cl432 (n.cells 536),
cl187 (334),
cl434 (326),
cl184 (292),
cl437 (259),
cl510 (248)
Forebrain Dorsal E15.0
Load dataset
<- readRDS(file = file.path("Data/MouseCortexFromLoom/", "e15.0_ForebrainDorsal.cotan.RDS"))
fb150Obj
<- getMetadataElement(fb150Obj, datasetTags()[["cond"]])
sampleCondition
logThis(sampleCondition)
<- getClusterizations(fb150Obj)
allClust
logThis("")
logThis(paste("Number of cells:", getNumCells(fb150Obj)))
logThis("")
logThis("Available clusterizations:")
logThis(paste0(allClust, collapse = ", "))
<- vapply(allClust, function(x) { nlevels(getClusters(fb150Obj, x))}, integer(1L))
clSizes clSizes
split merge seurat seurat.high.res
57 50 21 40
original.clusters original.subclass original.class monocle
229 47 14 11
monocle.high.res cell.typist
46 13
logThis(paste(names(clSizes), clSizes, sep = ": ", collapse = ", "))
Extract original cluster
<- getClusters(fb150Obj, clName = "original.clusters")
originalFB150
nlevels(originalFB150)
[1] 229
sort(table(originalFB150), decreasing = TRUE)
originalFB150
525 432 511 509 510 508 184 428 434 437 507 568 433 504 426 185 187 505 516 565
826 586 540 402 402 397 322 318 273 258 183 181 176 174 173 172 163 147 137 133
427 524 679 439 436 493 498 446 438 441 183 497 192 506 188 443 502 494 574 575
120 108 105 102 93 93 79 66 63 60 54 51 46 46 43 42 42 41 41 41
492 118 177 519 526 566 182 181 501 573 193 429 445 499 518 440 560 178 442 514
38 36 31 31 28 28 27 26 24 24 23 23 23 23 22 21 20 19 19 19
523 186 569 444 557 271 452 495 520 535 542 677 527 431 135 455 458 496 512 676
19 18 18 17 16 15 15 15 15 14 14 14 13 12 11 11 11 11 11 11
120 159 517 145 161 174 179 558 227 332 457 470 503 739 117 175 176 189 559 564
10 10 10 9 9 9 9 9 8 8 8 8 8 8 7 7 7 7 7 7
180 225 538 549 561 671 68 695 738 747 136 144 222 226 273 500 536 678 112 114
6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 4 4
115 157 267 328 453 456 534 550 570 686 731 737 133 146 147 158 172 173 270 326
4 4 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3
430 513 515 528 533 539 544 571 674 675 732 113 125 130 143 152 162 163 195 259
3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2
435 451 454 531 543 548 552 554 562 670 689 740 119 122 134 137 142 151 156 191
2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1
194 223 224 228 234 240 262 268 269 275 286 296 308 314 329 333 359 41 448 450
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
468 529 530 532 537 540 553 567 572 601 614 634 647 649 67 672 680 681 684 693
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
696 726 734 749 750 751 760 771 88
1 1 1 1 1 1 1 1 1
# drop too small clusters: those whose size is less than 15
<- which(table(originalFB150) >= 50)
largeClFB150 <- originalFB150[(originalFB150 %in% names(largeClFB150)),
largeOrigFB150 = TRUE]
drop
<- toClustersList(largeOrigFB150) largeOrigFB150
Calculate GDI for each cluster
print("Length larger clusters")
vapply(largeOrigFB150, length, integer(1))
<- lapply(seq_along(largeOrigFB150), FUN = getClData,
resDataFB150 objCOTAN = fb150Obj, clList = largeOrigFB150)
<- data.table::rbindlist(resDataFB150)
clDataFB150 <- column_to_rownames(clDataFB150, var = "cluster") clDataFB150
saveRDS(clDataFB150,
file = file.path("Data/MouseCortexFromLoom/ClustersGDI/", "e15.0_ForebrainDorsal_GDI_of_original_clusterization.RDS"))
<- readRDS("Data/MouseCortexFromLoom/ClustersGDI/e15.0_ForebrainDorsal_GDI_of_original_clusterization.RDS")
clDataFB150
order(clDataFB150$size,decreasing = T),] clDataFB150[
size isUniform fractionAbove X1stPercentile
525 826 FALSE 0.3016472479 2.352178
432 586 FALSE 0.0254880232 1.459100
511 540 FALSE 0.1510725315 1.913734
509 402 TRUE 0.0067395414 1.381121
510 402 FALSE 0.0152561426 1.420390
508 397 TRUE 0.0046488461 1.370125
184 322 FALSE 0.0268745853 1.498304
428 318 TRUE 0.0012377332 1.342660
434 273 TRUE 0.0004509380 1.329601
437 258 TRUE 0.0025343004 1.342904
507 183 TRUE 0.0094123856 1.393946
568 181 TRUE 0.0000000000 1.305810
433 176 TRUE 0.0011034483 1.336980
504 174 TRUE 0.0039181957 1.361682
426 173 TRUE 0.0004750143 1.330498
185 172 FALSE 0.0101354303 1.400158
187 163 TRUE 0.0054207120 1.359734
505 147 TRUE 0.0001728011 1.293017
516 137 TRUE 0.0014539856 1.328445
565 133 TRUE 0.0073304826 1.385537
427 120 TRUE 0.0000000000 1.274314
524 108 FALSE 0.0187910535 1.439259
679 105 TRUE 0.0023903726 1.339173
439 102 TRUE 0.0000000000 1.267754
436 93 TRUE 0.0000000000 1.253222
493 93 TRUE 0.0007476325 1.324079
498 79 TRUE 0.0010643016 1.321372
446 66 TRUE 0.0000000000 1.296987
438 63 TRUE 0.0000000000 1.265541
441 60 TRUE 0.0000000000 1.277497
183 54 TRUE 0.0002659810 1.305260
497 51 TRUE 0.0003751993 1.301691
So we can use the following clusters (at least 100 cells - cell number in the parenthesis - and GDI lower than 1.5):
cl432 (586),
cl509 (402),
cl510 (402),
cl508 (397),
cl428 (318),
cl434 (273),
cl437 (258)
Forebrain Dorsal E17.5
Load dataset
<- readRDS(file = file.path("Data/MouseCortexFromLoom/", "e17.5_ForebrainDorsal.cotan.RDS"))
fb175Obj
<- getMetadataElement(fb175Obj, datasetTags()[["cond"]])
sampleCondition
logThis(sampleCondition)
<- getClusterizations(fb175Obj)
allClust
logThis("")
logThis(paste("Number of cells:", getNumCells(fb175Obj)))
logThis("")
logThis("Available clusterizations:")
logThis(paste0(allClust, collapse = ", "))
<- vapply(allClust, function(x) { nlevels(getClusters(fb175Obj, x))}, integer(1L))
clSizes clSizes
split merge original.clusters original.subclass
25 19 136 27
original.class cell.typist seurat seurat.high.res
9 10 19 21
monocle monocle.high.res
7 24
logThis(paste(names(clSizes), clSizes, sep = ": ", collapse = ", "))
Extract original cluster
<- getClusters(fb175Obj, clName = "original.clusters")
originalFB175
nlevels(originalFB175)
[1] 136
sort(table(originalFB175), decreasing = TRUE)
originalFB175
516 505 515 427 504 514 428 493 506 513 508 517 523 177 436 498 512 507 524 172
297 203 159 147 135 119 110 80 72 69 58 58 55 38 38 38 37 36 29 28
568 439 492 495 503 574 575 179 426 526 521 441 433 442 494 497 502 178 501 565
27 26 23 22 22 22 20 18 17 17 16 15 13 13 13 13 13 12 12 12
678 446 518 677 180 536 573 135 159 174 309 564 225 452 453 522 525 676 679 161
11 10 10 10 9 8 8 7 7 7 7 7 6 6 6 6 6 6 6 5
176 184 277 437 458 520 535 539 144 175 182 187 457 499 519 537 569 117 185 262
5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 3 3 3
355 359 425 509 511 689 142 158 189 307 315 429 432 434 443 448 538 543 690 737
3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2
115 120 124 132 136 151 160 173 183 188 227 245 261 273 286 298 306 308 310 316
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
317 438 440 455 500 510 557 560 562 570 572 671 673 688 740 796
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# drop too small clusters: those whose size is less than 15
<- which(table(originalFB175) >= 50)
largeClFB175 <- originalFB175[(originalFB175 %in% names(largeClFB175)),
largeOrigFB175 = TRUE]
drop
<- toClustersList(largeOrigFB175) largeOrigFB175
Calculate GDI for each cluster
print("Length larger clusters")
vapply(largeOrigFB175, length, integer(1))
<- lapply(seq_along(largeOrigFB175), FUN = getClData,
resDataFB175 objCOTAN = fb175Obj, clList = largeOrigFB175)
<- data.table::rbindlist(resDataFB175)
clDataFB175 <- column_to_rownames(clDataFB175, var = "cluster") clDataFB175
saveRDS(clDataFB175,
file = file.path("Data/MouseCortexFromLoom/ClustersGDI/", "e17.5_ForebrainDorsal_GDI_of_original_clusterization.RDS"))
<- readRDS("Data/MouseCortexFromLoom/ClustersGDI/e17.5_ForebrainDorsal_GDI_of_original_clusterization.RDS")
clDataFB175 order(clDataFB175$size,decreasing = T),] clDataFB175[
size isUniform fractionAbove X1stPercentile
516 297 TRUE 5.787234e-03 1.380640
505 203 TRUE 7.261505e-04 1.330011
515 159 FALSE 8.098331e-02 1.736907
427 147 TRUE 0.000000e+00 1.275390
504 135 TRUE 0.000000e+00 1.265854
514 119 FALSE 3.665886e-02 1.517923
428 110 TRUE 0.000000e+00 1.263149
493 80 TRUE 1.512224e-03 1.331605
506 72 TRUE 8.827684e-05 1.285308
513 69 FALSE 7.475535e-02 1.649405
508 58 TRUE 0.000000e+00 1.255277
517 58 TRUE 8.074475e-03 1.383811
523 55 FALSE 1.395007e-02 1.416125
So we can use the following clusters (at least 100 cells - cell number in the parenthesis - and GDI lower than 1.5):
cl516 (297),
cl505 (203)
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] COTAN_2.3.0 data.table_1.14.8 zeallot_0.1.0 rlang_1.1.1
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0
[3] jsonlite_1.8.7 shape_1.4.6
[5] umap_0.2.10.0 magrittr_2.0.3
[7] spatstat.utils_3.0-3 rmarkdown_2.24
[9] GlobalOptions_0.1.2 vctrs_0.6.3
[11] ROCR_1.0-11 spatstat.explore_3.2-1
[13] DelayedMatrixStats_1.22.5 askpass_1.2.0
[15] htmltools_0.5.5 S4Arrays_1.2.0
[17] sctransform_0.4.1 parallelly_1.36.0
[19] KernSmooth_2.23-22 htmlwidgets_1.6.2
[21] ica_1.0-3 plyr_1.8.8
[23] plotly_4.10.2 zoo_1.8-12
[25] igraph_1.5.1 mime_0.12
[27] lifecycle_1.0.3 iterators_1.0.14
[29] pkgconfig_2.0.3 rsvd_1.0.5
[31] Matrix_1.6-2 R6_2.5.1
[33] fastmap_1.1.1 MatrixGenerics_1.12.3
[35] fitdistrplus_1.1-11 future_1.33.0
[37] shiny_1.7.5 clue_0.3-64
[39] digest_0.6.33 colorspace_2.1-0
[41] patchwork_1.1.2 S4Vectors_0.38.1
[43] tensor_1.5 Seurat_5.0.0
[45] dqrng_0.3.0 RSpectra_0.16-1
[47] irlba_2.3.5.1 beachmat_2.16.0
[49] PCAtools_2.14.0 progressr_0.14.0
[51] RcppZiggurat_0.1.6 spatstat.sparse_3.0-2
[53] fansi_1.0.4 polyclip_1.10-4
[55] httr_1.4.6 abind_1.4-5
[57] compiler_4.3.2 withr_2.5.0
[59] doParallel_1.0.17 BiocParallel_1.34.2
[61] viridis_0.6.4 fastDummies_1.7.3
[63] dendextend_1.17.1 MASS_7.3-60
[65] openssl_2.1.0 DelayedArray_0.26.7
[67] rjson_0.2.21 tools_4.3.2
[69] lmtest_0.9-40 httpuv_1.6.11
[71] future.apply_1.11.0 goftest_1.2-3
[73] glue_1.6.2 nlme_3.1-163
[75] promises_1.2.0.1 grid_4.3.2
[77] Rtsne_0.16 cluster_2.1.4
[79] reshape2_1.4.4 generics_0.1.3
[81] spatstat.data_3.0-1 gtable_0.3.3
[83] tidyr_1.3.0 BiocSingular_1.16.0
[85] ScaledMatrix_1.8.1 sp_2.1-1
[87] utf8_1.2.3 spatstat.geom_3.2-4
[89] BiocGenerics_0.46.0 RcppAnnoy_0.0.21
[91] ggrepel_0.9.3 RANN_2.6.1
[93] foreach_1.5.2 pillar_1.9.0
[95] stringr_1.5.0 spam_2.10-0
[97] RcppHNSW_0.5.0 later_1.3.1
[99] circlize_0.4.15 splines_4.3.2
[101] dplyr_1.1.2 lattice_0.22-5
[103] deldir_1.0-9 survival_3.5-7
[105] tidyselect_1.2.0 ComplexHeatmap_2.16.0
[107] miniUI_0.1.1.1 pbapply_1.7-2
[109] knitr_1.43 gridExtra_2.3
[111] IRanges_2.34.1 scattermore_1.2
[113] stats4_4.3.2 xfun_0.39
[115] matrixStats_1.1.0 stringi_1.8.1
[117] lazyeval_0.2.2 yaml_2.3.7
[119] evaluate_0.21 codetools_0.2-19
[121] tibble_3.2.1 cli_3.6.1
[123] uwot_0.1.16 RcppParallel_5.1.7
[125] xtable_1.8-4 reticulate_1.34.0
[127] munsell_0.5.0 Rcpp_1.0.11
[129] spatstat.random_3.2-1 globals_0.16.2
[131] png_0.1-8 parallel_4.3.2
[133] Rfast_2.1.0 ellipsis_0.3.2
[135] assertthat_0.2.1 ggplot2_3.4.2
[137] dotCall64_1.1-0 parallelDist_0.2.6
[139] sparseMatrixStats_1.12.2 listenv_0.9.0
[141] ggthemes_4.2.4 viridisLite_0.4.2
[143] scales_1.2.1 ggridges_0.5.4
[145] SeuratObject_5.0.0 leiden_0.4.3
[147] purrr_1.0.1 crayon_1.5.2
[149] GetoptLong_1.0.5 cowplot_1.1.1