library(ggplot2)
library(tibble)
library(zeallot)
library(COTAN)
#devtools::load_all("../COTAN/")
options(parallelly.fork.enable = TRUE)
<- "Data/MouseCortexFromLoom/"
outDir if (!file.exists(outDir)) {
dir.create(outDir)
}
setLoggingLevel(2)
setLoggingFile(file.path(outDir, "ForebrainDorsal_E175-analysis.log"))
Forebrain Dorsal E17.5 Data-set Anaysis
Read the already created COTAN object
<- readRDS(file.path("Data/MouseCortexFromLoom/SourceData/", "e17.5_ForebrainDorsal.cotan.RDS"))
fb175Obj <- getMetadataElement(fb175Obj, datasetTags()[["cond"]])
sampleCondition
sampleCondition
[1] "e17.5_ForebrainDorsal"
Inspect cells’ sizes
cellSizePlot(fb175Obj, splitPattern = ":", numCol = 1)
Drop cells with too many reads reads as they are probably doublets
<- 10000
cellsSizeThr <- addElementToMetaDataset(fb175Obj, "Cells size threshold", cellsSizeThr)
fb175Obj
<- getCells(fb175Obj)[getCellsSize(fb175Obj) > cellsSizeThr]
cells_to_rem <- dropGenesCells(fb175Obj, cells = cells_to_rem)
fb175Obj
cellSizePlot(fb175Obj, splitPattern = ":", numCol = 1)
Inspect the number of expressed genes per cell
genesSizePlot(fb175Obj, splitPattern = ":", numCol = 1)
Drop cells with too low genes expression as they are probably dead
<- 700
genesSizeLowThr <- addElementToMetaDataset(fb175Obj, "Num genes low threshold", genesSizeLowThr)
fb175Obj
<- getNumExpressedGenes(fb175Obj)
numExprGenes <- names(numExprGenes)[numExprGenes < genesSizeLowThr]
cells_to_rem <- dropGenesCells(fb175Obj, cells = cells_to_rem)
fb175Obj
genesSizePlot(fb175Obj, splitPattern = ":", numCol = 1)
Check number of mitochondrial genes expressed in each cell
<- "^mt."
mitGenesPattern getGenes(fb175Obj)[grep(mitGenesPattern, getGenes(fb175Obj))]
[1] "mt.Co1" "mt.Co3" "mt.Nd4" "mt.Nd5" "mt.Nd1" "mt.Nd2"
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(fb175Obj, genePrefix = mitGenesPattern,
splitPattern = ":", numCol = 1)
plot(mitPlot)
Cells with a too high percentage of mitochondrial genes are likely dead (or at the last problematic) cells. So we drop them!
<- 1.5
mitPercThr <- addElementToMetaDataset(fb175Obj, "Mitoc. perc. threshold", mitPercThr)
fb175Obj
<- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]
cells_to_rem
<- dropGenesCells(fb175Obj, cells = cells_to_rem)
fb175Obj
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(fb175Obj, genePrefix = mitGenesPattern,
splitPattern = ":", numCol = 1)
plot(mitPlot)
Check no further outlines after all the culling
cellSizePlot(fb175Obj, splitPattern = ":", numCol = 1)
genesSizePlot(fb175Obj, splitPattern = ":", numCol = 1)
Clean: round 1
<- clean(fb175Obj)
fb175Obj
c(pcaCellsPlot, pcaCellsData, genesPlot,
%<-% cleanPlots(fb175Obj)
UDEPlot, nuPlot, zoomedNuPlot)
plot(pcaCellsPlot)
plot(genesPlot)
<- addElementToMetaDataset(fb175Obj, "Num drop B group", 0) fb175Obj
Visualize if all is ok:
plot(UDEPlot)
plot(nuPlot)
plot(zoomedNuPlot)
Final cleaning to check all is OK
<- clean(fb175Obj)
fb175Obj
c(pcaCellsPlot, pcaCellsData, genesPlot,
%<-% cleanPlots(fb175Obj)
UDEPlot, nuPlot, zoomedNuPlot)
plot(pcaCellsPlot)
plot(genesPlot)
plot(UDEPlot)
plot(nuPlot)
plot(zoomedNuPlot)
plot(cellSizePlot(fb175Obj, splitPattern = ":", numCol = 1))
plot(genesSizePlot(fb175Obj, splitPattern = ":", numCol = 1))
Calculate genes’ COEX
<- proceedToCoex(fb175Obj, calcCoex = TRUE, cores = 12,
fb175Obj saveObj = TRUE, outDir = outDir)
<- calculateGDI(fb175Obj)
gdiData
<- head(rownames(gdiData[order(gdiData[["GDI"]],
genesToLabel decreasing = TRUE), ]), n = 10L)
genesToLabel
[1] "Mfge8" "Myt1l" "Atp1a2" "Sparc" "Apoe" "Phgdh"
[7] "Slc9a3r1" "Tnc" "Ccdc80" "Aldoc"
<- GDIPlot(fb175Obj, GDIIn = gdiData, GDIThreshold = 1.4,
gdiPlot genes = list("Top 10 GDI genes" = genesToLabel))
plot(gdiPlot)
Save the COTAN object
saveRDS(fb175Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
c(splitClusters, splitCoexDF) %<-%
cellsUniformClustering(fb175Obj, GDIThreshold = 1.4, cores = 13L,
saveObj = TRUE, outDir = outDir)
<- addClusterization(fb175Obj, clName = "split",
fb175Obj clusters = splitClusters,
coexDF = splitCoexDF, override = TRUE)
<- getClusterizationData(fb175Obj,clName = "split")$clusters
splitClusters
table(splitClusters)
splitClusters
-1 20 04 05 06 07 10 22 11 08 21 23 16 12 18 24 09 14 15 13
10 384 130 119 82 53 44 40 32 22 413 166 159 91 55 50 15 202 146 70
02 01 03 17 19
50 37 32 35 30
c(mergedClusters, mergedCoexDF) %<-%
mergeUniformCellsClusters(fb175Obj, clusters = splitClusters,
GDIThreshold = 1.4, batchSize = 20L, cores = 13L,
saveObj = TRUE, outDir = outDir)
<- addClusterization(fb175Obj, clName = "merge",
fb175Obj clusters = mergedClusters,
coexDF = mergedCoexDF,
override = TRUE)
<- getClusterizationData(fb175Obj,clName = "merge")$clusters
mergedClusters table(mergedClusters)
mergedClusters
16 03 04 05 06 07 18 01 02 12 19 14 13 15 17 08 09 10 11
65 37 50 32 130 119 135 22 15 44 32 237 100 202 194 384 413 90 166
Sys.time()
[1] "2023-08-22 11:46:58 CEST"
sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] COTAN_2.1.7 zeallot_0.1.0 tibble_3.2.1 ggplot2_3.4.2
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
[4] shape_1.4.6 umap_0.2.10.0 magrittr_2.0.3
[7] spatstat.utils_3.0-3 farver_2.1.1 rmarkdown_2.24
[10] GlobalOptions_0.1.2 vctrs_0.6.3 ROCR_1.0-11
[13] spatstat.explore_3.2-1 askpass_1.1 htmltools_0.5.5
[16] sctransform_0.3.5 parallelly_1.36.0 KernSmooth_2.23-22
[19] htmlwidgets_1.6.2 ica_1.0-3 plyr_1.8.8
[22] plotly_4.10.2 zoo_1.8-12 igraph_1.5.1
[25] mime_0.12 lifecycle_1.0.3 iterators_1.0.14
[28] pkgconfig_2.0.3 Matrix_1.6-0 R6_2.5.1
[31] fastmap_1.1.1 fitdistrplus_1.1-11 future_1.33.0
[34] shiny_1.7.5 clue_0.3-64 digest_0.6.33
[37] colorspace_2.1-0 patchwork_1.1.2 S4Vectors_0.38.1
[40] Seurat_4.3.0.1 tensor_1.5 RSpectra_0.16-1
[43] irlba_2.3.5.1 labeling_0.4.2 progressr_0.14.0
[46] RcppZiggurat_0.1.6 fansi_1.0.4 spatstat.sparse_3.0-2
[49] httr_1.4.6 polyclip_1.10-4 abind_1.4-5
[52] compiler_4.3.1 withr_2.5.0 doParallel_1.0.17
[55] viridis_0.6.4 dendextend_1.17.1 MASS_7.3-60
[58] openssl_2.1.0 rjson_0.2.21 tools_4.3.1
[61] lmtest_0.9-40 httpuv_1.6.11 future.apply_1.11.0
[64] goftest_1.2-3 glue_1.6.2 nlme_3.1-162
[67] promises_1.2.0.1 grid_4.3.1 Rtsne_0.16
[70] cluster_2.1.4 reshape2_1.4.4 generics_0.1.3
[73] gtable_0.3.3 spatstat.data_3.0-1 tidyr_1.3.0
[76] data.table_1.14.8 sp_2.0-0 utf8_1.2.3
[79] BiocGenerics_0.46.0 spatstat.geom_3.2-4 RcppAnnoy_0.0.21
[82] ggrepel_0.9.3 RANN_2.6.1 foreach_1.5.2
[85] pillar_1.9.0 stringr_1.5.0 later_1.3.1
[88] circlize_0.4.15 splines_4.3.1 dplyr_1.1.2
[91] lattice_0.21-8 survival_3.5-5 deldir_1.0-9
[94] tidyselect_1.2.0 ComplexHeatmap_2.16.0 miniUI_0.1.1.1
[97] pbapply_1.7-2 knitr_1.43 gridExtra_2.3
[100] IRanges_2.34.1 scattermore_1.2 stats4_4.3.1
[103] xfun_0.39 factoextra_1.0.7 matrixStats_1.0.0
[106] stringi_1.7.12 lazyeval_0.2.2 yaml_2.3.7
[109] evaluate_0.21 codetools_0.2-19 cli_3.6.1
[112] RcppParallel_5.1.7 uwot_0.1.16 xtable_1.8-4
[115] reticulate_1.30 munsell_0.5.0 Rcpp_1.0.11
[118] globals_0.16.2 spatstat.random_3.1-5 png_0.1-8
[121] parallel_4.3.1 Rfast_2.0.8 ellipsis_0.3.2
[124] assertthat_0.2.1 parallelDist_0.2.6 listenv_0.9.0
[127] ggthemes_4.2.4 viridisLite_0.4.2 scales_1.2.1
[130] ggridges_0.5.4 SeuratObject_4.1.3 leiden_0.4.3
[133] purrr_1.0.1 crayon_1.5.2 GetoptLong_1.0.5
[136] rlang_1.1.1 cowplot_1.1.1