library(ggplot2)
library(tibble)
library(zeallot)
library(COTAN)
options(parallelly.fork.enable = TRUE)
<- "Data/MouseCortexFromLoom/"
outDir
setLoggingLevel(1)
setLoggingFile(file.path(outDir, "ForebrainDorsal_E135-analysis.log"))
Forebrain Dorsal E13.5 Data-set Anaysis
Read the already created COTAN object
<- readRDS("Data/MouseCortexFromLoom/SourceData/e13.5_ForebrainDorsal.cotan.RDS")
fb135Obj <- getMetadataElement(fb135Obj, datasetTags()[["cond"]])
sampleCondition
sampleCondition
[1] "e13.5_ForebrainDorsal"
Inspect cells’ sizes
cellSizePlot(fb135Obj, splitPattern = ":", numCol = 1)
Drop cells with too many ritz reads as they are probably duplets
<- 10000
cellsSizeThr <- addElementToMetaDataset(fb135Obj, "Cells size threshold", cellsSizeThr)
fb135Obj
<- getCells(fb135Obj)[getCellsSize(fb135Obj) > cellsSizeThr]
cells_to_rem <- dropGenesCells(fb135Obj, cells = cells_to_rem)
fb135Obj
cellSizePlot(fb135Obj, splitPattern = ":", numCol = 1)
Inspect the number of expressed genes per cell
genesSizePlot(fb135Obj, splitPattern = ":", numCol = 1)
Drop cells with too low genes expession as they are probably dead
<- 700
genesSizeLowThr <- addElementToMetaDataset(fb135Obj, "Num genes low threshold", genesSizeLowThr)
fb135Obj
<- getNumGenes(fb135Obj)
numExprGenes <- names(numExprGenes)[numExprGenes < genesSizeLowThr]
cells_to_rem <- dropGenesCells(fb135Obj, cells = cells_to_rem)
fb135Obj
genesSizePlot(fb135Obj, splitPattern = ":", numCol = 1)
Check number of mitocondrial genes expressed in each cell
<- "^mt."
mitGenesPattern getGenes(fb135Obj)[grep(mitGenesPattern, getGenes(fb135Obj))]
[1] "mt.Co1" "mt.Co3" "mt.Nd4" "mt.Nd5" "mt.Nd1" "mt.Nd2" "mt.Atp6"
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(fb135Obj, genePrefix = mitGenesPattern,
splitPattern = ":", numCol = 1)
plot(mitPlot)
Cells with a too high percentage of mitocondrial genes are likely dead (or at the last problematic) cells. So we drop them!
<- 1.0
mitPercThr <- addElementToMetaDataset(fb135Obj, "Mitoc. perc. threshold", mitPercThr)
fb135Obj
<- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]
cells_to_rem
<- dropGenesCells(fb135Obj, cells = cells_to_rem)
fb135Obj
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(fb135Obj, genePrefix = mitGenesPattern,
splitPattern = ":", numCol = 1)
plot(mitPlot)
Check no further outliers after all the culling
cellSizePlot(fb135Obj, splitPattern = ":", numCol = 1)
genesSizePlot(fb135Obj, splitPattern = ":", numCol = 1)
Clean: round 1
<- clean(fb135Obj)
fb135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(fb135Obj)
plot(pcaCellsPlot)
plot(genesPlot)
<- addElementToMetaDataset(fb135Obj, "Num drop B group", 0) fb135Obj
B group contains high number of hemoglobin genes: so they are not interesting
<- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
cells_to_rem
<- dropGenesCells(fb135Obj, cells = cells_to_rem) fb135Obj
Clean: round 2
<- clean(fb135Obj)
fb135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(fb135Obj)
plot(pcaCellsPlot)
plot(genesPlot)
<- addElementToMetaDataset(fb135Obj, "Num drop B group", 1) fb135Obj
B group contains high number of hemoglobin genes: so they are not interesting
<- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
cells_to_rem
<- dropGenesCells(fb135Obj, cells = cells_to_rem) fb135Obj
Clean: round 3
<- clean(fb135Obj)
fb135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(fb135Obj)
plot(pcaCellsPlot)
plot(genesPlot)
<- addElementToMetaDataset(fb135Obj, "Num drop B group", 2) fb135Obj
Visualize if all is ok:
plot(UDEPlot)
plot(nuPlot)
<- 0.3 # the threshold to remove low UDE cells
lowUDEThr
<- data.frame("nu" = sort(getNu(fb135Obj)), "n" = seq_along(getNu(fb135Obj)))
nuDf <- ggplot(nuDf, aes(x = n, y = nu)) +
UDEPlot_zoomed geom_point(colour = "#8491B4B2", size = 1.0) +
xlim(0L, 400L) +
ylim(0.0, 1.0) +
geom_hline(yintercept = lowUDEThr, linetype = "dashed",
color = "darkred") +
annotate(geom = "text", x = 200L, y = 0.25,
label = paste0("to remove cells with nu < ", lowUDEThr),
color = "darkred", size = 4.5)
plot(UDEPlot_zoomed)
Final cleaning to check all is OK
<- clean(fb135Obj)
fb135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(fb135Obj)
plot(pcaCellsPlot)
plot(genesPlot)
plot(UDEPlot)
plot(nuPlot)
plot(cellSizePlot(fb135Obj, splitPattern = ":", numCol = 1))
plot(genesSizePlot(fb135Obj, splitPattern = ":", numCol = 1))
Calculate genes’ COEX
<- proceedToCoex(fb135Obj, calcCoex = TRUE, cores = 12,
fb135Obj saveObj = TRUE, outDir = outDir)
<- calculateGDI(fb135Obj)
gdiData
<- head(rownames(gdiData[order(gdiData[["GDI"]],
genesToLabel decreasing = TRUE), ]), n = 10L)
genesToLabel
[1] "Myt1l" "Ccser1" "Hes1" "Neurod2" "Neurod6" "Aldoc" "Hes5"
[8] "Nsg2" "Rab3a" "Rbfox1"
<- GDIPlot(fb135Obj, GDIIn = gdiData, GDIThreshold = 1.4,
gdiPlot genes = list("Top 10 GDI genes" = genesToLabel))
plot(gdiPlot)
Save the COTAN object
saveRDS(fb135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
<- readRDS(file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS"))) fb135Obj
<- cellsUniformClustering(fb135Obj, GDIThreshold = 1.4, cores = 13,
splitClusters saveObj = TRUE, outDir = outDir)
c(splitCoexDF, splitPValueDF) %<-% DEAOnClusters(fb135Obj, clusters = splitClusters)
<- addClusterization(fb135Obj, clName = "split",
fb135Obj clusters = splitClusters,
coexDF = splitCoexDF, override = TRUE)
table(splitClusters)
saveRDS(fb135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
c(mergedClusters, mergedCoexDF, mergedPValueDF) %<-%
mergeUniformCellsClusters(fb135Obj, clusters = splitClusters,
GDIThreshold = 1.4, cores = 13,
saveObj = TRUE, outDir = outDir)
<- addClusterization(fb135Obj, clName = "merge",
fb135Obj clusters = mergedClusters,
coexDF = mergedCoexDF,
override = TRUE)
table(mergedClusters)
saveRDS(fb135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Berlin
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] COTAN_2.1.5 zeallot_0.1.0 tibble_3.2.1 ggplot2_3.4.2
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.14 jsonlite_1.8.4
[4] shape_1.4.6 umap_0.2.10.0 magrittr_2.0.3
[7] spatstat.utils_3.0-3 farver_2.1.1 rmarkdown_2.21
[10] GlobalOptions_0.1.2 vctrs_0.6.1 ROCR_1.0-11
[13] spatstat.explore_3.2-1 askpass_1.1 htmltools_0.5.5
[16] sctransform_0.3.5 parallelly_1.36.0 KernSmooth_2.23-20
[19] htmlwidgets_1.6.2 ica_1.0-3 plyr_1.8.8
[22] plotly_4.10.1 zoo_1.8-12 igraph_1.4.2
[25] mime_0.12 lifecycle_1.0.3 iterators_1.0.14
[28] pkgconfig_2.0.3 Matrix_1.5-4.1 R6_2.5.1
[31] fastmap_1.1.1 fitdistrplus_1.1-8 future_1.32.0
[34] shiny_1.7.4 clue_0.3-64 digest_0.6.31
[37] colorspace_2.1-0 patchwork_1.1.2 S4Vectors_0.38.0
[40] Seurat_4.3.0 tensor_1.5 RSpectra_0.16-1
[43] irlba_2.3.5.1 labeling_0.4.2 progressr_0.13.0
[46] RcppZiggurat_0.1.6 fansi_1.0.4 spatstat.sparse_3.0-1
[49] httr_1.4.5 polyclip_1.10-4 abind_1.4-5
[52] compiler_4.3.0 withr_2.5.0 doParallel_1.0.17
[55] viridis_0.6.2 dendextend_1.17.1 MASS_7.3-59
[58] openssl_2.0.6 rjson_0.2.21 tools_4.3.0
[61] lmtest_0.9-40 httpuv_1.6.9 future.apply_1.11.0
[64] goftest_1.2-3 glue_1.6.2 nlme_3.1-162
[67] promises_1.2.0.1 grid_4.3.0 Rtsne_0.16
[70] cluster_2.1.4 reshape2_1.4.4 generics_0.1.3
[73] gtable_0.3.3 spatstat.data_3.0-1 tidyr_1.3.0
[76] data.table_1.14.8 sp_1.6-0 utf8_1.2.3
[79] BiocGenerics_0.46.0 spatstat.geom_3.2-1 RcppAnnoy_0.0.20
[82] ggrepel_0.9.3 RANN_2.6.1 foreach_1.5.2
[85] pillar_1.9.0 stringr_1.5.0 later_1.3.0
[88] circlize_0.4.15 splines_4.3.0 dplyr_1.1.2
[91] lattice_0.21-8 survival_3.5-5 deldir_1.0-6
[94] tidyselect_1.2.0 ComplexHeatmap_2.16.0 miniUI_0.1.1.1
[97] pbapply_1.7-0 knitr_1.42 gridExtra_2.3
[100] IRanges_2.34.0 scattermore_1.1 stats4_4.3.0
[103] xfun_0.39 factoextra_1.0.7 matrixStats_1.0.0
[106] stringi_1.7.12 lazyeval_0.2.2 yaml_2.3.7
[109] evaluate_0.20 codetools_0.2-19 cli_3.6.1
[112] RcppParallel_5.1.7 uwot_0.1.14 xtable_1.8-4
[115] reticulate_1.28 munsell_0.5.0 Rcpp_1.0.10
[118] globals_0.16.2 spatstat.random_3.1-4 png_0.1-8
[121] parallel_4.3.0 Rfast_2.0.7 ellipsis_0.3.2
[124] assertthat_0.2.1 parallelDist_0.2.6 listenv_0.9.0
[127] ggthemes_4.2.4 viridisLite_0.4.1 scales_1.2.1
[130] ggridges_0.5.4 SeuratObject_4.1.3 leiden_0.4.3
[133] purrr_1.0.1 crayon_1.5.2 GetoptLong_1.0.5
[136] rlang_1.1.0 cowplot_1.1.1