library(ggplot2)
library(tibble)
library(zeallot)
library(COTAN)
options(parallelly.fork.enable = TRUE)
<- "Data/Yuzwa_MouseCortex/"
outDir
setLoggingLevel(1)
setLoggingFile(file.path(outDir, "CorticalCells_GSM2861511_E135-analysis.log"))
Cortical cells DGE E13.5 Data-set Anaysis
Read the dataset from file
<- read.csv(file.path("Data/Yuzwa_MouseCortex/", "GSM2861511_E135_Only_Cortical_Cells_DGE.txt.gz"),
dataset header = TRUE, sep = "\t", strip.white = TRUE,
row.names = 1)
Cleaning
Crate the COTAN object
<- "CorticalCells_GSM2861511_E135"
sampleCondition <- COTAN(raw = dataset)
cc135Obj <- initializeMetaDataset(cc135Obj,
cc135Obj GEO = "GSM2861511_E135",
sequencingMethod = "Drop_seq",
sampleCondition = sampleCondition)
Inspect cells’ sizes
cellSizePlot(cc135Obj)
Drop cells with too many ritz reads as they are probably duplets
<- 8000
cellsSizeThr <- addElementToMetaDataset(cc135Obj, "Cells size threshold", cellsSizeThr)
cc135Obj
<- getCells(cc135Obj)[getCellsSize(cc135Obj) > cellsSizeThr]
cells_to_rem <- dropGenesCells(cc135Obj, cells = cells_to_rem)
cc135Obj
cellSizePlot(cc135Obj)
Inspect the number of expressed genes per cell
genesSizePlot(cc135Obj)
Drop cells with too high genes expession as they are probably duplets
<- 3300
genesSizeThr <- addElementToMetaDataset(cc135Obj, "Num genes threshold", genesSizeThr)
cc135Obj
<- getNumExpressedGenes(cc135Obj)
numExprGenes <- names(numExprGenes)[numExprGenes > genesSizeThr]
cells_to_rem <- dropGenesCells(cc135Obj, cells = cells_to_rem)
cc135Obj
genesSizePlot(cc135Obj)
Check number of mitocondrial genes expressed in each cell
<- "^mt-"
mitGenesPattern getGenes(cc135Obj)[grep(mitGenesPattern, getGenes(cc135Obj))]
[1] "mt-Co1" "mt-Co3" "mt-Cytb" "mt-Nd1" "mt-Nd2" "mt-Nd4" "mt-Nd5"
[8] "mt-Nd6" "mt-Rnr1" "mt-Rnr2" "mt-Ta" "mt-Tc" "mt-Te" "mt-Tf"
[15] "mt-Ti" "mt-Tl1" "mt-Tl2" "mt-Tm" "mt-Tn" "mt-Tp" "mt-Tq"
[22] "mt-Ts2" "mt-Tt" "mt-Tv" "mt-Tw"
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(cc135Obj, genePrefix = mitGenesPattern)
plot(mitPlot)
Cells with a too high percentage of mitocondrial genes are likely dead (or at the last problematic) cells. So we drop them!
<- 10.0
mitPercThr <- addElementToMetaDataset(cc135Obj, "Mitoc. perc. threshold", mitPercThr)
cc135Obj
<- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]
cells_to_rem
<- dropGenesCells(cc135Obj, cells = cells_to_rem)
cc135Obj
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(cc135Obj, genePrefix = mitGenesPattern)
plot(mitPlot)
Check no further outliers after all the culling
cellSizePlot(cc135Obj)
genesSizePlot(cc135Obj)
Clean: round 1
<- clean(cc135Obj)
cc135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot,
%<-% cleanPlots(cc135Obj)
UDEPlot, nuPlot, zoomedNuPlot)
plot(pcaCellsPlot)
plot(genesPlot)
B group contains high number of hemoglobin genes: so they are not interesting
<- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
cells_to_rem
<- dropGenesCells(cc135Obj, cells = cells_to_rem) cc135Obj
Clean: round 2
<- clean(cc135Obj)
cc135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot,
%<-% cleanPlots(cc135Obj)
UDEPlot, nuPlot, zoomedNuPlot)
plot(pcaCellsPlot)
plot(pcaCellsData)
plot(genesPlot)
B group contains just 3 cells quite different in the 3rd and 4th components: better to drop them
<- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
cells_to_rem
<- dropGenesCells(cc135Obj, cells = cells_to_rem) cc135Obj
Clean: round 3
<- clean(cc135Obj)
cc135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)
plot(pcaCellsPlot)
plot(genesPlot)
<- addElementToMetaDataset(cc135Obj, "Num drop B group", 2) cc135Obj
Visualize if all is ok:
plot(UDEPlot)
plot(nuPlot)
plot(zoomedNuPlot)
Drop very low UDE cells as they are likely outliers
<- 0.14 # the threshold to remove low UDE cells
lowUDEThr
<- addElementToMetaDataset(cc135Obj, "Low UDE threshold", lowUDEThr)
cc135Obj
<- data.frame("nu" = sort(getNu(cc135Obj)), "n" = seq_along(getNu(cc135Obj)))
nuDf
<- rownames(nuDf)[nuDf[["nu"]] < lowUDEThr]
cells_to_rem <- dropGenesCells(cc135Obj, cells = cells_to_rem) cc135Obj
Final cleaning to check all is OK
<- clean(cc135Obj)
cc135Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)
plot(pcaCellsPlot)
plot(genesPlot)
plot(UDEPlot)
plot(nuPlot)
plot(zoomedNuPlot)
plot(cellSizePlot(cc135Obj))
plot(genesSizePlot(cc135Obj))
<- proceedToCoex(cc135Obj, calcCoex = TRUE, cores = 12,
cc135Obj saveObj = TRUE, outDir = outDir)
Save the COTAN object
saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
GDI
<- calculateGDI(cc135Obj)
gdiData
<- head(rownames(gdiData[order(gdiData[["GDI"]],
genesToLabel decreasing = TRUE), ]), n = 10L)
genesToLabel
[1] "Neurod6" "2610017I09Rik" "Nr2e1" "Ina"
[5] "Sox2" "Stmn2" "Mdk" "Mapt"
[9] "Gas1" "2810417H13Rik"
<- GDIPlot(cc135Obj, GDIIn = gdiData, GDIThreshold = 1.4,
gdiPlot genes = list("Top 10 GDI genes" = genesToLabel))
plot(gdiPlot)
Consistent Transcript Cohorts
c(splitClusters, splitCoexDF) %<-%
cellsUniformClustering(cc135Obj, GDIThreshold = 1.4, cores = 13,
saveObj = TRUE, outDir = outDir)
<- addClusterization(cc135Obj, clName = "split",
cc135Obj clusters = splitClusters,
coexDF = splitCoexDF, override = TRUE)
table(splitClusters)
splitClusters
-1 01 02 03 04 05 06 07 08 09 10 11 12 13 14
6 94 106 47 69 127 92 11 25 12 41 113 59 161 149
saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
c(mergedClusters, mergedCoexDF) %<-%
mergeUniformCellsClusters(cc135Obj, clusters = splitClusters,
GDIThreshold = 1.4, cores = 13,
saveObj = TRUE, outDir = outDir)
<- addClusterization(cc135Obj, clName = "merge",
cc135Obj clusters = mergedClusters,
coexDF = mergedCoexDF,
override = TRUE)
table(mergedClusters)
mergedClusters
1 2 3 4 5 6 7 8 9
94 153 161 127 113 102 52 161 149
saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
Sys.time()
[1] "2023-08-22 17:55:23 CEST"
sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] COTAN_2.1.7 zeallot_0.1.0 tibble_3.2.1 ggplot2_3.4.2
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 rstudioapi_0.15.0 jsonlite_1.8.7
[4] shape_1.4.6 umap_0.2.10.0 magrittr_2.0.3
[7] spatstat.utils_3.0-3 farver_2.1.1 rmarkdown_2.24
[10] GlobalOptions_0.1.2 vctrs_0.6.3 ROCR_1.0-11
[13] spatstat.explore_3.2-1 askpass_1.1 htmltools_0.5.5
[16] sctransform_0.3.5 parallelly_1.36.0 KernSmooth_2.23-22
[19] htmlwidgets_1.6.2 ica_1.0-3 plyr_1.8.8
[22] plotly_4.10.2 zoo_1.8-12 igraph_1.5.1
[25] mime_0.12 lifecycle_1.0.3 iterators_1.0.14
[28] pkgconfig_2.0.3 Matrix_1.6-0 R6_2.5.1
[31] fastmap_1.1.1 fitdistrplus_1.1-11 future_1.33.0
[34] shiny_1.7.5 clue_0.3-64 digest_0.6.33
[37] colorspace_2.1-0 patchwork_1.1.2 S4Vectors_0.38.1
[40] Seurat_4.3.0.1 tensor_1.5 RSpectra_0.16-1
[43] irlba_2.3.5.1 labeling_0.4.2 progressr_0.14.0
[46] RcppZiggurat_0.1.6 fansi_1.0.4 spatstat.sparse_3.0-2
[49] httr_1.4.6 polyclip_1.10-4 abind_1.4-5
[52] compiler_4.3.1 withr_2.5.0 doParallel_1.0.17
[55] viridis_0.6.4 dendextend_1.17.1 MASS_7.3-60
[58] openssl_2.1.0 rjson_0.2.21 tools_4.3.1
[61] lmtest_0.9-40 httpuv_1.6.11 future.apply_1.11.0
[64] goftest_1.2-3 glue_1.6.2 nlme_3.1-162
[67] promises_1.2.0.1 grid_4.3.1 Rtsne_0.16
[70] cluster_2.1.4 reshape2_1.4.4 generics_0.1.3
[73] gtable_0.3.3 spatstat.data_3.0-1 tidyr_1.3.0
[76] data.table_1.14.8 sp_2.0-0 utf8_1.2.3
[79] BiocGenerics_0.46.0 spatstat.geom_3.2-4 RcppAnnoy_0.0.21
[82] ggrepel_0.9.3 RANN_2.6.1 foreach_1.5.2
[85] pillar_1.9.0 stringr_1.5.0 later_1.3.1
[88] circlize_0.4.15 splines_4.3.1 dplyr_1.1.2
[91] lattice_0.21-8 survival_3.5-5 deldir_1.0-9
[94] tidyselect_1.2.0 ComplexHeatmap_2.16.0 miniUI_0.1.1.1
[97] pbapply_1.7-2 knitr_1.43 gridExtra_2.3
[100] IRanges_2.34.1 scattermore_1.2 stats4_4.3.1
[103] xfun_0.39 factoextra_1.0.7 matrixStats_1.0.0
[106] stringi_1.7.12 lazyeval_0.2.2 yaml_2.3.7
[109] evaluate_0.21 codetools_0.2-19 cli_3.6.1
[112] RcppParallel_5.1.7 uwot_0.1.16 xtable_1.8-4
[115] reticulate_1.30 munsell_0.5.0 Rcpp_1.0.11
[118] globals_0.16.2 spatstat.random_3.1-5 png_0.1-8
[121] parallel_4.3.1 Rfast_2.0.8 ellipsis_0.3.2
[124] assertthat_0.2.1 parallelDist_0.2.6 listenv_0.9.0
[127] ggthemes_4.2.4 viridisLite_0.4.2 scales_1.2.1
[130] ggridges_0.5.4 SeuratObject_4.1.3 leiden_0.4.3
[133] purrr_1.0.1 crayon_1.5.2 GetoptLong_1.0.5
[136] rlang_1.1.1 cowplot_1.1.1