options(parallelly.fork.enable = TRUE)
library(COTAN)
library(zeallot)
library(data.table)
library(factoextra)
library(Rtsne)
library(qpdf)
library(stringr)
E14.5 Mouse Cortex Loo 2019
= read.table("../COTAN_small_paper/data/MouseCortex/E14_combined_matrix.txt.gz",header=T,sep="\t",row.names=1)
e14_dge
print(dim(e14_dge))
[1] 21313 11069
<- "Data/MouseCortex/"
outDir setLoggingLevel(1)
setLoggingFile(file.path(outDir, "Dataset.log"))
<- "MouseCortex_E14.5"
cond <- COTAN(raw = e14_dge)
obj <- initializeMetaDataset(obj,
obj GEO = "GSE123335",
sequencingMethod = "Drop_seq",
sampleCondition = cond)
rm(e14_dge)
ECDPlot(obj, yCut = 400L)
cellSizePlot(obj,splitPattern = "_",numCol = 1)
genesSizePlot(obj,splitPattern = "_",numCol = 1)
<- mitochondrialPercentagePlot(obj, genePrefix = "^mt-",splitPattern = "_",numCol = 1)
mit "plot"]] mit[[
To drop cells by cell library size:
<- getCells(obj)[getCellsSize(obj) > 15000]
cells_to_rem <- dropGenesCells(obj, cells = cells_to_rem)
obj cellSizePlot(obj,splitPattern = "_",numCol = 1)
genesSizePlot(obj,splitPattern = "_",numCol = 1)
To drop cells by mitochondrial percentage:
<- mit[["sizes"]][["mit.percentage"]] > 7.5
to_rem <- rownames(mit[["sizes"]])[to_rem]
cells_to_rem <- dropGenesCells(obj, cells = cells_to_rem)
obj
<- mitochondrialPercentagePlot(obj, genePrefix = "^mt-",splitPattern = "_",numCol = 1)
mit
"plot"]] mit[[
cellSizePlot(obj,splitPattern = "_",numCol = 1)
<- clean(obj)
obj c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoom) %<-% cleanPlots(obj)
pcaCellsPlot
genesPlot
<- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
cells_to_rem <- dropGenesCells(obj, cells = cells_to_rem)
obj <- clean(obj)
obj c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoom) %<-% cleanPlots(obj)
pcaCellsPlot
genesPlot
plot(nuPlot)
<- data.frame("nu" = sort(getNu(obj)), "n" = seq_along(getNu(obj)))
nuDf <- 0.18 # the threshold to remove low UDE cells
yset <- ggplot(nuDf, aes(x = n, y = nu)) +
plot.ude geom_point(colour = "#8491B4B2", size = 1.0) +
xlim(0L, 3000) +
ylim(0.0, 1.0) +
geom_hline(yintercept = yset, linetype = "dashed",
color = "darkred") +
annotate(geom = "text", x = 1000L, y = 0.25,
label = paste0("to remove cells with nu < ", yset),
color = "darkred", size = 4.5)
plot.ude
<- addElementToMetaDataset(obj, "Threshold low UDE cells:", yset)
obj
<- rownames(nuDf)[nuDf[["nu"]] < yset]
cells_to_rem <- dropGenesCells(obj, cells = cells_to_rem)
obj <- clean(obj)
obj c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoom) %<-% cleanPlots(obj)
pcaCellsPlot
genesPlot
UDEPlot
nuPlot
$data$Sample <- str_split(rownames(UDEPlot$data),pattern = "_",simplify = T)[,1]
UDEPlot
ggplot(UDEPlot$data, aes(PC1,PC2,color = Sample)) + geom_point(size = 0.5, alpha=0.7)
ggplot(UDEPlot$data, aes(PC2,PC3,color = Sample)) + geom_point(size = 0.5, alpha=0.7)
ggplot(UDEPlot$data, aes(PC2,PC5,color = Sample)) + geom_point(size = 0.5, alpha=0.7)
ggplot(UDEPlot$data, aes(PC2,PC5,color = groups)) + geom_point(size = 0.5, alpha=0.7)
COTAN analysis
In this part, all the contingency tables are computed and used to get the statistics.
<- estimateDispersionBisection(obj, cores = 15L) obj
COEX
evaluation and storing
<- calculateCoex(obj) obj
# saving the structure
saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS")))
<- readRDS(file = file.path(outDir, paste0(cond, ".cotan.RDS"))) obj
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.5.0 qpdf_1.3.2 Rtsne_0.17 factoextra_1.0.7
[5] ggplot2_3.5.0 data.table_1.15.0 zeallot_0.1.0 COTAN_2.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 tibble_3.2.1
[5] polyclip_1.10-4 fastDummies_1.7.3
[7] lifecycle_1.0.3 doParallel_1.0.17
[9] globals_0.16.2 lattice_0.22-5
[11] MASS_7.3-60 dendextend_1.17.1
[13] magrittr_2.0.3 plotly_4.10.2
[15] rmarkdown_2.24 yaml_2.3.7
[17] httpuv_1.6.11 Seurat_5.0.0
[19] sctransform_0.4.1 spam_2.10-0
[21] askpass_1.2.0 sp_2.1-1
[23] spatstat.sparse_3.0-2 reticulate_1.36.1
[25] cowplot_1.1.1 pbapply_1.7-2
[27] RColorBrewer_1.1-3 abind_1.4-5
[29] purrr_1.0.1 BiocGenerics_0.46.0
[31] circlize_0.4.15 IRanges_2.34.1
[33] S4Vectors_0.38.1 ggrepel_0.9.5
[35] irlba_2.3.5.1 listenv_0.9.0
[37] spatstat.utils_3.0-3 umap_0.2.10.0
[39] goftest_1.2-3 RSpectra_0.16-1
[41] spatstat.random_3.2-1 dqrng_0.3.0
[43] fitdistrplus_1.1-11 parallelly_1.37.1
[45] DelayedMatrixStats_1.22.5 leiden_0.4.3
[47] codetools_0.2-19 DelayedArray_0.26.7
[49] tidyselect_1.2.0 shape_1.4.6
[51] farver_2.1.1 ScaledMatrix_1.8.1
[53] viridis_0.6.4 matrixStats_1.2.0
[55] stats4_4.3.2 spatstat.explore_3.2-1
[57] jsonlite_1.8.7 GetoptLong_1.0.5
[59] ellipsis_0.3.2 progressr_0.14.0
[61] ggridges_0.5.4 survival_3.5-8
[63] iterators_1.0.14 foreach_1.5.2
[65] tools_4.3.2 ica_1.0-3
[67] Rcpp_1.0.11 glue_1.7.0
[69] gridExtra_2.3 xfun_0.39
[71] MatrixGenerics_1.12.3 ggthemes_5.1.0
[73] dplyr_1.1.2 withr_3.0.0
[75] fastmap_1.1.1 fansi_1.0.4
[77] openssl_2.1.0 digest_0.6.33
[79] rsvd_1.0.5 parallelDist_0.2.6
[81] R6_2.5.1 mime_0.12
[83] colorspace_2.1-0 scattermore_1.2
[85] tensor_1.5 spatstat.data_3.0-1
[87] utf8_1.2.3 tidyr_1.3.0
[89] generics_0.1.3 httr_1.4.6
[91] htmlwidgets_1.6.2 S4Arrays_1.2.0
[93] uwot_0.1.16 pkgconfig_2.0.3
[95] gtable_0.3.3 ComplexHeatmap_2.16.0
[97] lmtest_0.9-40 htmltools_0.5.8
[99] dotCall64_1.1-0 clue_0.3-64
[101] SeuratObject_5.0.0 scales_1.3.0
[103] png_0.1-8 knitr_1.43
[105] rstudioapi_0.15.0 reshape2_1.4.4
[107] rjson_0.2.21 nlme_3.1-163
[109] zoo_1.8-12 GlobalOptions_0.1.2
[111] KernSmooth_2.23-22 parallel_4.3.2
[113] miniUI_0.1.1.1 RcppZiggurat_0.1.6
[115] pillar_1.9.0 grid_4.3.2
[117] vctrs_0.6.3 RANN_2.6.1
[119] promises_1.2.0.1 BiocSingular_1.16.0
[121] beachmat_2.16.0 xtable_1.8-4
[123] cluster_2.1.6 evaluate_0.21
[125] cli_3.6.1 compiler_4.3.2
[127] rlang_1.1.1 crayon_1.5.2
[129] future.apply_1.11.0 labeling_0.4.2
[131] plyr_1.8.8 stringi_1.8.1
[133] viridisLite_0.4.2 deldir_2.0-2
[135] BiocParallel_1.34.2 assertthat_0.2.1
[137] munsell_0.5.0 lazyeval_0.2.2
[139] spatstat.geom_3.2-4 PCAtools_2.14.0
[141] Matrix_1.6-3 RcppHNSW_0.6.0
[143] patchwork_1.2.0 sparseMatrixStats_1.12.2
[145] future_1.33.0 shiny_1.8.0
[147] ROCR_1.0-11 Rfast_2.1.0
[149] igraph_2.0.3 RcppParallel_5.1.7