# Util libs
library(assertthat)
library(ggplot2)
library(zeallot)
library(conflicted)
library(Matrix)
library(torch)
# Data processing libs
# Attach COTAN: use the installed package when it can be loaded, otherwise
# fall back to the local development checkout. `requireNamespace()` reports
# availability without needing a blanket `suppressWarnings()`.
if (requireNamespace("COTAN", quietly = TRUE)) {
  library(COTAN)
} else {
  devtools::load_all("~/dev/COTAN/COTAN/")
}

# Declare which package wins for the zeallot destructuring operators, since
# `conflicted` is attached.
conflicts_prefer(zeallot::`%->%`, zeallot::`%<-%`)

# Allow forked processing for parallelly-based back ends.
options(parallelly.fork.enable = TRUE)

# Logging configuration: level 2, log file in the working directory.
setLoggingLevel(2L)
setLoggingFile(file.path(".", "Dataset_Cleaning.log"))

# Dataset_Extraction ----
# Preamble ----

# GEO accession used to build the input and output file names.
GEO <- "GSE189033"

# Analysis output directory, created on first use.
outDir <- file.path(".", "analysis")
if (!dir.exists(outDir)) {
  dir.create(outDir)
}

# Load dataset ----
# Read the input object from the RDS file named after the GEO accession.
fileNameIn <- file.path(
  ".",
  paste0("MouseBrain_Morabito_", GEO, "-SeuratCleaned.RDS")
)
cObj <- readRDS(file = fileNameIn)

# Number of levels of each condition stored in the object.
# `vapply()` pins the result to one integer per condition name.
vapply(
  getAllConditions(cObj),
  function(nm) nlevels(getCondition(cObj, nm)),
  integer(1L)
)
#> sample rnd1_well rnd2_well rnd3_well
#> 48 48 96 96
#> sublibrary RNA.Nuclei.Group Sex Sac
#> 8 6 2 16
#> Dissection.Batch DX
#> 3 6

# Number of clusters in each clusterization stored in the object.
vapply(
  getClusterizations(cObj),
  function(nm) nlevels(getClusters(cObj, nm)),
  integer(1L)
)
#> class cluster_name clusternum_anno
#> 12 20 43
#> clusters_4conditions cellchat_clusters
#> 37 18

# Dimensions of the object's components.
getDims(cObj)
#> $raw
#> [1] 30368 51327
#> $genesCoex
#> [1] 0 0
#> $cellsCoex
#> [1] 0 0
#> $metaDataset
#> [1] 1
#> $metaGenes
#> [1] 2
#> $metaCells
#> [1] 35
#> $clustersCoex
#> [1] 5
# Cleaning ----

# clean() using standard thresholds
cObj <- clean(cObj)

# Check the initial plots ----
# Quality-control plots grouped by the "sample" condition, shown in sequence.
cellSizePlot(cObj, condName = "sample")
genesSizePlot(cObj, condName = "sample")
scatterPlot(cObj, condName = "sample")

# Only the first element of the result is kept; the second is dropped through
# the zeallot `.` placeholder. Genes are selected with the "^mt-" prefix.
c(mitPerPlot, .) %<-% mitochondrialPercentagePlot(
  cObj,
  genePrefix = "^mt-",
  condName = "sample"
)
mitPerPlot
# Check for spurious clusters ----

# (disabled) cObj <- clean(cObj)

# Unpack the six results of cleanPlots() into individual variables.
c(pcaCells, pcaCellsData, genes, UDE, nu, zoomedNu) %<-%
  cleanPlots(cObj, includePCA = TRUE)

# Plot PCA and Nu ----
# Display, one at a time, each object unpacked from the cleanPlots() result.
plot(pcaCells)
plot(UDE)
plot(pcaCellsData)
plot(genes)
plot(nu)
plot(zoomedNu)
# Finalize object and save ----

# Finalize the object with co-expression calculation on 5 cores; the object
# is not saved by proceedToCoex() itself (saveObj = FALSE).
cObj <- proceedToCoex(cObj, calcCoex = TRUE, cores = 5L, saveObj = FALSE)

# Persist the cleaned object next to the input file.
fileNameOut <- file.path(
  ".",
  paste0("MouseBrain_Morabito_", GEO, "_CotanCleaned.RDS")
)
saveRDS(cObj, file = fileNameOut)

# Record the completion time and the session details.
Sys.time()
#> [1] "2026-01-18 22:51:45 CET"

sessionInfo()
#> R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] COTAN_2.11.1 torch_0.14.2 Matrix_1.7-4 conflicted_1.2.0
[5] zeallot_0.2.0 ggplot2_4.0.1 assertthat_0.2.1
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.5.2
[3] later_1.4.2 tibble_3.3.0
[5] polyclip_1.10-7 fastDummies_1.7.5
[7] lifecycle_1.0.4 doParallel_1.0.17
[9] globals_0.18.0 processx_3.8.6
[11] lattice_0.22-7 MASS_7.3-65
[13] ggdist_3.3.3 dendextend_1.19.0
[15] magrittr_2.0.4 plotly_4.11.0
[17] rmarkdown_2.29 yaml_2.3.10
[19] httpuv_1.6.16 Seurat_5.2.1
[21] sctransform_0.4.2 spam_2.11-1
[23] sp_2.2-0 spatstat.sparse_3.1-0
[25] reticulate_1.42.0 cowplot_1.2.0
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] abind_1.4-8 GenomicRanges_1.62.1
[31] Rtsne_0.17 purrr_1.2.0
[33] BiocGenerics_0.56.0 coro_1.1.0
[35] circlize_0.4.16 GenomeInfoDbData_1.2.14
[37] IRanges_2.44.0 S4Vectors_0.48.0
[39] ggrepel_0.9.6 irlba_2.3.5.1
[41] listenv_0.9.1 spatstat.utils_3.1-4
[43] goftest_1.2-3 RSpectra_0.16-2
[45] spatstat.random_3.4-1 fitdistrplus_1.2-2
[47] parallelly_1.46.0 codetools_0.2-20
[49] DelayedArray_0.36.0 tidyselect_1.2.1
[51] shape_1.4.6.1 UCSC.utils_1.4.0
[53] farver_2.1.2 viridis_0.6.5
[55] ScaledMatrix_1.16.0 matrixStats_1.5.0
[57] stats4_4.5.2 spatstat.explore_3.4-2
[59] Seqinfo_1.0.0 jsonlite_2.0.0
[61] GetoptLong_1.0.5 progressr_0.15.1
[63] ggridges_0.5.6 survival_3.8-3
[65] iterators_1.0.14 foreach_1.5.2
[67] tools_4.5.2 ica_1.0-3
[69] Rcpp_1.1.0 glue_1.8.0
[71] gridExtra_2.3 SparseArray_1.10.8
[73] xfun_0.52 distributional_0.5.0
[75] MatrixGenerics_1.22.0 ggthemes_5.2.0
[77] GenomeInfoDb_1.44.0 dplyr_1.1.4
[79] withr_3.0.2 fastmap_1.2.0
[81] callr_3.7.6 digest_0.6.37
[83] rsvd_1.0.5 parallelDist_0.2.6
[85] R6_2.6.1 mime_0.13
[87] colorspace_2.1-1 scattermore_1.2
[89] tensor_1.5 spatstat.data_3.1-6
[91] tidyr_1.3.1 generics_0.1.3
[93] data.table_1.17.0 httr_1.4.7
[95] htmlwidgets_1.6.4 S4Arrays_1.10.1
[97] uwot_0.2.3 pkgconfig_2.0.3
[99] gtable_0.3.6 ComplexHeatmap_2.26.0
[101] lmtest_0.9-40 S7_0.2.1
[103] SingleCellExperiment_1.32.0 XVector_0.50.0
[105] htmltools_0.5.8.1 dotCall64_1.2
[107] zigg_0.0.2 clue_0.3-66
[109] Biobase_2.70.0 SeuratObject_5.1.0
[111] scales_1.4.0 png_0.1-8
[113] spatstat.univar_3.1-3 knitr_1.50
[115] reshape2_1.4.4 rjson_0.2.23
[117] nlme_3.1-168 proxy_0.4-27
[119] cachem_1.1.0 zoo_1.8-14
[121] GlobalOptions_0.1.2 stringr_1.6.0
[123] KernSmooth_2.23-26 parallel_4.5.2
[125] miniUI_0.1.2 pillar_1.10.2
[127] grid_4.5.2 vctrs_0.6.5
[129] RANN_2.6.2 promises_1.3.2
[131] BiocSingular_1.26.1 beachmat_2.26.0
[133] xtable_1.8-4 cluster_2.1.8.1
[135] evaluate_1.0.3 cli_3.6.5
[137] compiler_4.5.2 rlang_1.1.6
[139] crayon_1.5.3 future.apply_1.20.0
[141] labeling_0.4.3 ps_1.9.1
[143] plyr_1.8.9 stringi_1.8.7
[145] viridisLite_0.4.2 deldir_2.0-4
[147] BiocParallel_1.44.0 lazyeval_0.2.2
[149] spatstat.geom_3.4-1 RcppHNSW_0.6.0
[151] patchwork_1.3.2 bit64_4.6.0-1
[153] future_1.58.0 shiny_1.11.0
[155] SummarizedExperiment_1.38.1 ROCR_1.0-11
[157] Rfast_2.1.5.1 igraph_2.1.4
[159] memoise_2.0.1 RcppParallel_5.1.10
[161] bit_4.6.0