Dataset_Cleaning

Preamble

# Util libs
library(assertthat)
library(ggplot2)
library(zeallot)
library(conflicted)
library(Matrix)
library(torch)

# Data processing libs
# Attach the installed COTAN when there is one; otherwise fall back to the
# local development checkout.
# requireNamespace() only probes for the package, so the attaching is left to
# library(), which fails loudly instead of returning FALSE like require().
if (requireNamespace("COTAN", quietly = TRUE)) {
  library(COTAN)
} else {
  devtools::load_all("~/dev/COTAN/COTAN/")
}

# Settle the operator name clashes in favour of zeallot
conflicts_prefer(
  zeallot::`%->%`,
  zeallot::`%<-%`
)

# Turn on parallelly's fork option for this session
options(parallelly.fork.enable = TRUE)

# Logging: level 2, written to a log file in the working directory
setLoggingLevel(2L)
setLoggingFile(file.path(".", "Dataset_Cleaning.log"))
# Dataset accession; reused to build the input and output file names
GEO <- "GSE189033"

# Output folder for the analysis, created only when it is not there yet
outDir <- file.path(".", "analysis")
if (!dir.exists(outDir)) dir.create(outDir)

Load dataset

# Input file name is derived from the GEO accession
fileNameIn <- file.path(
  ".",
  paste0("MouseBrain_Morabito_", GEO, "-SeuratCleaned.RDS")
)

# Restore the serialized object that the rest of the notebook works on
cObj <- readRDS(fileNameIn)

# Number of levels of each condition stored in the object.
# vapply() pins the result to a named integer vector; sapply() would silently
# switch to an empty list if there were no conditions at all.
vapply(
  getAllConditions(cObj),
  function(nm) nlevels(getCondition(cObj, nm)),
  integer(1L)
)
          sample        rnd1_well        rnd2_well        rnd3_well 
              48               48               96               96 
      sublibrary RNA.Nuclei.Group              Sex              Sac 
               8                6                2               16 
Dissection.Batch               DX 
               3                6 
# Number of clusters in each clusterization stored in the object.
# vapply() pins the result to a named integer vector; sapply() would silently
# switch to an empty list if there were no clusterizations at all.
vapply(
  getClusterizations(cObj),
  function(nm) nlevels(getClusters(cObj, nm)),
  integer(1L)
)
               class         cluster_name      clusternum_anno 
                  12                   20                   43 
clusters_4conditions    cellchat_clusters 
                  37                   18 
# Print the sizes of the object's components (raw counts, coex, metadata)
getDims(cObj)
$raw
[1] 30368 51327

$genesCoex
[1] 0 0

$cellsCoex
[1] 0 0

$metaDataset
[1] 1

$metaGenes
[1] 2

$metaCells
[1] 35

$clustersCoex
[1] 5

Cleaning

clean() using standard thresholds

# Run clean() with no arguments overridden and keep the updated object
cObj <- clean(cObj)

Check the initial plots

# Diagnostic plots, each one split by the "sample" condition
cellSizePlot(cObj, condName = "sample")

genesSizePlot(cObj, condName = "sample")

scatterPlot(cObj, condName = "sample")

# Genes are selected by the "^mt-" name prefix
# (presumably the mouse mitochondrial genes - confirm against the gene names).
# Only the first returned element is kept; `.` discards the second one.
c(mitPerPlot, .) %<-%
  mitochondrialPercentagePlot(cObj, genePrefix = "^mt-", condName = "sample")
mitPerPlot

Check for spurious clusters

# Additional clean() pass, left commented out
#cObj <- clean(cObj)

# Build the cleaning diagnostics (PCA included) and unpack the six returned
# elements, by position, into separate variables
c(pcaCells, pcaCellsData, genes, UDE, nu, zoomedNu) %<-%
  cleanPlots(cObj, includePCA = TRUE)

Plot PCA and Nu

# Draw, one at a time, each of the objects unpacked from cleanPlots()
plot(pcaCells)

plot(UDE)

plot(pcaCellsData)

plot(genes)

plot(nu)

plot(zoomedNu)

Finalize object and save

# proceedToCoex() with the coex calculation enabled, on 5 cores;
# saveObj = FALSE because the object is written explicitly just below
cObj <- proceedToCoex(
  cObj,
  calcCoex = TRUE,
  cores = 5L,
  saveObj = FALSE
)

# Output file name is derived from the GEO accession
fileNameOut <- file.path(
  ".",
  paste0("MouseBrain_Morabito_", GEO, "_CotanCleaned.RDS")
)

saveRDS(cObj, file = fileNameOut)

# Timestamp of this run, kept with the session details for traceability
Sys.time()
[1] "2026-01-18 22:51:45 CET"
# Record the R version, platform and package versions used for this run
sessionInfo()
R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] COTAN_2.11.1     torch_0.14.2     Matrix_1.7-4     conflicted_1.2.0
[5] zeallot_0.2.0    ggplot2_4.0.1    assertthat_0.2.1

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.22            splines_4.5.2              
  [3] later_1.4.2                 tibble_3.3.0               
  [5] polyclip_1.10-7             fastDummies_1.7.5          
  [7] lifecycle_1.0.4             doParallel_1.0.17          
  [9] globals_0.18.0              processx_3.8.6             
 [11] lattice_0.22-7              MASS_7.3-65                
 [13] ggdist_3.3.3                dendextend_1.19.0          
 [15] magrittr_2.0.4              plotly_4.11.0              
 [17] rmarkdown_2.29              yaml_2.3.10                
 [19] httpuv_1.6.16               Seurat_5.2.1               
 [21] sctransform_0.4.2           spam_2.11-1                
 [23] sp_2.2-0                    spatstat.sparse_3.1-0      
 [25] reticulate_1.42.0           cowplot_1.2.0              
 [27] pbapply_1.7-2               RColorBrewer_1.1-3         
 [29] abind_1.4-8                 GenomicRanges_1.62.1       
 [31] Rtsne_0.17                  purrr_1.2.0                
 [33] BiocGenerics_0.56.0         coro_1.1.0                 
 [35] circlize_0.4.16             GenomeInfoDbData_1.2.14    
 [37] IRanges_2.44.0              S4Vectors_0.48.0           
 [39] ggrepel_0.9.6               irlba_2.3.5.1              
 [41] listenv_0.9.1               spatstat.utils_3.1-4       
 [43] goftest_1.2-3               RSpectra_0.16-2            
 [45] spatstat.random_3.4-1       fitdistrplus_1.2-2         
 [47] parallelly_1.46.0           codetools_0.2-20           
 [49] DelayedArray_0.36.0         tidyselect_1.2.1           
 [51] shape_1.4.6.1               UCSC.utils_1.4.0           
 [53] farver_2.1.2                viridis_0.6.5              
 [55] ScaledMatrix_1.16.0         matrixStats_1.5.0          
 [57] stats4_4.5.2                spatstat.explore_3.4-2     
 [59] Seqinfo_1.0.0               jsonlite_2.0.0             
 [61] GetoptLong_1.0.5            progressr_0.15.1           
 [63] ggridges_0.5.6              survival_3.8-3             
 [65] iterators_1.0.14            foreach_1.5.2              
 [67] tools_4.5.2                 ica_1.0-3                  
 [69] Rcpp_1.1.0                  glue_1.8.0                 
 [71] gridExtra_2.3               SparseArray_1.10.8         
 [73] xfun_0.52                   distributional_0.5.0       
 [75] MatrixGenerics_1.22.0       ggthemes_5.2.0             
 [77] GenomeInfoDb_1.44.0         dplyr_1.1.4                
 [79] withr_3.0.2                 fastmap_1.2.0              
 [81] callr_3.7.6                 digest_0.6.37              
 [83] rsvd_1.0.5                  parallelDist_0.2.6         
 [85] R6_2.6.1                    mime_0.13                  
 [87] colorspace_2.1-1            scattermore_1.2            
 [89] tensor_1.5                  spatstat.data_3.1-6        
 [91] tidyr_1.3.1                 generics_0.1.3             
 [93] data.table_1.17.0           httr_1.4.7                 
 [95] htmlwidgets_1.6.4           S4Arrays_1.10.1            
 [97] uwot_0.2.3                  pkgconfig_2.0.3            
 [99] gtable_0.3.6                ComplexHeatmap_2.26.0      
[101] lmtest_0.9-40               S7_0.2.1                   
[103] SingleCellExperiment_1.32.0 XVector_0.50.0             
[105] htmltools_0.5.8.1           dotCall64_1.2              
[107] zigg_0.0.2                  clue_0.3-66                
[109] Biobase_2.70.0              SeuratObject_5.1.0         
[111] scales_1.4.0                png_0.1-8                  
[113] spatstat.univar_3.1-3       knitr_1.50                 
[115] reshape2_1.4.4              rjson_0.2.23               
[117] nlme_3.1-168                proxy_0.4-27               
[119] cachem_1.1.0                zoo_1.8-14                 
[121] GlobalOptions_0.1.2         stringr_1.6.0              
[123] KernSmooth_2.23-26          parallel_4.5.2             
[125] miniUI_0.1.2                pillar_1.10.2              
[127] grid_4.5.2                  vctrs_0.6.5                
[129] RANN_2.6.2                  promises_1.3.2             
[131] BiocSingular_1.26.1         beachmat_2.26.0            
[133] xtable_1.8-4                cluster_2.1.8.1            
[135] evaluate_1.0.3              cli_3.6.5                  
[137] compiler_4.5.2              rlang_1.1.6                
[139] crayon_1.5.3                future.apply_1.20.0        
[141] labeling_0.4.3              ps_1.9.1                   
[143] plyr_1.8.9                  stringi_1.8.7              
[145] viridisLite_0.4.2           deldir_2.0-4               
[147] BiocParallel_1.44.0         lazyeval_0.2.2             
[149] spatstat.geom_3.4-1         RcppHNSW_0.6.0             
[151] patchwork_1.3.2             bit64_4.6.0-1              
[153] future_1.58.0               shiny_1.11.0               
[155] SummarizedExperiment_1.38.1 ROCR_1.0-11                
[157] Rfast_2.1.5.1               igraph_2.1.4               
[159] memoise_2.0.1               RcppParallel_5.1.10        
[161] bit_4.6.0