Cortical cells DGE E13.5 Data-set Anaysis

library(ggplot2)
library(tibble)
library(zeallot)
library(COTAN)

options(parallelly.fork.enable = TRUE)

outDir <- "Data/Yuzwa_MouseCortex/"

setLoggingLevel(1)
setLoggingFile(file.path(outDir, "CorticalCells_GSM2861511_E135-analysis.log"))

Read the dataset from file:

dataset <- read.csv(file.path("Data/Yuzwa_MouseCortex/", "GSM2861511_E135_Only_Cortical_Cells_DGE.txt.gz"),
                    header = TRUE, sep = "\t", strip.white = TRUE,
                    row.names = 1)

Cleaning

Crate the COTAN object

sampleCondition <- "CorticalCells_GSM2861511_E135"
cc135Obj <- COTAN(raw = dataset)
cc135Obj <- initializeMetaDataset(cc135Obj,
                                  GEO = "GSM2861511_E135",
                                  sequencingMethod = "Drop_seq",
                                  sampleCondition = sampleCondition)

Inspect cells’ sizes

cellSizePlot(cc135Obj)

Drop cells with too many ritz reads as they are probably duplets

cellsSizeThr <- 8000
cc135Obj <- addElementToMetaDataset(cc135Obj, "Cells size threshold", cellsSizeThr)

cells_to_rem <- getCells(cc135Obj)[getCellsSize(cc135Obj) > cellsSizeThr]
cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

cellSizePlot(cc135Obj)

Inspect the number of expressed genes per cell

genesSizePlot(cc135Obj)

Drop cells with too high genes expession as they are probably duplets

genesSizeThr <- 3300
cc135Obj <- addElementToMetaDataset(cc135Obj, "Num genes threshold", genesSizeThr)

numExprGenes <- getNumExpressedGenes(cc135Obj)
cells_to_rem <- names(numExprGenes)[numExprGenes > genesSizeThr]
cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

genesSizePlot(cc135Obj)

Check number of mitocondrial genes expressed in each cell

mitGenesPattern <- "^mt-"
getGenes(cc135Obj)[grep(mitGenesPattern, getGenes(cc135Obj))]
 [1] "mt-Co1"  "mt-Co3"  "mt-Cytb" "mt-Nd1"  "mt-Nd2"  "mt-Nd4"  "mt-Nd5" 
 [8] "mt-Nd6"  "mt-Rnr1" "mt-Rnr2" "mt-Ta"   "mt-Tc"   "mt-Te"   "mt-Tf"  
[15] "mt-Ti"   "mt-Tl1"  "mt-Tl2"  "mt-Tm"   "mt-Tn"   "mt-Tp"   "mt-Tq"  
[22] "mt-Ts2"  "mt-Tt"   "mt-Tv"   "mt-Tw"  
c(mitPlot, mitSizes) %<-%
  mitochondrialPercentagePlot(cc135Obj, genePrefix = mitGenesPattern)

plot(mitPlot)

Cells with a too high percentage of mitocondrial genes are likely dead (or at the last problematic) cells. So we drop them!

mitPercThr <- 10.0
cc135Obj <- addElementToMetaDataset(cc135Obj, "Mitoc. perc. threshold", mitPercThr)

cells_to_rem <- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]

cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

c(mitPlot, mitSizes) %<-%
  mitochondrialPercentagePlot(cc135Obj, genePrefix = mitGenesPattern)

plot(mitPlot)

Check no further outliers after all the culling

cellSizePlot(cc135Obj)

genesSizePlot(cc135Obj)

Clean: round 1

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(genesPlot)

B group contains high number of hemoglobin genes: so they are not interesting

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]

cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

Clean: round 2

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(pcaCellsData)

plot(genesPlot)

B group contains just 3 cells quite different in the 3rd and 4th components: better to drop them

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]

cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

Clean: round 3

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(genesPlot)

cc135Obj <- addElementToMetaDataset(cc135Obj, "Num drop B group", 2)

Visualize if all is ok:

plot(UDEPlot)

plot(nuPlot)

plot(zoomedNuPlot)

Drop very low UDE cells as they are likely outliers

lowUDEThr <- 0.14 # the threshold to remove low UDE cells

cc135Obj <- addElementToMetaDataset(cc135Obj, "Low UDE threshold", lowUDEThr)

nuDf <- data.frame("nu" = sort(getNu(cc135Obj)), "n" = seq_along(getNu(cc135Obj)))


cells_to_rem <- rownames(nuDf)[nuDf[["nu"]] < lowUDEThr]
cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

Final cleaning to check all is OK

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(genesPlot)

plot(UDEPlot)

plot(nuPlot)

plot(zoomedNuPlot)

plot(cellSizePlot(cc135Obj))

plot(genesSizePlot(cc135Obj))

cc135Obj <- proceedToCoex(cc135Obj, calcCoex = TRUE, cores = 12,
                          saveObj = TRUE, outDir = outDir)

Save the COTAN object

saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

GDI

gdiData <- calculateGDI(cc135Obj)

genesToLabel <- head(rownames(gdiData[order(gdiData[["GDI"]],
                                            decreasing = TRUE), ]), n = 10L)

genesToLabel
 [1] "Neurod6"       "2610017I09Rik" "Nr2e1"         "Ina"          
 [5] "Sox2"          "Stmn2"         "Mdk"           "Mapt"         
 [9] "Gas1"          "2810417H13Rik"
gdiPlot <- GDIPlot(cc135Obj, GDIIn = gdiData, GDIThreshold = 1.4,
                   genes = list("Top 10 GDI genes" = genesToLabel))

plot(gdiPlot)

Consistent Transcript Cohorts

c(splitClusters, splitCoexDF) %<-% 
  cellsUniformClustering(cc135Obj, GDIThreshold = 1.4, cores = 13,
                         saveObj = TRUE, outDir = outDir)


cc135Obj <- addClusterization(cc135Obj, clName = "split",
                              clusters = splitClusters,
                              coexDF = splitCoexDF, override = TRUE)
table(splitClusters)
splitClusters
 -1  01  02  03  04  05  06  07  08  09  10  11  12  13  14 
  6  94 106  47  69 127  92  11  25  12  41 113  59 161 149 
saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
c(mergedClusters, mergedCoexDF) %<-%
  mergeUniformCellsClusters(cc135Obj, clusters = splitClusters,
                            GDIThreshold = 1.4, cores = 13,
                            saveObj = TRUE, outDir = outDir)

cc135Obj <- addClusterization(cc135Obj, clName = "merge",
                              clusters = mergedClusters,
                              coexDF = mergedCoexDF,
                              override = TRUE)
table(mergedClusters)
mergedClusters
  1   2   3   4   5   6   7   8   9 
 94 153 161 127 113 102  52 161 149 
saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

Sys.time()
[1] "2025-12-25 16:53:23 CET"
sessionInfo()
R version 4.5.2 (2025-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 11 x64 (build 26200)

Matrix products: default
  LAPACK version 3.12.1

locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: Europe/Rome
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] COTAN_2.11.0  zeallot_0.2.0 tibble_3.3.0  ggplot2_4.0.1

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3          jsonlite_2.0.0             
  [3] shape_1.4.6.1               magrittr_2.0.4             
  [5] spatstat.utils_3.2-0        farver_2.1.2               
  [7] rmarkdown_2.30              GlobalOptions_0.1.3        
  [9] vctrs_0.6.5                 ROCR_1.0-11                
 [11] spatstat.explore_3.6-0      htmltools_0.5.9            
 [13] S4Arrays_1.10.0             SparseArray_1.10.2         
 [15] sctransform_0.4.2           parallelly_1.46.0          
 [17] KernSmooth_2.23-26          htmlwidgets_1.6.4          
 [19] ica_1.0-3                   plyr_1.8.9                 
 [21] plotly_4.11.0               zoo_1.8-14                 
 [23] igraph_2.2.1                mime_0.13                  
 [25] lifecycle_1.0.4             iterators_1.0.14           
 [27] pkgconfig_2.0.3             rsvd_1.0.5                 
 [29] Matrix_1.7-4                R6_2.6.1                   
 [31] fastmap_1.2.0               MatrixGenerics_1.22.0      
 [33] fitdistrplus_1.2-4          future_1.68.0              
 [35] shiny_1.12.1                clue_0.3-66                
 [37] digest_0.6.39               colorspace_2.1-2           
 [39] patchwork_1.3.2             S4Vectors_0.48.0           
 [41] tensor_1.5.1                Seurat_5.3.1               
 [43] RSpectra_0.16-2             irlba_2.3.5.1              
 [45] GenomicRanges_1.62.0        beachmat_2.26.0            
 [47] labeling_0.4.3              progressr_0.18.0           
 [49] spatstat.sparse_3.1-0       polyclip_1.10-7            
 [51] httr_1.4.7                  abind_1.4-8                
 [53] compiler_4.5.2              proxy_0.4-28               
 [55] withr_3.0.2                 doParallel_1.0.17          
 [57] S7_0.2.1                    BiocParallel_1.44.0        
 [59] viridis_0.6.5               fastDummies_1.7.5          
 [61] dendextend_1.19.1           MASS_7.3-65                
 [63] DelayedArray_0.36.0         rjson_0.2.23               
 [65] tools_4.5.2                 lmtest_0.9-40              
 [67] otel_0.2.0                  httpuv_1.6.16              
 [69] future.apply_1.20.1         goftest_1.2-3              
 [71] glue_1.8.0                  nlme_3.1-168               
 [73] promises_1.5.0              grid_4.5.2                 
 [75] Rtsne_0.17                  cluster_2.1.8.1            
 [77] reshape2_1.4.5              generics_0.1.4             
 [79] spatstat.data_3.1-9         gtable_0.3.6               
 [81] tidyr_1.3.1                 data.table_1.17.8          
 [83] BiocSingular_1.26.1         ScaledMatrix_1.18.0        
 [85] sp_2.2-0                    XVector_0.50.0             
 [87] spatstat.geom_3.6-1         BiocGenerics_0.56.0        
 [89] RcppAnnoy_0.0.22            ggrepel_0.9.6              
 [91] RANN_2.6.2                  foreach_1.5.2              
 [93] pillar_1.11.1               stringr_1.6.0              
 [95] spam_2.11-1                 RcppHNSW_0.6.0             
 [97] later_1.4.4                 circlize_0.4.17            
 [99] splines_4.5.2               dplyr_1.1.4                
[101] gghalves_0.1.4              lattice_0.22-7             
[103] deldir_2.0-4                survival_3.8-3             
[105] tidyselect_1.2.1            SingleCellExperiment_1.32.0
[107] ComplexHeatmap_2.26.0       miniUI_0.1.2               
[109] pbapply_1.7-4               knitr_1.50                 
[111] gridExtra_2.3               Seqinfo_1.0.0              
[113] IRanges_2.44.0              SummarizedExperiment_1.40.0
[115] scattermore_1.2             stats4_4.5.2               
[117] xfun_0.54                   Biobase_2.70.0             
[119] matrixStats_1.5.0           stringi_1.8.7              
[121] lazyeval_0.2.2              yaml_2.3.12                
[123] evaluate_1.0.5              codetools_0.2-20           
[125] cli_3.6.5                   uwot_0.2.4                 
[127] RcppParallel_5.1.11-1       xtable_1.8-4               
[129] reticulate_1.44.1           Rcpp_1.1.0                 
[131] spatstat.random_3.4-3       zigg_0.0.2                 
[133] globals_0.18.0              png_0.1-8                  
[135] spatstat.univar_3.1-5       parallel_4.5.2             
[137] Rfast_2.1.5.2               assertthat_0.2.1           
[139] dotCall64_1.2               parallelDist_0.2.7         
[141] listenv_0.10.0              ggthemes_5.2.0             
[143] viridisLite_0.4.2           scales_1.4.0               
[145] ggridges_0.5.7              SeuratObject_5.3.0         
[147] purrr_1.2.0                 crayon_1.5.3               
[149] GetoptLong_1.1.0            rlang_1.1.6                
[151] cowplot_1.2.0