E14.5 Mouse Cortex Loo 2019

options(parallelly.fork.enable = TRUE)
library(COTAN)
library(zeallot)
library(data.table)
library(factoextra)
library(Rtsne)
library(qpdf)
library(stringr)
e14_dge = read.table("../COTAN_small_paper/data/MouseCortex/E14_combined_matrix.txt.gz",header=T,sep="\t",row.names=1)

print(dim(e14_dge))
[1] 21313 11069
outDir <- "Data/MouseCortex/"
setLoggingLevel(1)
setLoggingFile(file.path(outDir, "Dataset.log"))
cond <- "MouseCortex_E14.5"
obj <- COTAN(raw = e14_dge)
obj <- initializeMetaDataset(obj,
                             GEO = "GSE123335",
                             sequencingMethod = "Drop_seq",
                             sampleCondition = cond)
rm(e14_dge)
ECDPlot(obj, yCut = 400L)

cellSizePlot(obj,splitPattern = "_",numCol = 1)

genesSizePlot(obj,splitPattern = "_",numCol = 1)

mit <- mitochondrialPercentagePlot(obj, genePrefix = "^mt-",splitPattern = "_",numCol = 1)
mit[["plot"]]

To drop cells by cell library size:

cells_to_rem <- getCells(obj)[getCellsSize(obj) > 15000]
obj <- dropGenesCells(obj, cells = cells_to_rem)
cellSizePlot(obj,splitPattern = "_",numCol = 1)

genesSizePlot(obj,splitPattern = "_",numCol = 1)

To drop cells by mitochondrial percentage:

to_rem <- mit[["sizes"]][["mit.percentage"]] > 7.5
cells_to_rem <- rownames(mit[["sizes"]])[to_rem]
obj <- dropGenesCells(obj, cells = cells_to_rem)

mit <- mitochondrialPercentagePlot(obj, genePrefix = "^mt-",splitPattern = "_",numCol = 1)

mit[["plot"]]

cellSizePlot(obj,splitPattern = "_",numCol = 1)

obj <- clean(obj)
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoom) %<-% cleanPlots(obj)

pcaCellsPlot

genesPlot

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]
obj <- dropGenesCells(obj, cells = cells_to_rem)
obj <- clean(obj)
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoom) %<-% cleanPlots(obj)

pcaCellsPlot

 genesPlot

plot(nuPlot)

nuDf <- data.frame("nu" = sort(getNu(obj)), "n" = seq_along(getNu(obj)))
yset <- 0.18 # the threshold to remove low UDE cells
plot.ude <- ggplot(nuDf, aes(x = n, y = nu)) +
            geom_point(colour = "#8491B4B2", size = 1.0) +
            xlim(0L, 3000) +
            ylim(0.0, 1.0) +
            geom_hline(yintercept = yset, linetype = "dashed",
                       color = "darkred") +
            annotate(geom = "text", x = 1000L, y = 0.25,
                     label = paste0("to remove cells with nu < ", yset),
                     color = "darkred", size = 4.5)

plot.ude

obj <- addElementToMetaDataset(obj, "Threshold low UDE cells:", yset)

cells_to_rem <- rownames(nuDf)[nuDf[["nu"]] < yset]
obj <- dropGenesCells(obj, cells = cells_to_rem)
obj <- clean(obj)
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoom) %<-% cleanPlots(obj)

pcaCellsPlot

genesPlot

UDEPlot

nuPlot

UDEPlot$data$Sample <- str_split(rownames(UDEPlot$data),pattern = "_",simplify = T)[,1]

ggplot(UDEPlot$data, aes(PC1,PC2,color = Sample)) + geom_point(size = 0.5, alpha=0.7)

ggplot(UDEPlot$data, aes(PC2,PC3,color = Sample)) + geom_point(size = 0.5, alpha=0.7)

ggplot(UDEPlot$data, aes(PC2,PC5,color = Sample)) + geom_point(size = 0.5, alpha=0.7)

ggplot(UDEPlot$data, aes(PC2,PC5,color = groups)) + geom_point(size = 0.5, alpha=0.7)

COTAN analysis

In this part, all the contingency tables are computed and used to get the statistics.

obj <- estimateDispersionBisection(obj, cores = 15L)

COEX evaluation and storing

obj <- calculateCoex(obj)
# saving the structure
saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS")))
obj <- readRDS(file = file.path(outDir, paste0(cond, ".cotan.RDS")))
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] stringr_1.5.0     qpdf_1.3.2        Rtsne_0.17        factoextra_1.0.7 
[5] ggplot2_3.5.0     data.table_1.15.0 zeallot_0.1.0     COTAN_2.5.0      

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.21          splines_4.3.2            
  [3] later_1.3.1               tibble_3.2.1             
  [5] polyclip_1.10-4           fastDummies_1.7.3        
  [7] lifecycle_1.0.3           doParallel_1.0.17        
  [9] globals_0.16.2            lattice_0.22-5           
 [11] MASS_7.3-60               dendextend_1.17.1        
 [13] magrittr_2.0.3            plotly_4.10.2            
 [15] rmarkdown_2.24            yaml_2.3.7               
 [17] httpuv_1.6.11             Seurat_5.0.0             
 [19] sctransform_0.4.1         spam_2.10-0              
 [21] askpass_1.2.0             sp_2.1-1                 
 [23] spatstat.sparse_3.0-2     reticulate_1.36.1        
 [25] cowplot_1.1.1             pbapply_1.7-2            
 [27] RColorBrewer_1.1-3        abind_1.4-5              
 [29] purrr_1.0.1               BiocGenerics_0.46.0      
 [31] circlize_0.4.15           IRanges_2.34.1           
 [33] S4Vectors_0.38.1          ggrepel_0.9.5            
 [35] irlba_2.3.5.1             listenv_0.9.0            
 [37] spatstat.utils_3.0-3      umap_0.2.10.0            
 [39] goftest_1.2-3             RSpectra_0.16-1          
 [41] spatstat.random_3.2-1     dqrng_0.3.0              
 [43] fitdistrplus_1.1-11       parallelly_1.37.1        
 [45] DelayedMatrixStats_1.22.5 leiden_0.4.3             
 [47] codetools_0.2-19          DelayedArray_0.26.7      
 [49] tidyselect_1.2.0          shape_1.4.6              
 [51] farver_2.1.1              ScaledMatrix_1.8.1       
 [53] viridis_0.6.4             matrixStats_1.2.0        
 [55] stats4_4.3.2              spatstat.explore_3.2-1   
 [57] jsonlite_1.8.7            GetoptLong_1.0.5         
 [59] ellipsis_0.3.2            progressr_0.14.0         
 [61] ggridges_0.5.4            survival_3.5-8           
 [63] iterators_1.0.14          foreach_1.5.2            
 [65] tools_4.3.2               ica_1.0-3                
 [67] Rcpp_1.0.11               glue_1.7.0               
 [69] gridExtra_2.3             xfun_0.39                
 [71] MatrixGenerics_1.12.3     ggthemes_5.1.0           
 [73] dplyr_1.1.2               withr_3.0.0              
 [75] fastmap_1.1.1             fansi_1.0.4              
 [77] openssl_2.1.0             digest_0.6.33            
 [79] rsvd_1.0.5                parallelDist_0.2.6       
 [81] R6_2.5.1                  mime_0.12                
 [83] colorspace_2.1-0          scattermore_1.2          
 [85] tensor_1.5                spatstat.data_3.0-1      
 [87] utf8_1.2.3                tidyr_1.3.0              
 [89] generics_0.1.3            httr_1.4.6               
 [91] htmlwidgets_1.6.2         S4Arrays_1.2.0           
 [93] uwot_0.1.16               pkgconfig_2.0.3          
 [95] gtable_0.3.3              ComplexHeatmap_2.16.0    
 [97] lmtest_0.9-40             htmltools_0.5.8          
 [99] dotCall64_1.1-0           clue_0.3-64              
[101] SeuratObject_5.0.0        scales_1.3.0             
[103] png_0.1-8                 knitr_1.43               
[105] rstudioapi_0.15.0         reshape2_1.4.4           
[107] rjson_0.2.21              nlme_3.1-163             
[109] zoo_1.8-12                GlobalOptions_0.1.2      
[111] KernSmooth_2.23-22        parallel_4.3.2           
[113] miniUI_0.1.1.1            RcppZiggurat_0.1.6       
[115] pillar_1.9.0              grid_4.3.2               
[117] vctrs_0.6.3               RANN_2.6.1               
[119] promises_1.2.0.1          BiocSingular_1.16.0      
[121] beachmat_2.16.0           xtable_1.8-4             
[123] cluster_2.1.6             evaluate_0.21            
[125] cli_3.6.1                 compiler_4.3.2           
[127] rlang_1.1.1               crayon_1.5.2             
[129] future.apply_1.11.0       labeling_0.4.2           
[131] plyr_1.8.8                stringi_1.8.1            
[133] viridisLite_0.4.2         deldir_2.0-2             
[135] BiocParallel_1.34.2       assertthat_0.2.1         
[137] munsell_0.5.0             lazyeval_0.2.2           
[139] spatstat.geom_3.2-4       PCAtools_2.14.0          
[141] Matrix_1.6-3              RcppHNSW_0.6.0           
[143] patchwork_1.2.0           sparseMatrixStats_1.12.2 
[145] future_1.33.0             shiny_1.8.0              
[147] ROCR_1.0-11               Rfast_2.1.0              
[149] igraph_2.0.3              RcppParallel_5.1.7