Cortical cells DGE E17.5 Data-set Anaysis

library(ggplot2)
library(tibble)
library(zeallot)
library(COTAN)

options(parallelly.fork.enable = TRUE)

outDir <- "Data/Yuzwa_MouseCortex/"

setLoggingLevel(1)
setLoggingFile(file.path(outDir, "CorticalCells_GSM2861514_E175-analysis.log"))

Read the dataset from file

dataset <- read.csv(file.path(outDir <- "Data/Yuzwa_MouseCortex/"
, "GSM2861514_E175_Only_Cortical_Cells_DGE.txt"),
       header = TRUE, sep = "\t", strip.white = TRUE,row.names = 1)

Cleaning

Crate the COTAN object

sampleCondition <- "CorticalCells_GSM2861514_E175"
cc175Obj <- COTAN(raw = dataset)
cc175Obj <- initializeMetaDataset(cc175Obj,
                                  GEO = "GSM2861511_E135",
                                  sequencingMethod = "DropSeq",
                                  sampleCondition = sampleCondition)

Inspect cells’ sizes

cellSizePlot(cc175Obj)

Drop cells with too many ritz reads as they are probably duplets

cellsSizeThr <- 6500
cc175Obj <- addElementToMetaDataset(cc175Obj, "Cells size threshold", cellsSizeThr)

cells_to_rem <- getCells(cc175Obj)[getCellsSize(cc175Obj) > cellsSizeThr]
cc175Obj <- dropGenesCells(cc175Obj, cells = cells_to_rem)

cellSizePlot(cc175Obj)

Inspect the number of expressed genes per cell

genesSizePlot(cc175Obj)

Drop cells with too high genes expession as they are probably duplets

genesSizeThr <- 3000
cc175Obj <- addElementToMetaDataset(cc175Obj, "Num genes threshold", genesSizeThr)

numExprGenes <- getNumExpressedGenes(cc175Obj)
cells_to_rem <- names(numExprGenes)[numExprGenes > genesSizeThr]
cc175Obj <- dropGenesCells(cc175Obj, cells = cells_to_rem)

genesSizePlot(cc175Obj)

Check number of mitocondrial genes expressed in each cell

mitGenesPattern <- "^mt-"
getGenes(cc175Obj)[grep(mitGenesPattern, getGenes(cc175Obj))]
 [1] "mt-Co1"  "mt-Co2"  "mt-Co3"  "mt-Cytb" "mt-Nd1"  "mt-Nd2"  "mt-Nd4" 
 [8] "mt-Nd5"  "mt-Nd6"  "mt-Rnr1" "mt-Rnr2" "mt-Ta"   "mt-Tc"   "mt-Te"  
[15] "mt-Tf"   "mt-Ti"   "mt-Tl1"  "mt-Tl2"  "mt-Tm"   "mt-Tp"   "mt-Tq"  
[22] "mt-Ts2"  "mt-Tt"   "mt-Tv"   "mt-Tw"   "mt-Ty"  
c(mitPlot, mitSizes) %<-%
  mitochondrialPercentagePlot(cc175Obj, genePrefix = mitGenesPattern)

plot(mitPlot)

Cells with a too high percentage of mitocondrial genes are likely dead (or at the last problematic) cells. So we drop them!

mitPercThr <- 5.0
cc175Obj <- addElementToMetaDataset(cc175Obj, "Mitoc. perc. threshold", mitPercThr)

cells_to_rem <- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]

cc175Obj <- dropGenesCells(cc175Obj, cells = cells_to_rem)

c(mitPlot, mitSizes) %<-%
  mitochondrialPercentagePlot(cc175Obj, genePrefix = mitGenesPattern)

plot(mitPlot)

Check no further outliers after all the culling

cellSizePlot(cc175Obj)

genesSizePlot(cc175Obj)

Clean: round 1

cc175Obj <- clean(cc175Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc175Obj)

plot(pcaCellsPlot)

plot(genesPlot)

cc175Obj <- addElementToMetaDataset(cc175Obj, "Num drop B group", 0)

B group contains high number of hemoglobin genes: so they are not interesting

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]

cc175Obj <- dropGenesCells(cc175Obj, cells = cells_to_rem)

Clean: round 2

cc175Obj <- clean(cc175Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc175Obj)

plot(pcaCellsPlot)

plot(genesPlot)

cc175Obj <- addElementToMetaDataset(cc175Obj, "Num drop B group", 1)

Visualize if all is ok:

plot(pcaCellsData)

plot(UDEPlot)

plot(nuPlot)

plot(zoomedNuPlot)

Drop very low UDE cells as they are likely outliers

lowUDEThr <- 0.2
cc175Obj <- addElementToMetaDataset(cc175Obj, "Low UDE threshold", lowUDEThr)

nuDf <- data.frame("nu" = sort(getNu(cc175Obj)), "n" = seq_along(getNu(cc175Obj)))

cells_to_rem <- rownames(nuDf)[nuDf[["nu"]] < lowUDEThr]
cc175Obj <- dropGenesCells(cc175Obj, cells = cells_to_rem)

Final cleaning to check all is OK

cc175Obj <- clean(cc175Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc175Obj)

plot(pcaCellsPlot)

plot(genesPlot)

plot(UDEPlot)

plot(nuPlot)

plot(cellSizePlot(cc175Obj))

plot(genesSizePlot(cc175Obj))

Calculate genes’ COEX

cc175Obj <- proceedToCoex(cc175Obj, calcCoex = TRUE, cores = 12,
                          saveObj = TRUE, outDir = outDir)

GDI

gdiData <- calculateGDI(cc175Obj)

genesToLabel <- head(rownames(gdiData[order(gdiData[["GDI"]],
                                            decreasing = TRUE), ]), n = 10L)

genesToLabel
 [1] "Ednrb"   "Mfge8"   "Aldoc"   "Sox2"    "Vim"     "Hes5"    "Neurod6"
 [8] "Tubb3"   "Ddah1"   "Atp1a2" 
gdiPlot <- GDIPlot(cc175Obj, GDIIn = gdiData, GDIThreshold = 1.4,
                   genes = list("Top 10 GDI genes" = genesToLabel))

plot(gdiPlot)

Save the COTAN object

saveRDS(cc175Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

Consistent Transcript Cohorts (clustering)

c(splitClusters, splitCoexDF) %<-%
  cellsUniformClustering(cc175Obj, GDIThreshold = 1.4, cores = 13,
                         saveObj = TRUE, outDir = outDir)


cc175Obj <- addClusterization(cc175Obj, clName = "split",
                              clusters = splitClusters,
                              coexDF = splitCoexDF, override = TRUE)

table(splitClusters)
saveRDS(cc175Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
c(mergedClusters, mergedCoexDF) %<-%
  mergeUniformCellsClusters(cc175Obj, clusters = splitClusters,
                            GDIThreshold = 1.4, cores = 13,
                            saveObj = TRUE, outDir = outDir)

cc175Obj <- addClusterization(cc175Obj, clName = "merge",
                              clusters = mergedClusters,
                              coexDF = mergedCoexDF,
                              override = TRUE)

table(mergedClusters)
saveRDS(cc175Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

Sys.time()
[1] "2023-08-22 19:46:59 CEST"
sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] COTAN_2.1.7   zeallot_0.1.0 tibble_3.2.1  ggplot2_3.4.2

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3     rstudioapi_0.15.0      jsonlite_1.8.7        
  [4] shape_1.4.6            umap_0.2.10.0          magrittr_2.0.3        
  [7] spatstat.utils_3.0-3   farver_2.1.1           rmarkdown_2.24        
 [10] GlobalOptions_0.1.2    vctrs_0.6.3            ROCR_1.0-11           
 [13] spatstat.explore_3.2-1 askpass_1.1            htmltools_0.5.5       
 [16] sctransform_0.3.5      parallelly_1.36.0      KernSmooth_2.23-22    
 [19] htmlwidgets_1.6.2      ica_1.0-3              plyr_1.8.8            
 [22] plotly_4.10.2          zoo_1.8-12             igraph_1.5.1          
 [25] mime_0.12              lifecycle_1.0.3        iterators_1.0.14      
 [28] pkgconfig_2.0.3        Matrix_1.6-0           R6_2.5.1              
 [31] fastmap_1.1.1          fitdistrplus_1.1-11    future_1.33.0         
 [34] shiny_1.7.5            clue_0.3-64            digest_0.6.33         
 [37] colorspace_2.1-0       patchwork_1.1.2        S4Vectors_0.38.1      
 [40] Seurat_4.3.0.1         tensor_1.5             RSpectra_0.16-1       
 [43] irlba_2.3.5.1          labeling_0.4.2         progressr_0.14.0      
 [46] RcppZiggurat_0.1.6     fansi_1.0.4            spatstat.sparse_3.0-2 
 [49] httr_1.4.6             polyclip_1.10-4        abind_1.4-5           
 [52] compiler_4.3.1         withr_2.5.0            doParallel_1.0.17     
 [55] viridis_0.6.4          dendextend_1.17.1      MASS_7.3-60           
 [58] openssl_2.1.0          rjson_0.2.21           tools_4.3.1           
 [61] lmtest_0.9-40          httpuv_1.6.11          future.apply_1.11.0   
 [64] goftest_1.2-3          glue_1.6.2             nlme_3.1-162          
 [67] promises_1.2.0.1       grid_4.3.1             Rtsne_0.16            
 [70] cluster_2.1.4          reshape2_1.4.4         generics_0.1.3        
 [73] gtable_0.3.3           spatstat.data_3.0-1    tidyr_1.3.0           
 [76] data.table_1.14.8      sp_2.0-0               utf8_1.2.3            
 [79] BiocGenerics_0.46.0    spatstat.geom_3.2-4    RcppAnnoy_0.0.21      
 [82] ggrepel_0.9.3          RANN_2.6.1             foreach_1.5.2         
 [85] pillar_1.9.0           stringr_1.5.0          later_1.3.1           
 [88] circlize_0.4.15        splines_4.3.1          dplyr_1.1.2           
 [91] lattice_0.21-8         survival_3.5-5         deldir_1.0-9          
 [94] tidyselect_1.2.0       ComplexHeatmap_2.16.0  miniUI_0.1.1.1        
 [97] pbapply_1.7-2          knitr_1.43             gridExtra_2.3         
[100] IRanges_2.34.1         scattermore_1.2        stats4_4.3.1          
[103] xfun_0.39              factoextra_1.0.7       matrixStats_1.0.0     
[106] stringi_1.7.12         lazyeval_0.2.2         yaml_2.3.7            
[109] evaluate_0.21          codetools_0.2-19       cli_3.6.1             
[112] RcppParallel_5.1.7     uwot_0.1.16            xtable_1.8-4          
[115] reticulate_1.30        munsell_0.5.0          Rcpp_1.0.11           
[118] globals_0.16.2         spatstat.random_3.1-5  png_0.1-8             
[121] parallel_4.3.1         Rfast_2.0.8            ellipsis_0.3.2        
[124] assertthat_0.2.1       parallelDist_0.2.6     listenv_0.9.0         
[127] ggthemes_4.2.4         viridisLite_0.4.2      scales_1.2.1          
[130] ggridges_0.5.4         SeuratObject_4.1.3     leiden_0.4.3          
[133] purrr_1.0.1            crayon_1.5.2           GetoptLong_1.0.5      
[136] rlang_1.1.1            cowplot_1.1.1