Cortical cells DGE E13.5 Data-set Anaysis

library(ggplot2)
library(tibble)
library(zeallot)
library(COTAN)

options(parallelly.fork.enable = TRUE)

outDir <- "Data/Yuzwa_MouseCortex/"

setLoggingLevel(1)
setLoggingFile(file.path(outDir, "CorticalCells_GSM2861511_E135-analysis.log"))

Read the dataset from file

dataset <- read.csv(file.path("Data/Yuzwa_MouseCortex/", "GSM2861511_E135_Only_Cortical_Cells_DGE.txt.gz"),
                    header = TRUE, sep = "\t", strip.white = TRUE,
                    row.names = 1)

Cleaning

Crate the COTAN object

sampleCondition <- "CorticalCells_GSM2861511_E135"
cc135Obj <- COTAN(raw = dataset)
cc135Obj <- initializeMetaDataset(cc135Obj,
                                  GEO = "GSM2861511_E135",
                                  sequencingMethod = "Drop_seq",
                                  sampleCondition = sampleCondition)

Inspect cells’ sizes

cellSizePlot(cc135Obj)

Drop cells with too many ritz reads as they are probably duplets

cellsSizeThr <- 8000
cc135Obj <- addElementToMetaDataset(cc135Obj, "Cells size threshold", cellsSizeThr)

cells_to_rem <- getCells(cc135Obj)[getCellsSize(cc135Obj) > cellsSizeThr]
cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

cellSizePlot(cc135Obj)

Inspect the number of expressed genes per cell

genesSizePlot(cc135Obj)

Drop cells with too high genes expession as they are probably duplets

genesSizeThr <- 3300
cc135Obj <- addElementToMetaDataset(cc135Obj, "Num genes threshold", genesSizeThr)

numExprGenes <- getNumExpressedGenes(cc135Obj)
cells_to_rem <- names(numExprGenes)[numExprGenes > genesSizeThr]
cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

genesSizePlot(cc135Obj)

Check number of mitocondrial genes expressed in each cell

mitGenesPattern <- "^mt-"
getGenes(cc135Obj)[grep(mitGenesPattern, getGenes(cc135Obj))]

 [1] "mt-Co1"  "mt-Co3"  "mt-Cytb" "mt-Nd1"  "mt-Nd2"  "mt-Nd4"  "mt-Nd5" 
 [8] "mt-Nd6"  "mt-Rnr1" "mt-Rnr2" "mt-Ta"   "mt-Tc"   "mt-Te"   "mt-Tf"  
[15] "mt-Ti"   "mt-Tl1"  "mt-Tl2"  "mt-Tm"   "mt-Tn"   "mt-Tp"   "mt-Tq"  
[22] "mt-Ts2"  "mt-Tt"   "mt-Tv"   "mt-Tw"

c(mitPlot, mitSizes) %<-%
  mitochondrialPercentagePlot(cc135Obj, genePrefix = mitGenesPattern)

plot(mitPlot)

Cells with a too high percentage of mitocondrial genes are likely dead (or at the last problematic) cells. So we drop them!

mitPercThr <- 10.0
cc135Obj <- addElementToMetaDataset(cc135Obj, "Mitoc. perc. threshold", mitPercThr)

cells_to_rem <- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]

cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

c(mitPlot, mitSizes) %<-%
  mitochondrialPercentagePlot(cc135Obj, genePrefix = mitGenesPattern)

plot(mitPlot)

Check no further outliers after all the culling

cellSizePlot(cc135Obj)

genesSizePlot(cc135Obj)

Clean: round 1

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(genesPlot)

B group contains high number of hemoglobin genes: so they are not interesting

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]

cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

Clean: round 2

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot,
  UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(pcaCellsData)

plot(genesPlot)

B group contains just 3 cells quite different in the 3rd and 4th components: better to drop them

cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"]

cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

Clean: round 3

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(genesPlot)

cc135Obj <- addElementToMetaDataset(cc135Obj, "Num drop B group", 2)

Visualize if all is ok:

plot(UDEPlot)

plot(nuPlot)

plot(zoomedNuPlot)

Drop very low UDE cells as they are likely outliers

lowUDEThr <- 0.14 # the threshold to remove low UDE cells

cc135Obj <- addElementToMetaDataset(cc135Obj, "Low UDE threshold", lowUDEThr)

nuDf <- data.frame("nu" = sort(getNu(cc135Obj)), "n" = seq_along(getNu(cc135Obj)))


cells_to_rem <- rownames(nuDf)[nuDf[["nu"]] < lowUDEThr]
cc135Obj <- dropGenesCells(cc135Obj, cells = cells_to_rem)

Final cleaning to check all is OK

cc135Obj <- clean(cc135Obj)

c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cc135Obj)

plot(pcaCellsPlot)

plot(genesPlot)

plot(UDEPlot)

plot(nuPlot)

plot(zoomedNuPlot)

plot(cellSizePlot(cc135Obj))

plot(genesSizePlot(cc135Obj))

cc135Obj <- proceedToCoex(cc135Obj, calcCoex = TRUE, cores = 12,
                          saveObj = TRUE, outDir = outDir)

Save the COTAN object

saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

GDI

gdiData <- calculateGDI(cc135Obj)

genesToLabel <- head(rownames(gdiData[order(gdiData[["GDI"]],
                                            decreasing = TRUE), ]), n = 10L)

genesToLabel

 [1] "Neurod6"       "2610017I09Rik" "Nr2e1"         "Ina"          
 [5] "Sox2"          "Stmn2"         "Mdk"           "Mapt"         
 [9] "Gas1"          "2810417H13Rik"

gdiPlot <- GDIPlot(cc135Obj, GDIIn = gdiData, GDIThreshold = 1.4,
                   genes = list("Top 10 GDI genes" = genesToLabel))

plot(gdiPlot)

Consistent Transcript Cohorts

c(splitClusters, splitCoexDF) %<-% 
  cellsUniformClustering(cc135Obj, GDIThreshold = 1.4, cores = 13,
                         saveObj = TRUE, outDir = outDir)


cc135Obj <- addClusterization(cc135Obj, clName = "split",
                              clusters = splitClusters,
                              coexDF = splitCoexDF, override = TRUE)

table(splitClusters)

splitClusters
 -1  01  02  03  04  05  06  07  08  09  10  11  12  13  14 
  6  94 106  47  69 127  92  11  25  12  41 113  59 161 149

saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

c(mergedClusters, mergedCoexDF) %<-%
  mergeUniformCellsClusters(cc135Obj, clusters = splitClusters,
                            GDIThreshold = 1.4, cores = 13,
                            saveObj = TRUE, outDir = outDir)

cc135Obj <- addClusterization(cc135Obj, clName = "merge",
                              clusters = mergedClusters,
                              coexDF = mergedCoexDF,
                              override = TRUE)

table(mergedClusters)

mergedClusters
  1   2   3   4   5   6   7   8   9 
 94 153 161 127 113 102  52 161 149

saveRDS(cc135Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))

Sys.time()

[1] "2023-08-22 17:55:23 CEST"

sessionInfo()

R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] COTAN_2.1.7   zeallot_0.1.0 tibble_3.2.1  ggplot2_3.4.2

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3     rstudioapi_0.15.0      jsonlite_1.8.7        
  [4] shape_1.4.6            umap_0.2.10.0          magrittr_2.0.3        
  [7] spatstat.utils_3.0-3   farver_2.1.1           rmarkdown_2.24        
 [10] GlobalOptions_0.1.2    vctrs_0.6.3            ROCR_1.0-11           
 [13] spatstat.explore_3.2-1 askpass_1.1            htmltools_0.5.5       
 [16] sctransform_0.3.5      parallelly_1.36.0      KernSmooth_2.23-22    
 [19] htmlwidgets_1.6.2      ica_1.0-3              plyr_1.8.8            
 [22] plotly_4.10.2          zoo_1.8-12             igraph_1.5.1          
 [25] mime_0.12              lifecycle_1.0.3        iterators_1.0.14      
 [28] pkgconfig_2.0.3        Matrix_1.6-0           R6_2.5.1              
 [31] fastmap_1.1.1          fitdistrplus_1.1-11    future_1.33.0         
 [34] shiny_1.7.5            clue_0.3-64            digest_0.6.33         
 [37] colorspace_2.1-0       patchwork_1.1.2        S4Vectors_0.38.1      
 [40] Seurat_4.3.0.1         tensor_1.5             RSpectra_0.16-1       
 [43] irlba_2.3.5.1          labeling_0.4.2         progressr_0.14.0      
 [46] RcppZiggurat_0.1.6     fansi_1.0.4            spatstat.sparse_3.0-2 
 [49] httr_1.4.6             polyclip_1.10-4        abind_1.4-5           
 [52] compiler_4.3.1         withr_2.5.0            doParallel_1.0.17     
 [55] viridis_0.6.4          dendextend_1.17.1      MASS_7.3-60           
 [58] openssl_2.1.0          rjson_0.2.21           tools_4.3.1           
 [61] lmtest_0.9-40          httpuv_1.6.11          future.apply_1.11.0   
 [64] goftest_1.2-3          glue_1.6.2             nlme_3.1-162          
 [67] promises_1.2.0.1       grid_4.3.1             Rtsne_0.16            
 [70] cluster_2.1.4          reshape2_1.4.4         generics_0.1.3        
 [73] gtable_0.3.3           spatstat.data_3.0-1    tidyr_1.3.0           
 [76] data.table_1.14.8      sp_2.0-0               utf8_1.2.3            
 [79] BiocGenerics_0.46.0    spatstat.geom_3.2-4    RcppAnnoy_0.0.21      
 [82] ggrepel_0.9.3          RANN_2.6.1             foreach_1.5.2         
 [85] pillar_1.9.0           stringr_1.5.0          later_1.3.1           
 [88] circlize_0.4.15        splines_4.3.1          dplyr_1.1.2           
 [91] lattice_0.21-8         survival_3.5-5         deldir_1.0-9          
 [94] tidyselect_1.2.0       ComplexHeatmap_2.16.0  miniUI_0.1.1.1        
 [97] pbapply_1.7-2          knitr_1.43             gridExtra_2.3         
[100] IRanges_2.34.1         scattermore_1.2        stats4_4.3.1          
[103] xfun_0.39              factoextra_1.0.7       matrixStats_1.0.0     
[106] stringi_1.7.12         lazyeval_0.2.2         yaml_2.3.7            
[109] evaluate_0.21          codetools_0.2-19       cli_3.6.1             
[112] RcppParallel_5.1.7     uwot_0.1.16            xtable_1.8-4          
[115] reticulate_1.30        munsell_0.5.0          Rcpp_1.0.11           
[118] globals_0.16.2         spatstat.random_3.1-5  png_0.1-8             
[121] parallel_4.3.1         Rfast_2.0.8            ellipsis_0.3.2        
[124] assertthat_0.2.1       parallelDist_0.2.6     listenv_0.9.0         
[127] ggthemes_4.2.4         viridisLite_0.4.2      scales_1.2.1          
[130] ggridges_0.5.4         SeuratObject_4.1.3     leiden_0.4.3          
[133] purrr_1.0.1            crayon_1.5.2           GetoptLong_1.0.5      
[136] rlang_1.1.1            cowplot_1.1.1