Gene Correlation Analysis E14.5 Mouse Cortex

Prologue

library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)

options(parallelly.fork.enable = TRUE)

dataSetFile <- "Data/MouseCortex/MouseCortex_E14.5.cotan.RDS"
name <- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")

project = "E14.5"


setLoggingLevel(1)
outDir <- "CoexData/"
setLoggingFile(paste0(outDir, "Logs/",name,".log"))

obj <- readRDS(dataSetFile)
file_code = getMetadataElement(obj, datasetTags()[["cond"]])
source("src/Functions.R")

To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:

  1. Constitutive genes
  2. Neural progenitor genes
  3. Pan neuronal genes
  4. Some layer marker genes
genesList <- list(
  "NPGs"= 
    c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
  "PNGs"= 
    c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
  "hk"= 
    c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
      "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
      "Tars", "Amacr"),
  "layers" = 
    c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6") 
  # From https://www.science.org/doi/10.1126/science.aam8999
)

COTAN

int.genes <-getGenes(obj)
coexMat.big <- getGenesCoex(obj)

coexMat <- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]

f1 = colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))

split.genes <- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
                         rep("HK",length(genesList[["hk"]])),
                         rep("PNGs",length(genesList[["PNGs"]]))
                        ),
                         levels = c("NPGs","HK","PNGs"))

lgd = Legend(col_fun = f1, title = "COTAN coex")

htmp <- Heatmap(as.matrix(coexMat),
        #width = ncol(coexMat)*unit(2.5, "mm"), 
        height = nrow(coexMat)*unit(3, "mm"),
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        col = f1,
        row_names_side = "left",
        row_names_gp = gpar(fontsize = 11),
        column_names_gp  = gpar(fontsize = 11),
        column_split = split.genes,
        row_split = split.genes,
        cluster_row_slices = FALSE, 
    cluster_column_slices = FALSE,
    heatmap_legend_param = list(
        title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
        labels = c("-0.5", "0", "0.5")
    )
   )
draw(htmp, heatmap_legend_side="right")

GDI_DF <- calculateGDI(obj)
GDI_DF$geneType <- NA
for (cat in names(genesList)) {
  GDI_DF[rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
}

GDI_DF$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)

write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))


GDI_DF[unlist(genesList),]
         sum.raw.norm      GDI  exp.cells geneType GDI_centered
Nes          8.839957 4.678500 27.7337997     NPGs   3.63695691
Vim         10.246463 4.534465 51.0309278     NPGs   3.42694609
Sox2         9.124228 4.566666 31.4709131     NPGs   3.47389659
Sox1         7.368458 3.455731  9.5636966     NPGs   1.85409012
Notch1       7.469284 4.355698 11.6623711     NPGs   3.16629269
Hes1         7.698079 4.634264 10.5946244     NPGs   3.57245806
Hes5         9.405265 4.812530 22.7908689     NPGs   3.83238101
Pax6         8.144613 4.367938 17.3877025     NPGs   3.18413945
Map2        10.172685 3.768818 72.7172312     PNGs   2.31058907
Tubb3       11.537925 4.586213 88.3836524     PNGs   3.50239807
Neurod1      8.881391 3.388955 22.9657585     PNGs   1.75672703
Nefm         8.288330 3.346732 17.5441826     PNGs   1.69516332
Nefl         7.678477 3.544436  8.6340206     PNGs   1.98342736
Dcx         10.120356 4.821636 66.9090574     PNGs   3.84565720
Tbr1         8.472723 3.926272 24.1715758     PNGs   2.54016525
Calm1       11.491577 1.857652 96.5114138       hk  -0.47600225
Cox6b1      10.398257 1.992891 84.6833579       hk  -0.27881530
Ppia         7.666735 2.386108 17.4797496       hk   0.29451755
Rpl18        9.176712 2.575898 51.5187776       hk   0.57124278
Cox7c        8.439961 1.902976 31.8206922       hk  -0.40991709
Erh          6.593118 2.307720  6.5721649       hk   0.18022355
H3f3a        9.388420 1.886267 58.1093520       hk  -0.43428010
Taf1         8.315368 2.138643 26.8869661       hk  -0.06630076
Taf2         8.017975 1.861938 21.1432253       hk  -0.46975224
Gapdh        5.018736 1.853094  1.5648012       hk  -0.48264772
Actb        12.609181 1.492016 99.6778351       hk  -1.00911996
Golph3       7.150183 1.851371 10.8891753       hk  -0.48515921
Zfr          9.269238 2.273001 50.7916053       hk   0.12960056
Sub1        10.018254 2.583820 74.7238586       hk   0.58279347
Tars         8.056910 2.487556 22.8645066       hk   0.44243393
Amacr        5.492910 1.714319  2.2551546       hk  -0.68499008
Reln         7.095360 3.011725  4.5563328   layers   1.20670295
Lhx5         6.037778 2.901306  2.0434462   layers   1.04570542
Cux1         8.597262 3.026589 31.3512518   layers   1.22837610
Satb2        7.603191 3.121471 11.0916789   layers   1.36671855
Tle1         7.891909 2.322600 17.8111193   layers   0.20191897
Mef2c        8.776162 4.057560 23.6377025   layers   2.73159053
Rorb         6.523092 1.943768  4.6023564   layers  -0.35043950
Sox5         9.301863 3.584350 38.2639912   layers   2.04162343
Bcl11b       9.860621 4.189936 53.4057437   layers   2.92460213
Fezf2        8.804060 3.630239 28.8751841   layers   2.10853283
Foxp2        7.474618 2.254280  8.4867452   layers   0.10230519
Ntf3         4.673267 2.314479  0.8560383   layers   0.19007802
NA                 NA       NA         NA     <NA>           NA
Pvrl3        7.255727 3.417288 10.6866716   layers   1.79803751
Cux2         7.652690 3.088322 11.2573638   layers   1.31838644
Slc17a6      7.901396 2.998305 14.0648012   layers   1.18713575
Sema3c       7.222013 2.481806  7.6307069   layers   0.43405122
Thsd7a       7.472372 3.054089  8.0449190   layers   1.26847311
Sulf2        6.491056 2.379550  4.6759941   layers   0.28495572
Kcnk2        7.151877 1.901258  9.0390280   layers  -0.41242163
Grik3        6.673233 2.781234  5.3019146   layers   0.87063375
Etv1         6.743118 2.264998  4.8232695   layers   0.11793173
Tle4         7.880978 2.635418 15.0405007   layers   0.65802538
Tmem200a     6.188517 2.926026  3.8751841   layers   1.08174899
Glra2        7.057638 3.858495  8.1369661   layers   2.44134235
Etv1.1       6.743118 2.264998  4.8232695   layers   0.11793173
NA.1               NA       NA         NA     <NA>           NA
Sulf1        5.465717 2.461515  1.0125184   layers   0.40446505
NA.2               NA       NA         NA     <NA>           NA
Syt6         7.555334 3.052113  8.8457290   layers   1.26559064
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)

Seurat correlation

srat<- CreateSeuratObject(counts = getRawData(obj), 
                          project = project, 
                          min.cells = 3, 
                          min.features = 200)
srat[["percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat <- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(srat)

plot1$data$centered_variance <- scale(plot1$data$variance.standardized,
                                      center = T,scale = F)

write.csv(plot1$data,paste0("CoexData/",
                            "Variance_Seurat_genes",
                            getMetadataElement(obj, 
                                               datasetTags()[["cond"]]),".csv"))

LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)

LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)

all.genes <- rownames(srat)
srat <- ScaleData(srat, features = all.genes)
seurat.data = GetAssayData(srat[["RNA"]],layer = "data")
corr.pval.list <- correlation_pvalues(data = seurat.data,
                                      int.genes,
                                      n.cells = getNumCells(obj))

seurat.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(seurat.data.cor.big, 
                         genesList, title="Seurat corr")


p_values.fromSeurat <- corr.pval.list$p_values
seurat.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   7120021  380.3   11454531   611.8   11454531   611.8
Vcells 940272073 7173.8 1885882011 14388.2 1571501676 11989.7
draw(htmp, heatmap_legend_side="right")

Seurat SC Transform

srat <-  SCTransform(srat, 
                     method = "glmGamPoi", 
                     vars.to.regress = "percent.mt", 
                     verbose = FALSE)

seurat.data <- GetAssayData(srat[["SCT"]],layer = "data")

#Remove genes with all zeros
seurat.data <-seurat.data[rowSums(seurat.data) > 0,]


corr.pval.list <- correlation_pvalues(seurat.data,
                                      int.genes,
                                      n.cells = getNumCells(obj))

seurat.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(seurat.data.cor.big, 
                         genesList, title="Seurat corr SCT")



p_values.fromSeurat <- corr.pval.list$p_values
seurat.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
             used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    8854790  472.9   16718127   892.9   12172573   650.1
Vcells 1002434648 7648.0 2263138413 17266.4 1885879002 14388.2
draw(htmp, heatmap_legend_side="right")

Monocle

library(monocle3)
cds <- new_cell_data_set(getRawData(obj),
                         cell_metadata = getMetadataCells(obj),
                         gene_metadata = getMetadataGenes(obj)
                         )
cds <- preprocess_cds(cds, num_dim = 100)

normalized_counts <- normalized_counts(cds)
#Remove genes with all zeros
normalized_counts <- normalized_counts[rowSums(normalized_counts) > 0,]


corr.pval.list <- correlation_pvalues(normalized_counts,
                                      int.genes,
                                      n.cells = getNumCells(obj))

rm(normalized_counts)

monocle.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(data.cor.big = monocle.data.cor.big,
                         genesList,
                         title = "Monocle corr")


p_values.from.monocle <- corr.pval.list$p_values
monocle.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
             used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    9966472  532.3   16718127   892.9   16718127   892.9
Vcells 1006589306 7679.7 2358214745 17991.8 2358214745 17991.8
draw(htmp, heatmap_legend_side="right")

ScanPy

library(reticulate)

dirOutScP <- paste0("CoexData/ScanPy/")
if (!dir.exists(dirOutScP)) {
  dir.create(dirOutScP)
}

Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
  py <- import("sys")
  
  source_python("src/scanpyGenesExpression.py")
  scanpyFDR(getRawData(obj), 
                  getMetadataCells(obj), 
                  getMetadataGenes(obj), 
                   "mt", 
                   dirOutScP, 
                   file_code,
            int.genes)

normalized_counts <- read.csv(paste0(dirOutScP,
                                     file_code,"_Scampy_expression_all_genes.gz"),header = T,row.names = 1)

normalized_counts <- t(normalized_counts)
#Remove genes with all zeros
normalized_counts <-normalized_counts[rowSums(normalized_counts) > 0,]


corr.pval.list <- correlation_pvalues(normalized_counts,
                                      int.genes,
                                      n.cells = getNumCells(obj))

ScanPy.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))


htmp <- correlation_plot(data.cor.big = ScanPy.data.cor.big,
                         genesList,
                         title = "ScanPy corr")

p_values.from.ScanPy <- corr.pval.list$p_values
ScanPy.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
             used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    9990389  533.6   16718127   892.9   16718127   892.9
Vcells 1104918338 8429.9 2358214745 17991.8 2358214745 17991.8
draw(htmp, heatmap_legend_side="right")


Sys.time()
[1] "2024-05-08 19:37:36 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
[1] C.UTF-8

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
 [1] stats4    parallel  grid      stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] reticulate_1.36.1           monocle3_1.3.4             
 [3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
 [5] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
 [7] IRanges_2.34.1              S4Vectors_0.38.1           
 [9] MatrixGenerics_1.12.3       matrixStats_1.2.0          
[11] Biobase_2.60.0              BiocGenerics_0.46.0        
[13] fstcore_0.9.18              fst_0.9.8                  
[15] stringr_1.5.0               HiClimR_2.2.1              
[17] doParallel_1.0.17           iterators_1.0.14           
[19] foreach_1.5.2               Rfast_2.1.0                
[21] RcppParallel_5.1.7          RcppZiggurat_0.1.6         
[23] Rcpp_1.0.11                 patchwork_1.2.0            
[25] Seurat_5.0.0                SeuratObject_5.0.0         
[27] sp_2.1-1                    Hmisc_5.1-0                
[29] dplyr_1.1.2                 circlize_0.4.15            
[31] ComplexHeatmap_2.16.0       COTAN_2.5.0                

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.21          splines_4.3.2            
  [3] later_1.3.1               bitops_1.0-7             
  [5] tibble_3.2.1              polyclip_1.10-4          
  [7] rpart_4.1.23              fastDummies_1.7.3        
  [9] lifecycle_1.0.3           globals_0.16.2           
 [11] lattice_0.22-5            MASS_7.3-60              
 [13] backports_1.4.1           dendextend_1.17.1        
 [15] magrittr_2.0.3            plotly_4.10.2            
 [17] rmarkdown_2.24            yaml_2.3.7               
 [19] httpuv_1.6.11             glmGamPoi_1.12.2         
 [21] sctransform_0.4.1         spam_2.10-0              
 [23] askpass_1.2.0             spatstat.sparse_3.0-2    
 [25] minqa_1.2.5               cowplot_1.1.1            
 [27] pbapply_1.7-2             RColorBrewer_1.1-3       
 [29] zlibbioc_1.46.0           abind_1.4-5              
 [31] Rtsne_0.17                purrr_1.0.1              
 [33] RCurl_1.98-1.12           nnet_7.3-19              
 [35] GenomeInfoDbData_1.2.10   ggrepel_0.9.5            
 [37] irlba_2.3.5.1             listenv_0.9.0            
 [39] spatstat.utils_3.0-3      terra_1.7-39             
 [41] umap_0.2.10.0             goftest_1.2-3            
 [43] RSpectra_0.16-1           spatstat.random_3.2-1    
 [45] dqrng_0.3.0               fitdistrplus_1.1-11      
 [47] parallelly_1.37.1         DelayedMatrixStats_1.22.5
 [49] ncdf4_1.22                leiden_0.4.3             
 [51] codetools_0.2-19          DelayedArray_0.26.7      
 [53] tidyselect_1.2.0          shape_1.4.6              
 [55] farver_2.1.1              lme4_1.1-34              
 [57] ScaledMatrix_1.8.1        viridis_0.6.4            
 [59] base64enc_0.1-3           spatstat.explore_3.2-1   
 [61] jsonlite_1.8.7            GetoptLong_1.0.5         
 [63] ellipsis_0.3.2            progressr_0.14.0         
 [65] Formula_1.2-5             ggridges_0.5.4           
 [67] survival_3.5-8            tools_4.3.2              
 [69] ica_1.0-3                 glue_1.7.0               
 [71] gridExtra_2.3             xfun_0.39                
 [73] ggthemes_5.1.0            withr_3.0.0              
 [75] fastmap_1.1.1             boot_1.3-28              
 [77] fansi_1.0.4               openssl_2.1.0            
 [79] digest_0.6.33             rsvd_1.0.5               
 [81] parallelDist_0.2.6        R6_2.5.1                 
 [83] mime_0.12                 colorspace_2.1-0         
 [85] Cairo_1.6-1               scattermore_1.2          
 [87] tensor_1.5                spatstat.data_3.0-1      
 [89] utf8_1.2.3                tidyr_1.3.0              
 [91] generics_0.1.3            data.table_1.15.0        
 [93] httr_1.4.6                htmlwidgets_1.6.2        
 [95] S4Arrays_1.2.0            uwot_0.1.16              
 [97] pkgconfig_2.0.3           gtable_0.3.3             
 [99] lmtest_0.9-40             XVector_0.40.0           
[101] htmltools_0.5.8           dotCall64_1.1-0          
[103] clue_0.3-64               scales_1.3.0             
[105] png_0.1-8                 knitr_1.43               
[107] rstudioapi_0.15.0         reshape2_1.4.4           
[109] rjson_0.2.21              nloptr_2.0.3             
[111] checkmate_2.3.0           nlme_3.1-163             
[113] zoo_1.8-12                GlobalOptions_0.1.2      
[115] KernSmooth_2.23-22        miniUI_0.1.1.1           
[117] foreign_0.8-86            pillar_1.9.0             
[119] vctrs_0.6.3               RANN_2.6.1               
[121] promises_1.2.0.1          BiocSingular_1.16.0      
[123] beachmat_2.16.0           xtable_1.8-4             
[125] cluster_2.1.6             htmlTable_2.4.1          
[127] evaluate_0.21             zeallot_0.1.0            
[129] cli_3.6.1                 compiler_4.3.2           
[131] rlang_1.1.1               crayon_1.5.2             
[133] future.apply_1.11.0       labeling_0.4.2           
[135] plyr_1.8.8                stringi_1.8.1            
[137] viridisLite_0.4.2         deldir_2.0-2             
[139] BiocParallel_1.34.2       assertthat_0.2.1         
[141] munsell_0.5.0             lazyeval_0.2.2           
[143] spatstat.geom_3.2-4       PCAtools_2.14.0          
[145] Matrix_1.6-3              RcppHNSW_0.6.0           
[147] sparseMatrixStats_1.12.2  future_1.33.0            
[149] ggplot2_3.5.0             shiny_1.8.0              
[151] ROCR_1.0-11               igraph_2.0.3