Gene Correlation Analysis E17.5 Mouse Brain

Prologue

library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)

options(parallelly.fork.enable = TRUE)

dataSetFile <- "Data/MouseCortexFromLoom/e17.5_ForebrainDorsal.cotan.RDS"
name <- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")

project = "E17.5"


setLoggingLevel(1)
outDir <- "CoexData/"
setLoggingFile(paste0(outDir, "Logs/",name,".log"))

obj <- readRDS(dataSetFile)
file_code = getMetadataElement(obj, datasetTags()[["cond"]])
source("src/Functions.R")

To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:

  1. Constitutive genes
  2. Neural progenitor genes
  3. Pan neuronal genes
  4. Some layer marker genes
genesList <- list(
  "NPGs"= 
    c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
  "PNGs"= 
    c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
  "hk"= 
    c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
      "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
      "Tars", "Amacr"),
  "layers" = 
    c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6") 
  # From https://www.science.org/doi/10.1126/science.aam8999
)

COTAN

int.genes <-getGenes(obj)
coexMat.big <- getGenesCoex(obj)#[int.genes,int.genes]

coexMat <- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]

f1 = colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))

split.genes <- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
                         rep("HK",length(genesList[["hk"]])),
                         rep("PNGs",length(genesList[["PNGs"]]))
                        ),
                         levels = c("NPGs","HK","PNGs"))

lgd = Legend(col_fun = f1, title = "COTAN coex")

htmp <- Heatmap(as.matrix(coexMat),
        #width = ncol(coexMat)*unit(2.5, "mm"), 
        height = nrow(coexMat)*unit(3, "mm"),
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        col = f1,
        row_names_side = "left",
        row_names_gp = gpar(fontsize = 11),
        column_names_gp  = gpar(fontsize = 11),
        column_split = split.genes,
        row_split = split.genes,
        cluster_row_slices = FALSE, 
    cluster_column_slices = FALSE,
    heatmap_legend_param = list(
        title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
        labels = c("-0.5", "0", "0.5")
    )
   )
draw(htmp, heatmap_legend_side="right")

GDI_DF <- calculateGDI(obj)
GDI_DF$geneType <- NA
for (cat in names(genesList)) {
  GDI_DF[rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
}

GDI_DF$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)

write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))


GDI_DF[unlist(genesList),]
         sum.raw.norm      GDI  exp.cells geneType GDI_centered
Nes          5.354860 2.677591  7.6611269     NPGs   2.12164684
Vim          7.975351 3.653910 23.7130118     NPGs   4.21500654
Sox2         5.379522 3.446151  6.6477503     NPGs   3.76954465
Sox1         4.826847 2.321489  3.8913660     NPGs   1.35811585
Notch1       5.491804 3.644973  8.1070126     NPGs   4.19584566
Hes1         5.241888 3.715489  4.4183218     NPGs   4.34704054
Hes5         6.367234 3.706690  6.9720308     NPGs   4.32817408
Pax6         6.445184 3.410841 15.2006486     NPGs   3.69383388
Map2         8.945220 2.919301 87.3530604     PNGs   2.63990577
Tubb3       10.669909 3.802683 93.9602756     PNGs   4.53399718
Neurod1      4.512176 1.570675  3.2833401     PNGs  -0.25172970
Nefm         5.320286 2.010487  6.2829347     PNGs   0.69128553
Nefl         5.592561 2.179108  5.2695582     PNGs   1.05283171
Dcx          8.048568 2.850882 60.6404540     PNGs   2.49320624
Tbr1         6.829413 1.953272 23.6724767     PNGs   0.56860885
Calm1        9.893664 1.604866 97.6489664       hk  -0.17841926
Cox6b1       9.028070 2.184997 90.8796109       hk   1.06545895
Ppia         9.189622 2.372214 90.7580057       hk   1.46687705
Rpl18        9.554798 2.279009 96.3518443       hk   1.26703311
Cox7c        9.114185 2.170535 91.9740576       hk   1.03444998
Erh          7.929830 2.390307 62.5050669       hk   1.50567107
H3f3a        9.697620 2.349951 96.3518443       hk   1.41914319
Taf1         6.206264 1.475777 16.9436563       hk  -0.45520422
Taf2         5.648276 1.342082 10.2959060       hk  -0.74186428
Gapdh        8.621629 2.367727 80.5026348       hk   1.45725765
Actb        10.484376 1.292044 99.6351844       hk  -0.84915323
Golph3       5.947435 1.643738 13.8629915       hk  -0.09507358
Zfr          7.359818 1.675085 43.6157276       hk  -0.02786120
Sub1         8.434336 2.419785 75.3952169       hk   1.56887719
Tars         5.930970 1.571117 13.5387110       hk  -0.25078126
Amacr        4.339515 1.454028  2.9995946       hk  -0.50183660
Reln         6.723819 2.232753  4.7020673   layers   1.16785364
Lhx5         4.079754 2.579243  1.3376571   layers   1.91077584
Cux1         7.968038 2.190834 50.0202675   layers   1.07797409
Satb2        9.062287 3.390857 57.7219295   layers   3.65098593
Tle1         6.594351 1.618541 21.7673287   layers  -0.14909808
Mef2c        9.071730 3.175241 51.6011350   layers   3.18867667
Rorb         6.282362 1.926899 13.1333604   layers   0.51206224
Sox5         8.049776 2.285548 43.4130523   layers   1.28105506
Bcl11b       7.109492 2.116159 21.4835833   layers   0.91786051
Fezf2        6.506386 2.096344 18.1597081   layers   0.87537638
Foxp2        6.557695 2.455294 14.4304824   layers   1.64501202
Ntf3         6.051423 2.225961  6.9314957   layers   1.15329231
Rasgrf2      4.096064 1.795794  2.0267531   layers   0.23095491
NA                 NA       NA         NA     <NA>           NA
Cux2         7.067401 2.518402 26.4288610   layers   1.78032434
Slc17a6      6.228895 2.065176 14.4304824   layers   0.80854728
Sema3c       7.553723 2.676192 31.6984191   layers   2.11864837
Thsd7a       5.951856 1.810789  9.6068099   layers   0.26310666
Sulf2        4.348066 1.725374  2.3915687   layers   0.07996546
Kcnk2        7.512577 1.895654 41.7511147   layers   0.44506877
Grik3        5.173332 1.974505  4.4993920   layers   0.61413535
Etv1         5.383398 2.639083  5.8775841   layers   2.03908090
Tle4         5.664300 1.881239  8.4718281   layers   0.41416114
Tmem200a     5.235004 2.081666  6.3234698   layers   0.84390268
Glra2        6.076633 2.094562 11.2687475   layers   0.87155349
Etv1.1       5.383398 2.639083  5.8775841   layers   2.03908090
Htr1f        3.780477 1.777503  1.2160519   layers   0.19173786
Sulf1        2.631257 1.128008  0.3648156   layers  -1.20086709
NA.1               NA       NA         NA     <NA>           NA
Syt6         4.757520 1.609693  2.6347791   layers  -0.16807098
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)

Seurat correlation

srat<- CreateSeuratObject(counts = getRawData(obj), 
                          project = project, 
                          min.cells = 3, 
                          min.features = 200)
srat[["percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat <- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(srat)

plot1$data$centered_variance <- scale(plot1$data$variance.standardized,
                                      center = T,scale = F)

write.csv(plot1$data,paste0("CoexData/",
                            "Variance_Seurat_genes",
                            getMetadataElement(obj, 
                                               datasetTags()[["cond"]]),".csv"))

LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)

LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)

all.genes <- rownames(srat)
srat <- ScaleData(srat, features = all.genes)
seurat.data = GetAssayData(srat[["RNA"]],layer = "data")
corr.pval.list <- correlation_pvalues(data = seurat.data,
                                      int.genes,
                                      n.cells = getNumCells(obj))

seurat.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(seurat.data.cor.big, 
                         genesList, title="Seurat corr")


p_values.fromSeurat <- corr.pval.list$p_values
seurat.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger   (Mb)   max used   (Mb)
Ncells   7121123  380.4   11454522  611.8   11454522  611.8
Vcells 766000052 5844.2 1271129210 9698.0 1191235271 9088.5
draw(htmp, heatmap_legend_side="right")

Seurat SC Transform

srat <-  SCTransform(srat, 
                     method = "glmGamPoi", 
                     vars.to.regress = "percent.mt", 
                     verbose = FALSE)

seurat.data <- GetAssayData(srat[["SCT"]],layer = "data")

#Remove genes with all zeros
seurat.data <-seurat.data[rowSums(seurat.data) > 0,]


corr.pval.list <- correlation_pvalues(seurat.data,
                                      int.genes,
                                      n.cells = getNumCells(obj))

seurat.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(seurat.data.cor.big, 
                         genesList, title="Seurat corr SCT")



p_values.fromSeurat <- corr.pval.list$p_values
seurat.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   8850105  472.7   16721112   893.1   13569547   724.7
Vcells 721897666 5507.7 1757477180 13408.5 1525429261 11638.2
draw(htmp, heatmap_legend_side="right")

Monocle

library(monocle3)
cds <- new_cell_data_set(getRawData(obj),
                         cell_metadata = getMetadataCells(obj),
                         gene_metadata = getMetadataGenes(obj)
                         )
cds <- preprocess_cds(cds, num_dim = 100)

normalized_counts <- normalized_counts(cds)
#Remove genes with all zeros
normalized_counts <- normalized_counts[rowSums(normalized_counts) > 0,]


corr.pval.list <- correlation_pvalues(normalized_counts,
                                      int.genes,
                                      n.cells = getNumCells(obj))

rm(normalized_counts)

monocle.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(data.cor.big = monocle.data.cor.big,
                         genesList,
                         title = "Monocle corr")


p_values.from.monocle <- corr.pval.list$p_values
monocle.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   9961818  532.1   16721112   893.1   16721112   893.1
Vcells 795023729 6065.6 1757477180 13408.5 1757464389 13408.4
draw(htmp, heatmap_legend_side="right")

ScanPy

library(reticulate)

dirOutScP <- paste0("CoexData/ScanPy/")
if (!dir.exists(dirOutScP)) {
  dir.create(dirOutScP)
}

Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
  py <- import("sys")
  
  source_python("src/scanpyGenesExpression.py")
  scanpyFDR(getRawData(obj), 
                  getMetadataCells(obj), 
                  getMetadataGenes(obj), 
                   "mt", 
                   dirOutScP, 
                   file_code,
            int.genes)

normalized_counts <- read.csv(paste0(dirOutScP,
                                     file_code,"_Scampy_expression_all_genes.gz"),header = T,row.names = 1)

normalized_counts <- t(normalized_counts)
#Remove genes with all zeros
normalized_counts <-normalized_counts[rowSums(normalized_counts) > 0,]


corr.pval.list <- correlation_pvalues(normalized_counts,
                                      int.genes,
                                      n.cells = getNumCells(obj))

ScanPy.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))


htmp <- correlation_plot(data.cor.big = ScanPy.data.cor.big,
                         genesList,
                         title = "ScanPy corr")

p_values.from.ScanPy <- corr.pval.list$p_values
ScanPy.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   9985106  533.3   16721112   893.1   16721112   893.1
Vcells 829560962 6329.1 1757477180 13408.5 1757476100 13408.5
draw(htmp, heatmap_legend_side="right")


Sys.time()
[1] "2024-05-06 13:29:24 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
[1] C.UTF-8

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
 [1] stats4    parallel  grid      stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] reticulate_1.36.1           monocle3_1.3.4             
 [3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
 [5] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
 [7] IRanges_2.34.1              S4Vectors_0.38.1           
 [9] MatrixGenerics_1.12.3       matrixStats_1.2.0          
[11] Biobase_2.60.0              BiocGenerics_0.46.0        
[13] fstcore_0.9.18              fst_0.9.8                  
[15] stringr_1.5.0               HiClimR_2.2.1              
[17] doParallel_1.0.17           iterators_1.0.14           
[19] foreach_1.5.2               Rfast_2.1.0                
[21] RcppParallel_5.1.7          RcppZiggurat_0.1.6         
[23] Rcpp_1.0.11                 patchwork_1.2.0            
[25] Seurat_5.0.0                SeuratObject_5.0.0         
[27] sp_2.1-1                    Hmisc_5.1-0                
[29] dplyr_1.1.2                 circlize_0.4.15            
[31] ComplexHeatmap_2.16.0       COTAN_2.5.0                

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.21          splines_4.3.2            
  [3] later_1.3.1               bitops_1.0-7             
  [5] tibble_3.2.1              polyclip_1.10-4          
  [7] rpart_4.1.23              fastDummies_1.7.3        
  [9] lifecycle_1.0.3           globals_0.16.2           
 [11] lattice_0.22-5            MASS_7.3-60              
 [13] backports_1.4.1           dendextend_1.17.1        
 [15] magrittr_2.0.3            plotly_4.10.2            
 [17] rmarkdown_2.24            yaml_2.3.7               
 [19] httpuv_1.6.11             glmGamPoi_1.12.2         
 [21] sctransform_0.4.1         spam_2.10-0              
 [23] askpass_1.2.0             spatstat.sparse_3.0-2    
 [25] minqa_1.2.5               cowplot_1.1.1            
 [27] pbapply_1.7-2             RColorBrewer_1.1-3       
 [29] zlibbioc_1.46.0           abind_1.4-5              
 [31] Rtsne_0.17                purrr_1.0.1              
 [33] RCurl_1.98-1.12           nnet_7.3-19              
 [35] GenomeInfoDbData_1.2.10   ggrepel_0.9.5            
 [37] irlba_2.3.5.1             listenv_0.9.0            
 [39] spatstat.utils_3.0-3      terra_1.7-39             
 [41] umap_0.2.10.0             goftest_1.2-3            
 [43] RSpectra_0.16-1           spatstat.random_3.2-1    
 [45] dqrng_0.3.0               fitdistrplus_1.1-11      
 [47] parallelly_1.37.1         DelayedMatrixStats_1.22.5
 [49] ncdf4_1.22                leiden_0.4.3             
 [51] codetools_0.2-19          DelayedArray_0.26.7      
 [53] tidyselect_1.2.0          shape_1.4.6              
 [55] farver_2.1.1              lme4_1.1-34              
 [57] ScaledMatrix_1.8.1        viridis_0.6.4            
 [59] base64enc_0.1-3           spatstat.explore_3.2-1   
 [61] jsonlite_1.8.7            GetoptLong_1.0.5         
 [63] ellipsis_0.3.2            progressr_0.14.0         
 [65] Formula_1.2-5             ggridges_0.5.4           
 [67] survival_3.5-8            tools_4.3.2              
 [69] ica_1.0-3                 glue_1.7.0               
 [71] gridExtra_2.3             xfun_0.39                
 [73] ggthemes_5.1.0            withr_3.0.0              
 [75] fastmap_1.1.1             boot_1.3-28              
 [77] fansi_1.0.4               openssl_2.1.0            
 [79] digest_0.6.33             rsvd_1.0.5               
 [81] parallelDist_0.2.6        R6_2.5.1                 
 [83] mime_0.12                 colorspace_2.1-0         
 [85] Cairo_1.6-1               scattermore_1.2          
 [87] tensor_1.5                spatstat.data_3.0-1      
 [89] utf8_1.2.3                tidyr_1.3.0              
 [91] generics_0.1.3            data.table_1.15.0        
 [93] httr_1.4.6                htmlwidgets_1.6.2        
 [95] S4Arrays_1.2.0            uwot_0.1.16              
 [97] pkgconfig_2.0.3           gtable_0.3.3             
 [99] lmtest_0.9-40             XVector_0.40.0           
[101] htmltools_0.5.8           dotCall64_1.1-0          
[103] clue_0.3-64               scales_1.3.0             
[105] png_0.1-8                 knitr_1.43               
[107] rstudioapi_0.15.0         reshape2_1.4.4           
[109] rjson_0.2.21              nloptr_2.0.3             
[111] checkmate_2.3.0           nlme_3.1-163             
[113] zoo_1.8-12                GlobalOptions_0.1.2      
[115] KernSmooth_2.23-22        miniUI_0.1.1.1           
[117] foreign_0.8-86            pillar_1.9.0             
[119] vctrs_0.6.3               RANN_2.6.1               
[121] promises_1.2.0.1          BiocSingular_1.16.0      
[123] beachmat_2.16.0           xtable_1.8-4             
[125] cluster_2.1.6             htmlTable_2.4.1          
[127] evaluate_0.21             zeallot_0.1.0            
[129] cli_3.6.1                 compiler_4.3.2           
[131] rlang_1.1.1               crayon_1.5.2             
[133] future.apply_1.11.0       labeling_0.4.2           
[135] plyr_1.8.8                stringi_1.8.1            
[137] viridisLite_0.4.2         deldir_2.0-2             
[139] BiocParallel_1.34.2       assertthat_0.2.1         
[141] munsell_0.5.0             lazyeval_0.2.2           
[143] spatstat.geom_3.2-4       PCAtools_2.14.0          
[145] Matrix_1.6-3              RcppHNSW_0.6.0           
[147] sparseMatrixStats_1.12.2  future_1.33.0            
[149] ggplot2_3.5.0             shiny_1.8.0              
[151] ROCR_1.0-11               igraph_2.0.3