Gene Correlation Analysis E13.5 Mouse Brain

Prologue

library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)

options(parallelly.fork.enable = TRUE)

dataSetFile <- "Data/MouseCortexFromLoom/e13.5_ForebrainDorsal.cotan.RDS"

name <- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")

project = "E13.5"


setLoggingLevel(1)
outDir <- "CoexData/"
setLoggingFile(paste0(outDir, "Logs/",name,".log"))

obj <- readRDS(dataSetFile)
file_code = getMetadataElement(obj, datasetTags()[["cond"]])
source("src/Functions.R")

To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:

  1. Constitutive genes
  2. Neural progenitor genes
  3. Pan neuronal genes
  4. Some layer marker genes
genesList <- list(
  "NPGs"= 
    c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
  "PNGs"= 
    c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
  "hk"= 
    c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
      "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
      "Tars", "Amacr"),
  "layers" = 
    c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6") 
  # From https://www.science.org/doi/10.1126/science.aam8999
)

COTAN

int.genes <-getGenes(obj)
coexMat.big <- getGenesCoex(obj)#[int.genes,int.genes]

coexMat <- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]

f1 = colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))

split.genes <- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
                         rep("HK",length(genesList[["hk"]])),
                         rep("PNGs",length(genesList[["PNGs"]]))
                        ),
                         levels = c("NPGs","HK","PNGs"))

lgd = Legend(col_fun = f1, title = "COTAN coex")

htmp <- Heatmap(as.matrix(coexMat),
        #width = ncol(coexMat)*unit(2.5, "mm"), 
        height = nrow(coexMat)*unit(3, "mm"),
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        col = f1,
        row_names_side = "left",
        row_names_gp = gpar(fontsize = 11),
        column_names_gp  = gpar(fontsize = 11),
        column_split = split.genes,
        row_split = split.genes,
        cluster_row_slices = FALSE, 
    cluster_column_slices = FALSE,
    heatmap_legend_param = list(
        title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
        labels = c("-0.5", "0", "0.5")
    )
   )
draw(htmp, heatmap_legend_side="right")

GDI_DF <- calculateGDI(obj)
GDI_DF$geneType <- NA
for (cat in names(genesList)) {
  GDI_DF[rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
}

GDI_DF$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)

write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))


GDI_DF[unlist(genesList),]
         sum.raw.norm      GDI  exp.cells geneType GDI_centered
Nes          7.581052 4.192489 25.4165830     NPGs   3.04415531
Vim          9.971694 3.984104 76.6512748     NPGs   2.75607580
Sox2         7.082317 3.999433 18.5304156     NPGs   2.77726729
Sox1         5.715170 2.983572  5.6615138     NPGs   1.37290505
Notch1       7.227078 3.984877 20.6384260     NPGs   2.75714514
Hes1         8.449033 4.822367 31.8409958     NPGs   3.91492097
Hes5         9.585092 4.784038 42.6018872     NPGs   3.86193346
Pax6         8.481894 4.385577 44.5492873     NPGs   3.31108627
Map2         9.355155 3.931909 70.5681590     PNGs   2.68391910
Tubb3       10.986131 4.374838 76.9524192     PNGs   3.29624084
Neurod1      5.123468 2.046275  2.8508332     PNGs   0.07715236
Nefm         5.862676 3.085697  5.9024292     PNGs   1.51408601
Nefl         4.741106 2.586671  1.8871713     PNGs   0.82421512
Dcx          8.357463 4.539183 39.6707488     PNGs   3.52343659
Tbr1         7.759295 4.237369 26.0991769     PNGs   3.10619909
Calm1       10.447080 1.385779 97.8919896       hk  -0.83594052
Cox6b1       9.981533 1.441202 96.6673359       hk  -0.75932183
Ppia        10.196964 2.609030 86.8901827       hk   0.85512419
Rpl18       10.790428 1.335148 99.6386268       hk  -0.90593539
Cox7c       10.031070 1.518121 97.1893194       hk  -0.65298677
Erh          9.280735 2.711007 67.8377836       hk   0.99610081
H3f3a       10.665312 1.380775 99.6587031       hk  -0.84285899
Taf1         6.937213 2.168233 17.1049990       hk   0.24575162
Taf2         6.537627 1.310694 12.1863080       hk  -0.93974092
Gapdh        9.979987 2.531673 92.7323831       hk   0.74818409
Actb        11.323008 1.382499 99.4980928       hk  -0.84047559
Golph3       6.990454 1.648246 19.0724754       hk  -0.47309720
Zfr          8.192974 2.212476 47.4001205       hk   0.30691375
Sub1         9.735618 2.343091 92.1903232       hk   0.48748156
Tars         6.946500 1.753805 17.9682795       hk  -0.32716939
Amacr        5.143626 1.632567  3.4932744       hk  -0.49477252
Reln         7.550123 3.069353  8.3718129   layers   1.49149142
Lhx5         4.850935 2.291750  1.5057217   layers   0.41650600
Cux1         8.229655 3.634065 43.1840996   layers   2.27216946
Satb2        7.683171 3.401215 21.5418591   layers   1.95026951
Tle1         7.461375 2.108275 25.6976511   layers   0.16286267
Mef2c        7.773242 3.876656 19.5543064   layers   2.60753655
Rorb         6.274757 1.635946  7.5486850   layers  -0.49010169
Sox5         9.709193 3.707954 64.7460349   layers   2.37431668
Bcl11b       8.848123 4.654283 43.7060831   layers   3.68255551
Fezf2        8.690887 2.925436 54.6878137   layers   1.29253570
Foxp2        7.803267 2.713967 28.2674162   layers   1.00019356
Ntf3         5.006960 2.463725  2.2485445   layers   0.65424957
NA                 NA       NA         NA     <NA>           NA
NA.1               NA       NA         NA     <NA>           NA
Cux2         6.681962 2.670967  9.2551696   layers   0.94074878
Slc17a6      7.136129 3.316106 14.1337081   layers   1.83261146
Sema3c       6.576389 2.740901  8.7733387   layers   1.03742781
Thsd7a       4.705115 1.784039  1.3451114   layers  -0.28537229
Sulf2        5.293881 2.461288  3.2523590   layers   0.65088105
Kcnk2        7.401683 1.957314 21.6422405   layers  -0.04583116
Grik3        5.864890 3.353295  5.0793013   layers   1.88402330
Etv1         5.600431 2.460104  4.1758683   layers   0.64924346
Tle4         6.694944 1.811217 12.5276049   layers  -0.24780084
Tmem200a     4.701951 2.381286  1.8068661   layers   0.54028368
Glra2        6.044741 3.654254  5.6414375   layers   2.30007960
Etv1.1       5.600431 2.460104  4.1758683   layers   0.64924346
NA.2               NA       NA         NA     <NA>           NA
Sulf1        4.529211 2.076179  0.9235093   layers   0.11849227
NA.3               NA       NA         NA     <NA>           NA
Syt6         5.144411 2.559922  2.6902228   layers   0.78723640
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)

Seurat correlation

srat<- CreateSeuratObject(counts = getRawData(obj), 
                          project = project, 
                          min.cells = 3, 
                          min.features = 200)
srat[["percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat <- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(srat)

plot1$data$centered_variance <- scale(plot1$data$variance.standardized,
                                      center = T,scale = F)

write.csv(plot1$data,paste0("CoexData/",
                            "Variance_Seurat_genes",
                            getMetadataElement(obj, 
                                               datasetTags()[["cond"]]),".csv"))

LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)

LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)

all.genes <- rownames(srat)
srat <- ScaleData(srat, features = all.genes)
seurat.data = GetAssayData(srat[["RNA"]],layer = "data")
corr.pval.list <- correlation_pvalues(data = seurat.data,
                                      int.genes,
                                      n.cells = getNumCells(obj))

seurat.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(seurat.data.cor.big, 
                         genesList, title="Seurat corr")


p_values.fromSeurat <- corr.pval.list$p_values
seurat.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   7130858  380.9   11454895   611.8   11454895   611.8
Vcells 840516207 6412.7 1342169060 10240.0 1342169060 10240.0
draw(htmp, heatmap_legend_side="right")

Seurat SC Transform

srat <-  SCTransform(srat, 
                     method = "glmGamPoi", 
                     vars.to.regress = "percent.mt", 
                     verbose = FALSE)

seurat.data <- GetAssayData(srat[["SCT"]],layer = "data")

#Remove genes with all zeros
seurat.data <-seurat.data[rowSums(seurat.data) > 0,]


corr.pval.list <- correlation_pvalues(seurat.data,
                                      int.genes,
                                      n.cells = getNumCells(obj))

seurat.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(seurat.data.cor.big, 
                         genesList, title="Seurat corr SCT")



p_values.fromSeurat <- corr.pval.list$p_values
seurat.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   8859943  473.2   16748832   894.5   12021442   642.1
Vcells 861708025 6574.4 1932899446 14746.9 1610675510 12288.5
draw(htmp, heatmap_legend_side="right")

Monocle

library(monocle3)
cds <- new_cell_data_set(getRawData(obj),
                         cell_metadata = getMetadataCells(obj),
                         gene_metadata = getMetadataGenes(obj)
                         )
cds <- preprocess_cds(cds, num_dim = 100)

normalized_counts <- normalized_counts(cds)
#Remove genes with all zeros
normalized_counts <- normalized_counts[rowSums(normalized_counts) > 0,]


corr.pval.list <- correlation_pvalues(normalized_counts,
                                      int.genes,
                                      n.cells = getNumCells(obj))

rm(normalized_counts)

monocle.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))

htmp <- correlation_plot(data.cor.big = monocle.data.cor.big,
                         genesList,
                         title = "Monocle corr")


p_values.from.monocle <- corr.pval.list$p_values
monocle.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   9971655  532.6   16748832   894.5   16748832   894.5
Vcells 896560359 6840.3 2062053399 15732.3 2062053399 15732.3
draw(htmp, heatmap_legend_side="right")

ScanPy

library(reticulate)

dirOutScP <- paste0("CoexData/ScanPy/")
if (!dir.exists(dirOutScP)) {
  dir.create(dirOutScP)
}

Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
  py <- import("sys")
  
  source_python("src/scanpyGenesExpression.py")
  scanpyFDR(getRawData(obj), 
                  getMetadataCells(obj), 
                  getMetadataGenes(obj), 
                   "mt", 
                   dirOutScP, 
                   file_code,
            int.genes)

normalized_counts <- read.csv(paste0(dirOutScP,
                                     file_code,"_Scampy_expression_all_genes.gz"),header = T,row.names = 1)

normalized_counts <- t(normalized_counts)
#Remove genes with all zeros
normalized_counts <-normalized_counts[rowSums(normalized_counts) > 0,]


corr.pval.list <- correlation_pvalues(normalized_counts,
                                      int.genes,
                                      n.cells = getNumCells(obj))

ScanPy.data.cor.big <- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))


htmp <- correlation_plot(data.cor.big = ScanPy.data.cor.big,
                         genesList,
                         title = "ScanPy corr")

p_values.from.ScanPy <- corr.pval.list$p_values
ScanPy.data.cor.big <- corr.pval.list$data.cor

rm(corr.pval.list)
gc()
            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   9994943  533.8   16748832   894.5   16748832   894.5
Vcells 966228182 7371.8 2062053399 15732.3 2062053399 15732.3
draw(htmp, heatmap_legend_side="right")


Sys.time()
[1] "2024-05-06 11:52:16 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
[1] C.UTF-8

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
 [1] stats4    parallel  grid      stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] reticulate_1.36.1           monocle3_1.3.4             
 [3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
 [5] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
 [7] IRanges_2.34.1              S4Vectors_0.38.1           
 [9] MatrixGenerics_1.12.3       matrixStats_1.2.0          
[11] Biobase_2.60.0              BiocGenerics_0.46.0        
[13] fstcore_0.9.18              fst_0.9.8                  
[15] stringr_1.5.0               HiClimR_2.2.1              
[17] doParallel_1.0.17           iterators_1.0.14           
[19] foreach_1.5.2               Rfast_2.1.0                
[21] RcppParallel_5.1.7          RcppZiggurat_0.1.6         
[23] Rcpp_1.0.11                 patchwork_1.2.0            
[25] Seurat_5.0.0                SeuratObject_5.0.0         
[27] sp_2.1-1                    Hmisc_5.1-0                
[29] dplyr_1.1.2                 circlize_0.4.15            
[31] ComplexHeatmap_2.16.0       COTAN_2.5.0                

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.21          splines_4.3.2            
  [3] later_1.3.1               bitops_1.0-7             
  [5] tibble_3.2.1              polyclip_1.10-4          
  [7] rpart_4.1.23              fastDummies_1.7.3        
  [9] lifecycle_1.0.3           globals_0.16.2           
 [11] lattice_0.22-5            MASS_7.3-60              
 [13] backports_1.4.1           dendextend_1.17.1        
 [15] magrittr_2.0.3            plotly_4.10.2            
 [17] rmarkdown_2.24            yaml_2.3.7               
 [19] httpuv_1.6.11             glmGamPoi_1.12.2         
 [21] sctransform_0.4.1         spam_2.10-0              
 [23] askpass_1.2.0             spatstat.sparse_3.0-2    
 [25] minqa_1.2.5               cowplot_1.1.1            
 [27] pbapply_1.7-2             RColorBrewer_1.1-3       
 [29] zlibbioc_1.46.0           abind_1.4-5              
 [31] Rtsne_0.17                purrr_1.0.1              
 [33] RCurl_1.98-1.12           nnet_7.3-19              
 [35] GenomeInfoDbData_1.2.10   ggrepel_0.9.5            
 [37] irlba_2.3.5.1             listenv_0.9.0            
 [39] spatstat.utils_3.0-3      terra_1.7-39             
 [41] umap_0.2.10.0             goftest_1.2-3            
 [43] RSpectra_0.16-1           spatstat.random_3.2-1    
 [45] dqrng_0.3.0               fitdistrplus_1.1-11      
 [47] parallelly_1.37.1         DelayedMatrixStats_1.22.5
 [49] ncdf4_1.22                leiden_0.4.3             
 [51] codetools_0.2-19          DelayedArray_0.26.7      
 [53] tidyselect_1.2.0          shape_1.4.6              
 [55] farver_2.1.1              lme4_1.1-34              
 [57] ScaledMatrix_1.8.1        viridis_0.6.4            
 [59] base64enc_0.1-3           spatstat.explore_3.2-1   
 [61] jsonlite_1.8.7            GetoptLong_1.0.5         
 [63] ellipsis_0.3.2            progressr_0.14.0         
 [65] Formula_1.2-5             ggridges_0.5.4           
 [67] survival_3.5-8            tools_4.3.2              
 [69] ica_1.0-3                 glue_1.7.0               
 [71] gridExtra_2.3             xfun_0.39                
 [73] ggthemes_5.1.0            withr_3.0.0              
 [75] fastmap_1.1.1             boot_1.3-28              
 [77] fansi_1.0.4               openssl_2.1.0            
 [79] digest_0.6.33             rsvd_1.0.5               
 [81] parallelDist_0.2.6        R6_2.5.1                 
 [83] mime_0.12                 colorspace_2.1-0         
 [85] Cairo_1.6-1               scattermore_1.2          
 [87] tensor_1.5                spatstat.data_3.0-1      
 [89] utf8_1.2.3                tidyr_1.3.0              
 [91] generics_0.1.3            data.table_1.15.0        
 [93] httr_1.4.6                htmlwidgets_1.6.2        
 [95] S4Arrays_1.2.0            uwot_0.1.16              
 [97] pkgconfig_2.0.3           gtable_0.3.3             
 [99] lmtest_0.9-40             XVector_0.40.0           
[101] htmltools_0.5.8           dotCall64_1.1-0          
[103] clue_0.3-64               scales_1.3.0             
[105] png_0.1-8                 knitr_1.43               
[107] rstudioapi_0.15.0         reshape2_1.4.4           
[109] rjson_0.2.21              nloptr_2.0.3             
[111] checkmate_2.3.0           nlme_3.1-163             
[113] zoo_1.8-12                GlobalOptions_0.1.2      
[115] KernSmooth_2.23-22        miniUI_0.1.1.1           
[117] foreign_0.8-86            pillar_1.9.0             
[119] vctrs_0.6.3               RANN_2.6.1               
[121] promises_1.2.0.1          BiocSingular_1.16.0      
[123] beachmat_2.16.0           xtable_1.8-4             
[125] cluster_2.1.6             htmlTable_2.4.1          
[127] evaluate_0.21             zeallot_0.1.0            
[129] cli_3.6.1                 compiler_4.3.2           
[131] rlang_1.1.1               crayon_1.5.2             
[133] future.apply_1.11.0       labeling_0.4.2           
[135] plyr_1.8.8                stringi_1.8.1            
[137] viridisLite_0.4.2         deldir_2.0-2             
[139] BiocParallel_1.34.2       assertthat_0.2.1         
[141] munsell_0.5.0             lazyeval_0.2.2           
[143] spatstat.geom_3.2-4       PCAtools_2.14.0          
[145] Matrix_1.6-3              RcppHNSW_0.6.0           
[147] sparseMatrixStats_1.12.2  future_1.33.0            
[149] ggplot2_3.5.0             shiny_1.8.0              
[151] ROCR_1.0-11               igraph_2.0.3