library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)
options(parallelly.fork.enable = TRUE)
<- "Data/MouseCortex/MouseCortex_E14.5.cotan.RDS"
dataSetFile <- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")
name
= "E14.5"
project
setLoggingLevel(1)
<- "CoexData/"
outDir setLoggingFile(paste0(outDir, "Logs/",name,".log"))
<- readRDS(dataSetFile)
obj = getMetadataElement(obj, datasetTags()[["cond"]]) file_code
Gene Correlation Analysis E14.5 Mouse Cortex
Prologue
source("src/Functions.R")
To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:
- Constitutive genes
- Neural progenitor genes
- Pan neuronal genes
- Some layer marker genes
<- list(
genesList "NPGs"=
c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
"PNGs"=
c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
"hk"=
c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
"Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
"Tars", "Amacr"),
"layers" =
c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6")
# From https://www.science.org/doi/10.1126/science.aam8999
)
COTAN
<-getGenes(obj) int.genes
<- getGenesCoex(obj)
coexMat.big
<- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]
coexMat
= colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))
f1
<- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
split.genes rep("HK",length(genesList[["hk"]])),
rep("PNGs",length(genesList[["PNGs"]]))
),levels = c("NPGs","HK","PNGs"))
= Legend(col_fun = f1, title = "COTAN coex")
lgd
<- Heatmap(as.matrix(coexMat),
htmp #width = ncol(coexMat)*unit(2.5, "mm"),
height = nrow(coexMat)*unit(3, "mm"),
cluster_rows = FALSE,
cluster_columns = FALSE,
col = f1,
row_names_side = "left",
row_names_gp = gpar(fontsize = 11),
column_names_gp = gpar(fontsize = 11),
column_split = split.genes,
row_split = split.genes,
cluster_row_slices = FALSE,
cluster_column_slices = FALSE,
heatmap_legend_param = list(
title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
labels = c("-0.5", "0", "0.5")
)
)draw(htmp, heatmap_legend_side="right")
<- calculateGDI(obj)
GDI_DF $geneType <- NA
GDI_DFfor (cat in names(genesList)) {
rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
GDI_DF[
}
$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)
GDI_DF
write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))
unlist(genesList),] GDI_DF[
sum.raw.norm GDI exp.cells geneType GDI_centered
Nes 8.839957 4.678500 27.7337997 NPGs 3.63695691
Vim 10.246463 4.534465 51.0309278 NPGs 3.42694609
Sox2 9.124228 4.566666 31.4709131 NPGs 3.47389659
Sox1 7.368458 3.455731 9.5636966 NPGs 1.85409012
Notch1 7.469284 4.355698 11.6623711 NPGs 3.16629269
Hes1 7.698079 4.634264 10.5946244 NPGs 3.57245806
Hes5 9.405265 4.812530 22.7908689 NPGs 3.83238101
Pax6 8.144613 4.367938 17.3877025 NPGs 3.18413945
Map2 10.172685 3.768818 72.7172312 PNGs 2.31058907
Tubb3 11.537925 4.586213 88.3836524 PNGs 3.50239807
Neurod1 8.881391 3.388955 22.9657585 PNGs 1.75672703
Nefm 8.288330 3.346732 17.5441826 PNGs 1.69516332
Nefl 7.678477 3.544436 8.6340206 PNGs 1.98342736
Dcx 10.120356 4.821636 66.9090574 PNGs 3.84565720
Tbr1 8.472723 3.926272 24.1715758 PNGs 2.54016525
Calm1 11.491577 1.857652 96.5114138 hk -0.47600225
Cox6b1 10.398257 1.992891 84.6833579 hk -0.27881530
Ppia 7.666735 2.386108 17.4797496 hk 0.29451755
Rpl18 9.176712 2.575898 51.5187776 hk 0.57124278
Cox7c 8.439961 1.902976 31.8206922 hk -0.40991709
Erh 6.593118 2.307720 6.5721649 hk 0.18022355
H3f3a 9.388420 1.886267 58.1093520 hk -0.43428010
Taf1 8.315368 2.138643 26.8869661 hk -0.06630076
Taf2 8.017975 1.861938 21.1432253 hk -0.46975224
Gapdh 5.018736 1.853094 1.5648012 hk -0.48264772
Actb 12.609181 1.492016 99.6778351 hk -1.00911996
Golph3 7.150183 1.851371 10.8891753 hk -0.48515921
Zfr 9.269238 2.273001 50.7916053 hk 0.12960056
Sub1 10.018254 2.583820 74.7238586 hk 0.58279347
Tars 8.056910 2.487556 22.8645066 hk 0.44243393
Amacr 5.492910 1.714319 2.2551546 hk -0.68499008
Reln 7.095360 3.011725 4.5563328 layers 1.20670295
Lhx5 6.037778 2.901306 2.0434462 layers 1.04570542
Cux1 8.597262 3.026589 31.3512518 layers 1.22837610
Satb2 7.603191 3.121471 11.0916789 layers 1.36671855
Tle1 7.891909 2.322600 17.8111193 layers 0.20191897
Mef2c 8.776162 4.057560 23.6377025 layers 2.73159053
Rorb 6.523092 1.943768 4.6023564 layers -0.35043950
Sox5 9.301863 3.584350 38.2639912 layers 2.04162343
Bcl11b 9.860621 4.189936 53.4057437 layers 2.92460213
Fezf2 8.804060 3.630239 28.8751841 layers 2.10853283
Foxp2 7.474618 2.254280 8.4867452 layers 0.10230519
Ntf3 4.673267 2.314479 0.8560383 layers 0.19007802
NA NA NA NA <NA> NA
Pvrl3 7.255727 3.417288 10.6866716 layers 1.79803751
Cux2 7.652690 3.088322 11.2573638 layers 1.31838644
Slc17a6 7.901396 2.998305 14.0648012 layers 1.18713575
Sema3c 7.222013 2.481806 7.6307069 layers 0.43405122
Thsd7a 7.472372 3.054089 8.0449190 layers 1.26847311
Sulf2 6.491056 2.379550 4.6759941 layers 0.28495572
Kcnk2 7.151877 1.901258 9.0390280 layers -0.41242163
Grik3 6.673233 2.781234 5.3019146 layers 0.87063375
Etv1 6.743118 2.264998 4.8232695 layers 0.11793173
Tle4 7.880978 2.635418 15.0405007 layers 0.65802538
Tmem200a 6.188517 2.926026 3.8751841 layers 1.08174899
Glra2 7.057638 3.858495 8.1369661 layers 2.44134235
Etv1.1 6.743118 2.264998 4.8232695 layers 0.11793173
NA.1 NA NA NA <NA> NA
Sulf1 5.465717 2.461515 1.0125184 layers 0.40446505
NA.2 NA NA NA <NA> NA
Syt6 7.555334 3.052113 8.8457290 layers 1.26559064
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)
Seurat correlation
<- CreateSeuratObject(counts = getRawData(obj),
sratproject = project,
min.cells = 3,
min.features = 200)
"percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat[[<- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)
srat
# plot variable features with and without labels
<- VariableFeaturePlot(srat)
plot1
$data$centered_variance <- scale(plot1$data$variance.standardized,
plot1center = T,scale = F)
write.csv(plot1$data,paste0("CoexData/",
"Variance_Seurat_genes",
getMetadataElement(obj,
datasetTags()[["cond"]]),".csv"))
LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)
LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)
<- rownames(srat)
all.genes <- ScaleData(srat, features = all.genes)
srat = GetAssayData(srat[["RNA"]],layer = "data") seurat.data
<- correlation_pvalues(data = seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 7120021 380.3 11454531 611.8 11454531 611.8
Vcells 940272073 7173.8 1885882011 14388.2 1571501676 11989.7
draw(htmp, heatmap_legend_side="right")
Seurat SC Transform
<- SCTransform(srat,
srat method = "glmGamPoi",
vars.to.regress = "percent.mt",
verbose = FALSE)
<- GetAssayData(srat[["SCT"]],layer = "data")
seurat.data
#Remove genes with all zeros
<-seurat.data[rowSums(seurat.data) > 0,]
seurat.data
<- correlation_pvalues(seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr SCT")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 8854790 472.9 16718127 892.9 12172573 650.1
Vcells 1002434648 7648.0 2263138413 17266.4 1885879002 14388.2
draw(htmp, heatmap_legend_side="right")
Monocle
library(monocle3)
<- new_cell_data_set(getRawData(obj),
cds cell_metadata = getMetadataCells(obj),
gene_metadata = getMetadataGenes(obj)
)<- preprocess_cds(cds, num_dim = 100)
cds
<- normalized_counts(cds) normalized_counts
#Remove genes with all zeros
<- normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
rm(normalized_counts)
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
monocle.data.cor.big
<- correlation_plot(data.cor.big = monocle.data.cor.big,
htmp
genesList,title = "Monocle corr")
<- corr.pval.list$p_values
p_values.from.monocle <- corr.pval.list$data.cor
monocle.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9966472 532.3 16718127 892.9 16718127 892.9
Vcells 1006589306 7679.7 2358214745 17991.8 2358214745 17991.8
draw(htmp, heatmap_legend_side="right")
ScanPy
library(reticulate)
<- paste0("CoexData/ScanPy/")
dirOutScP if (!dir.exists(dirOutScP)) {
dir.create(dirOutScP)
}
Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
<- import("sys")
py
source_python("src/scanpyGenesExpression.py")
scanpyFDR(getRawData(obj),
getMetadataCells(obj),
getMetadataGenes(obj),
"mt",
dirOutScP,
file_code,
int.genes)
<- read.csv(paste0(dirOutScP,
normalized_counts "_Scampy_expression_all_genes.gz"),header = T,row.names = 1)
file_code,
<- t(normalized_counts) normalized_counts
#Remove genes with all zeros
<-normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
ScanPy.data.cor.big
<- correlation_plot(data.cor.big = ScanPy.data.cor.big,
htmp
genesList,title = "ScanPy corr")
<- corr.pval.list$p_values
p_values.from.ScanPy <- corr.pval.list$data.cor
ScanPy.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9990389 533.6 16718127 892.9 16718127 892.9
Vcells 1104918338 8429.9 2358214745 17991.8 2358214745 17991.8
draw(htmp, heatmap_legend_side="right")
Sys.time()
[1] "2024-05-08 19:37:36 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] C.UTF-8
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] reticulate_1.36.1 monocle3_1.3.4
[3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[5] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
[7] IRanges_2.34.1 S4Vectors_0.38.1
[9] MatrixGenerics_1.12.3 matrixStats_1.2.0
[11] Biobase_2.60.0 BiocGenerics_0.46.0
[13] fstcore_0.9.18 fst_0.9.8
[15] stringr_1.5.0 HiClimR_2.2.1
[17] doParallel_1.0.17 iterators_1.0.14
[19] foreach_1.5.2 Rfast_2.1.0
[21] RcppParallel_5.1.7 RcppZiggurat_0.1.6
[23] Rcpp_1.0.11 patchwork_1.2.0
[25] Seurat_5.0.0 SeuratObject_5.0.0
[27] sp_2.1-1 Hmisc_5.1-0
[29] dplyr_1.1.2 circlize_0.4.15
[31] ComplexHeatmap_2.16.0 COTAN_2.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 bitops_1.0-7
[5] tibble_3.2.1 polyclip_1.10-4
[7] rpart_4.1.23 fastDummies_1.7.3
[9] lifecycle_1.0.3 globals_0.16.2
[11] lattice_0.22-5 MASS_7.3-60
[13] backports_1.4.1 dendextend_1.17.1
[15] magrittr_2.0.3 plotly_4.10.2
[17] rmarkdown_2.24 yaml_2.3.7
[19] httpuv_1.6.11 glmGamPoi_1.12.2
[21] sctransform_0.4.1 spam_2.10-0
[23] askpass_1.2.0 spatstat.sparse_3.0-2
[25] minqa_1.2.5 cowplot_1.1.1
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] zlibbioc_1.46.0 abind_1.4-5
[31] Rtsne_0.17 purrr_1.0.1
[33] RCurl_1.98-1.12 nnet_7.3-19
[35] GenomeInfoDbData_1.2.10 ggrepel_0.9.5
[37] irlba_2.3.5.1 listenv_0.9.0
[39] spatstat.utils_3.0-3 terra_1.7-39
[41] umap_0.2.10.0 goftest_1.2-3
[43] RSpectra_0.16-1 spatstat.random_3.2-1
[45] dqrng_0.3.0 fitdistrplus_1.1-11
[47] parallelly_1.37.1 DelayedMatrixStats_1.22.5
[49] ncdf4_1.22 leiden_0.4.3
[51] codetools_0.2-19 DelayedArray_0.26.7
[53] tidyselect_1.2.0 shape_1.4.6
[55] farver_2.1.1 lme4_1.1-34
[57] ScaledMatrix_1.8.1 viridis_0.6.4
[59] base64enc_0.1-3 spatstat.explore_3.2-1
[61] jsonlite_1.8.7 GetoptLong_1.0.5
[63] ellipsis_0.3.2 progressr_0.14.0
[65] Formula_1.2-5 ggridges_0.5.4
[67] survival_3.5-8 tools_4.3.2
[69] ica_1.0-3 glue_1.7.0
[71] gridExtra_2.3 xfun_0.39
[73] ggthemes_5.1.0 withr_3.0.0
[75] fastmap_1.1.1 boot_1.3-28
[77] fansi_1.0.4 openssl_2.1.0
[79] digest_0.6.33 rsvd_1.0.5
[81] parallelDist_0.2.6 R6_2.5.1
[83] mime_0.12 colorspace_2.1-0
[85] Cairo_1.6-1 scattermore_1.2
[87] tensor_1.5 spatstat.data_3.0-1
[89] utf8_1.2.3 tidyr_1.3.0
[91] generics_0.1.3 data.table_1.15.0
[93] httr_1.4.6 htmlwidgets_1.6.2
[95] S4Arrays_1.2.0 uwot_0.1.16
[97] pkgconfig_2.0.3 gtable_0.3.3
[99] lmtest_0.9-40 XVector_0.40.0
[101] htmltools_0.5.8 dotCall64_1.1-0
[103] clue_0.3-64 scales_1.3.0
[105] png_0.1-8 knitr_1.43
[107] rstudioapi_0.15.0 reshape2_1.4.4
[109] rjson_0.2.21 nloptr_2.0.3
[111] checkmate_2.3.0 nlme_3.1-163
[113] zoo_1.8-12 GlobalOptions_0.1.2
[115] KernSmooth_2.23-22 miniUI_0.1.1.1
[117] foreign_0.8-86 pillar_1.9.0
[119] vctrs_0.6.3 RANN_2.6.1
[121] promises_1.2.0.1 BiocSingular_1.16.0
[123] beachmat_2.16.0 xtable_1.8-4
[125] cluster_2.1.6 htmlTable_2.4.1
[127] evaluate_0.21 zeallot_0.1.0
[129] cli_3.6.1 compiler_4.3.2
[131] rlang_1.1.1 crayon_1.5.2
[133] future.apply_1.11.0 labeling_0.4.2
[135] plyr_1.8.8 stringi_1.8.1
[137] viridisLite_0.4.2 deldir_2.0-2
[139] BiocParallel_1.34.2 assertthat_0.2.1
[141] munsell_0.5.0 lazyeval_0.2.2
[143] spatstat.geom_3.2-4 PCAtools_2.14.0
[145] Matrix_1.6-3 RcppHNSW_0.6.0
[147] sparseMatrixStats_1.12.2 future_1.33.0
[149] ggplot2_3.5.0 shiny_1.8.0
[151] ROCR_1.0-11 igraph_2.0.3