library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)
options(parallelly.fork.enable = TRUE)
<- "Data/MouseCortexFromLoom/e13.5_ForebrainDorsal.cotan.RDS"
dataSetFile
<- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")
name
= "E13.5"
project
setLoggingLevel(1)
<- "CoexData/"
outDir setLoggingFile(paste0(outDir, "Logs/",name,".log"))
<- readRDS(dataSetFile)
obj = getMetadataElement(obj, datasetTags()[["cond"]]) file_code
Gene Correlation Analysis E13.5 Mouse Brain
Prologue
source("src/Functions.R")
To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:
- Constitutive genes
- Neural progenitor genes
- Pan neuronal genes
- Some layer marker genes
<- list(
genesList "NPGs"=
c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
"PNGs"=
c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
"hk"=
c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
"Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
"Tars", "Amacr"),
"layers" =
c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6")
# From https://www.science.org/doi/10.1126/science.aam8999
)
COTAN
<-getGenes(obj) int.genes
<- getGenesCoex(obj)#[int.genes,int.genes]
coexMat.big
<- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]
coexMat
= colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))
f1
<- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
split.genes rep("HK",length(genesList[["hk"]])),
rep("PNGs",length(genesList[["PNGs"]]))
),levels = c("NPGs","HK","PNGs"))
= Legend(col_fun = f1, title = "COTAN coex")
lgd
<- Heatmap(as.matrix(coexMat),
htmp #width = ncol(coexMat)*unit(2.5, "mm"),
height = nrow(coexMat)*unit(3, "mm"),
cluster_rows = FALSE,
cluster_columns = FALSE,
col = f1,
row_names_side = "left",
row_names_gp = gpar(fontsize = 11),
column_names_gp = gpar(fontsize = 11),
column_split = split.genes,
row_split = split.genes,
cluster_row_slices = FALSE,
cluster_column_slices = FALSE,
heatmap_legend_param = list(
title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
labels = c("-0.5", "0", "0.5")
)
)draw(htmp, heatmap_legend_side="right")
<- calculateGDI(obj)
GDI_DF $geneType <- NA
GDI_DFfor (cat in names(genesList)) {
rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
GDI_DF[
}
$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)
GDI_DF
write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))
unlist(genesList),] GDI_DF[
sum.raw.norm GDI exp.cells geneType GDI_centered
Nes 7.581052 4.192489 25.4165830 NPGs 3.04415531
Vim 9.971694 3.984104 76.6512748 NPGs 2.75607580
Sox2 7.082317 3.999433 18.5304156 NPGs 2.77726729
Sox1 5.715170 2.983572 5.6615138 NPGs 1.37290505
Notch1 7.227078 3.984877 20.6384260 NPGs 2.75714514
Hes1 8.449033 4.822367 31.8409958 NPGs 3.91492097
Hes5 9.585092 4.784038 42.6018872 NPGs 3.86193346
Pax6 8.481894 4.385577 44.5492873 NPGs 3.31108627
Map2 9.355155 3.931909 70.5681590 PNGs 2.68391910
Tubb3 10.986131 4.374838 76.9524192 PNGs 3.29624084
Neurod1 5.123468 2.046275 2.8508332 PNGs 0.07715236
Nefm 5.862676 3.085697 5.9024292 PNGs 1.51408601
Nefl 4.741106 2.586671 1.8871713 PNGs 0.82421512
Dcx 8.357463 4.539183 39.6707488 PNGs 3.52343659
Tbr1 7.759295 4.237369 26.0991769 PNGs 3.10619909
Calm1 10.447080 1.385779 97.8919896 hk -0.83594052
Cox6b1 9.981533 1.441202 96.6673359 hk -0.75932183
Ppia 10.196964 2.609030 86.8901827 hk 0.85512419
Rpl18 10.790428 1.335148 99.6386268 hk -0.90593539
Cox7c 10.031070 1.518121 97.1893194 hk -0.65298677
Erh 9.280735 2.711007 67.8377836 hk 0.99610081
H3f3a 10.665312 1.380775 99.6587031 hk -0.84285899
Taf1 6.937213 2.168233 17.1049990 hk 0.24575162
Taf2 6.537627 1.310694 12.1863080 hk -0.93974092
Gapdh 9.979987 2.531673 92.7323831 hk 0.74818409
Actb 11.323008 1.382499 99.4980928 hk -0.84047559
Golph3 6.990454 1.648246 19.0724754 hk -0.47309720
Zfr 8.192974 2.212476 47.4001205 hk 0.30691375
Sub1 9.735618 2.343091 92.1903232 hk 0.48748156
Tars 6.946500 1.753805 17.9682795 hk -0.32716939
Amacr 5.143626 1.632567 3.4932744 hk -0.49477252
Reln 7.550123 3.069353 8.3718129 layers 1.49149142
Lhx5 4.850935 2.291750 1.5057217 layers 0.41650600
Cux1 8.229655 3.634065 43.1840996 layers 2.27216946
Satb2 7.683171 3.401215 21.5418591 layers 1.95026951
Tle1 7.461375 2.108275 25.6976511 layers 0.16286267
Mef2c 7.773242 3.876656 19.5543064 layers 2.60753655
Rorb 6.274757 1.635946 7.5486850 layers -0.49010169
Sox5 9.709193 3.707954 64.7460349 layers 2.37431668
Bcl11b 8.848123 4.654283 43.7060831 layers 3.68255551
Fezf2 8.690887 2.925436 54.6878137 layers 1.29253570
Foxp2 7.803267 2.713967 28.2674162 layers 1.00019356
Ntf3 5.006960 2.463725 2.2485445 layers 0.65424957
NA NA NA NA <NA> NA
NA.1 NA NA NA <NA> NA
Cux2 6.681962 2.670967 9.2551696 layers 0.94074878
Slc17a6 7.136129 3.316106 14.1337081 layers 1.83261146
Sema3c 6.576389 2.740901 8.7733387 layers 1.03742781
Thsd7a 4.705115 1.784039 1.3451114 layers -0.28537229
Sulf2 5.293881 2.461288 3.2523590 layers 0.65088105
Kcnk2 7.401683 1.957314 21.6422405 layers -0.04583116
Grik3 5.864890 3.353295 5.0793013 layers 1.88402330
Etv1 5.600431 2.460104 4.1758683 layers 0.64924346
Tle4 6.694944 1.811217 12.5276049 layers -0.24780084
Tmem200a 4.701951 2.381286 1.8068661 layers 0.54028368
Glra2 6.044741 3.654254 5.6414375 layers 2.30007960
Etv1.1 5.600431 2.460104 4.1758683 layers 0.64924346
NA.2 NA NA NA <NA> NA
Sulf1 4.529211 2.076179 0.9235093 layers 0.11849227
NA.3 NA NA NA <NA> NA
Syt6 5.144411 2.559922 2.6902228 layers 0.78723640
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)
Seurat correlation
<- CreateSeuratObject(counts = getRawData(obj),
sratproject = project,
min.cells = 3,
min.features = 200)
"percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat[[<- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)
srat
# plot variable features with and without labels
<- VariableFeaturePlot(srat)
plot1
$data$centered_variance <- scale(plot1$data$variance.standardized,
plot1center = T,scale = F)
write.csv(plot1$data,paste0("CoexData/",
"Variance_Seurat_genes",
getMetadataElement(obj,
datasetTags()[["cond"]]),".csv"))
LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)
LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)
<- rownames(srat)
all.genes <- ScaleData(srat, features = all.genes)
srat = GetAssayData(srat[["RNA"]],layer = "data") seurat.data
<- correlation_pvalues(data = seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 7130858 380.9 11454895 611.8 11454895 611.8
Vcells 840516207 6412.7 1342169060 10240.0 1342169060 10240.0
draw(htmp, heatmap_legend_side="right")
Seurat SC Transform
<- SCTransform(srat,
srat method = "glmGamPoi",
vars.to.regress = "percent.mt",
verbose = FALSE)
<- GetAssayData(srat[["SCT"]],layer = "data")
seurat.data
#Remove genes with all zeros
<-seurat.data[rowSums(seurat.data) > 0,]
seurat.data
<- correlation_pvalues(seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr SCT")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 8859943 473.2 16748832 894.5 12021442 642.1
Vcells 861708025 6574.4 1932899446 14746.9 1610675510 12288.5
draw(htmp, heatmap_legend_side="right")
Monocle
library(monocle3)
<- new_cell_data_set(getRawData(obj),
cds cell_metadata = getMetadataCells(obj),
gene_metadata = getMetadataGenes(obj)
)<- preprocess_cds(cds, num_dim = 100)
cds
<- normalized_counts(cds) normalized_counts
#Remove genes with all zeros
<- normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
rm(normalized_counts)
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
monocle.data.cor.big
<- correlation_plot(data.cor.big = monocle.data.cor.big,
htmp
genesList,title = "Monocle corr")
<- corr.pval.list$p_values
p_values.from.monocle <- corr.pval.list$data.cor
monocle.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9971655 532.6 16748832 894.5 16748832 894.5
Vcells 896560359 6840.3 2062053399 15732.3 2062053399 15732.3
draw(htmp, heatmap_legend_side="right")
ScanPy
library(reticulate)
<- paste0("CoexData/ScanPy/")
dirOutScP if (!dir.exists(dirOutScP)) {
dir.create(dirOutScP)
}
Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
<- import("sys")
py
source_python("src/scanpyGenesExpression.py")
scanpyFDR(getRawData(obj),
getMetadataCells(obj),
getMetadataGenes(obj),
"mt",
dirOutScP,
file_code,
int.genes)
<- read.csv(paste0(dirOutScP,
normalized_counts "_Scampy_expression_all_genes.gz"),header = T,row.names = 1)
file_code,
<- t(normalized_counts) normalized_counts
#Remove genes with all zeros
<-normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
ScanPy.data.cor.big
<- correlation_plot(data.cor.big = ScanPy.data.cor.big,
htmp
genesList,title = "ScanPy corr")
<- corr.pval.list$p_values
p_values.from.ScanPy <- corr.pval.list$data.cor
ScanPy.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9994943 533.8 16748832 894.5 16748832 894.5
Vcells 966228182 7371.8 2062053399 15732.3 2062053399 15732.3
draw(htmp, heatmap_legend_side="right")
Sys.time()
[1] "2024-05-06 11:52:16 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] C.UTF-8
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] reticulate_1.36.1 monocle3_1.3.4
[3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[5] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
[7] IRanges_2.34.1 S4Vectors_0.38.1
[9] MatrixGenerics_1.12.3 matrixStats_1.2.0
[11] Biobase_2.60.0 BiocGenerics_0.46.0
[13] fstcore_0.9.18 fst_0.9.8
[15] stringr_1.5.0 HiClimR_2.2.1
[17] doParallel_1.0.17 iterators_1.0.14
[19] foreach_1.5.2 Rfast_2.1.0
[21] RcppParallel_5.1.7 RcppZiggurat_0.1.6
[23] Rcpp_1.0.11 patchwork_1.2.0
[25] Seurat_5.0.0 SeuratObject_5.0.0
[27] sp_2.1-1 Hmisc_5.1-0
[29] dplyr_1.1.2 circlize_0.4.15
[31] ComplexHeatmap_2.16.0 COTAN_2.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 bitops_1.0-7
[5] tibble_3.2.1 polyclip_1.10-4
[7] rpart_4.1.23 fastDummies_1.7.3
[9] lifecycle_1.0.3 globals_0.16.2
[11] lattice_0.22-5 MASS_7.3-60
[13] backports_1.4.1 dendextend_1.17.1
[15] magrittr_2.0.3 plotly_4.10.2
[17] rmarkdown_2.24 yaml_2.3.7
[19] httpuv_1.6.11 glmGamPoi_1.12.2
[21] sctransform_0.4.1 spam_2.10-0
[23] askpass_1.2.0 spatstat.sparse_3.0-2
[25] minqa_1.2.5 cowplot_1.1.1
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] zlibbioc_1.46.0 abind_1.4-5
[31] Rtsne_0.17 purrr_1.0.1
[33] RCurl_1.98-1.12 nnet_7.3-19
[35] GenomeInfoDbData_1.2.10 ggrepel_0.9.5
[37] irlba_2.3.5.1 listenv_0.9.0
[39] spatstat.utils_3.0-3 terra_1.7-39
[41] umap_0.2.10.0 goftest_1.2-3
[43] RSpectra_0.16-1 spatstat.random_3.2-1
[45] dqrng_0.3.0 fitdistrplus_1.1-11
[47] parallelly_1.37.1 DelayedMatrixStats_1.22.5
[49] ncdf4_1.22 leiden_0.4.3
[51] codetools_0.2-19 DelayedArray_0.26.7
[53] tidyselect_1.2.0 shape_1.4.6
[55] farver_2.1.1 lme4_1.1-34
[57] ScaledMatrix_1.8.1 viridis_0.6.4
[59] base64enc_0.1-3 spatstat.explore_3.2-1
[61] jsonlite_1.8.7 GetoptLong_1.0.5
[63] ellipsis_0.3.2 progressr_0.14.0
[65] Formula_1.2-5 ggridges_0.5.4
[67] survival_3.5-8 tools_4.3.2
[69] ica_1.0-3 glue_1.7.0
[71] gridExtra_2.3 xfun_0.39
[73] ggthemes_5.1.0 withr_3.0.0
[75] fastmap_1.1.1 boot_1.3-28
[77] fansi_1.0.4 openssl_2.1.0
[79] digest_0.6.33 rsvd_1.0.5
[81] parallelDist_0.2.6 R6_2.5.1
[83] mime_0.12 colorspace_2.1-0
[85] Cairo_1.6-1 scattermore_1.2
[87] tensor_1.5 spatstat.data_3.0-1
[89] utf8_1.2.3 tidyr_1.3.0
[91] generics_0.1.3 data.table_1.15.0
[93] httr_1.4.6 htmlwidgets_1.6.2
[95] S4Arrays_1.2.0 uwot_0.1.16
[97] pkgconfig_2.0.3 gtable_0.3.3
[99] lmtest_0.9-40 XVector_0.40.0
[101] htmltools_0.5.8 dotCall64_1.1-0
[103] clue_0.3-64 scales_1.3.0
[105] png_0.1-8 knitr_1.43
[107] rstudioapi_0.15.0 reshape2_1.4.4
[109] rjson_0.2.21 nloptr_2.0.3
[111] checkmate_2.3.0 nlme_3.1-163
[113] zoo_1.8-12 GlobalOptions_0.1.2
[115] KernSmooth_2.23-22 miniUI_0.1.1.1
[117] foreign_0.8-86 pillar_1.9.0
[119] vctrs_0.6.3 RANN_2.6.1
[121] promises_1.2.0.1 BiocSingular_1.16.0
[123] beachmat_2.16.0 xtable_1.8-4
[125] cluster_2.1.6 htmlTable_2.4.1
[127] evaluate_0.21 zeallot_0.1.0
[129] cli_3.6.1 compiler_4.3.2
[131] rlang_1.1.1 crayon_1.5.2
[133] future.apply_1.11.0 labeling_0.4.2
[135] plyr_1.8.8 stringi_1.8.1
[137] viridisLite_0.4.2 deldir_2.0-2
[139] BiocParallel_1.34.2 assertthat_0.2.1
[141] munsell_0.5.0 lazyeval_0.2.2
[143] spatstat.geom_3.2-4 PCAtools_2.14.0
[145] Matrix_1.6-3 RcppHNSW_0.6.0
[147] sparseMatrixStats_1.12.2 future_1.33.0
[149] ggplot2_3.5.0 shiny_1.8.0
[151] ROCR_1.0-11 igraph_2.0.3