library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)
options(parallelly.fork.enable = TRUE)
<- "Data/MouseCortexFromLoom/e17.5_ForebrainDorsal.cotan.RDS"
dataSetFile <- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")
name
= "E17.5"
project
setLoggingLevel(1)
<- "CoexData/"
outDir setLoggingFile(paste0(outDir, "Logs/",name,".log"))
<- readRDS(dataSetFile)
obj = getMetadataElement(obj, datasetTags()[["cond"]]) file_code
Gene Correlation Analysis E17.5 Mouse Brain
Prologue
source("src/Functions.R")
To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:
- Constitutive genes
- Neural progenitor genes
- Pan neuronal genes
- Some layer marker genes
<- list(
genesList "NPGs"=
c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
"PNGs"=
c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
"hk"=
c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
"Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
"Tars", "Amacr"),
"layers" =
c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6")
# From https://www.science.org/doi/10.1126/science.aam8999
)
COTAN
<-getGenes(obj) int.genes
<- getGenesCoex(obj)#[int.genes,int.genes]
coexMat.big
<- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]
coexMat
= colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))
f1
<- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
split.genes rep("HK",length(genesList[["hk"]])),
rep("PNGs",length(genesList[["PNGs"]]))
),levels = c("NPGs","HK","PNGs"))
= Legend(col_fun = f1, title = "COTAN coex")
lgd
<- Heatmap(as.matrix(coexMat),
htmp #width = ncol(coexMat)*unit(2.5, "mm"),
height = nrow(coexMat)*unit(3, "mm"),
cluster_rows = FALSE,
cluster_columns = FALSE,
col = f1,
row_names_side = "left",
row_names_gp = gpar(fontsize = 11),
column_names_gp = gpar(fontsize = 11),
column_split = split.genes,
row_split = split.genes,
cluster_row_slices = FALSE,
cluster_column_slices = FALSE,
heatmap_legend_param = list(
title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
labels = c("-0.5", "0", "0.5")
)
)draw(htmp, heatmap_legend_side="right")
<- calculateGDI(obj)
GDI_DF $geneType <- NA
GDI_DFfor (cat in names(genesList)) {
rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
GDI_DF[
}
$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)
GDI_DF
write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))
unlist(genesList),] GDI_DF[
sum.raw.norm GDI exp.cells geneType GDI_centered
Nes 5.354860 2.677591 7.6611269 NPGs 2.12164684
Vim 7.975351 3.653910 23.7130118 NPGs 4.21500654
Sox2 5.379522 3.446151 6.6477503 NPGs 3.76954465
Sox1 4.826847 2.321489 3.8913660 NPGs 1.35811585
Notch1 5.491804 3.644973 8.1070126 NPGs 4.19584566
Hes1 5.241888 3.715489 4.4183218 NPGs 4.34704054
Hes5 6.367234 3.706690 6.9720308 NPGs 4.32817408
Pax6 6.445184 3.410841 15.2006486 NPGs 3.69383388
Map2 8.945220 2.919301 87.3530604 PNGs 2.63990577
Tubb3 10.669909 3.802683 93.9602756 PNGs 4.53399718
Neurod1 4.512176 1.570675 3.2833401 PNGs -0.25172970
Nefm 5.320286 2.010487 6.2829347 PNGs 0.69128553
Nefl 5.592561 2.179108 5.2695582 PNGs 1.05283171
Dcx 8.048568 2.850882 60.6404540 PNGs 2.49320624
Tbr1 6.829413 1.953272 23.6724767 PNGs 0.56860885
Calm1 9.893664 1.604866 97.6489664 hk -0.17841926
Cox6b1 9.028070 2.184997 90.8796109 hk 1.06545895
Ppia 9.189622 2.372214 90.7580057 hk 1.46687705
Rpl18 9.554798 2.279009 96.3518443 hk 1.26703311
Cox7c 9.114185 2.170535 91.9740576 hk 1.03444998
Erh 7.929830 2.390307 62.5050669 hk 1.50567107
H3f3a 9.697620 2.349951 96.3518443 hk 1.41914319
Taf1 6.206264 1.475777 16.9436563 hk -0.45520422
Taf2 5.648276 1.342082 10.2959060 hk -0.74186428
Gapdh 8.621629 2.367727 80.5026348 hk 1.45725765
Actb 10.484376 1.292044 99.6351844 hk -0.84915323
Golph3 5.947435 1.643738 13.8629915 hk -0.09507358
Zfr 7.359818 1.675085 43.6157276 hk -0.02786120
Sub1 8.434336 2.419785 75.3952169 hk 1.56887719
Tars 5.930970 1.571117 13.5387110 hk -0.25078126
Amacr 4.339515 1.454028 2.9995946 hk -0.50183660
Reln 6.723819 2.232753 4.7020673 layers 1.16785364
Lhx5 4.079754 2.579243 1.3376571 layers 1.91077584
Cux1 7.968038 2.190834 50.0202675 layers 1.07797409
Satb2 9.062287 3.390857 57.7219295 layers 3.65098593
Tle1 6.594351 1.618541 21.7673287 layers -0.14909808
Mef2c 9.071730 3.175241 51.6011350 layers 3.18867667
Rorb 6.282362 1.926899 13.1333604 layers 0.51206224
Sox5 8.049776 2.285548 43.4130523 layers 1.28105506
Bcl11b 7.109492 2.116159 21.4835833 layers 0.91786051
Fezf2 6.506386 2.096344 18.1597081 layers 0.87537638
Foxp2 6.557695 2.455294 14.4304824 layers 1.64501202
Ntf3 6.051423 2.225961 6.9314957 layers 1.15329231
Rasgrf2 4.096064 1.795794 2.0267531 layers 0.23095491
NA NA NA NA <NA> NA
Cux2 7.067401 2.518402 26.4288610 layers 1.78032434
Slc17a6 6.228895 2.065176 14.4304824 layers 0.80854728
Sema3c 7.553723 2.676192 31.6984191 layers 2.11864837
Thsd7a 5.951856 1.810789 9.6068099 layers 0.26310666
Sulf2 4.348066 1.725374 2.3915687 layers 0.07996546
Kcnk2 7.512577 1.895654 41.7511147 layers 0.44506877
Grik3 5.173332 1.974505 4.4993920 layers 0.61413535
Etv1 5.383398 2.639083 5.8775841 layers 2.03908090
Tle4 5.664300 1.881239 8.4718281 layers 0.41416114
Tmem200a 5.235004 2.081666 6.3234698 layers 0.84390268
Glra2 6.076633 2.094562 11.2687475 layers 0.87155349
Etv1.1 5.383398 2.639083 5.8775841 layers 2.03908090
Htr1f 3.780477 1.777503 1.2160519 layers 0.19173786
Sulf1 2.631257 1.128008 0.3648156 layers -1.20086709
NA.1 NA NA NA <NA> NA
Syt6 4.757520 1.609693 2.6347791 layers -0.16807098
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)
Seurat correlation
<- CreateSeuratObject(counts = getRawData(obj),
sratproject = project,
min.cells = 3,
min.features = 200)
"percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat[[<- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)
srat
# plot variable features with and without labels
<- VariableFeaturePlot(srat)
plot1
$data$centered_variance <- scale(plot1$data$variance.standardized,
plot1center = T,scale = F)
write.csv(plot1$data,paste0("CoexData/",
"Variance_Seurat_genes",
getMetadataElement(obj,
datasetTags()[["cond"]]),".csv"))
LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)
LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)
<- rownames(srat)
all.genes <- ScaleData(srat, features = all.genes)
srat = GetAssayData(srat[["RNA"]],layer = "data") seurat.data
<- correlation_pvalues(data = seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 7121123 380.4 11454522 611.8 11454522 611.8
Vcells 766000052 5844.2 1271129210 9698.0 1191235271 9088.5
draw(htmp, heatmap_legend_side="right")
Seurat SC Transform
<- SCTransform(srat,
srat method = "glmGamPoi",
vars.to.regress = "percent.mt",
verbose = FALSE)
<- GetAssayData(srat[["SCT"]],layer = "data")
seurat.data
#Remove genes with all zeros
<-seurat.data[rowSums(seurat.data) > 0,]
seurat.data
<- correlation_pvalues(seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr SCT")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 8850105 472.7 16721112 893.1 13569547 724.7
Vcells 721897666 5507.7 1757477180 13408.5 1525429261 11638.2
draw(htmp, heatmap_legend_side="right")
Monocle
library(monocle3)
<- new_cell_data_set(getRawData(obj),
cds cell_metadata = getMetadataCells(obj),
gene_metadata = getMetadataGenes(obj)
)<- preprocess_cds(cds, num_dim = 100)
cds
<- normalized_counts(cds) normalized_counts
#Remove genes with all zeros
<- normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
rm(normalized_counts)
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
monocle.data.cor.big
<- correlation_plot(data.cor.big = monocle.data.cor.big,
htmp
genesList,title = "Monocle corr")
<- corr.pval.list$p_values
p_values.from.monocle <- corr.pval.list$data.cor
monocle.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9961818 532.1 16721112 893.1 16721112 893.1
Vcells 795023729 6065.6 1757477180 13408.5 1757464389 13408.4
draw(htmp, heatmap_legend_side="right")
ScanPy
library(reticulate)
<- paste0("CoexData/ScanPy/")
dirOutScP if (!dir.exists(dirOutScP)) {
dir.create(dirOutScP)
}
Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
<- import("sys")
py
source_python("src/scanpyGenesExpression.py")
scanpyFDR(getRawData(obj),
getMetadataCells(obj),
getMetadataGenes(obj),
"mt",
dirOutScP,
file_code,
int.genes)
<- read.csv(paste0(dirOutScP,
normalized_counts "_Scampy_expression_all_genes.gz"),header = T,row.names = 1)
file_code,
<- t(normalized_counts) normalized_counts
#Remove genes with all zeros
<-normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
ScanPy.data.cor.big
<- correlation_plot(data.cor.big = ScanPy.data.cor.big,
htmp
genesList,title = "ScanPy corr")
<- corr.pval.list$p_values
p_values.from.ScanPy <- corr.pval.list$data.cor
ScanPy.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9985106 533.3 16721112 893.1 16721112 893.1
Vcells 829560962 6329.1 1757477180 13408.5 1757476100 13408.5
draw(htmp, heatmap_legend_side="right")
Sys.time()
[1] "2024-05-06 13:29:24 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] C.UTF-8
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] reticulate_1.36.1 monocle3_1.3.4
[3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[5] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
[7] IRanges_2.34.1 S4Vectors_0.38.1
[9] MatrixGenerics_1.12.3 matrixStats_1.2.0
[11] Biobase_2.60.0 BiocGenerics_0.46.0
[13] fstcore_0.9.18 fst_0.9.8
[15] stringr_1.5.0 HiClimR_2.2.1
[17] doParallel_1.0.17 iterators_1.0.14
[19] foreach_1.5.2 Rfast_2.1.0
[21] RcppParallel_5.1.7 RcppZiggurat_0.1.6
[23] Rcpp_1.0.11 patchwork_1.2.0
[25] Seurat_5.0.0 SeuratObject_5.0.0
[27] sp_2.1-1 Hmisc_5.1-0
[29] dplyr_1.1.2 circlize_0.4.15
[31] ComplexHeatmap_2.16.0 COTAN_2.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 bitops_1.0-7
[5] tibble_3.2.1 polyclip_1.10-4
[7] rpart_4.1.23 fastDummies_1.7.3
[9] lifecycle_1.0.3 globals_0.16.2
[11] lattice_0.22-5 MASS_7.3-60
[13] backports_1.4.1 dendextend_1.17.1
[15] magrittr_2.0.3 plotly_4.10.2
[17] rmarkdown_2.24 yaml_2.3.7
[19] httpuv_1.6.11 glmGamPoi_1.12.2
[21] sctransform_0.4.1 spam_2.10-0
[23] askpass_1.2.0 spatstat.sparse_3.0-2
[25] minqa_1.2.5 cowplot_1.1.1
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] zlibbioc_1.46.0 abind_1.4-5
[31] Rtsne_0.17 purrr_1.0.1
[33] RCurl_1.98-1.12 nnet_7.3-19
[35] GenomeInfoDbData_1.2.10 ggrepel_0.9.5
[37] irlba_2.3.5.1 listenv_0.9.0
[39] spatstat.utils_3.0-3 terra_1.7-39
[41] umap_0.2.10.0 goftest_1.2-3
[43] RSpectra_0.16-1 spatstat.random_3.2-1
[45] dqrng_0.3.0 fitdistrplus_1.1-11
[47] parallelly_1.37.1 DelayedMatrixStats_1.22.5
[49] ncdf4_1.22 leiden_0.4.3
[51] codetools_0.2-19 DelayedArray_0.26.7
[53] tidyselect_1.2.0 shape_1.4.6
[55] farver_2.1.1 lme4_1.1-34
[57] ScaledMatrix_1.8.1 viridis_0.6.4
[59] base64enc_0.1-3 spatstat.explore_3.2-1
[61] jsonlite_1.8.7 GetoptLong_1.0.5
[63] ellipsis_0.3.2 progressr_0.14.0
[65] Formula_1.2-5 ggridges_0.5.4
[67] survival_3.5-8 tools_4.3.2
[69] ica_1.0-3 glue_1.7.0
[71] gridExtra_2.3 xfun_0.39
[73] ggthemes_5.1.0 withr_3.0.0
[75] fastmap_1.1.1 boot_1.3-28
[77] fansi_1.0.4 openssl_2.1.0
[79] digest_0.6.33 rsvd_1.0.5
[81] parallelDist_0.2.6 R6_2.5.1
[83] mime_0.12 colorspace_2.1-0
[85] Cairo_1.6-1 scattermore_1.2
[87] tensor_1.5 spatstat.data_3.0-1
[89] utf8_1.2.3 tidyr_1.3.0
[91] generics_0.1.3 data.table_1.15.0
[93] httr_1.4.6 htmlwidgets_1.6.2
[95] S4Arrays_1.2.0 uwot_0.1.16
[97] pkgconfig_2.0.3 gtable_0.3.3
[99] lmtest_0.9-40 XVector_0.40.0
[101] htmltools_0.5.8 dotCall64_1.1-0
[103] clue_0.3-64 scales_1.3.0
[105] png_0.1-8 knitr_1.43
[107] rstudioapi_0.15.0 reshape2_1.4.4
[109] rjson_0.2.21 nloptr_2.0.3
[111] checkmate_2.3.0 nlme_3.1-163
[113] zoo_1.8-12 GlobalOptions_0.1.2
[115] KernSmooth_2.23-22 miniUI_0.1.1.1
[117] foreign_0.8-86 pillar_1.9.0
[119] vctrs_0.6.3 RANN_2.6.1
[121] promises_1.2.0.1 BiocSingular_1.16.0
[123] beachmat_2.16.0 xtable_1.8-4
[125] cluster_2.1.6 htmlTable_2.4.1
[127] evaluate_0.21 zeallot_0.1.0
[129] cli_3.6.1 compiler_4.3.2
[131] rlang_1.1.1 crayon_1.5.2
[133] future.apply_1.11.0 labeling_0.4.2
[135] plyr_1.8.8 stringi_1.8.1
[137] viridisLite_0.4.2 deldir_2.0-2
[139] BiocParallel_1.34.2 assertthat_0.2.1
[141] munsell_0.5.0 lazyeval_0.2.2
[143] spatstat.geom_3.2-4 PCAtools_2.14.0
[145] Matrix_1.6-3 RcppHNSW_0.6.0
[147] sparseMatrixStats_1.12.2 future_1.33.0
[149] ggplot2_3.5.0 shiny_1.8.0
[151] ROCR_1.0-11 igraph_2.0.3