library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)
options(parallelly.fork.enable = TRUE)
<- ("Data/Yuzwa_MouseCortex/CorticalCells_GSM2861514_E175.cotan.RDS")
dataSetFile
<- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")
name
= "E17.5"
project
setLoggingLevel(1)
<- "CoexData/"
outDir setLoggingFile(paste0(outDir, "Logs/",name,".log"))
<- readRDS(dataSetFile)
obj = getMetadataElement(obj, datasetTags()[["cond"]]) file_code
Gene Correlation Analysis E17.5
Prologue
source("src/Functions.R")
To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:
- Constitutive genes
- Neural progenitor genes
- Pan neuronal genes
- Some layer marker genes
<- list(
genesList "NPGs"=
c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
"PNGs"=
c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
"hk"=
c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
"Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
"Tars", "Amacr"),
"layers" =
c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6")
# From https://www.science.org/doi/10.1126/science.aam8999
)
COTAN
<-getGenes(obj) int.genes
<- getGenesCoex(obj)#[int.genes,int.genes]
coexMat.big
<- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]
coexMat
= colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))
f1
<- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
split.genes rep("HK",length(genesList[["hk"]])),
rep("PNGs",length(genesList[["PNGs"]]))
),levels = c("NPGs","HK","PNGs"))
= Legend(col_fun = f1, title = "COTAN coex")
lgd
<- Heatmap(as.matrix(coexMat),
htmp #width = ncol(coexMat)*unit(2.5, "mm"),
height = nrow(coexMat)*unit(3, "mm"),
cluster_rows = FALSE,
cluster_columns = FALSE,
col = f1,
row_names_side = "left",
row_names_gp = gpar(fontsize = 11),
column_names_gp = gpar(fontsize = 11),
column_split = split.genes,
row_split = split.genes,
cluster_row_slices = FALSE,
cluster_column_slices = FALSE,
heatmap_legend_param = list(
title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
labels = c("-0.5", "0", "0.5")
)
)draw(htmp, heatmap_legend_side="right")
<- calculateGDI(obj)
GDI_DF $geneType <- NA
GDI_DFfor (cat in names(genesList)) {
rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
GDI_DF[
}
$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)
GDI_DF
write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))
unlist(genesList),] GDI_DF[
sum.raw.norm GDI exp.cells geneType GDI_centered
Nes 5.220644 1.9759151 13.9588101 NPGs 3.88073248
Vim 5.707954 2.4316257 13.3867277 NPGs 6.38025751
Sox2 5.049686 2.4580921 9.2677346 NPGs 6.52542286
Sox1 2.596456 1.5372902 1.7162471 NPGs 1.47492018
Notch1 2.548442 1.2912127 1.2585812 NPGs 0.12521118
Hes1 3.815307 2.0026215 3.0892449 NPGs 4.02721428
Hes5 5.005001 2.3956230 6.5217391 NPGs 6.18278634
Pax6 3.925807 1.7927127 4.0045767 NPGs 2.87588602
Map2 7.165812 1.5519874 65.3318078 PNGs 1.55553290
Tubb3 8.735578 2.3871787 90.9610984 PNGs 6.13647007
Neurod1 5.979697 1.4115723 20.8237986 PNGs 0.78537104
Nefm 5.247537 1.8040041 11.4416476 PNGs 2.93781860
Nefl 4.397731 1.6537969 5.2631579 PNGs 2.11394740
Dcx 7.063133 1.6738575 63.1578947 PNGs 2.22397771
Tbr1 5.930953 1.5543822 26.6590389 PNGs 1.56866816
Calm1 8.420630 1.9599507 89.4736842 hk 3.79316931
Cox6b1 7.396777 1.6401134 75.9725400 hk 2.03889511
Ppia 4.938416 1.5230935 15.4462243 hk 1.39705306
Rpl18 6.336462 1.4146625 44.1647597 hk 0.80232041
Cox7c 6.144064 1.3937240 39.0160183 hk 0.68747482
Erh 5.116709 1.2996389 17.9633867 hk 0.17142798
H3f3a 6.883043 1.5810893 61.7848970 hk 1.71515374
Taf1 5.356496 1.3000061 16.4759725 hk 0.17344183
Taf2 4.784093 1.3661691 11.5560641 hk 0.53633887
Gapdh 2.027649 1.0309864 0.9153318 hk -1.30210301
Actb 9.168400 1.2795919 98.3981693 hk 0.06147194
Golph3 4.401339 1.2584072 8.6956522 hk -0.05472380
Zfr 6.134988 1.4748224 35.0114416 hk 1.13229093
Sub1 6.778528 1.6575142 56.9794050 hk 2.13433668
Tars 4.622981 1.3112605 10.4118993 hk 0.23517106
Amacr 3.001914 1.0595608 1.7162471 hk -1.14537537
Reln 4.630042 1.3624092 4.1189931 layers 0.51571626
Lhx5 3.288483 1.4261767 1.4874142 layers 0.86547473
Cux1 6.104752 1.4098312 32.6086957 layers 0.77582115
Satb2 6.545260 1.7564615 38.9016018 layers 2.67705206
Tle1 4.942203 1.2576091 12.4713959 layers -0.05910126
Mef2c 7.176654 2.0319404 50.2288330 layers 4.18802527
Rorb 4.038835 1.4237173 5.0343249 layers 0.85198472
Sox5 5.686965 1.4932040 18.0778032 layers 1.23311226
Bcl11b 5.827814 1.3788500 22.6544622 layers 0.60589249
Fezf2 5.060242 1.8449393 10.0686499 layers 3.16234383
Foxp2 3.715673 1.2017254 2.6315789 layers -0.36561756
Ntf3 3.517345 1.3707172 2.5171625 layers 0.56128503
NA NA NA NA <NA> NA
Pvrl3 4.638048 1.4248249 8.6956522 layers 0.85805972
Cux2 5.235976 1.3303178 14.6453089 layers 0.33969841
Slc17a6 4.833627 1.3393122 9.9542334 layers 0.38903165
Sema3c 5.091768 1.3767920 11.8993135 layers 0.59460463
Thsd7a 4.340644 1.2252121 4.9199085 layers -0.23679530
Sulf2 3.014381 1.2499911 1.7162471 layers -0.10088493
Kcnk2 5.256674 1.3228860 19.1075515 layers 0.29893593
Grik3 3.920478 1.3638079 2.8604119 layers 0.52338795
Etv1 3.994692 1.5185054 4.1189931 layers 1.37188765
Tle4 4.873726 1.4669420 8.4668192 layers 1.08906769
Tmem200a 2.962816 1.1818921 2.0594966 layers -0.47440110
Glra2 4.613463 1.3419476 9.7254005 layers 0.40348636
Etv1.1 3.994692 1.5185054 4.1189931 layers 1.37188765
Htr1f 2.004839 0.9689532 0.6864989 layers -1.64234848
NA.1 NA NA NA <NA> NA
NA.2 NA NA NA <NA> NA
Syt6 3.750383 1.3359728 3.6613272 layers 0.37071545
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)
Seurat correlation
<- CreateSeuratObject(counts = getRawData(obj),
sratproject = project,
min.cells = 3,
min.features = 200)
"percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat[[<- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)
srat
# plot variable features with and without labels
<- VariableFeaturePlot(srat)
plot1
$data$centered_variance <- scale(plot1$data$variance.standardized,
plot1center = T,scale = F)
write.csv(plot1$data,paste0("CoexData/",
"Variance_Seurat_genes",
getMetadataElement(obj,
datasetTags()[["cond"]]),".csv"))
LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)
LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)
<- rownames(srat)
all.genes <- ScaleData(srat, features = all.genes)
srat = GetAssayData(srat[["RNA"]],layer = "data") seurat.data
<- correlation_pvalues(data = seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 7108398 379.7 11454412 611.8 11454412 611.8
Vcells 564771015 4308.9 902333175 6884.3 902333175 6884.3
draw(htmp, heatmap_legend_side="right")
Seurat SC Transform
<- SCTransform(srat,
srat method = "glmGamPoi",
vars.to.regress = "percent.mt",
verbose = FALSE)
<- GetAssayData(srat[["SCT"]],layer = "data")
seurat.data
#Remove genes with all zeros
<-seurat.data[rowSums(seurat.data) > 0,]
seurat.data
<- correlation_pvalues(seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr SCT")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 8837356 472.0 16666662 890.1 13556438 724.0
Vcells 485068772 3700.8 1039644617 7931.9 1082876197 8261.7
draw(htmp, heatmap_legend_side="right")
Monocle
library(monocle3)
<- new_cell_data_set(getRawData(obj),
cds cell_metadata = getMetadataCells(obj),
gene_metadata = getMetadataGenes(obj)
)<- preprocess_cds(cds, num_dim = 100)
cds
<- normalized_counts(cds) normalized_counts
#Remove genes with all zeros
<- normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
rm(normalized_counts)
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
monocle.data.cor.big
<- correlation_plot(data.cor.big = monocle.data.cor.big,
htmp
genesList,title = "Monocle corr")
<- corr.pval.list$p_values
p_values.from.monocle <- corr.pval.list$data.cor
monocle.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9949073 531.4 16666662 890.1 16666662 890.1
Vcells 577016889 4402.3 1197846598 9138.9 1170159130 8927.7
draw(htmp, heatmap_legend_side="right")
ScanPy
library(reticulate)
<- paste0("CoexData/ScanPy/")
dirOutScP if (!dir.exists(dirOutScP)) {
dir.create(dirOutScP)
}
Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
<- import("sys")
py
source_python("src/scanpyGenesExpression.py")
scanpyFDR(getRawData(obj),
getMetadataCells(obj),
getMetadataGenes(obj),
"mt",
dirOutScP,
file_code,
int.genes)
<- read.csv(paste0(dirOutScP,
normalized_counts "_Scampy_expression_all_genes.gz"),header = T,row.names = 1)
file_code,
<- t(normalized_counts) normalized_counts
#Remove genes with all zeros
<-normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
ScanPy.data.cor.big
<- correlation_plot(data.cor.big = ScanPy.data.cor.big,
htmp
genesList,title = "ScanPy corr")
<- corr.pval.list$p_values
p_values.from.ScanPy <- corr.pval.list$data.cor
ScanPy.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9972882 532.7 16666662 890.1 16666662 890.1
Vcells 549865083 4195.2 1197846598 9138.9 1197843725 9138.9
draw(htmp, heatmap_legend_side="right")
Sys.time()
[1] "2024-05-06 12:30:26 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] C.UTF-8
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] reticulate_1.36.1 monocle3_1.3.4
[3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[5] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
[7] IRanges_2.34.1 S4Vectors_0.38.1
[9] MatrixGenerics_1.12.3 matrixStats_1.2.0
[11] Biobase_2.60.0 BiocGenerics_0.46.0
[13] fstcore_0.9.18 fst_0.9.8
[15] stringr_1.5.0 HiClimR_2.2.1
[17] doParallel_1.0.17 iterators_1.0.14
[19] foreach_1.5.2 Rfast_2.1.0
[21] RcppParallel_5.1.7 RcppZiggurat_0.1.6
[23] Rcpp_1.0.11 patchwork_1.2.0
[25] Seurat_5.0.0 SeuratObject_5.0.0
[27] sp_2.1-1 Hmisc_5.1-0
[29] dplyr_1.1.2 circlize_0.4.15
[31] ComplexHeatmap_2.16.0 COTAN_2.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 bitops_1.0-7
[5] tibble_3.2.1 polyclip_1.10-4
[7] rpart_4.1.23 fastDummies_1.7.3
[9] lifecycle_1.0.3 globals_0.16.2
[11] lattice_0.22-5 MASS_7.3-60
[13] backports_1.4.1 dendextend_1.17.1
[15] magrittr_2.0.3 plotly_4.10.2
[17] rmarkdown_2.24 yaml_2.3.7
[19] httpuv_1.6.11 glmGamPoi_1.12.2
[21] sctransform_0.4.1 spam_2.10-0
[23] askpass_1.2.0 spatstat.sparse_3.0-2
[25] minqa_1.2.5 cowplot_1.1.1
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] zlibbioc_1.46.0 abind_1.4-5
[31] Rtsne_0.17 purrr_1.0.1
[33] RCurl_1.98-1.12 nnet_7.3-19
[35] GenomeInfoDbData_1.2.10 ggrepel_0.9.5
[37] irlba_2.3.5.1 listenv_0.9.0
[39] spatstat.utils_3.0-3 terra_1.7-39
[41] umap_0.2.10.0 goftest_1.2-3
[43] RSpectra_0.16-1 spatstat.random_3.2-1
[45] dqrng_0.3.0 fitdistrplus_1.1-11
[47] parallelly_1.37.1 DelayedMatrixStats_1.22.5
[49] ncdf4_1.22 leiden_0.4.3
[51] codetools_0.2-19 DelayedArray_0.26.7
[53] tidyselect_1.2.0 shape_1.4.6
[55] farver_2.1.1 lme4_1.1-34
[57] ScaledMatrix_1.8.1 viridis_0.6.4
[59] base64enc_0.1-3 spatstat.explore_3.2-1
[61] jsonlite_1.8.7 GetoptLong_1.0.5
[63] ellipsis_0.3.2 progressr_0.14.0
[65] Formula_1.2-5 ggridges_0.5.4
[67] survival_3.5-8 tools_4.3.2
[69] ica_1.0-3 glue_1.7.0
[71] gridExtra_2.3 xfun_0.39
[73] ggthemes_5.1.0 withr_3.0.0
[75] fastmap_1.1.1 boot_1.3-28
[77] fansi_1.0.4 openssl_2.1.0
[79] digest_0.6.33 rsvd_1.0.5
[81] parallelDist_0.2.6 R6_2.5.1
[83] mime_0.12 colorspace_2.1-0
[85] Cairo_1.6-1 scattermore_1.2
[87] tensor_1.5 spatstat.data_3.0-1
[89] utf8_1.2.3 tidyr_1.3.0
[91] generics_0.1.3 data.table_1.15.0
[93] httr_1.4.6 htmlwidgets_1.6.2
[95] S4Arrays_1.2.0 uwot_0.1.16
[97] pkgconfig_2.0.3 gtable_0.3.3
[99] lmtest_0.9-40 XVector_0.40.0
[101] htmltools_0.5.8 dotCall64_1.1-0
[103] clue_0.3-64 scales_1.3.0
[105] png_0.1-8 knitr_1.43
[107] rstudioapi_0.15.0 reshape2_1.4.4
[109] rjson_0.2.21 nloptr_2.0.3
[111] checkmate_2.3.0 nlme_3.1-163
[113] zoo_1.8-12 GlobalOptions_0.1.2
[115] KernSmooth_2.23-22 miniUI_0.1.1.1
[117] foreign_0.8-86 pillar_1.9.0
[119] vctrs_0.6.3 RANN_2.6.1
[121] promises_1.2.0.1 BiocSingular_1.16.0
[123] beachmat_2.16.0 xtable_1.8-4
[125] cluster_2.1.6 htmlTable_2.4.1
[127] evaluate_0.21 zeallot_0.1.0
[129] cli_3.6.1 compiler_4.3.2
[131] rlang_1.1.1 crayon_1.5.2
[133] future.apply_1.11.0 labeling_0.4.2
[135] plyr_1.8.8 stringi_1.8.1
[137] viridisLite_0.4.2 deldir_2.0-2
[139] BiocParallel_1.34.2 assertthat_0.2.1
[141] munsell_0.5.0 lazyeval_0.2.2
[143] spatstat.geom_3.2-4 PCAtools_2.14.0
[145] Matrix_1.6-3 RcppHNSW_0.6.0
[147] sparseMatrixStats_1.12.2 future_1.33.0
[149] ggplot2_3.5.0 shiny_1.8.0
[151] ROCR_1.0-11 igraph_2.0.3