library(COTAN)
library(ComplexHeatmap)
library(circlize)
library(dplyr)
library(Hmisc)
library(Seurat)
library(patchwork)
library(Rfast)
library(parallel)
library(doParallel)
library(HiClimR)
library(stringr)
library(fst)
options(parallelly.fork.enable = TRUE)
<- "Data/Yuzwa_MouseCortex/CorticalCells_GSM2861511_E135.cotan.RDS"
dataSetFile <- str_split(dataSetFile,pattern = "/",simplify = T)[3]
name <- str_remove(name,pattern = ".RDS")
name
= "E13.5"
project
setLoggingLevel(1)
<- "CoexData/"
outDir setLoggingFile(paste0(outDir, "Logs/",name,".log"))
<- readRDS(dataSetFile)
obj = getMetadataElement(obj, datasetTags()[["cond"]]) file_code
Gene Correlation Analysis E13.5 for Mouse Cortex
Prologue
source("src/Functions.R")
To compare the ability of COTAN to asses the real correlation between genes we define some pools of genes:
- Constitutive genes
- Neural progenitor genes
- Pan neuronal genes
- Some layer marker genes
<- list(
genesList "NPGs"=
c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
"PNGs"=
c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
"hk"=
c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
"Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
"Tars", "Amacr"),
"layers" =
c("Reln","Lhx5","Cux1","Satb2","Tle1","Mef2c","Rorb","Sox5","Bcl11b","Fezf2","Foxp2","Ntf3","Rasgrf2","Pvrl3", "Cux2","Slc17a6", "Sema3c","Thsd7a", "Sulf2", "Kcnk2","Grik3", "Etv1", "Tle4", "Tmem200a", "Glra2", "Etv1","Htr1f", "Sulf1","Rxfp1", "Syt6")
# From https://www.science.org/doi/10.1126/science.aam8999
)
COTAN
<-getGenes(obj) int.genes
<- getGenesCoex(obj)#[int.genes,int.genes]
coexMat.big
<- getGenesCoex(obj)[c(genesList$NPGs,genesList$hk,genesList$PNGs),c(genesList$NPGs,genesList$hk,genesList$PNGs)]
coexMat
= colorRamp2(seq(-0.5,0.5, length = 3), c("#DC0000B2", "white","#3C5488B2" ))
f1
<- factor(c(rep("NPGs",length(genesList[["NPGs"]])),
split.genes rep("HK",length(genesList[["hk"]])),
rep("PNGs",length(genesList[["PNGs"]]))
),levels = c("NPGs","HK","PNGs"))
= Legend(col_fun = f1, title = "COTAN coex")
lgd
<- Heatmap(as.matrix(coexMat),
htmp #width = ncol(coexMat)*unit(2.5, "mm"),
height = nrow(coexMat)*unit(3, "mm"),
cluster_rows = FALSE,
cluster_columns = FALSE,
col = f1,
row_names_side = "left",
row_names_gp = gpar(fontsize = 11),
column_names_gp = gpar(fontsize = 11),
column_split = split.genes,
row_split = split.genes,
cluster_row_slices = FALSE,
cluster_column_slices = FALSE,
heatmap_legend_param = list(
title = "COTAN coex", at = c(-0.5, 0, 0.5),direction = "horizontal",
labels = c("-0.5", "0", "0.5")
)
)draw(htmp, heatmap_legend_side="right")
<- calculateGDI(obj)
GDI_DF $geneType <- NA
GDI_DFfor (cat in names(genesList)) {
rownames(GDI_DF) %in% genesList[[cat]],]$geneType <- cat
GDI_DF[
}
$GDI_centered <- scale(GDI_DF$GDI,center = T,scale = T)
GDI_DF
write.csv(GDI_DF,paste0("CoexData/","Variance_GDI_genes",file_code,".csv"))
unlist(genesList),] GDI_DF[
sum.raw.norm GDI exp.cells geneType GDI_centered
Nes 6.408692 2.647419 23.1115108 NPGs 4.418329327
Vim 7.312324 2.551967 41.3669065 NPGs 4.083250142
Sox2 6.185654 2.911622 19.1546763 NPGs 5.345792150
Sox1 3.559135 1.880001 2.7877698 NPGs 1.724370053
Notch1 4.800041 2.166044 7.4640288 NPGs 2.728500194
Hes1 4.886836 2.553616 7.9136691 NPGs 4.089038941
Hes5 5.693747 2.751355 11.5107914 NPGs 4.783186933
Pax6 5.993715 2.670699 17.7158273 NPGs 4.500049648
Map2 7.442345 2.359424 59.3525180 PNGs 3.407343330
Tubb3 8.588602 2.719463 79.4964029 PNGs 4.671233319
Neurod1 6.711720 2.015698 26.1690647 PNGs 2.200723866
Nefm 4.837197 1.791972 8.4532374 PNGs 1.415351634
Nefl 3.221387 1.405750 1.8884892 PNGs 0.059550935
Dcx 7.327976 2.605916 53.6870504 PNGs 4.272634438
Tbr1 6.713621 2.557574 37.1402878 PNGs 4.102935005
Calm1 8.848612 1.258354 93.7949640 hk -0.457871717
Cox6b1 7.730664 1.403952 77.6079137 hk 0.053235962
Ppia 5.880634 1.517396 24.1007194 hk 0.451472872
Rpl18 7.242657 1.440153 61.6906475 hk 0.180318242
Cox7c 6.139111 1.409634 31.8345324 hk 0.073183680
Erh 4.868970 1.538559 10.9712230 hk 0.525765263
H3f3a 7.360796 1.375840 68.1654676 hk -0.045447274
Taf1 5.871166 1.296737 20.7733813 hk -0.323130578
Taf2 5.310284 1.282004 13.4892086 hk -0.374852844
Gapdh 3.206206 1.449379 2.3381295 hk 0.212705058
Actb 9.921265 0.894289 99.1906475 hk -1.735893435
Golph3 5.007818 1.281697 10.1618705 hk -0.375928017
Zfr 6.490652 1.446589 35.9712230 hk 0.202909958
Sub1 7.383003 1.618534 62.5899281 hk 0.806510422
Tars 5.450030 1.285601 16.0071942 hk -0.362225062
Amacr 3.191098 1.096773 1.5287770 hk -1.025089118
Reln 3.726806 1.242825 2.1582734 layers -0.512384761
NA NA NA NA <NA> NA
Cux1 5.933575 1.939433 20.8633094 layers 1.933000923
Satb2 5.368174 1.665243 12.6798561 layers 0.970477743
Tle1 4.897234 1.760796 9.8021583 layers 1.305910297
Mef2c 6.152818 2.759922 21.0431655 layers 4.813259827
Rorb 4.875888 1.660140 8.3633094 layers 0.952565229
Sox5 7.276191 2.404274 47.9316547 layers 3.564787044
Bcl11b 7.571149 2.656745 54.0467626 layers 4.451065776
Fezf2 6.718361 2.297884 36.1510791 layers 3.191312576
Foxp2 4.357485 1.350679 4.9460432 layers -0.133772145
Ntf3 2.147161 1.266745 0.8093525 layers -0.428416587
NA.1 NA NA NA <NA> NA
Pvrl3 4.546763 1.600616 6.8345324 layers 0.743609475
Cux2 4.718140 1.538965 6.7446043 layers 0.527188512
Slc17a6 5.699010 1.917841 14.0287770 layers 1.857203373
Sema3c 4.866232 1.435579 7.1942446 layers 0.164262095
Thsd7a 3.836608 1.257654 1.7086331 layers -0.460330847
Sulf2 2.671440 1.498034 1.6187050 layers 0.383504826
Kcnk2 4.495542 1.271818 6.1151079 layers -0.410607221
Grik3 4.020369 1.928627 3.8669065 layers 1.895067663
Etv1 3.735043 1.388003 2.5179856 layers -0.002748532
Tle4 5.288375 1.877388 10.9712230 layers 1.715197034
Tmem200a 2.905427 1.228125 1.5287770 layers -0.563988740
Glra2 4.296262 2.230806 6.3848921 layers 2.955840432
Etv1.1 3.735043 1.388003 2.5179856 layers -0.002748532
NA.2 NA NA NA <NA> NA
Sulf1 1.254408 1.057413 0.3597122 layers -1.163260269
NA.3 NA NA NA <NA> NA
Syt6 4.757203 2.197045 7.5539568 layers 2.837327266
GDIPlot(obj,GDIIn = GDI_DF, genes = genesList,GDIThreshold = 1.4)
Seurat correlation
<- CreateSeuratObject(counts = getRawData(obj),
sratproject = project,
min.cells = 3,
min.features = 200)
"percent.mt"]] <- PercentageFeatureSet(srat, pattern = "^mt-")
srat[[<- NormalizeData(srat)
srat <- FindVariableFeatures(srat, selection.method = "vst", nfeatures = 2000)
srat
# plot variable features with and without labels
<- VariableFeaturePlot(srat)
plot1
$data$centered_variance <- scale(plot1$data$variance.standardized,
plot1center = T,scale = F)
write.csv(plot1$data,paste0("CoexData/",
"Variance_Seurat_genes",
getMetadataElement(obj,
datasetTags()[["cond"]]),".csv"))
LabelPoints(plot = plot1, points = c(genesList$NPGs,genesList$PNGs,genesList$layers), repel = TRUE)
LabelPoints(plot = plot1, points = c(genesList$hk), repel = TRUE)
<- rownames(srat)
all.genes <- ScaleData(srat, features = all.genes)
srat = GetAssayData(srat[["RNA"]],layer = "data") seurat.data
<- correlation_pvalues(data = seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 7108938 379.7 11454455 611.8 11454455 611.8
Vcells 595844235 4546.0 957583000 7305.8 957583000 7305.8
draw(htmp, heatmap_legend_side="right")
Seurat SC Transform
<- SCTransform(srat,
srat method = "glmGamPoi",
vars.to.regress = "percent.mt",
verbose = FALSE)
<- GetAssayData(srat[["SCT"]],layer = "data")
seurat.data
#Remove genes with all zeros
<-seurat.data[rowSums(seurat.data) > 0,]
seurat.data
<- correlation_pvalues(seurat.data,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
seurat.data.cor.big
<- correlation_plot(seurat.data.cor.big,
htmp title="Seurat corr SCT")
genesList,
<- corr.pval.list$p_values
p_values.fromSeurat <- corr.pval.list$data.cor
seurat.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 8837923 472.0 16667718 890.2 13557124 724.1
Vcells 520365727 3970.1 1189358788 9074.1 1099004564 8384.8
draw(htmp, heatmap_legend_side="right")
Monocle
library(monocle3)
<- new_cell_data_set(getRawData(obj),
cds cell_metadata = getMetadataCells(obj),
gene_metadata = getMetadataGenes(obj)
)<- preprocess_cds(cds, num_dim = 100)
cds
<- normalized_counts(cds) normalized_counts
#Remove genes with all zeros
<- normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
rm(normalized_counts)
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
monocle.data.cor.big
<- correlation_plot(data.cor.big = monocle.data.cor.big,
htmp
genesList,title = "Monocle corr")
<- corr.pval.list$p_values
p_values.from.monocle <- corr.pval.list$data.cor
monocle.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9949636 531.4 16667718 890.2 16667718 890.2
Vcells 610053653 4654.4 1301028864 9926.1 1301028864 9926.1
draw(htmp, heatmap_legend_side="right")
ScanPy
library(reticulate)
<- paste0("CoexData/ScanPy/")
dirOutScP if (!dir.exists(dirOutScP)) {
dir.create(dirOutScP)
}
Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
<- import("sys")
py
source_python("src/scanpyGenesExpression.py")
scanpyFDR(getRawData(obj),
getMetadataCells(obj),
getMetadataGenes(obj),
"mt",
dirOutScP,
file_code,
int.genes)
<- read.csv(paste0(dirOutScP,
normalized_counts "_Scampy_expression_all_genes.gz"),header = T,row.names = 1)
file_code,
<- t(normalized_counts) normalized_counts
#Remove genes with all zeros
<-normalized_counts[rowSums(normalized_counts) > 0,]
normalized_counts
<- correlation_pvalues(normalized_counts,
corr.pval.list
int.genes,n.cells = getNumCells(obj))
<- as.matrix(Matrix::forceSymmetric(corr.pval.list$data.cor, uplo = "U"))
ScanPy.data.cor.big
<- correlation_plot(data.cor.big = ScanPy.data.cor.big,
htmp
genesList,title = "ScanPy corr")
<- corr.pval.list$p_values
p_values.from.ScanPy <- corr.pval.list$data.cor
ScanPy.data.cor.big
rm(corr.pval.list)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 9973487 532.7 16667718 890.2 16667718 890.2
Vcells 582421870 4443.6 1366198826 10423.3 1366198826 10423.3
draw(htmp, heatmap_legend_side="right")
Sys.time()
[1] "2024-05-06 12:11:56 CEST"
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] C.UTF-8
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats4 parallel grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] reticulate_1.36.1 monocle3_1.3.4
[3] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[5] GenomicRanges_1.52.0 GenomeInfoDb_1.36.1
[7] IRanges_2.34.1 S4Vectors_0.38.1
[9] MatrixGenerics_1.12.3 matrixStats_1.2.0
[11] Biobase_2.60.0 BiocGenerics_0.46.0
[13] fstcore_0.9.18 fst_0.9.8
[15] stringr_1.5.0 HiClimR_2.2.1
[17] doParallel_1.0.17 iterators_1.0.14
[19] foreach_1.5.2 Rfast_2.1.0
[21] RcppParallel_5.1.7 RcppZiggurat_0.1.6
[23] Rcpp_1.0.11 patchwork_1.2.0
[25] Seurat_5.0.0 SeuratObject_5.0.0
[27] sp_2.1-1 Hmisc_5.1-0
[29] dplyr_1.1.2 circlize_0.4.15
[31] ComplexHeatmap_2.16.0 COTAN_2.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 bitops_1.0-7
[5] tibble_3.2.1 polyclip_1.10-4
[7] rpart_4.1.23 fastDummies_1.7.3
[9] lifecycle_1.0.3 globals_0.16.2
[11] lattice_0.22-5 MASS_7.3-60
[13] backports_1.4.1 dendextend_1.17.1
[15] magrittr_2.0.3 plotly_4.10.2
[17] rmarkdown_2.24 yaml_2.3.7
[19] httpuv_1.6.11 glmGamPoi_1.12.2
[21] sctransform_0.4.1 spam_2.10-0
[23] askpass_1.2.0 spatstat.sparse_3.0-2
[25] minqa_1.2.5 cowplot_1.1.1
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] zlibbioc_1.46.0 abind_1.4-5
[31] Rtsne_0.17 purrr_1.0.1
[33] RCurl_1.98-1.12 nnet_7.3-19
[35] GenomeInfoDbData_1.2.10 ggrepel_0.9.5
[37] irlba_2.3.5.1 listenv_0.9.0
[39] spatstat.utils_3.0-3 terra_1.7-39
[41] umap_0.2.10.0 goftest_1.2-3
[43] RSpectra_0.16-1 spatstat.random_3.2-1
[45] dqrng_0.3.0 fitdistrplus_1.1-11
[47] parallelly_1.37.1 DelayedMatrixStats_1.22.5
[49] ncdf4_1.22 leiden_0.4.3
[51] codetools_0.2-19 DelayedArray_0.26.7
[53] tidyselect_1.2.0 shape_1.4.6
[55] farver_2.1.1 lme4_1.1-34
[57] ScaledMatrix_1.8.1 viridis_0.6.4
[59] base64enc_0.1-3 spatstat.explore_3.2-1
[61] jsonlite_1.8.7 GetoptLong_1.0.5
[63] ellipsis_0.3.2 progressr_0.14.0
[65] Formula_1.2-5 ggridges_0.5.4
[67] survival_3.5-8 tools_4.3.2
[69] ica_1.0-3 glue_1.7.0
[71] gridExtra_2.3 xfun_0.39
[73] ggthemes_5.1.0 withr_3.0.0
[75] fastmap_1.1.1 boot_1.3-28
[77] fansi_1.0.4 openssl_2.1.0
[79] digest_0.6.33 rsvd_1.0.5
[81] parallelDist_0.2.6 R6_2.5.1
[83] mime_0.12 colorspace_2.1-0
[85] Cairo_1.6-1 scattermore_1.2
[87] tensor_1.5 spatstat.data_3.0-1
[89] utf8_1.2.3 tidyr_1.3.0
[91] generics_0.1.3 data.table_1.15.0
[93] httr_1.4.6 htmlwidgets_1.6.2
[95] S4Arrays_1.2.0 uwot_0.1.16
[97] pkgconfig_2.0.3 gtable_0.3.3
[99] lmtest_0.9-40 XVector_0.40.0
[101] htmltools_0.5.8 dotCall64_1.1-0
[103] clue_0.3-64 scales_1.3.0
[105] png_0.1-8 knitr_1.43
[107] rstudioapi_0.15.0 reshape2_1.4.4
[109] rjson_0.2.21 nloptr_2.0.3
[111] checkmate_2.3.0 nlme_3.1-163
[113] zoo_1.8-12 GlobalOptions_0.1.2
[115] KernSmooth_2.23-22 miniUI_0.1.1.1
[117] foreign_0.8-86 pillar_1.9.0
[119] vctrs_0.6.3 RANN_2.6.1
[121] promises_1.2.0.1 BiocSingular_1.16.0
[123] beachmat_2.16.0 xtable_1.8-4
[125] cluster_2.1.6 htmlTable_2.4.1
[127] evaluate_0.21 zeallot_0.1.0
[129] cli_3.6.1 compiler_4.3.2
[131] rlang_1.1.1 crayon_1.5.2
[133] future.apply_1.11.0 labeling_0.4.2
[135] plyr_1.8.8 stringi_1.8.1
[137] viridisLite_0.4.2 deldir_2.0-2
[139] BiocParallel_1.34.2 assertthat_0.2.1
[141] munsell_0.5.0 lazyeval_0.2.2
[143] spatstat.geom_3.2-4 PCAtools_2.14.0
[145] Matrix_1.6-3 RcppHNSW_0.6.0
[147] sparseMatrixStats_1.12.2 future_1.33.0
[149] ggplot2_3.5.0 shiny_1.8.0
[151] ROCR_1.0-11 igraph_2.0.3