library(COTAN)
options(parallelly.fork.enable = TRUE)
library(Seurat)
library(monocle3)
library(reticulate)
library(stringr)
library(dplyr)
<- "Results/FDR/"
dirOut if (!dir.exists(dirOut)) {
dir.create(dirOut)
}
<- "Data/MouseCortexFromLoom/FDR/MergedClusters_For_FDR/" dataSetDir
FDR analysis - define DE genes
Automatic functions
<- function(dataSet,clusters.list, project, dirOut#,percentage
Seurat.DEA
){<- CreateSeuratObject(counts = dataSet,
pbmc project = project, min.cells = 3, min.features = 20)
stopifnot(length(clusters.list)==length(pbmc$orig.ident))
<- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmc <- rownames(pbmc)
all.genes <- ScaleData(pbmc, features = all.genes)
pbmc
<- RunPCA(pbmc)
pbmc <- RunUMAP(pbmc, dims = 1:20)
pbmc
@meta.data$TestCl <- NA
pbmc@meta.data[names(clusters.list),]$TestCl <- factor(clusters.list)
pbmc
<- SetIdent(pbmc,value = "TestCl")
pbmc
<- FindAllMarkers(pbmc, only.pos = TRUE)
pbmc.markers
<- sum(pbmc.markers$p_val_adj < 0.05)
n.genes.DEA
print(n.genes.DEA)
#write.csv(pbmc.markers, file = paste0(dirOut,project,"_de_genes_Seurat_",percentage,".csv"))
return(list("n.genes.DEA"=n.genes.DEA,"markers"= pbmc.markers))
}
<- function(dataSet,clusters.list, project, dirOut#,percentage
Seurat.DEA.bimod
){<- CreateSeuratObject(counts = dataSet,
pbmc project = project, min.cells = 3, min.features = 20)
stopifnot(length(clusters.list)==length(pbmc$orig.ident))
<- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmc <- rownames(pbmc)
all.genes <- ScaleData(pbmc, features = all.genes)
pbmc
<- RunPCA(pbmc)
pbmc <- RunUMAP(pbmc, dims = 1:20)
pbmc
@meta.data$TestCl <- NA
pbmc@meta.data[names(clusters.list),]$TestCl <- factor(clusters.list)
pbmc
<- SetIdent(pbmc,value = "TestCl")
pbmc
<- FindAllMarkers(pbmc, only.pos = TRUE,test.use = "bimod" )
pbmc.markers
<- sum(pbmc.markers$p_val_adj < 0.05)
n.genes.DEA
print(n.genes.DEA)
#write.csv(pbmc.markers, file = paste0(dirOut,project,"_de_genes_Seurat_",percentage,".csv"))
return(list("n.genes.DEA"=n.genes.DEA,"markers"= pbmc.markers))
}
<- function(dataSet,clusters.list, project, dirOut#,percentage
Seurat.DEAScTransform
){<- CreateSeuratObject(counts = dataSet,
pbmc project = project, min.cells = 3, min.features = 20)
stopifnot(length(clusters.list)==length(pbmc$orig.ident))
# store mitochondrial percentage in object meta data
<- PercentageFeatureSet(pbmc, pattern = "^mt-", col.name = "percent.mt")
pbmc
# run sctransform
<- SCTransform(pbmc, vars.to.regress = "percent.mt", verbose = FALSE)
pbmc
<- RunPCA(pbmc)
pbmc <- RunUMAP(pbmc, dims = 1:20)
pbmc
@meta.data$TestCl <- NA
pbmc@meta.data[names(clusters.list),]$TestCl <- factor(clusters.list)
pbmc
<- SetIdent(pbmc,value = "TestCl")
pbmc
<- FindAllMarkers(pbmc, only.pos = TRUE)
pbmc.markers
<- sum(pbmc.markers$p_val_adj < 0.05)
n.genes.DEA
print(n.genes.DEA)
#write.csv(pbmc.markers, file = paste0(dirOut,project,"_de_genes_Seurat_",percentage,".csv"))
return(list("n.genes.DEA"=n.genes.DEA,"markers"= pbmc.markers))
}
<- function(dataSet,clusters.list,project, dirOut#,percentage
Monocle.DEA
){
= as.data.frame(clusters.list[colnames(dataSet)])
cell_metadata colnames(cell_metadata) <- "Clusters"
<- new_cell_data_set(dataSet[rowSums(dataSet) > 3,],
cds cell_metadata = cell_metadata
)colData(cds)$cluster <- clusters.list[rownames(colData(cds))]
#cds <- preprocess_cds(cds, num_dim = 100)
#cds <- reduce_dimension(cds)
#cds <- cluster_cells(cds, resolution=1e-5)
<- top_markers(cds,
marker_test_res group_cells_by="Clusters",
genes_to_test_per_group = dim(cds)[1],
cores=10)
# de_results <- fit_models(cds,model_formula_str = " ~ cluster",cores = 10,verbose = T)
# fit_coefs <- coefficient_table(de_results)
#
# fit_coefs <- fit_coefs %>% filter(grepl("cluster",term))
# fit_coefs <- as.data.frame(fit_coefs)
#write.csv(as.data.frame(fit_coefs[,c("num_cells_expressed" ,"gene_id","p_value","q_value")]), file = paste0(dirOut,project,"_de_genes_Monocle_",percentage,".csv"))
return(list("n.genes.DEA"=sum(marker_test_res$marker_test_q_value < 0.05, na.rm = T),
"marker_test_res"= marker_test_res
))
}
<- function(dataSet,
ScamPy.DEA
clusters.list,
project,
dirOut,
percentage){<- CreateSeuratObject(counts = dataSet, project = project, min.cells = 3, min.features = 20)
pbmc
@meta.data$TestCl <- NA
pbmc@meta.data[names(clusters.list),]$TestCl <- clusters.list
pbmc
<- pbmc@assays$RNA$counts
exprs
<- pbmc[[]]
meta #feature_meta <- GetAssay(pbmc)[[]]
<- as.data.frame(matrix(data = NA,
tmp ncol = 1,
nrow = nrow(pbmc@assays$RNA$counts)))
rownames(tmp) <- rownames(pbmc@assays$RNA$counts)
<- tmp
feature_meta #embedding <- Embeddings(pbmc, "umap")
Sys.setenv(RETICULATE_PYTHON = "../../../bin/python3")
<- import("sys")
py
source_python("src/scanpyFDR.py")
scanpyFDR(exprs,
meta,
feature_meta, "mt",
dirOut,
project)
<- read.csv(file = paste0(dirOut,
out
project,"_Scampy_DEA_all_genes.csv"
),header = T,
row.names = 1)
gc()
return(out)
}
Cluster Gene Enrichment
<- read.csv(file.path(dataSetDir,"Cells_Usage_DataFrame.csv"),
datasets_csv row.names = 1
)
datasets_csv
Group
1 2_Clusters_even_near
2 2_Clusters_even_near
3 2_Clusters_even_near
4 2_Clusters_even_medium
5 2_Clusters_even_medium
6 2_Clusters_even_medium
7 2_Clusters_even_far
8 2_Clusters_even_far
9 2_Clusters_even_far
10 2_Clusters_uneven_near
11 2_Clusters_uneven_near
12 2_Clusters_uneven_near
13 2_Clusters_uneven_medium
14 2_Clusters_uneven_medium
15 2_Clusters_uneven_medium
16 2_Clusters_uneven_far
17 2_Clusters_uneven_far
18 2_Clusters_uneven_far
19 3_Clusters_even
20 3_Clusters_even
21 3_Clusters_even
22 3_Clusters_uneven
23 3_Clusters_uneven
24 3_Clusters_uneven
25 5_Clusters_uneven
26 5_Clusters_uneven
27 5_Clusters_uneven
Collection E13.5.432
1 E13.5-434_+_E15.0-428 0
2 E15.0-432_+_E13.5-432 536
3 E15.0-508_+_E15.0-509 0
4 E13.5-187_+_E13.5-184 0
5 E15.0-434_+_E17.5-516 0
6 E15.0-437_+_E15.0-508 0
7 E17.5-516_+_E13.5-187 0
8 E15.0-510_+_E13.5-437 0
9 E15.0-509_+_E13.5-184 0
10 E13.5-434_+_E15.0-428 0
11 E15.0-432_+_E13.5-432 66
12 E15.0-508_+_E15.0-509 0
13 E13.5-187_+_E13.5-184 0
14 E15.0-434_+_E17.5-516 0
15 E15.0-437_+_E15.0-508 0
16 E17.5-516_+_E13.5-187 0
17 E15.0-510_+_E13.5-437 0
18 E15.0-509_+_E13.5-184 0
19 E15.0-437_+_E13.5-510_+_E13.5-437 0
20 E17.5-505_+_E17.5-516_+_E13.5-437 0
21 E15.0-510_+_E15.0-428_+_E13.5-510 0
22 E15.0-428_+_E13.5-434_+_E15.0-510 0
23 E13.5-187_+_E13.5-432_+_E15.0-432 168
24 E15.0-509_+_E13.5-184_+_E15.0-508 0
25 E13.5-510_+_E15.0-437_+_E15.0-510_+_E13.5-432_+_E13.5-437 518
26 E15.0-428_+_E13.5-434_+_E15.0-434_+_E17.5-505_+_E13.5-184 0
27 E13.5-432_+_E15.0-509_+_E15.0-432_+_E13.5-187_+_E15.0-508 440
E13.5.187 E13.5.434 E13.5.184 E13.5.437 E13.5.510 E15.0.432 E15.0.509
1 0 318 0 0 0 0 0
2 0 0 0 0 0 536 0
3 0 0 0 0 0 0 397
4 292 0 292 0 0 0 0
5 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0
7 297 0 0 0 0 0 0
8 0 0 0 259 0 0 0
9 0 0 292 0 0 0 292
10 0 326 0 0 0 0 0
11 0 0 0 0 0 586 0
12 0 0 0 0 0 0 402
13 334 0 38 0 0 0 0
14 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0
16 334 0 0 0 0 0 0
17 0 0 0 45 0 0 0
18 0 0 45 0 0 0 402
19 0 0 0 248 248 0 0
20 0 0 0 203 0 0 0
21 0 0 0 0 248 0 0
22 0 115 0 0 0 0 0
23 84 0 0 0 0 586 0
24 0 0 58 0 0 0 402
25 0 0 0 259 65 0 0
26 0 326 163 0 0 0 0
27 74 0 0 0 0 586 293
E15.0.510 E15.0.508 E15.0.428 E15.0.434 E15.0.437 E17.5.516 E17.5.505
1 0 0 318 0 0 0 0
2 0 0 0 0 0 0 0
3 0 397 0 0 0 0 0
4 0 0 0 0 0 0 0
5 0 0 0 273 0 273 0
6 0 258 0 0 258 0 0
7 0 0 0 0 0 297 0
8 259 0 0 0 0 0 0
9 0 0 0 0 0 0 0
10 0 0 37 0 0 0 0
11 0 0 0 0 0 0 0
12 0 45 0 0 0 0 0
13 0 0 0 0 0 0 0
14 0 0 0 33 0 297 0
15 0 397 0 0 45 0 0
16 0 0 0 0 0 38 0
17 402 0 0 0 0 0 0
18 0 0 0 0 0 0 0
19 0 0 0 0 248 0 0
20 0 0 0 0 0 203 203
21 248 0 248 0 0 0 0
22 402 0 58 0 0 0 0
23 0 0 0 0 0 0 0
24 0 115 0 0 0 0 0
25 389 0 0 0 65 0 0
26 0 0 245 41 0 0 41
27 0 74 0 0 0 0 0
DEA genes for COTAN
for (ind in 1:dim(datasets_csv)[1]) {
#print(ind)
<- paste0(datasets_csv$Group[ind],"_",datasets_csv$Collection[ind])
file.code <- readRDS(file = file.path(dataSetDir,paste0(file.code,".RDS")))
dataset <- getClusterizationData(dataset,clName = "mergedClusters")[[2]]
deaCOTAN <- pValueFromDEA(deaCOTAN,
pvalCOTAN numCells = getNumCells(dataset),method = "bonferroni")
<- deaCOTAN[rowMin(as.matrix(pvalCOTAN)) < 0.05,]
df.genes
write.csv(df.genes,file.path(dirOut,paste0(file.code,"COTAN_DEA_genes.csv")))
}
DEA genes for Seurat
for (ind in 1:dim(datasets_csv)[1]) {
#print(ind)
<- paste0(datasets_csv$Group[ind],"_",datasets_csv$Collection[ind])
file.code <- readRDS(file = file.path(dataSetDir,paste0(file.code,".RDS")))
dataset
<- getClusterizationData(dataset,clName = "mergedClusters")[[1]]
clusters <- Seurat.DEA(getRawData(dataset),
deaSeurat clusters.list = clusters,
project = file.code,
dirOut = dirOut)
write.csv(deaSeurat$markers,
file.path(dirOut,paste0(file.code,"Seurat_DEA_genes.csv")))
}
DEA genes for Seurat scTransform
for (ind in 1:dim(datasets_csv)[1]) {
#print(ind)
<- paste0(datasets_csv$Group[ind],"_",datasets_csv$Collection[ind])
file.code <- readRDS(file = file.path(dataSetDir,paste0(file.code,".RDS")))
dataset
<- getClusterizationData(dataset,clName = "mergedClusters")[[1]]
clusters <- Seurat.DEAScTransform(getRawData(dataset),
deaSeurat clusters.list = clusters,
project = file.code,
dirOut = dirOut)
write.csv(deaSeurat$markers,
file.path(dirOut,paste0(file.code,"Seurat_DEA_ScTransform_genes.csv")))
}
DEA genes for Seurat test: bimod
for (ind in 1:dim(datasets_csv)[1]) {
#print(ind)
<- paste0(datasets_csv$Group[ind],"_",datasets_csv$Collection[ind])
file.code <- readRDS(file = file.path(dataSetDir,paste0(file.code,".RDS")))
dataset
<- getClusterizationData(dataset,clName = "mergedClusters")[[1]]
clusters <- Seurat.DEA.bimod(getRawData(dataset),
deaSeurat clusters.list = clusters,
project = file.code,
dirOut = dirOut)
write.csv(deaSeurat$markers,
file.path(dirOut,paste0(file.code,"Seurat_DEA_Bimod_genes.csv")))
}
DEA from Monocle
for (ind in 1:dim(datasets_csv)[1]) {
<- paste0(datasets_csv$Group[ind],"_",datasets_csv$Collection[ind])
file.code <- readRDS(file = file.path(dataSetDir,paste0(file.code,".RDS")))
dataset print(file.code)
<- getClusterizationData(dataset,clName = "mergedClusters")[[1]]
clusters
<- Monocle.DEA(dataSet = getRawData(dataset),
deaMonocle clusters.list = clusters,
project = file.code,
dirOut = dirOut)
# deaMonocleOut <- deaMonocle$fit_coefs[,c("num_cells_expressed",
# "gene_id","term","estimate",
# "std_err", "test_val","p_value",
# "normalized_effect","q_value")]
write.csv(deaMonocle$marker_test_res,
file.path(dirOut,paste0(file.code,"Monocle_DEA_genes.csv")))
#rm(deaMonocleOut)
rm(deaMonocle)
gc()
}
DEA from ScamPy
for (ind in 1:dim(datasets_csv)[1]) {
<- paste0(datasets_csv$Group[ind],"_",datasets_csv$Collection[ind])
file.code <- readRDS(file = file.path(dataSetDir,paste0(file.code,".RDS")))
dataset
<- getClusterizationData(dataset,clName = "mergedClusters")[[1]]
clusters
<- ScamPy.DEA(dataSet = getRawData(dataset),
deaScamPy clusters.list = clusters,
project = file.code,
dirOut = dirOut)
write.csv(deaScamPy,
file.path(dirOut,paste0(file.code,"ScamPy_DEA_genes.csv")))
}