library(assertthat)
library(rlang)
library(scales)
library(ggplot2)
library(zeallot)
library(data.table)
library(parallelDist)
library(tidyr)
library(tidyverse)
library(ggh4x)
library(gridExtra)
#options(parallelly.fork.enable = TRUE)
<- file.path("Results")
inDir
#setLoggingLevel(2)
#setLoggingFile(file.path(inDir, "MixingClustersGDI_ForebrainDorsal.log"))
<- file.path(inDir, "GDI_Sensitivity")
outDir if (!file.exists(outDir)) {
dir.create(outDir)
}
GDI Increment From Mixing
Preamble
Load calculated data for analysis
<- readRDS(file.path(outDir, paste0("GDI_with_05%_Mixing.RDS")))
resMix05 <- readRDS(file.path(outDir, paste0("GDI_with_10%_Mixing.RDS")))
resMix10 <- readRDS(file.path(outDir, paste0("GDI_with_20%_Mixing.RDS")))
resMix20 <- readRDS(file.path(outDir, paste0("GDI_with_40%_Mixing.RDS")))
resMix40 <- readRDS(file.path(outDir, paste0("GDI_with_80%_Mixing.RDS")))
resMix80
# create an aligned baseline
<- resMix05
resMix00 "MixingFraction"]] <- 0.0
resMix00[["GDI"]] <- resMix00[["GDI"]] - resMix00[["GDIIncrement"]]
resMix00[["GDIIncrement"]] <- 0.0 resMix00[[
Recall cluster distance and add it to the results
<- readRDS(file.path(outDir, "allZeroOne.RDS"))
zeroOneAvg <- as.matrix(parDist(t(zeroOneAvg), method = "hellinger", diag = TRUE, upper = TRUE))^2
distZeroOne
<- rownames_to_column(as.data.frame(distZeroOne), var = "MainCluster")
distZeroOneLong <-pivot_longer(distZeroOneLong,
distZeroOneLong cols = !MainCluster,
names_to = "OtherCluster",
values_to = "Distance")
<- as.data.frame(distZeroOneLong[distZeroOneLong[["Distance"]] != 0.0, ])
distZeroOneLong
assert_that(identical(distZeroOneLong[, 1:2], resMix20[, 1:2]))
[1] TRUE
<- order(distZeroOneLong[["Distance"]]) perm
# Scatter plot of the effective increment at 40% mixing [Y]
# against estimated distance [X]
<- cbind(distZeroOneLong[, "Distance", drop = FALSE],
distDF sqrt(distZeroOneLong[, "Distance", drop = FALSE]))
colnames(distDF) <- c("Distance", "DistanceSqrt")
<- ggplot(cbind(resMix40, distDF),
D2IPlot aes(x=Distance, y=GDIIncrement)) +
geom_point() +
geom_smooth(method=lm, formula = y ~ x)+
ylab("Top GDI percentile Increment")
# + xlim(0, 1.5) + ylim(0, 1.5) + coord_fixed()
plot(D2IPlot)
Merge all data and plot it using the distance as discriminant
<- rbind(resMix05[perm, ], resMix10[perm, ], resMix20[perm, ], resMix40[perm, ], resMix80[perm, ])
allRes <- cbind(allRes, "Distance" = rep(distZeroOneLong[["Distance"]][perm], 5))
allRes rownames(allRes) <- NULL
<- cbind(allRes, "ClusterPair" = rep.int(c(1:210),5))
allRes
<- cbind(resMix00[perm, ], "Distance" = distZeroOneLong[["Distance"]][perm])
allResWithBase rownames(allResWithBase) <- NULL
<- cbind(allResWithBase, "ClusterPair" = c(1:210))
allResWithBase <- rbind(allResWithBase, allRes)
allResWithBase
assert_that(identical(allRes[, 4], allResWithBase[211:1260, 4]))
[1] TRUE
dim(allRes)
[1] 1050 7
<- ggplot(allRes, aes(x=MixingFraction, y=GDIIncrement, color=Distance,)) +
IScPlot geom_point() +
scale_color_continuous(type = "viridis") +
scale_x_log10()+
ylab("Top GDI percentile Increment")
plot(IScPlot)
<- ggplot(allResWithBase, aes(x=MixingFraction, y=GDI, color=Distance,)) +
GScPlot geom_point() +
scale_color_continuous(type = "viridis")+
ylab("Top GDI percentile")
plot(GScPlot)
<- function(df, numBlocks) {
reOrder <- nrow(df) / numBlocks
blockLength <- rep(1:blockLength, each = numBlocks) +
permut rep(seq(0, nrow(df) - 1, by = blockLength), times = numBlocks)
return(df[permut, ])
}
<- reOrder(allRes, 5)
allRes2 <- reOrder(allResWithBase, 6) allResWithBase2
<- c(1,210)
rng #rng <- c(1,42)
#rng <- c(43,84)
#rng <- c(85,126)
#rng <- c(127,168)
#rng <- c(169,210)
<- ggplot(allRes2[allRes2[["ClusterPair"]] %between% rng, ],
ILinesPLot aes(x = MixingFraction, y = GDIIncrement,
color = (ClusterPair - 1) %/% 35 + 0.5)) +
geom_path(aes(group = ClusterPair)) +
theme(legend.position = "none") +
#scale_x_log10() +
scale_colour_stepsn(colours = hcl.colors(6, palette = "Dark 2")[6:1])+
ylab("Top GDI percentile Increment")
plot(ILinesPLot)
<- ggplot(allResWithBase2[allResWithBase2[["ClusterPair"]] %between% rng, ],
GLinesPLot aes(x = MixingFraction, y = GDI,
color = (ClusterPair - 1) %/% 35 + 0.5)) +
geom_path(aes(group = ClusterPair)) +
theme(legend.position = "none") +
#scale_x_log10() +
scale_colour_stepsn(colours = hcl.colors(6, palette = "Dark 2")[6:1]) +
ylab("Top GDI percentile")
# geom_line(data = data.frame(cbind(MixingFraction = c(0,0.8), GDI = c(1.4,1.4))),
# aes(x = MixingFraction, y = GDI))
plot(GLinesPLot)
<- function(mixings) {
mg <- mixings
res !=0 ] <- ceiling(log2(round(res[res !=0 ] * 40)))
res[res return(res)
}
<- c(1,35)
rng #rng <- c(43,84)
#rng <- c(85,126)
#rng <- c(127,168)
#rng <- c(169,210)
$Group <- factor((allRes$ClusterPair - 1) %/% 35 + 1)
allReslevels(allRes$Group) <- paste0("Distance bin ",c(1:6))
$discreteMixing <- factor(mg(allRes$MixingFraction))
allReslevels(allRes$discreteMixing) <- c("5%","10%","20%","40%","80%")
<- "
design ABC
DEF
DEF
"
<- c("Distance bin 1", "Distance bin 2", "Distance bin 3")
first.part <-c("Distance bin 4", "Distance bin 5", "Distance bin 6")
second.part
<- ggplot(allRes[allRes$Group %in% first.part,], aes(x=discreteMixing,
IBoxPlotA y=GDIIncrement,
fill=Group,
group = discreteMixing)) +
geom_boxplot()+
geom_jitter(width=0.25, alpha=0.5) +
scale_fill_manual(values = c("#0B5345","#117A65","#16A085")) +
facet_grid2(cols = vars(Group))+
theme_light()+
theme(legend.position = "none",axis.title.x=element_blank(),
axis.title.y =element_blank()) #+
#ylab("Top GDI percentile Increment")
<- ggplot(allRes[allRes$Group %in% second.part,], aes(x=discreteMixing,
IBoxPlotB y=GDIIncrement,
fill=Group,
group = discreteMixing)) +
geom_boxplot()+
geom_jitter(width=0.25, alpha=0.5) +
scale_fill_manual(values = c("#1ABC9C","#48C9B0","#A3E4D7")) +
facet_grid2(cols = vars(Group))+
theme_light()+
theme(legend.position = "none",axis.title.x=element_blank(),
axis.title.y =element_blank())
<- grid.arrange(IBoxPlotA,IBoxPlotB,ncol = 2, nrow = 3,
final.plot1 layout_matrix = rbind(c(1,1), c(2,2), c(2,2)))
pdf("Results/GDI_Sensitivity/GDI_increment.pdf",width = 7, height = 7)
plot(final.plot1)
dev.off()
png
2
plot(final.plot1)
$Group <- factor((allResWithBase$ClusterPair - 1) %/% 35 + 1)
allResWithBaselevels(allResWithBase$Group) <- paste0("Distance bin ",c(1:6))
$discreteMixing <- factor(mg(allResWithBase$MixingFraction))
allResWithBaselevels(allResWithBase$discreteMixing) <- c("0%","5%","10%","20%","40%","80%")
<- ggplot(allResWithBase[allResWithBase$Group %in% first.part,], aes(x=discreteMixing, y=GDI, fill=Group,
GBoxPlotA group = discreteMixing))+
geom_boxplot()+
geom_jitter(width=0.25, alpha=0.5) +
scale_fill_manual(values = c("#0B5345","#117A65","#16A085")) +
facet_grid2(cols = vars(Group))+
theme_light()+
theme(legend.position = "none",axis.title.x=element_blank(),
axis.title.y =element_blank()) #+
#ylab("Top GDI percentile Increment")
<- ggplot(allResWithBase[allResWithBase$Group %in% second.part,], aes(x=discreteMixing, y=GDI, fill=Group,
GBoxPlotB group = discreteMixing)) +
geom_boxplot()+
geom_jitter(width=0.25, alpha=0.5) +
scale_fill_manual(values = c("#1ABC9C","#48C9B0","#A3E4D7")) +
facet_grid2(cols = vars(Group))+
theme_light()+
theme(legend.position = "none",axis.title.x=element_blank(),
axis.title.y =element_blank())
<- grid.arrange(GBoxPlotA,GBoxPlotB,ncol = 2, nrow = 5,
final.plot2 layout_matrix = rbind(c(1,1),c(1,1),c(2,2), c(2,2), c(2,2)))
pdf("Results/GDI_Sensitivity/GDI_TopPercentile.pdf",width = 7, height = 7 )
plot(final.plot2)
dev.off()
png
2
plot(final.plot2)
Load calculated data about multi clusters cases for analysis
<- readRDS(file.path(outDir, paste0("GDI_with_20%_Mixing_from_Pairs.RDS")))
resMix20_2
dim(resMix20_2)
[1] 150 8
head(resMix20_2)
MainCluster OtherCluster1 MixingFraction1 OtherCluster2 MixingFraction2
1 E13.5:432 E13.5:187 0.14179104 E15.0:428 0.05970149
2 E13.5:432 E13.5:184 0.08208955 E15.0:509 0.11940299
3 E13.5:432 E13.5:437 0.05970149 E15.0:509 0.14365672
4 E13.5:432 E15.0:428 0.13432836 E17.5:516 0.06716418
5 E13.5:432 E15.0:432 0.09514925 E17.5:505 0.10820896
6 E13.5:432 E13.5:184 0.11380597 E17.5:516 0.08768657
GDI GDIIncrement PredictedGDIIncrement
1 2.807473 1.32277530 1.10291265
2 1.850392 0.36569464 0.35314603
3 1.574105 0.08940718 0.12012178
4 1.620614 0.13591610 0.17472537
5 1.520910 0.03621287 0.05547557
6 1.956113 0.47141577 0.52537039
Compare estimated vs real GDI increment
# Scatter plot of the effective increment [Y] against estimated increment [X]
<- ggplot(resMix20_2, aes(x=PredictedGDIIncrement, y=GDIIncrement)) +
pg geom_point() +
geom_smooth(method=lm, formula = y ~ x + 0) +
coord_fixed() +
xlim(0, 1.5) + ylim(0, 1.5)+
ylab("Top GDI percentile Increment")
#scale_x_log10() + scale_y_log10()
pdf("Results/GDI_Sensitivity/Estimated_VS_RealGDIincrement.pdf",width = 7, height = 7 )
plot(pg)
dev.off()
png
2
plot(pg)
The plot shows that having the 20% extraneous cells in the mixture coming from multiple clusters does not affect significantly the sensitivity of the GDI to score cluster uniformity.
sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] gridExtra_2.3 ggh4x_0.2.8 lubridate_1.9.3 forcats_1.0.0
[5] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
[9] tibble_3.2.1 tidyverse_2.0.0 tidyr_1.3.1 parallelDist_0.2.6
[13] data.table_1.15.4 zeallot_0.1.0 ggplot2_3.5.1 scales_1.3.0
[17] rlang_1.1.3 assertthat_0.2.1
loaded via a namespace (and not attached):
[1] utf8_1.2.4 generics_0.1.3 lattice_0.22-6 stringi_1.8.4
[5] hms_1.1.3 digest_0.6.35 magrittr_2.0.3 evaluate_0.23
[9] grid_4.4.0 timechange_0.3.0 fastmap_1.2.0 Matrix_1.7-0
[13] jsonlite_1.8.8 mgcv_1.9-1 fansi_1.0.6 viridisLite_0.4.2
[17] cli_3.6.2 splines_4.4.0 munsell_0.5.1 withr_3.0.0
[21] yaml_2.3.8 tools_4.4.0 tzdb_0.4.0 colorspace_2.1-0
[25] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4 htmlwidgets_1.6.4
[29] pkgconfig_2.0.3 RcppParallel_5.1.7 pillar_1.9.0 gtable_0.3.5
[33] glue_1.7.0 Rcpp_1.0.12 xfun_0.44 tidyselect_1.2.1
[37] rstudioapi_0.16.0 knitr_1.46 farver_2.1.2 nlme_3.1-164
[41] htmltools_0.5.8.1 labeling_0.4.3 rmarkdown_2.27 compiler_4.4.0