library(assertthat)
library(rlang)
library(stringr)
library(scales)
library(ggplot2)
library(ggh4x)
library(gridExtra)
library(grDevices)
library(zeallot)
library(conflicted)
library(data.table)
library(tibble)
library(tidyr)
#library(tidyverse)
library(parallelDist)
conflicts_prefer(zeallot::`%->%`, zeallot::`%<-%`)
inDir <- file.path("Data/Brown_PBMC_Datasets/")
outDir <- file.path("Results/GDI_Sensitivity/PBMCBrownRun40/")GDI Increment From Mixing using Run 40
Preamble
Re-Load calculated data for analysis
Define all mixing fractions
allMixingFractions <- c(0.05, 0.10, 0.20, 0.40, 0.80)
names(allMixingFractions) <-
str_pad(scales::label_percent()(allMixingFractions), 3, pad = "0")Actually load the data
allResList <-
lapply(
names(allMixingFractions),
function(mixStr) {
readRDS(file.path(outDir, paste0("GDI_with_", mixStr, "_Mixing.RDS")))
})
names(allResList) <- names(allMixingFractions)
assert_that(unique(sapply(allResList, ncol)) == 5L)[1] TRUE
Add baseline (“00%” mixing)
baseline <- allResList[[1L]]
baseline[["MixingFraction"]] <- 0.0
baseline[["GDI"]] <- baseline[["GDI"]] - baseline[["GDIIncrement"]]
baseline[["GDIIncrement"]] <- 0.0
allResListWithBase <- append(list("00%" = baseline), allResList)
rm(baseline)Merge all results and calculate the fitting regression for each cluster pair
allRes <-
cbind(allResList[[1L]][, 1:2],
lapply(allResList, function(df) { df[, 3:5] }))
colnames(allRes) <-
c(colnames(allResList[[1L]])[1:2],
paste0(
rep(colnames(allResList[[1L]])[3:5], 5),
"_",
rep(names(allMixingFractions), each = 3)
)
)
allResWithBase <-
cbind(allResListWithBase[[1L]][, 1:2],
lapply(allResListWithBase, function(df) { df[, 3:5] }))
colnames(allResWithBase) <-
c(colnames(allResListWithBase[[1L]])[1:2],
paste0(
rep(colnames(allResListWithBase[[1L]])[3:5], 5),
"_",
rep(c("00%", names(allMixingFractions)), each = 3)
)
)
assert_that(
identical(
allRes, allResWithBase[, str_detect(colnames(allResWithBase),
"00%", negate = TRUE)]
)
)[1] TRUE
Recall cluster distance and add it to the results
zeroOneAvg <- readRDS(file.path(outDir, "allZeroOne.RDS"))
# zeroOneAvg <- readRDS(file.path(outDir, "distanceZeroOne.RDS"))
distZeroOne <-
as.matrix(parDist(t(zeroOneAvg), method = "hellinger",
diag = TRUE, upper = TRUE))^2
distZeroOneLong <-
rownames_to_column(as.data.frame(distZeroOne), var = "MainCluster")
distZeroOneLong <-
pivot_longer(distZeroOneLong,
cols = !MainCluster,
names_to = "OtherCluster",
values_to = "Distance")
distZeroOneLong <-
as.data.frame(distZeroOneLong[distZeroOneLong[["Distance"]] != 0.0, ])
assert_that(all.equal(distZeroOneLong[, 1:2], allRes[, 1:2]))[1] TRUE
perm <- order(distZeroOneLong[["Distance"]])Scatter plot of ΔGDI vs Distance
distDF <- cbind(distZeroOneLong[, "Distance", drop = FALSE],
sqrt(distZeroOneLong[, "Distance", drop = FALSE]))
colnames(distDF) <- c("Distance", "DistanceSqrt")
D2IPlot <- ggplot(cbind(allResList[["20%"]], distDF),
aes(x = Distance, y = GDIIncrement)) +
geom_point() +
geom_smooth(method = lm, formula = y ~ x)
# + xlim(0, 1.5) + ylim(0, 1.5) + coord_fixed()
plot(D2IPlot)
Merge all data and plot it using a-priory (squared) distance as discriminant
allResPerm <-
do.call(rbind, lapply(allResList, function(df) df[perm, ]))
allResPerm <-
cbind(allResPerm,
"ClusterPair" = rep.int(seq_along(perm),
length(allResList)),
"Distance" = rep(distZeroOneLong[["Distance"]][perm],
length(allResList)))
rownames(allResPerm) <- NULL
dim(allResPerm)[1] 150 7
allResWithBasePerm <-
do.call(rbind,lapply(allResListWithBase, function(df) df[perm, ]))
allResWithBasePerm <-
cbind(allResWithBasePerm,
"ClusterPair" = rep.int(seq_along(perm),
length(allResListWithBase)),
"Distance" = rep(distZeroOneLong[["Distance"]][perm],
length(allResListWithBase)))
rownames(allResWithBasePerm) <- NULL
dim(allResWithBasePerm)[1] 180 7
assert_that(
all.equal(
allResPerm,
allResWithBasePerm[(length(perm) + 1):nrow(allResWithBasePerm), ],
check.attributes = FALSE
),
msg = "Trouble with meshing all res permutated data"
)[1] TRUE
Plot all data using the mixing fraction as abscissas and the distance as color
IScPlot <-
ggplot(allResPerm,
aes(x = MixingFraction, y = GDIIncrement, color = Distance)
) +
geom_point() +
scale_color_continuous(type = "viridis") +
scale_x_log10() +
ylab("Top GDI percentile Increment")
plot(IScPlot)
GScPlot <-
ggplot(allResWithBasePerm,
aes(x = MixingFraction, y = GDI, color = Distance)
) +
geom_point() +
scale_color_continuous(type = "viridis") +
ylab("Top GDI percentile")
plot(GScPlot)
Plot all data using the mixing fraction as abscissas and the cell type pairing as color
# IScPlot <-
# ggplot(allResPerm,
# aes(x = MixingFraction, y = GDIIncrement, color = Distance)
# ) +
# geom_point() +
# scale_color_continuous(type = "viridis") +
# scale_x_log10() +
# ylab("Top GDI percentile Increment")
#
# plot(IScPlot)
#
# GScPlot <-
# ggplot(allResWithBasePerm,
# aes(x = MixingFraction, y = GDI, color = Distance)
# ) +
# geom_point() +
# scale_color_continuous(type = "viridis") +
# ylab("Top GDI percentile")
#
# plot(GScPlot)Reorder by cluster pair
reOrder <- function(df, numBlocks) {
blockLength <- nrow(df) / numBlocks
permut <- rep(1:blockLength, each = numBlocks) +
rep(seq(0, nrow(df) - 1, by = blockLength), times = numBlocks)
return(df[permut, ])
}
allResPerm2 <- reOrder(allResPerm, 5)
allResWithBasePerm2 <- reOrder(allResWithBasePerm, 6)
head(allResPerm[["ClusterPair"]], 20) [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
head(allResPerm2[["ClusterPair"]], 20) [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4
Plot by lines
numBuckets <- 6L
bucketLength <- length(perm) %/% numBuckets
ILinesPLot <-
ggplot(
allResPerm2,
aes(x = MixingFraction, y = GDIIncrement,
color = (ClusterPair - 1) %/% bucketLength + 0.5)
) +
geom_path(aes(group = ClusterPair)) +
theme(legend.position = "none") +
scale_colour_stepsn(
colours = rev(hcl.colors(numBuckets, palette = "Dark 2"))
) +
ylab("Top GDI percentile Increment")
plot(ILinesPLot)
GLinesPLot <-
ggplot(
allResWithBasePerm2,
aes(x = MixingFraction, y = GDI,
color = (ClusterPair - 1) %/% bucketLength + 0.5)
) +
geom_path(aes(group = ClusterPair)) +
theme(legend.position = "none") +
scale_colour_stepsn(
colours = rev(hcl.colors(numBuckets, palette = "Dark 2"))
) +
ylab("Top GDI percentile")
# geom_line(data = data.frame(cbind(MixingFraction = c(0,0.8), GDI = c(1.4,1.4))),
# aes(x = MixingFraction, y = GDI))
plot(GLinesPLot)
ΔGDI main plot
numBuckets <- 6L
bucketLength <- length(perm) %/% numBuckets
mg <- function(mixings) {
res <- mixings
actOn <- res != 0.0
res[actOn] <- round(log2(res[actOn] * 40))
return(res)
}
group <- factor((allResPerm[["ClusterPair"]] - 1) %/% bucketLength + 1)
levels(group) <- paste0("Distance bin ", 1:numBuckets)
discreteMixing <- factor(mg(allResPerm[["MixingFraction"]]))
levels(discreteMixing) <- names(allMixingFractions)
allResPermBuck <-
cbind(allResPerm, "Group" = group, "DiscreteMixing" = discreteMixing)
first.part <- c("Distance bin 1", "Distance bin 2", "Distance bin 3")
second.part <- c("Distance bin 4", "Distance bin 5", "Distance bin 6")
IBoxPlotA <-
ggplot(
allResPermBuck[allResPermBuck[["Group"]] %in% first.part,],
aes(x = DiscreteMixing, y = GDIIncrement,
fill = Group, group = DiscreteMixing)
) +
geom_boxplot() +
geom_jitter(width = 0.25, alpha = 0.5) +
scale_fill_manual(
values = c("#4C0D7A", "#3C279C", "#4C29B5")
# values = c("#0B5345", "#117A65", "#16A085")
) +
facet_grid2(cols = vars(Group)) +
scale_y_continuous(
limits = c(-0.05, 1.15),
breaks = scales::breaks_width(0.5)
) +
theme_light() +
theme(legend.position = "none", axis.title.x = element_blank()) +
labs(
y = ""
)
IBoxPlotB <-
ggplot(
allResPermBuck[allResPermBuck[["Group"]] %in% second.part, ],
aes(x = DiscreteMixing, y = GDIIncrement,
fill = Group, group = DiscreteMixing)
) +
geom_boxplot() +
geom_jitter(width = 0.25, alpha = 0.5) +
scale_fill_manual(
values = c("#702FCC", "#9E79D4", "#AA96CF")
# values = c("#1ABC9C", "#48C9B0", "#A3E4D7")
) +
facet_grid2(cols = vars(Group)) +
scale_y_continuous(
limits = c(-0.05, 1.45),
breaks = scales::breaks_width(0.5)
) +
theme_light() +
theme(legend.position = "none") +
labs(
x = "Ratio between sizes of second and first cluster",
y = "Variation of the 99th percentile of GDI scores"
)
finalIPlot <-
gridExtra::grid.arrange(
IBoxPlotA, IBoxPlotB, nrow = 9, ncol = 1,
layout_matrix = cbind(rep(1:2, each = 5)[2:10])
)
grDevices::pdf(file.path(outDir, "GDI-Increment_PBMC-Brown_Run-40.pdf"),
width = 7, height = 7)
grid::grid.newpage()
grid::grid.draw(finalIPlot)
grDevices::dev.off()png
2
GDI main plot
numBuckets <- 6L
bucketLength <- length(perm) %/% numBuckets
mg <- function(mixings) {
res <- mixings
actOn <- res != 0.0
res[actOn] <- round(log2(res[actOn] * 40))
return(res)
}
group <- factor((allResWithBasePerm[["ClusterPair"]] - 1) %/% bucketLength + 1)
levels(group) <- paste0("Distance bin ", 1:numBuckets)
discreteMixing <- factor(mg(allResWithBasePerm[["MixingFraction"]]))
levels(discreteMixing) <- c("00%", names(allMixingFractions))
allResWithBasePermBuck <-
cbind(allResWithBasePerm, "Group" = group, "DiscreteMixing" = discreteMixing)
first.part <- c("Distance bin 1", "Distance bin 2", "Distance bin 3")
second.part <- c("Distance bin 4", "Distance bin 5", "Distance bin 6")
GBoxPlotA <-
ggplot(
allResWithBasePermBuck[allResWithBasePermBuck[["Group"]] %in% first.part,],
aes(x = DiscreteMixing, y = GDI,
fill = Group, group = DiscreteMixing)
) +
geom_boxplot() +
geom_jitter(width = 0.25, alpha = 0.5) +
scale_fill_manual(
values = c("#4C0D7A", "#3C279C", "#4C29B5")
# values = c("#0B5345", "#117A65", "#16A085")
) +
facet_grid2(cols = vars(Group)) +
scale_y_continuous(
limits = c(1.2, 2.40),
breaks = scales::breaks_width(0.5)
) +
theme_light() +
theme(legend.position = "none", axis.title.x = element_blank()) +
labs(
y = "Top 1% GDI score of genes"
)
GBoxPlotB <-
ggplot(
allResWithBasePermBuck[allResWithBasePermBuck[["Group"]] %in% second.part,],
aes(x = DiscreteMixing, y = GDI,
fill = Group, group = DiscreteMixing)
) +
geom_boxplot() +
geom_jitter(width = 0.25, alpha = 0.5) +
scale_fill_manual(
values = c("#702FCC", "#9E79D4", "#AA96CF")
# values = c("#1ABC9C", "#48C9B0", "#A3E4D7")
) +
facet_grid2(cols = vars(Group)) +
scale_y_continuous(
limits = c(1.20, 2.70),
breaks = scales::breaks_width(0.5)
) +
theme_light() +
theme(legend.position = "none") +
labs(
x = "Ratio between sizes of second and first cluster",
y = "Top 1% GDI score of genes"
)
finalGPlot <-
gridExtra::grid.arrange(
GBoxPlotA, GBoxPlotB, nrow = 9, ncol = 1,
layout_matrix = cbind(rep(1:2, each = 5)[2:10])
)
grDevices::pdf(file.path(outDir, "GDI_PBMC-Brown_Run-40.pdf"),
width = 7, height = 7)
grid::grid.newpage()
grid::grid.draw(finalGPlot)
grDevices::dev.off()png
2
Sys.time()[1] "2026-02-02 15:44:26 CET"
#Sys.info()
sessionInfo()R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] parallelDist_0.2.6 tidyr_1.3.1 tibble_3.3.0 data.table_1.18.0
[5] conflicted_1.2.0 zeallot_0.2.0 gridExtra_2.3 ggh4x_0.3.1
[9] ggplot2_4.0.1 scales_1.4.0 stringr_1.6.0 rlang_1.1.7
[13] assertthat_0.2.1
loaded via a namespace (and not attached):
[1] generics_0.1.3 stringi_1.8.7 lattice_0.22-7
[4] digest_0.6.37 magrittr_2.0.4 evaluate_1.0.5
[7] grid_4.5.2 RColorBrewer_1.1-3 fastmap_1.2.0
[10] jsonlite_2.0.0 Matrix_1.7-4 mgcv_1.9-3
[13] purrr_1.2.0 viridisLite_0.4.2 codetools_0.2-20
[16] cli_3.6.5 splines_4.5.2 withr_3.0.2
[19] cachem_1.1.0 yaml_2.3.12 tools_4.5.2
[22] memoise_2.0.1 dplyr_1.1.4 vctrs_0.7.0
[25] R6_2.6.1 lifecycle_1.0.4 htmlwidgets_1.6.4
[28] pkgconfig_2.0.3 RcppParallel_5.1.10 pillar_1.11.1
[31] gtable_0.3.6 glue_1.8.0 Rcpp_1.1.0
[34] xfun_0.52 tidyselect_1.2.1 knitr_1.50
[37] farver_2.1.2 htmltools_0.5.8.1 nlme_3.1-168
[40] labeling_0.4.3 rmarkdown_2.29 compiler_4.5.2
[43] S7_0.2.1