GDI Increment From Mixing

Author

Marco Fantozzi

Published

June 17, 2024

Preamble

library(assertthat)
library(rlang)
library(scales)
library(ggplot2)
library(zeallot)
library(data.table)
library(parallelDist)
library(tidyr)
library(tidyverse)
library(ggh4x)
library(gridExtra)

#options(parallelly.fork.enable = TRUE)

inDir <- file.path("Results")

#setLoggingLevel(2)
#setLoggingFile(file.path(inDir, "MixingClustersGDI_ForebrainDorsal.log"))

outDir <- file.path(inDir, "GDI_Sensitivity")
if (!file.exists(outDir)) {
  dir.create(outDir)
}

Load calculated data for analysis

resMix05 <- readRDS(file.path(outDir, paste0("GDI_with_05%_Mixing.RDS")))
resMix10 <- readRDS(file.path(outDir, paste0("GDI_with_10%_Mixing.RDS")))
resMix20 <- readRDS(file.path(outDir, paste0("GDI_with_20%_Mixing.RDS")))
resMix40 <- readRDS(file.path(outDir, paste0("GDI_with_40%_Mixing.RDS")))
resMix80 <- readRDS(file.path(outDir, paste0("GDI_with_80%_Mixing.RDS")))

# create an aligned baseline
resMix00 <- resMix05
resMix00[["MixingFraction"]] <- 0.0
resMix00[["GDI"]] <- resMix00[["GDI"]] - resMix00[["GDIIncrement"]]
resMix00[["GDIIncrement"]] <- 0.0

Recall cluster distance and add it to the results

zeroOneAvg <- readRDS(file.path(outDir, "allZeroOne.RDS"))
distZeroOne <- as.matrix(parDist(t(zeroOneAvg), method = "hellinger", diag = TRUE, upper = TRUE))^2

distZeroOneLong <- rownames_to_column(as.data.frame(distZeroOne), var = "MainCluster")
distZeroOneLong <-pivot_longer(distZeroOneLong,
                               cols = !MainCluster,
                               names_to = "OtherCluster", 
                               values_to = "Distance")

distZeroOneLong <- as.data.frame(distZeroOneLong[distZeroOneLong[["Distance"]] != 0.0, ])

assert_that(identical(distZeroOneLong[, 1:2], resMix20[, 1:2]))

[1] TRUE

perm <- order(distZeroOneLong[["Distance"]])

# Scatter plot of the effective increment at 40% mixing [Y]
# against estimated distance [X]
distDF <- cbind(distZeroOneLong[, "Distance", drop = FALSE],
                sqrt(distZeroOneLong[, "Distance", drop = FALSE]))
colnames(distDF) <- c("Distance", "DistanceSqrt")

D2IPlot <- ggplot(cbind(resMix40, distDF),
                  aes(x=Distance, y=GDIIncrement)) +
             geom_point() +
             geom_smooth(method=lm, formula = y ~ x)+
             ylab("Top GDI percentile Increment") 
           # +  xlim(0, 1.5) + ylim(0, 1.5) + coord_fixed()

plot(D2IPlot)

Merge all data and plot it using the distance as discriminant

allRes <- rbind(resMix05[perm, ], resMix10[perm, ], resMix20[perm, ], resMix40[perm, ], resMix80[perm, ])
allRes <- cbind(allRes, "Distance" = rep(distZeroOneLong[["Distance"]][perm], 5))
rownames(allRes) <- NULL
allRes <- cbind(allRes, "ClusterPair" = rep.int(c(1:210),5))


allResWithBase <- cbind(resMix00[perm, ], "Distance" = distZeroOneLong[["Distance"]][perm])
rownames(allResWithBase) <- NULL
allResWithBase <- cbind(allResWithBase, "ClusterPair" = c(1:210))
allResWithBase <- rbind(allResWithBase, allRes)

assert_that(identical(allRes[, 4], allResWithBase[211:1260, 4]))

[1] TRUE

dim(allRes)

[1] 1050    7

IScPlot <- ggplot(allRes, aes(x=MixingFraction, y=GDIIncrement, color=Distance,)) +
  geom_point() +
  scale_color_continuous(type = "viridis") +
  scale_x_log10()+
             ylab("Top GDI percentile Increment") 

plot(IScPlot)

GScPlot <- ggplot(allResWithBase, aes(x=MixingFraction, y=GDI, color=Distance,)) +
  geom_point() +
  scale_color_continuous(type = "viridis")+
             ylab("Top GDI percentile") 

plot(GScPlot)

reOrder <- function(df, numBlocks) {
  blockLength <- nrow(df) / numBlocks
  permut <- rep(1:blockLength, each = numBlocks) +
              rep(seq(0, nrow(df) - 1, by = blockLength), times = numBlocks)
  return(df[permut, ])
}

allRes2 <- reOrder(allRes, 5)
allResWithBase2 <- reOrder(allResWithBase, 6)

rng <- c(1,210)
#rng <- c(1,42)
#rng <- c(43,84)
#rng <- c(85,126)
#rng <- c(127,168)
#rng <- c(169,210)

ILinesPLot <- ggplot(allRes2[allRes2[["ClusterPair"]] %between% rng, ],
                     aes(x = MixingFraction, y = GDIIncrement,
                         color = (ClusterPair - 1) %/% 35 + 0.5)) + 
  geom_path(aes(group = ClusterPair)) +
  theme(legend.position = "none") +
  #scale_x_log10() + 
  scale_colour_stepsn(colours = hcl.colors(6, palette = "Dark 2")[6:1])+
             ylab("Top GDI percentile Increment") 

plot(ILinesPLot)

GLinesPLot <- ggplot(allResWithBase2[allResWithBase2[["ClusterPair"]] %between% rng, ],
                     aes(x = MixingFraction, y = GDI,
                         color = (ClusterPair - 1) %/% 35 + 0.5)) + 
  geom_path(aes(group = ClusterPair)) +
  theme(legend.position = "none") +
  #scale_x_log10() + 
  scale_colour_stepsn(colours = hcl.colors(6, palette = "Dark 2")[6:1]) +
             ylab("Top GDI percentile") 
#  geom_line(data = data.frame(cbind(MixingFraction = c(0,0.8), GDI = c(1.4,1.4))),
#            aes(x = MixingFraction, y = GDI))


plot(GLinesPLot)

mg <- function(mixings) {
  res <- mixings
  res[res !=0 ] <- ceiling(log2(round(res[res !=0 ] * 40)))
  return(res)
}

rng <- c(1,35)
#rng <- c(43,84)
#rng <- c(85,126)
#rng <- c(127,168)
#rng <- c(169,210)

allRes$Group <- factor((allRes$ClusterPair - 1) %/% 35 + 1)
levels(allRes$Group) <- paste0("Distance bin ",c(1:6))

allRes$discreteMixing <- factor(mg(allRes$MixingFraction))
levels(allRes$discreteMixing) <- c("5%","10%","20%","40%","80%")

design <- "
  ABC
  DEF
  DEF
"

first.part <- c("Distance bin 1", "Distance bin 2", "Distance bin 3")
second.part <-c("Distance bin 4", "Distance bin 5", "Distance bin 6") 

IBoxPlotA <- ggplot(allRes[allRes$Group %in% first.part,], aes(x=discreteMixing, 
                       y=GDIIncrement, 
                       fill=Group,
                       group = discreteMixing)) +
  geom_boxplot()+
  geom_jitter(width=0.25, alpha=0.5) +
  scale_fill_manual(values = c("#0B5345","#117A65","#16A085")) +
  facet_grid2(cols = vars(Group))+
  theme_light()+
  theme(legend.position = "none",axis.title.x=element_blank(),
        axis.title.y =element_blank()) #+
             #ylab("Top GDI percentile Increment") 

IBoxPlotB <- ggplot(allRes[allRes$Group %in% second.part,], aes(x=discreteMixing, 
                       y=GDIIncrement, 
                       fill=Group,
                       group = discreteMixing)) +
  geom_boxplot()+
  geom_jitter(width=0.25, alpha=0.5) +
  scale_fill_manual(values = c("#1ABC9C","#48C9B0","#A3E4D7")) +
  facet_grid2(cols = vars(Group))+
  theme_light()+
  theme(legend.position = "none",axis.title.x=element_blank(),
        axis.title.y =element_blank()) 

final.plot1 <- grid.arrange(IBoxPlotA,IBoxPlotB,ncol = 2, nrow = 3,
             layout_matrix = rbind(c(1,1), c(2,2), c(2,2)))

pdf("Results/GDI_Sensitivity/GDI_increment.pdf",width = 7, height = 7)
plot(final.plot1)
dev.off()

png 
  2

plot(final.plot1)

allResWithBase$Group <- factor((allResWithBase$ClusterPair - 1) %/% 35 + 1)
levels(allResWithBase$Group) <- paste0("Distance bin ",c(1:6))


allResWithBase$discreteMixing <- factor(mg(allResWithBase$MixingFraction))
levels(allResWithBase$discreteMixing) <- c("0%","5%","10%","20%","40%","80%")


GBoxPlotA <- ggplot(allResWithBase[allResWithBase$Group %in% first.part,], aes(x=discreteMixing, y=GDI, fill=Group,
                              group = discreteMixing))+
  geom_boxplot()+
  geom_jitter(width=0.25, alpha=0.5) +
  scale_fill_manual(values = c("#0B5345","#117A65","#16A085")) +
  facet_grid2(cols = vars(Group))+
  theme_light()+
  theme(legend.position = "none",axis.title.x=element_blank(),
        axis.title.y =element_blank()) #+
             #ylab("Top GDI percentile Increment") 

GBoxPlotB <- ggplot(allResWithBase[allResWithBase$Group %in% second.part,], aes(x=discreteMixing, y=GDI, fill=Group,
                              group = discreteMixing)) +
  geom_boxplot()+
  geom_jitter(width=0.25, alpha=0.5) +
  scale_fill_manual(values = c("#1ABC9C","#48C9B0","#A3E4D7")) +
  facet_grid2(cols = vars(Group))+
  theme_light()+
  theme(legend.position = "none",axis.title.x=element_blank(),
        axis.title.y =element_blank()) 

final.plot2 <- grid.arrange(GBoxPlotA,GBoxPlotB,ncol = 2, nrow = 5,
             layout_matrix = rbind(c(1,1),c(1,1),c(2,2), c(2,2), c(2,2)))

pdf("Results/GDI_Sensitivity/GDI_TopPercentile.pdf",width = 7, height = 7 )
plot(final.plot2)
dev.off()

png 
  2

plot(final.plot2)

Load calculated data about multi clusters cases for analysis

resMix20_2 <- readRDS(file.path(outDir, paste0("GDI_with_20%_Mixing_from_Pairs.RDS")))

dim(resMix20_2)

[1] 150   8

head(resMix20_2)

  MainCluster OtherCluster1 MixingFraction1 OtherCluster2 MixingFraction2
1   E13.5:432     E13.5:187      0.14179104     E15.0:428      0.05970149
2   E13.5:432     E13.5:184      0.08208955     E15.0:509      0.11940299
3   E13.5:432     E13.5:437      0.05970149     E15.0:509      0.14365672
4   E13.5:432     E15.0:428      0.13432836     E17.5:516      0.06716418
5   E13.5:432     E15.0:432      0.09514925     E17.5:505      0.10820896
6   E13.5:432     E13.5:184      0.11380597     E17.5:516      0.08768657
       GDI GDIIncrement PredictedGDIIncrement
1 2.807473   1.32277530            1.10291265
2 1.850392   0.36569464            0.35314603
3 1.574105   0.08940718            0.12012178
4 1.620614   0.13591610            0.17472537
5 1.520910   0.03621287            0.05547557
6 1.956113   0.47141577            0.52537039

Compare estimated vs real GDI increment

# Scatter plot of the effective increment [Y] against estimated increment [X]
pg <- ggplot(resMix20_2, aes(x=PredictedGDIIncrement, y=GDIIncrement)) +
  geom_point() +
  geom_smooth(method=lm, formula = y ~ x + 0) +
  coord_fixed() +
  xlim(0, 1.5) + ylim(0, 1.5)+
             ylab("Top GDI percentile Increment") 
  #scale_x_log10() + scale_y_log10() 

pdf("Results/GDI_Sensitivity/Estimated_VS_RealGDIincrement.pdf",width = 7, height = 7 )
plot(pg)
dev.off()

png 
  2

plot(pg)

The plot shows that having the 20% extraneous cells in the mixture coming from multiple clusters does not affect significantly the sensitivity of the GDI to score cluster uniformity.

sessionInfo()

R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] gridExtra_2.3      ggh4x_0.2.8        lubridate_1.9.3    forcats_1.0.0     
 [5] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2        readr_2.1.5       
 [9] tibble_3.2.1       tidyverse_2.0.0    tidyr_1.3.1        parallelDist_0.2.6
[13] data.table_1.15.4  zeallot_0.1.0      ggplot2_3.5.1      scales_1.3.0      
[17] rlang_1.1.3        assertthat_0.2.1  

loaded via a namespace (and not attached):
 [1] utf8_1.2.4         generics_0.1.3     lattice_0.22-6     stringi_1.8.4     
 [5] hms_1.1.3          digest_0.6.35      magrittr_2.0.3     evaluate_0.23     
 [9] grid_4.4.0         timechange_0.3.0   fastmap_1.2.0      Matrix_1.7-0      
[13] jsonlite_1.8.8     mgcv_1.9-1         fansi_1.0.6        viridisLite_0.4.2 
[17] cli_3.6.2          splines_4.4.0      munsell_0.5.1      withr_3.0.0       
[21] yaml_2.3.8         tools_4.4.0        tzdb_0.4.0         colorspace_2.1-0  
[25] vctrs_0.6.5        R6_2.5.1           lifecycle_1.0.4    htmlwidgets_1.6.4 
[29] pkgconfig_2.0.3    RcppParallel_5.1.7 pillar_1.9.0       gtable_0.3.5      
[33] glue_1.7.0         Rcpp_1.0.12        xfun_0.44          tidyselect_1.2.1  
[37] rstudioapi_0.16.0  knitr_1.46         farver_2.1.2       nlme_3.1-164      
[41] htmltools_0.5.8.1  labeling_0.4.3     rmarkdown_2.27     compiler_4.4.0