Real UT clusters

Author

Silvia Galfrè

Published

April 19, 2024

Cl432 E13.5 E15.0

Preamble

library(ggplot2)
library(zeallot)
library(data.table)
library(parallelDist)
library(tidyr)
library(tidyverse)
library(caret)
theme_set(theme_bw())
library(COTAN)
library(stringr)
library(nnet)

# Allow forked parallel processing in the 'parallelly' backend.
options(parallelly.fork.enable = TRUE)

# Input folder. The trailing slash is kept on purpose: later code builds file
# names with paste0(inDir, ...).
inDir <- file.path("Data/MouseCortexFromLoom/")

#setLoggingLevel(2)
#setLoggingFile(file.path(inDir, "MixingClustersGDI_ForebrainDorsal.log"))

# Output folder for the results of this analysis.
outDir <- file.path("Data/MouseCortexFromLoom/PureClusters/")
# dir.exists() only accepts real directories, and recursive = TRUE also
# creates missing parent folders instead of failing with just a warning.
if (!dir.exists(outDir)) {
  dir.create(outDir, recursive = TRUE)
}

Clusters 428 and 434 at E15.0

# Load the E15.0 dorsal-forebrain COTAN object.
e15.0 <- readRDS(paste0(inDir,"e15.0_ForebrainDorsal.cotan.RDS"))

# Names of the cells annotated as cluster 428 or 434 (428 first, then 434).
# which() drops cells with a missing cluster label instead of producing
# NA rows.
cellsMeta <- getMetadataCells(e15.0)
cellCluster <- cellsMeta[, "Clusters"]
cells <- c(rownames(cellsMeta)[which(cellCluster == 428)],
           rownames(cellsMeta)[which(cellCluster == 434)])

# Drop every other cell so the object contains only the two clusters.
allCells <- getCells(e15.0)
cl428_cl434 <- dropGenesCells(e15.0, cells = allCells[!allCells %in% cells])

# Re-run the COTAN cleaning and COEX estimation on the reduced object.
cl428_cl434 <- clean(cl428_cl434)
cl428_cl434 <- proceedToCoex(cl428_cl434)

#saveRDS(cl428_cl434,paste0(outDir,"cl428_cl434.cotan.RDS"))
cl428_cl434GDI <- calculateGDI(cl428_cl434)

# Among genes with sum.raw.norm > 7, take the (up to) 50 with the highest GDI.
# head() does not pad the result with NA when fewer than 50 genes remain,
# which `[1:50]` would do.
subsetGDIcl428_cl434 <- cl428_cl434GDI[cl428_cl434GDI$sum.raw.norm > 7, ]
gdiOrder <- order(subsetGDIcl428_cl434$GDI, decreasing = TRUE)
top.GDI.genes <- head(rownames(subsetGDIcl428_cl434[gdiOrder, ]), 50)

GDIPlot(cl428_cl434, genes = "", GDIIn = cl428_cl434GDI)

# Build the per-cell feature table used by the classifier: log-transformed
# normalized expression of the most variable genes plus the top-GDI genes,
# with the cluster label appended as a 0/1 column.
data <- getNormalizedData(cl428_cl434)
# Keep only genes whose total normalized expression is at least 1
# (same filter as `!rowSums(...) < 1`, written without relying on the
# low precedence of `!`).
data <- data[rowSums(as.matrix(data)) >= 1, ]
data <- log(data * 10000 + 1)

# Rank genes by the standard deviation of their log expression across cells.
row_stdev <- apply(data, 1, sd, na.rm = TRUE)
row_stdev <- row_stdev[order(row_stdev, decreasing = TRUE)]

# head() does not pad with NA when fewer than 100 genes are available,
# which `[1:100]` would do.
genes.to.keep <- c(head(names(row_stdev), 100), top.GDI.genes)

data.small <- data[rownames(data) %in% genes.to.keep, ]

# Transpose so that rows are cells and columns are genes.
data.small <- t(as.matrix(data.small))

# Cluster label of each cell, encoded as (factor level index - 1).
clusterLabels <- getClusterizationData(cl428_cl434,
                                       clName = "original.clusters")[[1]]
if (!is.null(names(clusterLabels))) {
  # Align the labels to the rows of data.small by cell name rather than
  # trusting that both objects happen to be in the same order.
  clusterLabels <- clusterLabels[rownames(data.small)]
}
stopifnot(length(clusterLabels) == nrow(data.small), !anyNA(clusterLabels))
# droplevels() guarantees a 0/1 coding even if the factor still carries
# levels of clusters that are no longer present in this subset.
COTAN_Cl.code <- as.numeric(droplevels(as.factor(clusterLabels))) - 1

data.small <- cbind(data.small, COTAN_Cl.code)
data.small <- as.data.frame(data.small)

# Split the data into training (80%) and test (20%) sets
set.seed(123)
training.samples <- data.small[, "COTAN_Cl.code"] %>%
  createDataPartition(p = 0.8, list = FALSE)
train.data <- data.small[training.samples, ]
test.data <- data.small[-training.samples, ]
head(train.data)
                               Abracl  Cdkn1c    Cited2     Mllt3  Fam210b
10X74_4_A_1:GTAACGTGCCATAGx 10.128836 0.00000  9.435728  9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 10.211198 0.00000 10.904327 10.211198 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 10.518544 9.13233  9.825424  9.132330 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 10.916737 0.00000  9.307371 10.405923 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 0.00000 11.194560  0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 11.828628 0.00000 10.941336  8.995532 8.995532
                                Mfap4   Cxcl12  Cdc42ep3    Limch1     Ier2
10X74_4_A_1:GTAACGTGCCATAGx  9.435728 9.435728  0.000000  0.000000 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx  9.518087 0.000000  9.518087  0.000000 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 11.078148 9.825424  0.000000  9.132330 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 10.405923 0.000000 10.000473 10.000473 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 9.585177  0.000000  0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 10.941336 9.688617  0.000000  8.995532 8.995532
                                Ptprk   Gm29260    Sfrp1    Pantr1     Magi2
10X74_4_A_1:GTAACGTGCCATAGx  9.435728  0.000000 9.435728 10.534287  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 10.211198  9.518087 9.518087  9.518087  9.518087
10X74_4_A_1:GATTCGGAGGTGAGx  9.132330  9.825424 9.825424 10.518544  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx  9.307371 10.000473 0.000000 10.405923  0.000000
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177  0.000000 9.585177  0.000000  0.000000
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000  8.995532 9.688617 10.604871 10.381733
                             Tmem108     Ddr1       Ezr    Sncaip CT025619.1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000  9.435728  0.000000   9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 0.000000  0.000000  9.518087  10.211198
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 0.000000 10.230871  9.825424   9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 0.000000  0.000000 10.000473   9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 10.278290  0.000000   9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 8.995532  8.995532  0.000000   8.995532
                               Fabp7 Ctnnd2       Ptn    Frmd4a    Sorbs2
10X74_4_A_1:GTAACGTGCCATAGx 0.000000      0 10.128836 11.227421 10.534287
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087      0  9.518087  9.518087 10.616651
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330      0  0.000000  0.000000  9.825424
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371      0 10.693598  9.307371 10.000473
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177      0 10.278290  0.000000  9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 9.688617      0  0.000000  0.000000 10.094061
                                Hes6       Dcc   Neurog2     Tcf12  Fam110a
10X74_4_A_1:GTAACGTGCCATAGx 0.000000  9.435728  0.000000  0.000000 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087  0.000000  0.000000 10.616651 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 9.825424  9.825424 10.741682  9.825424 9.825424
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.916737  0.000000 10.000473 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177  0.000000  0.000000 11.194560 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000  8.995532  8.995532 10.094061 0.000000
                               Dhrs4     Ccnd2    Elavl4      Ank3    Klf12
10X74_4_A_1:GTAACGTGCCATAGx 0.000000  0.000000  9.435728  9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000  9.518087  0.000000  9.518087 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 10.924000  9.132330  0.000000 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000  0.000000 10.000473 10.916737 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 10.278290 10.971420  9.585177 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 9.688617 10.094061 10.094061  9.688617 0.000000
                             Slc17a6     Lzts1      Map2      Lhx2   Neurod2
10X74_4_A_1:GTAACGTGCCATAGx 0.000000  0.000000  0.000000 10.128836 10.534287
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000  9.518087 10.211198  9.518087  9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 10.518544  9.825424 10.230871  9.825424
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.405923  0.000000 10.000473 11.099055
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000  0.000000 10.971420 10.971420 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 10.381733  8.995532 10.787188 10.604871
                               Sstr2     Eomes   Vps37b      Ier5  Cttnbp2
10X74_4_A_1:GTAACGTGCCATAGx 10.82196  0.000000  0.00000 10.128836  0.00000
10X73_3_A_1:ACGCCGGATCGTAGx 10.21120  0.000000  0.00000  9.518087 11.12747
10X74_4_A_1:GATTCGGAGGTGAGx 10.51854  9.825424  9.13233  0.000000 10.23087
10X74_4_A_1:CTGAGCCTGGTAAAx 10.69360  0.000000  0.00000  0.000000  0.00000
10X74_4_A_1:CCTAGAGAGTCACAx 10.27829 10.278290 10.27829  9.585177 11.19456
10X74_4_A_1:CTAGGATGCTTGCCx 10.94134  0.000000 10.60487  0.000000 10.94134
                              Tmem178   Plxna4      Aff3     Tiam2  Ppp2r2b
10X74_4_A_1:GTAACGTGCCATAGx  9.435728 9.435728  9.435728  9.435728 10.82196
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000 0.000000 10.616651 10.211198 10.21120
10X74_4_A_1:GATTCGGAGGTGAGx  9.132330 0.000000  0.000000 10.230871  9.13233
10X74_4_A_1:CTGAGCCTGGTAAAx 10.405923 0.000000  0.000000  9.307371 10.91674
10X74_4_A_1:CCTAGAGAGTCACAx  0.000000 0.000000  0.000000  0.000000 10.27829
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000 0.000000  0.000000 10.094061 10.09406
                                 Zeb2     Gria2     Nbea     Ptprd      Sox5
10X74_4_A_1:GTAACGTGCCATAGx  9.435728 10.534287 0.000000  9.435728 10.821963
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000 10.211198 9.518087  0.000000  9.518087
10X74_4_A_1:GATTCGGAGGTGAGx  9.825424  9.132330 0.000000  0.000000  9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx  0.000000 10.405923 0.000000 10.405923 10.000473
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177  9.585177 0.000000  0.000000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733  0.000000 0.000000  0.000000  0.000000
                               Mpped2   Bcl11b    Epha5     Nol4    Grik2
10X74_4_A_1:GTAACGTGCCATAGx 10.534287 9.435728 10.53429  0.00000 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 10.211198 0.000000 10.61665 10.21120 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 10.924000 0.000000 10.23087  0.00000 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx  9.307371 0.000000 10.00047 10.91674 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx  0.000000 0.000000 10.68374 10.97142 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx  9.688617 0.000000 10.38173  0.00000 8.995532
                            Mir124.2hg      Pcp4   Ccser1     Kcnb2   Dscaml1
10X74_4_A_1:GTAACGTGCCATAGx   0.000000  0.000000 11.63288 10.128836  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx   0.000000 11.127466 10.90433  0.000000  0.000000
10X74_4_A_1:GATTCGGAGGTGAGx   0.000000 10.741682  0.00000  9.132330  9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx   0.000000  0.000000 11.09906  0.000000  0.000000
10X74_4_A_1:CCTAGAGAGTCACAx   0.000000 10.971420 10.27829  0.000000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx   8.995532  9.688617 10.38173  8.995532  9.688617
                                Kcnh7       Pam     Negr1     Ppm1e     Myt1l
10X74_4_A_1:GTAACGTGCCATAGx  0.000000  0.000000  9.435728 10.534287 11.515100
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000 10.616651 10.616651  9.518087 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx  9.132330  9.825424 10.518544  9.825424  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx  9.307371  9.307371  0.000000  0.000000 10.405923
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 10.683744 11.194560  0.000000  9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733  8.995532  0.000000  9.688617 10.094061
                                Foxp2     Gpc6     Masp1 Serpini1    Pcdh7
10X74_4_A_1:GTAACGTGCCATAGx  9.435728 10.12884  0.000000 9.435728 10.12884
10X73_3_A_1:ACGCCGGATCGTAGx  9.518087 10.21120  9.518087 0.000000 11.12747
10X74_4_A_1:GATTCGGAGGTGAGx  0.000000  0.00000  9.132330 0.000000  9.13233
10X74_4_A_1:CTGAGCCTGGTAAAx 10.405923  0.00000 10.693598 0.000000  0.00000
10X74_4_A_1:CCTAGAGAGTCACAx  0.000000  0.00000  0.000000 0.000000 10.97142
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000  0.00000  9.688617 0.000000 10.09406
                               Rbfox1    Robo2     Fhod3     Rprm      Gng3
10X74_4_A_1:GTAACGTGCCATAGx 10.821963 11.38157  9.435728 0.000000 10.128836
10X73_3_A_1:ACGCCGGATCGTAGx 11.463934 11.71525 10.211198 0.000000  0.000000
10X74_4_A_1:GATTCGGAGGTGAGx  9.132330 11.32946  9.132330 9.825424  9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 11.099055 10.69360  0.000000 0.000000  0.000000
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 10.68374  0.000000 0.000000  0.000000
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000 11.19265  9.688617 8.995532  8.995532
                               Uchl1  Tmem176b      Nrn1   Snap25     Nrg1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000  9.435728  0.000000 9.435728 11.04510
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 10.211198  0.000000 0.000000 10.90433
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000  9.132330  9.825424 0.000000 10.74168
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371  0.000000 10.916737 9.307371 11.38673
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000  9.585177  9.585177 9.585177 10.97142
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000  8.995532  9.688617 9.688617 11.19265
                                Cdh8   Trim17     Chga    Rpl26     Rack1
10X74_4_A_1:GTAACGTGCCATAGx 10.12884 0.000000 0.000000 10.82196  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 11.30979 0.000000 9.518087 11.30979  0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 10.74168 0.000000 0.000000 10.92400 10.230871
10X74_4_A_1:CTGAGCCTGGTAAAx 10.40592 9.307371 9.307371 10.91674  9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 10.27829 9.585177 0.000000 10.68374 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 10.38173 8.995532 0.000000 11.29801 10.094061
                             Hsp90b1    Aldoa   Tuba1b    Sf3b6      Top1
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 9.435728 9.435728  0.00000 10.534287
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 9.518087  0.00000  9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 9.132330 9.132330  9.13233 10.230871
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 9.307371 9.307371  0.00000  9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 0.000000  0.00000  9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 0.000000 10.09406 10.094061
                                  Tpr     Smc3     Rplp1     Rpl22   Rpl36a
10X74_4_A_1:GTAACGTGCCATAGx  9.435728 0.000000 10.128836  0.000000 10.53429
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000 0.000000 10.616651  0.000000  0.00000
10X74_4_A_1:GATTCGGAGGTGAGx 10.230871 0.000000 10.518544  9.132330  0.00000
10X74_4_A_1:CTGAGCCTGGTAAAx  0.000000 0.000000  9.307371 10.000473 11.09906
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 0.000000 10.683744  0.000000 10.68374
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000 8.995532 10.094061  9.688617 10.09406
                                Rps21    Eif4a1      Erh      Mdk     Pde4d
10X74_4_A_1:GTAACGTGCCATAGx  9.435728  9.435728 0.000000  0.00000  9.435728
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000 10.211198 0.000000 10.61665 10.616651
10X74_4_A_1:GATTCGGAGGTGAGx 10.741682  9.825424 0.000000 11.07815  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx  9.307371 10.916737 0.000000 11.79219  0.000000
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 10.278290 0.000000 11.19456  9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733  9.688617 9.688617 10.94134 10.941336
                              Pgrmc1    Ube2r2     Bri3     Rps28     Mbtd1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000  0.000000 0.000000  9.435728  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000  9.518087 0.000000  9.518087 10.616651
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330  0.000000 9.132330  9.132330  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 10.000473 9.307371 10.000473  9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177  0.000000 9.585177  0.000000 10.683744
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000  0.000000 9.688617  8.995532  9.688617
                               Rpl15     Macf1   Phf21a     Oaz2      Gnb1
10X74_4_A_1:GTAACGTGCCATAGx 10.53429  9.435728 0.000000 10.12884  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx  0.00000  9.518087 0.000000  0.00000 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx 10.51854  0.000000 9.825424  9.13233  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.40592  9.307371 0.000000  0.00000  9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 10.68374  0.000000 0.000000  0.00000  9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.60487 10.381733 8.995532  0.00000  0.000000
                               Zfp422    Rpl35a     Cox8a   Nt5dc2
10X74_4_A_1:GTAACGTGCCATAGx 10.128836 10.821963 10.128836  0.00000
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000  9.518087 10.211198  0.00000
10X74_4_A_1:GATTCGGAGGTGAGx  9.132330 10.518544  9.132330 10.92400
10X74_4_A_1:CTGAGCCTGGTAAAx  0.000000 10.000473  9.307371 10.00047
10X74_4_A_1:CCTAGAGAGTCACAx  0.000000  0.000000  0.000000  0.00000
10X74_4_A_1:CTAGGATGCTTGCCx  8.995532  9.688617  8.995532 10.09406
                            X4930402H24Rik     Ttc28      Ckb   Sh3bgrl
10X74_4_A_1:GTAACGTGCCATAGx      10.821963 10.128836 9.435728  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx       9.518087 10.616651 0.000000  9.518087
10X74_4_A_1:GATTCGGAGGTGAGx       0.000000 10.741682 9.132330  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx      10.000473 11.253204 0.000000  0.000000
10X74_4_A_1:CCTAGAGAGTCACAx       0.000000 10.278290 0.000000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx       0.000000  9.688617 8.995532  9.688617
                               Top2b  Ndufa10 C530008M17Rik     Basp1  Gpatch8
10X74_4_A_1:GTAACGTGCCATAGx 9.435728  0.00000      0.000000 10.534287 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000  0.00000      0.000000 10.616651 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 10.23087      0.000000 10.518544 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.00047      0.000000  9.307371 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000  0.00000      9.585177  0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532  0.00000     10.094061  9.688617 8.995532
                                Map1b  Ccdc88a     Xist    Ctnna2     Rnf7
10X74_4_A_1:GTAACGTGCCATAGx 10.534287 0.000000  0.00000 10.534287 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx  9.518087 9.518087 10.61665 11.463934 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx  0.000000 0.000000  0.00000  9.825424 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.000473 0.000000 10.69360 10.000473 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177 0.000000 11.78234  0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000 8.995532 10.38173  9.688617 9.688617
                                Fabp5    Usmg5     Phip     Rbmx    Zbtb20
10X74_4_A_1:GTAACGTGCCATAGx  0.000000 9.435728 9.435728 10.12884  9.435728
10X73_3_A_1:ACGCCGGATCGTAGx  9.518087 0.000000 0.000000  0.00000 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx  9.825424 0.000000 9.132330  9.13233 10.518544
10X74_4_A_1:CTGAGCCTGGTAAAx 10.000473 9.307371 0.000000  0.00000  9.307371
10X74_4_A_1:CCTAGAGAGTCACAx  0.000000 0.000000 0.000000  0.00000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733 8.995532 0.000000  0.00000  8.995532
                                Eif1b      Lcor      Nsg1      Evl      Meg3
10X74_4_A_1:GTAACGTGCCATAGx  9.435728  0.000000  9.435728 9.435728  0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 10.616651  9.518087  9.518087 9.518087  0.000000
10X74_4_A_1:GATTCGGAGGTGAGx  9.132330  9.825424  9.825424 0.000000  0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.693598 10.000473 10.693598 0.000000 10.000473
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177  0.000000 10.683744 0.000000  9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.094061  0.000000  9.688617 8.995532  8.995532
                               Eif4a2     Rab10    Mllt11 COTAN_Cl.code
10X74_4_A_1:GTAACGTGCCATAGx 10.821963  0.000000 10.128836             0
10X73_3_A_1:ACGCCGGATCGTAGx  0.000000  9.518087  0.000000             0
10X74_4_A_1:GATTCGGAGGTGAGx  0.000000 10.230871  9.132330             0
10X74_4_A_1:CTGAGCCTGGTAAAx  0.000000  0.000000 10.405923             0
10X74_4_A_1:CCTAGAGAGTCACAx  9.585177  0.000000  9.585177             0
10X74_4_A_1:CTAGGATGCTTGCCx  0.000000  0.000000 10.381733             0
# Fit a logistic regression of the 0/1 cluster code on all gene columns.
model <- glm(COTAN_Cl.code ~ ., data = train.data, family = binomial,
             control = list(maxit = 50))

# Sanity-check the fit. With many predictors relative to the number of cells
# the IRLS iterations can diverge (e.g. under complete separation), which
# shows up as huge coefficients and a residual deviance above the null
# deviance; the summary below must not be interpreted in that case.
if (!isTRUE(model$converged) || isTRUE(model$boundary) ||
    model$deviance > model$null.deviance) {
  warning("glm fit looks degenerate (converged = ", model$converged,
          ", residual deviance = ", signif(model$deviance, 6),
          ", null deviance = ", signif(model$null.deviance, 6),
          "): coefficients and p-values are not reliable",
          call. = FALSE)
}

# Summarize the model
summary(model)

Call:
glm(formula = COTAN_Cl.code ~ ., family = binomial, data = train.data, 
    control = list(maxit = 50))

Coefficients:
                 Estimate Std. Error    z value Pr(>|z|)    
(Intercept)    -1.686e+15  5.418e+07  -31111735   <2e-16 ***
Abracl         -4.715e+13  1.108e+06  -42568677   <2e-16 ***
Cdkn1c          1.198e+14  8.449e+05  141835339   <2e-16 ***
Cited2         -1.123e+14  1.158e+06  -96984394   <2e-16 ***
Mllt3          -1.480e+13  9.271e+05  -15963843   <2e-16 ***
Fam210b         5.776e+12  7.886e+05    7324089   <2e-16 ***
Mfap4           1.224e+13  1.105e+06   11082390   <2e-16 ***
Cxcl12          9.768e+12  8.228e+05   11871498   <2e-16 ***
Cdc42ep3        2.679e+13  8.102e+05   33062536   <2e-16 ***
Limch1          3.888e+13  7.629e+05   50956797   <2e-16 ***
Ier2           -1.941e+13  8.096e+05  -23978642   <2e-16 ***
Ptprk          -1.101e+13  7.877e+05  -13974326   <2e-16 ***
Gm29260        -1.365e+14  8.513e+05 -160329184   <2e-16 ***
Sfrp1          -5.195e+13  7.665e+05  -67769176   <2e-16 ***
Pantr1         -6.604e+13  1.184e+06  -55777291   <2e-16 ***
Magi2           2.868e+13  7.718e+05   37162645   <2e-16 ***
Tmem108         2.640e+13  8.406e+05   31407609   <2e-16 ***
Ddr1           -7.295e+13  7.777e+05  -93801889   <2e-16 ***
Ezr            -4.928e+12  8.165e+05   -6035217   <2e-16 ***
Sncaip         -9.105e+13  7.615e+05 -119561130   <2e-16 ***
CT025619.1      7.755e+13  7.693e+05  100806643   <2e-16 ***
Fabp7          -7.246e+12  7.987e+05   -9071616   <2e-16 ***
Ctnnd2          4.181e+13  7.989e+05   52332613   <2e-16 ***
Ptn            -6.993e+13  8.866e+05  -78870444   <2e-16 ***
Frmd4a         -8.510e+12  8.139e+05  -10455059   <2e-16 ***
Sorbs2          8.519e+13  1.040e+06   81931351   <2e-16 ***
Hes6            4.145e+13  8.207e+05   50507058   <2e-16 ***
Dcc            -7.059e+13  1.091e+06  -64717826   <2e-16 ***
Neurog2         1.400e+13  8.991e+05   15569920   <2e-16 ***
Tcf12           5.909e+13  7.580e+05   77960833   <2e-16 ***
Fam110a        -1.887e+13  7.726e+05  -24430882   <2e-16 ***
Dhrs4          -8.288e+13  8.280e+05 -100100195   <2e-16 ***
Ccnd2          -6.734e+12  8.477e+05   -7943945   <2e-16 ***
Elavl4          7.681e+13  1.040e+06   73878040   <2e-16 ***
Ank3            3.085e+13  8.158e+05   37815294   <2e-16 ***
Klf12           2.096e+13  8.113e+05   25830911   <2e-16 ***
Slc17a6         4.704e+13  7.777e+05   60486563   <2e-16 ***
Lzts1           1.221e+13  7.629e+05   16006958   <2e-16 ***
Map2           -2.064e+13  1.049e+06  -19667725   <2e-16 ***
Lhx2            7.691e+13  1.073e+06   71697568   <2e-16 ***
Neurod2         1.553e+14  1.308e+06  118766112   <2e-16 ***
Sstr2          -1.201e+13  1.123e+06  -10693159   <2e-16 ***
Eomes           4.862e+13  8.998e+05   54037701   <2e-16 ***
Vps37b          5.989e+13  7.719e+05   77580428   <2e-16 ***
Ier5           -7.104e+13  7.933e+05  -89558067   <2e-16 ***
Cttnbp2        -6.238e+12  1.026e+06   -6082076   <2e-16 ***
Tmem178         4.639e+13  7.711e+05   60164738   <2e-16 ***
Plxna4          2.699e+13  8.247e+05   32726248   <2e-16 ***
Aff3            2.566e+13  8.494e+05   30215978   <2e-16 ***
Tiam2           3.867e+13  8.431e+05   45865723   <2e-16 ***
Ppp2r2b         7.367e+12  1.343e+06    5487241   <2e-16 ***
Zeb2           -8.699e+13  8.214e+05 -105905030   <2e-16 ***
Gria2           4.712e+12  8.115e+05    5807339   <2e-16 ***
Nbea           -3.057e+13  7.864e+05  -38878840   <2e-16 ***
Ptprd          -1.635e+13  8.432e+05  -19394693   <2e-16 ***
Sox5            7.997e+11  9.521e+05     839948   <2e-16 ***
Mpped2         -1.178e+13  9.316e+05  -12640992   <2e-16 ***
Bcl11b         -8.699e+13  8.186e+05 -106261338   <2e-16 ***
Epha5           7.789e+13  1.438e+06   54158015   <2e-16 ***
Nol4            4.350e+13  8.773e+05   49578826   <2e-16 ***
Grik2           2.092e+13  7.828e+05   26717635   <2e-16 ***
Mir124.2hg     -1.978e+13  8.306e+05  -23811113   <2e-16 ***
Pcp4           -7.976e+12  7.703e+05  -10354354   <2e-16 ***
Ccser1          7.204e+13  1.011e+06   71225614   <2e-16 ***
Kcnb2          -6.638e+13  7.809e+05  -85011877   <2e-16 ***
Dscaml1        -2.009e+13  8.061e+05  -24923932   <2e-16 ***
Kcnh7          -6.348e+13  8.216e+05  -77260888   <2e-16 ***
Pam            -1.908e+13  9.566e+05  -19940070   <2e-16 ***
Negr1          -5.593e+13  8.125e+05  -68845519   <2e-16 ***
Ppm1e          -5.188e+12  7.777e+05   -6670744   <2e-16 ***
Myt1l          -1.775e+13  9.473e+05  -18739638   <2e-16 ***
Foxp2           1.982e+13  7.833e+05   25304938   <2e-16 ***
Gpc6            4.766e+12  7.945e+05    5999112   <2e-16 ***
Masp1           4.384e+13  7.665e+05   57188102   <2e-16 ***
Serpini1        2.141e+13  8.061e+05   26565876   <2e-16 ***
Pcdh7           4.264e+13  8.073e+05   52814591   <2e-16 ***
Rbfox1         -2.540e+12  8.031e+05   -3162771   <2e-16 ***
Robo2          -1.937e+13  1.420e+06  -13639796   <2e-16 ***
Fhod3           4.406e+13  7.980e+05   55209662   <2e-16 ***
Rprm            5.339e+13  7.835e+05   68145974   <2e-16 ***
Gng3           -2.037e+13  7.680e+05  -26522083   <2e-16 ***
Uchl1           7.239e+12  7.594e+05    9531581   <2e-16 ***
Tmem176b        3.088e+13  8.030e+05   38459013   <2e-16 ***
Nrn1           -5.860e+13  9.557e+05  -61323738   <2e-16 ***
Snap25         -2.782e+13  7.898e+05  -35221750   <2e-16 ***
Nrg1           -1.037e+14  1.511e+06  -68676296   <2e-16 ***
Cdh8           -6.110e+13  9.459e+05  -64591781   <2e-16 ***
Trim17         -4.393e+13  7.920e+05  -55467510   <2e-16 ***
Chga            3.352e+13  7.864e+05   42626343   <2e-16 ***
Rpl26          -1.541e+14  1.158e+06 -133038070   <2e-16 ***
Rack1          -1.958e+13  1.065e+06  -18388573   <2e-16 ***
Hsp90b1         1.180e+13  7.764e+05   15202488   <2e-16 ***
Aldoa           5.076e+13  7.895e+05   64297711   <2e-16 ***
Tuba1b         -2.882e+13  7.673e+05  -37556112   <2e-16 ***
Sf3b6          -8.811e+12  7.754e+05  -11363341   <2e-16 ***
Top1           -7.822e+13  1.256e+06  -62293981   <2e-16 ***
Tpr             4.572e+13  8.111e+05   56364549   <2e-16 ***
Smc3            7.762e+12  7.864e+05    9869483   <2e-16 ***
Rplp1           4.751e+13  1.506e+06   31536103   <2e-16 ***
Rpl22           5.640e+13  9.818e+05   57449909   <2e-16 ***
Rpl36a          5.228e+13  1.286e+06   40661251   <2e-16 ***
Rps21          -6.595e+12  9.803e+05   -6728251   <2e-16 ***
Eif4a1          5.623e+13  1.107e+06   50779647   <2e-16 ***
Erh             5.627e+13  8.249e+05   68215509   <2e-16 ***
Mdk             1.403e+13  1.311e+06   10698211   <2e-16 ***
Pde4d           7.496e+13  8.054e+05   93072149   <2e-16 ***
Pgrmc1          2.521e+13  7.890e+05   31958231   <2e-16 ***
Ube2r2          2.252e+13  7.630e+05   29515130   <2e-16 ***
Bri3           -1.631e+13  7.624e+05  -21389061   <2e-16 ***
Rps28           5.956e+12  1.081e+06    5511164   <2e-16 ***
Mbtd1          -1.601e+13  8.020e+05  -19968438   <2e-16 ***
Rpl15           1.769e+13  1.056e+06   16743069   <2e-16 ***
Macf1          -2.359e+13  7.456e+05  -31643581   <2e-16 ***
Phf21a         -2.270e+13  7.941e+05  -28584159   <2e-16 ***
Oaz2           -1.602e+13  7.671e+05  -20878553   <2e-16 ***
Gnb1            2.115e+13  7.786e+05   27161497   <2e-16 ***
Zfp422         -1.520e+13  7.937e+05  -19144492   <2e-16 ***
Rpl35a         -7.101e+13  1.012e+06  -70148519   <2e-16 ***
Cox8a           8.371e+12  9.384e+05    8920618   <2e-16 ***
Nt5dc2         -3.363e+13  7.820e+05  -43000354   <2e-16 ***
X4930402H24Rik  1.789e+12  8.011e+05    2232956   <2e-16 ***
Ttc28           7.823e+12  9.395e+05    8326198   <2e-16 ***
Ckb            -9.743e+13  8.412e+05 -115823225   <2e-16 ***
Sh3bgrl        -2.330e+13  7.831e+05  -29748021   <2e-16 ***
Top2b           5.521e+13  7.850e+05   70331459   <2e-16 ***
Ndufa10        -7.000e+13  7.998e+05  -87527429   <2e-16 ***
C530008M17Rik  -5.369e+13  8.134e+05  -66011040   <2e-16 ***
Basp1           1.162e+14  1.002e+06  115896331   <2e-16 ***
Gpatch8         1.888e+13  7.653e+05   24675271   <2e-16 ***
Map1b          -2.937e+13  9.207e+05  -31901455   <2e-16 ***
Ccdc88a         3.041e+13  7.736e+05   39301701   <2e-16 ***
Xist            1.730e+13  7.861e+05   22010505   <2e-16 ***
Ctnna2         -4.172e+13  9.718e+05  -42928048   <2e-16 ***
Rnf7           -2.193e+13  8.067e+05  -27182858   <2e-16 ***
Fabp5           6.984e+13  9.924e+05   70373006   <2e-16 ***
Usmg5           5.091e+13  7.931e+05   64193211   <2e-16 ***
Phip            2.855e+13  7.823e+05   36499842   <2e-16 ***
Rbmx            1.080e+13  7.791e+05   13857287   <2e-16 ***
Zbtb20         -3.542e+13  1.022e+06  -34653461   <2e-16 ***
Eif1b          -5.204e+13  1.019e+06  -51060909   <2e-16 ***
Lcor            1.071e+13  7.874e+05   13600823   <2e-16 ***
Nsg1           -6.878e+12  9.945e+05   -6915923   <2e-16 ***
Evl            -3.260e+13  7.905e+05  -41243490   <2e-16 ***
Meg3            6.629e+13  8.077e+05   82077606   <2e-16 ***
Eif4a2         -3.223e+13  7.879e+05  -40902790   <2e-16 ***
Rab10          -3.146e+13  7.727e+05  -40719594   <2e-16 ***
Mllt11          1.398e+14  1.233e+06  113408602   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance:  653.13  on 472  degrees of freedom
Residual deviance: 4757.76  on 326  degrees of freedom
AIC: 5051.8

Number of Fisher Scoring iterations: 46
# Predicted probabilities (response scale) for the held-out cells
probabilities <- predict(model, newdata = test.data, type = "response")
# Assign label "1" above the 0.5 threshold, "0" otherwise
predicted.classes <- ifelse(probabilities > 0.5, "1", "0")
# Fraction of held-out cells whose predicted label matches the true code
mean(predicted.classes == test.data[["COTAN_Cl.code"]])
[1] 0.6694915

434 e13.5 e14.5


# Print the current date-time as a record of when this analysis was run.
Sys.time()
[1] "2024-04-19 12:11:58 CEST"
# Print the R version, platform and attached/loaded package versions.
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Europe/Rome
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] torch_0.12.0       nnet_7.3-19        COTAN_2.5.0        caret_6.0-94      
 [5] lattice_0.22-5     lubridate_1.9.2    forcats_1.0.0      stringr_1.5.0     
 [9] dplyr_1.1.2        purrr_1.0.1        readr_2.1.4        tibble_3.2.1      
[13] tidyverse_2.0.0    tidyr_1.3.0        parallelDist_0.2.6 data.table_1.15.0 
[17] zeallot_0.1.0      ggplot2_3.5.0     

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.21          splines_4.3.2            
  [3] later_1.3.1               polyclip_1.10-4          
  [5] hardhat_1.3.0             pROC_1.18.4              
  [7] rpart_4.1.23              fastDummies_1.7.3        
  [9] lifecycle_1.0.3           doParallel_1.0.17        
 [11] processx_3.8.2            globals_0.16.2           
 [13] MASS_7.3-60               dendextend_1.17.1        
 [15] magrittr_2.0.3            plotly_4.10.2            
 [17] rmarkdown_2.24            yaml_2.3.7               
 [19] httpuv_1.6.11             Seurat_5.0.0             
 [21] sctransform_0.4.1         askpass_1.2.0            
 [23] spam_2.10-0               spatstat.sparse_3.0-2    
 [25] sp_2.1-1                  reticulate_1.35.0        
 [27] cowplot_1.1.1             pbapply_1.7-2            
 [29] RColorBrewer_1.1-3        abind_1.4-5              
 [31] Rtsne_0.17                BiocGenerics_0.46.0      
 [33] coro_1.0.4                ipred_0.9-14             
 [35] circlize_0.4.15           lava_1.8.0               
 [37] IRanges_2.34.1            S4Vectors_0.38.1         
 [39] ggrepel_0.9.5             irlba_2.3.5.1            
 [41] spatstat.utils_3.0-3      listenv_0.9.0            
 [43] umap_0.2.10.0             goftest_1.2-3            
 [45] RSpectra_0.16-1           spatstat.random_3.2-1    
 [47] dqrng_0.3.0               fitdistrplus_1.1-11      
 [49] parallelly_1.37.1         DelayedMatrixStats_1.22.5
 [51] leiden_0.4.3              codetools_0.2-19         
 [53] DelayedArray_0.26.7       tidyselect_1.2.0         
 [55] shape_1.4.6               farver_2.1.1             
 [57] viridis_0.6.4             ScaledMatrix_1.8.1       
 [59] spatstat.explore_3.2-1    matrixStats_1.2.0        
 [61] stats4_4.3.2              jsonlite_1.8.7           
 [63] GetoptLong_1.0.5          ellipsis_0.3.2           
 [65] progressr_0.14.0          ggridges_0.5.4           
 [67] survival_3.5-8            iterators_1.0.14         
 [69] foreach_1.5.2             tools_4.3.2              
 [71] ica_1.0-3                 Rcpp_1.0.11              
 [73] glue_1.7.0                gridExtra_2.3            
 [75] prodlim_2023.08.28        xfun_0.39                
 [77] MatrixGenerics_1.12.3     ggthemes_5.1.0           
 [79] withr_3.0.0               fastmap_1.1.1            
 [81] fansi_1.0.4               openssl_2.1.0            
 [83] callr_3.7.3               digest_0.6.33            
 [85] rsvd_1.0.5                timechange_0.3.0         
 [87] R6_2.5.1                  mime_0.12                
 [89] colorspace_2.1-0          scattermore_1.2          
 [91] tensor_1.5                spatstat.data_3.0-1      
 [93] utf8_1.2.3                generics_0.1.3           
 [95] recipes_1.0.8             class_7.3-22             
 [97] httr_1.4.6                htmlwidgets_1.6.2        
 [99] S4Arrays_1.2.0            uwot_0.1.16              
[101] ModelMetrics_1.2.2.2      pkgconfig_2.0.3          
[103] gtable_0.3.3              timeDate_4032.109        
[105] ComplexHeatmap_2.16.0     lmtest_0.9-40            
[107] htmltools_0.5.8           dotCall64_1.1-0          
[109] clue_0.3-64               SeuratObject_5.0.0       
[111] scales_1.3.0              png_0.1-8                
[113] gower_1.0.1               knitr_1.43               
[115] rstudioapi_0.15.0         tzdb_0.4.0               
[117] reshape2_1.4.4            rjson_0.2.21             
[119] nlme_3.1-163              zoo_1.8-12               
[121] GlobalOptions_0.1.2       KernSmooth_2.23-22       
[123] parallel_4.3.2            miniUI_0.1.1.1           
[125] RcppZiggurat_0.1.6        pillar_1.9.0             
[127] grid_4.3.2                vctrs_0.6.3              
[129] RANN_2.6.1                promises_1.2.0.1         
[131] BiocSingular_1.16.0       beachmat_2.16.0          
[133] xtable_1.8-4              cluster_2.1.6            
[135] evaluate_0.21             cli_3.6.1                
[137] compiler_4.3.2            rlang_1.1.1              
[139] crayon_1.5.2              future.apply_1.11.0      
[141] labeling_0.4.2            ps_1.7.5                 
[143] plyr_1.8.8                stringi_1.8.1            
[145] deldir_2.0-2              viridisLite_0.4.2        
[147] BiocParallel_1.34.2       assertthat_0.2.1         
[149] munsell_0.5.0             lazyeval_0.2.2           
[151] spatstat.geom_3.2-4       PCAtools_2.14.0          
[153] Matrix_1.6-3              RcppHNSW_0.6.0           
[155] hms_1.1.3                 patchwork_1.2.0          
[157] bit64_4.0.5               sparseMatrixStats_1.12.2 
[159] future_1.33.0             shiny_1.8.0              
[161] ROCR_1.0-11               Rfast_2.1.0              
[163] igraph_2.0.3              RcppParallel_5.1.7       
[165] bit_4.0.5