library(ggplot2)
library(zeallot)
library(data.table)
library(parallelDist)
library(tidyr)
library(tidyverse)
library(caret)
theme_set(theme_bw())
library(COTAN)
library(stringr)
library(nnet)
options(parallelly.fork.enable = TRUE)
# Directory holding the input COTAN objects (note the trailing "/": later
# paths are built from it with paste0()).
inDir <- file.path("Data/MouseCortexFromLoom/")
#setLoggingLevel(2)
#setLoggingFile(file.path(inDir, "MixingClustersGDI_ForebrainDorsal.log"))
# Directory where the results of this analysis are meant to be written.
outDir <- file.path("Data/MouseCortexFromLoom/PureClusters/")
# dir.exists() is TRUE only for directories (file.exists() is also TRUE for a
# regular file sitting at that path), and recursive = TRUE creates any missing
# parent directory instead of failing with just a warning.
if (!dir.exists(outDir)) {
  dir.create(outDir, recursive = TRUE)
}
Real UT clusters
Cl432 E13.5 E15.0
Preamble
428 and 434 e15.0
# Load the E15.0 dorsal-forebrain COTAN object.
e15.0 <- readRDS(paste0(inDir, "e15.0_ForebrainDorsal.cotan.RDS"))

# Names of the cells annotated as cluster 428 or 434 (428 first, then 434).
# The cell metadata is extracted once, and %in% is used instead of `==` so a
# missing "Clusters" entry cannot produce spurious "NA" row names.
cellsMeta <- getMetadataCells(e15.0)
cells <- c(
  rownames(cellsMeta)[cellsMeta[, "Clusters"] %in% 428],
  rownames(cellsMeta)[cellsMeta[, "Clusters"] %in% 434]
)

# Drop every cell that is not in the two clusters, then re-run the COTAN
# clean() and proceedToCoex() steps on the reduced object.
allCells <- getCells(e15.0)
cl428_cl434 <- dropGenesCells(e15.0, cells = allCells[!allCells %in% cells])
cl428_cl434 <- clean(cl428_cl434)
cl428_cl434 <- proceedToCoex(cl428_cl434)
#saveRDS(cl428_cl434, paste0(outDir, "cl428_cl434.cotan.RDS"))

# GDI table of the two-cluster object. This assignment must be live code: the
# statements below read cl428_cl434GDI.
cl428_cl434GDI <- calculateGDI(cl428_cl434)

# Only genes whose sum.raw.norm exceeds 7 are eligible as top-GDI genes.
subsetGDIcl428_cl434 <- cl428_cl434GDI[cl428_cl434GDI$sum.raw.norm > 7, ]

# Up to 50 eligible genes with the highest GDI. head() returns fewer names
# when fewer than 50 genes pass the filter, where `[1:50]` would pad with NA.
top.GDI.genes <- head(
  rownames(subsetGDIcl428_cl434)[
    order(subsetGDIcl428_cl434$GDI, decreasing = TRUE)
  ],
  50
)

GDIPlot(cl428_cl434, genes = "", GDIIn = cl428_cl434GDI)
# Normalized expression of the two-cluster object; rows are genes (they are
# filtered by gene name below) and columns are cells.
data <- getNormalizedData(cl428_cl434)

# Keep the genes whose total normalized expression is at least 1.
data <- data[rowSums(as.matrix(data)) >= 1, ]

# Scale and log-transform.
data <- log(data * 10000 + 1)

# Per-gene standard deviation, sorted from most to least variable.
row_stdev <- apply(data, 1, sd, na.rm = TRUE)
row_stdev <- row_stdev[order(row_stdev, decreasing = TRUE)]

# Predictor set: the 100 most variable genes plus the top-GDI genes. head()
# avoids NA names when fewer than 100 genes are available.
genes.to.keep <- c(head(names(row_stdev), 100), top.GDI.genes)
data.small <- data[rownames(data) %in% genes.to.keep, ]
#data <- t(as.matrix(data))

# Transpose so that each row is a cell and each column a gene.
data.small <- t(as.matrix(data.small))

# Numeric cluster code shifted down by one to serve as the model response.
# NOTE(review): presumably the clusterization is a two-level factor, so the
# codes become 0/1 - confirm it carries no unused levels.
COTAN_Cl.code <- as.numeric(
  getClusterizationData(cl428_cl434, clName = "original.clusters")[[1]]
)
COTAN_Cl.code <- COTAN_Cl.code - 1

# Append the response as the last column and convert to a data frame.
data.small <- cbind(data.small, COTAN_Cl.code)
data.small <- as.data.frame(data.small)
# Partition the cells into a training set (80%) and a test set (the rest);
# the seed is fixed so the partition is reproducible.
set.seed(123)
training.samples <- createDataPartition(
  data.small[["COTAN_Cl.code"]],
  p = 0.8,
  list = FALSE
)
train.data <- data.small[training.samples, ]
test.data <- data.small[-training.samples, ]
head(train.data)
Abracl Cdkn1c Cited2 Mllt3 Fam210b
10X74_4_A_1:GTAACGTGCCATAGx 10.128836 0.00000 9.435728 9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 10.211198 0.00000 10.904327 10.211198 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 10.518544 9.13233 9.825424 9.132330 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 10.916737 0.00000 9.307371 10.405923 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.00000 11.194560 0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 11.828628 0.00000 10.941336 8.995532 8.995532
Mfap4 Cxcl12 Cdc42ep3 Limch1 Ier2
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 9.435728 0.000000 0.000000 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 0.000000 9.518087 0.000000 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 11.078148 9.825424 0.000000 9.132330 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 10.405923 0.000000 10.000473 10.000473 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 9.585177 0.000000 0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 10.941336 9.688617 0.000000 8.995532 8.995532
Ptprk Gm29260 Sfrp1 Pantr1 Magi2
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 0.000000 9.435728 10.534287 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 10.211198 9.518087 9.518087 9.518087 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 9.825424 9.825424 10.518544 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 10.000473 0.000000 10.405923 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 9.585177 0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 9.688617 10.604871 10.381733
Tmem108 Ddr1 Ezr Sncaip CT025619.1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000 9.435728 0.000000 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 0.000000 0.000000 9.518087 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 0.000000 10.230871 9.825424 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 0.000000 0.000000 10.000473 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 10.278290 0.000000 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 8.995532 8.995532 0.000000 8.995532
Fabp7 Ctnnd2 Ptn Frmd4a Sorbs2
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0 10.128836 11.227421 10.534287
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 0 9.518087 9.518087 10.616651
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 0 0.000000 0.000000 9.825424
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 0 10.693598 9.307371 10.000473
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0 10.278290 0.000000 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 9.688617 0 0.000000 0.000000 10.094061
Hes6 Dcc Neurog2 Tcf12 Fam110a
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 9.435728 0.000000 0.000000 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 0.000000 0.000000 10.616651 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 9.825424 9.825424 10.741682 9.825424 9.825424
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.916737 0.000000 10.000473 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 0.000000 11.194560 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 8.995532 10.094061 0.000000
Dhrs4 Ccnd2 Elavl4 Ank3 Klf12
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000 9.435728 9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 0.000000 9.518087 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 10.924000 9.132330 0.000000 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 0.000000 10.000473 10.916737 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 10.278290 10.971420 9.585177 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 9.688617 10.094061 10.094061 9.688617 0.000000
Slc17a6 Lzts1 Map2 Lhx2 Neurod2
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000 0.000000 10.128836 10.534287
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 10.211198 9.518087 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 10.518544 9.825424 10.230871 9.825424
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.405923 0.000000 10.000473 11.099055
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 10.971420 10.971420 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 10.381733 8.995532 10.787188 10.604871
Sstr2 Eomes Vps37b Ier5 Cttnbp2
10X74_4_A_1:GTAACGTGCCATAGx 10.82196 0.000000 0.00000 10.128836 0.00000
10X73_3_A_1:ACGCCGGATCGTAGx 10.21120 0.000000 0.00000 9.518087 11.12747
10X74_4_A_1:GATTCGGAGGTGAGx 10.51854 9.825424 9.13233 0.000000 10.23087
10X74_4_A_1:CTGAGCCTGGTAAAx 10.69360 0.000000 0.00000 0.000000 0.00000
10X74_4_A_1:CCTAGAGAGTCACAx 10.27829 10.278290 10.27829 9.585177 11.19456
10X74_4_A_1:CTAGGATGCTTGCCx 10.94134 0.000000 10.60487 0.000000 10.94134
Tmem178 Plxna4 Aff3 Tiam2 Ppp2r2b
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 9.435728 9.435728 9.435728 10.82196
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 0.000000 10.616651 10.211198 10.21120
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 0.000000 0.000000 10.230871 9.13233
10X74_4_A_1:CTGAGCCTGGTAAAx 10.405923 0.000000 0.000000 9.307371 10.91674
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 0.000000 0.000000 10.27829
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 0.000000 0.000000 10.094061 10.09406
Zeb2 Gria2 Nbea Ptprd Sox5
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 10.534287 0.000000 9.435728 10.821963
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 10.211198 9.518087 0.000000 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 9.825424 9.132330 0.000000 0.000000 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.405923 0.000000 10.405923 10.000473
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 9.585177 0.000000 0.000000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733 0.000000 0.000000 0.000000 0.000000
Mpped2 Bcl11b Epha5 Nol4 Grik2
10X74_4_A_1:GTAACGTGCCATAGx 10.534287 9.435728 10.53429 0.00000 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 10.211198 0.000000 10.61665 10.21120 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 10.924000 0.000000 10.23087 0.00000 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 0.000000 10.00047 10.91674 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 10.68374 10.97142 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 9.688617 0.000000 10.38173 0.00000 8.995532
Mir124.2hg Pcp4 Ccser1 Kcnb2 Dscaml1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000 11.63288 10.128836 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 11.127466 10.90433 0.000000 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 10.741682 0.00000 9.132330 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 0.000000 11.09906 0.000000 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 10.971420 10.27829 0.000000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 9.688617 10.38173 8.995532 9.688617
Kcnh7 Pam Negr1 Ppm1e Myt1l
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000 9.435728 10.534287 11.515100
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 10.616651 10.616651 9.518087 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 9.825424 10.518544 9.825424 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 9.307371 0.000000 0.000000 10.405923
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 10.683744 11.194560 0.000000 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733 8.995532 0.000000 9.688617 10.094061
Foxp2 Gpc6 Masp1 Serpini1 Pcdh7
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 10.12884 0.000000 9.435728 10.12884
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 10.21120 9.518087 0.000000 11.12747
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 0.00000 9.132330 0.000000 9.13233
10X74_4_A_1:CTGAGCCTGGTAAAx 10.405923 0.00000 10.693598 0.000000 0.00000
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.00000 0.000000 0.000000 10.97142
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 0.00000 9.688617 0.000000 10.09406
Rbfox1 Robo2 Fhod3 Rprm Gng3
10X74_4_A_1:GTAACGTGCCATAGx 10.821963 11.38157 9.435728 0.000000 10.128836
10X73_3_A_1:ACGCCGGATCGTAGx 11.463934 11.71525 10.211198 0.000000 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 11.32946 9.132330 9.825424 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 11.099055 10.69360 0.000000 0.000000 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 10.68374 0.000000 0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 11.19265 9.688617 8.995532 8.995532
Uchl1 Tmem176b Nrn1 Snap25 Nrg1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 9.435728 0.000000 9.435728 11.04510
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 10.211198 0.000000 0.000000 10.90433
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 9.132330 9.825424 0.000000 10.74168
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 0.000000 10.916737 9.307371 11.38673
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 9.585177 9.585177 9.585177 10.97142
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 9.688617 9.688617 11.19265
Cdh8 Trim17 Chga Rpl26 Rack1
10X74_4_A_1:GTAACGTGCCATAGx 10.12884 0.000000 0.000000 10.82196 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 11.30979 0.000000 9.518087 11.30979 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 10.74168 0.000000 0.000000 10.92400 10.230871
10X74_4_A_1:CTGAGCCTGGTAAAx 10.40592 9.307371 9.307371 10.91674 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 10.27829 9.585177 0.000000 10.68374 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 10.38173 8.995532 0.000000 11.29801 10.094061
Hsp90b1 Aldoa Tuba1b Sf3b6 Top1
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 9.435728 9.435728 0.00000 10.534287
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 9.518087 0.00000 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 9.132330 9.132330 9.13233 10.230871
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 9.307371 9.307371 0.00000 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 0.000000 0.00000 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 0.000000 10.09406 10.094061
Tpr Smc3 Rplp1 Rpl22 Rpl36a
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 0.000000 10.128836 0.000000 10.53429
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 0.000000 10.616651 0.000000 0.00000
10X74_4_A_1:GATTCGGAGGTGAGx 10.230871 0.000000 10.518544 9.132330 0.00000
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 0.000000 9.307371 10.000473 11.09906
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 10.683744 0.000000 10.68374
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 10.094061 9.688617 10.09406
Rps21 Eif4a1 Erh Mdk Pde4d
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 9.435728 0.000000 0.00000 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 10.211198 0.000000 10.61665 10.616651
10X74_4_A_1:GATTCGGAGGTGAGx 10.741682 9.825424 0.000000 11.07815 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 10.916737 0.000000 11.79219 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 10.278290 0.000000 11.19456 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733 9.688617 9.688617 10.94134 10.941336
Pgrmc1 Ube2r2 Bri3 Rps28 Mbtd1
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 0.000000 0.000000 9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 0.000000 9.518087 10.616651
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 0.000000 9.132330 9.132330 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 9.307371 10.000473 9.307371 10.000473 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 9.585177 0.000000 10.683744
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 0.000000 9.688617 8.995532 9.688617
Rpl15 Macf1 Phf21a Oaz2 Gnb1
10X74_4_A_1:GTAACGTGCCATAGx 10.53429 9.435728 0.000000 10.12884 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 0.00000 9.518087 0.000000 0.00000 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx 10.51854 0.000000 9.825424 9.13233 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.40592 9.307371 0.000000 0.00000 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 10.68374 0.000000 0.000000 0.00000 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.60487 10.381733 8.995532 0.00000 0.000000
Zfp422 Rpl35a Cox8a Nt5dc2
10X74_4_A_1:GTAACGTGCCATAGx 10.128836 10.821963 10.128836 0.00000
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 10.211198 0.00000
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 10.518544 9.132330 10.92400
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.000473 9.307371 10.00047
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 0.000000 0.00000
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 9.688617 8.995532 10.09406
X4930402H24Rik Ttc28 Ckb Sh3bgrl
10X74_4_A_1:GTAACGTGCCATAGx 10.821963 10.128836 9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 10.616651 0.000000 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 10.741682 9.132330 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.000473 11.253204 0.000000 0.000000
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 10.278290 0.000000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 9.688617 8.995532 9.688617
Top2b Ndufa10 C530008M17Rik Basp1 Gpatch8
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 0.00000 0.000000 10.534287 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 0.00000 0.000000 10.616651 9.518087
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 10.23087 0.000000 10.518544 9.132330
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 10.00047 0.000000 9.307371 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.00000 9.585177 0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 8.995532 0.00000 10.094061 9.688617 8.995532
Map1b Ccdc88a Xist Ctnna2 Rnf7
10X74_4_A_1:GTAACGTGCCATAGx 10.534287 0.000000 0.00000 10.534287 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 9.518087 10.61665 11.463934 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 0.000000 0.00000 9.825424 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.000473 0.000000 10.69360 10.000473 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 11.78234 0.000000 0.000000
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 8.995532 10.38173 9.688617 9.688617
Fabp5 Usmg5 Phip Rbmx Zbtb20
10X74_4_A_1:GTAACGTGCCATAGx 0.000000 9.435728 9.435728 10.12884 9.435728
10X73_3_A_1:ACGCCGGATCGTAGx 9.518087 0.000000 0.000000 0.00000 10.211198
10X74_4_A_1:GATTCGGAGGTGAGx 9.825424 0.000000 9.132330 9.13233 10.518544
10X74_4_A_1:CTGAGCCTGGTAAAx 10.000473 9.307371 0.000000 0.00000 9.307371
10X74_4_A_1:CCTAGAGAGTCACAx 0.000000 0.000000 0.000000 0.00000 10.278290
10X74_4_A_1:CTAGGATGCTTGCCx 10.381733 8.995532 0.000000 0.00000 8.995532
Eif1b Lcor Nsg1 Evl Meg3
10X74_4_A_1:GTAACGTGCCATAGx 9.435728 0.000000 9.435728 9.435728 0.000000
10X73_3_A_1:ACGCCGGATCGTAGx 10.616651 9.518087 9.518087 9.518087 0.000000
10X74_4_A_1:GATTCGGAGGTGAGx 9.132330 9.825424 9.825424 0.000000 0.000000
10X74_4_A_1:CTGAGCCTGGTAAAx 10.693598 10.000473 10.693598 0.000000 10.000473
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 10.683744 0.000000 9.585177
10X74_4_A_1:CTAGGATGCTTGCCx 10.094061 0.000000 9.688617 8.995532 8.995532
Eif4a2 Rab10 Mllt11 COTAN_Cl.code
10X74_4_A_1:GTAACGTGCCATAGx 10.821963 0.000000 10.128836 0
10X73_3_A_1:ACGCCGGATCGTAGx 0.000000 9.518087 0.000000 0
10X74_4_A_1:GATTCGGAGGTGAGx 0.000000 10.230871 9.132330 0
10X74_4_A_1:CTGAGCCTGGTAAAx 0.000000 0.000000 10.405923 0
10X74_4_A_1:CCTAGAGAGTCACAx 9.585177 0.000000 9.585177 0
10X74_4_A_1:CTAGGATGCTTGCCx 0.000000 0.000000 10.381733 0
# Fit the model
# Logistic regression of the 0/1 cluster code on every other column of
# train.data, allowing up to 50 Fisher scoring iterations.
model <- glm(
  COTAN_Cl.code ~ .,
  data = train.data,
  family = binomial,
  control = list(maxit = 50)
)

# glm() reports a failed fit only through warnings. The summary printed for
# this model shows coefficients of order 1e13 and a residual deviance
# (4685.67) above the null deviance (653.13), so make a degenerate fit
# explicit instead of letting it pass silently.
if (!isTRUE(model$converged) || isTRUE(model$boundary) ||
    isTRUE(model$deviance > model$null.deviance)) {
  warning(
    "glm fit looks degenerate (non-convergence, boundary solution or ",
    "residual deviance above null deviance): coefficients and p-values ",
    "are not reliable; consider fewer predictors or a penalized model.",
    call. = FALSE
  )
}

# Summarize the model
summary(model)
Call:
glm(formula = COTAN_Cl.code ~ ., family = binomial, data = train.data,
control = list(maxit = 50))
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.392e+15 5.418e+07 -25692175 <2e-16 ***
Abracl -3.668e+13 1.108e+06 -33117531 <2e-16 ***
Cdkn1c 9.057e+13 8.449e+05 107200885 <2e-16 ***
Cited2 -1.192e+14 1.158e+06 -102925237 <2e-16 ***
Mllt3 -1.411e+12 9.271e+05 -1522016 <2e-16 ***
Fam210b -4.090e+12 7.886e+05 -5185950 <2e-16 ***
Mfap4 -6.719e+12 1.105e+06 -6082731 <2e-16 ***
Cxcl12 3.911e+13 8.228e+05 47537816 <2e-16 ***
Cdc42ep3 1.404e+13 8.102e+05 17326401 <2e-16 ***
Limch1 4.103e+13 7.629e+05 53781940 <2e-16 ***
Ier2 -1.632e+13 8.096e+05 -20155968 <2e-16 ***
Ptprk -3.990e+13 7.877e+05 -50648051 <2e-16 ***
Gm29260 -1.591e+14 8.513e+05 -186868495 <2e-16 ***
Sfrp1 -1.001e+13 7.665e+05 -13056648 <2e-16 ***
Pantr1 -4.908e+13 1.184e+06 -41449633 <2e-16 ***
Magi2 2.042e+13 7.718e+05 26452066 <2e-16 ***
Tmem108 3.884e+13 8.406e+05 46203169 <2e-16 ***
Ddr1 -6.061e+13 7.777e+05 -77935451 <2e-16 ***
Ezr -1.538e+12 8.165e+05 -1883070 <2e-16 ***
Sncaip -6.607e+13 7.615e+05 -86758312 <2e-16 ***
CT025619.1 5.786e+13 7.693e+05 75220795 <2e-16 ***
Fabp7 -1.160e+13 7.987e+05 -14523404 <2e-16 ***
Ctnnd2 5.243e+13 7.989e+05 65626431 <2e-16 ***
Ptn -5.180e+13 8.866e+05 -58423528 <2e-16 ***
Frmd4a -9.865e+12 8.139e+05 -12119603 <2e-16 ***
Sorbs2 7.082e+13 1.040e+06 68115662 <2e-16 ***
Hes6 5.248e+13 8.207e+05 63952536 <2e-16 ***
Dcc -1.086e+14 1.091e+06 -99558297 <2e-16 ***
Neurog2 8.885e+12 8.991e+05 9882478 <2e-16 ***
Tcf12 4.439e+13 7.580e+05 58560212 <2e-16 ***
Fam110a -3.762e+12 7.726e+05 -4870098 <2e-16 ***
Dhrs4 -6.585e+13 8.280e+05 -79533727 <2e-16 ***
Ccnd2 2.088e+13 8.477e+05 24635620 <2e-16 ***
Elavl4 5.882e+13 1.040e+06 56574947 <2e-16 ***
Ank3 1.621e+12 8.158e+05 1986603 <2e-16 ***
Klf12 1.858e+13 8.113e+05 22905550 <2e-16 ***
Slc17a6 3.336e+13 7.777e+05 42892923 <2e-16 ***
Lzts1 -3.340e+11 7.629e+05 -437779 <2e-16 ***
Map2 1.529e+13 1.049e+06 14565916 <2e-16 ***
Lhx2 6.076e+13 1.073e+06 56647519 <2e-16 ***
Neurod2 1.156e+14 1.308e+06 88402243 <2e-16 ***
Sstr2 2.958e+13 1.123e+06 26328080 <2e-16 ***
Eomes 2.323e+13 8.998e+05 25818313 <2e-16 ***
Vps37b 2.378e+13 7.719e+05 30806540 <2e-16 ***
Ier5 -6.635e+13 7.933e+05 -83638912 <2e-16 ***
Cttnbp2 7.413e+12 1.026e+06 7226886 <2e-16 ***
Tmem178 5.869e+13 7.711e+05 76112402 <2e-16 ***
Plxna4 6.142e+12 8.247e+05 7447401 <2e-16 ***
Aff3 3.216e+13 8.494e+05 37865603 <2e-16 ***
Tiam2 3.504e+13 8.431e+05 41564359 <2e-16 ***
Ppp2r2b 1.014e+12 1.343e+06 755110 <2e-16 ***
Zeb2 -8.426e+13 8.214e+05 -102580264 <2e-16 ***
Gria2 3.458e+13 8.115e+05 42614466 <2e-16 ***
Nbea -2.142e+13 7.864e+05 -27232237 <2e-16 ***
Ptprd -8.085e+12 8.432e+05 -9588010 <2e-16 ***
Sox5 1.043e+13 9.521e+05 10952629 <2e-16 ***
Mpped2 1.653e+13 9.316e+05 17741622 <2e-16 ***
Bcl11b -5.000e+13 8.186e+05 -61072633 <2e-16 ***
Epha5 7.168e+13 1.438e+06 49838920 <2e-16 ***
Nol4 5.393e+13 8.773e+05 61475929 <2e-16 ***
Grik2 -8.502e+12 7.828e+05 -10860942 <2e-16 ***
Mir124.2hg 3.667e+12 8.306e+05 4415594 <2e-16 ***
Pcp4 -9.935e+12 7.703e+05 -12897946 <2e-16 ***
Ccser1 6.731e+13 1.011e+06 66545181 <2e-16 ***
Kcnb2 -3.395e+13 7.809e+05 -43477366 <2e-16 ***
Dscaml1 -3.299e+13 8.061e+05 -40923228 <2e-16 ***
Kcnh7 -3.625e+13 8.216e+05 -44124624 <2e-16 ***
Pam -2.266e+13 9.566e+05 -23682478 <2e-16 ***
Negr1 -5.486e+13 8.125e+05 -67527681 <2e-16 ***
Ppm1e -1.101e+13 7.777e+05 -14161188 <2e-16 ***
Myt1l 1.275e+13 9.473e+05 13455861 <2e-16 ***
Foxp2 -4.217e+12 7.833e+05 -5382953 <2e-16 ***
Gpc6 -7.684e+12 7.945e+05 -9671234 <2e-16 ***
Masp1 4.292e+13 7.665e+05 55993994 <2e-16 ***
Serpini1 -5.105e+12 8.061e+05 -6333133 <2e-16 ***
Pcdh7 5.837e+13 8.073e+05 72299867 <2e-16 ***
Rbfox1 7.461e+12 8.031e+05 9289560 <2e-16 ***
Robo2 -6.876e+13 1.420e+06 -48418494 <2e-16 ***
Fhod3 4.399e+13 7.980e+05 55121613 <2e-16 ***
Rprm 3.484e+13 7.835e+05 44465376 <2e-16 ***
Gng3 2.281e+13 7.680e+05 29706841 <2e-16 ***
Uchl1 -1.332e+13 7.594e+05 -17544470 <2e-16 ***
Tmem176b 8.467e+12 8.030e+05 10544314 <2e-16 ***
Nrn1 -5.681e+13 9.557e+05 -59447760 <2e-16 ***
Snap25 -4.422e+13 7.898e+05 -55987310 <2e-16 ***
Nrg1 -3.190e+13 1.511e+06 -21119116 <2e-16 ***
Cdh8 -9.450e+13 9.459e+05 -99906199 <2e-16 ***
Trim17 -5.827e+13 7.920e+05 -73573697 <2e-16 ***
Chga 1.515e+13 7.864e+05 19260283 <2e-16 ***
Rpl26 -1.381e+14 1.158e+06 -119242819 <2e-16 ***
Rack1 -3.030e+13 1.065e+06 -28463399 <2e-16 ***
Hsp90b1 1.513e+12 7.764e+05 1949079 <2e-16 ***
Aldoa 5.999e+13 7.895e+05 75985414 <2e-16 ***
Tuba1b -2.984e+13 7.673e+05 -38893858 <2e-16 ***
Sf3b6 -1.751e+13 7.754e+05 -22583944 <2e-16 ***
Top1 -4.635e+13 1.256e+06 -36914861 <2e-16 ***
Tpr 2.535e+13 8.111e+05 31258405 <2e-16 ***
Smc3 1.778e+13 7.864e+05 22614692 <2e-16 ***
Rplp1 6.245e+13 1.506e+06 41453287 <2e-16 ***
Rpl22 5.820e+13 9.818e+05 59284313 <2e-16 ***
Rpl36a 7.624e+13 1.286e+06 59290521 <2e-16 ***
Rps21 -1.722e+13 9.803e+05 -17562653 <2e-16 ***
Eif4a1 3.635e+13 1.107e+06 32824725 <2e-16 ***
Erh 3.879e+13 8.249e+05 47018259 <2e-16 ***
Mdk 6.137e+13 1.311e+06 46812498 <2e-16 ***
Pde4d 9.697e+13 8.054e+05 120403478 <2e-16 ***
Pgrmc1 4.160e+13 7.890e+05 52731940 <2e-16 ***
Ube2r2 -4.281e+12 7.630e+05 -5610419 <2e-16 ***
Bri3 -4.674e+12 7.624e+05 -6130738 <2e-16 ***
Rps28 2.337e+13 1.081e+06 21624714 <2e-16 ***
Mbtd1 -3.431e+13 8.020e+05 -42787850 <2e-16 ***
Rpl15 4.540e+13 1.056e+06 42976360 <2e-16 ***
Macf1 -3.898e+13 7.456e+05 -52271693 <2e-16 ***
Phf21a -1.085e+13 7.941e+05 -13663454 <2e-16 ***
Oaz2 -2.184e+13 7.671e+05 -28476312 <2e-16 ***
Gnb1 3.086e+13 7.786e+05 39638926 <2e-16 ***
Zfp422 -2.665e+13 7.937e+05 -33579867 <2e-16 ***
Rpl35a -4.777e+13 1.012e+06 -47191386 <2e-16 ***
Cox8a 3.798e+12 9.384e+05 4047518 <2e-16 ***
Nt5dc2 -2.355e+13 7.820e+05 -30114999 <2e-16 ***
X4930402H24Rik -3.421e+13 8.011e+05 -42703505 <2e-16 ***
Ttc28 -2.613e+13 9.395e+05 -27807041 <2e-16 ***
Ckb -4.380e+13 8.412e+05 -52071863 <2e-16 ***
Sh3bgrl -1.422e+13 7.831e+05 -18158721 <2e-16 ***
Top2b 4.336e+13 7.850e+05 55234013 <2e-16 ***
Ndufa10 -8.308e+13 7.998e+05 -103871878 <2e-16 ***
C530008M17Rik -4.875e+13 8.134e+05 -59928713 <2e-16 ***
Basp1 1.084e+14 1.002e+06 108114394 <2e-16 ***
Gpatch8 4.789e+12 7.653e+05 6257168 <2e-16 ***
Map1b -3.338e+13 9.207e+05 -36254006 <2e-16 ***
Ccdc88a 9.193e+12 7.736e+05 11882326 <2e-16 ***
Xist -2.576e+13 7.861e+05 -32765738 <2e-16 ***
Ctnna2 -4.233e+13 9.718e+05 -43552178 <2e-16 ***
Rnf7 -5.865e+11 8.067e+05 -727059 <2e-16 ***
Fabp5 6.801e+13 9.924e+05 68526316 <2e-16 ***
Usmg5 3.478e+13 7.931e+05 43858152 <2e-16 ***
Phip 3.203e+13 7.823e+05 40942232 <2e-16 ***
Rbmx 5.754e+12 7.791e+05 7384674 <2e-16 ***
Zbtb20 -4.798e+13 1.022e+06 -46936751 <2e-16 ***
Eif1b -7.349e+13 1.019e+06 -72109569 <2e-16 ***
Lcor 1.574e+13 7.874e+05 19989987 <2e-16 ***
Nsg1 -8.545e+12 9.945e+05 -8592451 <2e-16 ***
Evl -1.350e+13 7.905e+05 -17079400 <2e-16 ***
Meg3 4.731e+13 8.077e+05 58571895 <2e-16 ***
Eif4a2 -3.366e+13 7.879e+05 -42721584 <2e-16 ***
Rab10 -3.012e+13 7.727e+05 -38981490 <2e-16 ***
Mllt11 9.241e+13 1.233e+06 74972937 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 653.13 on 472 degrees of freedom
Residual deviance: 4685.67 on 326 degrees of freedom
AIC: 4979.7
Number of Fisher Scoring iterations: 32
# Predicted probabilities for the held-out cells, turned into class labels
# with a 0.5 cut-off.
probabilities <- predict(model, test.data, type = "response")
above.cutoff <- probabilities > 0.5
predicted.classes <- ifelse(above.cutoff, "1", "0")
# Model accuracy
mean(predicted.classes == test.data$COTAN_Cl.code)
[1] 0.6949153
434 e13.5 e14.5
Sys.time()
[1] "2025-12-16 18:58:42 CET"
sessionInfo()
R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] nnet_7.3-20 COTAN_2.9.4 caret_7.0-1 lattice_0.22-7
[5] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[9] purrr_1.2.0 readr_2.1.5 tibble_3.3.0 tidyverse_2.0.0
[13] tidyr_1.3.1 parallelDist_0.2.6 data.table_1.17.0 zeallot_0.2.0
[17] ggplot2_4.0.1
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.5.2
[3] later_1.4.2 polyclip_1.10-7
[5] hardhat_1.4.1 pROC_1.18.5
[7] rpart_4.1.24 fastDummies_1.7.5
[9] lifecycle_1.0.4 doParallel_1.0.17
[11] processx_3.8.6 globals_0.18.0
[13] MASS_7.3-65 dendextend_1.19.0
[15] magrittr_2.0.4 plotly_4.11.0
[17] rmarkdown_2.29 yaml_2.3.10
[19] httpuv_1.6.16 Seurat_5.2.1
[21] sctransform_0.4.2 spam_2.11-1
[23] sp_2.2-0 spatstat.sparse_3.1-0
[25] reticulate_1.42.0 cowplot_1.1.3
[27] pbapply_1.7-2 RColorBrewer_1.1-3
[29] abind_1.4-8 GenomicRanges_1.60.0
[31] Rtsne_0.17 BiocGenerics_0.54.0
[33] coro_1.1.0 torch_0.14.2
[35] ipred_0.9-15 GenomeInfoDbData_1.2.14
[37] circlize_0.4.16 lava_1.8.1
[39] IRanges_2.42.0 S4Vectors_0.46.0
[41] ggrepel_0.9.6 irlba_2.3.5.1
[43] listenv_0.9.1 spatstat.utils_3.1-4
[45] goftest_1.2-3 RSpectra_0.16-2
[47] spatstat.random_3.4-1 fitdistrplus_1.2-2
[49] parallelly_1.45.0 codetools_0.2-20
[51] DelayedArray_0.34.1 tidyselect_1.2.1
[53] shape_1.4.6.1 UCSC.utils_1.4.0
[55] farver_2.1.2 viridis_0.6.5
[57] ScaledMatrix_1.16.0 matrixStats_1.5.0
[59] stats4_4.5.2 spatstat.explore_3.4-2
[61] jsonlite_2.0.0 GetoptLong_1.0.5
[63] gghalves_0.1.4 progressr_0.15.1
[65] ggridges_0.5.6 survival_3.8-3
[67] iterators_1.0.14 foreach_1.5.2
[69] tools_4.5.2 ica_1.0-3
[71] Rcpp_1.0.14 glue_1.8.0
[73] prodlim_2025.04.28 gridExtra_2.3
[75] SparseArray_1.8.0 xfun_0.52
[77] MatrixGenerics_1.20.0 ggthemes_5.1.0
[79] GenomeInfoDb_1.44.0 withr_3.0.2
[81] fastmap_1.2.0 callr_3.7.6
[83] digest_0.6.37 rsvd_1.0.5
[85] timechange_0.3.0 R6_2.6.1
[87] mime_0.13 colorspace_2.1-1
[89] scattermore_1.2 tensor_1.5
[91] spatstat.data_3.1-6 generics_0.1.3
[93] recipes_1.3.0 class_7.3-23
[95] httr_1.4.7 htmlwidgets_1.6.4
[97] S4Arrays_1.8.0 uwot_0.2.3
[99] ModelMetrics_1.2.2.2 pkgconfig_2.0.3
[101] gtable_0.3.6 timeDate_4041.110
[103] ComplexHeatmap_2.24.0 lmtest_0.9-40
[105] S7_0.2.1 SingleCellExperiment_1.30.0
[107] XVector_0.48.0 htmltools_0.5.8.1
[109] dotCall64_1.2 zigg_0.0.2
[111] clue_0.3-66 Biobase_2.68.0
[113] SeuratObject_5.1.0 scales_1.4.0
[115] png_0.1-8 gower_1.0.2
[117] spatstat.univar_3.1-3 knitr_1.50
[119] tzdb_0.5.0 reshape2_1.4.4
[121] rjson_0.2.23 nlme_3.1-168
[123] proxy_0.4-27 zoo_1.8-14
[125] GlobalOptions_0.1.2 KernSmooth_2.23-26
[127] parallel_4.5.2 miniUI_0.1.2
[129] pillar_1.10.2 grid_4.5.2
[131] vctrs_0.6.5 RANN_2.6.2
[133] promises_1.3.2 BiocSingular_1.24.0
[135] beachmat_2.24.0 xtable_1.8-4
[137] cluster_2.1.8.1 evaluate_1.0.3
[139] cli_3.6.5 compiler_4.5.2
[141] rlang_1.1.6 crayon_1.5.3
[143] future.apply_1.20.0 labeling_0.4.3
[145] ps_1.9.1 plyr_1.8.9
[147] stringi_1.8.7 deldir_2.0-4
[149] viridisLite_0.4.2 BiocParallel_1.42.0
[151] assertthat_0.2.1 lazyeval_0.2.2
[153] spatstat.geom_3.4-1 Matrix_1.7-4
[155] RcppHNSW_0.6.0 hms_1.1.3
[157] patchwork_1.3.2 bit64_4.6.0-1
[159] future_1.58.0 shiny_1.11.0
[161] SummarizedExperiment_1.38.1 ROCR_1.0-11
[163] Rfast_2.1.5.1 igraph_2.1.4
[165] RcppParallel_5.1.10 bit_4.6.0