#library(SingleCellExperiment)
#library(DuoClustering2018)
#library(tidyr)
library(ggplot2)
#library(ggsankey) # remotes::install_github("davidsjoberg/ggsankey")
library(tibble)
library(zeallot)
library(COTAN)
options(parallelly.fork.enable = TRUE)
<- "Data/CD14Cleaned/"
outDir
setLoggingLevel(2)
setLoggingFile(file.path(outDir, "cd14_analysis.log"))
CD14 Data-set Anaysis with cleaning
library(Seurat)
<- Read10X(file.path(outDir, "/OrigialDatahg19")) dataset
<- "CD14_Monocytes"
sampleCondition <- COTAN(raw = dataset)
cd14Obj <- initializeMetaDataset(cd14Obj,
cd14Obj GEO = "10X CD14+ Monocytes",
sequencingMethod = "10X",
sampleCondition = sampleCondition)
sampleCondition
[1] "CD14_Monocytes"
Inspect cells’ sizes
cellSizePlot(cd14Obj, splitPattern = "-", numCol = 2)
Drop cells with too many reads as they are probably doublets
<- 2500
cellsSizeThr <- addElementToMetaDataset(cd14Obj, "Cells size threshold", cellsSizeThr)
cd14Obj
<- getCells(cd14Obj)[getCellsSize(cd14Obj) > cellsSizeThr]
cells_to_rem <- dropGenesCells(cd14Obj, cells = cells_to_rem)
cd14Obj
cellSizePlot(cd14Obj, splitPattern = "-", numCol = 2)
Inspect the number of expressed genes per cell
genesSizePlot(cd14Obj, splitPattern = "-", numCol = 2)
Drop cells with too high genes expression as they are probably doublets
<- 800
genesSizeThr <- addElementToMetaDataset(cd14Obj, "Num genes threshold", genesSizeThr)
cd14Obj
<- getNumExpressedGenes(cd14Obj)
numExprGenes <- names(numExprGenes)[numExprGenes > genesSizeThr]
cells_to_rem <- dropGenesCells(cd14Obj, cells = cells_to_rem)
cd14Obj
genesSizePlot(cd14Obj, splitPattern = "-", numCol = 2)
Check number of mitochondrial genes expressed in each cell
<- "^[Mm][Tt]-"
mitGenesPattern getGenes(cd14Obj)[grep(mitGenesPattern, getGenes(cd14Obj))]
[1] "MT-ND1" "MT-ND2" "MT-CO1" "MT-CO2" "MT-ATP8" "MT-ATP6" "MT-CO3"
[8] "MT-ND3" "MT-ND4L" "MT-ND4" "MT-ND5" "MT-ND6" "MT-CYB"
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(cd14Obj, genePrefix = mitGenesPattern,
splitPattern = "-", numCol = 2)
plot(mitPlot)
Cells with a too high percentage of mitochondrial genes are likely dead (or at the last problematic) cells. So we drop them!
<- 7
mitPercThr <- addElementToMetaDataset(cd14Obj, "Mitoc. perc. threshold", mitPercThr)
cd14Obj
<- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr]
cells_to_rem
<- dropGenesCells(cd14Obj, cells = cells_to_rem)
cd14Obj
c(mitPlot, mitSizes) %<-%
mitochondrialPercentagePlot(cd14Obj, genePrefix = mitGenesPattern,
splitPattern = "-", numCol = 2)
plot(mitPlot)
Check the number of ribosomal genes expressed in each cell
<- "^RP[SL]\\d+"
ribGenesPattern getGenes(cd14Obj)[grep(ribGenesPattern, getGenes(cd14Obj))]
[1] "RPL22" "RPL11" "RPS6KA1" "RPS8"
[5] "RPL5" "RPS27" "RPS10P7" "RPS6KC1"
[9] "RPS7" "RPS27A" "RPL31" "RPL37A"
[13] "RPL32" "RPL15" "RPL14" "RPL29"
[17] "RPL24" "RPL22L1" "RPL39L" "RPL35A"
[21] "RPL9" "RPL34-AS1" "RPL34" "RPS3A"
[25] "RPL37" "RPS23" "RPS14" "RPL26L1"
[29] "RPS18" "RPS10-NUDT3" "RPS10" "RPL10A"
[33] "RPL7L1" "RPS12" "RPS6KA2" "RPS6KA2-AS1"
[37] "RPS6KA3" "RPS4X" "RPS6KA6" "RPL36A"
[41] "RPL36A-HNRNPH2" "RPL39" "RPL10" "RPS20"
[45] "RPL7" "RPL30" "RPL8" "RPS6"
[49] "RPL35" "RPL12" "RPL7A" "RPS24"
[53] "RPL27A" "RPS13" "RPS6KA4" "RPS6KB2"
[57] "RPS3" "RPS25" "RPS26" "RPL41"
[61] "RPL6" "RPL21" "RPL10L" "RPS29"
[65] "RPL36AL" "RPS6KL1" "RPS6KA5" "RPS27L"
[69] "RPL4" "RPS17" "RPS17L" "RPL3L"
[73] "RPS2" "RPS15A" "RPL13" "RPL26"
[77] "RPL23A" "RPL23" "RPL19" "RPL27"
[81] "RPS6KB1" "RPL38" "RPL17-C18orf32" "RPL17"
[85] "RPS21" "RPS15" "RPL36" "RPS28"
[89] "RPL18A" "RPS16" "RPS19" "RPL18"
[93] "RPL13A" "RPS11" "RPS9" "RPL28"
[97] "RPS5" "RPS4Y1" "RPS4Y2" "RPL3"
[101] "RPS19BP1"
c(ribPlot, ribSizes) %<-%
mitochondrialPercentagePlot(cd14Obj, genePrefix = mitGenesPattern,
splitPattern = "-", numCol = 2)
plot(ribPlot)
Check no further outliers after all the culling
cellSizePlot(cd14Obj, splitPattern = "-", numCol = 2)
genesSizePlot(cd14Obj, splitPattern = "-", numCol = 2)
Clean: round 1
<- clean(cd14Obj)
cd14Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cd14Obj)
plot(pcaCellsPlot)
plot(genesPlot)
<- addElementToMetaDataset(cd14Obj, "Num drop B group", 0) cd14Obj
Visualize if all is ok:
plot(UDEPlot)
plot(nuPlot)
plot(zoomedNuPlot)
Final cleaning to check all is OK
<- clean(cd14Obj)
cd14Obj
c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(cd14Obj)
plot(pcaCellsPlot)
plot(pcaCellsData)
plot(genesPlot)
plot(UDEPlot)
plot(nuPlot)
plot(zoomedNuPlot)
plot(cellSizePlot(cd14Obj, splitPattern = "-", numCol = 2))
plot(genesSizePlot(cd14Obj, splitPattern = "-", numCol = 2))
Calculate genes’ COEX
Sys.time()
<- proceedToCoex(cd14Obj, calcCoex = TRUE, cores = 12,
cd14Obj saveObj = TRUE, outDir = outDir)
<- readRDS(file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS"))) cd14Obj
<- calculateGDI(cd14Obj)
gdiData
<- head(rownames(gdiData[order(gdiData[["GDI"]],
genesToLabel decreasing = TRUE), ]), n = 50L)
sort(genesToLabel)
[1] "ABI3" "ARHGDIB" "ATP5A1" "ATP6V1G1" "CALM2" "CD53"
[7] "CD7" "CLEC10A" "CPVL" "CTSW" "FCER1A" "FKBP1A"
[13] "H2AFY" "HLA-DMB" "HLA-DPA1" "HLA-DPB1" "HLA-DQA1" "HLA-DQA2"
[19] "HLA-DQB1" "HLA-DRA" "HMGN1" "HNRNPA0" "HNRNPA1" "HNRNPA2B1"
[25] "IFITM1" "IL2RG" "IL32" "LDHA" "LDHB" "MYL12A"
[31] "NPM1" "PARK7" "PDIA6" "PGK1" "PTPRCAP" "RAB7A"
[37] "RBM3" "RHOA" "RHOC" "RPL5" "S100A6" "S100A8"
[43] "S100A9" "SELL" "SLC25A5" "TMEM230" "TMEM66" "TUBA1B"
[49] "TYROBP" "YBX1"
"GDI"] gdiData[genesToLabel[[50L]],
[1] 1.606624
<- GDIPlot(cd14Obj, GDIIn = gdiData, GDIThreshold = 1.4,
gdiPlot genes = list("Top 10 GDI genes" = genesToLabel[1L:10L]))
plot(gdiPlot)
Sys.time()
[1] "2024-05-10 19:18:01 CEST"
Save the COTAN object
saveRDS(cd14Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
<- readRDS(file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS"))) cd14Obj
c(splitClusters, splitCoexDF) %<-%
cellsUniformClustering(cd14Obj, GDIThreshold = 1.43, cores = 13,
saveObj = TRUE, outDir = outDir)
<- addClusterization(cd14Obj, clName = "split",
cd14Obj clusters = splitClusters,
coexDF = splitCoexDF, override = TRUE)
<- getClusterizationData(cd14Obj, clName = "split")[[1]]
splitClusters
table(splitClusters)
splitClusters
1 2 3 4 5
878 38 637 837 48
saveRDS(cd14Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
c(mergedClusters, mergedCoexDF) %<-%
mergeUniformCellsClusters(cd14Obj, clusters = splitClusters,
GDIThreshold = 1.43, cores = 13,
saveObj = TRUE, outDir = outDir)
<- addClusterization(cd14Obj, clName = "merge",
cd14Obj clusters = mergedClusters,
coexDF = mergedCoexDF,
override = TRUE)
<- getClusterizationData(cd14Obj, clName = "merge")[[1]]
mergedClusters
table(mergedClusters)
mergedClusters
1 2 3
1522 878 38
saveRDS(cd14Obj, file = file.path(outDir, paste0(sampleCondition, ".cotan.RDS")))
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Rome
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Seurat_5.0.0 SeuratObject_5.0.0 sp_2.1-1 COTAN_2.5.0
[5] zeallot_0.1.0 tibble_3.2.1 ggplot2_3.5.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.21 splines_4.3.2
[3] later_1.3.1 R.oo_1.26.0
[5] polyclip_1.10-4 fastDummies_1.7.3
[7] lifecycle_1.0.3 doParallel_1.0.17
[9] globals_0.16.2 lattice_0.22-5
[11] MASS_7.3-60 dendextend_1.17.1
[13] magrittr_2.0.3 plotly_4.10.2
[15] rmarkdown_2.24 yaml_2.3.7
[17] httpuv_1.6.11 sctransform_0.4.1
[19] spam_2.10-0 askpass_1.2.0
[21] spatstat.sparse_3.0-2 reticulate_1.36.1
[23] cowplot_1.1.1 pbapply_1.7-2
[25] RColorBrewer_1.1-3 abind_1.4-5
[27] Rtsne_0.17 purrr_1.0.1
[29] R.utils_2.12.2 BiocGenerics_0.46.0
[31] circlize_0.4.15 IRanges_2.34.1
[33] S4Vectors_0.38.1 ggrepel_0.9.5
[35] irlba_2.3.5.1 listenv_0.9.0
[37] spatstat.utils_3.0-3 umap_0.2.10.0
[39] goftest_1.2-3 RSpectra_0.16-1
[41] spatstat.random_3.2-1 dqrng_0.3.0
[43] fitdistrplus_1.1-11 parallelly_1.37.1
[45] DelayedMatrixStats_1.22.5 leiden_0.4.3
[47] codetools_0.2-19 DelayedArray_0.26.7
[49] tidyselect_1.2.0 shape_1.4.6
[51] farver_2.1.1 ScaledMatrix_1.8.1
[53] viridis_0.6.4 matrixStats_1.2.0
[55] stats4_4.3.2 spatstat.explore_3.2-1
[57] jsonlite_1.8.7 GetoptLong_1.0.5
[59] ellipsis_0.3.2 progressr_0.14.0
[61] ggridges_0.5.4 survival_3.5-8
[63] iterators_1.0.14 foreach_1.5.2
[65] tools_4.3.2 ica_1.0-3
[67] Rcpp_1.0.11 glue_1.7.0
[69] gridExtra_2.3 xfun_0.39
[71] MatrixGenerics_1.12.3 ggthemes_5.1.0
[73] dplyr_1.1.2 withr_3.0.0
[75] fastmap_1.1.1 fansi_1.0.4
[77] openssl_2.1.0 digest_0.6.33
[79] rsvd_1.0.5 parallelDist_0.2.6
[81] R6_2.5.1 mime_0.12
[83] colorspace_2.1-0 scattermore_1.2
[85] tensor_1.5 spatstat.data_3.0-1
[87] R.methodsS3_1.8.2 utf8_1.2.3
[89] tidyr_1.3.0 generics_0.1.3
[91] data.table_1.15.0 httr_1.4.6
[93] htmlwidgets_1.6.2 S4Arrays_1.2.0
[95] uwot_0.1.16 pkgconfig_2.0.3
[97] gtable_0.3.3 ComplexHeatmap_2.16.0
[99] lmtest_0.9-40 htmltools_0.5.8
[101] dotCall64_1.1-0 clue_0.3-64
[103] scales_1.3.0 png_0.1-8
[105] knitr_1.43 rstudioapi_0.15.0
[107] reshape2_1.4.4 rjson_0.2.21
[109] nlme_3.1-163 zoo_1.8-12
[111] GlobalOptions_0.1.2 stringr_1.5.0
[113] KernSmooth_2.23-22 parallel_4.3.2
[115] miniUI_0.1.1.1 RcppZiggurat_0.1.6
[117] pillar_1.9.0 grid_4.3.2
[119] vctrs_0.6.3 RANN_2.6.1
[121] promises_1.2.0.1 BiocSingular_1.16.0
[123] beachmat_2.16.0 xtable_1.8-4
[125] cluster_2.1.6 evaluate_0.21
[127] cli_3.6.1 compiler_4.3.2
[129] rlang_1.1.1 crayon_1.5.2
[131] future.apply_1.11.0 labeling_0.4.2
[133] plyr_1.8.8 stringi_1.8.1
[135] viridisLite_0.4.2 deldir_2.0-2
[137] BiocParallel_1.34.2 assertthat_0.2.1
[139] munsell_0.5.0 lazyeval_0.2.2
[141] spatstat.geom_3.2-4 PCAtools_2.14.0
[143] Matrix_1.6-3 RcppHNSW_0.6.0
[145] patchwork_1.2.0 sparseMatrixStats_1.12.2
[147] future_1.33.0 shiny_1.8.0
[149] ROCR_1.0-11 Rfast_2.1.0
[151] igraph_2.0.3 RcppParallel_5.1.7