The sciNOME package provides region-centric integration for
single-cell multi-omics data. Its core function,
`Integrate_MultiOmics`, lets users merge RNA
expression, DNA methylation (CpG), and chromatin accessibility (GpC)
matrices using a global metadata mapping table.
In this vignette, we simulate an ultra-lightweight multi-omics dataset to demonstrate the integration workflows rapidly.
To successfully integrate multiple omics layers, three components must align exactly:

1. The global metadata (linking sample names across omics layers).
2. The region dictionary (linking genomic coordinates to gene IDs).
3. The expression/methylation matrices.
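We simulate 4 cells/samples belonging to two biological conditions (“Tumor” and “Normal”). The metadata table, `meta_df`, holds the condition label in a `Condition` column and the sample name used by each omics layer in the `RNA_Sample`, `CpG_Sample`, and `GpC_Sample` columns.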
We simulate 3 genes and their corresponding promoter regions.
# Region dictionary: one row per promoter region, linking its genomic
# coordinates to a gene ID and a gene symbol.
# NOTE(review): every rendered result table further down reports GeneID
# "GENE" for all rows, so these IDs appear to be truncated at the underscore
# somewhere downstream -- confirm, and consider underscore-free IDs if so.
region_df <- data.frame(
  chr = c("chr1", "chr2", "chr3"),
  start = c(1000, 3000, 5000),
  end = c(2000, 4000, 6000),
  gene_id = c("GENE_A", "GENE_B", "GENE_C"),
  gene_name = c("Sym_A", "Sym_B", "Sym_C"),
  stringsAsFactors = FALSE
)
# The function expects 'chrdata' format, i.e. a "chr:start-end" key per region
region_df$chrdata <- paste0(
  region_df$chr, ":", region_df$start, "-", region_df$end
)

# Fix the RNG seed so the matrices simulated afterwards are reproducible.
# This must be its own statement: fused onto the previous line it is a
# parse error.
set.seed(42)
# NOTE(review): meta_df is used below but is not created anywhere in the
# visible text. It has to supply the RNA_Sample, CpG_Sample and GpC_Sample
# columns (plus the Condition column used by the integration calls) --
# confirm the chunk that builds it was not dropped.

# --- Layer 1: RNA (3 genes x 4 samples), wrapped with Build_RNAObject ---
rna_counts <- matrix(runif(12, 10, 50), nrow = 3, ncol = 4)
dimnames(rna_counts) <- list(
  c("GENE_A", "GENE_B", "GENE_C"), # same IDs as region_df$gene_id
  meta_df$RNA_Sample # same names as meta_df$RNA_Sample
)
rna_obj <- Build_RNAObject(rna_counts, min_cells = 0, min_features = 0)
# Mock data only: copy the raw counts into the normalized-data slot.
rna_obj$assays$RNA$data <- rna_obj$assays$RNA$counts

# --- Layer 2: CpG (3 regions x 4 samples), values drawn from [0.2, 0.8] ---
cpg_mat <- matrix(runif(12, 0.2, 0.8), nrow = 3, ncol = 4)
dimnames(cpg_mat) <- list(
  region_df$chrdata, # rows keyed by region_df$chrdata
  meta_df$CpG_Sample # columns keyed by meta_df$CpG_Sample
)

# --- Layer 3: GpC (3 regions x 4 samples), values drawn from [0.1, 0.9] ---
# Rows are keyed by region_df$chrdata here; column names are assigned by the
# statement that follows.
gpc_mat <- matrix(
  runif(12, 0.1, 0.9),
  nrow = 3,
  ncol = 4,
  dimnames = list(region_df$chrdata, NULL)
)
colnames(gpc_mat) <- meta_df$GpC_Sample # Matches meta_df$GpC_Sample

Now that our data is prepared and strictly aligned, we can demonstrate the various integration modes.
Integrate all three layers for the “Tumor” group.
# Tri-omics integration (RNA + CpG + GpC) restricted to the "Tumor" samples.
tri_merged <- Integrate_MultiOmics(
  mode = "tri",
  # Which samples to use: rows of meta_df whose Condition is "Tumor"
  target_group = "Tumor",
  meta_df = meta_df,
  group_col = "Condition",
  # Region-to-gene dictionary
  region_df = region_df,
  # Each layer is passed with the meta_df column that holds its sample names
  rna_obj = rna_obj,
  rna_id_col = "RNA_Sample",
  cpg_mat = cpg_mat,
  cpg_id_col = "CpG_Sample",
  gpc_mat = gpc_mat,
  gpc_id_col = "GpC_Sample"
)
# View results (Genes mapped to their average RNA exp, CpG level, and GpC level)
# NOTE(review): the rendered table below has a single row with GeneID "GENE"
# and an RNA_Exp value above the 10-50 range the counts were simulated from,
# so the three genes look collapsed/summed rather than averaged -- confirm.
knitr::kable(tri_merged, digits = 3)

| Associated_Regions | GeneID | GeneName | RNA_Exp | CpG_level | GpC_level |
|---|---|---|---|---|---|
| chr1:1000-2000;chr2:3000-4000;chr3:5000-6000 | GENE | Sym_A | 112.586 | 0.569 | 0.523 |
If you only have RNA and DNA methylation data, use
`mode = "rna_cpg"`. Let’s compute the result for the “Normal”
group.
# Two-layer integration (RNA + CpG only) restricted to the "Normal" samples.
# No GpC matrix is passed in this mode.
rna_cpg_merged <- Integrate_MultiOmics(
  mode = "rna_cpg",
  # Which samples to use: rows of meta_df whose Condition is "Normal"
  target_group = "Normal",
  meta_df = meta_df,
  group_col = "Condition",
  # Region-to-gene dictionary
  region_df = region_df,
  # Each layer is passed with the meta_df column that holds its sample names
  rna_obj = rna_obj,
  rna_id_col = "RNA_Sample",
  cpg_mat = cpg_mat,
  cpg_id_col = "CpG_Sample"
)
knitr::kable(rna_cpg_merged, digits = 3)

| Associated_Regions | GeneID | GeneName | RNA_Exp | CpG_level |
|---|---|---|---|---|
| chr1:1000-2000;chr2:3000-4000;chr3:5000-6000 | GENE | Sym_A | 98.203 | 0.601 |
If you only want to compare methylation and accessibility at the region level (without RNA), use `mode = "cpg_gpc"`.
# Epigenome-only integration (CpG + GpC) restricted to the "Tumor" samples.
# No RNA object is passed in this mode.
epi_merged <- Integrate_MultiOmics(
  mode = "cpg_gpc",
  # Which samples to use: rows of meta_df whose Condition is "Tumor"
  target_group = "Tumor",
  meta_df = meta_df,
  group_col = "Condition",
  # Region-to-gene dictionary
  region_df = region_df,
  # Each layer is passed with the meta_df column that holds its sample names
  cpg_mat = cpg_mat,
  cpg_id_col = "CpG_Sample",
  gpc_mat = gpc_mat,
  gpc_id_col = "GpC_Sample"
)
knitr::kable(epi_merged, digits = 3)

| Associated_Regions | GeneID | GeneName | CpG_level | GpC_level |
|---|---|---|---|---|
| chr1:1000-2000 | GENE | Sym_A | 0.762 | 0.495 |
| chr2:3000-4000 | GENE | Sym_A | 0.570 | 0.484 |
| chr3:5000-6000 | GENE | Sym_A | 0.374 | 0.590 |
# Print the R version, platform and package versions of the current session.
utils::sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] dplyr_1.2.1 data.table_1.18.4 sciNOME_0.99.0 BiocStyle_2.41.0
#>
#> loaded via a namespace (and not attached):
#> [1] tidyr_1.3.2 sass_0.4.10 generics_0.1.4
#> [4] rstatix_0.7.3 lattice_0.22-9 digest_0.6.39
#> [7] magrittr_2.0.5 evaluate_1.0.5 grid_4.6.0
#> [10] RColorBrewer_1.1-3 fastmap_1.2.0 jsonlite_2.0.0
#> [13] Matrix_1.7-5 ggrepel_0.9.8 backports_1.5.1
#> [16] Formula_1.2-5 BiocManager_1.30.27 purrr_1.2.2
#> [19] scales_1.4.0 pbapply_1.7-4 jquerylib_0.1.4
#> [22] abind_1.4-8 cli_3.6.6 rlang_1.2.0
#> [25] withr_3.0.2 cachem_1.1.0 yaml_2.3.12
#> [28] otel_0.2.0 parallel_4.6.0 tools_4.6.0
#> [31] ggsignif_0.6.4 ggplot2_4.0.3 ggpubr_0.6.3
#> [34] BiocGenerics_0.59.7 broom_1.0.13 buildtools_1.0.0
#> [37] vctrs_0.7.3 R6_2.6.1 stats4_4.6.0
#> [40] lifecycle_1.0.5 Seqinfo_1.3.0 car_3.1-5
#> [43] S4Vectors_0.51.3 IRanges_2.47.2 pkgconfig_2.0.3
#> [46] pillar_1.11.1 bslib_0.11.0 gtable_0.3.6
#> [49] Rcpp_1.1.1-1.1 glue_1.8.1 xfun_0.58
#> [52] tibble_3.3.1 GenomicRanges_1.65.0 tidyselect_1.2.1
#> [55] sys_3.4.3 knitr_1.51 farver_2.1.2
#> [58] patchwork_1.3.2 igraph_2.3.2 htmltools_0.5.9
#> [61] carData_3.0-6 rmarkdown_2.31 maketools_1.3.2
#> [64] compiler_4.6.0 S7_0.2.2