Multi-Omics Integration in sciNOME

Introduction

The sciNOME package is designed for region-centric integration of single-cell multi-omics data. Its core function, Integrate_MultiOmics, lets users merge RNA expression, DNA methylation (CpG), and chromatin accessibility (GpC) matrices using a global metadata mapping table.

In this vignette, we simulate an ultra-lightweight multi-omics dataset so that each integration workflow can be demonstrated quickly.

library(sciNOME)
library(dplyr)

1. Prepare Mock Multi-Omics Data

To integrate multiple omics layers successfully, three components must align exactly:

1. The Global Metadata (linking sample names across omics layers).
2. The Region Dictionary (linking genomic coordinates to gene IDs).
3. The Expression/Methylation/Accessibility matrices.

1.1 Global Metadata

We simulate 4 cells/samples belonging to two biological conditions.

# Sample sheet: one row per cell/sample. `Condition` is the biological group;
# the three *_Sample columns hold that sample's identifier in each omics layer.
meta_df <- data.frame(
  Condition  = rep(c("Tumor", "Normal"), each = 2),
  RNA_Sample = c("rna_T1", "rna_T2", "rna_N1", "rna_N2"),
  CpG_Sample = c("cpg_T1", "cpg_T2", "cpg_N1", "cpg_N2"),
  GpC_Sample = c("gpc_T1", "gpc_T2", "gpc_N1", "gpc_N2"),
  stringsAsFactors = FALSE
)

1.2 Region Dictionary

We simulate 3 genes and their corresponding promoter regions.

# Region dictionary: one promoter region per gene, with its gene ID and symbol.
# Coordinates are written as integers (note the `L` suffix): pasting a double
# can switch to scientific notation (e.g. paste0(100000) gives "1e+05"), which
# would produce a malformed region key once real genomic positions are used.
region_df <- data.frame(
  chr = c("chr1", "chr2", "chr3"),
  start = c(1000L, 3000L, 5000L),
  end = c(2000L, 4000L, 6000L),
  gene_id = c("GENE_A", "GENE_B", "GENE_C"),
  gene_name = c("Sym_A", "Sym_B", "Sym_C"),
  stringsAsFactors = FALSE
)
# The function expects a 'chrdata' column holding "chr:start-end" region keys
region_df$chrdata <- paste0(region_df$chr, ":", region_df$start, "-", region_df$end)

1.3 Simulate Omics Matrices

# Fix the RNG seed so the simulated values are reproducible.
set.seed(42)

# 1. RNA layer: 3 genes x 4 samples, uniform draws between 10 and 50.
#    Row names are the gene IDs used in region_df$gene_id; column names come
#    from meta_df$RNA_Sample.
rna_counts <- matrix(
  runif(12, 10, 50),
  nrow = 3, ncol = 4,
  dimnames = list(c("GENE_A", "GENE_B", "GENE_C"), meta_df$RNA_Sample)
)
# Wrap the counts with the package's native constructor; the zero thresholds
# are passed so that the tiny mock matrix is not filtered.
rna_obj <- Build_RNAObject(rna_counts, min_cells = 0, min_features = 0)
# Mock normalized data: simply reuse the raw counts.
rna_obj$assays$RNA$data <- rna_obj$assays$RNA$counts

# 2. CpG layer: 3 regions x 4 samples, values between 0.2 and 0.8.
#    Rows are keyed by region_df$chrdata, columns by meta_df$CpG_Sample.
cpg_mat <- matrix(
  runif(12, 0.2, 0.8),
  nrow = 3, ncol = 4,
  dimnames = list(region_df$chrdata, meta_df$CpG_Sample)
)

# 3. GpC layer: 3 regions x 4 samples, values between 0.1 and 0.9.
#    Rows are keyed by region_df$chrdata, columns by meta_df$GpC_Sample.
gpc_mat <- matrix(
  runif(12, 0.1, 0.9),
  nrow = 3, ncol = 4,
  dimnames = list(region_df$chrdata, meta_df$GpC_Sample)
)

2. Multi-Omics Integration

Now that our data is prepared and strictly aligned, we can demonstrate the various integration modes.

Mode A: Tri-Omics Integration (RNA + CpG + GpC)

Integrate all three layers for the “Tumor” group.

# Tri-omics run: RNA, CpG and GpC layers for the "Tumor" group.
tri_merged <- Integrate_MultiOmics(
  mode = "tri",
  target_group = "Tumor",
  meta_df = meta_df,
  group_col = "Condition",
  region_df = region_df,
  # Each layer is paired with the meta_df column that holds its sample IDs.
  rna_obj = rna_obj,
  rna_id_col = "RNA_Sample",
  cpg_mat = cpg_mat,
  cpg_id_col = "CpG_Sample",
  gpc_mat = gpc_mat,
  gpc_id_col = "GpC_Sample"
)

# Render the merged table (one row per gene with RNA, CpG and GpC columns).
# NOTE(review): the rendered table shows a single row with GeneID "GENE" and
# all three regions joined by ";" -- the mock IDs GENE_A/B/C appear to be
# collapsed. Confirm how Integrate_MultiOmics handles IDs containing "_".
knitr::kable(tri_merged, digits = 3)
Associated_Regions GeneID GeneName RNA_Exp CpG_level GpC_level
chr1:1000-2000;chr2:3000-4000;chr3:5000-6000 GENE Sym_A 112.586 0.569 0.523

Mode B: Dual Integration (RNA + CpG)

If you only have RNA and DNA methylation data, use mode = "rna_cpg". Here we run the integration for the “Normal” group.

# Dual run: RNA and CpG layers only, for the "Normal" group.
rna_cpg_merged <- Integrate_MultiOmics(
  mode = "rna_cpg",
  target_group = "Normal",
  meta_df = meta_df,
  group_col = "Condition",
  region_df = region_df,
  # Each layer is paired with the meta_df column that holds its sample IDs.
  rna_obj = rna_obj,
  rna_id_col = "RNA_Sample",
  cpg_mat = cpg_mat,
  cpg_id_col = "CpG_Sample"
)

# Render the merged table.
# NOTE(review): the rendered table shows one row with GeneID "GENE" covering
# all three regions -- confirm whether IDs containing "_" are being collapsed.
knitr::kable(rna_cpg_merged, digits = 3)
Associated_Regions GeneID GeneName RNA_Exp CpG_level
chr1:1000-2000;chr2:3000-4000;chr3:5000-6000 GENE Sym_A 98.203 0.601

Mode C: Epigenetics Only Integration (CpG + GpC)

If you only want to compare methylation and accessibility at the region level (without RNA), use mode = "cpg_gpc".

# Epigenetics-only run: CpG and GpC layers for the "Tumor" group, no RNA.
epi_merged <- Integrate_MultiOmics(
  mode = "cpg_gpc",
  target_group = "Tumor",
  meta_df = meta_df,
  group_col = "Condition",
  region_df = region_df,
  # Each layer is paired with the meta_df column that holds its sample IDs.
  cpg_mat = cpg_mat,
  cpg_id_col = "CpG_Sample",
  gpc_mat = gpc_mat,
  gpc_id_col = "GpC_Sample"
)

# Render the merged table (one row per region).
# NOTE(review): every rendered row shows GeneID "GENE" / GeneName "Sym_A",
# including the regions defined for GENE_B and GENE_C -- confirm the gene
# annotation lookup inside Integrate_MultiOmics.
knitr::kable(epi_merged, digits = 3)
Associated_Regions GeneID GeneName CpG_level GpC_level
chr1:1000-2000 GENE Sym_A 0.762 0.495
chr2:3000-4000 GENE Sym_A 0.570 0.484
chr3:5000-6000 GENE Sym_A 0.374 0.590

Session Information

# Print the R version, platform, and attached/loaded package versions used to
# build this vignette.
sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] dplyr_1.2.1       data.table_1.18.4 sciNOME_0.99.0    BiocStyle_2.41.0 
#> 
#> loaded via a namespace (and not attached):
#>  [1] tidyr_1.3.2          sass_0.4.10          generics_0.1.4      
#>  [4] rstatix_0.7.3        lattice_0.22-9       digest_0.6.39       
#>  [7] magrittr_2.0.5       evaluate_1.0.5       grid_4.6.0          
#> [10] RColorBrewer_1.1-3   fastmap_1.2.0        jsonlite_2.0.0      
#> [13] Matrix_1.7-5         ggrepel_0.9.8        backports_1.5.1     
#> [16] Formula_1.2-5        BiocManager_1.30.27  purrr_1.2.2         
#> [19] scales_1.4.0         pbapply_1.7-4        jquerylib_0.1.4     
#> [22] abind_1.4-8          cli_3.6.6            rlang_1.2.0         
#> [25] withr_3.0.2          cachem_1.1.0         yaml_2.3.12         
#> [28] otel_0.2.0           parallel_4.6.0       tools_4.6.0         
#> [31] ggsignif_0.6.4       ggplot2_4.0.3        ggpubr_0.6.3        
#> [34] BiocGenerics_0.59.7  broom_1.0.13         buildtools_1.0.0    
#> [37] vctrs_0.7.3          R6_2.6.1             stats4_4.6.0        
#> [40] lifecycle_1.0.5      Seqinfo_1.3.0        car_3.1-5           
#> [43] S4Vectors_0.51.3     IRanges_2.47.2       pkgconfig_2.0.3     
#> [46] pillar_1.11.1        bslib_0.11.0         gtable_0.3.6        
#> [49] Rcpp_1.1.1-1.1       glue_1.8.1           xfun_0.58           
#> [52] tibble_3.3.1         GenomicRanges_1.65.0 tidyselect_1.2.1    
#> [55] sys_3.4.3            knitr_1.51           farver_2.1.2        
#> [58] patchwork_1.3.2      igraph_2.3.2         htmltools_0.5.9     
#> [61] carData_3.0-6        rmarkdown_2.31       maketools_1.3.2     
#> [64] compiler_4.6.0       S7_0.2.2