The sciNOME package provides an end-to-end pipeline for
processing single-cell or bulk epigenetic data (DNA
methylation/Chromatin accessibility). This vignette demonstrates the
complete workflow:
.cov.gz) into
genomic regions.To demonstrate the workflow, we will simulate a tiny dataset
containing 4 samples (2 Tumor, 2 Normal) and 5 genomic regions. We
create temporary .cov and .bed files.
library(sciNOME)
library(data.table)
# Create a temporary directory for our mock data
tmp_dir <- tempdir()
# 1. Create a mock BED file (5 regions)
mock_bed <- data.frame(
chr = "chr1",
start = seq(1, 401, 100),
end = seq(100, 500, 100)
)
bed_path <- file.path(tmp_dir, "regions.bed")
write.table(mock_bed, bed_path, row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE)
# 2. Create mock Bismark .cov files for 4 samples
samples <- c("Tumor_1", "Tumor_2", "Normal_1", "Normal_2")
set.seed(123)
for (samp in samples) {
# Generate a 6-column mock Bismark coverage file
mock_cov <- data.frame(
chr = "chr1",
start = seq(50, 450, 100), # Column 2
end = seq(50, 450, 100), # Column 3
meth_perc = 50, # Column 4 (Dummy placeholder)
meth = sample(10:50, 5, replace=TRUE), # Column 5: methylated count
unmeth = sample(10:50, 5, replace=TRUE) # Column 6: unmethylated count
)
# Make Tumor slightly more methylated for differential analysis later
if(grepl("Tumor", samp)) mock_cov$meth <- mock_cov$meth + 30
cov_path <- file.path(tmp_dir, paste0(samp, ".cov"))
write.table(mock_cov, cov_path, row.names=FALSE, col.names=FALSE, sep="\t", quote=FALSE)
}We use Aggregate_epiRegions to map the .cov
files to the BED regions.
agg_dt <- Aggregate_epiRegions(
cov_dir = tmp_dir,
bed_file = bed_path,
n_cores = 1
)
# Display the first few rows
head(agg_dt)
#> region_id Normal.1.meth Normal.1.nonmeth Normal.1.level Normal.2.meth
#> <char> <num> <num> <num> <num>
#> 1: chr1:1-100 45 24 0.6521739 19
#> 2: chr1:101-200 23 41 0.3593750 32
#> 3: chr1:201-300 26 16 0.6190476 36
#> 4: chr1:301-400 48 18 0.7272727 16
#> 5: chr1:401-500 21 50 0.2957746 36
#> Normal.2.nonmeth Normal.2.level Tumor.1.meth Tumor.1.nonmeth Tumor.1.level
#> <num> <num> <num> <num> <num>
#> 1: 41 0.3166667 70 23 0.7526882
#> 2: 47 0.4050633 54 34 0.6136364
#> 3: 34 0.5142857 53 35 0.6022727
#> 4: 43 0.2711864 42 36 0.5384615
#> 5: 38 0.4864865 76 14 0.8444444
#> Tumor.2.meth Tumor.2.nonmeth Tumor.2.level
#> <num> <num> <num>
#> 1: 66 17 0.7951807
#> 2: 67 35 0.6568627
#> 3: 48 16 0.7500000
#> 4: 68 18 0.7906977
#> 5: 74 28 0.7254902We can extract just the methylation level matrix using
Extract_epiData. Before that, let’s introduce an
NA value to demonstrate the QC function.
# Introduce an NA to simulate missing data
agg_dt[1, "Tumor.1.level"] <- NA
# Extract only the level columns
level_only <- Extract_epiData(data = agg_dt, suffix = ".level")
head(level_only, 3)
#> region_id Normal.1.level Normal.2.level Tumor.1.level Tumor.2.level
#> <char> <num> <num> <num> <num>
#> 1: chr1:1-100 0.6521739 0.3166667 NA 0.7951807
#> 2: chr1:101-200 0.3593750 0.4050633 0.6136364 0.6568627
#> 3: chr1:201-300 0.6190476 0.5142857 0.6022727 0.7500000
# Run Quality Control (Keep rows with fewest NAs, filter bad columns)
qc_dt <- QC_epiData(
data = agg_dt,
top_n_rows = 5,
max_col_na_ratio = 0.5
)
head(qc_dt, 3)
#> region_id Normal.1.meth Normal.1.nonmeth Normal.1.level Normal.2.meth
#> <char> <num> <num> <num> <num>
#> 1: chr1:101-200 23 41 0.3593750 32
#> 2: chr1:201-300 26 16 0.6190476 36
#> 3: chr1:301-400 48 18 0.7272727 16
#> Normal.2.nonmeth Normal.2.level Tumor.1.meth Tumor.1.nonmeth Tumor.1.level
#> <num> <num> <num> <num> <num>
#> 1: 47 0.4050633 54 34 0.6136364
#> 2: 34 0.5142857 53 35 0.6022727
#> 3: 43 0.2711864 42 36 0.5384615
#> Tumor.2.meth Tumor.2.nonmeth Tumor.2.level
#> <num> <num> <num>
#> 1: 67 35 0.6568627
#> 2: 48 16 0.7500000
#> 3: 68 18 0.7906977We use the QC-passed level data to perform Principal Component Analysis (PCA). The function automatically matches the data columns with our metadata.
# Create matching metadata
mock_meta <- data.frame(
SampleID = c("Tumor_1", "Tumor_2", "Normal_1", "Normal_2"),
Condition = c("Tumor", "Tumor", "Normal", "Normal")
)
# Extract just the level columns for PCA
level_mat <- Extract_epiData(qc_dt, suffix = ".level")
# Run PCA
pca_res <- Reduce_epiDims(
mat = level_mat,
meta = mock_meta,
sample_col = "SampleID",
group_col = "Condition",
dr_method = "PCA",
impute_method = "mean"
)
head(pca_res)
#> SampleID Dim1 Dim2 Condition
#> 1 Normal.1 -0.04649262 -0.361068002 Normal
#> 2 Normal.2 -0.43774211 0.102447671 Normal
#> 3 Tumor.1 0.10671869 0.256925196 Tumor
#> 4 Tumor.2 0.37751604 0.001695135 TumorFinally, we identify differentially methylated regions (DMRs) between
the Tumor and Normal groups using Run_Diffanalysis. We need
to provide a mapping table to tell the function which column corresponds
to which metric.
# Create comprehensive metadata for mapping columns
diff_meta <- data.frame(
SampleID = c("T1", "T2", "N1", "N2"),
Group = c("Tumor", "Tumor", "Normal", "Normal"),
col_lvl = paste0(c("Tumor.1", "Tumor.2", "Normal.1", "Normal.2"), ".level"),
col_met = paste0(c("Tumor.1", "Tumor.2", "Normal.1", "Normal.2"), ".meth"),
col_non = paste0(c("Tumor.1", "Tumor.2", "Normal.1", "Normal.2"), ".nonmeth")
)
# Run the core differential analysis algorithm
diff_res <- Run_Diffanalysis(
raw_mat = qc_dt,
meta = diff_meta,
group_col = "Group",
target_group = "Tumor",
control_group = "Normal",
col_level = "col_lvl",
col_meth = "col_met",
col_nonmeth = "col_non",
effect_metric = "Diff",
effect_th = 0.1, # Lowered threshold for mock data
p_th = 0.05,
verbose = FALSE
)
head(diff_res)
#> chrdata P.Value FDR var_group var_nogroup
#> chr1:401-500 chr1:401-500 4.679500e-13 1.871800e-12 0.007075057 0.018185503
#> chr1:101-200 chr1:101-200 5.381065e-06 1.076213e-05 0.000934260 0.001043710
#> chr1:301-400 chr1:301-400 7.609088e-03 1.014545e-02 0.031811534 0.104007350
#> chr1:201-300 chr1:201-300 7.376702e-02 7.376702e-02 0.010911674 0.005487528
#> Diff Scores FC logFC Significance
#> chr1:401-500 0.3938368 0.3862988 2.006919 0.35963983 Up-regulated
#> chr1:101-200 0.2530304 0.2526765 1.662003 0.24252445 Up-regulated
#> chr1:301-400 0.1653500 0.1491301 1.331210 0.15093654 Up-regulated
#> chr1:201-300 0.1094697 0.1081488 1.193182 0.09744127 Not SigsessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] data.table_1.18.4 sciNOME_0.99.0 BiocStyle_2.41.0
#>
#> loaded via a namespace (and not attached):
#> [1] tidyr_1.3.2 sass_0.4.10 generics_0.1.4
#> [4] rstatix_0.7.3 lattice_0.22-9 digest_0.6.39
#> [7] magrittr_2.0.5 evaluate_1.0.5 grid_4.6.0
#> [10] RColorBrewer_1.1-3 fastmap_1.2.0 jsonlite_2.0.0
#> [13] Matrix_1.7-5 ggrepel_0.9.8 backports_1.5.1
#> [16] Formula_1.2-5 BiocManager_1.30.27 purrr_1.2.2
#> [19] scales_1.4.0 pbapply_1.7-4 jquerylib_0.1.4
#> [22] abind_1.4-8 cli_3.6.6 rlang_1.2.0
#> [25] cachem_1.1.0 yaml_2.3.12 otel_0.2.0
#> [28] parallel_4.6.0 tools_4.6.0 ggsignif_0.6.4
#> [31] dplyr_1.2.1 ggplot2_4.0.3 ggpubr_0.6.3
#> [34] BiocGenerics_0.59.7 broom_1.0.13 buildtools_1.0.0
#> [37] vctrs_0.7.3 R6_2.6.1 stats4_4.6.0
#> [40] lifecycle_1.0.5 Seqinfo_1.3.0 car_3.1-5
#> [43] S4Vectors_0.51.3 IRanges_2.47.2 pkgconfig_2.0.3
#> [46] pillar_1.11.1 bslib_0.11.0 gtable_0.3.6
#> [49] Rcpp_1.1.1-1.1 glue_1.8.1 xfun_0.58
#> [52] tibble_3.3.1 GenomicRanges_1.65.0 tidyselect_1.2.1
#> [55] sys_3.4.3 knitr_1.51 farver_2.1.2
#> [58] patchwork_1.3.2 igraph_2.3.2 htmltools_0.5.9
#> [61] carData_3.0-6 rmarkdown_2.31 maketools_1.3.2
#> [64] compiler_4.6.0 S7_0.2.2