tTEscanR includes a dedicated preprocessing module that provides several functions for generating ready-to-use count matrices. The primary goal of this module is to facilitate the transformation of (sc)ATAC-seq count matrices into tRNA abundance count matrices, in which tRNA genes are the rows and conditions are the columns. Additionally, it helps annotate the tRNA genes.
To illustrate the functionality of the tRNA-specific preprocessing module, we generated a toy example dataset representing a peak count matrix in which the fragment regions are the rows and the samples are the columns. We have also defined the corresponding metadata.
library(Matrix)
# Dimensions of the toy dataset: peaks are rows, cells are columns.
num_peaks <- 100
num_cells <- 20 # kept above the min.cells = 10 filter applied further down

# Fix the RNG state so the toy data are reproducible.
set.seed(42)

# Peak identifiers follow the "chr-start-end" layout: a randomly drawn
# chromosome (chr1..chr3), starts spaced 5000 bp apart beginning at 1000,
# and a fixed peak width of 300 bp.
chroms <- paste0("chr", 1:3)[sample.int(3L, num_peaks, replace = TRUE)]
starts <- 1000 + 5000 * (seq_len(num_peaks) - 1)
ends <- starts + 300
peak_names <- paste0(chroms, "-", starts, "-", ends)

# Poisson(5) counts, filled column by column and stored as a sparse Matrix
# object.
counts <- Matrix(
  data = rpois(num_peaks * num_cells, lambda = 5),
  nrow = num_peaks,
  ncol = num_cells,
  sparse = TRUE
)
dimnames(counts) <- list(peak_names, paste0("Cell_", seq_len(num_cells)))
# Persist the toy peak-by-cell matrix so it can be reloaded with readRDS().
saveRDS(counts, "foo.RDS")

# dplyr supplies bind_rows()/arrange()/%>% used to assemble the fragment table.
library(dplyr)
library(Rsamtools)
#> Loading required package: Seqinfo
#> Loading required package: BiocGenerics
#> Loading required package: generics
#>
#> Attaching package: 'generics'
#> The following object is masked from 'package:dplyr':
#>
#> explain
#> The following objects are masked from 'package:base':
#>
#> as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
#> setequal, union
#>
#> Attaching package: 'BiocGenerics'
#> The following object is masked from 'package:dplyr':
#>
#> combine
#> The following object is masked from 'package:tTEscanR':
#>
#> updateObject
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following object is masked from 'package:utils':
#>
#> data
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, aperm, append, as.data.frame, basename, cbind,
#> colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
#> get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, saveRDS, scale, sequence, table,
#> tapply, transform, unique, unsplit, which.max, which.min
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:Matrix':
#>
#> expand, unname
#> The following objects are masked from 'package:dplyr':
#>
#> first, rename
#> The following object is masked from 'package:utils':
#>
#> findMatches
#> The following objects are masked from 'package:base':
#>
#> expand.grid, I, unname
#> Loading required package: IRanges
#>
#> Attaching package: 'IRanges'
#> The following objects are masked from 'package:dplyr':
#>
#> collapse, desc, slice
#> Loading required package: Biostrings
#> Loading required package: XVector
#>
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:base':
#>
#> strsplit
# Generate a synthetic fragment table that mirrors the toy count matrix:
# a peak with count n in a given cell contributes n fragments to that cell,
# the k-th of which spans [peak_start + k, peak_end - k].
#
# The table is built in one vectorised pass instead of one data.frame per
# fragment, and coordinates are kept as integers so that write.table() can
# never emit them in scientific notation (e.g. 1e+05), which would corrupt
# the fragment file.

# Densifying is fine here because the toy matrix is tiny.
count_mat <- as.matrix(counts)

# Parse each "chr-start-end" peak name once (not once per fragment).
peak_parts <- strsplit(rownames(count_mat), "-", fixed = TRUE)
peak_chr <- vapply(peak_parts, `[`, character(1), 1L)
peak_start <- as.integer(vapply(peak_parts, `[`, character(1), 2L))
peak_end <- as.integer(vapply(peak_parts, `[`, character(1), 3L))

# Non-zero (peak, cell) pairs in column-major order, i.e. cell by cell and
# peak by peak within a cell -- the same order the nested loops produced.
hits <- which(count_mat > 0, arr.ind = TRUE)
n_frag <- as.integer(count_mat[hits])

# Expand every pair into one row per fragment; `offset` runs 1..n per pair.
peak_idx <- rep(hits[, "row"], times = n_frag)
cell_idx <- rep(hits[, "col"], times = n_frag)
offset <- sequence(n_frag)

fragments_df <- data.frame(
  chr = peak_chr[peak_idx],
  start = peak_start[peak_idx] + offset,
  end = peak_end[peak_idx] - offset,
  cell = colnames(count_mat)[cell_idx],
  count = rep(1L, length(peak_idx))
) %>%
  arrange(chr, start) # Sorting is mandatory for indexing
# Dump the fragment table as a headerless, unquoted, tab-separated text file.
temp_txt <- "foo.fragments.txt"
gz_path <- "foo.fragments.txt.gz"
write.table(
  x = fragments_df,
  file = temp_txt,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)
# Compress with bgzip and build a tabix index (both from Rsamtools).
# Produces foo.fragments.txt.gz and foo.fragments.txt.gz.tbi
bgzip(file = temp_txt, dest = gz_path, overwrite = TRUE)
#> [1] "foo.fragments.txt.gz"
indexTabix(file = gz_path, format = "bed")
#> [1] "foo.fragments.txt.gz.tbi"
# The uncompressed intermediate is no longer needed.
file.remove(temp_txt)
#> [1] TRUE
library(Signac)
library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#>
#> Attaching package: 'sp'
#> The following object is masked from 'package:IRanges':
#>
#> %over%
#> 'SeuratObject' was built under R 4.6.0 but the current version is
#> 4.6.1; it is recomended that you reinstall 'SeuratObject' as the ABI
#> for R may have changed
#>
#> Attaching package: 'SeuratObject'
#> The following object is masked from 'package:Biostrings':
#>
#> intersect
#> The following object is masked from 'package:GenomicRanges':
#>
#> intersect
#> The following object is masked from 'package:IRanges':
#>
#> intersect
#> The following object is masked from 'package:S4Vectors':
#>
#> intersect
#> The following object is masked from 'package:Seqinfo':
#>
#> intersect
#> The following object is masked from 'package:BiocGenerics':
#>
#> intersect
#> The following objects are masked from 'package:base':
#>
#> intersect, t
# Reload the toy inputs written above and wrap them in a Seurat object that
# holds a single chromatin assay named "peaks".
fragment_file <- "foo.fragments.txt.gz"
counts_loaded <- readRDS("foo.RDS")

# Peak names use "-" between chromosome, start and end, hence sep = c("-", "-").
chrom_assay <- CreateChromatinAssay(
  counts = counts_loaded,
  fragments = fragment_file,
  sep = c("-", "-"),
  min.cells = 10,
  min.features = 0
)
#> Computing hash
chrom_obj <- CreateSeuratObject(counts = chrom_assay, assay = "peaks")
print(chrom_obj)
#> An object of class Seurat
#> 100 features across 20 samples within 1 assay
#> Active assay: peaks (100 features, 0 variable features)
#> 2 layers present: counts, data

The first step is to generate the tRNA matrix and translate the peak fragments into tRNA gene names.
# Aggregate the "peaks" assay into a tRNA-level matrix for hg38.
# With confidence_set = NULL the run logged below reports falling back to the
# default hg38 confidence set.
tRNA_matrix <- tRNAGetMatrix(
  data = chrom_obj,
  assay = "peaks",
  species = "hg38",
  confidence_set = NULL
)
#> 1 . Importing the high-confidence tRNA annotations.
#> The default hg38 'confidence_set' will be used.
#> 1 . COMPLETED
#> 2 . Filtering tRNA genes with unknown annotations.
#> Loading required namespace: Structstrings
#> The default hg38 'tRNA_name_map' will be used.
#> 2 . COMPLETED
#> 3 . Finding overlaps and aggregating tRNA counts.
#> Extracting reads overlapping genomic regions
#> 3 . COMPLETED
#> 4 . Exporting tRNA expression matrix.
#> - Parameter 'out_directory' has not been specified.
#> The file will be stored in the current working directory.
#> - Parameter 'out_name' has not been specified.
#> A standard name will be used.
#> - The generated file will be save in: /tmp/RtmpTo8Kzi/Rbuild2b423d366a71/tTEscanR/vignettes/tRNA_expression_matrix.rds
#> 4 . COMPLETED

#> R version 4.6.1 (2026-06-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 26.04 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.32.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] Seurat_5.5.1 SeuratObject_5.4.0 sp_2.2-1
#> [4] Signac_1.17.1 Rsamtools_2.29.0 Biostrings_2.81.3
#> [7] XVector_0.53.0 GenomicRanges_1.65.0 IRanges_2.47.2
#> [10] S4Vectors_0.51.5 Seqinfo_1.3.0 BiocGenerics_0.59.8
#> [13] generics_0.1.4 Matrix_1.7-5 dplyr_1.2.1
#> [16] biomaRt_2.69.0 tTEscanR_0.99.0 BiocStyle_2.41.0
#>
#> loaded via a namespace (and not attached):
#> [1] RcppAnnoy_0.0.23 splines_4.6.1
#> [3] later_1.4.8 bitops_1.0-9
#> [5] filelock_1.0.3 tibble_3.3.1
#> [7] polyclip_1.10-7 fastDummies_1.7.6
#> [9] lifecycle_1.0.5 httr2_1.2.3
#> [11] rstatix_0.7.3 globals_0.19.1
#> [13] lattice_0.22-9 MASS_7.3-65
#> [15] backports_1.5.1 magrittr_2.0.5
#> [17] plotly_4.12.0 sass_0.4.10
#> [19] rmarkdown_2.31 jquerylib_0.1.4
#> [21] yaml_2.3.12 httpuv_1.6.17
#> [23] otel_0.2.0 sctransform_0.4.3
#> [25] spam_2.11-4 spatstat.sparse_3.2-0
#> [27] reticulate_1.46.0 cowplot_1.2.0
#> [29] pbapply_1.7-4 DBI_1.3.0
#> [31] buildtools_1.0.0 RColorBrewer_1.1-3
#> [33] abind_1.4-8 Rtsne_0.17
#> [35] purrr_1.2.2 rappdirs_0.3.4
#> [37] ggrepel_0.9.8 irlba_2.3.7
#> [39] spatstat.utils_3.2-3 listenv_1.0.0
#> [41] maketools_1.3.2 goftest_1.2-3
#> [43] RSpectra_0.16-2 spatstat.random_3.5-0
#> [45] fitdistrplus_1.2-6 parallelly_1.47.0
#> [47] codetools_0.2-20 DelayedArray_0.39.3
#> [49] RcppRoll_0.3.2 tidyselect_1.2.1
#> [51] UCSC.utils_1.9.0 farver_2.1.2
#> [53] spatstat.explore_3.8-1 matrixStats_1.5.0
#> [55] BiocFileCache_3.3.0 jsonlite_2.0.0
#> [57] progressr_0.19.0 Formula_1.2-5
#> [59] ggridges_0.5.7 survival_3.8-6
#> [61] tools_4.6.1 progress_1.2.3
#> [63] ica_1.0-3 Rcpp_1.1.1-1.1
#> [65] glue_1.8.1 gridExtra_2.3.1
#> [67] SparseArray_1.13.2 xfun_0.59
#> [69] DESeq2_1.53.0 MatrixGenerics_1.25.0
#> [71] GenomeInfoDb_1.49.1 withr_3.0.3
#> [73] BiocManager_1.30.27 fastmap_1.2.0
#> [75] digest_0.6.39 R6_2.6.1
#> [77] mime_0.13 scattermore_1.2
#> [79] tensor_1.5.1 spatstat.data_3.1-9
#> [81] RSQLite_3.53.2 tidyr_1.3.2
#> [83] data.table_1.18.4 Structstrings_1.29.0
#> [85] prettyunits_1.2.0 httr_1.4.8
#> [87] htmlwidgets_1.6.4 S4Arrays_1.13.0
#> [89] uwot_0.2.4 pkgconfig_2.0.3
#> [91] gtable_0.3.6 blob_1.3.0
#> [93] lmtest_0.9-40 S7_0.2.2
#> [95] sys_3.4.3 htmltools_0.5.9
#> [97] carData_3.0-6 dotCall64_1.2
#> [99] scales_1.4.0 Biobase_2.73.1
#> [101] png_0.1-9 spatstat.univar_3.2-0
#> [103] knitr_1.51 reshape2_1.4.5
#> [105] nlme_3.1-169 curl_7.1.0
#> [107] cachem_1.1.0 zoo_1.8-15
#> [109] stringr_1.6.0 KernSmooth_2.23-26
#> [111] parallel_4.6.1 miniUI_0.1.2
#> [113] AnnotationDbi_1.75.0 pillar_1.11.1
#> [115] grid_4.6.1 vctrs_0.7.3
#> [117] RANN_2.6.2 promises_1.5.0
#> [119] ggpubr_0.6.3 car_3.1-5
#> [121] dbplyr_2.6.0 xtable_1.8-8
#> [123] cluster_2.1.8.2 evaluate_1.0.5
#> [125] cli_3.6.6 locfit_1.5-9.12
#> [127] compiler_4.6.1 rlang_1.2.0
#> [129] crayon_1.5.3 future.apply_1.20.2
#> [131] ggsignif_0.6.4 labeling_0.4.3
#> [133] plyr_1.8.9 stringi_1.8.7
#> [135] deldir_2.0-4 viridisLite_0.4.3
#> [137] BiocParallel_1.47.0 lazyeval_0.2.3
#> [139] spatstat.geom_3.8-1 RcppHNSW_0.7.0
#> [141] hms_1.1.4 patchwork_1.3.2
#> [143] sparseMatrixStats_1.25.0 bit64_4.8.2
#> [145] future_1.70.0 ggplot2_4.0.3
#> [147] KEGGREST_1.53.4 shiny_1.14.0
#> [149] SummarizedExperiment_1.43.0 ROCR_1.0-12
#> [151] igraph_2.3.3 broom_1.0.13
#> [153] memoise_2.0.1 bslib_0.11.0
#> [155] fastmatch_1.1-8 bit_4.6.0