| Title: | Extract, QC and Screen Peptide Epitopes |
|---|---|
| Description: | Supports T cell receptor neoantigen co-culture screen analysis from peptide library design to sequencing quality control and differential abundance testing. The package annotates somatic variants and RNA fusions, extracts mutant and reference peptide context, prepares barcoded minigene construct tables, counts sample and construct barcodes from sequencing data, and identifies immunogenic epitopes in dropout screens. |
| Authors: | Michael Schubert [aut, cre] (ORCID: <https://orcid.org/0000-0002-6862-5221>) |
| Maintainer: | Michael Schubert <[email protected]> |
| License: | GPL-3 |
| Version: | 0.99.2 |
| Built: | 2026-06-18 17:36:43 UTC |
| Source: | https://github.com/BiocStaging/pepitope |
Annotate VCF variants with coding changes
annotate_coding(vr, txdb, asm)annotate_coding(vr, txdb, asm)
vr |
A VRanges object with SNVs and small indels |
txdb |
TxDb or EnsDb object |
asm |
Genomic sequence BSGenome object |
A GRanges object with annotated variants
if (interactive()) { vcf = system.file("my_variants.vcf", package="pepitope") vr = readVcfAsVRanges(vcf) txdb = AnnotationHub::AnnotationHub()[["AH100643"]] asm = BSgenome.Hsapiens.NCBI.GRCh38::BSgenome.Hsapiens.NCBI.GRCh38 annotate_coding(vr, txdb, asm) }if (interactive()) { vcf = system.file("my_variants.vcf", package="pepitope") vr = readVcfAsVRanges(vcf) txdb = AnnotationHub::AnnotationHub()[["AH100643"]] asm = BSgenome.Hsapiens.NCBI.GRCh38::BSgenome.Hsapiens.NCBI.GRCh38 annotate_coding(vr, txdb, asm) }
Aggregate fusion VCFs into a table
annotate_fusions(vr, txdb, asm)annotate_fusions(vr, txdb, asm)
vr |
A VRanges object with RNA fusions from readVcfAsRanges |
txdb |
A transcription database, eg. AnnotationHub()[["AH100643"]] |
asm |
A Genome sequence package object, eg. ::BSgenome.Hsapiens.NCBI.GRCh38 |
A DataFrame objects with fusions
if (interactive()) { vcf = system.file("my_fusions.vcf", package="pepitope") vr = readVcfAsVRanges(vcf) txdb = AnnotationHub::AnnotationHub()[["AH100643"]] asm = BSgenome.Hsapiens.NCBI.GRCh38::BSgenome.Hsapiens.NCBI.GRCh38 annotate_fusions(vr, txdb, asm) }if (interactive()) { vcf = system.file("my_fusions.vcf", package="pepitope") vr = readVcfAsVRanges(vcf) txdb = AnnotationHub::AnnotationHub()[["AH100643"]] asm = BSgenome.Hsapiens.NCBI.GRCh38::BSgenome.Hsapiens.NCBI.GRCh38 annotate_fusions(vr, txdb, asm) }
Count barcodes directly from source FASTQ files
count_fastq( fq, samples, all_constructs, valid_barcodes, read_structure, verbose = TRUE )count_fastq( fq, samples, all_constructs, valid_barcodes, read_structure, verbose = TRUE )
fq |
Path to one FASTQ file |
samples |
A sample sheet as |
all_constructs |
A named list of all construct libraries |
valid_barcodes |
A character vector of all possible construct barcodes |
read_structure |
A character string describing the FASTQ read structure. If missing, this will be inferred from the first reads in 'fq'. |
verbose |
Whether to print progress messages (default: TRUE) |
A SummarizedExperiment object with counts and metadata
samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) count_fastq(fq, samples, constructs, read_structure="3B4M", verbose=FALSE)samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) count_fastq(fq, samples, constructs, read_structure="3B4M", verbose=FALSE)
Filter a fusion VRanges object by number of reads and tools
filter_fusions(vr, min_reads = NULL, min_split_reads = NULL, min_tools = NULL)filter_fusions(vr, min_reads = NULL, min_split_reads = NULL, min_tools = NULL)
vr |
A VRanges object with RNA fusions from readVcfAsRanges |
min_reads |
The minimum number of linked read support for a fusion |
min_split_reads |
The minimum number of split read support for a fusion |
min_tools |
The minimum number of tools that identify a fusion |
A filtered VRanges object
vr = VariantAnnotation::VRanges("chr1", IRanges::IRanges(1, 1), ref="A", alt="T") vr$DV = 3L vr$RV = 2L vr$TOOL_HITS = IRanges::IntegerList(2L) filter_fusions(vr, min_reads=4, min_split_reads=2, min_tools=1)vr = VariantAnnotation::VRanges("chr1", IRanges::IRanges(1, 1), ref="A", alt="T") vr$DV = 3L vr$RV = 2L vr$TOOL_HITS = IRanges::IntegerList(2L) filter_fusions(vr, min_reads=4, min_split_reads=2, min_tools=1)
Make results report to save as xlsx sheets (full, filtered, peptides)
filter_variants( vr, ..., min_cov = 2, min_af = 0.05, pass = TRUE, sample = NULL, chrs = NULL )filter_variants( vr, ..., min_cov = 2, min_af = 0.05, pass = TRUE, sample = NULL, chrs = NULL )
vr |
A VRanges object from 'readVcfAsVRanges' |
... |
Force filters by name (ignored) |
min_cov |
Minimum number of reads to span the ALT allele |
min_af |
Minimum allele frequency of the ALT allele |
pass |
Whether to only include softFilterMatrix PASS |
sample |
Only include if in 'sampleNames(vr)' (required if more than one present) |
chrs |
Either "default" or a character vector of chromosome names |
A filtered VRanges object
vcf = system.file("my_variants.vcf", package="pepitope") vr = readVcfAsVRanges(vcf) filter_variants(vr, min_cov=2, min_af=0.05, pass=TRUE)vcf = system.file("my_variants.vcf", package="pepitope") vr = readVcfAsVRanges(vcf) filter_variants(vr, min_cov=2, min_af=0.05, pass=TRUE)
Make a variants report as named list of tables
make_peptides(subs, fus = DataFrame())make_peptides(subs, fus = DataFrame())
subs |
Variants within mutation context from 'subset_context()' |
fus |
Variants within fusion context from 'subset_context_fusions()' |
A data.frame with cDNA and peptide sequences
if (interactive()) { peptides = make_peptides(subs) }if (interactive()) { peptides = make_peptides(subs) }
Make a variants report as named list of tables
make_report(vars, subs, fus = DataFrame(), tiled)make_report(vars, subs, fus = DataFrame(), tiled)
vars |
Variant results from 'annotate_coding()' |
subs |
Variants within mutation context from 'subset_context()' |
fus |
Variants within fusion context from 'subset_context_fusions()' |
tiled |
A data.frame of the tiled peptide sequences |
A named list of data.frames for report output
if (interactive()) { make_report(vars, subs, fus, tiled) }if (interactive()) { make_report(vars, subs, fus, tiled) }
Tile cDNA into peptide sequences
pep_tile(peptides, tile_size = 93, tile_ov = 45)pep_tile(peptides, tile_size = 93, tile_ov = 45)
peptides |
A 'data.frame' with context-subset peptide/minigene data |
tile_size |
Oligo tiling size |
tile_ov |
Oligo tiling overlap |
A data.frame with tiled nucleotide and peptide sequences
peptides = data.frame(var_id="v1", mut_id="M1_A1V", gene_name="M1", gene_id="gene1", tx_id="tx1", pep_id="M1_A1V", cDNA="ATGGCCGCCGCC") pep_tile(peptides, tile_size=9, tile_ov=3)peptides = data.frame(var_id="v1", mut_id="M1_A1V", gene_name="M1", gene_id="gene1", tx_id="tx1", pep_id="M1_A1V", cDNA="ATGGCCGCCGCC") pep_tile(peptides, tile_size=9, tile_ov=3)
Given a reference genome and VCF file, this package will provide the upstream/downstream peptide context of a variant. It will generate a summary report for protein-coding variants including the reference and mutated allele, read coverage, amino acid sequence, and other information. It can also be used to remove restriction sites from cDNA, alongside other helper functions.
Package documentation for pepitope
Maintainer: Michael Schubert [email protected] (ORCID)
Authors:
Michael Schubert [email protected] (ORCID)
Useful links:
Plot barcode overlap between different samples
plot_barcode_overlap(all_constructs, valid_barcodes)plot_barcode_overlap(all_constructs, valid_barcodes)
all_constructs |
A named list of all constructs |
valid_barcodes |
A character vector of possible barcodes (optional) |
A ggplot2 object
if (interactive()) { constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) plot_barcode_overlap(constructs, valid_barcodes=c("AAAA", "CCCC")) }if (interactive()) { constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) plot_barcode_overlap(constructs, valid_barcodes=c("AAAA", "CCCC")) }
Plot the overall read counts
plot_read_count(dset, n_cutoff = 10) plot_reads(dset)plot_read_count(dset, n_cutoff = 10) plot_reads(dset)
dset |
The 'SummarizedExperiment' object from 'count_fastq' |
n_cutoff |
Minimum reads for counting a barcode as represented |
A patchwork object containing the read count summary plots
if (interactive()) { samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) dset = count_fastq(fq, samples, constructs, read_structure="3B4M", verbose=FALSE) plot_read_count(dset) }if (interactive()) { samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) dset = count_fastq(fq, samples, constructs, read_structure="3B4M", verbose=FALSE) plot_read_count(dset) }
Plot the read distribution across barcodes
plot_read_distr(dset) plot_distr(dset)plot_read_distr(dset) plot_distr(dset)
dset |
The 'SummarizedExperiment' object from 'count_fastq' |
A 'ggplot2' object with cumulative read distribution plots
if (interactive()) { samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) dset = count_fastq(fq, samples, constructs, read_structure="3B4M", verbose=FALSE) plot_read_distr(dset) }if (interactive()) { samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) dset = count_fastq(fq, samples, constructs, read_structure="3B4M", verbose=FALSE) plot_read_distr(dset) }
Plot annotated read structure examples
plot_read_structure(fq, samples, all_constructs, nrec = 100000L)plot_read_structure(fq, samples, all_constructs, nrec = 100000L)
fq |
Path to one FASTQ file |
samples |
A sample sheet as 'data.frame' in tsv format. Requires the columns 'sample_id', 'patient', 'rep', 'origin', 'barcode' |
all_constructs |
A named list of all construct libraries |
nrec |
Number of FASTQ records to inspect |
A 'ggplot2' object
if (interactive()) { samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) plot_read_structure(fq, samples, constructs, nrec=10) }if (interactive()) { samples = data.frame(sample_id="sample1", patient="pat1", rep="1", origin="library", barcode="GGG") constructs = list(pat1=data.frame(gene_name="GENE1", mut_id="GENE1_A1V", pep_id="GENE1_A1V", pep_type="alt", tiled="ATGGCCGCC", barcode_1="AAAA")) fq = tempfile(fileext=".fq") writeLines(c("@r1", "GGGAAAA", "+", "IIIIIII", "@r2", "GGGAAAA", "+", "IIIIIII"), fq) plot_read_structure(fq, samples, constructs, nrec=10) }
Plot screen results
plot_screen( res, sample = NULL, links = TRUE, labs = TRUE, min_reads = 10, cap_fc = 8, p_sig = 0.05 )plot_screen( res, sample = NULL, links = TRUE, labs = TRUE, min_reads = 10, cap_fc = 8, p_sig = 0.05 )
res |
A results 'data.frame' from 'screen_calc()' |
sample |
Which library to plot (default: all) |
links |
Whether to draw arrows between ref and significant alt peptides |
labs |
Whether to label genes in less dense areas |
min_reads |
Minimum number of reads to show as points (default: 10) |
cap_fc |
Maximum amount of fold-change to limit values to |
p_sig |
Maximum adjusted p-value to consider significant (default: 0.05) |
A 'ggplot2' object of the differential expression results
if (interactive()) { res = data.frame(baseMean=c(20, 30), log2FoldChange=c(-1, 1), padj=c(0.2, 0.05), gene_name=c("GENE1", "GENE1"), pep_type=c("ref", "alt"), bc_type=c("pat1", "pat1"), mut_id=c("GENE1_A1", "GENE1_A1")) plot_screen(res, links=FALSE, labs=FALSE) }if (interactive()) { res = data.frame(baseMean=c(20, 30), log2FoldChange=c(-1, 1), padj=c(0.2, 0.05), gene_name=c("GENE1", "GENE1"), pep_type=c("ref", "alt"), bc_type=c("pat1", "pat1"), mut_id=c("GENE1_A1", "GENE1_A1")) plot_screen(res, links=FALSE, labs=FALSE) }
Remove a Restriction Enzyme cut site but keep AA in a tiled peptide data.frame
remove_cutsite(pep, ...)remove_cutsite(pep, ...)
pep |
A data.frame of tiled peptides |
... |
Named argumennts of cut sites, e.g. 'BbsI="GAAGAC"' |
A data.frame with replace nucleotides and number of replacements
pep = data.frame(pep_id="p1", tiled="ATGGCCGCC") remove_cutsite(pep, BbsI="GAAGAC")pep = data.frame(pep_id="p1", tiled="ATGGCCGCC") remove_cutsite(pep, BbsI="GAAGAC")
Calculate differential abundance of construct barcodes
screen_calc(dset, comparisons)screen_calc(dset, comparisons)
dset |
A 'SummarizedExperiment' object from 'count_fastq()' |
comparisons |
A character vector of sample and reference condition, or list thereof |
A data.frame of DESeq2 results, or a named list of data.frames
if (interactive()) { screen_calc(dset, c("screen", "library")) }if (interactive()) { screen_calc(dset, c("screen", "library")) }
Subset nucleotide/protein sequences to codon +/- 45 bp context
subset_context(codv, ctx_codons)subset_context(codv, ctx_codons)
codv |
Annotated variants from 'annotate_coding()' |
ctx_codons |
How many flanking codons each to include in the context |
GRanges object with sequence information of only context
if (interactive()) { subset_context(codv, ctx_codons=15) }if (interactive()) { subset_context(codv, ctx_codons=15) }
Subset the peptide context for gene fusions
subset_context_fusion(res, ctx_codons)subset_context_fusion(res, ctx_codons)
res |
A DataFrame object from 'fusions' |
ctx_codons |
How many flanking codons each to include in the context |
A DataFrame object of gene fusions
if (interactive()) { subset_context_fusion(fusions, ctx_codons=15) }if (interactive()) { subset_context_fusion(fusions, ctx_codons=15) }