% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/features-prediction.R
\name{predictTxFeaturesPerSample}
\alias{predictTxFeaturesPerSample}
\title{Identification of splice junctions and exons from BAM file}
\usage{
predictTxFeaturesPerSample(file_bam, which, paired_end, read_length,
  frag_length, lib_size, min_junction_count, alpha, psi, beta, gamma,
  min_anchor, include_counts, retain_coverage, junctions_only, max_complexity,
  sample_name, verbose, cores)
}
\arguments{
\item{file_bam}{BAM file with genomic RNA-seq read alignments}

\item{which}{\code{GRanges} of genomic regions to be considered for
feature prediction, passed to \code{ScanBamParam}}

\item{paired_end}{Logical, \code{TRUE} for paired-end data,
\code{FALSE} for single-end data}

\item{read_length}{Read length, required for use with \code{alpha}}

\item{frag_length}{Fragment length for paired-end data, required
for use with \code{alpha}}

\item{lib_size}{Number of aligned fragments, required for use with
\code{alpha}}

\item{min_junction_count}{Minimum fragment count required for a splice
junction to be included. If specified, argument \code{alpha} is ignored.}

\item{alpha}{Minimum FPKM required for a splice junction to be
included. Internally, FPKMs are converted to counts, requiring arguments
\code{read_length}, \code{frag_length} and \code{lib_size}.
\code{alpha} is ignored if argument \code{min_junction_count}
is specified.}

\item{psi}{Minimum splice frequency required for a splice junction
to be included}

\item{beta}{Minimum relative coverage required for an internal exon
to be included}

\item{gamma}{Minimum relative coverage required for a terminal exon
to be included}

\item{min_anchor}{Integer specifying minimum anchor length}

\item{include_counts}{Logical indicating whether counts of
compatible fragments should be included in metadata column
\dQuote{N}}

\item{retain_coverage}{Logical indicating whether coverage for each
exon should be retained as an \code{RleList} in metadata
column \dQuote{coverage}. This allows filtering of features
using more stringent criteria after the initial prediction.}

\item{junctions_only}{Logical indicating whether predictions
should be limited to identification of splice junctions only}

\item{max_complexity}{Maximum allowed complexity. If a locus exceeds
this threshold, it is skipped, resulting in a warning.
Complexity is defined as the maximum number of unique predicted
splice junctions overlapping a given position.
High complexity regions are often due to spurious read alignments
and can slow down processing. To disable this filter, set to \code{NA}.}

\item{sample_name}{Sample name used in messages}

\item{verbose}{If \code{TRUE}, generate messages indicating progress}

\item{cores}{Number of cores available for parallel processing}
}
\value{
\code{TxFeatures} object
}
\description{
Splice junctions and exons are predicted from genomic RNA-seq read
alignments in BAM format.
}
\details{
For spliced alignments, the direction of transcription is inferred from
the XS tag in the BAM file and used to assign strand information to
the read, or fragment for paired-end data.

Feature prediction is performed in two steps. First, splice junctions
are identified from spliced alignments. Second, exons
are identified based on regions that are flanked by splice
junctions and show sufficient coverage with compatible reads.

Splice junctions implied by read alignments are filtered based on
fragment count and splice frequency. The splice frequency at the
splice donor (acceptor) is defined as x_J/x_D (x_J/x_A), where
x_J is the number of fragments containing the splice junction, and
x_D (x_A) is the number of fragments overlapping the exon/intron
(intron/exon) boundary. Fragments overlapping the splice boundary
can be either spliced or extend into the intron. To be included in
predicted features, splice junctions must have fragment count at
least \code{min_junction_count} or FPKM at least \code{alpha}, and
splice frequency at both donor and acceptor at least \code{psi}.

Regions between any pair of identified splice junctions with sufficient
compatible read coverage are considered candidate internal exons.
Read coverage for a candidate exon is computed based on compatible
fragments, i.e. fragments with matching (or missing) strand information
and introns consistent with the exon under consideration.
Candidate exons are included in predicted features if the minimum
coverage is at least \code{beta} * number of junction-containing
fragments for either flanking junction.

Terminal exons are regions downstream or upstream of splice junctions
with compatible fragment coverage at least \code{gamma} * number of
junction-containing fragments.
}
\author{
Leonard Goldstein
}
\keyword{internal}
