% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getPrevalence.R
\name{getPrevalence}
\alias{getPrevalence}
\alias{getPrevalence,ANY-method}
\alias{getPrevalence,SummarizedExperiment-method}
\alias{getPrevalentFeatures}
\alias{getPrevalentTaxa}
\alias{getPrevalentFeatures,ANY-method}
\alias{getRarePrevalentTaxa}
\alias{getPrevalentFeatures,SummarizedExperiment-method}
\alias{getPrevalentTaxa,ANY-method}
\alias{getRareFeatures}
\alias{getRareTaxa}
\alias{getRareFeatures,ANY-method}
\alias{getRareFeatures,SummarizedExperiment-method}
\alias{getRareTaxa,ANY-method}
\alias{subsetByPrevalentFeatures}
\alias{subsetByPrevalentTaxa}
\alias{subsetByPrevalentFeatures,SummarizedExperiment-method}
\alias{subsetByPrevalentTaxa,ANY-method}
\alias{subsetByRareFeatures}
\alias{subsetByRareTaxa}
\alias{subsetByRareFeatures,SummarizedExperiment-method}
\alias{subsetByRareTaxa,ANY-method}
\alias{getPrevalentAbundance}
\alias{getPrevalentAbundance,ANY-method}
\alias{getPrevalentAbundance,SummarizedExperiment-method}
\title{Calculate prevalence information for features across samples}
\usage{
getPrevalence(x, ...)

\S4method{getPrevalence}{ANY}(
  x,
  detection = 0,
  include_lowest = FALSE,
  sort = FALSE,
  na.rm = TRUE,
  ...
)

\S4method{getPrevalence}{SummarizedExperiment}(
  x,
  assay.type = assay_name,
  assay_name = "counts",
  as_relative = FALSE,
  rank = NULL,
  ...
)

getPrevalentFeatures(x, ...)

\S4method{getPrevalentFeatures}{ANY}(x, prevalence = 50/100, include_lowest = FALSE, ...)

\S4method{getPrevalentFeatures}{SummarizedExperiment}(
  x,
  rank = NULL,
  prevalence = 50/100,
  include_lowest = FALSE,
  ...
)

getPrevalentTaxa(x, ...)

\S4method{getPrevalentTaxa}{ANY}(x, ...)

getRareFeatures(x, ...)

\S4method{getRareFeatures}{ANY}(x, prevalence = 50/100, include_lowest = FALSE, ...)

\S4method{getRareFeatures}{SummarizedExperiment}(
  x,
  rank = NULL,
  prevalence = 50/100,
  include_lowest = FALSE,
  ...
)

getRareTaxa(x, ...)

\S4method{getRareTaxa}{ANY}(x, ...)

subsetByPrevalentFeatures(x, ...)

\S4method{subsetByPrevalentFeatures}{SummarizedExperiment}(x, rank = NULL, ...)

subsetByPrevalentTaxa(x, ...)

\S4method{subsetByPrevalentTaxa}{ANY}(x, ...)

subsetByRareFeatures(x, ...)

\S4method{subsetByRareFeatures}{SummarizedExperiment}(x, rank = NULL, ...)

subsetByRareTaxa(x, ...)

\S4method{subsetByRareTaxa}{ANY}(x, ...)

getPrevalentAbundance(
  x,
  assay.type = assay_name,
  assay_name = "relabundance",
  ...
)

\S4method{getPrevalentAbundance}{ANY}(
  x,
  assay.type = assay_name,
  assay_name = "relabundance",
  ...
)

\S4method{getPrevalentAbundance}{SummarizedExperiment}(x, assay.type = assay_name, assay_name = "counts", ...)
}
\arguments{
\item{x}{a
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
object}

\item{...}{additional arguments
\itemize{
\item{If \code{!is.null(rank)} arguments are passed on to
\code{\link[=agglomerate-methods]{agglomerateByRank}}. See
\code{\link[=agglomerate-methods]{?agglomerateByRank}} for more details.
Note that you can specify whether to remove empty ranks with
\code{agg.na.rm} instead of \code{na.rm}. (default: \code{FALSE})
}
\item{for \code{getPrevalentFeatures}, \code{getRareFeatures},
\code{subsetByPrevalentFeatures} and \code{subsetByRareFeatures} additional
parameters passed to \code{getPrevalence}}
\item{for \code{getPrevalentAbundance} additional parameters passed to
\code{getPrevalentFeatures}}
}}

\item{detection}{Detection threshold for absence/presence. Either an
absolute value compared directly to the values of \code{x} or a relative
value between 0 and 1, if \code{as_relative = TRUE}.}

\item{include_lowest}{logical scalar: Should the lower boundary of the
detection and prevalence cutoffs be included? (default: \code{FALSE})}

\item{sort}{logical scalar: Should the result be sorted by prevalence?
(default: \code{FALSE})}

\item{na.rm}{logical scalar: Should NA values be omitted when calculating
prevalence? (default: \code{na.rm = TRUE})}

\item{assay.type}{A single character value for selecting the
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}}
to use for prevalence calculation.}

\item{assay_name}{a single \code{character} value for specifying which
assay to use for calculation.
(Please use \code{assay.type} instead. At some point \code{assay_name}
will be disabled.)}

\item{as_relative}{logical scalar: Should the detection threshold be applied
on compositional (relative) abundances? (default: \code{FALSE})}

\item{rank}{a single character defining a taxonomic rank. Must be one of the
values returned by \code{taxonomyRanks()}.}

\item{prevalence}{Prevalence threshold (in 0 to 1). The
required prevalence is strictly greater by default. To include the
limit, set \code{include_lowest} to \code{TRUE}.}
}
\value{
\code{subsetByPrevalentFeatures} and \code{subsetByRareFeatures} return a subset of \code{x}.

All other functions return named vectors:
\itemize{
\item{\code{getPrevalence} returns a \code{numeric} vector with the
names being set to either the row names of \code{x} or the names after
agglomeration.}
\item{\code{getPrevalentAbundance} returns a \code{numeric} vector with
the names corresponding to the column names of \code{x} and the values giving
the joint abundance of prevalent taxa.}
\item{\code{getPrevalentFeatures} and \code{getRareFeatures} return a
\code{character} vector with only the names exceeding
(\code{getPrevalentFeatures}) or not exceeding (\code{getRareFeatures}) the
threshold set by \code{prevalence}, if the \code{rownames} of \code{x} is set.
Otherwise an \code{integer} vector is returned matching the rows in
\code{x}.}
}
}
\description{
These functions calculate the population prevalence for taxonomic ranks in a
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
object.
}
\details{
\code{getPrevalence} calculates the relative frequency of samples that exceed
the detection threshold. For \code{SummarizedExperiment} objects, the
prevalence is calculated for the selected taxonomic rank, otherwise for the
rows. The absolute population prevalence can be obtained by multiplying the
prevalence by the number of samples (\code{ncol(x)}). If \code{as_relative =
TRUE} the relative abundance (between 0 and 1) is used to check against the
\code{detection} threshold.

The core abundance index from \code{getPrevalentAbundance} gives the relative
proportion of the core species (in between 0 and 1). The core taxa are
defined as those that exceed the given population prevalence threshold at the
given detection level as set for \code{getPrevalentFeatures}.

\code{subsetByPrevalentFeatures} and \code{subsetByRareFeatures} return a subset of \code{x}.
The subset includes the most prevalent or rare taxa that are calculated with
\code{getPrevalentFeatures} or \code{getRareFeatures} respectively.

\code{getPrevalentFeatures} returns the taxa whose prevalence exceeds the
\code{prevalence} threshold at the given detection threshold for the selected
taxonomic rank.

\code{getRareFeatures} returns the complement of \code{getPrevalentFeatures}.
}
\examples{
data(GlobalPatterns)
tse <- GlobalPatterns
# Get prevalence estimates for individual ASV/OTU
prevalence.frequency <- getPrevalence(tse,
                                      detection = 0,
                                      sort = TRUE,
                                      as_relative = TRUE)
head(prevalence.frequency)

# Get prevalence estimates for phyla
# - the getPrevalence function itself always returns population frequencies
prevalence.frequency <- getPrevalence(tse,
                                      rank = "Phylum",
                                      detection = 0,
                                      sort = TRUE,
                                      as_relative = TRUE)
head(prevalence.frequency)

# - to obtain population counts, multiply frequencies with the sample size,
# which answers the question "In how many samples is this phylum detectable"
prevalence.count <- prevalence.frequency * ncol(tse)
head(prevalence.count)

# Detection threshold 10 (strictly greater by default);
# Note that the data (GlobalPatterns) is here in absolute counts
# (and not compositional, relative abundances)
# Prevalence threshold 50 percent (strictly greater by default)
prevalent <- getPrevalentFeatures(tse,
                              rank = "Phylum",
                              detection = 10,
                              prevalence = 50/100,
                              as_relative = FALSE)
head(prevalent)

# Gets a subset of object that includes prevalent taxa
altExp(tse, "prevalent") <- subsetByPrevalentFeatures(tse,
                                       rank = "Family",
                                       detection = 0.001,
                                       prevalence = 0.55,
                                       as_relative = TRUE)
altExp(tse, "prevalent")                                 

# getRareFeatures returns the inverse
rare <- getRareFeatures(tse,
                    rank = "Phylum",
                    detection = 1/100,
                    prevalence = 50/100,
                    as_relative = TRUE)
head(rare)

# Gets a subset of object that includes rare taxa
altExp(tse, "rare") <- subsetByRareFeatures(tse,
                             rank = "Class",
                             detection = 0.001,
                             prevalence = 0.001,
                             as_relative = TRUE)
altExp(tse, "rare")      

# Names of both experiments, prevalent and rare, can be found from slot altExpNames
tse
                         
data(esophagus)
getPrevalentAbundance(esophagus, assay.type = "counts")

}
\references{
A Salonen et al. The adult intestinal core microbiota is determined by
analysis depth and health status. Clinical Microbiology and Infection
18(S4):16--20, 2012.
To cite the R package, see \code{citation('mia')}
}
\seealso{
\code{\link[=agglomerate-methods]{agglomerateByRank}},
\code{\link[=getTopTaxa]{getTopTaxa}}
}
\author{
Leo Lahti
For \code{getPrevalentAbundance}: Leo Lahti and Tuomas Borman.
Contact: \url{microbiome.github.io}
}
