% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/scPCA.R
\name{scPCA}
\alias{scPCA}
\title{Sparse Contrastive Principal Component Analysis}
\usage{
scPCA(
  target,
  background,
  center = TRUE,
  scale = FALSE,
  n_eigen = 2,
  cv = NULL,
  alg = c("iterative", "var_proj", "rand_var_proj"),
  contrasts = exp(seq(log(0.1), log(1000), length.out = 40)),
  penalties = seq(0.05, 1, length.out = 20),
  clust_method = c("kmeans", "pam", "hclust"),
  n_centers = NULL,
  max_iter = 10,
  linkage_method = "complete",
  n_medoids = 8,
  parallel = FALSE,
  clusters = NULL,
  eigdecomp_tol = 1e-10,
  eigdecomp_iter = 1000,
  scaled_matrix = FALSE
)
}
\arguments{
\item{target}{The target (experimental) data set, in a standard format such
as a \code{data.frame} or \code{matrix}. \code{dgCMatrix} and
\code{DelayedMatrix} objects are also supported.}

\item{background}{The background data set, in a standard format such as a
\code{data.frame} or \code{matrix}. The features must match the features of
the target data set. \code{dgCMatrix} and \code{DelayedMatrix} objects are
also supported.}

\item{center}{A \code{logical} indicating whether the target and background
data sets' features should be centered to mean zero.}

\item{scale}{A \code{logical} indicating whether the target and background
data sets' features should be scaled to unit variance.}

\item{n_eigen}{A \code{numeric} indicating the number of eigenvectors (or
(sparse) contrastive components) to be computed. Two eigenvectors are
computed by default.}

\item{cv}{A \code{numeric} indicating the number of cross-validation folds
to use in choosing the optimal contrastive and penalization parameters from
the grids of \code{contrasts} and \code{penalties}. Cross-validation
is expected to improve the robustness and generalization of the choice of
these parameters. However, it increases the running time of the procedure.
The default is therefore \code{NULL}, corresponding to no cross-validation.}

\item{alg}{A \code{character} indicating the sparse PCA algorithm used to
sparsify the contrastive loadings. Currently supports \code{iterative} for
the \insertCite{zou2006sparse;textual}{scPCA} implementation, \code{var_proj}
for the non-randomized \insertCite{erichson2018sparse;textual}{scPCA}
solution, and \code{rand_var_proj} for the randomized
\insertCite{erichson2018sparse;textual}{scPCA} implementation. Defaults to
\code{iterative}.}

\item{contrasts}{A \code{numeric} vector of the contrastive parameters. Each
element must be a unique, non-negative real number. By default, 40
logarithmically spaced values between 0.1 and 1000 are used. If a single
value is provided and \code{penalties} is set to 0, then \code{n_centers},
\code{clust_method}, \code{max_iter}, \code{linkage_method},
\code{n_medoids}, and \code{parallel} can be safely ignored.}

\item{penalties}{A \code{numeric} vector of the L1 penalty terms on the
loadings. The default is to use 20 equidistant values between 0.05 and 1.
If \code{penalties} is set to 0, then cPCA is performed in place of scPCA.
See the \code{contrasts} and \code{n_centers} arguments for more information.}

\item{clust_method}{A \code{character} specifying the clustering method to
use for choosing the optimal contrastive parameter. Currently, this is
limited to k-means, partitioning around medoids (PAM), or
hierarchical clustering. The default is k-means clustering.}

\item{n_centers}{A \code{numeric} giving the number of centers to use in the
clustering algorithm. If set to 1, cPCA, as first proposed by
\insertCite{abid2018exploring;textual}{scPCA}, is performed, regardless of
what the \code{penalties} argument is set to.}

\item{max_iter}{A \code{numeric} giving the maximum number of iterations to
be used in k-means clustering. Defaults to 10.}

\item{linkage_method}{A \code{character} specifying the agglomerative
linkage method to be used if \code{clust_method = "hclust"}. The options
are \code{ward.D2}, \code{single}, \code{complete}, \code{average},
\code{mcquitty}, \code{median}, and \code{centroid}. The default is
\code{complete}.}

\item{n_medoids}{A \code{numeric} indicating the number of medoids to
consider if \code{n_centers} is set to 1 and \code{contrasts} is a vector of
length 2 or more. The default is 8 medoids.}

\item{parallel}{A \code{logical} indicating whether to invoke parallel
processing via the \pkg{BiocParallel} infrastructure. The default is
\code{FALSE} for sequential evaluation.}

\item{clusters}{A \code{numeric} vector of cluster labels for observations in
the \code{target} data. Defaults to \code{NULL}, but is otherwise used to
identify the optimal set of hyperparameters when fitting the scPCA and the
automated version of cPCA. If a vector is provided, the
\code{n_centers}, \code{clust_method}, \code{max_iter},
\code{linkage_method}, and \code{n_medoids} arguments can be safely ignored.}

\item{eigdecomp_tol}{A \code{numeric} providing the level of precision used by
eigendecomposition calculations. Defaults to \code{1e-10}.}

\item{eigdecomp_iter}{A \code{numeric} indicating the maximum number of
iterations performed by eigendecomposition calculations. Defaults to
\code{1000}.}

\item{scaled_matrix}{A \code{logical} indicating whether to output a
\code{\link[ScaledMatrix]{ScaledMatrix}} object. The centering and scaling
procedure is delayed until later, permitting more efficient matrix
multiplication and row or column sums downstream. However, this comes at the
cost of numerical precision. Defaults to \code{FALSE}.}
}
\value{
A list containing the following components:
  \itemize{
    \item \code{rotation}: The matrix of variable loadings if \code{n_centers}
      is larger than one. Otherwise, a list of rotation matrices is returned,
      one for each medoid. The number of medoids is specified by
      \code{n_medoids}.
    \item \code{x}: The rotated data, centered and scaled if requested,
      multiplied by the rotation matrix if \code{n_centers} is larger than
      one. Otherwise, a list of rotated data matrices is returned, one for
      each medoid. The number of medoids is specified by \code{n_medoids}.
    \item \code{contrast}: The optimal contrastive parameter.
    \item \code{penalty}: The optimal L1 penalty term.
    \item \code{center}: A logical indicating whether the target dataset was centered.
    \item \code{scale}: A logical indicating whether the target dataset was scaled.
  }
}
\description{
Given target and background data frames or matrices,
 \code{scPCA} will perform the sparse contrastive principal component
 analysis (scPCA) of the target data for a given number of eigenvectors, a
 vector of real-valued contrast parameters, and a vector of sparsity inducing
 penalty terms.

 If instead you wish to perform contrastive principal component analysis
 (cPCA), set the \code{penalties} argument to \code{0}. So long as the
 \code{n_centers} parameter is larger than one, the automated hyperparameter
 tuning heuristic described in \insertCite{boileau2020;textual}{scPCA} is
 used. Otherwise, the semi-automated approach of
 \insertCite{abid2018exploring;textual}{scPCA} is used to select the
 appropriate hyperparameter.
}
\examples{
# perform cPCA on the simulated data set
scPCA(
  target = toy_df[, 1:30],
  background = background_df,
  contrasts = exp(seq(log(0.1), log(100), length.out = 5)),
  penalties = 0,
  n_centers = 4
)

# perform scPCA on the simulated data set
scPCA(
  target = toy_df[, 1:30],
  background = background_df,
  contrasts = exp(seq(log(0.1), log(100), length.out = 5)),
  penalties = seq(0.1, 1, length.out = 3),
  n_centers = 4
)

# perform cPCA on the simulated data set with known clusters
scPCA(
  target = toy_df[, 1:30],
  background = background_df,
  contrasts = exp(seq(log(0.1), log(100), length.out = 5)),
  penalties = 0,
  clusters = toy_df[, 31]
)

# cPCA as implemented in Abid et al.
scPCA(
  target = toy_df[, 1:30],
  background = background_df,
  contrasts = exp(seq(log(0.1), log(100), length.out = 10)),
  penalties = 0,
  n_centers = 1
)
}
\references{
\insertAllCited{}
}
