% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cms.R
\name{cms}
\alias{cms}
\title{cms}
\usage{
cms(
  sce,
  k,
  group,
  dim_red = "PCA",
  assay_name = "logcounts",
  res_name = NULL,
  k_min = NA,
  smooth = TRUE,
  n_dim = 20,
  cell_min = 10,
  batch_min = NULL,
  unbalanced = FALSE,
  BPPARAM = SerialParam()
)
}
\arguments{
\item{sce}{A \code{SingleCellExperiment} object with the combined data.}

\item{k}{Numeric. Number of k-nearest neighbours (knn) to use.}

\item{group}{Character. Name of group/batch variable.
Needs to be one of \code{names(colData(sce))}.}

\item{dim_red}{Character. Name of embeddings to use as subspace for distance
distributions. Default is "PCA".}

\item{assay_name}{Character. Name of the assay to use for PCA.
Only relevant if no existing 'dim_red' is provided.
Must be one of \code{names(assays(sce))}. Default is "logcounts".}

\item{res_name}{Character. Appendix of the result score's name
(e.g. method used to combine batches).}

\item{k_min}{Numeric. Minimum number of knn to include.
Default is NA (see Details).}

\item{smooth}{Logical. Indicating if cms results should be smoothened within
each neighbourhood using the weighted mean.}

\item{n_dim}{Numeric. Number of dimensions to include to define the subspace.}

\item{cell_min}{Numeric. Minimum number of cells from each group to be
included into the AD test.}

\item{batch_min}{Numeric. Minimum number of cells per batch to include into
the AD test. If set, neighbours will be included until batch_min cells from
each batch are present.}

\item{unbalanced}{Logical. If TRUE, neighbourhoods with only one batch present
will be set to NA. This way they are not included into any summaries or
smoothening.}

\item{BPPARAM}{A \linkS4class{BiocParallelParam} object specifying whether
cms scores shall be calculated in parallel.}
}
\value{
A \code{SingleCellExperiment} with cms (and cms_smooth) within
colData.
}
\description{
Calculates cell-specific mixing scores based on euclidean distances within a
subspace of integrated data.
}
\details{
The cms function tests the hypothesis that group-specific distance
distributions of knn cells have the same underlying unspecified distribution.
It performs Anderson-Darling tests as implemented in the
\code{kSamples} package.
By default the function uses all distances and group labels defined in knn.
Alternatively, a density based neighbourhood can be defined by specifying
\code{k_min}. In this case the first local minimum of the overall distance
distribution with at least k_min cells is used. This can be used to adapt to
the local structure of the dataset, e.g. to prevent cells from a
different cluster from being included. Third, the neighbourhood can be
defined by batch occurrences. \code{batch_min} specifies the minimal number of cells from
 each batch that should be included to define the neighbourhood.
 If 'dim_red' is not defined or is left at its default, cms will calculate a
 PCA using \code{runPCA}. Results will be appended to \code{colData(sce)}.
 Names can be specified using \code{res_name}.
If multiple cores are available, cms scores can be calculated in parallel
(does not work on Windows). Parallelization can be specified using
\code{BPPARAM}.
}
\examples{
library(SingleCellExperiment)
sim_list <- readRDS(system.file("extdata/sim50.rds", package = "CellMixS"))
sce <- sim_list[[1]][, c(1:50)]

sce_cms <- cms(sce, k = 20, group = "batch", n_dim = 2)

}
\references{
Scholz, F. W. and Stephens, M. A. (1987).
K-Sample Anderson-Darling Tests.
J. Am. Stat. Assoc.
}
\seealso{
\code{\link{.cmsCell}}, \code{\link{.smoothCms}}.
}
\concept{cms functions}
