% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/kmeans.R
\name{mbkmeans}
\alias{mbkmeans}
\alias{mbkmeans,SummarizedExperiment-method}
\alias{mbkmeans,SingleCellExperiment-method}
\alias{mbkmeans,LinearEmbeddingMatrix-method}
\alias{mbkmeans,ANY-method}
\title{Mini-Batch k-means for large single cell sequencing data}
\usage{
mbkmeans(x, ...)

\S4method{mbkmeans}{SummarizedExperiment}(x, whichAssay = 1, ...)

\S4method{mbkmeans}{SingleCellExperiment}(x, reduceMethod = "PCA", whichAssay = 1, ...)

\S4method{mbkmeans}{LinearEmbeddingMatrix}(x, ...)

\S4method{mbkmeans}{ANY}(
  x,
  clusters,
  batch_size = min(500, NCOL(x)),
  max_iters = 100,
  num_init = 1,
  init_fraction = batch_size/NCOL(x),
  initializer = "kmeans++",
  compute_labels = TRUE,
  calc_wcss = FALSE,
  early_stop_iter = 10,
  verbose = FALSE,
  CENTROIDS = NULL,
  tol = 1e-04,
  BPPARAM = BiocParallel::SerialParam(),
  ...
)
}
\arguments{
\item{x}{The object on which to run mini-batch k-means. It can be a
matrix-like object (e.g., matrix, Matrix, DelayedMatrix, HDF5Matrix) with
genes in the rows and samples in the columns. Specialized methods are
defined for SummarizedExperiment, SingleCellExperiment, and
LinearEmbeddingMatrix.}

\item{...}{passed to \code{blockApply}.}

\item{whichAssay}{The assay to use as input to mini-batch k-means. If x is a
SingleCellExperiment, this is ignored unless \code{reduceMethod = NA}.}

\item{reduceMethod}{Name of dimensionality reduction results to use as input
to mini-batch k-means. Set to NA to use the full matrix.}

\item{clusters}{the number of clusters}

\item{batch_size}{the size of the mini batches. By default, it equals the
minimum between the number of observations and 500.}

\item{max_iters}{the maximum number of clustering iterations}

\item{num_init}{number of times the algorithm will be run with different
centroid seeds}

\item{init_fraction}{proportion of data to use for the initialization
centroids (applies if initializer is \emph{kmeans++}). Should be a float
number between 0.0 and 1.0. By default, it uses the relative batch size.}

\item{initializer}{the method of initialization. One of \emph{kmeans++} and
\emph{random}. See details for more information}

\item{compute_labels}{logical indicating whether to compute the final cluster
labels.}

\item{calc_wcss}{logical indicating whether the per-cluster WCSS
is computed. Ignored if \code{compute_labels = FALSE}.}

\item{early_stop_iter}{continue that many iterations after calculation of the
best within-cluster-sum-of-squared-error}

\item{verbose}{either TRUE or FALSE, indicating whether progress is printed
during clustering}

\item{CENTROIDS}{a matrix of initial cluster centroids. The number of rows of
the CENTROIDS matrix should be equal to the number of clusters and the number
of columns should be equal to the number of columns of the data.}

\item{tol}{a float number used as the convergence tolerance. If, at any
iteration with iteration > 1 and iteration < max_iters, \code{tol} is greater
than the squared norm of the centroids, then kmeans has converged.}

\item{BPPARAM}{See the \pkg{BiocParallel} package. Only the label assignment is
done in parallel.}
}
\value{
A list with the following attributes: centroids, WCSS_per_cluster,
  best_initialization, iters_per_initialization.
}
\description{
This is an implementation of the mini-batch k-means algorithm of
  Sculley (2010) for large single cell sequencing data with the
  dimensionality reduction results as input in the reducedDim() slot.
}
\details{
The implementation is largely based on the
  \code{\link[ClusterR]{MiniBatchKmeans}} function of the \code{ClusterR}
  package. The contribution of this package is to provide support for on-disk
  data representations such as HDF5, through the use of \code{DelayedMatrix}
  and \code{HDF5Matrix} objects, as well as for sparse data representation
  through the classes of the \code{Matrix} package. We also provide
  high-level methods for objects of class \code{SummarizedExperiment},
  \code{SingleCellExperiment}, and \code{LinearEmbeddingMatrix}.

This function performs k-means clustering using mini batches.

\strong{kmeans++}: kmeans++ initialization. References:
\url{http://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf} and
\url{http://stackoverflow.com/questions/5466323/how-exactly-does-k-means-work}

\strong{random}: random selection of data rows as initial centroids
}
\examples{
library(SummarizedExperiment)
se <- SummarizedExperiment(matrix(rnorm(100), ncol=10))
mbkmeans(se, clusters = 2)
library(SingleCellExperiment)
sce <- SingleCellExperiment(matrix(rnorm(100), ncol=10))
mbkmeans(sce, clusters = 2, reduceMethod = NA)
x<-matrix(rnorm(100), ncol=10)
mbkmeans(x,clusters = 3)

}
\references{
Sculley. Web-Scale K-Means Clustering. WWW 2010, April 26–30,
  2010, Raleigh, North Carolina, USA. ACM 978-1-60558-799-8/10/04.

\url{https://github.com/mlampros/ClusterR}
}
\author{
Lampros Mouselimis and Yuwei Ni
}
