% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clusterSingle.R
\name{clusterSingle}
\alias{clusterSingle}
\alias{clusterSingle,SummarizedExperiment-method}
\alias{clusterSingle,ClusterExperiment-method}
\alias{clusterSingle,SingleCellExperiment-method}
\alias{clusterSingle,matrixOrHDF5OrNULL-method}
\title{General wrapper method to cluster the data}
\usage{
\S4method{clusterSingle}{SummarizedExperiment}(inputMatrix, ...)

\S4method{clusterSingle}{ClusterExperiment}(inputMatrix, ...)

\S4method{clusterSingle}{SingleCellExperiment}(
  inputMatrix,
  reduceMethod = "none",
  nDims = defaultNDims(inputMatrix, reduceMethod),
  whichAssay = 1,
  ...
)

\S4method{clusterSingle}{matrixOrHDF5OrNULL}(
  inputMatrix,
  inputType = "X",
  subsample = FALSE,
  sequential = FALSE,
  distFunction = NA,
  mainClusterArgs = NULL,
  subsampleArgs = NULL,
  seqArgs = NULL,
  isCount = FALSE,
  transFun = NULL,
  reduceMethod = "none",
  nDims = defaultNDims(inputMatrix, reduceMethod),
  makeMissingDiss = if (ncol(inputMatrix) < 1000) TRUE else FALSE,
  clusterLabel = "clusterSingle",
  saveSubsamplingMatrix = FALSE,
  checkDiss = FALSE,
  warnings = TRUE
)
}
\arguments{
\item{inputMatrix}{numerical matrix on which to run the clustering or a
\code{\link[SummarizedExperiment]{SummarizedExperiment}},
\code{\link{SingleCellExperiment}}, or \code{\link{ClusterExperiment}}
object.}

\item{...}{arguments to be passed on to the method for signature
\code{matrix}.}

\item{reduceMethod}{character. A character string identifying what type of
dimensionality reduction to perform before clustering. Options are 1)
"none", 2) one of \code{listBuiltInReducedDims()} or
\code{listBuiltInFilterStats()}, or 3) stored filtering or reducedDim values
in the object.}

\item{nDims}{integer. An integer identifying how many dimensions to reduce to
in the reduction specified by \code{reduceMethod}. Defaults to output of
\code{\link{defaultNDims}}.}

\item{whichAssay}{numeric or character specifying which assay to use. See
\code{\link[SummarizedExperiment]{assay}} for details.}

\item{inputType}{a character vector defining what type of input is given in
the \code{inputMatrix} argument. Must consist of values "diss", "X", or
"cat" (see details). "X" and "cat" indicate
matrices with features in the rows and samples in the columns; "cat"
corresponds to the features being numerical integers corresponding to
categories, while "X" are continuous valued features. "diss" corresponds to
an \code{inputMatrix} that is a NxN dissimilarity matrix. "cat" is largely
used internally for clustering of sets of clusterings.}

\item{subsample}{logical as to whether to subsample via
\code{\link{subsampleClustering}}. If TRUE, clustering in the mainClustering
step is done on the co-occurrence between clusterings in the subsampled
clustering results.  If FALSE, the mainClustering step will be run directly
on \code{x}/\code{diss}.}

\item{sequential}{logical whether to use the sequential strategy (see details
of \code{\link{seqCluster}}). Can be used in combination with
\code{subsample=TRUE} or \code{FALSE}.}

\item{distFunction}{a distance function to be applied to \code{inputMatrix}. Only
  relevant if \code{inputType="X"}. 
See details of \code{\link{clusterSingle}} for the
  required format of the distance function.}

\item{mainClusterArgs}{list of arguments to be passed for the mainClustering
step, see help pages of \code{\link{mainClustering}}.}

\item{subsampleArgs}{list of arguments to be passed to the subsampling step
(if \code{subsample=TRUE}), see help pages of
\code{\link{subsampleClustering}}.}

\item{seqArgs}{list of arguments to be passed to \code{\link{seqCluster}}.}

\item{isCount}{if \code{transFun=NULL}, then \code{isCount=TRUE} will
determine the transformation as defined by \code{function(x){log2(x+1)}},
and \code{isCount=FALSE} will give a transformation function
\code{function(x){x}}. Ignored if \code{transFun} is not \code{NULL}. If
object is of class \code{ClusterExperiment}, the stored transformation will
be used and giving this parameter will result in an error.}

\item{transFun}{a transformation function to be applied to the data. If the
transformation applied to the data creates an error or NA values, then the
function will throw an error. If object is of class
\code{ClusterExperiment}, the stored transformation will be used and giving
this parameter will result in an error.}

\item{makeMissingDiss}{logical. Whether to calculate necessary distance
matrices needed when input is not "diss". If TRUE, then when a clustering
function calls for an inputType "diss", but the given matrix is of type "X",
the function will calculate a distance matrix. A dissimilarity matrix
will also be calculated if a post-processing argument like \code{findBestK}
or \code{removeSil} is chosen, since these rely on calculating silhouette
widths from distances.}

\item{clusterLabel}{a string used to describe the clustering. By default it
is equal to "clusterSingle", to indicate that this clustering is the result
of a call to \code{clusterSingle}.}

\item{saveSubsamplingMatrix}{logical. If TRUE, the co-clustering matrix
resulting from subsampling is returned in the coClustering slot (and
replaces any existing coClustering object in the slot \code{coClustering}
if input object is a \code{ClusterExperiment} object.)}

\item{checkDiss}{logical. Whether to check whether the dissimilarity
matrices are valid (whether given by the user or calculated because
\code{makeMissingDiss=TRUE}).}

\item{warnings}{logical. Whether to print out the many possible warnings and
messages regarding checking the internal consistency of the parameters.}
}
\value{
A \code{\link{ClusterExperiment}} object if
  \code{inputType} is of type "X".

If input was not of type "X", then the result is a list with values
  \itemize{ 
       \item{clustering: }{The vector of clustering results}
       \item{clusterInfo: }{A list with information about the parameters run in
  the clustering} 
       \item{coClusterMatrix: }{(only if \code{saveSubsamplingMatrix=TRUE}) NxB set of clusterings obtained after B subsamples.} 
  }
}
\description{
Given input data, this function will find clusters, based on a single
specification of parameters.
}
\details{
\code{clusterSingle} is an 'expert-oriented' function, intended to
  be used when a user wants to run a single clustering and/or have a great
  deal of control over the clustering parameters. Most users will find
  \code{\link{clusterMany}} more relevant. However, \code{\link{clusterMany}}
  makes certain assumptions about the intention of certain combinations of
  parameters that might not match the user's intent; similarly
  \code{\link{clusterMany}} does not directly take a dissimilarity matrix but
  only a matrix of values \code{x} (though a user can define a distance
  function to be applied to \code{x} in \code{\link{clusterMany}}).

Unlike \code{\link{clusterMany}}, most of the relevant arguments for
  the actual clustering algorithms in \code{clusterSingle} are passed to the
  relevant steps via the arguments \code{mainClusterArgs},
  \code{subsampleArgs}, and \code{seqArgs}. These arguments should be
  \emph{named} lists with parameters that match the corresponding functions:
  \code{\link{mainClustering}},\code{\link{subsampleClustering}}, and
  \code{\link{seqCluster}}. These three functions are not meant to be called
  by the user, but rather accessed via calls to \code{clusterSingle}. But the
  user can look at the help files of those functions for more information 
  regarding the parameters that they take.

Only certain combinations of parameters are possible for certain
  choices of \code{sequential} and \code{subsample}. These restrictions are
  documented below. 
  \itemize{ 
      \item{\code{clusterFunction} for
  \code{mainClusterArgs}: }{The choice of \code{subsample=TRUE} also controls
  what algorithm type of clustering functions can be used in the
  mainClustering step. When \code{subsample=TRUE}, then resulting
  co-clustering matrix  from subsampling is converted to a dissimilarity
  (specifically 1-coclustering values) and is passed to \code{diss} of
  \code{\link{mainClustering}}. For this reason, the \code{ClusterFunction}
  object given to \code{\link{mainClustering}} via the argument
  \code{mainClusterArgs} must take input of the form of a dissimilarity. When
  \code{subsample=FALSE} and \code{sequential=TRUE}, the
  \code{clusterFunction} passed in \code{clusterArgs} element of
  \code{mainClusterArgs} must define a \code{ClusterFunction} object with
  \code{algorithmType} 'K'.  When \code{subsample=FALSE} and
  \code{sequential=FALSE}, then there are no restrictions on the
  \code{ClusterFunction} and that clustering is applied directly to the input
  data. } 
  \item{\code{clusterFunction}  for \code{subsampleArgs}: }{If the
  \code{ClusterFunction} object given to the \code{clusterArgs} of
  \code{subsampleArgs} is missing, the algorithm will use the default for
  \code{\link{subsampleClustering}} (currently "pam"). If
  \code{sequential=TRUE}, this \code{ClusterFunction} object must be of type
  'K'. } 
  \item{Setting \code{k} for subsampling: }{If \code{subsample=TRUE}
  and \code{sequential=TRUE}, the current K of the sequential iteration
  determines the 'k' argument passed to \code{\link{subsampleClustering}}  so
  setting 'k=' in the list given to the subsampleArgs will not do anything
  and will produce a warning to that effect (see documentation of
  \code{\link{seqCluster}}).} 
  \item{Setting \code{k} for mainClustering step:
  }{If \code{sequential=TRUE} then the user should not set \code{k} in the
  \code{clusterArgs} argument of \code{mainClusterArgs} because it must be
  set by the sequential code, which has an iterative resetting of the
  parameters. Specifically if \code{subsample=FALSE}, then the sequential
  method iterates over choices of \code{k} to cluster the input data. And if
  \code{subsample=TRUE}, then the \code{k} in the clustering of
  mainClustering step (assuming the clustering function is of type 'K') will
  use the \code{k} used in the subsampling step to make sure that the
  \code{k} used in the mainClustering step is reasonable. } 
  \item{Setting
  \code{findBestK} in \code{mainClusterArgs}: }{If \code{sequential=TRUE} and
  \code{subsample=FALSE}, the user should not set 'findBestK=TRUE' in
  \code{mainClusterArgs}. This is because in this case the sequential method
  changes \code{k}; an error message will be given if this combination of
  options is set. However, if \code{sequential=TRUE} and
  \code{subsample=TRUE}, then passing either 'findBestK=TRUE' or
  'findBestK=FALSE' via \code{mainClusterArgs} will function as expected
  (assuming the \code{clusterFunction} argument passed to
  \code{mainClusterArgs} is of type 'K'). In particular, the sequential step
  will set the number of clusters \code{k} for clustering of each subsample.
  If findBestK=FALSE, that same \code{k} will be used for mainClustering step
  that clusters the resulting co-occurrence matrix after subsampling. If
  findBestK=TRUE, then \code{\link{mainClustering}} will search for best k.
  Note that the default 'kRange' over which \code{\link{mainClustering}}
  searches when findBestK=TRUE depends on the input value of \code{k} (which
  is set by the sequential method if \code{sequential=TRUE}, see above). The
  user can change \code{kRange} to not depend on \code{k} and to be fixed
  across all of the sequential steps by setting \code{kRange} explicitly in
  the \code{mainClusterArgs} list.} }

To provide a distance function via the argument \code{distFunction},
  the function must be defined to take the distance of the rows of a matrix
  (internally, the function will call \code{distFunction(t(x))}). This is to
  be compatible with the input for the \code{dist} function. \code{as.matrix}
  will be performed on the output of \code{distFunction}, so if the object
  returned has an \code{as.matrix} method that will convert the output into a
  symmetric matrix of distances, this is fine (for example the class
  \code{dist} for objects returned by \code{dist} have such a method). If
  \code{distFunction=NA}, then a default distance will be calculated based on
  the type of clustering algorithm of \code{clusterFunction}. For type "K"
  the default is to take \code{dist} as the distance function. For type "01",
  the default is to take the (1-cor(x))/2.
}
\examples{
data(simData)

\dontrun{
#following code takes some time.
#use clusterSingle to do sequential clustering
#(same as example in seqCluster only using clusterSingle ...)
set.seed(44261)
clustSeqHier_v2 <- clusterSingle(simData,
     sequential=TRUE, subsample=TRUE, 
     subsampleArgs=list(resamp.n=100, samp.p=0.7,
     clusterFunction="kmeans", clusterArgs=list(nstart=10)),
     seqArgs=list(beta=0.8, k0=5), mainClusterArgs=list(minSize=5,
     clusterFunction="hierarchical01",clusterArgs=list(alpha=0.1)))
}

#use clusterSingle to do just clustering k=3 with no subsampling
clustObject <- clusterSingle(simData,
    subsample=FALSE, sequential=FALSE,
    mainClusterArgs=list(clusterFunction="pam", clusterArgs=list(k=3)))
#compare to standard pam
pamOut<-cluster::pam(t(simData),k=3,cluster.only=TRUE)
all(pamOut==primaryCluster(clustObject))
}
\seealso{
\code{\link{clusterMany}} to compare multiple choices of parameters,
  and \code{\link{mainClustering}},\code{\link{subsampleClustering}}, and
  \code{\link{seqCluster}} for the underlying functions called by
  \code{clusterSingle}.
}
