% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mergeClusters.R
\name{mergeClusters}
\alias{mergeClusters}
\alias{mergeClusters,matrixOrHDF5-method}
\alias{mergeClusters,ClusterExperiment-method}
\alias{nodeMergeInfo,ClusterExperiment-method}
\alias{nodeMergeInfo}
\alias{mergeCutoff,ClusterExperiment-method}
\alias{mergeCutoff}
\alias{mergeMethod,ClusterExperiment-method}
\alias{mergeMethod}
\alias{mergeClusterIndex,ClusterExperiment-method}
\alias{mergeClusterIndex}
\alias{eraseMergeInfo,ClusterExperiment-method}
\alias{eraseMergeInfo}
\alias{getMergeCorrespond,ClusterExperiment-method}
\alias{getMergeCorrespond}
\title{Merge clusters based on dendrogram}
\usage{
\S4method{mergeClusters}{matrixOrHDF5}(
  x,
  cl,
  dendro = NULL,
  mergeMethod = c("none", "Storey", "PC", "adjP", "locfdr", "JC"),
  plotInfo = "none",
  nodePropTable = NULL,
  calculateAll = TRUE,
  showWarnings = FALSE,
  cutoff = 0.05,
  plot = TRUE,
  DEMethod,
  logFCcutoff = 0,
  weights = NULL,
  ...
)

\S4method{mergeClusters}{ClusterExperiment}(
  x,
  eraseOld = FALSE,
  mergeMethod = "none",
  plotInfo = "all",
  clusterLabel = "mergeClusters",
  leafType = c("samples", "clusters"),
  plotType = c("colorblock", "name", "ids"),
  plot = TRUE,
  whichAssay = 1,
  forceCalculate = FALSE,
  weights = if ("weights" \%in\% assayNames(x)) "weights" else NULL,
  DEMethod,
  ...
)

\S4method{nodeMergeInfo}{ClusterExperiment}(x)

\S4method{mergeCutoff}{ClusterExperiment}(x)

\S4method{mergeMethod}{ClusterExperiment}(x)

\S4method{mergeClusterIndex}{ClusterExperiment}(x)

\S4method{eraseMergeInfo}{ClusterExperiment}(x)

\S4method{getMergeCorrespond}{ClusterExperiment}(x, by = c("merge", "original"))
}
\arguments{
\item{x}{data to perform the test on. It can be a matrix or a 
\code{\link{ClusterExperiment}}.}

\item{cl}{A numeric vector with cluster assignments to compare to. ``-1'' 
indicates the sample was not assigned to a cluster.}

\item{dendro}{dendrogram providing hierarchical clustering of clusters in cl.
If x is a matrix, then the default is \code{dendro=NULL} and the function 
will calculate the dendrogram with the given (x, cl) pair using 
\code{\link{makeDendrogram}}. If x is a \code{\link{ClusterExperiment}} 
object, the dendrogram in the slot \code{dendro_clusters} will be used. In 
this case, this means that \code{\link{makeDendrogram}} needs to be called 
before \code{mergeClusters}.}

\item{mergeMethod}{method for calculating proportion of non-null that will be
used to merge clusters (if 'none', no merging will be done). See details 
for description of methods.}

\item{plotInfo}{what type of information about the merging will be shown on 
the dendrogram. If 'all', then all the estimates of proportion non-null 
will be plotted at each node of the dendrogram; if 'mergeMethod', then only
the value used in the \code{mergeClusters} command is plotted at each node.
If 'none', then no proportions will be added to the dendrogram, though the 
dendrogram will be drawn. 'plotInfo' can also be one of the valid input to 
\code{mergeMethod} (even if that method is not the method chosen in 
\code{mergeMethod} argument). \code{plotInfo} can also show the information
corresponding  to "adjP" with a fold-change cutoff, by giving a value to 
this argument in  the form of "adjP_2.0", for example.}

\item{nodePropTable}{Only for matrix version. Matrix of results from previous
run of \code{mergeClusters} as returned by matrix version of 
\code{mergeClusters}. Useful if just want to change the cutoff. Not 
generally intended for user but used internally by package.}

\item{calculateAll}{logical. Whether to calculate the estimates for all 
methods. This reduces computation costs for any future calls to 
\code{mergeClusters} since the results can be passed to future calls of 
\code{mergeClusters} (and for \code{ClusterExperiment} objects this is done
automatically).}

\item{showWarnings}{logical. Whether to show warnings given by the methods. 
The 'locfdr' method in particular frequently spits out warnings (which may 
indicate that its estimates are not reliable). Setting 
\code{showWarnings=FALSE} will suppress all warnings from all methods (not 
just "locfdr"). By default this is set to \code{showWarnings=FALSE} to 
avoid a large number of warnings being produced by "locfdr", but 
users may want to be more careful to check the warnings for themselves.}

\item{cutoff}{minimum value required for NOT merging a cluster, i.e. two 
clusters with the proportion of DE below cutoff will be merged. Must be a 
value between 0 and 1, where lower values will make it harder to merge 
clusters.}

\item{plot}{logical as to whether to plot the dendrogram with the merge 
results}

\item{DEMethod}{character vector describing how the differential expression 
 analysis should be performed that will be used in the estimation of the
percentage DE per node. See \link{getBestFeatures} for current options. 
 See details.}

\item{logFCcutoff}{Relevant only if the \code{mergeMethod} selected is 
"adjP", in which case the calculation of the proportion of individual tests
significant will also require that the estimated log-fold change of the 
features to be at least this large in absolute value. Value will be rounded
to nearest tenth of an integer via \code{round(logFCcutoff,digits=1)}. For 
any other method, this parameter is ignored. Note that the logFC is based
on \code{log2} (the results of \code{\link{getBestFeatures}})}

\item{weights}{weights to be used by edgeR. If \code{x} is a matrix, then
weights should be a matrix of weights, of the same dimensions as \code{x}.
If \code{x} is a \code{ClusterExperiment} object \code{weights} can be
either a matrix, as previously described, or a character or numeric index
to an assay in \code{x} that contains the weights. We recommend that 
weights be stored as an assay with name \code{"weights"} so that the
weights will also be used with \code{\link{mergeClusters}}, and this is the
default. Setting \code{weights=NULL} ensures that weights will NOT be used,
and only the standard edgeR is applied.}

\item{...}{for signature \code{matrix}, arguments passed to the 
\code{\link{plot.phylo}} function of \code{ape} that plots the dendrogram. 
For signature \code{ClusterExperiment}, arguments passed to the method for 
signature \code{matrix}; any that do not match that method's arguments will 
be passed on to \code{\link{plot.phylo}}.}

\item{eraseOld}{logical. Only relevant if input \code{x} is of class
\code{ClusterExperiment}. If TRUE, will erase existing workflow results
(clusterMany as well as mergeClusters and makeConsensus). If FALSE, existing
workflow results will have "\code{_i}" added to the clusterTypes value,
where \code{i} is one more than the largest such existing workflow
clusterTypes.}

\item{clusterLabel}{a string used to describe the type of clustering. By
default it is equal to "mergeClusters", to indicate that this clustering is
the result of a call to mergeClusters (only if x is a ClusterExperiment object)}

\item{leafType}{if plotting, whether the leaves should be the clusters or the
samples. Choosing 'samples' allows for visualization of how many samples
are in the merged clusters (only if x is a ClusterExperiment object), which
is the main difference between choosing "clusters" and "samples",
particularly if \code{plotType="colorblock"}}

\item{plotType}{if plotting, then whether leaves of dendrogram should be
labeled by rectangular blocks of color ("colorblock")  or with the names of
the leaves ("name") (only if x is a ClusterExperiment object).}

\item{whichAssay}{numeric or character specifying which assay to use. See
\code{\link[SummarizedExperiment]{assay}} for details.}

\item{forceCalculate}{This forces the function to erase previously saved
merge results and recalculate the merging.}

\item{by}{indicates whether output from \code{getMergeCorrespond} should be
a vector/list with elements corresponding to merge cluster ids or elements
corresponding to the original clustering ids. See return value for details.}
}
\value{
If \code{x} is a matrix, it returns (invisibly) a list with elements
\describe{ 
\item{\code{clustering}}{ a vector of length equal to ncol(x) giving
the integer-valued cluster ids for each sample. "-1" indicates the sample was
not clustered.} 
\item{\code{oldClToNew}}{ A table of the old cluster labels to
the new cluster labels.} 
\item{\code{nodeProp}}{ A table of the proportions
that are DE on each node. This table is saved in the \code{merge_nodeProp} slot
of a \code{ClusterExperiment} object and can be accessed along with the
nodeMerge info with the \code{nodeMergeInfo} function. 
}
\item{\code{nodeMerge}}{ 
A table indicating for each node whether merged or
not and the cluster id in the new clustering that corresponds to the node.
Note that a node can be merged and not correspond to a node in the new
clustering, if its ancestor node is also merged. But there must be some node
that corresponds to a new cluster id if merging has been done. This table is
saved in the \code{merge_nodeMerge} slot of a \code{ClusterExperiment} object
and can be accessed along with the nodeProp info with the \code{nodeMergeInfo}
function. 
} 
\item{\code{updatedClusterDendro}}{ The dendrogram on which the
merging was based (based on the original clustering).
} 
\item{\code{cutoff}}{
The cutoff value for merging.
} 
}

If \code{x} is a \code{\link{ClusterExperiment}}, it returns a new
  \code{ClusterExperiment} object with an additional clustering based on the
  merging. This becomes the new primary clustering. Note that even if
  \code{mergeMethod="none"}, the returned object will erase any old merge
  information, update the work flow numbering, and return the newly calculated 
  merge information.

\code{nodeMergeInfo} returns information collected about the nodes
  during merging as a data.frame with the following entries:
\describe{ \item{\code{Node}}{ Name of the node}
\item{\code{Contrast}}{The
contrast compared at each node, in terms of the cluster ids}
\item{\code{isMerged}}{ Logical as to whether samples from that node were
merged into one cluster during merging}
\item{\code{mergeClusterId}}{ If a
node corresponds to a new, merged cluster, gives the cluster id it
corresponds to. Otherwise NA}
\item{\code{...}}{The remaining columns give
the estimated proportion of genes differentially expressed for each method. A
column of NAs means that the method in question hasn't been calculated yet.}
}

\code{mergeCutoff} returns the cutoff used for the current merging.

\code{mergeMethod} returns the method used for the current merge.

\code{mergeClusterIndex} returns the index of the clustering used for the current merge.

\code{eraseMergeInfo} returns object with all previously saved merge info removed.

\code{getMergeCorrespond} returns the correspondence between the
  merged cluster and its originating cluster. If \code{by="original"} returns
  a named vector, where the names of the vector are the cluster ids of the
  originating cluster and the values of the vector are the cluster ids of the
  merged cluster. If \code{by="merge"} the results returned are organized by
  the merged clusters. This will generally be a list, with the names of the
  list equal to the clusterIds of the merge clusters and the entries the
  clusterIds of the originating clusters. However, if there was no merging
  done (so that the clusters are identical) the output will be a vector like
  with \code{by="original"}.
}
\description{
Takes an input of hierarchical clusterings of clusters and
  returns estimates of the proportion of non-null and merges those
  below a certain cutoff.
}
\details{
\strong{Estimation of proportion non-null} "Storey" refers to the
  method of Storey (2002). "PC" refers to the method of Pounds and Cheng
  (2004). "JC" refers to the method of Jin and Cai (2007), and implementation
  of "JC" method is copied from code available on Jiashun Jin's website,
  December 16, 2015
  (\url{http://www.stat.cmu.edu/~jiashun/Research/software/NullandProp/}). "locfdr"
  refers to the method of Efron (2004) and is implemented in the package
  \code{\link{locfdr}}.  "adjP"
  refers to the proportion of genes that are found significant based on FDR
  adjusted p-values (method "BH") and a cutoff of 0.05. Previous versions offered the method "MB", a method of Meinshausen and Buhlmann
  (2005), but the package  \code{howmany} is no longer supported for its implementation.

\strong{Control of Plotting} If \code{mergeMethod} is not equal to
  'none' then the plotting will indicate where the clusters will be merged by
  making dotted lines of edges that are merged together (assuming
  \code{plotInfo} is not 'none'). \code{plotInfo} controls simultaneously
  what information will be plotted on the nodes as well as whether the dotted
  lines will be shown for the merged cluster. Notice that the choice of
  \code{plotInfo} (as long as it is not 'none') has no effect on how the
  dotted edges are drawn -- they are always drawn based on the
  \code{mergeMethod}. If you choose \code{plotInfo} to not be equal to the
  \code{mergeMethod}, then you will have a confusing picture where the dotted
  edges will be based on the clustering created by \code{mergeMethod} while
  the information on the nodes is based on a different method. Note that you
  can override \code{plotInfo} by setting \code{show.node.label=FALSE}
  (passed to plot.phylo), so that no information is plotted on the nodes, but
  the dotted edges are still drawn. If you just want plot of the dendrogram,
  with no merging performed nor demonstrated on the plot, see
  \code{\link{plotDendrogram}}.

\strong{Saving and Reusing of results} By default, the function
  saves the results in the \code{ClusterExperiment} object and will not
  recalculate them if not needed. Note that by default
  \code{calculateAll=TRUE}, which means that regardless of the value of
  \code{mergeMethod}, all the methods will be calculated so that those
  results will be stored and if you change the mergeMethod, no additional
  calculations are needed. Since the computationally intensive step is
  running the DE method on the genes, this is a big savings (all of the
  methods then calculate the proportion from those results). However, note
  that if \code{calculateAll=TRUE} and ANY of the methods returned NA for any
  value, the calculation will be redone. Thus if, for example, the
  \code{locfdr} function does not run successfully and returns NA, the
  function will always recalculate each time, even if you don't specifically
  want the results of \code{locfdr}. In this case, it makes sense to turn
  \code{calculateAll=FALSE}.

If the dendrogram was made with option
  \code{unassignedSamples="cluster"} (i.e. unassigned were clustered in with
  other samples), then you cannot choose the option
  \code{leafType='samples'}. This is because the current code cannot reliably
  link up the internal nodes of the sample dendrogram to the internal nodes
  of the cluster dendrogram when the unassigned samples are intermixed.

When the input is a \code{ClusterExperiment} object, the function
    attempts to update the merge information in that object. This is done by
    checking that the existing dendrogram stored in the object  (and run on
    the clustering stored in the slot \code{dendro_index}) is the same
    clustering that is stored in the slot \code{merge_dendrocluster_index}.
    For this reason, new calls to \code{\link{makeDendrogram}} will erase the merge
    information saved in the object.

If \code{mergeClusters} is run with \code{mergeMethod="none"}, the
  function may still calculate the proportions per node if \code{plotInfo} is
  not equal to "none" or \code{calculateAll=TRUE}. If the input object was a
  \code{ClusterExperiment} object, the resulting information will be still
  saved, though no new clustering was created; if there was not an existing
  merge method, the slot \code{merge_dendrocluster_index} will be updated.
}
\examples{
data(simData)

#create a clustering, for 8 clusters (truth was 3)
cl<-clusterSingle(simData, subsample=FALSE,
sequential=FALSE, mainClusterArgs=list(clusterFunction="pam", clusterArgs=list(k=8)))

#give more interesting names to clusters:
newNames<- paste("Cluster",clusterLegend(cl)[[1]][,"name"],sep="")
clusterLegend(cl)[[1]][,"name"]<-newNames
#make dendrogram
cl <- makeDendrogram(cl)

#plot showing the before and after clustering
#(Note argument 'use.edge.length' can improve
#readability)
merged <- mergeClusters(cl, plotInfo="all",
mergeMethod="adjP", use.edge.length=FALSE, DEMethod="limma")

#Simpler plot with just dendrogram and single method
merged <- mergeClusters(cl, plotInfo="mergeMethod",
mergeMethod="adjP", use.edge.length=FALSE, DEMethod="limma",
leafType="clusters",plotType="name")

#compare merged to original
tableClusters(merged,whichClusters=c("mergeClusters","clusterSingle"))

}
\references{
Jin and Cai (2007), "Estimating the Null and the Proportion
of Nonnull Effects in Large-Scale Multiple Comparisons", JASA 102: 495-506.

Efron (2004) "Large-scale simultaneous hypothesis testing:
the choice of a null hypothesis," JASA, 99: 96-104.

Meinshausen and Buhlmann (2005) "Lower bounds for the
number of false null hypotheses for multiple testing of associations",
Biometrika 92(4): 893-907.

Storey (2002) "A direct approach to false discovery rates", J. R.
  Statist. Soc. B 64 (3): 479-498.

Pounds and Cheng (2004). "Improving false discovery rate
  estimation." Bioinformatics 20(11): 1737-1745.
}
\seealso{
makeDendrogram, plotDendrogram, getBestFeatures
}
