% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/comparePCASubspace.R,
%   R/plot.comparePCASubspaceObject.R
\name{comparePCASubspace}
\alias{comparePCASubspace}
\alias{plot.comparePCASubspaceObject}
\title{Compare Subspaces Spanned by Top Principal Components}
\usage{
comparePCASubspace(
  reference_data,
  query_data,
  query_cell_type_col,
  ref_cell_type_col,
  pc_subset = 1:5,
  n_top_vars = 50
)

\method{plot}{comparePCASubspaceObject}(x, ...)
}
\arguments{
\item{reference_data}{A \code{\linkS4class{SingleCellExperiment}} object containing a numeric expression matrix for the reference cells.}

\item{query_data}{A \code{\linkS4class{SingleCellExperiment}} object containing a numeric expression matrix for the query cells.}

\item{query_cell_type_col}{The column name in the \code{colData} of \code{query_data} that identifies the cell types.}

\item{ref_cell_type_col}{The column name in the \code{colData} of \code{reference_data} that identifies the cell types.}

\item{pc_subset}{A numeric vector specifying the subset of principal components (PCs) to compare. Default is the first five PCs.}

\item{n_top_vars}{An integer indicating the number of top loading variables to consider for each PC. Default is 50.}

\item{x}{An object of class \code{comparePCASubspaceObject}: the list returned by the \code{comparePCASubspace} function,
containing the cosines of the principal angles, the average variance explained, and the weighted cosine similarity.}

\item{...}{Additional arguments passed to the plotting function.}
}
\value{
A list containing the following components:
  \item{principal_angles_cosines}{A numeric vector of cosine values of principal angles.}
  \item{average_variance_explained}{A numeric vector of average variance explained by each PC.}
  \item{weighted_cosine_similarity}{A numeric value representing the weighted cosine similarity.}

The S3 plot method returns a \code{ggplot} object showing the cosines of the principal angles, with point sizes representing the average variance explained.
}
\description{
This function compares the subspace spanned by the top principal components (PCs) in a reference dataset to that
in a query dataset. It computes the cosine similarity between the loadings of the top variables for each PC in
both datasets and provides a weighted cosine similarity score.

The S3 plot method generates a visualization of the output from the \code{comparePCASubspace} function.
The plot shows the cosine of principal angles between reference and query principal components,
with point sizes representing the variance explained.
}
\details{
This function compares the subspace spanned by the top principal components (PCs) in a reference dataset
to that in a query dataset. It first computes the cosine similarity between the loadings of the top variables
for each PC in both datasets. The top cosine similarity scores are then selected, and their corresponding PC
indices are stored. Additionally, the function calculates the average percentage of variance explained by the
selected top PCs. Finally, it computes a weighted cosine similarity score based on the top cosine similarities
and the average percentage of variance explained.

The S3 plot method converts the input list into a data frame suitable for plotting with \code{ggplot2}.
Each point in the scatter plot represents the cosine of a principal angle, with the size of the point
indicating the average variance explained by the corresponding principal components.
}
\examples{
# Attach the packages used in this example
library(scran)
library(scater)

# Bring the packaged reference and query datasets into the session
data("reference_data")
data("query_data")

# Restrict both datasets to the cells annotated as CD4
ref_cd4_idx <- which(reference_data$expert_annotation == "CD4")
query_cd4_idx <- which(query_data$expert_annotation == "CD4")
ref_cd4 <- reference_data[, ref_cd4_idx]
query_cd4 <- query_data[, query_cd4_idx]

# Pick the top 500 highly variable genes per dataset (the number is up to the user)
ref_hvgs <- getTopHVGs(ref_cd4, n = 500)
query_hvgs <- getTopHVGs(query_cd4, n = 500)

# Keep only the genes selected in both datasets
shared_genes <- intersect(ref_hvgs, query_hvgs)
ref_cd4 <- ref_cd4[shared_genes, ]
query_cd4 <- query_cd4[shared_genes, ]

# Compute a separate PCA for each dataset
ref_cd4 <- runPCA(ref_cd4)
query_cd4 <- runPCA(query_cd4)

# Compare the subspaces of the first five PCs, using the top 50 loading variables
subspace_comparison <- comparePCASubspace(
    reference_data = ref_cd4,
    query_data = query_cd4,
    query_cell_type_col = "expert_annotation",
    ref_cell_type_col = "expert_annotation",
    pc_subset = 1:5,
    n_top_vars = 50
)

# Draw the comparison through the S3 plot method
plot(subspace_comparison)

}
\seealso{
\code{\link{plot.comparePCASubspaceObject}}

\code{\link{comparePCASubspace}}
}
\author{
Anthony Christidis, \email{anthony-alexander_christidis@hms.harvard.edu}
}
\keyword{internal}
