% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/geneset_similarity.R
\name{geneset_similarity}
\alias{geneset_similarity}
\title{Plot Signature Similarity via Jaccard Index or Fisher's Odds Ratio}
\usage{
geneset_similarity(
  signatures,
  other_user_signatures = NULL,
  collection = NULL,
  subcollection = NULL,
  metric = c("jaccard", "odds_ratio"),
  universe = NULL,
  or_threshold = 1,
  pval_threshold = 0.05,
  limits = NULL,
  title_size = 12,
  color = "#B44141",
  neutral_color = "white",
  cold_color = "#4173B4",
  title = NULL,
  jaccard_threshold = 0,
  msig_subset = NULL,
  width_text = 20,
  na_color = "grey90"
)
}
\arguments{
\item{signatures}{A named list of character vectors representing reference
gene signatures.}

\item{other_user_signatures}{Optional. A named list of character vectors
representing other user-defined signatures to compare against.}

\item{collection}{Optional. MSigDB collection name (e.g., \code{"H"} for hallmark,
\code{"C2"} for curated gene sets). Use msigdbr::msigdbr_collections() for the
available options.}

\item{subcollection}{Optional. Subcategory within an MSigDB collection (e.g.,
\code{"CP:REACTOME"}). Use msigdbr::msigdbr_collections() for the available
options.}

\item{metric}{Character. Either \code{"jaccard"} or \code{"odds_ratio"}.}

\item{universe}{Character vector. Background gene universe. Required when
\code{metric = "odds_ratio"}.}

\item{or_threshold}{(only if \code{metric = "odds_ratio"}) Numeric. Minimum
Odds Ratio required for a gene set to be included in the plot. Default is
\code{1}.}

\item{pval_threshold}{(only if \code{metric = "odds_ratio"}) Numeric. Maximum
adjusted p-value required for a gene set to be included in the plot.
Default is \code{0.05}.}

\item{limits}{Numeric vector of length 2. Limits for color scale. If \code{NULL},
it is automatically set to \code{c(0, 1)} for Jaccard or to the range of OR
for odds ratio.}

\item{title_size}{Integer specifying the font size for the plot title.
Default is \code{12}.}

\item{color}{Character. The color for the maximum of the scale. Default is
\code{"#B44141"} (red).
\itemize{
\item If \code{metric = "jaccard"}, the scale goes from \code{neutral_color} to \code{color}.
\item If \code{metric = "odds_ratio"} and any OR >= 1, the scale ends at \code{color}.
\item If \code{metric = "odds_ratio"} and all OR <= 1, \code{color} is not used; instead, the scale
runs from \code{cold_color} (minimum) to \code{neutral_color} (OR = 1, if present;
otherwise \code{neutral_color} is the maximum).
}}

\item{neutral_color}{Character. The neutral reference color. Default is
\code{"white"}.
\itemize{
\item If \code{metric = "jaccard"}, this is the minimum of the scale.
\item If \code{metric = "odds_ratio"} and any OR >= 1, this corresponds to OR = 1 if such values exist; otherwise it is the minimum of the scale.
\item If \code{metric = "odds_ratio"} and all OR <= 1, this corresponds to OR = 1 if such values exist; otherwise it is the maximum of the scale (with \code{cold_color} as the minimum).
}}

\item{cold_color}{Character. The color for values below OR = 1 (only used
when \code{metric = "odds_ratio"}). Default is \code{"#4173B4"} (blue).
\itemize{
\item If \code{metric = "odds_ratio"} and any OR < 1, the scale runs from \code{cold_color}
(minimum) to \code{neutral_color} (OR = 1 if present; otherwise \code{neutral_color}
is the maximum).
\item Ignored if \code{metric = "jaccard"} or if all OR >= 1.
}}

\item{title}{Optional. Custom title for the plot. If \code{NULL}, the title
defaults to \code{"Signature Overlap"}.}

\item{jaccard_threshold}{(only if \code{metric = "jaccard"}) Numeric. Minimum
Jaccard index required for a gene set to be included in the plot. Default
is \code{0}.}

\item{msig_subset}{Optional. Character vector of MSigDB gene set names to
subset from the specified collection. Useful to restrict analysis to a
specific set of pathways. If supplied, other filters will apply only to
this subset. Use \code{collection = "all"} to mix gene sets from different
collections.}

\item{width_text}{Integer. Character wrap width for labels.}

\item{na_color}{Character. Color for NA values in the heatmap. Default is
\code{"grey90"}.}
}
\value{
Invisibly returns a list containing:
\describe{
\item{\code{plot}}{The \pkg{ggplot2} object of the similarity heatmap.}
\item{\code{data}}{The data frame object containing the similarity
scores per pair of gene sets.}
}
}
\description{
Visualizes similarity between user-defined gene signatures and either other
user-defined signatures or MSigDB gene sets, using either the Jaccard index
or Fisher's Odds Ratio. Produces a heatmap of pairwise similarity metrics.
}
\examples{
# Create two simple gene signatures
sig1 <- c("TP53", "BRCA1", "MYC", "EGFR", "CDK2")
sig2 <- c("ATXN2", "FUS", "MTOR", "CASP3")
signatures <- list(SignatureA = sig1, SignatureB = sig2)

# Compare the signatures using the Jaccard index
plt <- geneset_similarity(
  signatures = signatures,
  metric = "jaccard",
  collection = "H",
  jaccard_threshold = 0.01
)

# Print the returned list: the heatmap (plot) and the similarity scores (data)
print(plt)


# Odds ratio example (requires universe)
gene_universe <- unique(c(
  sig1, sig2,
  msigdbr::msigdbr(species = "Homo sapiens", category = "C2")$gene_symbol
))

plt_or <- geneset_similarity(
  signatures = signatures,
  metric = "odds_ratio",
  universe = gene_universe,
  collection = "H"
)
print(plt_or)

}
