% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/filterSignature.R
\name{filterSignature}
\alias{filterSignature}
\title{Filter the L1000 Signature
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}}
\usage{
filterSignature(signature, direction = "any", threshold = NULL, prop = NULL)
}
\arguments{
\item{signature}{A data.frame, tibble, or DataFrame containing the L1000 signature.
Must contain a column named "Value_LogDiffExp" with log fold-change values.}

\item{direction}{Character string specifying the direction to filter.
Must be one of "up" (up-regulated genes only), "down" (down-regulated genes only),
or "any" (both up- and down-regulated genes). Defaults to "any".}

\item{threshold}{Numeric value or vector specifying the log fold-change threshold(s).
Can be:
\itemize{
\item A single positive value: creates symmetric thresholds
(\eqn{\pm threshold}{+/- threshold})
\item A vector of two values: the first value is the down-regulated threshold,
the second value is the up-regulated threshold
}
Cannot be specified together with \code{prop}. One of \code{threshold} or
\code{prop} must be provided.}

\item{prop}{Numeric value between 0 and 1 specifying the proportion of genes
to select from the top and bottom of the expression distribution. For example,
\code{prop = 0.1} selects the top 10\% most up-regulated and bottom 10\%
most down-regulated genes. Cannot be specified together with \code{threshold}.}
}
\value{
A tibble containing the filtered L1000 signature with the same structure
as the input but containing only genes that meet the filtering criteria.
}
\description{
This function filters the L1000 signature to a given threshold, identifying
up-regulated, down-regulated, or both up- and down-regulated genes. The
function supports both absolute threshold filtering and proportional filtering
based on quantiles of the expression data.
}
\details{
The filtering process follows these steps:
\enumerate{
\item Input validation: Checks data frame structure and parameter consistency
\item Threshold calculation: Computes filtering thresholds based on either
absolute values (\code{threshold}) or quantiles (\code{prop})
\item Direction-based filtering: Applies the computed thresholds according
to the specified direction
}

When using \code{threshold}:
\itemize{
\item Single value: Genes with |logFC| >= \code{threshold} are retained
\item Two values: Genes with logFC <= \code{threshold[1]} OR logFC >= \code{threshold[2]} are retained
}

When using \code{prop}:
\itemize{
\item Thresholds are calculated as quantiles of the expression distribution
\item Down threshold = quantile(logFC, prop)
\item Up threshold = quantile(logFC, 1 - prop)
}
}
\examples{
# Create a mock signature for demonstration: 20 genes whose log fold-changes
# range from -3.5 to 3.8 (10 negative values, 10 positive values)
mockSignature <- data.frame(
    signatureID = rep("MOCK001", 20),
    Name_GeneSymbol = paste0("GENE", 1:20),
    ID_geneid = 1:20,
    Value_LogDiffExp = c(
        -3.5, -2.8, -2.1, -1.5, -1.2, -0.8, -0.5, -0.3,
        -0.1, 0.1, 0.3, 0.6, 0.9, 1.2, 1.6, 2.0, 2.4, 2.9, 3.3, 3.8
    )
)

# Example 1: Filter by symmetric absolute threshold
# Keeps genes with |logFC| >= 1.5
filteredSymmetric <- filterSignature(mockSignature, threshold = 1.5)
nrow(filteredSymmetric) # Should return 10 genes (4 down + 6 up)

# Example 2: Filter by asymmetric absolute thresholds
# Keeps genes with logFC <= -2.0 OR logFC >= 2.5
filteredAsymmetric <- filterSignature(mockSignature, threshold = c(-2.0, 2.5))
nrow(filteredAsymmetric) # Should return 6 genes (3 down + 3 up)

# Example 3: Filter by proportion (top and bottom 20\%)
# Thresholds are the 20\% and 80\% quantiles of the mock logFC values
filteredProportion <- filterSignature(mockSignature, prop = 0.2)
nrow(filteredProportion) # Should return 8 genes (4 up + 4 down)

# Example 4: Filter only up-regulated genes by threshold
upRegulated <- filterSignature(mockSignature, direction = "up", threshold = 1.0)
all(upRegulated$Value_LogDiffExp >= 1.0) # Should be TRUE

# Example 5: Filter only down-regulated genes by threshold
downRegulated <- filterSignature(mockSignature, direction = "down", threshold = 1.0)
all(downRegulated$Value_LogDiffExp <= -1.0) # Should be TRUE

# Network-dependent examples using real iLINCS data
# Get the L1000 signature for LINCSKD_28
kdSignature <- getSignature("LINCSKD_28")

# Filter for top 5\% most extreme genes
topExtreme <- filterSignature(kdSignature, prop = 0.05)

# Get top 20\% most up-regulated genes
topUpregulated <- filterSignature(kdSignature, direction = "up", prop = 0.2)
}
\seealso{
\code{\link{getSignature}} for retrieving L1000 signatures from iLINCS,
\code{\link{prepareSignature}} for preparing custom signatures,
\code{\link{getConcordants}} for finding concordant signatures
}
