% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{helpers}
\alias{helpers}
\alias{update_block}
\alias{get_measure_index}
\alias{get_train_val_sets}
\alias{quantile_computation}
\alias{evaluate_quantile_combinations}
\alias{execute_parallel_cv}
\alias{performance_measures}
\alias{compute_final_measures}
\alias{select_optimal_PLS}
\alias{execute_sequential_cv}
\alias{initialize_results}
\alias{permute_Y_matrix}
\alias{compute_permutation_stats}
\alias{select_samples}
\alias{fit_permuted_model}
\alias{evaluate_performance}
\alias{compute_pvalue}
\alias{compute_IC95}
\alias{jackknife_CIP_GIP}
\alias{subsampling_CIP_GIP}
\alias{generate_null_distributions}
\alias{permute_X_matrix}
\alias{calculate_pvalues}
\alias{perform_cv}
\alias{compute_validation_metrics}
\alias{detect_gene_type}
\alias{retrieve_one2one_orthologs}
\alias{FCtoExpression}
\alias{center_scale}
\alias{get_indices}
\alias{deflate_prediction}
\title{Update block of predictor matrices in matrixToBlock()}
\usage{
update_block(
  celltype,
  observed_gene_sets,
  block_predictor = block_predictor,
  matrix = matrix
)

get_measure_index(measure)

get_train_val_sets(X.matrix, Y.matrix, validation_index)

quantile_computation(
  j,
  ...,
  results_CV_summary_n,
  F_matrix_validation_bind,
  X.matrix,
  Y.matrix,
  PLS_term = 1,
  X.dim,
  quantile.comb.table,
  outcome.type = c("binary", "multiclass"),
  quantile_table_CV,
  K,
  n_quantile_comb,
  Method = NULL,
  measure = "B_accuracy",
  expected.measure.increase = 0.005,
  center = TRUE,
  scale = TRUE,
  maxiter = 100
)

evaluate_quantile_combinations(
  j,
  results_CV_summary_n,
  F_matrix_validation_bind,
  E_matrix_training,
  F_matrix_training,
  E_matrix_validation,
  F_matrix_validation,
  quantile_table_CV,
  i,
  X.dim,
  quantile.comb.table,
  outcome.type,
  center,
  scale,
  maxiter,
  Method
)

execute_parallel_cv(
  K,
  results_CV_summary_n,
  F_matrix_validation_bind,
  X.matrix,
  Y.matrix,
  PLS_term,
  X.dim,
  quantile.comb.table,
  outcome.type,
  quantile_table_CV,
  Method,
  measure,
  expected.measure.increase,
  center,
  scale,
  maxiter,
  BPPARAM = BiocParallel::bpparam()
)

performance_measures(
  n_quantile_comb,
  results_CV_summary_n,
  F_matrix_validation_bind,
  outcome.type,
  measure_selected
)

compute_final_measures(
  K,
  X.matrix,
  Y.matrix,
  i,
  X.dim,
  quantile_table_CV,
  outcome.type,
  center,
  scale,
  maxiter,
  Method
)

select_optimal_PLS(
  PLS_term,
  quantile_table_CV,
  X.dim,
  measure_selected,
  expected.measure.increase
)

execute_sequential_cv(
  K,
  n_quantile_comb,
  results_CV_summary_n,
  F_matrix_validation_bind,
  X.matrix,
  Y.matrix,
  PLS_term,
  X.dim,
  quantile.comb.table,
  outcome.type,
  quantile_table_CV,
  measure,
  expected.measure.increase,
  center,
  scale,
  maxiter,
  Method
)

initialize_results(npermut, q)

permute_Y_matrix(Y.matrix, nr, nbObsPermut, j)

compute_permutation_stats(res, Y.matrix, Ypermut, j, q, nr)

select_samples(object, nr, Nc)

fit_permuted_model(object, X_train, Y_train, maxiter)

evaluate_performance(
  res,
  Modelpermut,
  X_train,
  X_val,
  Y.matrix,
  s,
  measure,
  j,
  nr,
  Method,
  object
)

compute_pvalue(null_errors, CV_error)

compute_IC95(m)

jackknife_CIP_GIP(object, X.matrix, Y.matrix, K, maxiter, X.dim)

subsampling_CIP_GIP(
  object,
  X.matrix,
  Y.matrix,
  K,
  M,
  nsubsampling,
  maxiter,
  X.dim
)

generate_null_distributions(
  object,
  X.matrix,
  Y.matrix,
  npermut,
  K,
  X.dim,
  maxiter
)

permute_X_matrix(X.matrix, K, X.dim)

calculate_pvalues(variability, null_dist, test_func, ...)

perform_cv(
  object,
  model_block_matrices,
  nFC,
  measure,
  parallel,
  expected_measure_increase,
  maxiter,
  Method
)

compute_validation_metrics(
  output,
  optimal_hyperparameters,
  model_block_matrices,
  npermut,
  nbObsPermut,
  maxiter,
  global_significance_full,
  CIP.GIP_significance_full,
  type,
  nsubsampling,
  measure,
  Method
)

detect_gene_type(gene_set, mart)

retrieve_one2one_orthologs(
  annotation,
  gene_set,
  mart,
  from_species,
  to_species
)

FCtoExpression(model_object, b, samples, predictor_block, FC)

center_scale(data, fit_asmb)

get_indices(j, X.dim)

deflate_prediction(data, PLS, delta_cbind, fit_asmb)
}
\arguments{
\item{celltype}{Cell types modelled}

\item{observed_gene_sets}{Gene sets observed from your dataset}

\item{block_predictor}{Block of predictor matrices to update}

\item{matrix}{To iteratively update with block_predictor values}

\item{measure}{The performance measure used for validation (e.g. "F1" or
"B_accuracy"). In \code{quantile_computation()} the default is "B_accuracy".}

\item{X.matrix}{Predictor matrix.}

\item{Y.matrix}{Response matrix.}

\item{validation_index}{Index of the validation sample.}

\item{j}{Block to return indices for}

\item{...}{Other parameters of \code{test_func}}

\item{results_CV_summary_n}{Passed from \link{asmbPLSDA.cv.loo}}

\item{F_matrix_validation_bind}{Passed from \link{asmbPLSDA.cv.loo}}

\item{PLS_term}{Passed from \link{asmbPLSDA.cv.loo}}

\item{X.dim}{Vector with number of genes of each block}

\item{quantile.comb.table}{Passed from \link{asmbPLSDA.cv.loo}}

\item{outcome.type}{Passed from \link{asmbPLSDA.cv.loo}}

\item{quantile_table_CV}{Passed from \link{asmbPLSDA.cv.loo}}

\item{K}{Number of samples.}

\item{n_quantile_comb}{Passed from \link{asmbPLSDA.cv.loo}}

\item{Method}{The decision rule for prediction (e.g., "fixed_cutoff",
"Euclidean_distance_X", etc.).}

\item{expected.measure.increase}{Passed from \link{asmbPLSDA.cv.loo}}

\item{center}{Passed from \link{asmbPLSDA.cv.loo}}

\item{scale}{Passed from \link{asmbPLSDA.cv.loo}}

\item{maxiter}{The maximum number of iterations for validation tests.
Default is 100.}

\item{E_matrix_training}{Training predictor matrix.}

\item{F_matrix_training}{Training response matrix.}

\item{E_matrix_validation}{Validation predictor matrix.}

\item{F_matrix_validation}{Validation response matrix}

\item{i}{Passed from \link{asmbPLSDA.cv.loo}}

\item{BPPARAM}{A \code{BiocParallel::bpparam()} with parallelization options}

\item{measure_selected}{Passed from \link{asmbPLSDA.cv.loo}}

\item{npermut}{The number of permutations for significance testing.}

\item{q}{Number of classes.}

\item{nr}{Number of samples}

\item{nbObsPermut}{The number of samples to permute in each permutation.
Default is \code{NULL}.}

\item{res}{List of results to store statistics}

\item{Ypermut}{Permuted response matrix.}

\item{object}{A superpathway input list containing the data to be used
for the cross-validation.}

\item{Nc}{Number of samples to drop at each permutation.}

\item{X_train}{Training predictor blocks}

\item{Y_train}{Training response matrix.}

\item{Modelpermut}{Permuted asmbPLSDA model}

\item{X_val}{Validation predictor blocks}

\item{s}{Validation samples}

\item{null_errors}{A vector of errors from the null distribution
(permuted errors).}

\item{CV_error}{The observed cross-validation error.}

\item{m}{A vector of errors from the null distribution (permuted errors).}

\item{M}{Number of classes.}

\item{nsubsampling}{The number of subsamples for CIP/GIP testing. Default is
100.}

\item{variability}{A list of CIP or GIP values for observed distributions.}

\item{null_dist}{A list of CIP or GIP values for null distributions.}

\item{test_func}{The test function to use (typically Wilcoxon).}

\item{model_block_matrices}{A list containing the model block matrices
(predictor and response matrices).}

\item{nFC}{The number of folds for K-fold cross-validation. If \code{nFC == 1},
LOOCV is performed.}

\item{parallel}{A logical value indicating whether parallel computation
should be used.}

\item{expected_measure_increase}{Expected increase in measure per additional
PLS component. Default is 0.005.}

\item{output}{The superpathway fit model list that contains the
fitted model and validation information.}

\item{optimal_hyperparameters}{The optimal hyperparameters obtained
from cross-validation.}

\item{global_significance_full}{Boolean flag indicating whether to return
full global significance results.}

\item{CIP.GIP_significance_full}{Boolean flag indicating whether to return
full CIP/GIP significance results.}

\item{type}{The procedure type for generating CIP/GIP distributions. Can be
"jackknife" or "subsampling".}

\item{gene_set}{A parameter passed from \link{orthology_mapping}}

\item{mart}{A parameter passed from \link{orthology_mapping}}

\item{annotation}{A parameter passed from \link{orthology_mapping}, it
indicates the annotation of the gene set provided}

\item{from_species}{A parameter passed from \link{orthology_mapping}}

\item{to_species}{A parameter passed from \link{orthology_mapping}}

\item{model_object}{A superpathway fit model list}

\item{b}{A parameter passed from \link{singIST_treat}. The index of
current iteration block.}

\item{samples}{A parameter passed from \link{singIST_treat}. The samples
whose gene expression in \code{predictor_block} is to be modified.}

\item{predictor_block}{A parameter passed from \link{singIST_treat}.
The block of predictor matrices from asmbPLSDA whose gene expression is to be
modified.}

\item{FC}{A parameter passed from \link{singIST_treat}. A \code{data.frame} with
the Fold Changes, for a cell type, of each gene.}

\item{data}{Matrix of predictor block to deflate}

\item{fit_asmb}{asmbPLSDA fitted model}

\item{PLS}{Numeric value indicating the PLS component}

\item{delta_cbind}{Gene contributions (loadings) used to deflate the blocks}
}
\value{
A list containing the training and validation sets:
\item{E_matrix_validation}{Validation predictor matrix}
\item{F_matrix_validation}{Validation response matrix}
\item{E_matrix_training}{Training predictor matrix}
\item{F_matrix_training}{Training response matrix}

A numeric vector containing predicted values for validation samples.

A list containing updated \code{results_CV_summary_n} and
\code{F_matrix_validation_bind} matrices.

A vector with the performance measure of each quantile combination

Optimal quantile table for each PLS with all its performance measures

An integer with the optimal number of PLS

A list with the true class of each LOOCV sample and its predicted class
for each quantile combination

A list containing initialized data frames for permutation statistics.

A permuted response matrix.

Updated result list with permutation statistics.

A vector of selected sample indices.

The fitted asmbPLS-DA model.

Res list including the performance measure of the permuted model

The computed p-value.

A numeric vector containing the lower and upper bounds of the 95\%
confidence interval.

A list with the observed CIP and GIP distributions.

A list with the observed CIP and GIP distributions.

A list with the null CIP and GIP distributions.

A permuted X matrix.

A data frame of p-values.

A list containing the optimal hyperparameters and associated
quantile table.

The updated \code{superpathway.fit.model} object with the computed
validation metrics.

The identified gene annotation, or \code{NULL} if it could not be identified

A \code{data.table} object with the Ensembl identifiers of gene set for
from_species and to_species with only one to one orthologs

The predictor block matrix updated with the FC translation

The object \code{data} centered and scaled.

A vector with the indices of the predictor block matrix for the requested
block

The \code{data} matrix after loading deflation
}
\description{
Fill up matrix with the corresponding expression values

\code{get_measure_index()} returns the index associated to each performance
measure

Splits the predictor and response matrices into training and
validation sets for leave-one-out cross-validation.

Function to train and validate asmbPLSDA excluding one observation
parallelized for each quantile combination provided

Computes the prediction accuracy for different quantile
combinations by fitting the asmbPLSDA model and making predictions.

Performs leave-one-out cross-validation (LOO-CV) in parallel.

Computes the selected performance measure between the training LOOCV samples
and the validation LOOCV samples for all the quantile combinations

For an optimal quantile combination and PLS component it computes its
performance metrics between the training and validation sets

Selects the optimal number of PLS according to the performance measure

Iterates over all quantiles to generate the fitted asmbPLSDA for each and
its associated predicted values

Creates a structured list to store permutation results.

Performs random permutations of the response matrix.

Calculates correlation, percentage change, and RV coefficient.

Selects sample indices for training and validation.

Fits the asmbPLS-DA model using permuted data.

Computes the p-value for the observed CV error against the null
distribution of errors generated from permutation testing.

Calculates the 95\% confidence interval for the null distribution
of permutation errors.

Perform the jackknife resampling procedure for CIP/GIP
calculations.

Perform the subsampling procedure for CIP/GIP calculations.

Generate null distributions of CIP and GIP using permutations.

Permute the X matrix to generate a null distribution.

Compute p-values by applying the Mann-Whitney test.

This helper function performs either Leave-One-Out Cross Validation
(LOOCV) or K-Fold Cross Validation (KCV) on the given dataset and returns
the optimal hyperparameters based on the specified accuracy measure.

This helper function computes various validation metrics, including global
significance, CIP/GIP significance, and adjusted p-values for the fitted
model based on cross-validation results.

For a given gene set, identifies the annotation of the genes. An annotation
is identified if more than 50\% of the genes match it. The annotation
must be either Ensembl, Entrez or Gene Symbols.

Retrieves one to one orthologs between from_species and to_species of
\link{orthology_mapping}

Applies the biological link function conditions onto a predictor block
matrix. The resulting gene expression values of the predictor block follow
the cases defined in the biological link function.

Centers and scales each column of the predictor block matrices. The
centering and scaling is according to the centroid and variance estimated in
\code{fit_asmb}.

Given a block and the dimensions of all blocks it returns the indices of the
genes belonging to that block within the predictor block matrix

Performs loading deflation for a given predictor block and PLS component
}
\examples{
measure <- "F1"
get_measure_index(measure)
X <- matrix(rnorm(100), nrow = 10, ncol = 10)
Y <- matrix(sample(0:1, 10, replace = TRUE), ncol = 1)
result <- get_train_val_sets(X, Y, validation_index = 2)
str(result)
E_train <- matrix(rnorm(100), nrow = 10, ncol = 10)
F_train <- matrix(sample(0:1, 10, replace = TRUE), ncol = 1)
E_valid <- matrix(rnorm(10), nrow = 1, ncol = 10)
F_valid <- matrix(1, nrow = 1, ncol = 1)
quantile_table <- matrix(runif(2), nrow = 1, ncol = 2)
quantile_table_CV <- matrix(runif(7), nrow = 1, ncol = 7)
results_CV_summary_n <- matrix(0, nrow = 1, ncol = 2)
F_matrix_validation_bind <- matrix(0, nrow = 1, ncol = 2)
result <- evaluate_quantile_combinations(j=1, E_matrix_training = E_train,
                                         F_matrix_training = F_train,
                                         E_matrix_validation = E_valid,
                                         F_matrix_validation = F_valid,
                                         F_matrix_validation_bind =
                                         F_matrix_validation_bind,
                                         results_CV_summary_n =
                                         results_CV_summary_n,
                                         quantile_table_CV=quantile_table_CV,
                                         i = 1, X.dim = c(5,5),
                                         quantile.comb.table =quantile_table,
                                         outcome.type = "binary",
                                         center = TRUE,
                                         scale = TRUE, maxiter = 100,
                                         Method = NULL)
print(result)
set.seed(123)
K <- 5
X <- matrix(rnorm(50), nrow = 5, ncol = 10)
Y <- matrix(sample(0:1, 5, replace = TRUE), ncol = 1)
quantile_comb_table <- matrix(runif(10), nrow = 2, ncol = 10)
results_CV_summary_n <- matrix(0, nrow = 2, ncol = K)
F_matrix_validation_bind <- matrix(0, nrow = 2, ncol = K)
# Parallelization options
library(BiocParallel)
register(SnowParam(workers = 2, exportglobals = FALSE, progressbar = TRUE),
default = TRUE)
output <- execute_parallel_cv(K, results_CV_summary_n,
                              F_matrix_validation_bind, X, Y, PLS_term = 1,
                              X.dim = c(5,5),
                              quantile.comb.table = quantile_comb_table,
                              outcome.type = "binary",
                              quantile_table_CV = quantile_comb_table,
                              measure = "B_accuracy",
                              expected.measure.increase = 0.005,
                              center = TRUE, scale = TRUE, maxiter = 100,
                              Method = NULL)
register(SerialParam(), default = TRUE) # disable parallelization
str(output)
initialize_results(100, 3)
permute_Y_matrix(matrix(rnorm(100), 10, 10), nr = 10, nbObsPermut = 3, j = 2)
res <- initialize_results(100, 3)
compute_permutation_stats(res, matrix(rnorm(100), 10, 10),
matrix(rnorm(100), 10, 10), j = 2, q = 3, nr = 10)
null_errors <- c(0.3, 0.4, 0.35, 0.33)
CV_error <- 0.32
compute_pvalue(null_errors, CV_error)
null_errors <- c(0.3, 0.4, 0.35, 0.33)
compute_IC95(null_errors)
library(biomaRt)
gene_set <- c("IL13", "IL4", "IL5", "IL21")
mart <- biomaRt::useMart(biomart = "ensembl",
dataset = "hsapiens_gene_ensembl")
detect_gene_type(gene_set, mart)
annotation <- "external_gene_name"
gene_set <- c("IL13", "IL4", "IL5")
mart <- biomaRt::useMart(biomart = "ensembl", dataset = paste0("hsapiens",
"_gene_ensembl"))
retrieve_one2one_orthologs(annotation, gene_set, mart, "hsapiens",
"mmusculus")
X.dim <- c(30,40,60)
j <- 2
get_indices(j, X.dim)
}
