% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stability_selection.R
\name{randLassoStabSel}
\alias{randLassoStabSel}
\title{Randomized Lasso Stability Selection}
\usage{
randLassoStabSel(
  x,
  y,
  weakness = 0.8,
  cutoff = 0.8,
  PFER = 2,
  mc.cores = 1L,
  glmnet.args = list(),
  ...
)
}
\arguments{
\item{x}{The predictor matrix.}

\item{y}{The response vector. This should be a numeric vector (also for 
the binomial case - in this case it will be converted to a factor 
internally by \code{glmnet}).}

\item{weakness}{Value between 0 and 1 (default = 0.8).
It affects how strict the method will be in selecting predictors. The
closer it is to 0, the more stringent the selection. A weakness value
of 1 is identical to performing lasso stability selection (not the
randomized version).}

\item{cutoff}{Value between 0 and 1 (default = 0.8) which is the cutoff
for the selection probability. Any variable with a selection probability
that is higher than the set cutoff will be selected.}

\item{PFER}{Integer (default = 2) representing the absolute number of
false positives that we allow for in the final list of selected
variables. For details see Meinshausen and Bühlmann (2010).}

\item{mc.cores}{Integer (default = 1) specifying the number of cores to
use in \code{\link[parallel]{mclapply}}, which is the default way
\code{\link[stabs]{stabsel}} does parallelization.}

\item{glmnet.args}{Named list with additional arguments to the internal
\code{.glmnetRandomizedLasso} function (beyond \code{x},
\code{y} and \code{weakness}, which are determined automatically, and
\code{q}, which should not be specified as it will be determined from
\code{cutoff} and \code{PFER}).
The available arguments to \code{.glmnetRandomizedLasso} are the same as 
the ones for \code{\link[stabs]{glmnet.lasso}}. A typical use case would 
be to define the \code{family} argument to \code{\link[glmnet]{glmnet}} 
(currently "gaussian" and "binomial" are supported).}

\item{...}{Additional parameters that can be passed on to
\code{\link[stabs]{stabsel}}.}
}
\value{
A \code{SummarizedExperiment} object where the rows are the
    observations and the columns the predictors (same dimnames as the
    predictor matrix \code{x}).
    It contains: \describe{
      \item{assays}{: \describe{
        \item{x}{: the predictor matrix.}
        }
      }
      \item{rowData}{: a \code{DataFrame} with columns: \describe{
        \item{y}{: the response vector.}
        }
      }
      \item{colData}{: a \code{DataFrame} with columns: \describe{
        \item{selProb}{: the final selection probabilities for the
          predictors (from the last regularization step).}
        \item{selected}{: logical indicating the predictors that made
          the selection with the specified cutoff.}
        \item{selAUC}{: the normalized area under the selection curve
          (mean of selection probabilities over regularization steps).}
        \item{reg'\code{i}'}{: columns containing the selection
          probabilities for regularization step i. }
        }
      }
      \item{metadata}{: a list of output returned from
        \code{\link[stabs]{stabsel}} and \code{randLassoStabSel}: \describe{
        \item{stabsel.params.cutoff}{: probability cutoff set for selection
          of predictors (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.selected}{: elements with maximal selection
          probability greater than \code{cutoff}
          (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.max}{: maximum of selection probabilities
          (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.q}{: average number of selected variables
          used (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.PFER}{: (realized) upper bound for the
          per-family error rate (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.specifiedPFER}{: specified upper bound for
          the per-family error rate (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.p}{: the number of effects subject to
          selection (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.B}{: the number of subsamples (see
          \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.sampling.type}{: the sampling type used for
          stability selection (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.assumption}{: the assumptions made on the
          selection probabilities (see \code{\link[stabs]{stabsel}}).}
        \item{stabsel.params.call}{: the call (see
          \code{\link[stabs]{stabsel}}).}
        \item{randStabsel.params.weakness}{: the weakness parameter in the
          randomized lasso stability selection.}
        }
      }

    }
}
\description{
This function runs randomized lasso stability selection as
    presented by Meinshausen and Bühlmann (2010) and with the improved
    error bounds introduced by Shah and Samworth (2013). The function
    uses the \code{\link[stabs]{stabsel}} function from the \code{stabs}
    package, but implements the randomized lasso version.
}
\details{
Randomized lasso stability selection runs a randomized lasso
    regression several times on subsamples of the response variable and
    predictor matrix. N/2 elements from the response variable are randomly
    chosen in each regression, where N is the length of the vector. The
    corresponding section of the predictor matrix is also chosen, and the
    internal \code{.glmnetRandomizedLasso} function is applied.
    Stability selection results in selection probabilities for each
    predictor. The probability of a specific predictor is the number of
    times it was selected divided by the total number of subsamples that
    were done (total number of times the regression was performed).

    We made use of the \code{stabs} package that implements lasso stability
    selection, and adapted it to run randomized lasso stability selection.
}
\examples{
## simulate a response vector and a 500 x 50 matrix of uniform predictors
Y <- rnorm(n = 500, mean = 2, sd = 1)
X <- matrix(data = runif(n = 500 * 50, min = 0, max = 3),
            nrow = length(Y), ncol = 50)

## pick 10 predictor columns at random and add the response to each of them
s_cols <- sample(x = seq_len(ncol(X)), size = 10,
  replace = FALSE)
X[, s_cols] <- X[, s_cols] + Y

## run randLassoStabSel() on a single core, with a fixed seed
set.seed(123)
ss <- randLassoStabSel(x = X, y = Y)

## run randLassoStabSel() in parallel mode, with a fixed seed
## (only works on non-windows machines)
if (.Platform$OS.type == "unix") {
    RNGkind("L'Ecuyer-CMRG")
    set.seed(123)
    ss <- randLassoStabSel(x = X, y = Y, mc.preschedule = TRUE,
                           mc.set.seed = TRUE, mc.cores = 2L)
}

}
\references{
N. Meinshausen and P. Bühlmann (2010), Stability Selection,
    \emph{Journal of the Royal Statistical Society: Series B
    (Statistical Methodology)}, \strong{72}, 417–473. \cr
    R.D. Shah and R.J. Samworth (2013), Variable Selection with Error
    Control: Another Look at Stability Selection,
    \emph{Journal of the Royal Statistical Society: Series B
    (Statistical Methodology)}, \strong{75}, 55–80. \cr
    B. Hofner, L. Boccuto, and M. Göker (2015), Controlling False
    Discoveries in High-Dimensional Situations: Boosting with Stability
    Selection, \emph{BMC Bioinformatics}, \strong{16}, 144.
}
\seealso{
\code{\link[stabs]{stabsel}}
}
