% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MSstatsConvert_core_functions.R
\name{MSstatsPreprocess}
\alias{MSstatsPreprocess}
\title{Preprocess outputs from MS signal processing tools for analysis with MSstats}
\usage{
MSstatsPreprocess(
  input,
  annotation,
  feature_columns,
  remove_shared_peptides = TRUE,
  remove_single_feature_proteins = TRUE,
  feature_cleaning = list(remove_features_with_few_measurements = TRUE,
    summarize_multiple_psms = max),
  score_filtering = list(),
  exact_filtering = list(),
  pattern_filtering = list(),
  columns_to_fill = list(),
  aggregate_isotopic = FALSE,
  anomaly_metrics = c(),
  ...
)
}
\arguments{
\item{input}{data.table processed by the MSstatsClean function.}

\item{annotation}{annotation file generated by a signal processing tool.}

\item{feature_columns}{character vector of names of columns that
define spectral features.}

\item{remove_shared_peptides}{logical, if TRUE shared peptides will be removed.}

\item{remove_single_feature_proteins}{logical, if TRUE, proteins that only have
one feature will be removed.}

\item{feature_cleaning}{named list with at most two (for \code{MSstats} converters)
or three (for \code{MSstatsTMT} converters) elements. If
\code{remove_features_with_few_measurements} is set to \code{TRUE}, features with
fewer than three measurements will be removed (otherwise it should be \code{FALSE}).
\code{summarize_multiple_psms} is a function
that will be used to aggregate multiple feature measurements in a run. It should
return a scalar and accept an \code{na.rm} parameter. For \code{MSstatsTMT} converters,
setting \code{remove_psms_with_any_missing} will remove features which have missing
values in a run from that run.}

\item{score_filtering}{a list of named lists that specify filtering options.
Details are provided in the vignette.}

\item{exact_filtering}{a list of named lists that specify filtering options.
Details are provided in the vignette.}

\item{pattern_filtering}{a list of named lists that specify filtering options.
Details are provided in the vignette.}

\item{columns_to_fill}{a named list of scalars. If provided, columns with
names defined by the names of this list and values corresponding to its elements
will be added to the output \code{data.frame}.}

\item{aggregate_isotopic}{logical. If \code{TRUE}, isotopic peaks will be summed.}

\item{anomaly_metrics}{character vector of names of columns with quality metrics. Defaults to an empty vector; it is not required if the anomaly model is not run.}

\item{...}{additional parameters to \code{data.table::fread}.}
}
\value{
data.table
}
\description{
Preprocess outputs from MS signal processing tools for analysis with MSstats
}
\examples{
# Locate the example MaxQuant evidence and protein groups files
# that ship with the MSstatsConvert package.
evidence_path = system.file("tinytest/raw_data/MaxQuant/mq_ev.csv", 
                            package = "MSstatsConvert")
pg_path = system.file("tinytest/raw_data/MaxQuant/mq_pg.csv", 
                      package = "MSstatsConvert")
evidence = read.csv(evidence_path)
pg = read.csv(pg_path)
# Import the two tables, then clean the imported data; the cleaned
# data is what MSstatsPreprocess takes as its input.
imported = MSstatsImport(list(evidence = evidence, protein_groups = pg),
                         "MSstats", "MaxQuant")
cleaned_data = MSstatsClean(imported, protein_id_col = "Proteins")
# Build the annotation from the cleaned data and the example annotation file.
annot_path = system.file("tinytest/raw_data/MaxQuant/annotation.csv", 
                         package = "MSstatsConvert")
mq_annot = MSstatsMakeAnnotation(cleaned_data, read.csv(annot_path),
                                 Run = "Rawfile")
                               
# To filter M-peptides and oxidation peptides:
# each filter is a named list giving the column, the pattern to match,
# and the filter / drop_column flags.
m_filter = list(col_name = "PeptideSequence", pattern = "M", 
                filter = TRUE, drop_column = FALSE)
oxidation_filter = list(col_name = "Modifications", pattern = "Oxidation", 
                        filter = TRUE, drop_column = TRUE)
# Run preprocessing with both pattern filters; FragmentIon and
# ProductCharge are passed via columns_to_fill with NA values.
msstats_format = MSstatsPreprocess(
cleaned_data, mq_annot, 
feature_columns = c("PeptideSequence", "PrecursorCharge"),
columns_to_fill = list(FragmentIon = NA, ProductCharge = NA),
pattern_filtering = list(oxidation = oxidation_filter, m = m_filter)
)
# Output in the standard MSstats format
head(msstats_format)

}
