% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions-TopDownSet.R
\name{readTopDownFiles}
\alias{readTopDownFiles}
\title{Read top-down files.}
\usage{
readTopDownFiles(
  path,
  pattern = ".*",
  type = c("a", "b", "c", "x", "y", "z"),
  modifications = c("Carbamidomethyl", "Acetyl", "Met-loss"),
  customModifications = data.frame(),
  adducts = data.frame(),
  neutralLoss = PSMatch::defaultNeutralLoss(),
  sequenceOrder = c("original", "random", "inverse"),
  tolerance = 5e-06,
  redundantIonMatch = c("remove", "closest"),
  redundantFragmentMatch = c("remove", "closest"),
  dropNonInformativeColumns = TRUE,
  sampleColumns = c("Mz", "AgcTarget", "EtdReagentTarget", "EtdActivation",
    "CidActivation", "HcdActivation", "UvpdActivation"),
  conditions = "ScanDescription",
  verbose = interactive()
)
}
\arguments{
\item{path}{\code{character},
path to directory that contains the top-down files.}

\item{pattern}{\code{character},
a filename pattern, the default \verb{.*} means all files.}

\item{type}{\code{character},
type of fragments, currently \emph{a-c} and \emph{x-z} are
supported, see
\code{\link[PSMatch:calculateFragments]{PSMatch::calculateFragments()}}
for details.}

\item{modifications}{\code{character},
unimod names of modifications that should be applied.
Currently just \emph{Acetyl} (Unimod:1 but just protein N-term),
\emph{Carbamidomethyl} (Unimod:4) and
\emph{Met-loss} (Unimod:765) are supported.
\emph{Met-loss} removes M
(if followed by A, C, G, P, S, T, or V);
see also
\url{http://www.unimod.org/modifications_view.php?editid1=1},
\url{http://www.unimod.org/modifications_view.php?editid1=4}, and
\url{http://www.unimod.org/modifications_view.php?editid1=765} for details.
Use \code{NULL} to disable all modifications.}

\item{customModifications}{\code{data.frame},
with 4 columns, namely: mass, name, location, variable, see details section.}

\item{adducts}{\code{data.frame},
with 3 columns, namely: mass, name, to, see details section.}

\item{neutralLoss}{\code{list},
neutral loss that should be applied, see
\code{\link[PSMatch:calculateFragments]{PSMatch::calculateFragments()}} and
\code{\link[PSMatch:calculateFragments]{PSMatch::defaultNeutralLoss()}}
for details.}

\item{sequenceOrder}{\code{character},
order of the sequence before fragment calculation and matching is done.
\code{"original"} doesn't change anything.
\code{"inverse"} reverses the sequence and
\code{"random"} arranges the amino acid sequence at random.}

\item{tolerance}{\code{double},
tolerance in \emph{ppm} that is used to match the
theoretical fragments with the observed ones.}

\item{redundantIonMatch}{\code{character}, a mz could be matched to one, two or
more fragments. If it is matched against more than one fragment the match
could be \code{"remove"}d or the match to the \code{"closest"} fragment could be
chosen.}

\item{redundantFragmentMatch}{\code{character}, one or more mz could be matched to
the same fragment, these matches could be \code{"remove"}d or the match to the
\code{"closest"} mz is chosen.}

\item{dropNonInformativeColumns}{\code{logical},
should columns with just one identical value across all runs be removed?}

\item{sampleColumns}{\code{character},
column names of the \code{\link[=colData]{colData()}}
used to define a sample (technical replicate). This is used to add the
\code{Sample} column (used for easier aggregation, etc.).}

\item{conditions}{\code{character}/\code{numeric}, one of:
\itemize{
\item \code{"ScanDescription"} (default): create condition IDs based on the given
"Scan Description" parameter (set automatically by
\code{\link[=createExperimentsFragmentOptimisation]{createExperimentsFragmentOptimisation()}}).
\item \code{"FilterString"}: create condition IDs based on mass labels in
the \emph{FilterString} column (included for backward compatibility, used
in \code{\link[=writeMethodXmls]{writeMethodXmls()}} prior to version 1.5.2 in Dec 2018).
\item A single \code{numeric} value giving the number of conditions.
}}

\item{verbose}{\code{logical}, verbose output?}
}
\value{
A \code{TopDownSet} object.
}
\description{
It creates a \linkS4class{TopDownSet} object and is its only constructor.
}
\details{
\code{readTopDownFiles} reads and processes all top-down files, namely:
\itemize{
\item \code{.fasta} (protein sequence)
\item \code{.mzML} (spectra)
\item \code{.experiments.csv} (method/fragmentation conditions)
\item \code{.txt} (scan header information)
}

\code{customModifications}: in addition to the provided unimod modifications
available through the \code{modifications} argument, \code{customModifications} allows
applying user-defined modifications to the output of
\code{\link[PSMatch:calculateFragments]{PSMatch::calculateFragments()}}.
The \code{customModifications} argument takes a
\code{data.frame} with the \code{mass} to add, the \code{name} of the modification, the
location (could be the position of the amino acid or "N-term"/"C-term"),
whether the modification is always seen (\code{variable=FALSE}) or both, the
modified and unmodified amino acid are present (\code{variable=TRUE}), e.g.
for Acetylation (which is available via \code{modifications="Acetyl"})
\code{data.frame(mass=42.010565, name="Acetyl", location="N-term", variable=FALSE)}
or a variable one (that could be present or not):
\code{data.frame(mass=365.132, name="Custom", location=10, variable=TRUE)}

If the \code{customModifications} \code{data.frame} contains multiple rows, the
modifications are applied one at a time, from the first row to the last row.

\code{adducts}: \emph{Thermo's Xtract}
allows some mistakes in deisotoping, mostly it
allows \verb{+/- C13-C12} and \verb{+/- H+}.
The \code{adducts} argument takes a
\code{data.frame} with the \code{mass} to add, the \code{name}
that should be assigned to these
new fragments and the information \code{to}
which fragment type the modification should be
applied, e.g. for \verb{H+} on \code{z},
\code{data.frame(mass=1.008, name="zpH", to="z")}.

\emph{Please note:} The \code{adducts} are added to the output of
\code{\link[PSMatch:calculateFragments]{PSMatch::calculateFragments()}}.
That has some limitations, e.g.
neutral loss calculation could not be done in
\link{topdownr-package}.
If neutral loss should be applied on adducts you have to create
additional rows, e.g.:
\code{data.frame(mass=c(1.008, 1.008), name=c("cpH", "cpH_"), to=c("c", "c_"))}.
}
\examples{
if (require("topdownrdata")) {
    # add H+ to z and no neutral loss of water
    tds <- readTopDownFiles(
        topdownrdata::topDownDataPath("myoglobin"),
        ## Use an artificial pattern to load just the fasta
        ## file and files from m/z == 1211, ETD reagent
        ## target 1e6 and first replicate to keep runtime
        ## of the example short
        pattern=".*fasta.gz$|1211_.*1e6_1",
        adducts=data.frame(mass=1.008, name="zpH", to="z"),
        neutralLoss=PSMatch::defaultNeutralLoss(
            disableWaterLoss=c("Cterm", "D", "E", "S", "T")),
        tolerance=25e-6
   )
}
}
\seealso{
\code{\link[PSMatch:calculateFragments]{PSMatch::calculateFragments()}},
\code{\link[PSMatch:calculateFragments]{PSMatch::defaultNeutralLoss()}}
}
