% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/spectrum-import-functions.R
\name{msms_spectra_hmdb}
\alias{msms_spectra_hmdb}
\title{Import MS/MS spectra from HMDB xml files}
\usage{
msms_spectra_hmdb(x, collapsed = TRUE)
}
\arguments{
\item{x}{\code{character(1)} with the path to the directory containing the xml files.}

\item{collapsed}{\code{logical(1)} whether the returned \code{data.frame} should be
\emph{collapsed} or \emph{expanded}. See description for more details.}
}
\value{
\code{data.frame} with one row per spectrum (\code{collapsed = TRUE}) or one row
per peak (\code{collapsed = FALSE}), and the following columns:
\itemize{
\item spectrum_id (\code{integer}): an arbitrary, unique ID identifying values
from one xml file.
\item original_spectrum_id (\code{character}): the HMDB-internal ID of the spectrum.
\item compound_id (\code{character}): the HMDB compound ID the spectrum is associated
with.
\item polarity (\code{integer}): 0 for negative, 1 for positive, \code{NA} for not set.
\item collision_energy (\code{numeric}): collision energy voltage.
\item predicted (\code{logical}): whether the spectrum is predicted or experimentally
verified.
\item splash (\code{character}): the SPLASH (SPectraL hASH) key of the spectrum
(Wohlgemuth 2016).
\item instrument_type (\code{character}): the type of MS instrument on which the
spectrum was measured.
\item instrument (\code{character}): the MS instrument (not available for all spectra
in HMDB).
\item precursor_mz (\code{numeric}): not provided by HMDB and thus \code{NA}.
\item mz (\code{numeric} or \code{list} of \code{numeric}): m/z values of the spectrum.
\item intensity (\code{numeric} or \code{list} of \code{numeric}): intensity of the spectrum.
}
}
\description{
\code{msms_spectra_hmdb()} imports MS/MS spectra from corresponding xml files from
HMDB (\url{http://www.hmdb.ca}) and returns the data as a \code{data.frame}. HMDB
stores MS/MS spectrum data in xml files, one file per spectrum.

Depending on the parameter \code{collapsed}, the returned \code{data.frame} is either
\emph{collapsed}, meaning that each row represents data from one spectrum xml
file, or \emph{expanded} with one row for each m/z and intensity pair for each
spectrum. Columns \code{"mz"} and \code{"intensity"} are of type \code{list} for
\code{collapsed = TRUE} and \code{numeric} for \code{collapsed = FALSE}.
}
\note{
The HMDB xml files are supposed to be extracted from the downloaded zip file
into a folder and should not be renamed. The function identifies xml files
containing MS/MS spectra by their file name.

The same spectrum ID can be associated with multiple compounds. Thus, the
function assigns an arbitrary ID (column \code{"spectrum_id"}) to values from
each file. The original ID of the spectrum in HMDB is provided in column
\code{"original_spectrum_id"}.
}
\examples{

## Locate the folder within the package containing test xml files.
pth <- system.file("xml", package = "CompoundDb")

## List all files in that directory
dir(pth)

## Import spectrum data from HMDB MS/MS spectrum xml files in that directory
msms_spectra_hmdb(pth)

## Import the data as an *expanded* data frame, i.e. with a row for each
## single m/z (intensity) value.
msms_spectra_hmdb(pth, collapsed = FALSE)
}
\references{
Wohlgemuth G, Mehta SS, Mejia RF, Neumann S, Pedrosa D, Pluskal T,
Schymanski EL, Willighagen EL, Wilson M, Wishart DS, Arita M,
Dorrestein PC, Bandeira N, Wang M, Schulze T, Selak RM, Steinbeck C,
Nainala VC, Mistrik R, Nishioka T, Fiehn O. SPLASH, a hashed identifier for
mass spectra. Nature Biotechnology 2016 34(11):1099-1101.
}
\seealso{
\code{\link[=createCompDb]{createCompDb()}} for the function to create a \link{CompDb} database with
compound annotation and spectrum data.

Other spectrum data import functions:
\code{\link{msms_spectra_mona}()}
}
\author{
Johannes Rainer
}
\concept{spectrum data import functions.}
