% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CompDb.R, R/CompDb-methods.R
\name{CompDb}
\alias{CompDb}
\alias{CompDb-class}
\alias{show}
\alias{dbconn,CompDb-method}
\alias{show,CompDb-method}
\alias{compoundVariables}
\alias{insertSpectra}
\alias{deleteSpectra}
\alias{mass2mz}
\alias{mass2mz,ANY-method}
\alias{insertCompound}
\alias{deleteCompound}
\alias{deleteCompound,IonDb-method}
\alias{hasMsMsSpectra}
\alias{src_compdb}
\alias{tables}
\alias{copyCompDb}
\alias{Spectra,CompDb-method}
\alias{supportedFilters,CompDb-method}
\alias{metadata,CompDb-method}
\alias{spectraVariables,CompDb-method}
\alias{compoundVariables,CompDb-method}
\alias{compounds,CompDb-method}
\alias{insertSpectra,CompDb,Spectra-method}
\alias{deleteSpectra,CompDb-method}
\alias{mass2mz,CompDb-method}
\alias{insertCompound,CompDb-method}
\alias{deleteCompound,CompDb-method}
\title{Simple compound (metabolite) databases}
\usage{
CompDb(x, flags = SQLITE_RO)

hasMsMsSpectra(x)

src_compdb(x)

tables(x)

copyCompDb(x, y)

\S4method{dbconn}{CompDb}(x)

\S4method{Spectra}{CompDb}(object, filter, ...)

\S4method{supportedFilters}{CompDb}(object)

\S4method{metadata}{CompDb}(x, ...)

\S4method{spectraVariables}{CompDb}(object, ...)

\S4method{compoundVariables}{CompDb}(object, includeId = FALSE, ...)

\S4method{compounds}{CompDb}(
  object,
  columns = compoundVariables(object),
  filter,
  return.type = c("data.frame", "tibble"),
  ...
)

\S4method{insertSpectra}{CompDb,Spectra}(object, spectra, columns = spectraVariables(spectra), ...)

\S4method{deleteSpectra}{CompDb}(object, ids = integer(0), ...)

\S4method{mass2mz}{CompDb}(x, adduct = c("[M+H]+"), name = "formula")

\S4method{insertCompound}{CompDb}(object, compounds = data.frame(), addColumns = FALSE)

\S4method{deleteCompound}{CompDb}(object, ids = character(), recursive = FALSE, ...)
}
\arguments{
\item{x}{For \code{CompDb()}: \code{character(1)} with the file name of the SQLite
compound database. Alternatively it is possible to provide the
connection to the database with parameter \code{x}. For \code{copyCompDb()}:
either a \code{CompDb} or a database connection.

For all other methods: a \code{CompDb} object.}

\item{flags}{flags passed to the SQLite database connection.
See \code{\link[RSQLite:SQLite]{RSQLite::SQLite()}}. Defaults to read-only, i.e.
\code{RSQLite::SQLITE_RO}.}

\item{y}{For \code{copyCompDb()}: connection to a database to which the content
should be copied.}

\item{object}{For all methods: a \code{CompDb} object.}

\item{filter}{For \code{compounds()} and \code{Spectra()}: filter expression or
\code{\link[AnnotationFilter:AnnotationFilter]{AnnotationFilter::AnnotationFilter()}} defining a filter to be used to
retrieve specific elements from the database.}

\item{...}{additional arguments. Currently not used.}

\item{includeId}{For \code{compoundVariables()}: \code{logical(1)} whether the compound
ID (column \code{"compound_id"}) should be included in the result. The
default is \code{includeId = FALSE}.}

\item{columns}{For \code{compounds()}, \code{Spectra()}: \code{character} with the names of
the database columns that should be retrieved. Use \code{compoundVariables()}
and/or \code{spectraVariables()} for a list of available column names.
For \code{insertSpectra()}: columns (spectra variables) that should be
inserted into the database (to avoid inserting all variables).}

\item{return.type}{For \code{compounds()}: either \code{"data.frame"} or \code{"tibble"} to
return the result as a \code{\link[=data.frame]{data.frame()}} or \code{\link[tibble:tibble]{tibble::tibble()}},
respectively.}

\item{spectra}{For \code{insertSpectra()}: \code{Spectra} object containing the
spectra to be added to the \code{CompDb} database.}

\item{ids}{For \code{deleteSpectra()}: \code{integer()}
specifying the IDs of the spectra to delete. IDs in \code{ids} that are
not associated to any spectra in the \code{CompDb} object are ignored.
For \code{deleteCompound}: \code{character()} with the compound IDs to be deleted.}

\item{adduct}{either a \code{character} specifying the name(s) of the adduct(s)
for which the m/z should be calculated or a \code{data.frame} with the adduct
definition. See \code{\link[MetaboCoreUtils:adductNames]{adductNames()}} for supported adduct names and the
description for more information on the expected format if a \code{data.frame}
is provided.}

\item{name}{For \code{mass2mz()}: \code{character(1)}. Defines the \code{CompDb} column
that will be used to name/identify the returned m/z values. By default
(\code{name = "formula"}) m/z values for all unique molecular formulas are
calculated and these are used as \code{rownames} for the returned \code{matrix}.
With \code{name = "compound_id"} the adduct m/z for all compounds (even those
with equal formulas) are calculated and returned.}

\item{compounds}{For \code{insertCompound()}: \code{data.frame} with compound data to
be inserted into a \code{CompDb} database. See function description for
details.}

\item{addColumns}{For \code{insertCompound()}: \code{logical(1)} whether all (extra)
columns in parameter \code{compounds} should be stored also in the database
table. The default is \code{addColumns = FALSE}.}

\item{recursive}{For \code{deleteCompound()}: \code{logical(1)} whether also MS2
spectra associated with the compounds should be deleted.}
}
\value{
See description of the respective function.
}
\description{
\code{CompDb} objects provide access to general (metabolite) compound
annotations along with \emph{metadata} information such as the annotation's
source, date and release version. The data is stored internally in a
database (usually an SQLite database).

\code{hasMsMsSpectra} returns \code{TRUE} if MS/MS spectrum data is
available in the database and \code{FALSE} otherwise.
}
\details{
\code{CompDb} objects should be created using the constructor function
\code{CompDb()} providing the name of the (SQLite) database file containing
the compound annotation data.
}
\section{Retrieve annotations from the database}{


Annotations/compound information can be retrieved from a \code{CompDb} database
with the \code{compounds()} and \code{Spectra()} functions:
\itemize{
\item \code{compounds()} extracts compound data from the \code{CompDb} object. In contrast
to \code{src_compdb} it returns the actual data as a \code{data.frame} (if
\code{return.type = "data.frame"}) or a \code{\link[tibble:tibble]{tibble::tibble()}} (if
\code{return.type = "tibble"}). A \code{compounds()} call will always return all
elements from the \emph{ms_compound} table (unless a \code{filter} is used).
\item \code{Spectra()} extracts spectra from the database and returns them as a
\code{\link[Spectra:Spectra]{Spectra::Spectra()}} object from the \emph{Spectra} package. Additional
annotations requested with the \code{columns} parameter are added as additional
spectra variables.
}
}

\section{General functions}{

\itemize{
\item \code{CompDb()}: connect to a compound database.
\item \code{compoundVariables()}: returns all available columns/database fields for
compounds.
\item \code{copyCompDb()}: allows copying the content from a CompDb to another
database. Parameter \code{x} is supposed to be either a \code{CompDb} or a database
connection from which the data should be copied and \code{y} a connection to
a database to which it should be copied.
\item \code{dbconn()}: returns the connection (of type \code{DBIConnection}) to the
database.
\item \code{metadata()}: returns general meta data of the compound database.
\item \code{spectraVariables()}: returns all spectra variables (i.e. columns)
available in the \code{CompDb}.
\item \code{src_compdb()} provides access to the \code{CompDb}'s database \emph{via}
the functionality from the \code{dplyr}/\code{dbplyr} package.
\item \code{supportedFilters()}: provides an overview of the filters that can be
applied on a \code{CompDb} object to extract only specific data from the
database.
\item \code{tables()}: returns a named \code{list} (names being table names) with
the fields/columns from each table in the database.
\item \code{mass2mz()}: calculates a table of the m/z values for each compound based
on the provided set of adduct(s). Adduct definitions can be provided with
parameter \code{adduct}. See \code{\link[MetaboCoreUtils:mass2mz]{MetaboCoreUtils::mass2mz()}} for more details.
Parameter \code{name} defines the database table column that should be used as
\code{rownames} of the returned \code{matrix}. By default \code{name = "formula"}, m/z
values are calculated for each unique formula in the \code{CompDb} \code{x}.
}
}

\section{Adding and removing data from a database}{


Note that inserting and deleting data requires read-write access to the
database. Databases returned by \code{CompDb} are by default \emph{read-only}. To get
write access \code{CompDb} should be called with parameter
\code{flags = RSQLite::SQLITE_RW}.
\itemize{
\item \code{insertCompound()}: adds additional compound(s) to a \code{CompDb}. The
compound(s) to be added can be specified with parameter \code{compounds} that
is expected to be a \code{data.frame} with columns \code{"compound_id"}, \code{"name"},
\code{"inchi"}, \code{"inchikey"}, \code{"formula"}, \code{"exactmass"}.
Column \code{"exactmass"} is expected to contain numeric values, all other
columns \code{character}. Missing values are allowed for all columns except
\code{"compound_id"}. An optional column \code{"synonyms"} can be used to provide
alternative names for the compound. This column can contain a single
\code{character} by row, or a \code{list} with multiple \code{character} (names) per
row/compound (see examples below for details). By setting parameter
\code{addColumns = TRUE} any additional columns in \code{compounds} will be added to
the database table. The default is \code{addColumns = FALSE}. The function
returns the \code{CompDb} with the compounds added.
See also \code{\link[=createCompDb]{createCompDb()}} for more information and details on expected
compound data and the examples below for general usage.
\item \code{deleteCompound()}: removes specified compounds from the \code{CompDb} database.
The IDs of the compounds that should be deleted need to be provided with
parameter \code{ids}. To include compound IDs in the output of a \code{compounds()}
call \code{"compound_id"} should be added to the \code{columns} parameter. By
default an error is thrown if for some of the specified compounds also MS2
spectra are present in the database. To force deletion of the compounds
along with all associated MS2 spectra use \code{recursive = TRUE}. See examples
below for details. The function returns the updated \code{CompDb} database.
\item \code{insertSpectra()}: adds further spectra to the database.
The method always adds all the spectra specified through the \code{spectra}
parameter and does not check if they are already in the database. Note that
the input spectra must have the variable \code{compound_id} and only \code{Spectra}
whose \code{compound_id} values are also in \code{compounds(object, "compound_id")}
can be added. Parameter \code{columns} defines which spectra variables from the
\code{spectra} should be inserted into the database. By default, all spectra
variables are added but it is strongly suggested to specifically select
(meaningful) spectra variables that should be stored in the database.
Note that a spectra variable \code{"compound_id"} is mandatory.
If needed, the function adds additional columns to the \code{msms_spectrum}
database table. The function returns the updated \code{CompDb} object.
\item \code{deleteSpectra()}: deletes specified spectra from the database. The IDs of
the spectra to be deleted need to be provided with parameter \code{ids}.
}
}

\section{Filtering the database}{


Data access methods such as \code{compounds()} and \code{Spectra()} allow filtering the
results using specific filter classes and expressions. Filtering uses the
concepts from Bioconductor's \code{AnnotationFilter} package. All information
for a certain compound with the ID \code{"HMDB0000001"} can for example be
retrieved by passing the filter expression
\code{filter = ~ compound_id == "HMDB0000001"} to the \code{compounds} function.

Use the \code{\link[AnnotationFilter:AnnotationFilter]{AnnotationFilter::supportedFilters()}} function on the \link{CompDb}
object to get a list of all supported filters. See also examples below
or the usage vignette for details.
}

\examples{

## We load a small compound test database based on MassBank which is
## distributed with this package.
cdb <- CompDb(system.file("sql/CompDb.MassBank.sql", package = "CompoundDb"))
cdb

## Get general metadata information from the database, such as originating
## source and version:
metadata(cdb)

## List all available compound annotations/fields
compoundVariables(cdb)

## Extract a data.frame with these annotations for all compounds
compounds(cdb)

## Note that the `compounds` function will by default always return a
## data frame of **unique** entries for the specified columns. Including
## also the `"compound_id"` to the requested columns will ensure that all
## data is returned from the tables.
compounds(cdb, columns = c("compound_id", compoundVariables(cdb)))

## Add also the synonyms (aliases) for the compounds. This will cause the
## tables compound and synonym to be joined. The elements of the compound_id
## and name are now no longer unique
res <- compounds(cdb, columns = c("name", "synonym"))
head(res)

## List all database tables and their columns
tables(cdb)

## Any of these columns can be used in the `compounds` call to retrieve
## the specific annotations. The corresponding database tables will then be
## joined together
compounds(cdb, columns = c("formula", "publication"))

## Calculating m/z values for the exact masses of unique chemical formulas
## in the database:
mass2mz(cdb, adduct = c("[M+H]+", "[M+Na]+"))

## By using `name = "compound_id"` the calculation will be performed for
## each unique compound ID instead (resulting in potentially redundant
## results)
mass2mz(cdb, adduct = c("[M+H]+", "[M+Na]+"), name = "compound_id")

## Create a Spectra object with all MS/MS spectra from the database.
library(Spectra)
sps <- Spectra(cdb)
sps

## Extract spectra for a specific compound.
sps <- Spectra(cdb, filter = ~ name == "Mellein")
sps

## List all available annotations for MS/MS spectra
spectraVariables(sps)

## Get access to the m/z values of these
mz(sps)

library(Spectra)
## Plot the first spectrum
plotSpectra(sps[1])


#########
## Filtering the database
##
## Get all compounds with an exact mass between 310 and 320
res <- compounds(cdb, filter = ~ exactmass > 310 & exactmass < 320)
res

## Get all compounds that have an H14 in their formula.
res <- compounds(cdb, filter = FormulaFilter("H14", "contains"))
res

#########
## Using CompDb with the *tidyverse*
##
## Using return.type = "tibble" the result will be returned as a "tibble"
compounds(cdb, return.type = "tibble")

## Use the CompDb in a dplyr setup
library(dplyr)
src_cmp <- src_compdb(cdb)
src_cmp

## Get a tbl for the ms_compound table
cmp_tbl <- tbl(src_cmp, "ms_compound")

## Extract the id, name and inchi
cmp_tbl \%>\% select(compound_id, name, inchi) \%>\% collect()

########
## Creating an empty CompDb and sequentially adding content
##
## Create an empty CompDb and store the database in a temporary file
cdb <- emptyCompDb(tempfile())
cdb

## Define a data.frame with some compounds to add
cmp <- data.frame(
    compound_id = c(1, 2),
    name = c("Caffeine", "Glucose"),
    formula = c("C8H10N4O2", "C6H12O6"),
    exactmass = c(194.080375584, 180.063388116))

## We can also add multiple synonyms for each compound
cmp$synonyms <- list(c("Cafeina", "Koffein"), "D Glucose")
cmp

## These compounds can be added to the empty database with insertCompound
cdb <- insertCompound(cdb, compounds = cmp)
compounds(cdb)

## insertCompound would also allow to add additional columns/annotations to
## the database. Below we define a new compound adding an additional column
## hmdb_id
cmp <- data.frame(
    compound_id = 3,
    name = "Alpha-Lactose",
    formula = "C12H22O11",
    exactmass = 342.116211546,
    hmdb_id = "HMDB0000186")

## To add additional columns we need to set addColumns = TRUE
cdb <- insertCompound(cdb, compounds = cmp, addColumns = TRUE)
cdb
compounds(cdb)

######
## Deleting selected compounds from a database
##
## Compounds can be deleted with the deleteCompound function providing the
## IDs of the compounds that should be deleted. IDs of compounds in the
## database can be retrieved by adding "compound_id" to the columns parameter
## of the compounds function:
compounds(cdb, columns = c("compound_id", "name"))

## Compounds can be deleted with the deleteCompound function. Below we delete
## the compounds with the IDs "1" and "3" from the database
cdb <- deleteCompound(cdb, ids = c("1", "3"))
compounds(cdb)

## If MS2 spectra were associated with any of these two compounds an error
## would be thrown. Setting the parameter `recursive = TRUE` in the
## `deleteCompound` call would delete the compounds along with their MS2
## spectra.
}
\seealso{
\code{\link[=createCompDb]{createCompDb()}} for the function to create a SQLite compound database.

\code{\link[=CompoundIdFilter]{CompoundIdFilter()}} for filters that can be used on the \code{CompDb} database.
}
\author{
Johannes Rainer
}
