% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/distance.R
\name{distance}
\alias{distance}
\alias{ndotproduct}
\alias{dotproduct}
\alias{neuclidean}
\alias{navdist}
\alias{nspectraangle}
\title{Spectra Distance/Similarity Measurements}
\usage{
ndotproduct(x, y, m = 0L, n = 0.5, na.rm = TRUE, ...)

dotproduct(x, y, m = 0L, n = 0.5, na.rm = TRUE, ...)

neuclidean(x, y, m = 0L, n = 0.5, na.rm = TRUE, ...)

navdist(x, y, m = 0L, n = 0.5, na.rm = TRUE, ...)

nspectraangle(x, y, m = 0L, n = 0.5, na.rm = TRUE, ...)
}
\arguments{
\item{x}{\code{matrix}, two-columns e.g. m/z, intensity}

\item{y}{\code{matrix}, two-columns e.g. m/z, intensity}

\item{m}{\code{numeric}, weighting for the first column of \code{x} and \code{y} (e.g.
"mz"), default: \code{0} means don't weight by the first column. For more details
see the \code{ndotproduct} details section.}

\item{n}{\code{numeric}, weighting for the second column of \code{x} and \code{y} (e.g.
"intensity"), default: \code{0.5} means effectively using \code{sqrt(x[,2])} and
\code{sqrt(y[,2])}. For more details see the \code{ndotproduct} details section.}

\item{na.rm}{\code{logical(1)}, should \code{NA} be removed prior to calculation
(default \code{TRUE}).}

\item{...}{ignored.}
}
\value{
\code{double(1)} value between \code{0} and \code{1}, where \code{0} means
completely different and \code{1} identical.
}
\description{
These functions provide different normalized similarity/distance measurements.
}
\details{
All functions that calculate normalized similarity/distance measurements are
prefixed with an \emph{n}.

\code{ndotproduct}: the normalized dot product is described in Stein and Scott
1994 as: \eqn{NDP = \frac{(\sum W_1 W_2)^2}{\sum(W_1^2) \sum(W_2^2)}}; where
\eqn{W_i = x^m * y^n}, where \eqn{x} and \eqn{y} are the m/z and intensity
values, respectively. Please note also that \eqn{NDP = NCos^2}; where NCos
is the cosine value (i.e. the orthodox normalized dot product) of the
intensity vectors as described in Yilmaz et al. 2017. Stein and Scott 1994
empirically determined the optimal exponents as \code{m = 3} and \code{n = 0.6} by
analyzing ca. 12000 EI-MS data of 8000 organic compounds in the NIST Mass
Spectral Library.
MassBank (Horai et al. 2010) uses \code{m = 2} and \code{n = 0.5}
for small compounds. In general with increasing values for \code{m},
high m/z values will be taken more into account for similarity calculation.
Especially when working with small molecules, a value \code{m > 0} can be set
to give a weight on the m/z values to accommodate that shared fragments
with higher m/z are less likely and will mean that molecules might be more
similar. Increasing \code{n} will result in a higher importance of the intensity
values. Most commonly \code{m = 0} and \code{n = 0.5} are used.

\code{neuclidean}: the normalized euclidean distance is described in Stein and
Scott 1994 as:
\eqn{NED = (1 + \frac{\sum((W_1 - W_2)^2)}{\sum((W_2)^2)})^{-1}}; where
\eqn{W_i = x^m * y^n}, where \eqn{x} and \eqn{y} are the m/z and intensity
values, respectively. See the details section about \code{ndotproduct} for an
explanation how to set \code{m} and \code{n}.

\code{navdist}: the normalized absolute values distance is described in Stein and
Scott 1994 as:
\eqn{NAVD = (1 + \frac{\sum(|W_1 - W_2|)}{\sum((W_2))})^{-1}}; where
\eqn{W_i = x^m * y^n}, where \eqn{x} and \eqn{y} are the m/z and intensity
values, respectively. See the details section about \code{ndotproduct} for an
explanation how to set \code{m} and \code{n}.

\code{nspectraangle}: the normalized spectra angle is described in Toprak et al.
2014 as:
\eqn{NSA = 1 - \frac{2*\cos^{-1}(W_1 \cdot W_2)}{\pi}}; where
\eqn{W_i = x^m * y^n}, where \eqn{x} and \eqn{y} are the m/z and intensity
values, respectively. The weighting was not originally proposed by Toprak et
al. 2014. See the details section about \code{ndotproduct} for an explanation how
to set \code{m} and \code{n}.
}
\note{
These methods are implemented as described in Stein and Scott 1994
(\code{navdist}, \code{ndotproduct}, \code{neuclidean}) and Toprak et al. 2014
(\code{nspectraangle}) but because there is no reference implementation available
we are unable to guarantee that the results are identical.
Note that the Stein and Scott 1994 normalized dot product method (and by
extension \code{ndotproduct}) corresponds to the square of the orthodox
normalized dot product (or cosine distance), which is also commonly used as a
spectrum similarity measure (Yilmaz et al. 2017).
Please see also the corresponding discussion at the GitHub pull request
linked below. If you find any problems or know of a reference implementation,
please open an issue at
\url{https://github.com/rformassspectrometry/MsCoreUtils/issues}.
}
\examples{

x <- matrix(c(1:5, 1:5), ncol = 2, dimnames = list(c(), c("mz", "intensity")))
y <- matrix(c(1:5, 5:1), ncol = 2, dimnames = list(c(), c("mz", "intensity")))

ndotproduct(x, y)
ndotproduct(x, y, m = 2, n = 0.5)
ndotproduct(x, y, m = 3, n = 0.6)

neuclidean(x, y)

navdist(x, y)

nspectraangle(x, y)
}
\references{
Stein, S. E., and Scott, D. R. (1994).
Optimization and testing of mass spectral library search algorithms for
compound identification.
Journal of the American Society for Mass Spectrometry, 5(9), 859--866.
\doi{10.1016/1044-0305(94)87009-8}.

Yilmaz, S., Vandermarliere, E., and Martens, L. (2017).
Methods to Calculate Spectrum Similarity.
In S. Keerthikumar and S. Mathivanan (eds.), Proteome
Bioinformatics: Methods in Molecular Biology, vol. 1549 (pp. 81).
\doi{10.1007/978-1-4939-6740-7_7}.

Horai et al. (2010).
MassBank: a public repository for sharing mass spectral data for life
sciences. Journal of mass spectrometry, 45(7), 703--714.
\doi{10.1002/jms.1777}.

Toprak et al. (2014).
Conserved peptide fragmentation as a benchmarking tool for mass spectrometers
and a discriminating feature for targeted proteomics.
Molecular & Cellular Proteomics : MCP, 13(8), 2056--2071.
\doi{10.1074/mcp.O113.036475}.

Pull Request for these distance/similarity measurements:
\url{https://github.com/rformassspectrometry/MsCoreUtils/pull/33}
}
\seealso{
Other distance/similarity functions: 
\code{\link{gnps}()}
}
\author{
\code{navdist}, \code{neuclidean}, \code{nspectraangle}: Sebastian Gibb

\code{ndotproduct}: Sebastian Gibb and
Thomas Naake, \email{thomasnaake@googlemail.com}
}
\concept{distance/similarity functions}
