% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/adjacencyMatrix-create.R,
%   R/adjacencyMatrix-plot.R
\name{adjacencyMatrix}
\alias{adjacencyMatrix}
\alias{makeAdjacencyMatrix}
\alias{makePeptideProteinVector}
\alias{plotAdjacencyMatrix}
\title{Convert to/from an adjacency matrix.}
\usage{
makeAdjacencyMatrix(
  x,
  split = ";",
  peptide = psmVariables(x)["peptide"],
  protein = psmVariables(x)["protein"],
  score = psmVariables(x)["score"],
  binary = FALSE
)

makePeptideProteinVector(m, collapse = ";")

plotAdjacencyMatrix(
  m,
  protColors = 0,
  pepColors = NULL,
  layout = igraph::layout_nicely
)
}
\arguments{
\item{x}{Either an instance of class \code{PSM} or a \code{character}. See
example below for details.}

\item{split}{\code{character(1)} defining how to split the string of
protein identifiers (using \code{\link[=strsplit]{strsplit()}}). Default is ";". If
\code{NULL}, splitting is ignored.}

\item{peptide}{\code{character(1)} indicating the name of the variable
that defines peptides in the \code{PSM} object. Default is the
\code{peptide} PSM variable as defined in \code{\link[=psmVariables]{psmVariables()}}.}

\item{protein}{\code{character(1)} indicating the name of the variable
that defines proteins in the \code{PSM} object. Default is the
\code{peptide} PSM variable as defined in \code{\link[=psmVariables]{psmVariables()}}.}

\item{score}{\code{character(1)} indicating the name of the variable
that defines PSM scores in the \code{PSM} object. Default is the
\code{score} PSM variable as defined in \code{\link[=psmVariables]{psmVariables()}}. Ignored
when \code{NA} (which is the default value unless set by the user
when constructing the \code{PSM} object).}

\item{binary}{\code{logical(1)} indicates if the adjacency matrix
should be strictly binary. In such a case, PSMs matching the
same peptide but from different precursors (for example charge
2 and 3) or carrying different PTMs, are counted only
once. Default if \code{FALSE}. This also overrides any \code{score} that
would be set.}

\item{m}{A peptide-by-protein adjacency matrix.}

\item{collapse}{\code{character(1)} indicating how to collapse protein
names for shared peptides. Default is \code{";"}.}

\item{protColors}{Either a \code{numeric(1)} or a named \code{character()}
of colour names. The numeric value indicates the protein
colouring level to use. If 0 (default), all protein nodes are
labelled in steelblue. For values > 0, approximate string
distances (see \code{\link[=adist]{adist()}}) between protein names are calculated
and nodes of proteins that have names that differ will be
coloured differently, with higher values leading to more
colours. While no maximum to this value is defined in the
code, it shouldn't be higher than the number of proteins. If a
character is used, it should be a character of colour names
named by protein identifiers. That vector should provide
colours for at least all proteins in the adjacency matrix \code{m},
but more protein could be named. The latter is useful when
generating a colour vector for all proteins in a dataset and
use it for different adjacency matrix visualisations.}

\item{pepColors}{Either \code{NULL} (default) for no peptide colouring
(white nodes) or a named \code{character()} of colour names. It
should be a character of colour names named by peptide
identifiers. That vector should provide colours for at least
all peptides in the adjacency matrix \code{m}, but more peptides
could be named. The latter is useful when generating a colour
vector for all peptides in a dataset and use it for different
adjacency matrix visualisations.}

\item{layout}{A graph layout, as defined in the \code{ipgraph}
package. Default is \code{\link[igraph:layout_as_bipartite]{igraph::layout_as_bipartite()}}.}
}
\value{
A peptide-by-protein sparce adjacency matrix (or class
\code{dgCMatrix} as defined in the \code{Matrix} package) or
peptide/protein vector.
}
\description{
There are two ways that peptide/protein matches are commonly
stored: either as a vector or an adjacency matrix. The functions
described below convert between these two format.
}
\details{
The \code{\link[=makeAdjacencyMatrix]{makeAdjacencyMatrix()}} function creates a peptide-by-protein adjacency
matrix from a \code{character} or an instance of class \code{\link[=PSM]{PSM()}}.

The character is formatted as \code{x <- c("ProtA", "ProtB", "ProtA;ProtB", ...)}, as commonly encountered in proteomics data spreadsheets. It defines
that the first peptide is mapped to protein "ProtA", the second one to
protein "ProtB", the third one to "ProtA" and "ProtB", and so on. The
resulting matrix contains as many rows as there are unique peptides and as
many columns as there are unique protein identifiers in \code{x}. The columns
are named after the protein identifiers and the peptide/protein vector
names are used to name the matrix rows (retaining only the unique names).

The \code{\link[=makePeptideProteinVector]{makePeptideProteinVector()}} function does the opposite operation,
taking an adjacency matrix as input and returning a peptide/protein
vector. The matrix colnames are used to populate the vector and the matrix
rownames are used to name the vector elements.

Note that when creating an adjacency matrix from a PSM object, the matrix is
not necessarily binary, as multiple PSMs can match the same peptide
(sequence), such as for example precursors with different charge states. A
binary matrix can either be generated with the \code{binary} argument (setting
all non-0 values to 1) or by reducing the PSM object accordingly (see
example below).

It is also possible to generate adjacency matrices populated with
identification scores or probabilites by setting the "score" PSM variable
upon construction of the PSM object (see \code{\link[=PSM]{PSM()}} for details). In case
multiple PSMs occur, their respective scores get summed.

The \code{plotAdjacencyMatrix()} function is useful to visualise small adjacency
matrices, such as those representing protein groups modelled as connected
components, as described and illustrated in \code{\link[=ConnectedComponents]{ConnectedComponents()}}. The
function generates a graph modelling the relation between proteins
(represented as squares) and peptides (represented as circes), which can
further be coloured (see the \code{protColors} and \code{pepColors} arguments). The
function invisibly returns the graph \code{igraph} object for additional tuning
and/or interactive visualisation using, for example \code{\link[igraph:tkplot]{igraph::tkplot()}}.

Such as illustrated in the examples below, each row/peptide is
expected to refer to protein groups or individual proteins (groups of
size 1). These have to be split accordingly.
}
\examples{

## -----------------------
## From a character
## -----------------------

## Protein vector without names
prots <- c("ProtA", "ProtB", "ProtA;ProtB")
makeAdjacencyMatrix(prots)

## Named protein vector
names(prots) <- c("pep1", "pep2", "pep3")
prots
m <- makeAdjacencyMatrix(prots)
m

## Back to vector
vec <- makePeptideProteinVector(m)
vec
identical(prots, vec)

## ----------------------------
## PSM object from a data.frame

## ----------------------------
## Case 1: Duplicate identifications

psmdf <- data.frame(psm = paste0("psm", 1:10),
                    peptide = paste0("pep", c(1, 1, 2, 2, 3, 4, 6, 7, 8, 8)),
                    protein = paste0("Prot", LETTERS[c(1, 1, 2, 2, 3, 4, 3, 5, 6, 6)]))
psmdf
psm <- PSM(psmdf, peptide = "peptide", protein = "protein")
psm
makeAdjacencyMatrix(psm)

## Reduce PSM object to peptides
rpsm <- reducePSMs(psm, k = psm$peptide)
rpsm
makeAdjacencyMatrix(rpsm)

## Or set binary to TRUE
makeAdjacencyMatrix(psm, binary = TRUE)

## ----------------------------
## Case 2: Protein groups are separated by a semicolon
psmdf <- data.frame(psm = paste0("psm", 1:5),
                    peptide = paste0("pep", c(1, 2, 3, 4, 5)),
                    protein = c("ProtA", "ProtB;ProtD", "ProtA;ProtC", 
                                "ProtC", "ProtA;ProtC;ProtD"))
psmdf
psm <- PSM(psmdf, peptide = "peptide", protein = "protein")
psm
makeAdjacencyMatrix(psm, split = ";")

## ----------------------------
## PSM object from an mzid file
## ----------------------------

f <- msdata::ident(full.names = TRUE, pattern = "TMT")
psm <- PSM(f) |>
       filterPsmDecoy() |>
       filterPsmRank()
psm
adj <- makeAdjacencyMatrix(psm)
dim(adj)
adj[1:10, 1:4]

## Binary adjacency matrix
adj <- makeAdjacencyMatrix(psm, binary = TRUE)
adj[1:10, 1:4]

## Peptides with rowSums > 1 match multiple proteins.
## Use filterPsmShared() to filter these out.
table(Matrix::rowSums(adj))

## -----------------------------------------------
## Binary, non-binary and score adjacency matrices
## -----------------------------------------------

## -------------------------------------
## Case 1: no scores, 1 PSM per peptides
psmdf <- data.frame(spectrum = c("sp1", "sp2", "sp3", "sp4", "sp5",
                                 "sp6", "sp7", "sp8", "sp9", "sp10"),
                    sequence = c("NKAVRTYHEQ", "IYNHSQGFCA", "YHWRLPVSEF",
                                 "YEHNGFPLKD", "WAQFDVYNLS", "EDHINCTQWP",
                                 "WSMKVDYEQT", "GWTSKMRYPL", "PMAYIWEKLC",
                                 "HWAEYFNDVT"),
                    protein = c("ProtB", "ProtB", "ProtA", "ProtD", "ProtD",
                                "ProtG", "ProtF", "ProtE", "ProtC", "ProtF"),
                    decoy = rep(FALSE, 10),
                    rank = rep(1, 10),
                    score = c(0.082, 0.310, 0.133, 0.174, 0.944, 0.0261,
                              0.375, 0.741, 0.254, 0.058))
psmdf

psm <- PSM(psmdf, spectrum = "spectrum", peptide = "sequence",
           protein = "protein", decoy = "decoy", rank = "rank")

## binary matrix
makeAdjacencyMatrix(psm)

## Case 2: sp1 and sp11 match the same peptide (NKAVRTYHEQ) as different PSMs
psmdf2 <- rbind(psmdf,
                data.frame(spectrum = "sp11",
                           sequence = psmdf$sequence[1],
                           protein = psmdf$protein[1],
                           decoy = FALSE,
                           rank = 1,
                           score = 0.011))
psmdf2
psm2 <- PSM(psmdf2, spectrum = "spectrum", peptide = "sequence",
            protein = "protein", decoy = "decoy", rank = "rank")

## Now NKAVRTYHEQ/ProtB counts 2 PSMs
makeAdjacencyMatrix(psm2)

## Force a binary matrix
makeAdjacencyMatrix(psm2, binary = TRUE)

## Case 3: Peptide (NKAVRTYHEQ) stems from multiple proteins (ProtB and 
## ProtG). They are separated by a semicolon.
psmdf3 <- psmdf
psmdf3[psmdf3$sequence == "NKAVRTYHEQ","protein"] <- "ProtB;ProtG"
psmdf3
psm3 <- PSM(psmdf3, spectrum = "spectrum", peptide = "sequence",
            protein = "protein", decoy = "decoy", rank = "rank")

## Now ProtB & ProtG count 2 PSMs each: NKAVRTYHEQ and IYNHSQGFCA & 
## EDHINCTQWP respectively
makeAdjacencyMatrix(psm3, split = ";")

## --------------------------------
## Case 4: set the score PSM values
psmVariables(psm) ## no score defined
psm4 <- PSM(psm, spectrum = "spectrum", peptide = "sequence",
            protein = "protein", decoy = "decoy", rank = "rank",
            score = "score")
psmVariables(psm4) ## score defined

## adjacency matrix with scores
makeAdjacencyMatrix(psm4)

## Force a binary matrix
makeAdjacencyMatrix(psm4, binary = TRUE)

## ---------------------------------
## Case 5: scores with multiple PSMs

psm5 <- PSM(psm2, spectrum = "spectrum", peptide = "sequence",
            protein = "protein", decoy = "decoy", rank = "rank",
            score = "score")

## Now NKAVRTYHEQ/ProtB has a summed score of 0.093 computed as
## 0.082 (from sp1) + 0.011 (from sp11)
makeAdjacencyMatrix(psm5)
}
\author{
Laurent Gatto
}
