% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/combine_records_class.R
\name{combine_records_helper_functions}
\alias{combine_records_helper_functions}
\alias{compute_mode}
\alias{compute_mean}
\alias{compute_median}
\alias{fuse}
\alias{select_max}
\alias{select_min}
\alias{select_match}
\alias{select_exact}
\alias{fuse_unique}
\alias{prioritise}
\alias{nothing}
\alias{count_records}
\alias{select_grade}
\title{Combine records helper functions}
\usage{
compute_mode(ties = FALSE, na.rm = TRUE)

compute_mean(na.rm = TRUE)

compute_median(na.rm = TRUE)

fuse(separator, na_string = "NA")

select_max(max_col, use_abs = FALSE, keep_NA = FALSE)

select_min(min_col, use_abs = FALSE, keep_NA = FALSE)

select_match(match_col, search_col, separator, na_string = "NA")

select_exact(match_col, match, separator, na_string = "NA")

fuse_unique(
  separator,
  na_string = "NA",
  digits = 6,
  drop_na = FALSE,
  sort = FALSE
)

prioritise(match_col, priority, separator, no_match = NA, na_string = "NA")

nothing()

count_records()

select_grade(grade_col, keep_NA = FALSE, upper_case = TRUE)
}
\arguments{
\item{ties}{(logical) If TRUE then all records matching the tied groups
are returned. Otherwise the first record is returned.}

\item{na.rm}{(logical) If TRUE then NA is ignored}

\item{separator}{(character, NULL) if !NULL this string is used to collapse
matches with the same priority}

\item{na_string}{(character) NA values are replaced with this string}

\item{max_col}{(character) the column name to search for the maximum value.}

\item{use_abs}{(logical) If TRUE then the sign of the values is ignored.}

\item{keep_NA}{(logical) If TRUE keeps records with NA values}

\item{min_col}{(character) the column name to search for the minimum value.}

\item{match_col}{(character) the column with labels to prioritise}

\item{search_col}{(character) the name of a column to use as a reference for
locating values in the matching column.}

\item{match}{(character) a value to search for in the matching column.}

\item{digits}{(numeric) the number of digits to use when converting numerical
values to characters when determining if values are unique.}

\item{drop_na}{(logical) exclude NA from the list of unique entries}

\item{sort}{(logical) sort the values before collapsing.}

\item{priority}{(character) a list of labels in priority order}

\item{no_match}{(character, NULL) if !NULL then annotations not matching
any of the priority labels are replaced with this value}

\item{grade_col}{(character) the name of a column containing grades}

\item{upper_case}{(logical) If TRUE then grades are compared to upper case
letters to determine their ordering, otherwise lower case.}
}
\value{
A function for use with \code{\link[=combine_records]{combine_records()}}
}
\description{
This page documents helper functions for use with \code{\link[=combine_records]{combine_records()}}.
}
\section{Functions}{
\itemize{
\item \code{compute_mode()}: returns the most common value,
excluding NA by default. If \code{ties = TRUE} then all tied
values are returned, otherwise the first value in
a sorted unique list is returned (equal to min if numeric).
If \code{na.rm = FALSE} then NA are included when searching for the modal value
and placed last if \code{ties = FALSE} (values are returned preferentially over
NA).

\item \code{compute_mean()}: calculates the mean value,
excluding NA if \code{na.rm = TRUE}

\item \code{compute_median()}: calculates the median value,
excluding NA if \code{na.rm = TRUE}

\item \code{fuse()}: collapses multiple matching
records into a single string using the provided separator.

\item \code{select_max()}: selects a record based on
the index of the maximum value in another column.

\item \code{select_min()}: selects a record based on the
index of the minimum value in a second column.

\item \code{select_match()}: returns all records based on
the indices of identical matches in a second column and collapses them
using the provided separator.

\item \code{select_exact()}: returns records based on
the indices of values identical to the \code{match} parameter within the
current column, and collapses them using the provided separator if necessary.

\item \code{fuse_unique()}: collapses a set of records to a
set of unique values using the provided separator. \code{digits} can be provided
for numeric columns to control the precision used when determining unique
values.

\item \code{prioritise()}: reduces a set of annotations by
prioritising values according to the input. If there are multiple matches
with the same priority then they are collapsed using a separator.

\item \code{nothing()}: a pass-through function to
allow some annotation table columns to remain unchanged.

\item \code{count_records()}: adds a new column indicating
the number of annotations that match the given grouping variable.

\item \code{select_grade()}: returns records based on the
index of the best grade in a second list. The best grade is defined as "A"
for \code{upper_case = TRUE} or "a" for \code{upper_case = FALSE}
and the worst grade is "Z" or "z". Any non-exact matches to a character in
\code{LETTERS} or \code{letters} are replaced with NA.

}}
\examples{

# Select matching records
M <- combine_records(
    group_by = "example",
    default_fcn = select_exact(
        match_col = "match_column",
        match = "find_me",
        separator = ", ",
        na_string = "NA"
    )
)

# Collapse unique values
M <- combine_records(
    group_by = "example",
    default_fcn = fuse_unique(
        digits = 6,
        separator = ", ",
        na_string = "NA",
        sort = FALSE
    )
)

# Prioritise by source
M <- combine_records(
    group_by = "InChiKey",
    default_fcn = prioritise(
        match_col = "source",
        priority = c("CD", "LS"),
        separator = "  || "
    )
)

# Do nothing to all columns
M <- combine_records(
    group_by = "InChiKey",
    default_fcn = nothing()
)

# Add a column with the number of records with a matching inchikey
M <- combine_records(
    group_by = "InChiKey",
    fcns = list(
        count = count_records()
    )
)

# Select annotation with highest (best) grade
M <- combine_records(
    group_by = "InChiKey",
    default_fcn = select_grade(
        grade_col = "grade",
        keep_NA = FALSE,
        upper_case = TRUE
    )
)
}
