% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/BLAST.R
\name{blast}
\alias{blast}
\alias{BLAST}
\alias{blast_help}
\alias{print.BLAST}
\alias{predict.BLAST}
\alias{has_blast}
\title{Basic Local Alignment Search Tool (BLAST)}
\usage{
blast(db = NULL, remote = FALSE, type = "blastn")

blast_help(type = "blastn")

\method{print}{BLAST}(x, info = TRUE, ...)

\method{predict}{BLAST}(
  object,
  newdata,
  BLAST_args = "",
  custom_format = "",
  verbose = FALSE,
  keep_tmp = FALSE,
  ...
)

has_blast()
}
\arguments{
\item{db}{the database file to be searched (without file extension).}

\item{remote}{logical; execute the query remotely on the NCBI server. \code{db}
needs to be the name of a database available on the server.}

\item{type}{BLAST program to use (e.g., \code{blastn}, \code{blastp}, \code{blastx}).}

\item{info}{show additional database information.}

\item{...}{additional arguments are ignored.}

\item{object, x}{An open BLAST database as a BLAST object created
with \code{\link[=blast]{blast()}}.}

\item{newdata}{the query as an object of class \link[Biostrings:XStringSet-class]{Biostrings::XStringSet}.}

\item{BLAST_args}{additional arguments in command-line style.}

\item{custom_format}{custom format specified by space delimited format
specifiers.}

\item{verbose}{logical; print progress and debugging information.}

\item{keep_tmp}{logical; keep temporary files for debugging.}
}
\value{
\itemize{
\item \code{blast()} returns a BLAST database object which can be used for
queries (via \code{predict}).
\item \code{predict} returns a data.frame containing
the BLAST results.
\item \code{has_blast()} returns \code{TRUE} if the blast software installation can be
found and \code{FALSE} otherwise.
}
}
\description{
Open a BLAST database and execute blastn (or blastp or blastx)
from BLAST+ to find sequence matches.
}
\section{Installing BLAST+}{
The BLAST+ software needs to be installed on your system. Installation
instructions are available in this package's
\href{https://github.com/mhahsler/rBLAST/blob/devel/INSTALL}{INSTALL} file and
at \url{https://www.ncbi.nlm.nih.gov/books/NBK569861/}.

R needs to be able to find the executable. After installing the software,
try in R

\if{html}{\out{<div class="sourceCode">}}\preformatted{Sys.which("blastn")
}\if{html}{\out{</div>}}

If the command returns "" instead of the path to the executable,
then you need to set the environment variable called PATH. In R

\if{html}{\out{<div class="sourceCode">}}\preformatted{Sys.setenv(PATH = paste(Sys.getenv("PATH"),
   "path_to_your_BLAST_installation", sep=.Platform$path.sep))
}\if{html}{\out{</div>}}
}

\section{BLAST Databases}{
You will also need a database. NCBI BLAST databases are updated daily and
may be downloaded via FTP from \url{https://ftp.ncbi.nlm.nih.gov/blast/db/}.
See \code{\link[=blast_db_cache]{blast_db_cache()}} on how to manage a local cache of database files.

BLAST databases are a set of database files with different extensions.
All files start with the same database name. For example,
\verb{16S_ribosomal_RNA.tar.gz} contains
files starting with \verb{16S_ribosomal_RNA} which is the database name used
for calling \code{blast()}.

Large databases are separated into several archives numbered \code{00}, \code{01}, etc.
Download all archives and extract the files in the same directory.
All files will have a common name which is the database name used for calling
\code{blast()}.
}

\examples{
## Check that blastn is correctly installed. This should return the path to
##   the executable (an empty string means R cannot find it on the PATH).
Sys.which("blastn")

## Only run the rest of the example if the BLAST software is installed.
if (has_blast()) {
    ## Check the installed version; you should have version 1.8.1+.
    system2("blastn", "-version")

    ## Download the latest version of the 16S Microbial
    ##   rRNA database from NCBI using the local cache.
    tgz_file <- blast_db_get("16S_ribosomal_RNA.tar.gz")

    ## Extract the database files into their own directory.
    untar(tgz_file, exdir = "./16S_rRNA_DB")

    ## Note: the database file can also be downloaded without using a
    ##   cache, using download.file:
    # download.file(paste("https://ftp.ncbi.nlm.nih.gov/blast/db",
    #    "16S_ribosomal_RNA.tar.gz", sep = "/"),
    #    "16S_ribosomal_RNA.tar.gz", mode = "wb")
    # untar("16S_ribosomal_RNA.tar.gz", exdir = "./16S_rRNA_DB")

    ## A BLAST database is just a set of files. It is a good idea to
    ##   organize the files in a directory.
    list.files("./16S_rRNA_DB")

    ## Load a BLAST database (replace db with the location + name of
    ##   the BLAST DB without the extension) and print the database object.
    bl <- blast(db = "./16S_rRNA_DB/16S_ribosomal_RNA")
    bl

    ## Read a single example sequence to BLAST (the first sequence in the
    ##   example FASTA file shipped with the package).
    seq <- readRNAStringSet(system.file("examples/RNA_example.fasta",
        package = "rBLAST"
    ))[1]
    seq

    ## Query the sequence using BLAST and show the first five rows of
    ##   the result.
    cl <- predict(bl, seq)
    cl[1:5, ]

    ## Pass on BLAST arguments (99\% identity) and use a custom format
    ##   given as space-delimited format specifiers
    ##   (see the BLAST documentation).
    fmt <- paste(
        "qaccver saccver pident length mismatch gapopen qstart qend",
        "sstart send evalue bitscore qseq sseq"
    )
    cl <- predict(bl, seq,
        BLAST_args = "-perc_identity 99",
        custom_format = fmt
    )
    cl

    ## Clean up the example: delete the extracted database files.
    unlink("./16S_rRNA_DB", recursive = TRUE)
}
}
\references{
BLAST Help - BLAST+ Executable:
\url{https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html}

BLAST Command Line Applications User Manual,
\url{https://www.ncbi.nlm.nih.gov/books/NBK279690/}
}
\seealso{
Other blast: 
\code{\link{blast_db_cache}()},
\code{\link{makeblastdb}()}
}
\author{
Michael Hahsler
}
\concept{blast}
\keyword{model}
