% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tr2g.R
\name{tr2g_ensembl}
\alias{tr2g_ensembl}
\title{Get transcript and gene info from Ensembl}
\usage{
tr2g_ensembl(
  species,
  type = c("vertebrate", "metazoa", "plant", "fungus", "protist"),
  out_path = ".",
  write_tr2g = TRUE,
  other_attrs = NULL,
  use_gene_name = TRUE,
  use_transcript_version = TRUE,
  use_gene_version = TRUE,
  transcript_biotype_col = "transcript_biotype",
  gene_biotype_col = "gene_biotype",
  transcript_biotype_use = "all",
  gene_biotype_use = "all",
  chrs_only = TRUE,
  ensembl_version = NULL,
  overwrite = FALSE,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{species}{Character vector of length 1, Latin name of the species of
interest.}

\item{type}{Character, must be one of "vertebrate", "metazoa", "plant",
"fungus", or "protist". Passing "vertebrate" will use the default
www.ensembl.org host. Gene annotations of some common invertebrate model
organisms, such as \emph{Drosophila melanogaster}, are available on
www.ensembl.org, so for these invertebrate model organisms, "vertebrate" can
be used for this argument. Passing values other than "vertebrate" will use
other Ensembl hosts. For animals absent from www.ensembl.org, try "metazoa".}

\item{out_path}{Directory to save the outputs written to disk. If this
directory does not exist, then it will be created. Defaults to the current
working directory.}

\item{write_tr2g}{Logical, whether to write tr2g to disk. If \code{TRUE}, then
a file \code{tr2g.tsv} will be written into \code{out_path}.}

\item{other_attrs}{Character vector. Other attributes to get from Ensembl,
such as gene symbol and position on the genome.
Use \code{\link{listAttributes}} to see which attributes are available.}

\item{use_gene_name}{Logical, whether to get gene names.}

\item{use_transcript_version}{Logical, whether to include version number in
the Ensembl transcript ID. To decide whether to
include transcript version number, check whether version numbers are included
in the \code{transcripts.txt} in the \code{kallisto} output directory. If that file
includes version numbers, then transcript version numbers must be included
here as well. If that file does not include version numbers, then transcript
version numbers must not be included here.}

\item{use_gene_version}{Logical, whether to include version number in the
Ensembl gene ID. Unlike transcript
version number, it's up to you whether to include gene version number.}

\item{transcript_biotype_col}{Character vector of length 1. Tag in
\code{attribute} field corresponding to \emph{transcript} biotype.}

\item{gene_biotype_col}{Character vector of length 1. Tag in \code{attribute}
field corresponding to \emph{gene} biotype.}

\item{transcript_biotype_use}{Character, can be "all" or
a vector of \emph{transcript} biotypes to be used. Transcript biotypes aren't
entirely the same as gene biotypes. For instance, in Ensembl annotation,
\code{retained_intron} is a transcript biotype, but not a gene biotype. If
"cellranger", then a warning will be given. See \code{data("ensembl_tx_biotypes")}
for all available transcript biotypes from Ensembl.}

\item{gene_biotype_use}{Character, can be "all", "cellranger", or
a vector of \emph{gene} biotypes to be used. If "cellranger", then the biotypes
used by Cell Ranger's reference are used. See \code{data("cellranger_biotypes")}
for gene biotypes the Cell Ranger reference uses. See
\code{data("ensembl_gene_biotypes")} for all available gene biotypes from Ensembl.
Note that gene biotypes and transcript biotypes are not always the same.}

\item{chrs_only}{Logical, whether to include chromosomes only, because GTF and
GFF files can contain annotations for scaffolds, which are not incorporated
into chromosomes. Setting this to \code{TRUE} will also exclude haplotypes.
Defaults to \code{TRUE}. Only applicable to species found in
\code{genomeStyles()}.}

\item{ensembl_version}{Integer version number of Ensembl (e.g. 94 for the
October 2018 release). This argument defaults to \code{NULL}, which will use
the current release of Ensembl. Use
\code{\link{listEnsemblArchives}} to see the version number corresponding
to the Ensembl release of a particular date. The version specified here must
match the version of Ensembl where the transcriptome used to build the
kallisto index was downloaded. This only works for vertebrates and the most
common invertebrate model organisms like \emph{Drosophila melanogaster} and
\emph{C. elegans} (i.e. www.ensembl.org and its mirrors), not the other Ensembl
sites for plants, protists, fungi, and metazoa.}

\item{overwrite}{Logical, whether to overwrite if files with names of outputs
written to disk already exist.}

\item{verbose}{Logical, whether to display progress.}

\item{\dots}{Other arguments to be passed to \code{\link{useMart}},
such as mirror. Note that setting mirrors other than the default, e.g. uswest,
does not work for archived versions.}
}
\value{
A data frame with at least 2 columns: \code{gene} for gene ID,
\code{transcript} for transcript ID, and optionally \code{gene_name}
for gene names. If \code{other_attrs} has been specified, then those will
also be columns in the data frame returned.
}
\description{
This function queries Ensembl biomart to convert transcript IDs to gene IDs.
}
\examples{
tr2g <- tr2g_ensembl(species = "Danio rerio",
other_attrs = "description", write_tr2g = FALSE)
# This will use plants.ensembl.org as host instead of www.ensembl.org
tr2g <- tr2g_ensembl(species = "Arabidopsis thaliana", type = "plant",
  write_tr2g = FALSE)
}
\seealso{
dl_transcriptome

Other functions to retrieve transcript and gene info: 
\code{\link{sort_tr2g}()},
\code{\link{tr2g_EnsDb}()},
\code{\link{tr2g_TxDb}()},
\code{\link{tr2g_fasta}()},
\code{\link{tr2g_gff3}()},
\code{\link{tr2g_gtf}()},
\code{\link{transcript2gene}()}
}
\concept{functions to retrieve transcript and gene info}
