% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/processStudy_internal.R
\encoding{UTF-8}
\name{runWrapperAncestry}
\alias{runWrapperAncestry}
\title{Run most steps leading to the ancestry inference call
on a specific profile (RNA or DNA)}
\usage{
runWrapperAncestry(
  pedStudy,
  studyDF,
  pathProfileGDS,
  pathGeno,
  pathOut,
  fileReferenceGDS,
  fileReferenceAnnotGDS,
  chrInfo,
  syntheticRefDF,
  genoSource = c("snp-pileup", "generic", "VCF"),
  studyType = c("DNA", "RNA"),
  np = 1L,
  blockTypeID = NULL,
  verbose = FALSE
)
}
\arguments{
\item{pedStudy}{a \code{data.frame} with those mandatory columns: "Name.ID",
"Case.ID", "Sample.Type", "Diagnosis", "Source". All columns must be in
\code{character} strings (no factor). The \code{data.frame}
must contain the information for all the profiles to be analysed.}

\item{studyDF}{a \code{data.frame} containing the information about the
study associated to the analysed sample(s). The \code{data.frame} must have
those 3 columns: "study.id", "study.desc", "study.platform". All columns
must be in \code{character} strings (no factor).}

\item{pathProfileGDS}{a \code{character} string representing the path to
the directory where the GDS Profile files will be created.}

\item{pathGeno}{a \code{character} string representing the path to the
directory containing the VCF output of SNP-pileup for each sample. The
SNP-pileup files must be compressed (gz files) and have the name identifiers
of the samples. A sample with "Name.ID" identifier would have an
associated file called
"Name.ID.vcf.gz" if \code{genoSource} is "VCF",
"Name.ID.generic.txt.gz" if \code{genoSource} is "generic", or
"Name.ID.txt.gz" if \code{genoSource} is "snp-pileup".}

\item{pathOut}{a \code{character} string representing the path to
the directory where the output files are created.}

\item{fileReferenceGDS}{a \code{character} string representing the file
name of the Reference GDS file. The file must exist.}

\item{fileReferenceAnnotGDS}{a \code{character} string representing the
file name of the Reference GDS Annotation file. The file must exist.}

\item{chrInfo}{a \code{vector} of positive \code{integer} values
representing the length of the chromosomes. See 'details' section.}

\item{syntheticRefDF}{a \code{data.frame} containing a subset of
reference profiles for each sub-population present in the Reference GDS
file. The \code{data.frame} must have those columns:
\describe{
\item{sample.id}{ a \code{character} string representing the sample
identifier. }
\item{pop.group}{ a \code{character} string representing the
subcontinental population assigned to the sample. }
\item{superPop}{ a \code{character} string representing the
super-population assigned to the sample. }
}}

\item{genoSource}{a \code{character} string with three possible values:
'snp-pileup', 'generic' or 'VCF'. It specifies if the genotype files
are generated by snp-pileup (Facets) or are a generic format CSV file
with at least those columns:
'Chromosome', 'Position', 'Ref', 'Alt', 'Count', 'File1R' and 'File1A'.
The 'Count' is the depth at the specified position;
'File1R' is the depth of the reference allele and
'File1A' is the depth of the specific alternative allele.
Finally the file can be a VCF file with at least those genotype
fields: GT, AD, DP.}

\item{studyType}{a \code{character} string representing the type of study.
The possible choices are: "DNA" and "RNA". The type of study affects the
way the estimation of the allelic fraction is done. Default: \code{"DNA"}.}

\item{np}{a single positive \code{integer} specifying the number of
threads to be used. Default: \code{1L}.}

\item{blockTypeID}{a \code{character} string corresponding to the block
type used to extract the block identifiers. The block type must be
present in the GDS Reference Annotation file. Default: \code{NULL}.}

\item{verbose}{a \code{logical} indicating if messages should be printed
to show the progress of the different steps in the function.
Default: \code{FALSE}.}
}
\value{
The integer \code{0L} when successful. See details section for
more information about the generated output files.
}
\description{
This function runs most steps leading to the ancestry inference
call on a specific profile. First, the function creates the Profile GDS file
for the specific profile using the information from a RDS Sample
description file and the Population reference GDS file.
}
\details{
The runWrapperAncestry() function generates 3 types of files
in the \code{pathOut} directory.
\describe{
\item{Ancestry Inference}{ The ancestry inference CSV file
(".Ancestry.csv" file)}
\item{Inference Information}{ The inference information RDS file
(".infoCall.rds" file)}
\item{Synthetic Information}{ The parameter information RDS files
from the synthetic inference ("KNN.synt.*.rds" files in a sub-directory)}
}

In addition, a sub-directory (named using the profile ID) is
also created.
}
\examples{

## Required library for GDS
library(SNPRelate)

## The demo 1KG GDS file is located in this package
dataDir <- system.file("extdata", package="RAIDS")

#################################################################
## Load the information about the profile
#################################################################
data(demoPedigreeEx1)
head(demoPedigreeEx1)

#################################################################
## The 1KG GDS file and the 1KG SNV Annotation GDS file
## need to be located in the same directory
## Note that the 1KG GDS file used for this example is a
## simplified version and CANNOT be used for any real analysis
#################################################################
path1KG <- file.path(dataDir, "tests")

fileReferenceGDS  <- file.path(path1KG, "ex1_good_small_1KG.gds")
fileAnnotGDS <- file.path(path1KG, "ex1_good_small_1KG_Annot.gds")

#################################################################
## The Sample SNP pileup files (one per sample) need
## to be located in the same directory.
#################################################################
pathGeno <- file.path(dataDir, "example", "snpPileup")

#################################################################
## The path where the Profile GDS Files (one per sample)
## will be created needs to be specified.
#################################################################
pathProfileGDS <- file.path(tempdir(), "out.tmp")

pathOut <- file.path(tempdir(), "res.out")

#################################################################
## A data frame containing general information about the study
## is also required. The data frame must have
## those 3 columns: "study.id", "study.desc", "study.platform"
#################################################################
studyDF <- data.frame(study.id="MYDATA",
                        study.desc="Description",
                        study.platform="PLATFORM",
                        stringsAsFactors=FALSE)

####################################################################
## Fix seed to ensure reproducible results
####################################################################
set.seed(3043)

gds1KG <- snpgdsOpen(fileReferenceGDS)
dataRef <- select1KGPop(gds1KG, nbProfiles=2L)
closefn.gds(gds1KG)

## Required library for this example to run correctly
if (requireNamespace("Seqinfo", quietly=TRUE) &&
     requireNamespace("BSgenome.Hsapiens.UCSC.hg38", quietly=TRUE)) {

    ## Chromosome length information
    ## chr23 is chrX, chr24 is chrY and chr25 is chrM
    chrInfo <- Seqinfo::seqlengths(BSgenome.Hsapiens.UCSC.hg38::Hsapiens)[1:25]

    \dontrun{

        RAIDS:::runWrapperAncestry(pedStudy=demoPedigreeEx1, studyDF=studyDF,
            pathProfileGDS=pathProfileGDS,
            pathGeno=pathGeno, pathOut=pathOut,
            fileReferenceGDS=fileReferenceGDS,
            fileReferenceAnnotGDS=fileAnnotGDS,
            chrInfo=chrInfo, syntheticRefDF=dataRef,
            studyType="DNA", genoSource="snp-pileup")

        unlink(pathProfileGDS, recursive=TRUE, force=TRUE)
        unlink(pathOut, recursive=TRUE, force=TRUE)

    }
}

}
\references{
Galinsky KJ, Bhatia G, Loh PR, Georgiev S, Mukherjee S, Patterson NJ,
Price AL. Fast Principal-Component Analysis Reveals Convergent Evolution
of ADH1B in Europe and East Asia. Am J Hum Genet. 2016 Mar 3;98(3):456-72.
doi: 10.1016/j.ajhg.2015.12.022. Epub 2016 Feb 25.
}
\author{
Pascal Belleau, Astrid Deschênes and Alexander Krasnitz
}
\keyword{internal}
