#' @title Mogrify a transcriptome using a set of variants
#' @description
#' Use a set of SNPs, insertions and deletions to modify a reference
#' transcriptome
#'
#' @details
#' Produce a set of variant modified transcript sequences from a standard
#' reference genome.
#' Supported variants are SNPs, Insertions and Deletions
#'
#' Ranges needing to be masked, such as the Y-chromosome, or Y-PAR can be
#' provided.
#'
#' **It should be noted that this is a time consuming process**
#' Inclusion of a large set of insertions and deletions across an entire
#' transcriptome can involve individually modifying many thousands of
#' transcripts, which can be a computationally demanding task.
#' Whilst this can be parallelised using an appropriate number of cores, this
#' may also prove taxing for lower power laptops, and pre-emptively closing
#' memory hungry programs such as Slack, or internet browsers may be prudent.
#'
#' @param x Reference genome as either a DNAStringSet or BSgenome
#' @param var GRanges object containing the variants
#' @param exons GRanges object with ranges representing exons
#' @param alt_col Column from `var` containing alternate bases
#' @param trans_col Column from 'exons' containing the transcript_id
#' @param omit_ranges GRanges object containing ranges to omit, such as PAR-Y
#' regions, for example
#' @param tag Optional tag to add to all sequence names which were modified
#' @param sep Separator to place between sequence names & tag
#' @param var_tags logical(1) Add tags indicating which types of variant were
#' incorporated, with 's', 'i' and 'd' representing SNPs, Insertions and
#' Deletions respectively
#' @param var_sep Separator between any previous tags and variant tags
#' @param ol_vars Error handling for any overlapping variants. See
#' [cleanVariants] for possible values and an explanation
#' @param verbose logical(1) Include informative messages, or operate silently
#' @param mc.cores Number of cores to be used when multi-threading via
#' [parallel::mclapply]
#' @param which GRanges object passed to [VariantAnnotation::ScanVcfParam] if
#' using a VCF directly
#' @param ... Passed to [parallel::mclapply]
#'
#' @return An XStringSet
#'
#' @examples
#' library(GenomicRanges)
#' library(GenomicFeatures)
#' seq <- DNAStringSet(c(chr1 = "ACGTAAATGG"))
#' exons <- GRanges(c("chr1:1-3:-", "chr1:7-9:-"))
#' exons$transcript_id <- c("trans1")
#'
#' # For extractTranscriptSeqs, negative-strand exons must be in decreasing order
#' exons <- sort(exons, decreasing = TRUE, by = ~end)
#' exons
#' trByExon <- splitAsList(exons, exons$transcript_id)
#'
#' # Check the sequences
#' seq
#' extractTranscriptSeqs(seq, trByExon)
#'
#' # Define some variants
#' var <- GRanges(c("chr1:2", "chr1:8"))
#' var$ALT <- c("A", "GGG")
#'
#' # Include the variants adding tags to indicate a SNP and indel
#' # The exons GRanges object will be split by transcript internally
#' transmogrify(seq, var, exons, var_tags = TRUE)
#'
#'
#' @export
#' @name transmogrify
#' @rdname transmogrify-methods
setGeneric(
    name = "transmogrify",
    ## Methods are selected using the classes of x, var and exons
    def = function(x, var, exons, ...) standardGeneric("transmogrify")
)
#' @importFrom S4Vectors mcols splitAsList
#' @importFrom GenomeInfoDb seqlevels seqnames
#' @importFrom IRanges width subsetByOverlaps
#' @importFrom GenomicFeatures extractTranscriptSeqs
#' @importFrom parallel mclapply
#' @importFrom stats as.formula
#' @export
#' @rdname transmogrify-methods
#' @aliases transmogrify-methods
setMethod(
    "transmogrify",
    signature = signature(x = "XStringSet", var = "GRanges", exons = "GRanges"),
    function(
        x, var, exons, alt_col = "ALT", trans_col = "transcript_id",
        omit_ranges = NULL, tag = NULL, sep = "_", var_tags = FALSE,
        var_sep = "_", ol_vars = "fail", verbose = TRUE, mc.cores = 1, ...
    ) {

        ## Core method, with the reference as an XStringSet and the variants
        ## as a GRanges. The steps are:
        ## 1. Identify SNPs within 'var'
        ## 2. Identify InDels within 'var'
        ## 3. Substitute the SNPs into the reference
        ## 4. Extract Transcripts
        ## 5. Substitute the InDels
        ## 6. Optionally tag sequence names
        ##    - Use a common tag + specific tags for SNPs/Insertions/Deletions

        ## Checks. trans_col must match one of the columns in mcols(exons)
        trans_col <- match.arg(trans_col, colnames(mcols(exons)))
        var <- cleanVariants(var, ol_vars, alt_col = alt_col)

        ## Only retain variants on sequences within x, then separate these
        ## into snps & indels
        var <- subset(var, seqnames %in% seqlevels(x))
        type <- varTypes(var, alt_col)
        snps <- var[type == "SNV"]
        indels <- var[type != "SNV"]

        ## Remove any unwanted transcripts. A transcript is dropped in full
        ## if any one of its exons overlaps omit_ranges
        if (!is.null(omit_ranges)) {
            stopifnot(is(omit_ranges, "GRanges"))
            omit <- mcols(subsetByOverlaps(exons, omit_ranges))[[trans_col]]
            exons <- exons[!mcols(exons)[[trans_col]] %in% omit]
        }

        ## Sort by strand. Exons cannot be unstranded
        ex_strand <- strand(exons)
        if (any(ex_strand == "*"))
            stop("Unstranded exons found. Exons must be stranded")
        exList <- splitAsList(exons, ex_strand)
        exList[["+"]] <- sort(exList[["+"]])
        ## Exons on the negative strand are placed in decreasing order by end
        fm <- as.formula("~seqnames + end")
        exList[["-"]] <- sort(exList[["-"]], decreasing = TRUE, by = fm)
        exons <- unlist(exList)

        ## Find those with InDels as we only need to really work on these
        trans_with_indel <- mcols(subsetByOverlaps(exons, indels))[[trans_col]]
        trans_with_indel <- unique(trans_with_indel)
        if (verbose) message(
            length(trans_with_indel), " transcripts found with indels"
        )

        ## Modify the genome & extract sequences including SNPs
        new_ref <- owl(x, snps, alt_col = alt_col)
        ex_by_trans <- splitAsList(exons, mcols(exons)[[trans_col]])
        all_seq <- extractTranscriptSeqs(new_ref, ex_by_trans)
        ## Without any variants, there is nothing further to modify or tag
        if (length(var) == 0) return(all_seq)

        ## Now modify the transcripts with InDels. This is skipped entirely
        ## when no transcript overlaps an InDel, so no workers are started
        ## and no coercion of an empty list is attempted
        if (length(trans_with_indel) > 0) {
            cl <- class(all_seq)
            new_trans_seq <- mclapply(
                trans_with_indel,
                function(id) {
                    indelcator(
                        all_seq[[id]], indels, ex_by_trans[[id]], alt_col
                    )
                }, mc.cores = mc.cores, ...
            )
            names(new_trans_seq) <- trans_with_indel
            ## Return the list of sequences to the class of all_seq
            new_trans_seq <- as(new_trans_seq, cl)
            all_seq[trans_with_indel] <- new_trans_seq[trans_with_indel]
        }

        ## Add tags where needed
        tags <- varTags(ex_by_trans, var, tag, var_tags, var_sep, sep)
        names(all_seq) <- paste0(names(all_seq), tags)
        all_seq

    }
)
#' @import GenomicRanges
#' @importFrom Biostrings getSeq
#' @importFrom GenomeInfoDb seqnames
#' @export
#' @rdname transmogrify-methods
#' @aliases transmogrify-methods
setMethod(
    "transmogrify",
    signature = signature(x = "BSgenome", var = "GRanges", exons = "GRanges"),
    function(
        x, var, exons, alt_col = "ALT", trans_col = "transcript_id",
        omit_ranges = NULL, tag = NULL, sep = "_", var_tags = FALSE,
        var_sep = "_", ol_vars = "fail", verbose = TRUE, mc.cores = 1, ...
    ) {
        ## Only the sequences named in seqnames(exons) are taken from the
        ## BSgenome
        chroms <- unique(seqnames(exons))
        if (verbose) {
            message(
                "Extracting ", length(chroms),
                " sequences as a DNAStringSet...", appendLF = FALSE
            )
        }
        ref <- as(getSeq(x, chroms), "DNAStringSet")
        names(ref) <- chroms
        if (verbose) {
            message("done")
        }
        ## Call the generic again, now with the DNAStringSet as the reference
        transmogrify(
            ref, var, exons,
            alt_col = alt_col, trans_col = trans_col,
            omit_ranges = omit_ranges, tag = tag, sep = sep,
            var_tags = var_tags, var_sep = var_sep, ol_vars = ol_vars,
            verbose = verbose, mc.cores = mc.cores, ...
        )
    }
)
#' @importClassesFrom VariantAnnotation VcfFile
#' @importFrom Biostrings getSeq
#' @importFrom GenomeInfoDb seqnames
#' @export
#' @rdname transmogrify-methods
#' @aliases transmogrify-methods
setMethod(
    "transmogrify",
    signature = signature(x = "BSgenome", var = "VcfFile", exons = "GRanges"),
    function(
        x, var, exons, alt_col = "ALT", trans_col = "transcript_id",
        omit_ranges = NULL, tag = NULL, sep = "_", var_tags = FALSE,
        var_sep = "_", ol_vars = "fail", verbose = TRUE, mc.cores = 1, which,
        ...
    ) {
        ## Parse the VcfFile first, then call the generic again using the
        ## parsed variants in place of the file
        variants <- .parseVariants(var, alt_col, which)
        if (verbose) {
            message("Loaded ", length(variants), " variants")
        }
        transmogrify(
            x, variants, exons,
            alt_col = alt_col, trans_col = trans_col,
            omit_ranges = omit_ranges, tag = tag, sep = sep,
            var_tags = var_tags, var_sep = var_sep, ol_vars = ol_vars,
            verbose = verbose, mc.cores = mc.cores, ...
        )
    }
)
#' @importClassesFrom VariantAnnotation VcfFile
#'
#' @export
#' @rdname transmogrify-methods
#' @aliases transmogrify-methods
setMethod(
    "transmogrify",
    signature = signature(x = "XStringSet", var = "VcfFile", exons = "GRanges"),
    function(
        x, var, exons, alt_col = "ALT", trans_col = "transcript_id",
        omit_ranges = NULL, tag = NULL, sep = "_", var_tags = FALSE,
        var_sep = "_", ol_vars = "fail", verbose = TRUE, mc.cores = 1, which,
        ...
    ) {
        ## Parse the VcfFile first, then call the generic again using the
        ## parsed variants in place of the file
        var <- .parseVariants(var, alt_col, which)
        ## Report the number of variants, as is done when x is a BSgenome
        if (verbose) message("Loaded ", length(var), " variants")
        transmogrify(
            x, var, exons,
            alt_col = alt_col, trans_col = trans_col,
            omit_ranges = omit_ranges, tag = tag, sep = sep,
            var_tags = var_tags, var_sep = var_sep, ol_vars = ol_vars,
            verbose = verbose, mc.cores = mc.cores, ...
        )
    }
)
