#' Summarise the distribution of sRNA clusters across genomic features
#'
#' @description Calculates the number/proportion of each genomic feature type
#' within the supplied annotation(s), and the number/proportion of overlaps
#' between the sRNA clusters and each of these genomic feature types.
#'
#' @details
#' `RNAfeatures` calculates the number/percentage of sRNA clusters which
#' overlap with genomic features based on their genomic coordinates.
#'
#' The set of features considered is built as follows:
#' \itemize{
#'  \item all features in `annotation`, minus any feature type listed in
#'  `rmFeatures`;
#'  \item if features of type `"gene"` remain, two extra regions per gene,
#'  labelled `"upstream promoter"` and `"downstream promoter"`, both created
#'  with [GenomicRanges::promoters()] using a width of `promoterRegions`;
#'  \item repeats, labelled `"repeats"`, taken either from the `repeats` file
#'  or, when that is not supplied, from the features of type `repeat.type` in
#'  `annotation` (these features are then counted under both their original
#'  type and `"repeats"`).
#' }
#'
#' The "Dataset" count is the number of feature/cluster overlap pairs, so a
#' cluster which overlaps several features contributes once per overlap.
#' In the output, the characters `-`, `_` and `.` in feature type names are
#' replaced by spaces.
#'
#' @seealso [mobileRNA::RNAmergeAnnotations()] to merge 2 GFF files into 1.
#'
#' @param data data.frame; generated by [mobileRNA::RNAimport()]
#'
#' @param annotation character; path to a genome reference annotation file
#' (.gff/.gff1/.gff2/.gff3). Can be in compressed format (gzip). The file must
#' exist, otherwise an error is raised.
#'
#' @param repeats path; URL or connection to a GFFFile object. A genome
#' reference annotation file, which only contains information on repeat
#' sequences in the genome (.gff/.gff1/.gff2/.gff3). By default, this is not
#' required, however if there is a specific repeats annotation file for the
#' genome it is suggested to supply it. Can be in compressed format (gzip).
#'
#' @param promoterRegions numeric; defines the width of the promoter regions
#' created around genes. Default is 1000, which refers to promoters set at 1Kb
#' upstream of genes. The same width is used for the regions labelled
#' "downstream promoter".
#'
#' @param repeat.type character; feature type(s) in the `annotation` file which
#' represent repeats. Only used when `repeats` is not supplied.
#'
#' @param rmFeatures vector; type(s) of genomic features to not consider, such
#' as "gene" or "exon" etc. All listed types are removed from the annotation
#' before any other step.
#'
#' @return Returns a table containing the number and percentage of overlaps in
#' the supplied sRNA data set with genomic features within supplied annotation
#' and/or with repeats. The columns are "Genomic feature", "Genome",
#' "Genome Proportion (%)", "Dataset" and "Dataset Proportion (%)".
#'
#' @examples
#' data("sRNA_data")
#' features <- RNAfeatures(data = sRNA_data,
#'                        annotation = system.file("extdata",
#'                        "reduced_chr2_Tomato.gff.gz", package="mobileRNA"))
#'
#' @importFrom rtracklayer import
#' @importFrom GenomicRanges findOverlaps
#' @importFrom dplyr select
#' @importFrom dplyr mutate
#' @importFrom dplyr filter
#' @importFrom GenomicRanges makeGRangesFromDataFrame
#' @importFrom GenomicRanges GRangesList
#' @importFrom dplyr %>%
#' @importFrom GenomicRanges promoters
#' @importFrom dplyr inner_join
#' @importFrom GenomicRanges mcols
#' @importFrom S4Vectors queryHits
#' @importFrom dplyr rename
#' @export
RNAfeatures <- function(data, annotation,
                        repeats = NULL,
                        repeat.type = NULL,
                        promoterRegions = 1000,
                        rmFeatures = NULL) {
  # ---- argument validation ----
  if (base::missing(data)) {
    stop("data is missing. data must be an object of class matrix, data.frame,
         DataFrame. ")
  }
  if (!base::inherits(data, c("matrix", "data.frame", "DataFrame"))) {
    stop("data must be an object of class matrix, data.frame, DataFrame.")
  }
  if (missing(annotation) || is.null(annotation) ||
      identical(annotation, "")) {
    stop("annotation parameter is missing or empty.")
  }
  # Checked separately so that `||` never receives a non-scalar condition and
  # so that a wrong path is not reported as a missing argument.
  if (!is.character(annotation) || length(annotation) != 1L ||
      is.na(annotation)) {
    stop("annotation must be a single character string giving the path to ",
         "an annotation file.")
  }
  if (!file.exists(annotation)) {
    stop("annotation file does not exist: ", annotation)
  }

  annotation_info <- rtracklayer::import(annotation)

  # Drop unwanted feature types. `%in%` (rather than `!=`) so that a vector of
  # several types is handled correctly instead of being recycled element-wise.
  if (!is.null(rmFeatures)) {
    annotation_info <- annotation_info[!annotation_info$type %in% rmFeatures]
  }

  # ---- regions flanking the gene start ----
  # Only built when gene features are present; otherwise both stay NULL and
  # are left out of the combined feature set.
  gene_ranges <- annotation_info[annotation_info$type %in% "gene"]
  upstream_flank <- NULL
  downstream_flank <- NULL
  if (length(gene_ranges) > 0) {
    upstream_flank <- GenomicRanges::promoters(gene_ranges,
                                               upstream = promoterRegions,
                                               downstream = 0)
    upstream_flank$type <- rep("upstream promoter", length(upstream_flank))

    # NOTE(review): promoters(upstream = 0, downstream = n) is anchored at the
    # gene start, so this region presumably covers the first `n` bases of the
    # gene rather than sequence beyond the gene end - confirm this is the
    # intended meaning of "downstream promoter".
    downstream_flank <- GenomicRanges::promoters(gene_ranges,
                                                 upstream = 0,
                                                 downstream = promoterRegions)
    downstream_flank$type <- rep("downstream promoter",
                                 length(downstream_flank))
  }

  # ---- repeats ----
  # A dedicated repeats file takes priority; otherwise fall back to the
  # feature type(s) named in `repeat.type`, if any.
  if (!is.null(repeats)) {
    repeats <- rtracklayer::import(repeats)
    repeats$type <- rep("repeats", length(repeats))
  } else if (!is.null(repeat.type)) {
    repeats <- annotation_info[annotation_info$type %in% repeat.type]
    repeats$type <- rep("repeats", length(repeats))
  }

  # ---- combine all feature sets, skipping those which were not created ----
  feature_sets <- list(annotation_info, upstream_flank, downstream_flank,
                       repeats)
  feature_sets <- feature_sets[!vapply(feature_sets, is.null, logical(1))]
  allfeatures <- do.call(c, feature_sets)

  # convert the sRNA clusters to a GRanges object
  data_gr <- GenomicRanges::makeGRangesFromDataFrame(data,
                                                     keep.extra.columns = TRUE)

  # Find overlaps (features are the query, clusters the subject). Warnings are
  # suppressed as in the original implementation - presumably those raised
  # when the two objects do not share all sequence names.
  overlaps <- suppressWarnings(
    GenomicRanges::findOverlaps(allfeatures, data_gr)
  )

  # One entry per overlap: the feature involved in each hit
  overlapping_features <- allfeatures[S4Vectors::queryHits(overlaps)]

  # Extract the 'type' column from the metadata
  feature_types <- GenomicRanges::mcols(overlapping_features)$type

  # Count the overlaps per feature type (data set side)
  type_counts <- as.data.frame(table(feature_types)) %>%
    dplyr::mutate(`Dataset Proportion (%)` = (Freq / sum(Freq)) * 100) %>%
    dplyr::rename("Genomic feature" = feature_types) %>%
    dplyr::mutate("Genomic feature" = gsub("[-_.]", " ",
                                           `Genomic feature`)) %>%
    dplyr::rename(Dataset = Freq)

  # Count the features per feature type (genome side)
  feature_counts <- as.data.frame(table(allfeatures$type)) %>%
    dplyr::mutate(`Genome Proportion (%)` = (Freq / sum(Freq)) * 100) %>%
    dplyr::rename("Genomic feature" = Var1) %>%
    dplyr::mutate("Genomic feature" = gsub("[-_.]", " ",
                                           `Genomic feature`)) %>%
    dplyr::rename(Genome = Freq)

  # Keep the feature types present in both tables
  dplyr::inner_join(feature_counts, type_counts, by = "Genomic feature")
}
