## A script to make the metadata.csv file located in the inst/extdata directory.
## See ?ExperimentHubData::makeExperimentHubMetadata for a description of the
## metadata.csv file, expected fields and data types. The
## ExperimentHubData::makeExperimentHubMetadata() function can be used to
## validate the metadata.csv file before submitting the package.

## Build the metadata table: one row per resource, one column per field that
## ends up in metadata.csv. All intermediate pieces live inside local() so
## that `meta` is the only object this step defines at top level.
meta <- local({
    nResources <- 6L

    ## Resource names, in the row order used by every other column below
    resourceTitles <- c(
        "exampleTENETMultiAssayExperiment",
        "exampleTENETClinicalDataFrame",
        "exampleTENETStep1MakeExternalDatasetsGRanges",
        paste0(
            "exampleTENETStep2GetDifferentiallyMethylatedSites",
            "PuritySummarizedExperiment"
        ),
        "exampleTENETPeakRegions",
        "exampleTENETTADRegions"
    )

    ## Free-text descriptions, one per resource. The strings are split into
    ## fragments only to keep source lines short; paste0() joins them as-is.
    multiAssayDescription <- paste0(
        "A MultiAssayExperiment dataset created using a modified version ",
        "of the `TCGADownloader` function from the TENET package ",
        "utilizing TCGAbiolinks package functionality. This object ",
        "contains two SummarizedExperiment objects, `expression` and ",
        "`methylation`, with expression data for 11,637 genes annotated ",
        "to the GENCODE v36 dataset, including all 1,637 identified human ",
        "TF genes, and DNA methylation data for 20,000 probes from the ",
        "Illumina HM450 methylation array. The data are aligned to the ",
        "human hg38 genome. Expression and methylation ",
        "values were matched from 200 tumor and 42 adjacent ",
        "normal tissue samples subset from the TCGA BRCA dataset. ",
        "Additionally, results from running the TENET step 1-6 functions ",
        "on these samples are included in the metadata of this ",
        "MultiAssayExperiment object. Clinical data for these samples are ",
        "included in the colData of the MultiAssayExperiment object. ",
        "(A separate data frame object containing a subset of the ",
        "clinical data for these samples is available as ",
        "`exampleTENETClinicalDataFrame`.) This dataset is included ",
        "to demonstrate TENET functions. Note: Because this dataset is ",
        "a small subset of the overall BRCA dataset, results generated by ",
        "TENET from this dataset differ from those presented for the BRCA ",
        "dataset at large in TENET publications."
    )
    clinicalDescription <- paste0(
        "A data frame containing example and simulated clinical ",
        "information corresponding to the samples in the ",
        "`exampleTENETMultiAssayExperiment` object, used to demonstrate ",
        "how TENET functions can import clinical data from a specified ",
        "data frame. Clinical data are utilized by the ",
        "`step2GetDifferentiallyMethylatedSites`, ",
        "`step7TopGenesSurvival`, and ",
        "`step7ExpressionVsDNAMethylationScatterplots` functions. The ",
        "data frame consists of vital status and time variables for use ",
        "by the `step7TopGenesSurvival` function, simulated purity data ",
        "for each sample, and simulated copy number variation ",
        "(CNV) and somatic mutation (SM) data for the top 10 genes by ",
        "number of linked hypermethylated and hypomethylated probes ",
        "derived from analyses done using the ",
        "`exampleTENETMultiAssayExperiment` object. These data are a ",
        "subset of the clinical data contained in the colData of the ",
        "`exampleTENETMultiAssayExperiment` object."
    )
    externalDatasetsDescription <- paste0(
        "A GenomicRanges dataset representing putative enhancer regions ",
        "relevant to BRCA, created using the ",
        "`step1MakeExternalDatasets` function in the TENET package ",
        "with the `consensusEnhancer`, `consensusNDR`, `publicEnhancer`, ",
        "`publicNDR`, and `ENCODEdELS` arguments all set to TRUE, and ",
        'the `cancerType` argument set to "BRCA". The data are aligned to ',
        "the human hg38 genome. This dataset is included to demonstrate ",
        "TENET's `step2GetDifferentiallyMethylatedSites` function."
    )
    purityDescription <- paste0(
        "A SummarizedExperiment object with three DNA methylation ",
        "datasets each composed of 10 adjacent normal colorectal ",
        "adenocarcinoma (COAD) samples from The Cancer Genome Atlas ",
        "(TCGA), retrieved using the TCGAbiolinks package. Each ",
        "dataset has data for 20,000 probes from the Illumina HM450 ",
        "methylation array, to match the number of probes in the ",
        "`exampleTENETMultiAssayExperiment` object. The data are ",
        "aligned to the human hg38 genome. This object is ",
        "representative of a `purity` dataset, which would contain DNA ",
        "methylation data from potentially confounding sources, used with ",
        "TENET's `step2GetDifferentiallyMethylatedSites` function."
    )
    peakDescription <- paste0(
        "A GenomicRanges dataset with example genomic regions (peaks) of ",
        "interest, used to demonstrate TENET's ",
        "`step7TopGenesUserPeakOverlap` function. The peaks are ",
        "derived from a ChIP-seq experiment on FOXA1 in MCF-7 cells and ",
        "aligned to the human hg38 genome. They were downloaded from ",
        "the ENCODE portal (file ENCFF112JVK in experiment ENCSR126YEB). ",
        "Citation: ENCODE Project Consortium; Moore JE, Purcaro MJ, ",
        "Pratt HE, et al. Expanded encyclopaedias of DNA elements in the ",
        "human and mouse genomes. Nature. 2020 Jul;583(7818):699-710. ",
        "doi: 10.1038/s41586-020-2493-4. Epub 2020 Jul 29. Erratum in: ",
        "Nature. 2022 May;605(7909):E3. PMID: 32728249; PMCID: PMC7410828."
    )
    tadDescription <- paste0(
        "A GenomicRanges dataset with example topologically associating ",
        "domains (TADs), used to demonstrate TENET's ",
        "`step7TopGenesTADTables` function. The TADs are derived from ",
        "T47D cells (mistakenly labeled as 'T470'), and aligned to the ",
        "human hg38 genome. They were downloaded from the 3D ",
        "Genome Browser at <http://3dgenome.fsm.northwestern.edu>. ",
        "Citation: Wang Y, Song F, Zhang B, et al. The 3D Genome ",
        "Browser: a web-based browser for visualizing 3D genome ",
        "organization and long-range chromatin interactions. Genome Biol. ",
        "2018 Oct 4;19(1):151. doi: 10.1186/s13059-018-1519-9. PMID: ",
        "30286773; PMCID: PMC6172833."
    )

    ## Source locations; three of the resources share the TCGAbiolinks page
    tcgaBiolinksUrl <-
        "https://bioconductor.org/packages/release/bioc/html/TCGAbiolinks.html"
    sourceUrls <- c(
        tcgaBiolinksUrl,
        tcgaBiolinksUrl,
        "https://github.com/rhielab/TENET.AnnotationHub_files",
        tcgaBiolinksUrl,
        paste0(
            "https://www.encodeproject.org/files/ENCFF112JVK/",
            "@@download/ENCFF112JVK.bed.gz"
        ),
        "http://3dgenome.fsm.northwestern.edu/downloads/hg38.TADs.zip"
    )

    ## Paths of the stored .Rda files, relative to each row's location prefix
    rdaPaths <- c(
        "records/17403577/files/exampleTENETMultiAssayExperiment.Rda",
        "TENET.ExperimentHub/exampleTENETClinicalDataFrame.Rda",
        paste0(
            "TENET.ExperimentHub/",
            "exampleTENETStep1MakeExternalDatasetsGRanges.Rda"
        ),
        paste0(
            "TENET.ExperimentHub/",
            "exampleTENETStep2GetDifferentiallyMethylatedSites",
            "PuritySummarizedExperiment.Rda"
        ),
        "TENET.ExperimentHub/exampleTENETPeakRegions.Rda",
        "TENET.ExperimentHub/exampleTENETTADRegions.Rda"
    )

    ## Colon-separated search tags for each resource
    resourceTags <- c(
        paste0(
            "TCGA:ExpressionData:DNAMethylationData:clinical:Survival:",
            "Homo_sapiens_Data:TENET"
        ),
        "TCGA:clinical:Survival:Homo_sapiens_Data:TENET",
        paste0(
            "ENCODE:GEO:TCGA:ChipSeq:DnaseSeq:H3K27ac:EpigenomeRoadMap:",
            "FANTOM5:peaks:Homo_sapiens_Data:TENET"
        ),
        "TCGA:DNAMethylationData:Homo_sapiens_Data:TENET",
        "ENCODE:FOXA1:ChIPSeqData:Homo_sapiens_Data:peaks:TENET",
        "TAD:Homo_sapiens_Data:TENET"
    )

    ## Assemble the columns. Length-one values (Species, TaxonomyId,
    ## Maintainer, DispatchClass) are recycled by data.frame() to every row.
    data.frame(
        Title = resourceTitles,
        Description = c(
            multiAssayDescription,
            clinicalDescription,
            externalDatasetsDescription,
            purityDescription,
            peakDescription,
            tadDescription
        ),
        BiocVersion = c("3.22", rep("3.20", nResources - 1L)),
        ## The clinical data frame has no genome build
        Genome = c("hg38", NA, rep("hg38", nResources - 2L)),
        SourceType = rep(c("Multiple", "BED"), times = c(4L, 2L)),
        SourceUrl = sourceUrls,
        SourceVersion = rep(NA, nResources),
        Species = "Homo sapiens",
        TaxonomyId = 9606,
        Coordinate_1_based = rep(TRUE, nResources),
        DataProvider = c(
            "TCGA", "Multiple", "Multiple", "TCGA", "ENCODE",
            "3D Genome Browser"
        ),
        Maintainer = c(
            "Rhie Lab at the University of Southern California <rhielab@gmail.com>"
        ),
        RDataClass = c(
            "MultiAssayExperiment", "data.frame", "GRanges",
            "SummarizedExperiment", "GRanges", "GRanges"
        ),
        DispatchClass = "Rda",
        ## Only the first resource sets an explicit prefix; the rest are NA
        Location_Prefix = c("https://zenodo.org/", rep(NA, nResources - 1L)),
        RDataPath = rdaPaths,
        Tags = resourceTags
    )
})

## Create the inst/extdata directory if it doesn't exist. recursive = TRUE
## makes dir.create() also create a missing parent "inst" directory; without
## it, the call only emits a warning and no directory is created.
if (!dir.exists("inst/extdata")) {
    dir.create("inst/extdata", recursive = TRUE)
}

## Save the metadata table as inst/extdata/metadata.csv, without row names
utils::write.csv(
    x = meta,
    file = "inst/extdata/metadata.csv",
    row.names = FALSE
)
