## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  dev = "ragg_png",
  fig.ext = "png",
  dpi = 100,
  fig.width = 7,
  fig.height = 5,
  fig.crop = FALSE
)

## ----setup package loading, warning=FALSE-------------------------------------
library(PostChicago)

## ----Directory structure------------------------------------------------------
extdata <- system.file("extdata", package = "PostChicago")
dir(extdata)

path <- file.path(tempdir(), "my_package_output")
dir.create(path, showWarnings = FALSE)

#Before we run PostChicago, we define the default directories first:
dataPath <- file.path(extdata, "dataPath")

designDir <- file.path(extdata, "designDir")
intDir <- file.path(extdata, "intervals")

outputDir <- file.path(path, "postchicago")
dir.create(outputDir)

chicagoOutputDir <- file.path(extdata, "results")

## checking which .chinput files we have stored:
dir(dataPath)

## Check if you have already stored the chicagoData tables:
dir(chicagoOutputDir)

## ----set Chicago settings-----------------------------------------------------
weightsPath <- file.path(
  system.file("extdata", package = "Chicago"),
  "weights"
)
weightSettings <- file.path(weightsPath, "mESC-2reps.settings")
weightSettings <- read.delim(weightSettings, header = FALSE)

mySettings <- Chicago::defaultSettings()
mySettings[grep("weight", names(mySettings))] <- weightSettings[, 2]
mySettings$minFragLen <- 100

## ----data frame for runChicago------------------------------------------------
fls <- list.files(dataPath)
fls
    
df <- data.frame(
    filename = fls,
    RA = stringr::str_remove(fls, "_rep.*"),
    rep = stringr::str_remove(
        stringr::str_remove(fls, "^.*h_"), "\\.chinput$"
    )
)

df$type <- df$RA

head(df)

## ----Run Chicago, results='hide', message=FALSE, warning=FALSE----------------
runChicagoForPostChicago(
    df = df, mySettings = mySettings, outputDir = chicagoOutputDir,
    DataPath = dataPath, DesignDir = designDir
)

## ----Run Chicago on reps, echo=TRUE, message=FALSE, warning=FALSE-------------
df$type <- as.character(paste(df$RA, df$rep, sep = "_"))

head(df)

runChicagoForPostChicago(
    df = df, mySettings = mySettings, 
    outputDir = chicagoOutputDir,
    DataPath = dataPath, DesignDir = designDir
)


## ----read baited_genes and rmapgr, warning=FALSE------------------------------
baited_genes <- baitmap2baited_genes(designDir, save = FALSE)
rmapgr <- rmap2rmapgr(designDir, save = FALSE)

## ----Read chicagoData---------------------------------------------------------
dir(chicagoOutputDir)
cdlist <- loadCdList(
    resultsDir = chicagoOutputDir,
    baited_genes = baited_genes
)
names(cdlist)

## ----Extract L and LL---------------------------------------------------------
pooledSamples <- cdlist[-grep("rep", names(cdlist))]
replicateSamples <- cdlist[grep("rep", names(cdlist))]
rm(cdlist)

## ----plotSigIntsStats, fig.width=9, fig.height=9,warning=FALSE, fig.align='center', fig.cap=paste0('Basic statistics of Capture-C data.')----
plotSigIntsStats(pooledSamples, plotDistribution = FALSE, 
    plotExamples = TRUE, col = palette.colors(3)[2:3])

## ----plotInteractionsSimple, fig.width=5, fig.height=4, fig.small=TRUE, fig.cap='Capture-C signal +/- 200kb around the Hoxb3 TSS.'----
col <- palette.colors(3)[2:3]
name <- "Hoxb3"
id <- baited_genes[baited_genes$genename == name]$re_id
k <- 15
ylim <- c(0, 200)
plotInteractions(pooledSamples, id, k,
    ylim = ylim, show.legend = TRUE, name = name,
    rmapgr = rmapgr, col = col
)

## ----plotInteractionsReplicates, fig.width=5, fig.height=4, fig.small=TRUE, fig.cap='Replicate Capture-C +/- 500kb around the Hoxb3 TSS.'----
colreps <- rep(col, each = 2)
lty <- rep(1:2, 2)
zoom <- 500000
k <- 41
plotInteractions(replicateSamples, id, k,
    zoom = zoom, ylim = ylim, 
    show.legend = TRUE, name = name,
    rmapgr = rmapgr, col = colreps, lty = lty
)

## ----plotInteractionsLegendOutside, fig.width=8, fig.height=4, fig.cap= 'Capture-C signal within a set interval.'----
par(mfrow = c(1, 2))
xlim <- c(96150000, 96350000)
zoom <- as.numeric(NA)
k <- 31
plotInteractions(pooledSamples, id,
    zoom = zoom, k = k, ylim = ylim, 
    show.legend = FALSE, name = name,
    rmapgr = rmapgr, col = col, 
    show.legend.outside = TRUE, xlim = xlim
)

## ----read in annotation intervals, warning=FALSE------------------------------
dir(intDir)
grl <- loadGrl(intDir = intDir)

## ----plotInteractionsWithIntervals, fig.width=9,fig.height=4, fig.cap=paste0('Capture-C signal and ChIP-Seq intervals around Hoxb3. H3K27ac/H3K27me3 peaks are shown in blue/green')----
colintervals <- palette.colors(5)[4:5]

zoom <- 1000000
k <- 51

par(mfrow = c(1, 2))
plotInteractions(pooledSamples, id, k,
    zoom = zoom, ylim = ylim, show.legend = FALSE,
    show.legend.intervals = FALSE, name = name,
    intervals = grl, colintervals = colintervals,
    rmapgr = rmapgr, col = col, show.legend.outside = TRUE
)

## ----Create Bedgraphs, message=FALSE, results='hide'--------------------------
## define the bait of interest:
bait <- unique(pooledSamples[[1]]$baitID)[1] 

## create bedgraphs for the selected view point:
makeBedGraphs(bait, pooledSamples, rmapgr,
    baited_genes = baited_genes,
    outputDir = paste0(outputDir, "/bedgraphs")
)

## ----Create Ints file, message=FALSE------------------------------------------
ints <- makeIntsTable(pooledSamples, baited_genes,
    repscores = TRUE, LL = replicateSamples,
    ngroups = 1, outfolder = outputDir
)

## ----annotateInts, warning=FALSE----------------------------------------------
ints <- annotateInts(ints = ints, grl = grl)

## ----QCHeatmaps, message=FALSE,fig.width=12, fig.height=10, fig.cap= 'QC heatmaps'----
makeQCplots(ints, outputDir, fontsize=8)

## ----lineplotsWithInteractions, fig.width=8, fig.height=4, fig.cap= 'Visualization of significant interactions within lineplots. Significant interactions around Hoxb3 are shaded.'----
name <- "Hoxb3"
id <- baited_genes[baited_genes$genename == name]$re_id
k <- 31
zoom <- 500000
ylim <- c(0, 100)

par(mfrow = c(1, 2))
plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = FALSE,
    show.legend.intervals = FALSE, name = name,
    intervals = grl, colintervals = colintervals,
    rmapgr = rmapgr, col = col,
    show.legend.outside = TRUE, d = ints
)

## ----lineplotsWithInteractionsClusters, fig.width=8, fig.height=4, fig.cap= 'Significant interactions colored by interaction type.'----
## select interactions detected only at 0h, these are 'lost'
lost <- ints[ints$RA_0h_score >= 5 & ints$RA_72h_score < 3, ]$intID

## select interactions detected only at 72h, these are 'gained'
gained <- ints[ints$RA_0h_score < 3 & ints$RA_72h_score >= 5, ]$intID

## annotate the interaction table with interaction types:
ints$clusters_refined <- "both"
ints[ints$intID %in% lost, ]$clusters_refined <- "RA_0h"
ints[ints$intID %in% gained, ]$clusters_refined <- "RA_72h"

par(mfrow = c(1, 2))
plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = FALSE,
    show.legend.intervals = FALSE,
    name = name, intervals = grl,
    colintervals = colintervals,
    rmapgr = rmapgr, col = col,
    show.legend.outside = TRUE, d = ints
)

## ----makeOneGeneOnePeak, warning=FALSE, message=FALSE, fig.width=8, fig.height=6----
quantfolder <- paste0(outputDir, "/oneGeneOnePeak")
ogoplist <- makeOneGeneOnePeak(
    grl = grl, rmapgr = rmapgr, 
    ints = ints, folder = quantfolder
)

##look at the oneGeneOnePeak table:
ogoplist[[1]][1:2,]

##The tables are also saved in the quantfolder:
list.files(path = quantfolder, pattern = "oneGeneOnePeak")

## ----examplesOneGeneOnePeak, fig.width=8, fig.height=4, fig.cap= 'Visualization of oneGeneOnePeak interactions. Interactions with H3K27me3 and H3K27ac are shaded.'----
fls_ogop <- list.files(path = quantfolder, pattern = "oneGeneOnePeak")

## Load oneGeneOnePeak file for H3K27me3 peaks:
ogop_K27me3 <- read.delim(paste(quantfolder, 
                                fls_ogop[grep("K27me3", fls_ogop)],sep = "/"), 
                          stringsAsFactors = FALSE)
ogop_K27me3$clusters_refined <- "K27me3_interaction"

## Load oneGeneOnePeak file for H3K27ac peaks:
ogop_K27ac <- read.delim(
    paste(quantfolder, fls_ogop[grep("K27ac", fls_ogop)],sep = "/"), 
    stringsAsFactors = FALSE
)
ogop_K27ac$clusters_refined <- "K27ac_interaction"

par(mfrow = c(1, 2))

## K27ac-overlapping interactions
plotInteractions(pooledSamples, id, k, zoom = zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("K27ac interactions \n", sep = ","),
    intervals = grl, colintervals = colintervals,
    rmapgr = rmapgr, col = col, d = ogop_K27ac,
    colints = colintervals[1]
)

## K27me3-overlapping interactions
plotInteractions(pooledSamples, id, k, zoom = zoom,
  ylim = ylim, show.legend = TRUE,
  name = paste("K27me3 interactions \n", sep = ","),
  intervals = grl, rmapgr = rmapgr, col = col,
  colintervals = colintervals, d = ogop_K27me3,
  colints = colintervals[2]
)


## ----CreateInteractionMatrices, message=FALSE, warning=FALSE------------------
name <- names(grl)[grep("K27ac", names(grl))]
name

## Zoom defines the +/- distance (in restriction fragments) around the intervals
## for which the matrices will be created
zoom <- 100

## Folder in which the matrices will be stored:
resfolder <- paste0(outputDir, "/matrices/")
if (!file.exists(resfolder)) {
    dir.create(resfolder)
}

## un-normalized matrix with reads
m_reads <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27ac, resfolder = resfolder, type = "reads",
    norm = FALSE, name = name, rmapgr = rmapgr
)

## matrix normalized to the center of interactions
## in sample 1 (as defined per normsam) with reads
m_reads_norm <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27ac, resfolder = resfolder, type = "reads",
    norm = TRUE, name = name, rmapgr = rmapgr, normsam = 1
)

## matrix normalized to the center of interactions
## in sample 2 (as defined per normsam) with reads
m_reads_norm2 <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27ac, resfolder = resfolder, type = "reads",
    norm = TRUE, name = name, rmapgr = rmapgr, normsam = 2
)

## un-normalized matrix with scores
m_scores <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27ac, resfolder = resfolder, type = "scores",
    norm = FALSE, name = name, rmapgr = rmapgr
)

## matrix normalized to the center of interactions
## in sample 1 (as defined per normsam) with scores
m_scores_norm <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27ac, resfolder = resfolder, type = "scores",
    norm = TRUE, name = name, rmapgr = rmapgr, normsam = 1
)

## matrix normalized to the center of interactions
## in sample 2 (as defined per normsam) with scores
m_scores_norm2 <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27ac, resfolder = resfolder, type = "scores",
    norm = TRUE, name = name, rmapgr = rmapgr, normsam = 2
)

## ----PlotAggregateProfiles, message=FALSE, warning=FALSE, fig.width=8, fig.height=6, fig.cap='Aggregate Capture-C profiles. Dotted lines: Distance-matched background.'----
## Loading precomputed matrices:
name <- names(grl)[grep("K27ac", names(grl))]
name

## Plots:

colaggr <- rep(col, 2)
lty <- rep(c(1, 3), each = 2)

par(mfrow = c(2, 3))

plotAggregatePeaks(m_reads,
    plotwhat = "median", k = 3, xlim = c(-50, 50),
    mainprefix = "K27ac\n", ylab = "reads",
    col = colaggr, lty = lty
)

plotAggregatePeaks(m_reads_norm,
    plotwhat = "median", k = 3, xlim = c(-50, 50),
    mainprefix = "K27ac norm to 0h\n", ylab = "reads_norm",
    col = colaggr, lty = lty
)

plotAggregatePeaks(m_reads_norm2,
    plotwhat = "median", k = 3, xlim = c(-50, 50),
    mainprefix = "K27ac norm to 72h\n", ylab = "reads_norm",
    col = colaggr, lty = lty
)

plotAggregatePeaks(m_scores,
    plotwhat = "median", k = 3, xlim = c(-50, 50),
    mainprefix = "K27ac scores\n", ylab = "scores",
    col = colaggr, lty = lty
)

plotAggregatePeaks(m_scores_norm,
    plotwhat = "median", k = 3, xlim = c(-50, 50),
    mainprefix = "K27ac scores norm to 0h\n",
    ylab = "scores_norm",
    col = colaggr, lty = lty
)

plotAggregatePeaks(m_scores_norm2,
    plotwhat = "median", k = 3, xlim = c(-50, 50),
    mainprefix = "K27ac scores norm to 72h\n",
    ylab = "scores_norm",
    col = colaggr, lty = lty
)

## ----PlotAggregateProfilesComparison, message=FALSE, warning=FALSE, fig.width=7, fig.height=4, fig.cap='Aggregate Capture-C profiles for different intervals.'----
name <- names(grl)[grep("K27me3", names(grl))]
name

## matrix normalized to the center of interactions
## in sample 2 (as defined per normsam) with reads
m_reads_norm2_K27me3 <- getMatrix(pooledSamples,
    zoom = zoom, d = ogop_K27me3, resfolder = resfolder,
    type = "reads", norm = TRUE,
    name = name, rmapgr = rmapgr, normsam = 2
)

# Plots:

colaggr <- rep(col, 2)
lty <- rep(c(1, 3), each = 2)

par(mfrow = c(1, 2))
plotAggregatePeaks(m_reads_norm2,
    plotwhat = "median", k = 3,
    xlim = c(-50, 50),
    mainprefix = "K27ac norm to 72h\n",
    ylab = "reads_norm", col = colaggr, lty = lty
)

plotAggregatePeaks(m_reads_norm2_K27me3,
    plotwhat = "median", k = 3,
    xlim = c(-50, 50),
    mainprefix = "K27me3 norm to 72h\n",
    ylab = "reads_norm", col = colaggr, lty = lty
)

## ----PlotAggregateProfilesDifferentReadNormalization, message=FALSE, warning=FALSE, fig.width=7, fig.height=4, fig.cap=paste0('Aggregate Capture-C profiles for different normalizations. Downsampling normalizes read counts within one experiment, whereas scaling  \n', 'also normalizes to the number of capture view points.')----
## matrix with scaling reads:
m_reads_scaling <- getMatrix(pooledSamples, zoom, ogop_K27ac,
    resfolder = resfolder, type = "reads", norm = FALSE,
    name = name, rmapgr = rmapgr, readnorm = "scaling"
)

## plots:
par(mfrow = c(1, 2))
plotAggregatePeaks(m_reads,
    plotwhat = "median", k = 3,
    xlim = c(-50, 50),
    mainprefix = "K27ac downsampled\n",
    ylab = "reads", col = colaggr, lty = lty
)

plotAggregatePeaks(m_reads_scaling,
    plotwhat = "median", k = 3,
    xlim = c(-50, 50),
    mainprefix = "K27ac scaled\n",
    ylab = "reads", col = colaggr, lty = lty
)

## ----quantifyWithinPeaks------------------------------------------------------
fls_ogop <- list.files(path = quantfolder, pattern = "oneGeneOnePeak")

## Load oneGeneOnePeak file for H3K27me3 peaks:
ogop_K27me3 <- read.delim(paste(quantfolder, fls_ogop[grep("K27me3", fls_ogop)],
                sep = "/"), stringsAsFactors = FALSE)
ogop_K27me3$clusters_refined <- "K27me3 interaction"

## Quantify
annotatedTable <- quantifyWithinPeaks(
    data = ogop_K27me3, 
    cdlist = pooledSamples, 
    cdlist.reps = replicateSamples, 
    rmapgr = rmapgr, 
    minsize = 100, 
    maxsize = 40000,
    file = paste0(outputDir, '/aggregated5_annotated.txt')
)

##Look at the oneGeneOnePeak table before...
names(ogop_K27me3)

##and after the annotation
names(annotatedTable)

## ----boxplotsCapC, warning=FALSE,message=FALSE, fig.width=7, fig.height=6, fig.small=TRUE, fig.cap= paste0('Boxplots for quantitative comparison of Capture-C signal. Two types of read or score quantification is provided that give slightly different results:  \n', '(i) Total signal within a oneGeneOnePeak interaction and  \n', '(ii) Total signal normalized to the number of restriction fragments within which the reads  \n', 'were quantified (right).')----
##plot samples:
boxplotsCapC(annotatedTable, col=col, show_notch = FALSE)

## ----boxplotsCapCReps, warning=FALSE,message=FALSE, fig.width=7, fig.height=6, fig.small=TRUE, fig.cap= paste0('Boxplots for replicate comparison.')----
##plot replicate data:
boxplotsCapC(annotatedTable, col=rep(col,each=2), 
    reps=TRUE, show_notch = FALSE
)

## ----aggregatePeaks, fig.width=8, fig.height=7, fig.cap =paste0('Lineplots with different types of interactions. Interactions are shaded, either for fragment-to-fragment interactions (top left),  \n', 'aggregated within 5 restriction fragments over all (top right), RA_0h-specific (bottom-left)  \n', 'and RA_72h-specific (bottom right) significant interactions.')----
## Define names of significant interactions shown in the plot
ints$clusters_refined <- "interaction"

## Distance in restriction fragments over which to aggregate:
dis <- 5

## Let's aggregate all interactions within 5 fragments distance of each other:
aggregated5 <- aggregatePeaks(ints, dis, names(pooledSamples),
    fileprefix = "ints", outdir = outputDir
)

## Let's aggregate all interactions within 5 fragments distance of each other
## that are significant at 0h:
aggregated5_0h <- aggregatePeaks(ints[ints$RA_0h_score >= 5, ], dis,
    names(pooledSamples),
    fileprefix = "ints_0h", outdir = outputDir
)

## Let's aggregate all interactions within 5 fragments distance of each other
## that are significant at 72h:
aggregated5_72h <- aggregatePeaks(ints[ints$RA_72h_score >= 5, ], dis,
  names(pooledSamples),
  fileprefix = "ints_72h",
  outdir = outputDir
)

nrow(ints)
nrow(aggregated5)
nrow(aggregated5_0h)
nrow(aggregated5_72h)

## Example plots:
name <- "Hoxb3"
id <- baited_genes[baited_genes$genename == name]$re_id
k <- 11
ylim <- c(0, 200)
xlim <- c(96150000, 96350000)
zoom <- as.numeric(NA)
colints <- 'yellow'

par(mfrow = c(2, 2))

plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("Fragment-level ints \n", name, sep = ""),
    rmapgr = rmapgr, col = col, d = ints, 
    xlim = xlim, colints = colints
)

plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("Aggregated ints, dis = ", dis, " frags \n", 
                 name, sep = ""),
    rmapgr = rmapgr, col = col, d = aggregated5, 
    xlim = xlim, colints = colints
)

plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("Aggregated RA_0h ints, dis = ", dis, " frags \n", 
                 name, sep = ""),
    rmapgr = rmapgr, col = col, d = aggregated5_0h, 
    xlim = xlim, colints = colints
)

plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("Aggregated RA_72h ints, dis = ", dis, " frags \n", 
                 name, sep = ""),
    rmapgr = rmapgr, col = col, d = aggregated5_72h, 
    xlim = xlim, colints = colints
)

## ----aggregatePeaksRegions, fig.width=7, fig.height=3.5, fig.cap ='Aggregation by distance between significant interactions. Interactions are either aggregated over 5 resitriction fragments (left) or over 10kb'----

## Distance in bp over which to aggregate:
disbp <- 10000

## Let's aggregate all interactions within 5 fragments distance of each other:
aggregated10kb <- aggregatePeaks_regions(ints, disbp, names(L),
    fileprefix = "ints",
    outdir = outputDir
)
nrow(aggregated10kb)

## plot:
par(mfrow = c(1, 2))
plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("Aggregated ints, dis = ", dis, 
                 " frags \n", name, sep = ""),
    rmapgr = rmapgr, col = col, d = aggregated5, 
    xlim = xlim, colints = colints
)
plotInteractions(pooledSamples, id, k, zoom,
    ylim = ylim, show.legend = TRUE,
    name = paste("Aggregated ints, dis = ", disbp / 1000, 
                 "kb \n", name, sep = ""),
    rmapgr = rmapgr, col = col, d = aggregated10kb, 
    xlim = xlim, colints = colints
)

## ----quantifyWithinPeaks_aggregatePeaks---------------------------------------
annotatedTable <- quantifyWithinPeaks(
    data = aggregated5, 
    cdlist = pooledSamples, 
    cdlist.reps = replicateSamples, 
    rmapgr = rmapgr, 
    minsize = 100, 
    maxsize = 40000,
    file = paste0(outputDir, '/aggregated5_annotated.txt')
)


## -----------------------------------------------------------------------------
sessionInfo()