% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulation.R
\name{simDOT}
\alias{simDOT}
\title{Simulate Differential ORF Translation (DOT)}
\usage{
simDOT(
  ribo,
  rna,
  annotation = NULL,
  regulation_type = NULL,
  te_genes = 10,
  bgenes = 10,
  num_samples = 2,
  conditions = 2,
  gcoeff = 1.5,
  bcoeff = 0.9,
  num_batches = 2,
  size_factor = NULL,
  min_size = NULL,
  scale_p0 = NULL,
  shape = 0.6,
  scale = 0.5,
  batch_scenario = "balanced",
  diagplot_ribo = FALSE,
  diagplot_rna = FALSE
)
}
\arguments{
\item{ribo}{A matrix or data frame of ribosome profiling counts
(genes x samples).}

\item{rna}{A matrix or data frame of RNA-seq counts (genes x samples).}

\item{annotation}{A GRanges object with ORF level annotation,
typically obtained from \code{\link{getORFs}}.}

\item{regulation_type}{Character. Specifies the type of DOT effect to
simulate. Passed to the \code{scenario} argument of
\code{generate_coefficients}.}

\item{te_genes}{Numeric. Percentage of genes to be assigned as
differentially translated (default: 10).}

\item{bgenes}{Numeric. Percentage of genes to carry a batch effect
(default: 10).}

\item{num_samples}{Integer. Number of biological replicates per condition
(default: 2).}

\item{conditions}{Integer. Number of experimental conditions (default: 2).}

\item{gcoeff}{Numeric. Magnitude of log-fold change for DOT effects
(default: 1.5).}

\item{bcoeff}{Numeric. Magnitude of batch effect coefficient (default: 0.9).}

\item{num_batches}{Integer. Number of batches (default: 2).}

\item{size_factor}{Numeric scalar. A multiplicative factor applied to the
estimated size parameter (\eqn{r}) for all transcripts. Since
dispersion \eqn{\phi = 1/r}, a value greater than 1 (e.g., 1.5) will
decrease biological dispersion (noise), making the simulated data
less variable. A value less than 1 will increase dispersion
(default: \code{NULL}).}

\item{min_size}{Numeric scalar. A lower bound for the modified size
parameter (\eqn{r}). Any transcript whose modified \eqn{r} falls
below this value will be set to \code{min_size}. This caps maximum
dispersion and prevents unrealistic variability (default: \code{NULL}).}

\item{scale_p0}{Optional numeric scalar to scale the zero-inflation
probabilities.}

\item{shape}{Numeric. Shape parameter for gamma distribution used to
simulate baseline coefficients (default: 0.6).}

\item{scale}{Numeric. Scale parameter for gamma distribution used to
simulate baseline coefficients (default: 0.5).}

\item{batch_scenario}{Character. Specifies the batch effect design
(default: \code{"balanced"}). Must be one of:
\itemize{
\item \code{"balanced"}
\item \code{"confounded"}
\item \code{"random"}
\item \code{"unbalanced"}
\item \code{"nested"}
\item \code{"modality_specific"}
}}

\item{diagplot_ribo}{Logical. If \code{TRUE}, generate diagnostic plots
for ribo data (default: \code{FALSE}).}

\item{diagplot_rna}{Logical. If \code{TRUE}, generate diagnostic plots
for RNA data (default: \code{FALSE}).}
}
\value{
A \code{\link{DOTSeqDataSets-class}} object containing:
\describe{
\item{DOU}{
A \code{\link{DOUData-class}} object for modeling Differential ORF
Usage (DOU). It contains the simulated count matrix (\code{assay} slot),
sample metadata (\code{colData} slot), and ORF-level annotation
(\code{rowRanges} slot). The \code{rowRanges} slot also stores labels
(a named binary vector in which 1 marks a true positive) and logFC
(log-fold changes for the simulated DOU effect).
}
\item{DTE}{
A \code{\link{DTEData-class}} object used for modeling
Differential Translation Efficiency (DTE). It stores all of the data
described above except for \code{rowRanges}.
}
}
}
\description{
Simulates ribosome profiling and matched RNA-seq count matrices with
specified differential ORF translation (DOT) effects. The simulation can
include batch effects and supports multiple experimental conditions and
replicates.
}
\examples{
library(SummarizedExperiment)
dir <- system.file("extdata", package = "DOTSeq")

cnt <- read.table(file.path(dir, "featureCounts.cell_cycle_subset.txt.gz"),
    header = TRUE, comment.char = "#"
)
names(cnt) <- gsub(".*(SRR[0-9]+).*", "\\\\1", names(cnt))

flat <- file.path(dir, "gencode.v47.orf_flattened_subset.gtf.gz")
bed <- file.path(dir, "gencode.v47.orf_flattened_subset.bed.gz")

meta <- read.table(file.path(dir, "metadata.txt.gz"))
names(meta) <- c("run", "strategy", "replicate", "treatment", "condition")
cond <- meta[meta$treatment == "chx", ]
cond$treatment <- NULL

d <- DOTSeqDataSetsFromFeatureCounts(
    count_table = cnt,
    condition_table = cond,
    flattened_gtf = flat,
    flattened_bed = bed
)
raw_counts <- assay(getDOU(d))
raw_counts <- raw_counts[, grep("Cycling|Interphase",
    colnames(raw_counts))]
ribo <- raw_counts[, grep("ribo", colnames(raw_counts))]
rna <- raw_counts[, grep("rna", colnames(raw_counts))]
rowranges <- rowRanges(getDOU(d))
r <- "uORF_up_mORF_down"
g <- 1.5
d <- simDOT(
    ribo,
    rna,
    annotation = rowranges,
    regulation_type = r,
    gcoeff = g,
    num_samples = 1,
    num_batches = 2
)

show(d)

rowData(getDOU(d))

}
\references{
Frazee, A. C., Jaffe, A. E., Langmead, B., & Leek, J. T. (2015).
Polyester: Simulating RNA-seq datasets with differential transcript
expression. Bioinformatics, 31(17), 2778-2784.
DOI: 10.1093/bioinformatics/btv272

Chothani, S., Adami, E., Ouyang, J. F., Viswanathan, S., Hubner, N.,
Cook, S. A., Schafer, S., & Rackham, O. J. L. (2019). deltaTE: Detection
of translationally regulated genes by integrative analysis of Ribo-seq
and RNA-seq data. Current Protocols in Molecular Biology, 129, e108.
DOI: 10.1002/cpmb.108
}
