% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{splitDataByChromatin}
\alias{splitDataByChromatin}
\title{Split methylation data into regions based on the chromatin states}
\usage{
splitDataByChromatin(
  dat,
  chr,
  cell.line,
  states,
  gap = -1,
  min.cpgs = 50,
  max.cpgs = 2000,
  verbose = TRUE
)
}
\arguments{
\item{dat}{a data frame with rows as individual CpGs appearing
in all the samples. The first 4 columns should contain the information of
\code{Meth_Counts} (methylated counts), \code{Total_Counts} (read depths),
\code{Position} (genomic position of the CpG site) and \code{ID} (sample ID).
The covariate information, such as disease status
or cell type composition, is listed in column 5 and onwards.}

\item{chr}{character vector containing the chromosome information. Its length
should be equal to the number of rows in \code{dat}.}

\item{cell.line}{character defining the cell line of interest. Nine cell
lines are available:
\itemize{
\item \code{"gm12878"}: Lymphoblastoid cells GM12878,
\item \code{"h1hesc"}: Embryonic cells H1 hESC,
\item \code{"hepg2"}: Liver carcinoma HepG2,
\item \code{"hmec"}: Mammary epithelial cells HMEC,
\item \code{"hsmm"}: Skeletal muscle myoblasts HSMM,
\item \code{"huvec"}: Umbilical vein endothelial HUVEC,
\item \code{"k562"}: Myelogenous leukemia K562,
\item \code{"nhek"}: Keratinocytes NHEK,
\item \code{"nhlf"}: Normal human lung fibroblasts NHLF.
}}

\item{states}{character vector defining the chromatin states of interest
among the following available options:
\itemize{
\item \code{"ActivePromoter"}: Active Promoter
\item \code{"WeakPromoter"}: Weak Promoter
\item \code{"PoisedPromoter"}: Poised Promoter
\item \code{"StrongEnhancer"}: Strong Enhancer
\item \code{"WeakEnhancer"}: Weak/poised Enhancer
\item \code{"Insulator"}: Insulator
\item \code{"TxnTransition"}: Transcriptional Transition
\item \code{"TxnElongation"}: Transcriptional Elongation
\item \code{"WeakTxn"}: Weak Transcribed
\item \code{"Repressed"}: Polycomb-Repressed
\item \code{"Heterochrom"}: Heterochromatin; low signal
\item \code{"RepetitiveCNV"}: Repetitive/Copy Number Variation
}
Use \code{states = "all"} to select all the states simultaneously.}

\item{gap}{this integer defines the maximum gap that is allowed between
two regions to be considered as overlapping.
According to the \code{GenomicRanges::findOverlaps} function,
the gap between 2 ranges is the number of positions that separate them.
The gap between 2 adjacent ranges is 0. By convention when one range has
its start or end strictly inside the other (i.e. non-disjoint ranges),
the gap is considered to be -1.
Decimal values will be rounded to the nearest integer.
The default value is \code{-1}.}

\item{min.cpgs}{positive integer defining the minimum number of
CpGs within a region for the algorithm to perform optimally.
The default value is 50.}

\item{max.cpgs}{positive integer defining the maximum number of
CpGs within a region for the algorithm to perform optimally.
The default value is 2000.}

\item{verbose}{logical indicating whether the algorithm should provide
progress report information.
The default value is \code{TRUE}.}
}
\value{
A \code{list} of \code{data.frame} containing the data of each
independent region.
}
\description{
This function splits the methylation data into regions
based on the chromatin states predicted by the ChromHMM software
(Ernst and Kellis, 2012).
The annotations come from the Bioconductor package \code{annotatr}.
Chromatin states determined by ChromHMM are
available in hg19 for nine cell lines (Gm12878, H1hesc, Hepg2, Hmec, Hsmm,
Huvec, K562, Nhek, and Nhlf).
}
\examples{
#------------------------------------------------------------#
data(RAdat)
RAdat.f <- na.omit(RAdat[RAdat$Total_Counts != 0, ])
results <- splitDataByChromatin(dat = RAdat.f, 
cell.line = "huvec", chr = rep(x = "chr4", times = nrow(RAdat.f)),
states = "Insulator", verbose = FALSE)

}
\author{
Audrey Lemaçon
}
