The examples here show how iscream output can be converted into other data structures for further analysis.
library(iscream)
## iscream using 1 thread by default but parallelly::availableCores() detects 4 possibly available threads. See `?set_threads` for information on multithreading before trying to use more.
## 'tabix' executable not found in $PATH. tabix() will use htslib to make queries instead which can be slower. See ?tabix for details.
data_dir <- system.file("extdata", package = "iscream")
bedfiles <- list.files(data_dir, pattern = "[a|b|c|d].bed.gz$", full.names = TRUE)
regions <- c(A = "chr1:1-6", B = "chr1:7-10", C = "chr1:11-14")
if (!require("GenomicRanges", quietly = TRUE)) {
stop("The 'GenomicRanges' package must be installed for this functionality")
}
GRanges objects can be used as the input regions to all of iscream’s functions
and can be returned by tabix_gr() and make_mat_gr().
tabix queriestabix_gr() returns a GenomicRanges object. The regions parameter can be a
string vector, a data frame or a GRanges object. If given a GRanges object
with metadata, those columns will be preserved in the output.
gr <- GRanges(regions)
values(gr) <- DataFrame(
gene = c("gene1", "gene2", "gene3"),
some_metadata = c("s1", "s2", "s3")
)
gr
## GRanges object with 3 ranges and 2 metadata columns:
## seqnames ranges strand | gene some_metadata
## <Rle> <IRanges> <Rle> | <character> <character>
## A chr1 1-6 * | gene1 s1
## B chr1 7-10 * | gene2 s2
## C chr1 11-14 * | gene3 s3
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
tabix_gr(bedfiles[1], gr)
## GRanges object with 7 ranges and 4 metadata columns:
## seqnames ranges strand | V1 V2 gene some_metadata
## <Rle> <IRanges> <Rle> | <numeric> <integer> <character> <character>
## [1] chr1 1-2 * | 1.0 1 gene1 s1
## [2] chr1 3-4 * | 1.0 1 gene1 s1
## [3] chr1 5-6 * | 0.0 2 gene1 s1
## [4] chr1 7-8 * | 0.0 1 gene2 s2
## [5] chr1 9-10 * | 0.5 2 gene2 s2
## [6] chr1 11-12 * | 1.0 2 gene3 s3
## [7] chr1 13-14 * | 1.0 3 gene3 s3
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
The data.table output of tabix() can also be piped into GRanges, but does
not preserve input metadata.
tabix(bedfiles[1], gr) |>
makeGRangesFromDataFrame(
starts.in.df.are.0based = TRUE,
keep.extra.columns = TRUE
)
## GRanges object with 7 ranges and 2 metadata columns:
## seqnames ranges strand | V1 V2
## <Rle> <IRanges> <Rle> | <numeric> <integer>
## [1] chr1 1-2 * | 1.0 1
## [2] chr1 3-4 * | 1.0 1
## [3] chr1 5-6 * | 0.0 2
## [4] chr1 7-8 * | 0.0 1
## [5] chr1 9-10 * | 0.5 2
## [6] chr1 11-12 * | 1.0 2
## [7] chr1 13-14 * | 1.0 3
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
If the input BED file is not zero-based (e.g. Bismark coverage files), set
zero_based = FALSE in the tabix() call to get the correct conversion from
data frame to GenomicRanges.
summarize_regions()summarize_regions() returns a data frame with the input regions positions
and summaries of the BED-file data columns.
(summary <- summarize_regions(
bedfiles,
regions,
column = 4,
col_names = ("data_col"),
fun = c("sum", "mean"))
)
## [18:19:10.482745] [iscream::summarize_regions] [info] Summarizing 3 regions from 4 bedfiles
## [18:19:10.482790] [iscream::summarize_regions] [info] using sum, mean
## [18:19:10.482796] [iscream::summarize_regions] [info] with columns 4 as data_col
## chr start end file feature data_col.sum data_col.mean
## <char> <int> <int> <char> <char> <num> <num>
## 1: chr1 1 6 a A 2.0 0.6666667
## 2: chr1 7 10 a B 0.5 0.2500000
## 3: chr1 11 14 a C 2.0 1.0000000
## 4: chr1 1 6 b A 1.0 0.5000000
## 5: chr1 7 10 b B 1.0 1.0000000
## 6: chr1 11 14 b C 1.0 0.5000000
## 7: chr1 1 6 c A 1.0 1.0000000
## 8: chr1 7 10 c B 1.0 0.5000000
## 9: chr1 11 14 c C NA NA
## 10: chr1 1 6 d A 2.0 1.0000000
## 11: chr1 7 10 d B 0.5 0.2500000
## 12: chr1 11 14 d C 1.0 1.0000000
GRanges(summary)
## GRanges object with 12 ranges and 4 metadata columns:
## seqnames ranges strand | file feature data_col.sum
## <Rle> <IRanges> <Rle> | <character> <character> <numeric>
## [1] chr1 1-6 * | a A 2.0
## [2] chr1 7-10 * | a B 0.5
## [3] chr1 11-14 * | a C 2.0
## [4] chr1 1-6 * | b A 1.0
## [5] chr1 7-10 * | b B 1.0
## ... ... ... ... . ... ... ...
## [8] chr1 7-10 * | c B 1.0
## [9] chr1 11-14 * | c C NA
## [10] chr1 1-6 * | d A 2.0
## [11] chr1 7-10 * | d B 0.5
## [12] chr1 11-14 * | d C 1.0
## data_col.mean
## <numeric>
## [1] 0.666667
## [2] 0.250000
## [3] 1.000000
## [4] 0.500000
## [5] 1.000000
## ... ...
## [8] 0.50
## [9] NA
## [10] 1.00
## [11] 0.25
## [12] 1.00
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
make_matmake_mat_gr() returns a GRanges object for dense matrices.
make_mat_gr(bedfiles, regions, column = 4, mat_name = "beta")
## [18:19:10.539632] [iscream::query_all] [info] Querying 3 regions from 4 bedfiles
##
## [18:19:10.539972] [iscream::query_all] [info] Creating metadata vectors
## [18:19:10.540026] [iscream::query_all] [info] 7 loci found - 9993 extra rows allocated with 0 resizes
## [18:19:10.540029] [iscream::query_all] [info] Creating dense matrix
## GRanges object with 7 ranges and 4 metadata columns:
## seqnames ranges strand | a b c d
## <Rle> <IRanges> <Rle> | <numeric> <numeric> <numeric> <numeric>
## [1] chr1 0 * | 1.0 0 0 1.0
## [2] chr1 2 * | 1.0 0 1 1.0
## [3] chr1 4 * | 0.0 1 0 0.0
## [4] chr1 6 * | 0.0 1 0 0.0
## [5] chr1 8 * | 0.5 0 1 0.5
## [6] chr1 10 * | 1.0 0 0 0.0
## [7] chr1 12 * | 1.0 1 0 1.0
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
make_mat_se() returns a RangedSummarizedExperiment for both sparse and dense
matrices.
if (!require("SummarizedExperiment", quietly = TRUE)) {
stop("The 'SummarizedExperiment' package must be installed for this functionality")
}
make_mat_se(bedfiles, regions, column = 4, mat_name = "beta", sparse = TRUE)
## [18:19:13.730464] [iscream::query_all] [info] Querying 3 regions from 4 bedfiles
##
## [18:19:13.730804] [iscream::query_all] [info] Creating metadata vectors
## [18:19:13.730832] [iscream::query_all] [info] 7 loci found - 9993 extra rows allocated with 0 resizes
## [18:19:13.730840] [iscream::query_all] [info] Creating sparse matrix
## class: RangedSummarizedExperiment
## dim: 7 4
## metadata(0):
## assays(1): beta
## rownames: NULL
## rowData names(0):
## colnames(4): a b c d
## colData names(0):
BSseq objectsA bsseq object is a type of SummarizedExperiment, but it cannot handle sparse matrices:
if (!require("bsseq", quietly = TRUE)) {
stop("The 'bsseq' package must be installed for this functionality")
}
mats <- make_mat_bsseq(bedfiles, regions, sparse = FALSE)
## [18:19:19.065634] [iscream::query_all] [info] Querying 3 regions from 4 bedfiles
##
## [18:19:19.065993] [iscream::query_all] [info] Creating metadata vectors
## [18:19:19.066020] [iscream::query_all] [info] 7 loci found - 9993 extra rows allocated with 0 resizes
## [18:19:19.066023] [iscream::query_all] [info] Creating dense matrix
do.call(BSseq, mats)
## An object of type 'BSseq' with
## 7 methylation loci
## 4 samples
## has not been smoothed
## All assays are in-memory
sessionInfo()
## R Under development (unstable) (2026-03-05 r89546)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] bsseq_1.47.1 SummarizedExperiment_1.41.1
## [3] Biobase_2.71.0 MatrixGenerics_1.23.0
## [5] matrixStats_1.5.0 GenomicRanges_1.63.1
## [7] Seqinfo_1.1.0 IRanges_2.45.0
## [9] S4Vectors_0.49.0 BiocGenerics_0.57.0
## [11] generics_0.1.4 iscream_1.1.6
## [13] BiocStyle_2.39.0
##
## loaded via a namespace (and not attached):
## [1] farver_2.1.2 R.utils_2.13.0
## [3] Biostrings_2.79.5 bitops_1.0-9
## [5] fastmap_1.2.0 RCurl_1.98-1.17
## [7] GenomicAlignments_1.47.0 stringfish_0.18.0
## [9] XML_3.99-0.22 digest_0.6.39
## [11] lifecycle_1.0.5 statmod_1.5.1
## [13] compiler_4.6.0 rlang_1.1.7
## [15] sass_0.4.10 tools_4.6.0
## [17] yaml_2.3.12 data.table_1.18.2.1
## [19] rtracklayer_1.71.3 knitr_1.51
## [21] S4Arrays_1.11.1 curl_7.0.0
## [23] DelayedArray_0.37.0 RColorBrewer_1.1-3
## [25] abind_1.4-8 BiocParallel_1.45.0
## [27] HDF5Array_1.39.0 R.oo_1.27.1
## [29] grid_4.6.0 beachmat_2.27.3
## [31] Rhdf5lib_1.33.0 scales_1.4.0
## [33] gtools_3.9.5 dichromat_2.0-0.1
## [35] cli_3.6.5 rmarkdown_2.30
## [37] crayon_1.5.3 otel_0.2.0
## [39] RcppParallel_5.1.11-2 httr_1.4.8
## [41] rjson_0.2.23 DelayedMatrixStats_1.33.0
## [43] pbapply_1.7-4 cachem_1.1.0
## [45] rhdf5_2.55.13 parallel_4.6.0
## [47] BiocManager_1.30.27 XVector_0.51.0
## [49] restfulr_0.0.16 Matrix_1.7-4
## [51] jsonlite_2.0.0 bookdown_0.46
## [53] h5mread_1.3.2 locfit_1.5-9.12
## [55] limma_3.67.0 jquerylib_0.1.4
## [57] glue_1.8.0 parallelly_1.46.1
## [59] codetools_0.2-20 BiocIO_1.21.0
## [61] htmltools_0.5.9 rhdf5filters_1.23.3
## [63] BSgenome_1.79.1 R6_2.6.1
## [65] sparseMatrixStats_1.23.0 evaluate_1.0.5
## [67] lattice_0.22-9 R.methodsS3_1.8.2
## [69] Rsamtools_2.27.1 cigarillo_1.1.0
## [71] bslib_0.10.0 Rcpp_1.1.1
## [73] SparseArray_1.11.11 permute_0.9-10
## [75] xfun_0.56