Contents

1 Introduction

Sequence-based TF affinity scoring can be conducted with the FIMO suite; see Sonawane et al. (2017). We have serialized an object with references to FIMO outputs for 16 TFs.

# Attach the two packages used below, one call each, without startup banners.
suppressPackageStartupMessages(library(TFutils))
suppressPackageStartupMessages(library(GenomicRanges))

# Show the serialized GenomicFiles object that references the FIMO outputs.
fimo16
## GenomicFiles object with 0 ranges and 16 files: 
## files: M0635_1.02sort.bed.gz, M3433_1.02sort.bed.gz, ..., M6159_1.02sort.bed.gz, M6497_1.02sort.bed.gz 
## detail: use files(), rowRanges(), colData(), ...

Although the token "bed" appears in the filenames, the files are not actually in BED format.

2 Importing with scanTabix

We can use reduceByRange to import selected scans.

# Import the FIMO scans for two TFs over a 20 kb window on chr17.
# The chunk is skipped when running on Windows.
if (.Platform$OS.type != "windows") {
  si <- TFutils::seqinfo_hg19_chr17
  myg <- GRanges("chr17", IRanges(38.07e6, 38.09e6), seqinfo = si)
  # Label the files by HGNC symbol so that TFs can be selected by name.
  colnames(fimo16) <- fimo16$HGNC
  # fimo16 is shipped with 0 ranges, so reduceByRange() has nothing to
  # iterate over unless the query is installed as the rowRanges first.
  gf <- fimo16[, c("POU2F1", "VDR")]
  SummarizedExperiment::rowRanges(gf) <- myg
  # Namespace-qualify scanTabix so the MAP step does not depend on
  # Rsamtools being attached.
  lk2 <- GenomicFiles::reduceByRange(
    gf,
    MAP = function(r, f) Rsamtools::scanTabix(f, param = r)
  )
  str(lk2)
}

This result can be massaged into a GRanges or other desirable structure. fimo_granges takes care of this.

#fimo_granges = function(gf, query) { # prototypical code
# rowRanges(gf) = query
# ans = GenomicFiles::reduceByRange(gf, MAP=function(r,f) scanTabix(f, param=r))
# ans = unlist(ans, recursive=FALSE)  # drop top list structure
# tabs = lapply(ans, lapply, function(x) {
#     con = textConnection(x)
#     on.exit(close(con))
#     dtf = read.delim(con, h=FALSE, stringsAsFactors=FALSE, sep="\t")
#     colnames(dtf) = c("chr", "start", "end", "rname", "score", "dir", "pval")
#     ans = with(dtf, GRanges(seqnames=chr, IRanges(start, end),
#            rname=rname, score=score, dir=dir, pval=pval))
#     ans
#     })
# GRangesList(unlist(tabs, recursive=FALSE))
#}
# Convert the scans for the two selected TFs over the query range `myg`
# with fimo_granges(); the chunk is skipped on Windows.
if (.Platform$OS.type != "windows") {
  rr <- fimo_granges(
    fimo16[, c("POU2F1", "VDR")],
    myg
  )
  rr
}

# Record the R session used to build this document.
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows Server 2022 x64 (build 20348)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=C                          
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] UpSetR_1.4.0                magrittr_2.0.3             
##  [3] dplyr_1.1.4                 gwascat_2.41.1             
##  [5] GSEABase_1.71.0             graph_1.87.0               
##  [7] annotate_1.87.0             XML_3.99-0.18              
##  [9] png_0.1-8                   ggplot2_3.5.2              
## [11] knitr_1.50                  data.table_1.17.8          
## [13] GO.db_3.21.0                GenomicFiles_1.45.2        
## [15] SummarizedExperiment_1.39.1 rtracklayer_1.69.1         
## [17] Rsamtools_2.25.2            Biostrings_2.77.2          
## [19] XVector_0.49.0              MatrixGenerics_1.21.0      
## [21] matrixStats_1.5.0           GenomicRanges_1.61.1       
## [23] Seqinfo_0.99.2              BiocParallel_1.43.4        
## [25] org.Hs.eg.db_3.21.0         AnnotationDbi_1.71.1       
## [27] IRanges_2.43.0              S4Vectors_0.47.0           
## [29] Biobase_2.69.0              BiocGenerics_0.55.1        
## [31] generics_0.1.4              TFutils_1.29.1             
## [33] BiocStyle_2.37.0           
## 
## loaded via a namespace (and not attached):
##  [1] DBI_1.2.3                bitops_1.0-9             gridExtra_2.3           
##  [4] httr2_1.2.1              readxl_1.4.5             rlang_1.1.6             
##  [7] compiler_4.5.1           RSQLite_2.4.2            GenomicFeatures_1.61.6  
## [10] vctrs_0.6.5              pkgconfig_2.0.3          crayon_1.5.3            
## [13] fastmap_1.2.0            dbplyr_2.5.0             labeling_0.4.3          
## [16] promises_1.3.3           rmarkdown_2.29           UCSC.utils_1.5.0        
## [19] bit_4.6.0                xfun_0.52                cachem_1.1.0            
## [22] GenomeInfoDb_1.45.9      jsonlite_2.0.0           blob_1.2.4              
## [25] later_1.4.2              DelayedArray_0.35.2      parallel_4.5.1          
## [28] R6_2.6.1                 VariantAnnotation_1.55.1 bslib_0.9.0             
## [31] RColorBrewer_1.1-3       jquerylib_0.1.4          cellranger_1.1.0        
## [34] bookdown_0.43            Rcpp_1.1.0               BiocBaseUtils_1.11.2    
## [37] splines_4.5.1            httpuv_1.6.16            Matrix_1.7-3            
## [40] tidyselect_1.2.1         dichromat_2.0-0.1        abind_1.4-8             
## [43] yaml_2.3.10              codetools_0.2-20         miniUI_0.1.2            
## [46] curl_6.4.0               plyr_1.8.9               lattice_0.22-7          
## [49] tibble_3.3.0             withr_3.0.2              shiny_1.11.1            
## [52] KEGGREST_1.49.1          evaluate_1.0.4           survival_3.8-3          
## [55] BiocFileCache_2.99.5     snpStats_1.59.2          pillar_1.11.0           
## [58] BiocManager_1.30.26      filelock_1.0.3           RCurl_1.98-1.17         
## [61] scales_1.4.0             xtable_1.8-4             glue_1.8.0              
## [64] tools_4.5.1              BiocIO_1.19.0            BSgenome_1.77.1         
## [67] GenomicAlignments_1.45.2 restfulr_0.0.16          cli_3.6.5               
## [70] rappdirs_0.3.3           S4Arrays_1.9.1           gtable_0.3.6            
## [73] sass_0.4.10              digest_0.6.37            SparseArray_1.9.1       
## [76] rjson_0.2.23             farver_2.1.2             memoise_2.0.1           
## [79] htmltools_0.5.8.1        lifecycle_1.0.4          httr_1.4.7              
## [82] mime_0.13                bit64_4.6.0-1