0.1 Load Data and Libraries

# Load Libraries
library(tidyverse)
library(tidyexposomics)

We will start off with our example dataset pulled from the ISGlobal Exposome Data Challenge 2021 (Maitre et al., 2022).

# Load the example dataset bundled with the package
data("tidyexposomics_example")

# Omics datasets, keyed by assay name
omics_assays <- list(
    "Gene Expression" = tidyexposomics_example$exp_filt,
    "Methylation" = tidyexposomics_example$methyl_filt
)

# Row data for each assay, keyed by the same assay names
assay_row_data <- list(
    "Gene Expression" = tidyexposomics_example$exp_fdata,
    "Methylation" = tidyexposomics_example$methyl_fdata
)

# Combine the codebook, exposure data, and omics layers into one
# exposomic set object
expom <- create_exposomicset(
    codebook = tidyexposomics_example$annotated_cb,
    exposure = tidyexposomics_example$meta,
    omics = omics_assays,
    row_data = assay_row_data
)
## Ensuring all omics datasets are matrices with column names.
## Creating SummarizedExperiment objects.
## Creating MultiAssayExperiment object.
## MultiAssayExperiment created successfully.

We will focus on a few exposure variable categories.

# Exposure categories this vignette focuses on
categories_of_interest <- c(
    "aerosol",
    "main group molecular entity",
    "polyatomic entity"
)

# Keep the codebook rows in those categories, then collect their
# variable names as a plain character vector
codebook_subset <- filter(
    tidyexposomics_example$annotated_cb,
    category %in% categories_of_interest
)
exp_vars <- as.character(pull(codebook_subset, variable))

1 Quality Control


As in the main vignette, we will impute missing exposure values using the missForest method.

# Impute missing values
# missForest imputation is random-forest based and therefore stochastic.
# Fix the RNG seed first so the imputed values, and the scores and
# associations computed from them later in this vignette, can be
# reproduced from one build to the next.
set.seed(1234)
expom <- run_impute_missing(
    exposomicset = expom,
    exposure_impute_method = "missforest",
    exposure_cols = exp_vars
)
## Imputing exposure data using method: missforest

Next, we will transform the exposure data using the boxcox_best method so that it is closer to normally distributed.

# Apply the "boxcox_best" transformation to the selected exposure
# columns and keep the updated object
expom <- expom |>
    transform_exposure(
        exposomicset = _,
        exposure_cols = exp_vars,
        transform_method = "boxcox_best"
    )
## Applying the boxcox_best transformation.

1.1 Exposome Scores


We can calculate exposome scores, which are summary measures of exposure, with the run_exposome_score function. The exposure_cols argument sets the columns used to compute the score, and the score_type argument sets the type of score to calculate. Here we could use:

  • median: Calculates the median of the exposure variables.

  • mean: Calculates the mean of the exposure variables.

  • sum: Calculates the sum of the exposure variables.

  • pca: Calculates the first principal component of the exposure variables.

  • irt: Uses Item Response Theory to calculate the exposome score.

  • quantile: Calculates the quantile of the exposure variables.

  • var: Calculates the variance of the exposure variables.

The score_column_name argument is used to set the name of the column to store the exposome score in. Here we will define a score for aerosols using a variety of different methods and demonstrate their use in association with asthma status.

# determine which aerosol variables to use
# (these two columns are the only inputs to every score computed below)
aerosols <- c("h_pm25_ratio_preg_None", "h_pm10_ratio_preg_None")

# Create exposome scores
# Each run_exposome_score() call summarises the same `aerosols` columns with a
# different score_type and stores the result under its own score_column_name,
# so the scores can be compared against the outcome afterwards.
# NOTE(review): `|>` rewrites this chain into nested calls, which presumably
# explains why the rendered log prints every "Extracting exposure data..."
# message before the first "Calculating ..." message -- keep the chain as-is
# if the log order matters.
expom <- expom |>
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "median",
        score_column_name = "exposome_median_score"
    ) |>
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "pca",
        score_column_name = "exposome_pca_score"
    ) |>
    # NOTE(review): the rendered log shows "EM cycles terminated after 500
    # iterations" right after the IRT step, which suggests the IRT fit hit its
    # iteration cap -- interpret exposome_irt_score with care and confirm.
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "irt",
        score_column_name = "exposome_irt_score"
    ) |>
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "quantile",
        score_column_name = "exposome_quantile_score"
    ) |>
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "var",
        score_column_name = "exposome_var_score"
    )
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Calculating median exposure scores...
## Calculating PCA exposure scores...
## Calculating IRT exposure scores...
## Warning: EM cycles terminated after 500 iterations.
## Calculating quantile exposure scores...
## Calculating variance exposure scores...

We can then associate these exposome scores with asthma status using the run_association function, just as in the main vignette. This time, however, we set the feature_set argument to the exposome scores we just calculated.

# Exposome score columns to test against the outcome
score_features <- c(
    "exposome_median_score",
    "exposome_pca_score",
    "exposome_irt_score",
    "exposome_quantile_score",
    "exposome_var_score"
)

# Model asthma status (binomial family) on each exposome score and add the
# results to the exposomic set
expom <- expom |>
    run_association(
        exposomicset = _,
        outcome = "hs_asthma",
        source = "exposures",
        feature_set = score_features,
        action = "add",
        family = "binomial"
    )
## Running GLMs.
# Exposome score terms to show in the forest plot
score_terms <- c(
    "exposome_median_score",
    "exposome_pca_score",
    "exposome_irt_score",
    "exposome_quantile_score",
    "exposome_var_score"
)

# Draw the association forest plot for those terms, filtering on the
# p.value column at 0.05 and reading R-squared from the r2 column
expom |>
    plot_association(
        exposomicset = _,
        source = "exposures",
        terms = score_terms,
        filter_col = "p.value",
        filter_thresh = 0.05,
        r2_col = "r2"
    )
Associations of aerosol exposome scores with asthma status. The variance-based score has the strongest association with asthma status.

Figure 1: Associations of aerosol exposome scores with asthma status
The variance-based score has the strongest association with asthma status.

1.2 Session Info

# Print the R version, platform, and package versions used for this build
utils::sessionInfo()
## R version 4.6.0 RC (2026-04-17 r89917)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tidyexposomics_0.99.16      MultiAssayExperiment_1.37.4
##  [3] SummarizedExperiment_1.41.1 Biobase_2.71.0             
##  [5] GenomicRanges_1.63.2        Seqinfo_1.1.0              
##  [7] IRanges_2.45.0              S4Vectors_0.49.2           
##  [9] BiocGenerics_0.57.1         generics_0.1.4             
## [11] MatrixGenerics_1.23.0       matrixStats_1.5.0          
## [13] lubridate_1.9.5             forcats_1.0.1              
## [15] stringr_1.6.0               dplyr_1.2.1                
## [17] purrr_1.2.2                 readr_2.2.0                
## [19] tidyr_1.3.2                 tibble_3.3.1               
## [21] ggplot2_4.0.2               tidyverse_2.0.0            
## [23] BiocStyle_2.39.0           
## 
## loaded via a namespace (and not attached):
##   [1] naniar_1.1.0          httr_1.4.8            RColorBrewer_1.1-3   
##   [4] ggsci_5.0.0           tools_4.6.0           doRNG_1.8.6.3        
##   [7] backports_1.5.1       utf8_1.2.6            R6_2.6.1             
##  [10] DT_0.34.0             vegan_2.7-3           mgcv_1.9-4           
##  [13] permute_0.9-10        withr_3.0.2           gridExtra_2.3        
##  [16] progressr_0.19.0      cli_3.6.6             factoextra_2.0.0     
##  [19] RGCCA_3.0.3           labeling_0.4.3        sass_0.4.10          
##  [22] S7_0.2.1-1            randomForest_4.7-1.2  proxy_0.4-29         
##  [25] pbapply_1.7-4         foreign_0.8-91        R.utils_2.13.0       
##  [28] dichromat_2.0-0.1     sessioninfo_1.2.3     parallelly_1.47.0    
##  [31] itertools_0.1-3       limma_3.67.2          rstudioapi_0.18.0    
##  [34] RSQLite_2.4.6         car_3.1-5             Matrix_1.7-5         
##  [37] clipr_0.8.0           abind_1.4-8           R.methodsS3_1.8.2    
##  [40] lifecycle_1.0.5       yaml_2.3.12           carData_3.0-6        
##  [43] recipes_1.3.2         SparseArray_1.11.13   BiocFileCache_3.1.0  
##  [46] grid_4.6.0            blob_1.3.0            promises_1.5.0       
##  [49] crayon_1.5.3          lattice_0.22-9        magick_2.9.1         
##  [52] pillar_1.11.1         knitr_1.51            corpcor_1.6.10       
##  [55] future.apply_1.20.2   mixOmics_6.35.2       codetools_0.2-20     
##  [58] glue_1.8.1            beepr_2.0             data.table_1.18.2.1  
##  [61] vctrs_0.7.3           Rdpack_2.6.6          testthat_3.3.2       
##  [64] gtable_0.3.6          assertthat_0.2.1      cachem_1.1.0         
##  [67] gower_1.0.2           xfun_0.57             rbibutils_2.4.1      
##  [70] S4Arrays_1.11.1       mime_0.13             prodlim_2026.03.11   
##  [73] survival_3.8-6        timeDate_4052.112     audio_0.1-12         
##  [76] iterators_1.0.14      tinytex_0.59          hardhat_1.4.3        
##  [79] lava_1.9.0            statmod_1.5.1         ipred_0.9-15         
##  [82] nlme_3.1-169          fenr_1.9.2            bit64_4.8.0          
##  [85] filelock_1.0.3        splines2_0.5.4        bslib_0.10.0         
##  [88] Deriv_4.2.0           otel_0.2.0            rpart_4.1.27         
##  [91] colorspace_2.1-2      DBI_1.3.0             Hmisc_5.2-5          
##  [94] nnet_7.3-20           tidyselect_1.2.1      bit_4.6.0            
##  [97] compiler_4.6.0        curl_7.0.0            httr2_1.2.2          
## [100] htmlTable_2.4.3       DelayedArray_0.37.1   stringfish_0.19.0    
## [103] bookdown_0.46         checkmate_2.3.4       scales_1.4.0         
## [106] rappdirs_0.3.4        digest_0.6.39         mirai_2.6.1          
## [109] rmarkdown_2.31        XVector_0.51.0        htmltools_0.5.9      
## [112] pkgconfig_2.0.3       base64enc_0.1-6       SimDesign_2.25       
## [115] dbplyr_2.5.2          fastmap_1.2.0         rlang_1.2.0          
## [118] htmlwidgets_1.6.4     shiny_1.13.0          farver_2.1.2         
## [121] jquerylib_0.1.4       jsonlite_2.0.0        BiocParallel_1.45.0  
## [124] dcurver_0.9.3         ModelMetrics_1.2.2.2  R.oo_1.27.1          
## [127] magrittr_2.0.5        Formula_1.2-5         patchwork_1.3.2      
## [130] Rcpp_1.1.1-1          visdat_0.6.0          stringi_1.8.7        
## [133] pROC_1.19.0.1         brio_1.1.5            MASS_7.3-65          
## [136] plyr_1.8.9            parallel_4.6.0        listenv_0.10.1       
## [139] ggrepel_0.9.8         splines_4.6.0         hms_1.1.4            
## [142] igraph_2.3.0          ggpubr_0.6.3          ranger_0.18.0        
## [145] ggsignif_0.6.4        rngtools_1.5.2        reshape2_1.4.5       
## [148] qs2_0.1.7             GPArotation_2025.3-1  tidybulk_2.1.2       
## [151] evaluate_1.0.5        RcppParallel_5.1.11-2 BiocManager_1.30.27  
## [154] tzdb_0.5.0            nanonext_1.8.2        foreach_1.5.2        
## [157] missForest_1.6.1      httpuv_1.6.17         future_1.70.0        
## [160] mirt_1.46.1           BiocBaseUtils_1.13.0  broom_1.0.12         
## [163] xtable_1.8-8          e1071_1.7-17          RSpectra_0.16-2      
## [166] rstatix_0.7.3         later_1.4.8           class_7.3-23         
## [169] rARPACK_0.11-0        memoise_2.0.1         ellipse_0.5.0        
## [172] cluster_2.1.8.2       timechange_0.4.0      globals_0.19.1       
## [175] caret_7.0-1