# Load Libraries
library(tidyverse)
library(tidyexposomics)
We will start off with our example dataset pulled from the ISGlobal Exposome Data Challenge 2021 (Maitre et al., 2022).
# Load the example dataset into the session
data("tidyexposomics_example")

# Collect the omics assays and their row data; both lists are keyed by the
# same assay names
omics_assays <- list(
  "Gene Expression" = tidyexposomics_example$exp_filt,
  "Methylation" = tidyexposomics_example$methyl_filt
)
omics_row_data <- list(
  "Gene Expression" = tidyexposomics_example$exp_fdata,
  "Methylation" = tidyexposomics_example$methyl_fdata
)

# Create the exposomic set object from the codebook, exposure data, and omics
expom <- create_exposomicset(
  codebook = tidyexposomics_example$annotated_cb,
  exposure = tidyexposomics_example$meta,
  omics = omics_assays,
  row_data = omics_row_data
)
## Ensuring all omics datasets are matrices with column names.
## Creating SummarizedExperiment objects.
## Creating MultiAssayExperiment object.
## MultiAssayExperiment created successfully.
We will focus on a few exposure variable categories.
# Keep the codebook rows whose category is one of the groups of interest
selected_rows <- filter(
  tidyexposomics_example$annotated_cb,
  category %in% c(
    "aerosol",
    "main group molecular entity",
    "polyatomic entity"
  )
)

# Grab the matching exposure variable names as a character vector
exp_vars <- as.character(pull(selected_rows, variable))
As in the main vignette, we will impute missing exposure values using the missForest method (exposure_impute_method = "missforest").
# Impute missing values
# missForest imputation is random-forest based and therefore stochastic. Fix
# the RNG seed first so the imputed values, and every result derived from
# them further down, are reproducible from run to run.
# NOTE(review): assumes run_impute_missing() does not set its own seed
# internally -- confirm against the package documentation.
set.seed(1234)
expom <- run_impute_missing(
  exposomicset = expom,
  exposure_impute_method = "missforest",
  exposure_cols = exp_vars
)
## Imputing exposure data using method: missforest
Next, we will transform our exposure data with the boxcox_best method so that it is closer to normally distributed.
# Transform the selected exposure columns using the "boxcox_best" method
expom <- expom |>
  transform_exposure(
    exposomicset = _,
    transform_method = "boxcox_best",
    exposure_cols = exp_vars
  )
## Applying the boxcox_best transformation.
We can calculate exposome scores, which are summary measures of exposure, with the run_exposome_score function. The exposure_cols argument sets the columns used to compute the score, and the score_type argument sets the type of score to calculate. The available score types are:
median: Calculates the median of the exposure variables.
mean: Calculates the mean of the exposure variables.
sum: Calculates the sum of the exposure variables.
pca: Calculates the first principal component of the exposure variables.
irt: Uses Item Response Theory to calculate the exposome score.
quantile: Calculates the quantile of the exposure variables.
var: Calculates the variance of the exposure variables.
The score_column_name argument is used to set the name of the column to store the exposome score in. Here we will define a score for aerosols using a variety of different methods and demonstrate their use in association with asthma status.
# Aerosol exposure variables that feed every score below
aerosols <- c("h_pm25_ratio_preg_None", "h_pm10_ratio_preg_None")

# Thin forwarding wrapper: fixes exposure_cols to the aerosol variables so
# each step in the chain only names the score type and the output column
score_aerosols <- function(exposomicset, score_type, score_column_name) {
  run_exposome_score(
    exposomicset,
    exposure_cols = aerosols,
    score_type = score_type,
    score_column_name = score_column_name
  )
}

# Create exposome scores, one output column per scoring method
expom <- expom |>
  score_aerosols("median", "exposome_median_score") |>
  score_aerosols("pca", "exposome_pca_score") |>
  score_aerosols("irt", "exposome_irt_score") |>
  score_aerosols("quantile", "exposome_quantile_score") |>
  score_aerosols("var", "exposome_var_score")
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Calculating median exposure scores...
## Calculating PCA exposure scores...
## Calculating IRT exposure scores...
## Warning: EM cycles terminated after 500 iterations.
## Calculating quantile exposure scores...
## Calculating variance exposure scores...
We can then associate these exposome scores with asthma status using the run_association function, as in the main vignette. This time, however, we set feature_set to the exposome scores we just calculated.
# Exposome score columns to test against the outcome
score_features <- c(
  "exposome_median_score",
  "exposome_pca_score",
  "exposome_irt_score",
  "exposome_quantile_score",
  "exposome_var_score"
)

# Associate the exposome scores with the "hs_asthma" outcome using the
# binomial family, adding the results to the exposomic set
expom <- run_association(
  exposomicset = expom,
  outcome = "hs_asthma",
  source = "exposures",
  feature_set = score_features,
  action = "add",
  family = "binomial"
)
## Running GLMs.
# Exposome score terms to display in the plot
score_terms <- c(
  "exposome_median_score",
  "exposome_pca_score",
  "exposome_irt_score",
  "exposome_quantile_score",
  "exposome_var_score"
)

# Draw the association forest plot for the score terms, filtering on the
# "p.value" column at a threshold of 0.05 and using "r2" as the R-squared
# column
expom |>
  plot_association(
    exposomicset = _,
    source = "exposures",
    terms = score_terms,
    filter_col = "p.value",
    filter_thresh = 0.05,
    r2_col = "r2"
  )
Figure 1: Associations of aerosol exposome scores with asthma status
The variance-based score has the strongest association with asthma status.
# Print the R version, platform, locale, and attached/loaded package versions
# so the environment used for this analysis is recorded
sessionInfo()
## R version 4.6.0 RC (2026-04-17 r89917)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tidyexposomics_0.99.16 MultiAssayExperiment_1.37.4
## [3] SummarizedExperiment_1.41.1 Biobase_2.71.0
## [5] GenomicRanges_1.63.2 Seqinfo_1.1.0
## [7] IRanges_2.45.0 S4Vectors_0.49.2
## [9] BiocGenerics_0.57.1 generics_0.1.4
## [11] MatrixGenerics_1.23.0 matrixStats_1.5.0
## [13] lubridate_1.9.5 forcats_1.0.1
## [15] stringr_1.6.0 dplyr_1.2.1
## [17] purrr_1.2.2 readr_2.2.0
## [19] tidyr_1.3.2 tibble_3.3.1
## [21] ggplot2_4.0.2 tidyverse_2.0.0
## [23] BiocStyle_2.39.0
##
## loaded via a namespace (and not attached):
## [1] naniar_1.1.0 httr_1.4.8 RColorBrewer_1.1-3
## [4] ggsci_5.0.0 tools_4.6.0 doRNG_1.8.6.3
## [7] backports_1.5.1 utf8_1.2.6 R6_2.6.1
## [10] DT_0.34.0 vegan_2.7-3 mgcv_1.9-4
## [13] permute_0.9-10 withr_3.0.2 gridExtra_2.3
## [16] progressr_0.19.0 cli_3.6.6 factoextra_2.0.0
## [19] RGCCA_3.0.3 labeling_0.4.3 sass_0.4.10
## [22] S7_0.2.1-1 randomForest_4.7-1.2 proxy_0.4-29
## [25] pbapply_1.7-4 foreign_0.8-91 R.utils_2.13.0
## [28] dichromat_2.0-0.1 sessioninfo_1.2.3 parallelly_1.47.0
## [31] itertools_0.1-3 limma_3.67.2 rstudioapi_0.18.0
## [34] RSQLite_2.4.6 car_3.1-5 Matrix_1.7-5
## [37] clipr_0.8.0 abind_1.4-8 R.methodsS3_1.8.2
## [40] lifecycle_1.0.5 yaml_2.3.12 carData_3.0-6
## [43] recipes_1.3.2 SparseArray_1.11.13 BiocFileCache_3.1.0
## [46] grid_4.6.0 blob_1.3.0 promises_1.5.0
## [49] crayon_1.5.3 lattice_0.22-9 magick_2.9.1
## [52] pillar_1.11.1 knitr_1.51 corpcor_1.6.10
## [55] future.apply_1.20.2 mixOmics_6.35.2 codetools_0.2-20
## [58] glue_1.8.1 beepr_2.0 data.table_1.18.2.1
## [61] vctrs_0.7.3 Rdpack_2.6.6 testthat_3.3.2
## [64] gtable_0.3.6 assertthat_0.2.1 cachem_1.1.0
## [67] gower_1.0.2 xfun_0.57 rbibutils_2.4.1
## [70] S4Arrays_1.11.1 mime_0.13 prodlim_2026.03.11
## [73] survival_3.8-6 timeDate_4052.112 audio_0.1-12
## [76] iterators_1.0.14 tinytex_0.59 hardhat_1.4.3
## [79] lava_1.9.0 statmod_1.5.1 ipred_0.9-15
## [82] nlme_3.1-169 fenr_1.9.2 bit64_4.8.0
## [85] filelock_1.0.3 splines2_0.5.4 bslib_0.10.0
## [88] Deriv_4.2.0 otel_0.2.0 rpart_4.1.27
## [91] colorspace_2.1-2 DBI_1.3.0 Hmisc_5.2-5
## [94] nnet_7.3-20 tidyselect_1.2.1 bit_4.6.0
## [97] compiler_4.6.0 curl_7.0.0 httr2_1.2.2
## [100] htmlTable_2.4.3 DelayedArray_0.37.1 stringfish_0.19.0
## [103] bookdown_0.46 checkmate_2.3.4 scales_1.4.0
## [106] rappdirs_0.3.4 digest_0.6.39 mirai_2.6.1
## [109] rmarkdown_2.31 XVector_0.51.0 htmltools_0.5.9
## [112] pkgconfig_2.0.3 base64enc_0.1-6 SimDesign_2.25
## [115] dbplyr_2.5.2 fastmap_1.2.0 rlang_1.2.0
## [118] htmlwidgets_1.6.4 shiny_1.13.0 farver_2.1.2
## [121] jquerylib_0.1.4 jsonlite_2.0.0 BiocParallel_1.45.0
## [124] dcurver_0.9.3 ModelMetrics_1.2.2.2 R.oo_1.27.1
## [127] magrittr_2.0.5 Formula_1.2-5 patchwork_1.3.2
## [130] Rcpp_1.1.1-1 visdat_0.6.0 stringi_1.8.7
## [133] pROC_1.19.0.1 brio_1.1.5 MASS_7.3-65
## [136] plyr_1.8.9 parallel_4.6.0 listenv_0.10.1
## [139] ggrepel_0.9.8 splines_4.6.0 hms_1.1.4
## [142] igraph_2.3.0 ggpubr_0.6.3 ranger_0.18.0
## [145] ggsignif_0.6.4 rngtools_1.5.2 reshape2_1.4.5
## [148] qs2_0.1.7 GPArotation_2025.3-1 tidybulk_2.1.2
## [151] evaluate_1.0.5 RcppParallel_5.1.11-2 BiocManager_1.30.27
## [154] tzdb_0.5.0 nanonext_1.8.2 foreach_1.5.2
## [157] missForest_1.6.1 httpuv_1.6.17 future_1.70.0
## [160] mirt_1.46.1 BiocBaseUtils_1.13.0 broom_1.0.12
## [163] xtable_1.8-8 e1071_1.7-17 RSpectra_0.16-2
## [166] rstatix_0.7.3 later_1.4.8 class_7.3-23
## [169] rARPACK_0.11-0 memoise_2.0.1 ellipse_0.5.0
## [172] cluster_2.1.8.2 timechange_0.4.0 globals_0.19.1
## [175] caret_7.0-1