From 5e4dc8d9a9f9f182d3335957c5c28157d824a190 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 23 Jan 2025 03:22:20 -0600 Subject: [PATCH 01/24] Initial CRAN release preparation commit --- .Rbuildignore | 1 + DESCRIPTION | 5 +++-- NAMESPACE | 1 + NEWS.md | 3 +++ cran-bootstrap.R | 3 +++ cran-comments.md | 6 ++++++ inst/COPYRIGHTS | 11 +++++++++++ 7 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 NEWS.md create mode 100644 cran-comments.md create mode 100644 inst/COPYRIGHTS diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf2..5df376dd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,3 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^cran-comments\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 169aa5a3..6b3277dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,14 @@ Package: stochtree Title: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference -Version: 0.0.1 +Version: 0.1.0 Authors@R: c( person("Drew", "Herren", email = "drewherrenopensource@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4109-6611")), person("Richard", "Hahn", role = "aut"), person("Jared", "Murray", role = "aut"), person("Carlos", "Carvalho", role = "aut"), - person("Jingyu", "He", role = "aut") + person("Jingyu", "He", role = "aut"), + person("stochtree contributors", role = c("cph")) ) Description: Stochastic tree ensembles (XBART and BART) for supervised learning and causal inference. License: MIT + file LICENSE diff --git a/NAMESPACE b/NAMESPACE index 47ba8bcc..41fd66a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,6 +77,7 @@ importFrom(R6,R6Class) importFrom(stats,coef) importFrom(stats,lm) importFrom(stats,model.matrix) +importFrom(stats,predict) importFrom(stats,qgamma) importFrom(stats,resid) importFrom(stats,rnorm) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..aa0f54d0 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,3 @@ +# stochtree 0.1.0 + +* Initial CRAN submission. 
diff --git a/cran-bootstrap.R b/cran-bootstrap.R index 4615ee27..6c45e540 100644 --- a/cran-bootstrap.R +++ b/cran-bootstrap.R @@ -48,10 +48,13 @@ pybind_src_files <- list.files("src", pattern = "^(py_)", recursive = TRUE, full r_src_files <- src_files[!(src_files %in% pybind_src_files)] pkg_core_files <- c( ".Rbuildignore", + "cran-comments.md", "DESCRIPTION", + "inst/COPYRIGHTS", "LICENSE", list.files("man", recursive = TRUE, full.names = TRUE), "NAMESPACE", + "NEWS.md", list.files("R", recursive = TRUE, full.names = TRUE), r_src_files ) diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 00000000..3d888d22 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,6 @@ +## R CMD check results + +0 errors | 0 warnings | 2 notes + +* This is a new release. +* checking installed package size ... NOTE installed size is 46.3Mb (linux-only) diff --git a/inst/COPYRIGHTS b/inst/COPYRIGHTS new file mode 100644 index 00000000..40b8dc54 --- /dev/null +++ b/inst/COPYRIGHTS @@ -0,0 +1,11 @@ +stochtree +Copyright 2023-2025 stochtree contributors + +This project includes software from the xgboost project (Apache, 2.0). +* Copyright 2015-2024, XGBoost Contributors + +This project includes software from the LightGBM project (MIT). +* Copyright (c) 2016 Microsoft Corporation + +This project includes software from the scikit-learn project (BSD, 3-clause). 
+* Copyright (c) 2007-2024 The scikit-learn developers \ No newline at end of file From 2192fd26c70937e8429d2550afc70fa056120074 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 23 Jan 2025 11:27:39 -0600 Subject: [PATCH 02/24] Fixing CRAN check warning with generic predict overload --- R/bart.R | 2 +- R/bcf.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/bart.R b/R/bart.R index 25699152..c1a5a5a5 100644 --- a/R/bart.R +++ b/R/bart.R @@ -944,7 +944,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, #' y_hat_test <- predict(bart_model, X_test) #' # plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") #' # abline(0,1,col="red",lty=3,lwd=3) -predict.bartmodel <- function(bart, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL){ +predict.bartmodel <- function(bart, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X_test)) && (!is.matrix(X_test))) { stop("X_test must be a matrix or dataframe") diff --git a/R/bcf.R b/R/bcf.R index ed00d25e..61d5ad92 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -1351,7 +1351,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' # plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", #' # ylab = "actual", main = "Treatment effect") #' # abline(0,1,col="red",lty=3,lwd=3) -predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL){ +predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X_test)) && (!is.matrix(X_test))) { stop("X_test must be a matrix or dataframe") From f02ed71fdc763c049ec3e97d6f9562810dae177d Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 23 Jan 2025 12:47:26 -0600 Subject: [PATCH 03/24] Updated R package to pass CRAN checks --- R/bart.R | 61 +++++++++++++++-------------- 
R/bcf.R | 67 ++++++++++++++++---------------- R/stochtree-package.R | 1 + man/predict.bartmodel.Rd | 9 +++-- man/predict.bcf.Rd | 11 ++++-- man/saveBARTModelToJsonString.Rd | 2 +- man/stochtree-package.Rd | 5 +++ 7 files changed, 85 insertions(+), 71 deletions(-) diff --git a/R/bart.R b/R/bart.R index c1a5a5a5..97041c50 100644 --- a/R/bart.R +++ b/R/bart.R @@ -907,13 +907,14 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, #' Predict from a sampled BART model on new data #' -#' @param bart Object of type `bart` containing draws of a regression forest and associated sampling outputs. +#' @param object Object of type `bart` containing draws of a regression forest and associated sampling outputs. #' @param X_test Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. #' @param W_test (Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: `NULL`. #' @param group_ids_test (Optional) Test set group labels used for an additive random effects model. #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. #' @param rfx_basis_test (Optional) Test set basis for "random-slope" regression in additive random effects model. +#' @param ... (Optional) Other prediction parameters. #' #' @return List of prediction matrices. If model does not have random effects, the list has one element -- the predictions from the forest. #' If the model does have random effects, the list has three elements -- forest predictions, random effects predictions, and their sum (`y_hat`). 
@@ -944,12 +945,12 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, #' y_hat_test <- predict(bart_model, X_test) #' # plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") #' # abline(0,1,col="red",lty=3,lwd=3) -predict.bartmodel <- function(bart, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ +predict.bartmodel <- function(object, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X_test)) && (!is.matrix(X_test))) { stop("X_test must be a matrix or dataframe") } - train_set_metadata <- bart$train_set_metadata + train_set_metadata <- object$train_set_metadata X_test <- preprocessPredictionData(X_test, train_set_metadata) # Convert all input data to matrices if not already converted @@ -961,29 +962,29 @@ predict.bartmodel <- function(bart, X_test, W_test = NULL, group_ids_test = NULL } # Data checks - if ((bart$model_params$requires_basis) && (is.null(W_test))) { + if ((object$model_params$requires_basis) && (is.null(W_test))) { stop("Basis (W_test) must be provided for this model") } if ((!is.null(W_test)) && (nrow(X_test) != nrow(W_test))) { stop("X_test and W_test must have the same number of rows") } - if (bart$model_params$num_covariates != ncol(X_test)) { + if (object$model_params$num_covariates != ncol(X_test)) { stop("X_test and W_test must have the same number of rows") } - if ((bart$model_params$has_rfx) && (is.null(group_ids_test))) { + if ((object$model_params$has_rfx) && (is.null(group_ids_test))) { stop("Random effect group labels (group_ids_test) must be provided for this model") } - if ((bart$model_params$has_rfx_basis) && (is.null(rfx_basis_test))) { + if ((object$model_params$has_rfx_basis) && (is.null(rfx_basis_test))) { stop("Random effects basis (rfx_basis_test) must be provided for this model") } - if ((bart$model_params$num_rfx_basis > 0) && (ncol(rfx_basis_test) != bart$model_params$num_rfx_basis)) { + 
if ((object$model_params$num_rfx_basis > 0) && (ncol(rfx_basis_test) != object$model_params$num_rfx_basis)) { stop("Random effects basis has a different dimension than the basis used to train this model") } # Recode group IDs to integer vector (if passed as, for example, a vector of county names, etc...) has_rfx <- F if (!is.null(group_ids_test)) { - rfx_unique_group_ids <- bcf$rfx_unique_group_ids + rfx_unique_group_ids <- object$rfx_unique_group_ids group_ids_factor_test <- factor(group_ids_test, levels = rfx_unique_group_ids) if (sum(is.na(group_ids_factor_test)) > 0) { stop("All random effect group labels provided in group_ids_test must be present in group_ids_train") @@ -993,7 +994,7 @@ predict.bartmodel <- function(bart, X_test, W_test = NULL, group_ids_test = NULL } # Produce basis for the "intercept-only" random effects case - if ((bart$model_params$has_rfx) && (is.null(rfx_basis_test))) { + if ((object$model_params$has_rfx) && (is.null(rfx_basis_test))) { rfx_basis_test <- matrix(rep(1, nrow(X_test)), ncol = 1) } @@ -1002,53 +1003,53 @@ predict.bartmodel <- function(bart, X_test, W_test = NULL, group_ids_test = NULL else prediction_dataset <- createForestDataset(X_test) # Compute mean forest predictions - num_samples <- bart$model_params$num_samples - y_std <- bart$model_params$outcome_scale - y_bar <- bart$model_params$outcome_mean - sigma2_init <- bart$model_params$sigma2_init - if (bart$model_params$include_mean_forest) { - mean_forest_predictions <- bart$mean_forests$predict(prediction_dataset)*y_std + y_bar + num_samples <- object$model_params$num_samples + y_std <- object$model_params$outcome_scale + y_bar <- object$model_params$outcome_mean + sigma2_init <- object$model_params$sigma2_init + if (object$model_params$include_mean_forest) { + mean_forest_predictions <- object$mean_forests$predict(prediction_dataset)*y_std + y_bar } # Compute variance forest predictions - if (bart$model_params$include_variance_forest) { - s_x_raw <- 
bart$variance_forests$predict(prediction_dataset) + if (object$model_params$include_variance_forest) { + s_x_raw <- object$variance_forests$predict(prediction_dataset) } # Compute rfx predictions (if needed) - if (bart$model_params$has_rfx) { - rfx_predictions <- bart$rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std + if (object$model_params$has_rfx) { + rfx_predictions <- object$rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std } # Scale variance forest predictions - if (bart$model_params$include_variance_forest) { - if (bart$model_params$sample_sigma_global) { - sigma2_samples <- bart$sigma2_global_samples + if (object$model_params$include_variance_forest) { + if (object$model_params$sample_sigma_global) { + sigma2_samples <- object$sigma2_global_samples variance_forest_predictions <- sapply(1:num_samples, function(i) sqrt(s_x_raw[,i]*sigma2_samples[i])) } else { variance_forest_predictions <- sqrt(s_x_raw*sigma2_init)*y_std } } - if ((bart$model_params$include_mean_forest) && (bart$model_params$has_rfx)) { + if ((object$model_params$include_mean_forest) && (object$model_params$has_rfx)) { y_hat <- mean_forest_predictions + rfx_predictions - } else if ((bart$model_params$include_mean_forest) && (!bart$model_params$has_rfx)) { + } else if ((object$model_params$include_mean_forest) && (!object$model_params$has_rfx)) { y_hat <- mean_forest_predictions - } else if ((!bart$model_params$include_mean_forest) && (bart$model_params$has_rfx)) { + } else if ((!object$model_params$include_mean_forest) && (object$model_params$has_rfx)) { y_hat <- rfx_predictions } result <- list() - if ((bart$model_params$has_rfx) || (bart$model_params$include_mean_forest)) { + if ((object$model_params$has_rfx) || (object$model_params$include_mean_forest)) { result[["y_hat"]] = y_hat } - if (bart$model_params$include_mean_forest) { + if (object$model_params$include_mean_forest) { result[["mean_forest_predictions"]] = mean_forest_predictions } - if (bart$model_params$has_rfx) 
{ + if (object$model_params$has_rfx) { result[["rfx_predictions"]] = rfx_predictions } - if (bart$model_params$include_variance_forest) { + if (object$model_params$include_variance_forest) { result[["variance_forest_predictions"]] = variance_forest_predictions } return(result) diff --git a/R/bcf.R b/R/bcf.R index 61d5ad92..069247a8 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -1290,7 +1290,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' Predict from a sampled BCF model on new data #' -#' @param bcf Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. +#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. #' @param X_test Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. #' @param Z_test Treatments used for prediction. #' @param pi_test (Optional) Propensities used for prediction. @@ -1298,8 +1298,9 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. #' @param rfx_basis_test (Optional) Test set basis for "random-slope" regression in additive random effects model. +#' @param ... (Optional) Other prediction parameters. #' -#' @return List of 3-5 `nrow(X_test)` by `bcf$num_samples` matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. +#' @return List of 3-5 `nrow(X_test)` by `object$num_samples` matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. 
#' @export #' #' @examples @@ -1351,12 +1352,12 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' # plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", #' # ylab = "actual", main = "Treatment effect") #' # abline(0,1,col="red",lty=3,lwd=3) -predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ +predict.bcf <- function(object, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X_test)) && (!is.matrix(X_test))) { stop("X_test must be a matrix or dataframe") } - train_set_metadata <- bcf$train_set_metadata + train_set_metadata <- object$train_set_metadata X_test <- preprocessPredictionData(X_test, train_set_metadata) # Convert all input data to matrices if not already converted @@ -1371,33 +1372,33 @@ predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NU } # Data checks - if ((bcf$model_params$propensity_covariate != "none") && (is.null(pi_test))) { - if (!bcf$model_params$internal_propensity_model) { + if ((object$model_params$propensity_covariate != "none") && (is.null(pi_test))) { + if (!object$model_params$internal_propensity_model) { stop("pi_test must be provided for this model") } # Compute propensity score using the internal bart model - pi_test <- rowMeans(predict(bcf$bart_propensity_model, X_test)$y_hat) + pi_test <- rowMeans(predict(object$bart_propensity_model, X_test)$y_hat) } if (nrow(X_test) != nrow(Z_test)) { stop("X_test and Z_test must have the same number of rows") } - if (bcf$model_params$num_covariates != ncol(X_test)) { + if (object$model_params$num_covariates != ncol(X_test)) { stop("X_test and must have the same number of columns as the covariates used to train the model") } - if ((bcf$model_params$has_rfx) && (is.null(group_ids_test))) { + if ((object$model_params$has_rfx) && (is.null(group_ids_test))) { stop("Random effect group labels 
(group_ids_test) must be provided for this model") } - if ((bcf$model_params$has_rfx_basis) && (is.null(rfx_basis_test))) { + if ((object$model_params$has_rfx_basis) && (is.null(rfx_basis_test))) { stop("Random effects basis (rfx_basis_test) must be provided for this model") } - if ((bcf$model_params$num_rfx_basis > 0) && (ncol(rfx_basis_test) != bcf$model_params$num_rfx_basis)) { + if ((object$model_params$num_rfx_basis > 0) && (ncol(rfx_basis_test) != object$model_params$num_rfx_basis)) { stop("Random effects basis has a different dimension than the basis used to train this model") } # Recode group IDs to integer vector (if passed as, for example, a vector of county names, etc...) has_rfx <- F if (!is.null(group_ids_test)) { - rfx_unique_group_ids <- bcf$rfx_unique_group_ids + rfx_unique_group_ids <- object$rfx_unique_group_ids group_ids_factor_test <- factor(group_ids_test, levels = rfx_unique_group_ids) if (sum(is.na(group_ids_factor_test)) > 0) { stop("All random effect group labels provided in group_ids_test must be present in group_ids_train") @@ -1407,12 +1408,12 @@ predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NU } # Produce basis for the "intercept-only" random effects case - if ((bcf$model_params$has_rfx) && (is.null(rfx_basis_test))) { + if ((object$model_params$has_rfx) && (is.null(rfx_basis_test))) { rfx_basis_test <- matrix(rep(1, nrow(X_test)), ncol = 1) } # Add propensities to covariate set if necessary - if (bcf$model_params$propensity_covariate != "none") { + if (object$model_params$propensity_covariate != "none") { X_test_combined <- cbind(X_test, pi_test) } @@ -1420,34 +1421,34 @@ predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NU forest_dataset_pred <- createForestDataset(X_test_combined, Z_test) # Compute forest predictions - num_samples <- bcf$model_params$num_samples - y_std <- bcf$model_params$outcome_scale - y_bar <- bcf$model_params$outcome_mean - initial_sigma2 <- 
bcf$model_params$initial_sigma2 - mu_hat_test <- bcf$forests_mu$predict(forest_dataset_pred)*y_std + y_bar - if (bcf$model_params$adaptive_coding) { - tau_hat_test_raw <- bcf$forests_tau$predict_raw(forest_dataset_pred) - tau_hat_test <- t(t(tau_hat_test_raw) * (bcf$b_1_samples - bcf$b_0_samples))*y_std + num_samples <- object$model_params$num_samples + y_std <- object$model_params$outcome_scale + y_bar <- object$model_params$outcome_mean + initial_sigma2 <- object$model_params$initial_sigma2 + mu_hat_test <- object$forests_mu$predict(forest_dataset_pred)*y_std + y_bar + if (object$model_params$adaptive_coding) { + tau_hat_test_raw <- object$forests_tau$predict_raw(forest_dataset_pred) + tau_hat_test <- t(t(tau_hat_test_raw) * (object$b_1_samples - object$b_0_samples))*y_std } else { - tau_hat_test <- bcf$forests_tau$predict_raw(forest_dataset_pred)*y_std + tau_hat_test <- object$forests_tau$predict_raw(forest_dataset_pred)*y_std } - if (bcf$model_params$include_variance_forest) { - s_x_raw <- bcf$variance_forests$predict(forest_dataset_pred) + if (object$model_params$include_variance_forest) { + s_x_raw <- object$variance_forests$predict(forest_dataset_pred) } # Compute rfx predictions (if needed) - if (bcf$model_params$has_rfx) { - rfx_predictions <- bcf$rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std + if (object$model_params$has_rfx) { + rfx_predictions <- object$rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std } # Compute overall "y_hat" predictions y_hat_test <- mu_hat_test + tau_hat_test * as.numeric(Z_test) - if (bcf$model_params$has_rfx) y_hat_test <- y_hat_test + rfx_predictions + if (object$model_params$has_rfx) y_hat_test <- y_hat_test + rfx_predictions # Scale variance forest predictions - if (bcf$model_params$include_variance_forest) { - if (bcf$model_params$sample_sigma_global) { - sigma2_samples <- bcf$sigma2_global_samples + if (object$model_params$include_variance_forest) { + if (object$model_params$sample_sigma_global) { + 
sigma2_samples <- object$sigma2_global_samples variance_forest_predictions <- sapply(1:num_samples, function(i) sqrt(s_x_raw[,i]*sigma2_samples[i])) } else { variance_forest_predictions <- sqrt(s_x_raw*initial_sigma2)*y_std @@ -1459,10 +1460,10 @@ predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NU "tau_hat" = tau_hat_test, "y_hat" = y_hat_test ) - if (bcf$model_params$has_rfx) { + if (object$model_params$has_rfx) { result[["rfx_predictions"]] = rfx_predictions } - if (bcf$model_params$include_variance_forest) { + if (object$model_params$include_variance_forest) { result[["variance_forest_predictions"]] = variance_forest_predictions } return(result) diff --git a/R/stochtree-package.R b/R/stochtree-package.R index 7a912fd7..83a5e477 100644 --- a/R/stochtree-package.R +++ b/R/stochtree-package.R @@ -2,6 +2,7 @@ #' @importFrom stats coef #' @importFrom stats lm #' @importFrom stats model.matrix +#' @importFrom stats predict #' @importFrom stats qgamma #' @importFrom stats resid #' @importFrom stats rnorm diff --git a/man/predict.bartmodel.Rd b/man/predict.bartmodel.Rd index 28237c9b..a8bb8682 100644 --- a/man/predict.bartmodel.Rd +++ b/man/predict.bartmodel.Rd @@ -5,15 +5,16 @@ \title{Predict from a sampled BART model on new data} \usage{ \method{predict}{bartmodel}( - bart, + object, X_test, W_test = NULL, group_ids_test = NULL, - rfx_basis_test = NULL + rfx_basis_test = NULL, + ... ) } \arguments{ -\item{bart}{Object of type \code{bart} containing draws of a regression forest and associated sampling outputs.} +\item{object}{Object of type \code{bart} containing draws of a regression forest and associated sampling outputs.} \item{X_test}{Covariates used to determine tree leaf predictions for each observation. 
Must be passed as a matrix or dataframe.} @@ -24,6 +25,8 @@ We do not currently support (but plan to in the near future), test set evaluatio that were not in the training set.} \item{rfx_basis_test}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} + +\item{...}{(Optional) Other prediction parameters.} } \value{ List of prediction matrices. If model does not have random effects, the list has one element -- the predictions from the forest. diff --git a/man/predict.bcf.Rd b/man/predict.bcf.Rd index 311cce50..2d5f8301 100644 --- a/man/predict.bcf.Rd +++ b/man/predict.bcf.Rd @@ -5,16 +5,17 @@ \title{Predict from a sampled BCF model on new data} \usage{ \method{predict}{bcf}( - bcf, + object, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, - rfx_basis_test = NULL + rfx_basis_test = NULL, + ... ) } \arguments{ -\item{bcf}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} \item{X_test}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} @@ -27,9 +28,11 @@ We do not currently support (but plan to in the near future), test set evaluatio that were not in the training set.} \item{rfx_basis_test}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} + +\item{...}{(Optional) Other prediction parameters.} } \value{ -List of 3-5 \code{nrow(X_test)} by \code{bcf$num_samples} matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. 
+List of 3-5 \code{nrow(X_test)} by \code{object$num_samples} matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. } \description{ Predict from a sampled BCF model on new data diff --git a/man/saveBARTModelToJsonString.Rd b/man/saveBARTModelToJsonString.Rd index 031b6d1e..7159598e 100644 --- a/man/saveBARTModelToJsonString.Rd +++ b/man/saveBARTModelToJsonString.Rd @@ -10,7 +10,7 @@ saveBARTModelToJsonString(object) \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} } \value{ -JSON string +in-memory JSON string } \description{ Convert the persistent aspects of a BART model to (in-memory) JSON string diff --git a/man/stochtree-package.Rd b/man/stochtree-package.Rd index 942f0e4a..0377fb91 100644 --- a/man/stochtree-package.Rd +++ b/man/stochtree-package.Rd @@ -26,4 +26,9 @@ Authors: \item Jingyu He } +Other contributors: +\itemize{ + \item stochtree contributors [copyright holder] +} + } From e3b348158b7ffee94993fe228be3fb83d9de2814 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 23 Jan 2025 18:39:12 -0600 Subject: [PATCH 04/24] Updated BART R API --- R/bart.R | 184 ++++++++++++------------ man/bart.Rd | 28 ++-- man/getRandomEffectSamples.bartmodel.Rd | 6 +- man/predict.bartmodel.Rd | 18 +-- 4 files changed, 118 insertions(+), 118 deletions(-) diff --git a/R/bart.R b/R/bart.R index 97041c50..02c77b2d 100644 --- a/R/bart.R +++ b/R/bart.R @@ -6,21 +6,21 @@ #' categorical columns stored as ordered factors will passed as integers to the core algorithm, along with the metadata #' that the column is ordered categorical). #' @param y_train Outcome to be modeled by the ensemble. -#' @param W_train (Optional) Bases used to define a regression model `y ~ W` in +#' @param leaf_basis_train (Optional) Bases used to define a regression model `y ~ W` in #' each leaf of each regression tree. 
By default, BART assumes constant leaf node #' parameters, implicitly regressing on a constant basis of ones (i.e. `y ~ 1`). -#' @param group_ids_train (Optional) Group labels used for an additive random effects model. +#' @param rfx_group_ids_train (Optional) Group labels used for an additive random effects model. #' @param rfx_basis_train (Optional) Basis for "random-slope" regression in an additive random effects model. -#' If `group_ids_train` is provided with a regression basis, an intercept-only random effects model +#' If `rfx_group_ids_train` is provided with a regression basis, an intercept-only random effects model #' will be estimated. #' @param X_test (Optional) Test set of covariates used to define "out of sample" evaluation data. #' May be provided either as a dataframe or a matrix, but the format of `X_test` must be consistent with #' that of `X_train`. -#' @param W_test (Optional) Test set of bases used to define "out of sample" evaluation data. +#' @param leaf_basis_test (Optional) Test set of bases used to define "out of sample" evaluation data. #' While a test set is optional, the structure of any provided test set must match that -#' of the training set (i.e. if both X_train and W_train are provided, then a test set must -#' consist of X_test and W_test with the same number of columns). -#' @param group_ids_test (Optional) Test set group labels used for an additive random effects model. +#' of the training set (i.e. if both `X_train` and `leaf_basis_train` are provided, then a test set must +#' consist of `X_test` and `leaf_basis_test` with the same number of columns). +#' @param rfx_group_ids_test (Optional) Test set group labels used for an additive random effects model. #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. #' @param rfx_basis_test (Optional) Test set basis for "random-slope" regression in additive random effects model. 
@@ -28,7 +28,7 @@ #' @param num_burnin Number of "burn-in" iterations of the MCMC sampler. Default: 0. #' @param num_mcmc Number of "retained" iterations of the MCMC sampler. Default: 100. #' @param previous_model_json (Optional) JSON string containing a previous BART model. This can be used to "continue" a sampler interactively after inspecting the samples or to run parallel chains "warm-started" from existing forest samples. Default: `NULL`. -#' @param warmstart_sample_num (Optional) Sample number from `previous_model_json` that will be used to warmstart this BART sampler. One-indexed (so that the first sample is used for warm-start by setting `warmstart_sample_num = 1`). Default: `NULL`. +#' @param previous_model_warmstart_sample_num (Optional) Sample number from `previous_model_json` that will be used to warmstart this BART sampler. One-indexed (so that the first sample is used for warm-start by setting `previous_model_warmstart_sample_num = 1`). Default: `NULL`. #' @param general_params (Optional) A list of general (non-forest-specific) model parameters, each of which has a default value processed internally, so this argument list is optional. #' #' - `cutpoint_grid_size` Maximum size of the "grid" of potential cutpoints to consider in the GFR algorithm. Default: `100`. @@ -52,7 +52,7 @@ #' - `beta` Exponent that decreases split probabilities for nodes of depth > 0 in the mean model. Tree split prior combines `alpha` and `beta` via `alpha*(1+node_depth)^-beta`. Default: `2`. #' - `min_samples_leaf` Minimum allowable size of a leaf, in terms of training samples, in the mean model. Default: `5`. #' - `max_depth` Maximum depth of any tree in the ensemble in the mean model. Default: `10`. Can be overridden with ``-1`` which does not enforce any depth limits on trees. -#' - `sample_sigma2_leaf` Whether or not to update the leaf scale variance parameter based on `IG(sigma2_leaf_shape, sigma2_leaf_scale)`. Cannot (currently) be set to true if `ncol(W_train)>1`. 
Default: `FALSE`. +#' - `sample_sigma2_leaf` Whether or not to update the leaf scale variance parameter based on `IG(sigma2_leaf_shape, sigma2_leaf_scale)`. Cannot (currently) be set to true if `ncol(leaf_basis_train)>1`. Default: `FALSE`. #' - `sigma2_leaf_init` Starting value of leaf node scale parameter. Calibrated internally as `1/num_trees` if not set here. #' - `sigma2_leaf_shape` Shape parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Default: `3`. #' - `sigma2_leaf_scale` Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Calibrated internally as `0.5/num_trees` if not set here. @@ -100,11 +100,11 @@ #' bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test) #' # plot(rowMeans(bart_model$y_hat_test), y_test, xlab = "predicted", ylab = "actual") #' # abline(0,1,col="red",lty=3,lwd=3) -bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, - rfx_basis_train = NULL, X_test = NULL, W_test = NULL, - group_ids_test = NULL, rfx_basis_test = NULL, +bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train = NULL, + rfx_basis_train = NULL, X_test = NULL, leaf_basis_test = NULL, + rfx_group_ids_test = NULL, rfx_basis_test = NULL, num_gfr = 5, num_burnin = 0, num_mcmc = 100, - previous_model_json = NULL, warmstart_sample_num = NULL, + previous_model_json = NULL, previous_model_warmstart_sample_num = NULL, general_params = list(), mean_forest_params = list(), variance_forest_params = list()) { # Update general BART parameters @@ -199,7 +199,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, if (num_mcmc == 0) keep_gfr <- T # Check if previous model JSON is provided and parse it if so - # TODO: check that warmstart_sample_num is <= the number of samples in this previous model + # TODO: check that `previous_model_warmstart_sample_num` is <= the number of samples in this previous model 
has_prev_model <- !is.null(previous_model_json) if (has_prev_model) { previous_bart_model <- createBARTModelFromJsonString(previous_model_json) @@ -369,11 +369,11 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, } # Convert all input data to matrices if not already converted - if ((is.null(dim(W_train))) && (!is.null(W_train))) { - W_train <- as.matrix(W_train) + if ((is.null(dim(leaf_basis_train))) && (!is.null(leaf_basis_train))) { + leaf_basis_train <- as.matrix(leaf_basis_train) } - if ((is.null(dim(W_test))) && (!is.null(W_test))) { - W_test <- as.matrix(W_test) + if ((is.null(dim(leaf_basis_test))) && (!is.null(leaf_basis_test))) { + leaf_basis_test <- as.matrix(leaf_basis_test) } if ((is.null(dim(rfx_basis_train))) && (!is.null(rfx_basis_train))) { rfx_basis_train <- as.matrix(rfx_basis_train) @@ -385,16 +385,16 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, # Recode group IDs to integer vector (if passed as, for example, a vector of county names, etc...) 
has_rfx <- F has_rfx_test <- F - if (!is.null(group_ids_train)) { - group_ids_factor <- factor(group_ids_train) - group_ids_train <- as.integer(group_ids_factor) + if (!is.null(rfx_group_ids_train)) { + group_ids_factor <- factor(rfx_group_ids_train) + rfx_group_ids_train <- as.integer(group_ids_factor) has_rfx <- T - if (!is.null(group_ids_test)) { - group_ids_factor_test <- factor(group_ids_test, levels = levels(group_ids_factor)) + if (!is.null(rfx_group_ids_test)) { + group_ids_factor_test <- factor(rfx_group_ids_test, levels = levels(group_ids_factor)) if (sum(is.na(group_ids_factor_test)) > 0) { - stop("All random effect group labels provided in group_ids_test must be present in group_ids_train") + stop("All random effect group labels provided in rfx_group_ids_test must be present in rfx_group_ids_train") } - group_ids_test <- as.integer(group_ids_factor_test) + rfx_group_ids_test <- as.integer(group_ids_factor_test) has_rfx_test <- T } } @@ -403,14 +403,14 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, if ((!is.null(X_test)) && (ncol(X_test) != ncol(X_train))) { stop("X_train and X_test must have the same number of columns") } - if ((!is.null(W_test)) && (ncol(W_test) != ncol(W_train))) { - stop("W_train and W_test must have the same number of columns") + if ((!is.null(leaf_basis_test)) && (ncol(leaf_basis_test) != ncol(leaf_basis_train))) { + stop("leaf_basis_train and leaf_basis_test must have the same number of columns") } - if ((!is.null(W_train)) && (nrow(W_train) != nrow(X_train))) { - stop("W_train and X_train must have the same number of rows") + if ((!is.null(leaf_basis_train)) && (nrow(leaf_basis_train) != nrow(X_train))) { + stop("leaf_basis_train and X_train must have the same number of rows") } - if ((!is.null(W_test)) && (nrow(W_test) != nrow(X_test))) { - stop("W_test and X_test must have the same number of rows") + if ((!is.null(leaf_basis_test)) && (nrow(leaf_basis_test) != nrow(X_test))) { + 
stop("leaf_basis_test and X_test must have the same number of rows") } if (nrow(X_train) != length(y_train)) { stop("X_train and y_train must have the same number of observations") @@ -418,8 +418,8 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, if ((!is.null(rfx_basis_test)) && (ncol(rfx_basis_test) != ncol(rfx_basis_train))) { stop("rfx_basis_train and rfx_basis_test must have the same number of columns") } - if (!is.null(group_ids_train)) { - if (!is.null(group_ids_test)) { + if (!is.null(rfx_group_ids_train)) { + if (!is.null(rfx_group_ids_test)) { if ((!is.null(rfx_basis_train)) && (is.null(rfx_basis_test))) { stop("rfx_basis_train is provided but rfx_basis_test is not provided") } @@ -436,7 +436,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, has_basis_rfx <- T num_basis_rfx <- ncol(rfx_basis_train) } - num_rfx_groups <- length(unique(group_ids_train)) + num_rfx_groups <- length(unique(rfx_group_ids_train)) num_rfx_components <- ncol(rfx_basis_train) if (num_rfx_groups == 1) warning("Only one group was provided for random effect sampling, so the 'redundant parameterization' is likely overkill") } @@ -455,7 +455,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, } # Determine whether a basis vector is provided - has_basis = !is.null(W_train) + has_basis = !is.null(leaf_basis_train) # Determine whether a test set is provided has_test = !is.null(X_test) @@ -478,8 +478,8 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, if (is.null(variance_forest_init)) variance_forest_init <- 1.0*var(resid_train) if (is.null(b_leaf)) b_leaf <- var(resid_train)/(2*num_trees_mean) if (has_basis) { - if (ncol(W_train) > 1) { - if (is.null(sigma_leaf_init)) sigma_leaf_init <- diag(var(resid_train)/(num_trees_mean), ncol(W_train)) + if (ncol(leaf_basis_train) > 1) { + if (is.null(sigma_leaf_init)) sigma_leaf_init <- diag(var(resid_train)/(num_trees_mean), 
ncol(leaf_basis_train)) current_leaf_scale <- sigma_leaf_init } else { if (is.null(sigma_leaf_init)) sigma_leaf_init <- var(resid_train)/(num_trees_mean) @@ -493,9 +493,9 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, # Determine leaf model type if (!has_basis) leaf_model_mean_forest <- 0 - else if (ncol(W_train) == 1) leaf_model_mean_forest <- 1 - else if (ncol(W_train) > 1) leaf_model_mean_forest <- 2 - else stop("W_train passed must be a matrix with at least 1 column") + else if (ncol(leaf_basis_train) == 1) leaf_model_mean_forest <- 1 + else if (ncol(leaf_basis_train) > 1) leaf_model_mean_forest <- 2 + else stop("leaf_basis_train passed must be a matrix with at least 1 column") # Set variance leaf model type (currently only one option) leaf_model_variance_forest <- 3 @@ -507,14 +507,14 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, leaf_regression = F } else if (leaf_model_mean_forest == 1) { stopifnot(has_basis) - stopifnot(ncol(W_train) == 1) + stopifnot(ncol(leaf_basis_train) == 1) output_dimension = 1 is_leaf_constant = F leaf_regression = T } else if (leaf_model_mean_forest == 2) { stopifnot(has_basis) - stopifnot(ncol(W_train) > 1) - output_dimension = ncol(W_train) + stopifnot(ncol(leaf_basis_train) > 1) + output_dimension = ncol(leaf_basis_train) is_leaf_constant = F leaf_regression = T if (sample_sigma_leaf) { @@ -524,8 +524,8 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, # Data if (leaf_regression) { - forest_dataset_train <- createForestDataset(X_train, W_train) - if (has_test) forest_dataset_test <- createForestDataset(X_test, W_test) + forest_dataset_train <- createForestDataset(X_train, leaf_basis_train) + if (has_test) forest_dataset_test <- createForestDataset(X_test, leaf_basis_test) requires_basis <- T } else { forest_dataset_train <- createForestDataset(X_train) @@ -574,8 +574,8 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = 
NULL, sigma_xi_scale <- 1 # Random effects data structure and storage container - rfx_dataset_train <- createRandomEffectsDataset(group_ids_train, rfx_basis_train) - rfx_tracker_train <- createRandomEffectsTracker(group_ids_train) + rfx_dataset_train <- createRandomEffectsDataset(rfx_group_ids_train, rfx_basis_train) + rfx_tracker_train <- createRandomEffectsTracker(rfx_group_ids_train) rfx_model <- createRandomEffectsModel(num_rfx_components, num_rfx_groups) rfx_model$set_working_parameter(alpha_init) rfx_model$set_group_parameters(xi_init) @@ -598,7 +598,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, # Initialize the leaves of each tree in the mean forest if (include_mean_forest) { - if (requires_basis) init_values_mean_forest <- rep(0., ncol(W_train)) + if (requires_basis) init_values_mean_forest <- rep(0., ncol(leaf_basis_train)) else init_values_mean_forest <- 0. active_forest_mean$prepare_for_sampler(forest_dataset_train, outcome_train, forest_model_mean, leaf_model_mean_forest, init_values_mean_forest) } @@ -676,15 +676,15 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, if (sample_sigma_global) current_sigma2 <- global_var_samples[forest_ind + 1] } else if (has_prev_model) { if (include_mean_forest) { - resetActiveForest(active_forest_mean, previous_forest_samples_mean, warmstart_sample_num - 1) + resetActiveForest(active_forest_mean, previous_forest_samples_mean, previous_model_warmstart_sample_num - 1) resetForestModel(forest_model_mean, active_forest_mean, forest_dataset_train, outcome_train, TRUE) if (sample_sigma_leaf && (!is.null(previous_leaf_var_samples))) { - leaf_scale_double <- previous_leaf_var_samples[warmstart_sample_num] + leaf_scale_double <- previous_leaf_var_samples[previous_model_warmstart_sample_num] current_leaf_scale <- as.matrix(leaf_scale_double) } } if (include_variance_forest) { - resetActiveForest(active_forest_variance, previous_forest_samples_variance, 
warmstart_sample_num - 1) + resetActiveForest(active_forest_variance, previous_forest_samples_variance, previous_model_warmstart_sample_num - 1) resetForestModel(forest_model_variance, active_forest_variance, forest_dataset_train, outcome_train, FALSE) } # TODO: also initialize from previous RFX samples @@ -695,7 +695,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, # } if (sample_sigma_global) { if (!is.null(previous_global_var_samples)) { - current_sigma2 <- previous_global_var_samples[warmstart_sample_num] + current_sigma2 <- previous_global_var_samples[previous_model_warmstart_sample_num] } } } else { @@ -810,11 +810,11 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, # Random effects predictions if (has_rfx) { - rfx_preds_train <- rfx_samples$predict(group_ids_train, rfx_basis_train)*y_std_train + rfx_preds_train <- rfx_samples$predict(rfx_group_ids_train, rfx_basis_train)*y_std_train y_hat_train <- y_hat_train + rfx_preds_train } if ((has_rfx_test) && (has_test)) { - rfx_preds_test <- rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std_train + rfx_preds_test <- rfx_samples$predict(rfx_group_ids_test, rfx_basis_test)*y_std_train y_hat_test <- y_hat_test + rfx_preds_test } @@ -853,14 +853,14 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, "leaf_regression" = leaf_regression, "requires_basis" = requires_basis, "num_covariates" = ncol(X_train), - "num_basis" = ifelse(is.null(W_train),0,ncol(W_train)), + "num_basis" = ifelse(is.null(leaf_basis_train),0,ncol(leaf_basis_train)), "num_samples" = num_retained_samples, "num_gfr" = num_gfr, "num_burnin" = num_burnin, "num_mcmc" = num_mcmc, "keep_every" = keep_every, "num_chains" = num_chains, - "has_basis" = !is.null(W_train), + "has_basis" = !is.null(leaf_basis_train), "has_rfx" = has_rfx, "has_rfx_basis" = has_basis_rfx, "num_rfx_basis" = num_basis_rfx, @@ -908,12 +908,12 @@ bart <- function(X_train, y_train, W_train = 
NULL, group_ids_train = NULL, #' Predict from a sampled BART model on new data #' #' @param object Object of type `bart` containing draws of a regression forest and associated sampling outputs. -#' @param X_test Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. -#' @param W_test (Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: `NULL`. -#' @param group_ids_test (Optional) Test set group labels used for an additive random effects model. +#' @param X Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. +#' @param leaf_basis (Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: `NULL`. +#' @param rfx_group_ids (Optional) Test set group labels used for an additive random effects model. #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. -#' @param rfx_basis_test (Optional) Test set basis for "random-slope" regression in additive random effects model. +#' @param rfx_basis (Optional) Test set basis for "random-slope" regression in additive random effects model. #' @param ... (Optional) Other prediction parameters. #' #' @return List of prediction matrices. If model does not have random effects, the list has one element -- the predictions from the forest. 
@@ -945,62 +945,62 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL, #' y_hat_test <- predict(bart_model, X_test) #' # plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") #' # abline(0,1,col="red",lty=3,lwd=3) -predict.bartmodel <- function(object, X_test, W_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ +predict.bartmodel <- function(object, X, leaf_basis = NULL, rfx_group_ids = NULL, rfx_basis = NULL, ...){ # Preprocess covariates - if ((!is.data.frame(X_test)) && (!is.matrix(X_test))) { - stop("X_test must be a matrix or dataframe") + if ((!is.data.frame(X)) && (!is.matrix(X))) { + stop("X must be a matrix or dataframe") } train_set_metadata <- object$train_set_metadata - X_test <- preprocessPredictionData(X_test, train_set_metadata) + X <- preprocessPredictionData(X, train_set_metadata) # Convert all input data to matrices if not already converted - if ((is.null(dim(W_test))) && (!is.null(W_test))) { - W_test <- as.matrix(W_test) + if ((is.null(dim(leaf_basis))) && (!is.null(leaf_basis))) { + leaf_basis <- as.matrix(leaf_basis) } - if ((is.null(dim(rfx_basis_test))) && (!is.null(rfx_basis_test))) { - rfx_basis_test <- as.matrix(rfx_basis_test) + if ((is.null(dim(rfx_basis))) && (!is.null(rfx_basis))) { + rfx_basis <- as.matrix(rfx_basis) } # Data checks - if ((object$model_params$requires_basis) && (is.null(W_test))) { - stop("Basis (W_test) must be provided for this model") + if ((object$model_params$requires_basis) && (is.null(leaf_basis))) { + stop("Basis (leaf_basis) must be provided for this model") } - if ((!is.null(W_test)) && (nrow(X_test) != nrow(W_test))) { - stop("X_test and W_test must have the same number of rows") + if ((!is.null(leaf_basis)) && (nrow(X) != nrow(leaf_basis))) { + stop("X and leaf_basis must have the same number of rows") } - if (object$model_params$num_covariates != ncol(X_test)) { - stop("X_test and W_test must have the same number of rows") + if 
(object$model_params$num_covariates != ncol(X)) { + stop("X must have the same number of columns as the covariates used to train this model") - if ((object$model_params$has_rfx) && (is.null(group_ids_test))) { - stop("Random effect group labels (group_ids_test) must be provided for this model") + if ((object$model_params$has_rfx) && (is.null(rfx_group_ids))) { + stop("Random effect group labels (rfx_group_ids) must be provided for this model") - if ((object$model_params$has_rfx_basis) && (is.null(rfx_basis_test))) { - stop("Random effects basis (rfx_basis_test) must be provided for this model") + if ((object$model_params$has_rfx_basis) && (is.null(rfx_basis))) { + stop("Random effects basis (rfx_basis) must be provided for this model") - if ((object$model_params$num_rfx_basis > 0) && (ncol(rfx_basis_test) != object$model_params$num_rfx_basis)) { + if ((object$model_params$num_rfx_basis > 0) && (ncol(rfx_basis) != object$model_params$num_rfx_basis)) { stop("Random effects basis has a different dimension than the basis used to train this model") } # Recode group IDs to integer vector (if passed as, for example, a vector of county names, etc...) 
has_rfx <- F - if (!is.null(group_ids_test)) { + if (!is.null(rfx_group_ids)) { rfx_unique_group_ids <- object$rfx_unique_group_ids - group_ids_factor_test <- factor(group_ids_test, levels = rfx_unique_group_ids) - if (sum(is.na(group_ids_factor_test)) > 0) { - stop("All random effect group labels provided in group_ids_test must be present in group_ids_train") + group_ids_factor <- factor(rfx_group_ids, levels = rfx_unique_group_ids) + if (sum(is.na(group_ids_factor)) > 0) { + stop("All random effect group labels provided in rfx_group_ids must be present in rfx_group_ids_train") } - group_ids_test <- as.integer(group_ids_factor_test) + rfx_group_ids <- as.integer(group_ids_factor) has_rfx <- T } # Produce basis for the "intercept-only" random effects case - if ((object$model_params$has_rfx) && (is.null(rfx_basis_test))) { - rfx_basis_test <- matrix(rep(1, nrow(X_test)), ncol = 1) + if ((object$model_params$has_rfx) && (is.null(rfx_basis))) { + rfx_basis <- matrix(rep(1, nrow(X)), ncol = 1) } # Create prediction dataset - if (!is.null(W_test)) prediction_dataset <- createForestDataset(X_test, W_test) - else prediction_dataset <- createForestDataset(X_test) + if (!is.null(leaf_basis)) prediction_dataset <- createForestDataset(X, leaf_basis) + else prediction_dataset <- createForestDataset(X) # Compute mean forest predictions num_samples <- object$model_params$num_samples @@ -1018,7 +1018,7 @@ predict.bartmodel <- function(object, X_test, W_test = NULL, group_ids_test = NU # Compute rfx predictions (if needed) if (object$model_params$has_rfx) { - rfx_predictions <- object$rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std + rfx_predictions <- object$rfx_samples$predict(rfx_group_ids, rfx_basis)*y_std } # Scale variance forest predictions @@ -1090,14 +1090,14 @@ predict.bartmodel <- function(object, X_test, W_test = NULL, group_ids_test = NU #' X_train <- X[train_inds,] #' y_test <- y[test_inds] #' y_train <- y[train_inds] -#' group_ids_test <- 
group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- group_ids[test_inds] +#' rfx_group_ids_train <- group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] #' rfx_term_train <- rfx_term[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, -#' group_ids_train = group_ids_train, group_ids_test = group_ids_test, +#' rfx_group_ids_train = rfx_group_ids_train, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_train = rfx_basis_train, rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100) #' rfx_samples <- getRandomEffectSamples(bart_model) diff --git a/man/bart.Rd b/man/bart.Rd index 4c2e6a07..98dfd904 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -7,18 +7,18 @@ bart( X_train, y_train, - W_train = NULL, - group_ids_train = NULL, + leaf_basis_train = NULL, + rfx_group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, - W_test = NULL, - group_ids_test = NULL, + leaf_basis_test = NULL, + rfx_group_ids_test = NULL, rfx_basis_test = NULL, num_gfr = 5, num_burnin = 0, num_mcmc = 100, previous_model_json = NULL, - warmstart_sample_num = NULL, + previous_model_warmstart_sample_num = NULL, general_params = list(), mean_forest_params = list(), variance_forest_params = list() @@ -33,26 +33,26 @@ that the column is ordered categorical).} \item{y_train}{Outcome to be modeled by the ensemble.} -\item{W_train}{(Optional) Bases used to define a regression model \code{y ~ W} in +\item{leaf_basis_train}{(Optional) Bases used to define a regression model \code{y ~ W} in each leaf of each regression tree. By default, BART assumes constant leaf node parameters, implicitly regressing on a constant basis of ones (i.e. 
\code{y ~ 1}).} -\item{group_ids_train}{(Optional) Group labels used for an additive random effects model.} +\item{rfx_group_ids_train}{(Optional) Group labels used for an additive random effects model.} \item{rfx_basis_train}{(Optional) Basis for "random-slope" regression in an additive random effects model. -If \code{group_ids_train} is provided with a regression basis, an intercept-only random effects model +If \code{rfx_group_ids_train} is provided with a regression basis, an intercept-only random effects model will be estimated.} \item{X_test}{(Optional) Test set of covariates used to define "out of sample" evaluation data. May be provided either as a dataframe or a matrix, but the format of \code{X_test} must be consistent with that of \code{X_train}.} -\item{W_test}{(Optional) Test set of bases used to define "out of sample" evaluation data. +\item{leaf_basis_test}{(Optional) Test set of bases used to define "out of sample" evaluation data. While a test set is optional, the structure of any provided test set must match that -of the training set (i.e. if both X_train and W_train are provided, then a test set must -consist of X_test and W_test with the same number of columns).} +of the training set (i.e. if both \code{X_train} and \code{leaf_basis_train} are provided, then a test set must +consist of \code{X_test} and \code{leaf_basis_test} with the same number of columns).} -\item{group_ids_test}{(Optional) Test set group labels used for an additive random effects model. +\item{rfx_group_ids_test}{(Optional) Test set group labels used for an additive random effects model. We do not currently support (but plan to in the near future), test set evaluation for group labels that were not in the training set.} @@ -66,7 +66,7 @@ that were not in the training set.} \item{previous_model_json}{(Optional) JSON string containing a previous BART model. 
This can be used to "continue" a sampler interactively after inspecting the samples or to run parallel chains "warm-started" from existing forest samples. Default: \code{NULL}.} -\item{warmstart_sample_num}{(Optional) Sample number from \code{previous_model_json} that will be used to warmstart this BART sampler. One-indexed (so that the first sample is used for warm-start by setting \code{warmstart_sample_num = 1}). Default: \code{NULL}.} +\item{previous_model_warmstart_sample_num}{(Optional) Sample number from \code{previous_model_json} that will be used to warmstart this BART sampler. One-indexed (so that the first sample is used for warm-start by setting \code{previous_model_warmstart_sample_num = 1}). Default: \code{NULL}.} \item{general_params}{(Optional) A list of general (non-forest-specific) model parameters, each of which has a default value processed internally, so this argument list is optional. \itemize{ @@ -92,7 +92,7 @@ that were not in the training set.} \item \code{beta} Exponent that decreases split probabilities for nodes of depth > 0 in the mean model. Tree split prior combines \code{alpha} and \code{beta} via \code{alpha*(1+node_depth)^-beta}. Default: \code{2}. \item \code{min_samples_leaf} Minimum allowable size of a leaf, in terms of training samples, in the mean model. Default: \code{5}. \item \code{max_depth} Maximum depth of any tree in the ensemble in the mean model. Default: \code{10}. Can be overridden with \code{-1} which does not enforce any depth limits on trees. -\item \code{sample_sigma2_leaf} Whether or not to update the leaf scale variance parameter based on \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)}. Cannot (currently) be set to true if \code{ncol(W_train)>1}. Default: \code{FALSE}. +\item \code{sample_sigma2_leaf} Whether or not to update the leaf scale variance parameter based on \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)}. Cannot (currently) be set to true if \code{ncol(leaf_basis_train)>1}. Default: \code{FALSE}. 
\item \code{sigma2_leaf_init} Starting value of leaf node scale parameter. Calibrated internally as \code{1/num_trees} if not set here. \item \code{sigma2_leaf_shape} Shape parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. Default: \code{3}. \item \code{sigma2_leaf_scale} Scale parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. Calibrated internally as \code{0.5/num_trees} if not set here. diff --git a/man/getRandomEffectSamples.bartmodel.Rd b/man/getRandomEffectSamples.bartmodel.Rd index 14d3084d..2c03e1ba 100644 --- a/man/getRandomEffectSamples.bartmodel.Rd +++ b/man/getRandomEffectSamples.bartmodel.Rd @@ -45,14 +45,14 @@ X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- group_ids[test_inds] +rfx_group_ids_train <- group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, - group_ids_train = group_ids_train, group_ids_test = group_ids_test, + rfx_group_ids_train = rfx_group_ids_train, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_train = rfx_basis_train, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100) rfx_samples <- getRandomEffectSamples(bart_model) diff --git a/man/predict.bartmodel.Rd b/man/predict.bartmodel.Rd index a8bb8682..b5f30a9d 100644 --- a/man/predict.bartmodel.Rd +++ b/man/predict.bartmodel.Rd @@ -6,27 +6,27 @@ \usage{ \method{predict}{bartmodel}( object, - X_test, - W_test = NULL, - group_ids_test = NULL, - rfx_basis_test = NULL, + X, + leaf_basis = NULL, + rfx_group_ids = NULL, + rfx_basis = NULL, ... 
) } \arguments{ \item{object}{Object of type \code{bart} containing draws of a regression forest and associated sampling outputs.} -\item{X_test}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} +\item{X}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} -\item{W_test}{(Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: \code{NULL}.} +\item{leaf_basis}{(Optional) Bases used for prediction (by e.g. dot product with leaf values). Default: \code{NULL}.} -\item{group_ids_test}{(Optional) Test set group labels used for an additive random effects model. +\item{rfx_group_ids}{(Optional) Test set group labels used for an additive random effects model. We do not currently support (but plan to in the near future), test set evaluation for group labels that were not in the training set.} -\item{rfx_basis_test}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} - \item{...}{(Optional) Other prediction parameters.} + +\item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} } \value{ List of prediction matrices. If model does not have random effects, the list has one element -- the predictions from the forest. 
From 1c60f55e0d5c1d9647a94923577d7b206d2b5bdc Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 23 Jan 2025 19:26:57 -0600 Subject: [PATCH 05/24] Propagate parameter name changes in BART and BCF APIs --- R/bcf.R | 297 ++++++++++---------- man/bcf.Rd | 31 +- man/convertBCFModelToJson.Rd | 10 +- man/createBCFModelFromCombinedJsonString.Rd | 10 +- man/createBCFModelFromJson.Rd | 10 +- man/createBCFModelFromJsonFile.Rd | 10 +- man/createBCFModelFromJsonString.Rd | 10 +- man/getRandomEffectSamples.bcf.Rd | 10 +- man/predict.bcf.Rd | 24 +- man/saveBCFModelToJsonFile.Rd | 10 +- man/saveBCFModelToJsonString.Rd | 10 +- test/R/testthat/test-serialization.R | 2 +- tools/debug/debug.R | 4 +- vignettes/BayesianSupervisedLearning.Rmd | 54 ++-- vignettes/CausalInference.Rmd | 68 ++--- vignettes/Heteroskedasticity.Rmd | 8 - vignettes/ModelSerialization.Rmd | 18 +- vignettes/MultiChain.Rmd | 35 +-- vignettes/TreeInspection.Rmd | 2 - 19 files changed, 309 insertions(+), 314 deletions(-) diff --git a/R/bcf.R b/R/bcf.R index 069247a8..621b1fd9 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -7,17 +7,17 @@ #' that the column is ordered categorical). #' @param Z_train Vector of (continuous or binary) treatment assignments. #' @param y_train Outcome to be modeled by the ensemble. -#' @param pi_train (Optional) Vector of propensity scores. If not provided, this will be estimated from the data. -#' @param group_ids_train (Optional) Group labels used for an additive random effects model. +#' @param propensity_train (Optional) Vector of propensity scores. If not provided, this will be estimated from the data. +#' @param rfx_group_ids_train (Optional) Group labels used for an additive random effects model. #' @param rfx_basis_train (Optional) Basis for "random-slope" regression in an additive random effects model. 
-#' If `group_ids_train` is provided with a regression basis, an intercept-only random effects model +#' If `rfx_group_ids_train` is provided with a regression basis, an intercept-only random effects model #' will be estimated. #' @param X_test (Optional) Test set of covariates used to define "out of sample" evaluation data. #' May be provided either as a dataframe or a matrix, but the format of `X_test` must be consistent with #' that of `X_train`. #' @param Z_test (Optional) Test set of (continuous or binary) treatment assignments. -#' @param pi_test (Optional) Vector of propensity scores. If not provided, this will be estimated from the data. -#' @param group_ids_test (Optional) Test set group labels used for an additive random effects model. +#' @param propensity_test (Optional) Vector of propensity scores. If not provided, this will be estimated from the data. +#' @param rfx_group_ids_test (Optional) Test set group labels used for an additive random effects model. #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. #' @param rfx_basis_test (Optional) Test set basis for "random-slope" regression in additive random effects model. @@ -25,7 +25,7 @@ #' @param num_burnin Number of "burn-in" iterations of the MCMC sampler. Default: 0. #' @param num_mcmc Number of "retained" iterations of the MCMC sampler. Default: 100. #' @param previous_model_json (Optional) JSON string containing a previous BCF model. This can be used to "continue" a sampler interactively after inspecting the samples or to run parallel chains "warm-started" from existing forest samples. Default: `NULL`. -#' @param warmstart_sample_num (Optional) Sample number from `previous_model_json` that will be used to warmstart this BCF sampler. One-indexed (so that the first sample is used for warm-start by setting `warmstart_sample_num = 1`). Default: `NULL`. 
+#' @param previous_model_warmstart_sample_num (Optional) Sample number from `previous_model_json` that will be used to warmstart this BCF sampler. One-indexed (so that the first sample is used for warm-start by setting `previous_model_warmstart_sample_num = 1`). Default: `NULL`. #' @param general_params (Optional) A list of general (non-forest-specific) model parameters, each of which has a default value processed internally, so this argument list is optional. #' #' - `cutpoint_grid_size` Maximum size of the "grid" of potential cutpoints to consider in the GFR algorithm. Default: `100`. @@ -55,7 +55,7 @@ #' - `min_samples_leaf` Minimum allowable size of a leaf, in terms of training samples, in the prognostic forest. Default: `5`. #' - `max_depth` Maximum depth of any tree in the ensemble in the prognostic forest. Default: `10`. Can be overridden with ``-1`` which does not enforce any depth limits on trees. #' - `variable_weights` Numeric weights reflecting the relative probability of splitting on each variable in the prognostic forest. Does not need to sum to 1 but cannot be negative. Defaults to `rep(1/ncol(X_train), ncol(X_train))` if not set here. -#' - `sample_sigma2_leaf` Whether or not to update the leaf scale variance parameter based on `IG(sigma2_leaf_shape, sigma2_leaf_scale)`. Cannot (currently) be set to true if `ncol(W_train)>1`. Default: `FALSE`. +#' - `sample_sigma2_leaf` Whether or not to update the leaf scale variance parameter based on `IG(sigma2_leaf_shape, sigma2_leaf_scale)`. #' - `sigma2_leaf_init` Starting value of leaf node scale parameter. Calibrated internally as `1/num_trees` if not set here. #' - `sigma2_leaf_shape` Shape parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Default: `3`. #' - `sigma2_leaf_scale` Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Calibrated internally as `0.5/num_trees` if not set here. 
@@ -70,7 +70,7 @@ #' - `min_samples_leaf` Minimum allowable size of a leaf, in terms of training samples, in the treatment effect forest. Default: `5`. #' - `max_depth` Maximum depth of any tree in the ensemble in the treatment effect forest. Default: `5`. Can be overridden with ``-1`` which does not enforce any depth limits on trees. #' - `variable_weights` Numeric weights reflecting the relative probability of splitting on each variable in the treatment effect forest. Does not need to sum to 1 but cannot be negative. Defaults to `rep(1/ncol(X_train), ncol(X_train))` if not set here. -#' - `sample_sigma2_leaf` Whether or not to update the leaf scale variance parameter based on `IG(sigma2_leaf_shape, sigma2_leaf_scale)`. Cannot (currently) be set to true if `ncol(W_train)>1`. Default: `FALSE`. +#' - `sample_sigma2_leaf` Whether or not to update the leaf scale variance parameter based on `IG(sigma2_leaf_shape, sigma2_leaf_scale)`. Cannot (currently) be set to true if `ncol(Z_train)>1`. Default: `FALSE`. #' - `sigma2_leaf_init` Starting value of leaf node scale parameter. Calibrated internally as `1/num_trees` if not set here. #' - `sigma2_leaf_shape` Shape parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Default: `3`. #' - `sigma2_leaf_scale` Scale parameter in the `IG(sigma2_leaf_shape, sigma2_leaf_scale)` leaf node parameter variance model. Calibrated internally as `0.5/num_trees` if not set here. 
@@ -135,19 +135,20 @@ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, -#' X_test = X_test, Z_test = Z_test, pi_test = pi_test) +#' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, +#' propensity_train = pi_train, X_test = X_test, Z_test = Z_test, +#' propensity_test = pi_test) #' # plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", #' # ylab = "actual", main = "Prognostic function") #' # abline(0,1,col="red",lty=3,lwd=3) #' # plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted", #' # ylab = "actual", main = "Treatment effect") #' # abline(0,1,col="red",lty=3,lwd=3) -bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NULL, - rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, pi_test = NULL, - group_ids_test = NULL, rfx_basis_test = NULL, +bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_ids_train = NULL, + rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, propensity_test = NULL, + rfx_group_ids_test = NULL, rfx_basis_test = NULL, num_gfr = 5, num_burnin = 0, num_mcmc = 100, - previous_model_json = NULL, warmstart_sample_num = NULL, + previous_model_json = NULL, previous_model_warmstart_sample_num = NULL, general_params = list(), mu_forest_params = list(), tau_forest_params = list(), variance_forest_params = list()) { # Update general BCF parameters @@ -267,7 +268,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU if (num_mcmc == 0) keep_gfr <- T # Check if previous model JSON is provided and parse it if so - # TODO: check that warmstart_sample_num is <= the number of samples in this previous model + # TODO: check that `previous_model_warmstart_sample_num` is <= the number of samples in this previous model has_prev_model <- !is.null(previous_model_json) if 
(has_prev_model) { previous_bcf_model <- createBCFModelFromJsonString(previous_model_json) @@ -464,14 +465,14 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU if ((is.null(dim(Z_train))) && (!is.null(Z_train))) { Z_train <- as.matrix(as.numeric(Z_train)) } - if ((is.null(dim(pi_train))) && (!is.null(pi_train))) { - pi_train <- as.matrix(pi_train) + if ((is.null(dim(propensity_train))) && (!is.null(propensity_train))) { + propensity_train <- as.matrix(propensity_train) } if ((is.null(dim(Z_test))) && (!is.null(Z_test))) { Z_test <- as.matrix(as.numeric(Z_test)) } - if ((is.null(dim(pi_test))) && (!is.null(pi_test))) { - pi_test <- as.matrix(pi_test) + if ((is.null(dim(propensity_test))) && (!is.null(propensity_test))) { + propensity_test <- as.matrix(propensity_test) } if ((is.null(dim(rfx_basis_train))) && (!is.null(rfx_basis_train))) { rfx_basis_train <- as.matrix(rfx_basis_train) @@ -483,16 +484,16 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU # Recode group IDs to integer vector (if passed as, for example, a vector of county names, etc...) 
has_rfx <- F has_rfx_test <- F - if (!is.null(group_ids_train)) { - group_ids_factor <- factor(group_ids_train) - group_ids_train <- as.integer(group_ids_factor) + if (!is.null(rfx_group_ids_train)) { + group_ids_factor <- factor(rfx_group_ids_train) + rfx_group_ids_train <- as.integer(group_ids_factor) has_rfx <- T - if (!is.null(group_ids_test)) { - group_ids_factor_test <- factor(group_ids_test, levels = levels(group_ids_factor)) + if (!is.null(rfx_group_ids_test)) { + group_ids_factor_test <- factor(rfx_group_ids_test, levels = levels(group_ids_factor)) if (sum(is.na(group_ids_factor_test)) > 0) { - stop("All random effect group labels provided in group_ids_test must be present in group_ids_train") + stop("All random effect group labels provided in rfx_group_ids_test must be present in rfx_group_ids_train") } - group_ids_test <- as.integer(group_ids_factor_test) + rfx_group_ids_test <- as.integer(group_ids_factor_test) has_rfx_test <- T } } @@ -514,14 +515,14 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU if ((!is.null(Z_train)) && (nrow(Z_train) != nrow(X_train))) { stop("Z_train and X_train must have the same number of rows") } - if ((!is.null(pi_train)) && (nrow(pi_train) != nrow(X_train))) { - stop("pi_train and X_train must have the same number of rows") + if ((!is.null(propensity_train)) && (nrow(propensity_train) != nrow(X_train))) { + stop("propensity_train and X_train must have the same number of rows") } if ((!is.null(Z_test)) && (nrow(Z_test) != nrow(X_test))) { stop("Z_test and X_test must have the same number of rows") } - if ((!is.null(pi_test)) && (nrow(pi_test) != nrow(X_test))) { - stop("pi_test and X_test must have the same number of rows") + if ((!is.null(propensity_test)) && (nrow(propensity_test) != nrow(X_test))) { + stop("propensity_test and X_test must have the same number of rows") } if (nrow(X_train) != length(y_train)) { stop("X_train and y_train must have the same number of observations") @@ 
-529,8 +530,8 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU if ((!is.null(rfx_basis_test)) && (ncol(rfx_basis_test) != ncol(rfx_basis_train))) { stop("rfx_basis_train and rfx_basis_test must have the same number of columns") } - if (!is.null(group_ids_train)) { - if (!is.null(group_ids_test)) { + if (!is.null(rfx_group_ids_train)) { + if (!is.null(rfx_group_ids_test)) { if ((!is.null(rfx_basis_train)) && (is.null(rfx_basis_test))) { stop("rfx_basis_train is provided but rfx_basis_test is not provided") } @@ -569,7 +570,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU has_basis_rfx <- T num_basis_rfx <- ncol(rfx_basis_train) } - num_rfx_groups <- length(unique(group_ids_train)) + num_rfx_groups <- length(unique(rfx_group_ids_train)) num_rfx_components <- ncol(rfx_basis_train) if (num_rfx_groups == 1) warning("Only one group was provided for random effect sampling, so the 'redundant parameterization' is likely overkill") } @@ -614,48 +615,48 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU # Estimate if pre-estimated propensity score is not provided internal_propensity_model <- F - if ((is.null(pi_train)) && (propensity_covariate != "none")) { + if ((is.null(propensity_train)) && (propensity_covariate != "none")) { internal_propensity_model <- T # Estimate using the last of several iterations of GFR BART num_burnin <- 10 num_total <- 50 bart_model_propensity <- bart(X_train = X_train, y_train = as.numeric(Z_train), X_test = X_test_raw, num_gfr = num_total, num_burnin = 0, num_mcmc = 0) - pi_train <- rowMeans(bart_model_propensity$y_hat_train[,(num_burnin+1):num_total]) - if ((is.null(dim(pi_train))) && (!is.null(pi_train))) { - pi_train <- as.matrix(pi_train) + propensity_train <- rowMeans(bart_model_propensity$y_hat_train[,(num_burnin+1):num_total]) + if ((is.null(dim(propensity_train))) && (!is.null(propensity_train))) { + propensity_train <- 
as.matrix(propensity_train) } if (has_test) { - pi_test <- rowMeans(bart_model_propensity$y_hat_test[,(num_burnin+1):num_total]) - if ((is.null(dim(pi_test))) && (!is.null(pi_test))) { - pi_test <- as.matrix(pi_test) + propensity_test <- rowMeans(bart_model_propensity$y_hat_test[,(num_burnin+1):num_total]) + if ((is.null(dim(propensity_test))) && (!is.null(propensity_test))) { + propensity_test <- as.matrix(propensity_test) } } } if (has_test) { - if (is.null(pi_test)) stop("Propensity score must be provided for the test set if provided for the training set") + if (is.null(propensity_test)) stop("Propensity score must be provided for the test set if provided for the training set") } # Update feature_types and covariates feature_types <- as.integer(feature_types) if (propensity_covariate != "none") { feature_types <- as.integer(c(feature_types,0)) - X_train <- cbind(X_train, pi_train) + X_train <- cbind(X_train, propensity_train) if (propensity_covariate == "mu") { - variable_weights_mu <- c(variable_weights_mu, rep(1./num_cov_orig, ncol(pi_train))) + variable_weights_mu <- c(variable_weights_mu, rep(1./num_cov_orig, ncol(propensity_train))) variable_weights_tau <- c(variable_weights_tau, 0) if (include_variance_forest) variable_weights_variance <- c(variable_weights_variance, 0) } else if (propensity_covariate == "tau") { variable_weights_mu <- c(variable_weights_mu, 0) - variable_weights_tau <- c(variable_weights_tau, rep(1./num_cov_orig, ncol(pi_train))) + variable_weights_tau <- c(variable_weights_tau, rep(1./num_cov_orig, ncol(propensity_train))) if (include_variance_forest) variable_weights_variance <- c(variable_weights_variance, 0) } else if (propensity_covariate == "both") { - variable_weights_mu <- c(variable_weights_mu, rep(1./num_cov_orig, ncol(pi_train))) - variable_weights_tau <- c(variable_weights_tau, rep(1./num_cov_orig, ncol(pi_train))) + variable_weights_mu <- c(variable_weights_mu, rep(1./num_cov_orig, ncol(propensity_train))) + 
variable_weights_tau <- c(variable_weights_tau, rep(1./num_cov_orig, ncol(propensity_train))) if (include_variance_forest) variable_weights_variance <- c(variable_weights_variance, 0) } - if (has_test) X_test <- cbind(X_test, pi_test) + if (has_test) X_test <- cbind(X_test, propensity_test) } # Renormalize variable weights @@ -699,7 +700,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU # Initialize each group parameter based on a regression of outcome on basis in that grou xi_init <- matrix(0,num_rfx_components,num_rfx_groups) for (i in 1:num_rfx_groups) { - group_subset_indices <- group_ids_train == i + group_subset_indices <- rfx_group_ids_train == i basis_group <- rfx_basis_train[group_subset_indices,] resid_group <- resid_train[group_subset_indices] rfx_group_model <- lm(resid_group ~ 0+basis_group) @@ -713,8 +714,8 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU # Random effects data structure and storage container if (has_rfx) { - rfx_dataset_train <- createRandomEffectsDataset(group_ids_train, rfx_basis_train) - rfx_tracker_train <- createRandomEffectsTracker(group_ids_train) + rfx_dataset_train <- createRandomEffectsDataset(rfx_group_ids_train, rfx_basis_train) + rfx_tracker_train <- createRandomEffectsTracker(rfx_group_ids_train) rfx_model <- createRandomEffectsModel(num_rfx_components, num_rfx_groups) rfx_model$set_working_parameter(alpha_init) rfx_model$set_group_parameters(xi_init) @@ -932,28 +933,28 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU } if (sample_sigma_global) current_sigma2 <- global_var_samples[forest_ind + 1] } else if (has_prev_model) { - resetActiveForest(active_forest_mu, previous_forest_samples_mu, warmstart_sample_num - 1) + resetActiveForest(active_forest_mu, previous_forest_samples_mu, previous_model_warmstart_sample_num - 1) resetForestModel(forest_model_mu, active_forest_mu, forest_dataset_train, outcome_train, TRUE) - 
resetActiveForest(active_forest_tau, previous_forest_samples_tau, warmstart_sample_num - 1) + resetActiveForest(active_forest_tau, previous_forest_samples_tau, previous_model_warmstart_sample_num - 1) resetForestModel(forest_model_tau, active_forest_tau, forest_dataset_train, outcome_train, TRUE) if (include_variance_forest) { - resetActiveForest(active_forest_variance, previous_forest_samples_variance, warmstart_sample_num - 1) + resetActiveForest(active_forest_variance, previous_forest_samples_variance, previous_model_warmstart_sample_num - 1) resetForestModel(forest_model_variance, active_forest_variance, forest_dataset_train, outcome_train, FALSE) } if (sample_sigma_leaf_mu && (!is.null(previous_leaf_var_mu_samples))) { - leaf_scale_mu_double <- previous_leaf_var_mu_samples[warmstart_sample_num] + leaf_scale_mu_double <- previous_leaf_var_mu_samples[previous_model_warmstart_sample_num] current_leaf_scale_mu <- as.matrix(leaf_scale_mu_double) } if (sample_sigma_leaf_tau && (!is.null(previous_leaf_var_tau_samples))) { - leaf_scale_tau_double <- previous_leaf_var_tau_samples[warmstart_sample_num] + leaf_scale_tau_double <- previous_leaf_var_tau_samples[previous_model_warmstart_sample_num] current_leaf_scale_tau <- as.matrix(leaf_scale_tau_double) } if (adaptive_coding) { if (!is.null(previous_b_1_samples)) { - current_b_1 <- previous_b_1_samples[warmstart_sample_num] + current_b_1 <- previous_b_1_samples[previous_model_warmstart_sample_num] } if (!is.null(previous_b_0_samples)) { - current_b_0 <- previous_b_0_samples[warmstart_sample_num] + current_b_0 <- previous_b_0_samples[previous_model_warmstart_sample_num] } tau_basis_train <- (1-Z_train)*current_b_0 + Z_train*current_b_1 forest_dataset_train$update_basis(tau_basis_train) @@ -971,7 +972,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU # } if (sample_sigma_global) { if (!is.null(previous_global_var_samples)) { - current_sigma2 <- 
previous_global_var_samples[warmstart_sample_num] + current_sigma2 <- previous_global_var_samples[previous_model_warmstart_sample_num] } } } else { @@ -1176,11 +1177,11 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU # Random effects predictions if (has_rfx) { - rfx_preds_train <- rfx_samples$predict(group_ids_train, rfx_basis_train)*y_std_train + rfx_preds_train <- rfx_samples$predict(rfx_group_ids_train, rfx_basis_train)*y_std_train y_hat_train <- y_hat_train + rfx_preds_train } if ((has_rfx_test) && (has_test)) { - rfx_preds_test <- rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std_train + rfx_preds_test <- rfx_samples$predict(rfx_group_ids_test, rfx_basis_test)*y_std_train y_hat_test <- y_hat_test + rfx_preds_test } @@ -1291,16 +1292,16 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' Predict from a sampled BCF model on new data #' #' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. -#' @param X_test Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. -#' @param Z_test Treatments used for prediction. -#' @param pi_test (Optional) Propensities used for prediction. -#' @param group_ids_test (Optional) Test set group labels used for an additive random effects model. +#' @param X Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. +#' @param Z Treatments used for prediction. +#' @param propensity (Optional) Propensities used for prediction. +#' @param rfx_group_ids (Optional) Test set group labels used for an additive random effects model. #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. -#' @param rfx_basis_test (Optional) Test set basis for "random-slope" regression in additive random effects model. 
+#' @param rfx_basis (Optional) Test set basis for "random-slope" regression in additive random effects model. #' @param ... (Optional) Other prediction parameters. #' -#' @return List of 3-5 `nrow(X_test)` by `object$num_samples` matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. +#' @return List of 3-5 `nrow(X)` by `object$num_samples` matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. #' @export #' #' @examples @@ -1344,7 +1345,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train) +#' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train) #' preds <- predict(bcf_model, X_test, Z_test, pi_test) #' # plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", #' # ylab = "actual", main = "Prognostic function") @@ -1352,85 +1353,85 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU #' # plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", #' # ylab = "actual", main = "Treatment effect") #' # abline(0,1,col="red",lty=3,lwd=3) -predict.bcf <- function(object, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL, ...){ +predict.bcf <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, rfx_basis = NULL, ...){ # Preprocess covariates - if ((!is.data.frame(X_test)) && (!is.matrix(X_test))) { - stop("X_test must be a matrix or dataframe") + if ((!is.data.frame(X)) && (!is.matrix(X))) { + stop("X must be a matrix or dataframe") } train_set_metadata <- object$train_set_metadata - 
X_test <- preprocessPredictionData(X_test, train_set_metadata) + X <- preprocessPredictionData(X, train_set_metadata) # Convert all input data to matrices if not already converted - if ((is.null(dim(Z_test))) && (!is.null(Z_test))) { - Z_test <- as.matrix(as.numeric(Z_test)) + if ((is.null(dim(Z))) && (!is.null(Z))) { + Z <- as.matrix(as.numeric(Z)) } - if ((is.null(dim(pi_test))) && (!is.null(pi_test))) { - pi_test <- as.matrix(pi_test) + if ((is.null(dim(propensity))) && (!is.null(propensity))) { + propensity <- as.matrix(propensity) } - if ((is.null(dim(rfx_basis_test))) && (!is.null(rfx_basis_test))) { - rfx_basis_test <- as.matrix(rfx_basis_test) + if ((is.null(dim(rfx_basis))) && (!is.null(rfx_basis))) { + rfx_basis <- as.matrix(rfx_basis) } # Data checks - if ((object$model_params$propensity_covariate != "none") && (is.null(pi_test))) { + if ((object$model_params$propensity_covariate != "none") && (is.null(propensity))) { if (!object$model_params$internal_propensity_model) { - stop("pi_test must be provided for this model") + stop("propensity must be provided for this model") } # Compute propensity score using the internal bart model - pi_test <- rowMeans(predict(object$bart_propensity_model, X_test)$y_hat) + propensity <- rowMeans(predict(object$bart_propensity_model, X)$y_hat) } - if (nrow(X_test) != nrow(Z_test)) { - stop("X_test and Z_test must have the same number of rows") + if (nrow(X) != nrow(Z)) { + stop("X and Z must have the same number of rows") } - if (object$model_params$num_covariates != ncol(X_test)) { - stop("X_test and must have the same number of columns as the covariates used to train the model") + if (object$model_params$num_covariates != ncol(X)) { + stop("X must have the same number of columns as the covariates used to train the model") } - if ((object$model_params$has_rfx) && (is.null(group_ids_test))) { - stop("Random effect group labels (group_ids_test) must be provided for this model") + if ((object$model_params$has_rfx) && 
(is.null(rfx_group_ids))) { + stop("Random effect group labels (rfx_group_ids) must be provided for this model") } - if ((object$model_params$has_rfx_basis) && (is.null(rfx_basis_test))) { - stop("Random effects basis (rfx_basis_test) must be provided for this model") + if ((object$model_params$has_rfx_basis) && (is.null(rfx_basis))) { + stop("Random effects basis (rfx_basis) must be provided for this model") } - if ((object$model_params$num_rfx_basis > 0) && (ncol(rfx_basis_test) != object$model_params$num_rfx_basis)) { + if ((object$model_params$num_rfx_basis > 0) && (ncol(rfx_basis) != object$model_params$num_rfx_basis)) { stop("Random effects basis has a different dimension than the basis used to train this model") } # Recode group IDs to integer vector (if passed as, for example, a vector of county names, etc...) has_rfx <- F - if (!is.null(group_ids_test)) { + if (!is.null(rfx_group_ids)) { rfx_unique_group_ids <- object$rfx_unique_group_ids - group_ids_factor_test <- factor(group_ids_test, levels = rfx_unique_group_ids) - if (sum(is.na(group_ids_factor_test)) > 0) { - stop("All random effect group labels provided in group_ids_test must be present in group_ids_train") + group_ids_factor <- factor(rfx_group_ids, levels = rfx_unique_group_ids) + if (sum(is.na(group_ids_factor)) > 0) { + stop("All random effect group labels provided in rfx_group_ids must be present in rfx_group_ids_train") } - group_ids_test <- as.integer(group_ids_factor_test) + rfx_group_ids <- as.integer(group_ids_factor) has_rfx <- T } # Produce basis for the "intercept-only" random effects case - if ((object$model_params$has_rfx) && (is.null(rfx_basis_test))) { - rfx_basis_test <- matrix(rep(1, nrow(X_test)), ncol = 1) + if ((object$model_params$has_rfx) && (is.null(rfx_basis))) { + rfx_basis <- matrix(rep(1, nrow(X)), ncol = 1) } # Add propensities to covariate set if necessary if (object$model_params$propensity_covariate != "none") { - X_test_combined <- cbind(X_test, pi_test) + 
X_combined <- cbind(X, propensity) } # Create prediction datasets - forest_dataset_pred <- createForestDataset(X_test_combined, Z_test) + forest_dataset_pred <- createForestDataset(X_combined, Z) # Compute forest predictions num_samples <- object$model_params$num_samples y_std <- object$model_params$outcome_scale y_bar <- object$model_params$outcome_mean initial_sigma2 <- object$model_params$initial_sigma2 - mu_hat_test <- object$forests_mu$predict(forest_dataset_pred)*y_std + y_bar + mu_hat <- object$forests_mu$predict(forest_dataset_pred)*y_std + y_bar if (object$model_params$adaptive_coding) { - tau_hat_test_raw <- object$forests_tau$predict_raw(forest_dataset_pred) - tau_hat_test <- t(t(tau_hat_test_raw) * (object$b_1_samples - object$b_0_samples))*y_std + tau_hat_raw <- object$forests_tau$predict_raw(forest_dataset_pred) + tau_hat <- t(t(tau_hat_raw) * (object$b_1_samples - object$b_0_samples))*y_std } else { - tau_hat_test <- object$forests_tau$predict_raw(forest_dataset_pred)*y_std + tau_hat <- object$forests_tau$predict_raw(forest_dataset_pred)*y_std } if (object$model_params$include_variance_forest) { s_x_raw <- object$variance_forests$predict(forest_dataset_pred) @@ -1438,12 +1439,12 @@ predict.bcf <- function(object, X_test, Z_test, pi_test = NULL, group_ids_test = # Compute rfx predictions (if needed) if (object$model_params$has_rfx) { - rfx_predictions <- object$rfx_samples$predict(group_ids_test, rfx_basis_test)*y_std + rfx_predictions <- object$rfx_samples$predict(rfx_group_ids, rfx_basis)*y_std } # Compute overall "y_hat" predictions - y_hat_test <- mu_hat_test + tau_hat_test * as.numeric(Z_test) - if (object$model_params$has_rfx) y_hat_test <- y_hat_test + rfx_predictions + y_hat <- mu_hat + tau_hat * as.numeric(Z) + if (object$model_params$has_rfx) y_hat <- y_hat + rfx_predictions # Scale variance forest predictions if (object$model_params$include_variance_forest) { @@ -1456,9 +1457,9 @@ predict.bcf <- function(object, X_test, Z_test, pi_test = 
NULL, group_ids_test = } result <- list( - "mu_hat" = mu_hat_test, - "tau_hat" = tau_hat_test, - "y_hat" = y_hat_test + "mu_hat" = mu_hat, + "tau_hat" = tau_hat, + "y_hat" = y_hat ) if (object$model_params$has_rfx) { result[["rfx_predictions"]] = rfx_predictions @@ -1498,7 +1499,7 @@ predict.bcf <- function(object, X_test, Z_test, pi_test = NULL, group_ids_test = #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) @@ -1523,8 +1524,8 @@ predict.bcf <- function(object, X_test, Z_test, pi_test = NULL, group_ids_test = #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] @@ -1532,9 +1533,9 @@ predict.bcf <- function(object, X_test, Z_test, pi_test = NULL, group_ids_test = #' mu_params <- list(sample_sigma_leaf = TRUE) #' tau_params <- list(sample_sigma_leaf = FALSE) #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, @@ -1587,7 +1588,7 @@ getRandomEffectSamples.bcf <- 
function(object, ...){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) @@ -1612,8 +1613,8 @@ getRandomEffectSamples.bcf <- function(object, ...){ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] @@ -1621,9 +1622,9 @@ getRandomEffectSamples.bcf <- function(object, ...){ #' mu_params <- list(sample_sigma_leaf = TRUE) #' tau_params <- list(sample_sigma_leaf = FALSE) #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, @@ -1746,7 +1747,7 @@ convertBCFModelToJson <- function(object){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) @@ -1771,8 +1772,8 @@ convertBCFModelToJson <- function(object){ #' mu_train <- 
mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] @@ -1780,9 +1781,9 @@ convertBCFModelToJson <- function(object){ #' mu_params <- list(sample_sigma_leaf = TRUE) #' tau_params <- list(sample_sigma_leaf = FALSE) #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, @@ -1822,7 +1823,7 @@ saveBCFModelToJsonFile <- function(object, filename){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) @@ -1847,8 +1848,8 @@ saveBCFModelToJsonFile <- function(object, filename){ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] @@ -1856,9 
+1857,9 @@ saveBCFModelToJsonFile <- function(object, filename){ #' mu_params <- list(sample_sigma_leaf = TRUE) #' tau_params <- list(sample_sigma_leaf = FALSE) #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, @@ -1900,7 +1901,7 @@ saveBCFModelToJsonString <- function(object){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) @@ -1925,8 +1926,8 @@ saveBCFModelToJsonString <- function(object){ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] @@ -1934,9 +1935,9 @@ saveBCFModelToJsonString <- function(object){ #' mu_params <- list(sample_sigma_leaf = TRUE) #' tau_params <- list(sample_sigma_leaf = FALSE) #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, 
-#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, @@ -2063,7 +2064,7 @@ createBCFModelFromJson <- function(json_object){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) @@ -2088,8 +2089,8 @@ createBCFModelFromJson <- function(json_object){ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] @@ -2097,9 +2098,9 @@ createBCFModelFromJson <- function(json_object){ #' mu_params <- list(sample_sigma_leaf = TRUE) #' tau_params <- list(sample_sigma_leaf = FALSE) #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, @@ -2144,7 +2145,7 @@ createBCFModelFromJsonFile <- function(json_filename){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 
-#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) #' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -2169,16 +2170,16 @@ createBCFModelFromJsonFile <- function(json_filename){ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] #' rfx_term_train <- rfx_term[train_inds] #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100) #' # bcf_json <- saveBCFModelToJsonString(bcf_model) @@ -2222,7 +2223,7 @@ createBCFModelFromJsonString <- function(json_string){ #' Z <- rbinom(n,1,pi_x) #' E_XZ <- mu_x + Z*tau_x #' snr <- 3 -#' group_ids <- rep(c(1,2), n %/% 2) +#' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) #' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -2247,16 +2248,16 @@ createBCFModelFromJsonString <- function(json_string){ #' mu_train <- mu_x[train_inds] #' tau_test <- tau_x[test_inds] #' tau_train <- tau_x[train_inds] -#' group_ids_test <- group_ids[test_inds] -#' group_ids_train <- group_ids[train_inds] +#' 
rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] #' rfx_basis_test <- rfx_basis[test_inds,] #' rfx_basis_train <- rfx_basis[train_inds,] #' rfx_term_test <- rfx_term[test_inds] #' rfx_term_train <- rfx_term[train_inds] #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, -#' pi_train = pi_train, group_ids_train = group_ids_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, #' rfx_basis_train = rfx_basis_train, X_test = X_test, -#' Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, #' rfx_basis_test = rfx_basis_test, #' num_gfr = 100, num_burnin = 0, num_mcmc = 100) #' # bcf_json_string_list <- list(saveBCFModelToJsonString(bcf_model)) diff --git a/man/bcf.Rd b/man/bcf.Rd index ec2e7939..c5b15e1f 100644 --- a/man/bcf.Rd +++ b/man/bcf.Rd @@ -8,19 +8,19 @@ bcf( X_train, Z_train, y_train, - pi_train = NULL, - group_ids_train = NULL, + propensity_train = NULL, + rfx_group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, - pi_test = NULL, - group_ids_test = NULL, + propensity_test = NULL, + rfx_group_ids_test = NULL, rfx_basis_test = NULL, num_gfr = 5, num_burnin = 0, num_mcmc = 100, previous_model_json = NULL, - warmstart_sample_num = NULL, + previous_model_warmstart_sample_num = NULL, general_params = list(), mu_forest_params = list(), tau_forest_params = list(), @@ -38,12 +38,12 @@ that the column is ordered categorical).} \item{y_train}{Outcome to be modeled by the ensemble.} -\item{pi_train}{(Optional) Vector of propensity scores. If not provided, this will be estimated from the data.} +\item{propensity_train}{(Optional) Vector of propensity scores. 
If not provided, this will be estimated from the data.} -\item{group_ids_train}{(Optional) Group labels used for an additive random effects model.} +\item{rfx_group_ids_train}{(Optional) Group labels used for an additive random effects model.} \item{rfx_basis_train}{(Optional) Basis for "random-slope" regression in an additive random effects model. -If \code{group_ids_train} is provided with a regression basis, an intercept-only random effects model +If \code{rfx_group_ids_train} is provided with a regression basis, an intercept-only random effects model will be estimated.} \item{X_test}{(Optional) Test set of covariates used to define "out of sample" evaluation data. @@ -52,9 +52,9 @@ that of \code{X_train}.} \item{Z_test}{(Optional) Test set of (continuous or binary) treatment assignments.} -\item{pi_test}{(Optional) Vector of propensity scores. If not provided, this will be estimated from the data.} +\item{propensity_test}{(Optional) Vector of propensity scores. If not provided, this will be estimated from the data.} -\item{group_ids_test}{(Optional) Test set group labels used for an additive random effects model. +\item{rfx_group_ids_test}{(Optional) Test set group labels used for an additive random effects model. We do not currently support (but plan to in the near future), test set evaluation for group labels that were not in the training set.} @@ -68,7 +68,7 @@ that were not in the training set.} \item{previous_model_json}{(Optional) JSON string containing a previous BCF model. This can be used to "continue" a sampler interactively after inspecting the samples or to run parallel chains "warm-started" from existing forest samples. Default: \code{NULL}.} -\item{warmstart_sample_num}{(Optional) Sample number from \code{previous_model_json} that will be used to warmstart this BCF sampler. One-indexed (so that the first sample is used for warm-start by setting \code{warmstart_sample_num = 1}). 
Default: \code{NULL}.} +\item{previous_model_warmstart_sample_num}{(Optional) Sample number from \code{previous_model_json} that will be used to warmstart this BCF sampler. One-indexed (so that the first sample is used for warm-start by setting \code{previous_model_warmstart_sample_num = 1}). Default: \code{NULL}.} \item{general_params}{(Optional) A list of general (non-forest-specific) model parameters, each of which has a default value processed internally, so this argument list is optional. \itemize{ @@ -100,7 +100,7 @@ that were not in the training set.} \item \code{min_samples_leaf} Minimum allowable size of a leaf, in terms of training samples, in the prognostic forest. Default: \code{5}. \item \code{max_depth} Maximum depth of any tree in the ensemble in the prognostic forest. Default: \code{10}. Can be overridden with \code{-1} which does not enforce any depth limits on trees. \item \code{variable_weights} Numeric weights reflecting the relative probability of splitting on each variable in the prognostic forest. Does not need to sum to 1 but cannot be negative. Defaults to \code{rep(1/ncol(X_train), ncol(X_train))} if not set here. -\item \code{sample_sigma2_leaf} Whether or not to update the leaf scale variance parameter based on \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)}. Cannot (currently) be set to true if \code{ncol(W_train)>1}. Default: \code{FALSE}. +\item \code{sample_sigma2_leaf} Whether or not to update the leaf scale variance parameter based on \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)}. \item \code{sigma2_leaf_init} Starting value of leaf node scale parameter. Calibrated internally as \code{1/num_trees} if not set here. \item \code{sigma2_leaf_shape} Shape parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. Default: \code{3}. \item \code{sigma2_leaf_scale} Scale parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. 
Calibrated internally as \code{0.5/num_trees} if not set here. @@ -116,7 +116,7 @@ that were not in the training set.} \item \code{min_samples_leaf} Minimum allowable size of a leaf, in terms of training samples, in the treatment effect forest. Default: \code{5}. \item \code{max_depth} Maximum depth of any tree in the ensemble in the treatment effect forest. Default: \code{5}. Can be overridden with \code{-1} which does not enforce any depth limits on trees. \item \code{variable_weights} Numeric weights reflecting the relative probability of splitting on each variable in the treatment effect forest. Does not need to sum to 1 but cannot be negative. Defaults to \code{rep(1/ncol(X_train), ncol(X_train))} if not set here. -\item \code{sample_sigma2_leaf} Whether or not to update the leaf scale variance parameter based on \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)}. Cannot (currently) be set to true if \code{ncol(W_train)>1}. Default: \code{FALSE}. +\item \code{sample_sigma2_leaf} Whether or not to update the leaf scale variance parameter based on \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)}. Cannot (currently) be set to true if \code{ncol(Z_train)>1}. Default: \code{FALSE}. \item \code{sigma2_leaf_init} Starting value of leaf node scale parameter. Calibrated internally as \code{1/num_trees} if not set here. \item \code{sigma2_leaf_shape} Shape parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. Default: \code{3}. \item \code{sigma2_leaf_scale} Scale parameter in the \code{IG(sigma2_leaf_shape, sigma2_leaf_scale)} leaf node parameter variance model. Calibrated internally as \code{0.5/num_trees} if not set here. 
@@ -186,8 +186,9 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test) +bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, + propensity_train = pi_train, X_test = X_test, Z_test = Z_test, + propensity_test = pi_test) # plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", # ylab = "actual", main = "Prognostic function") # abline(0,1,col="red",lty=3,lwd=3) diff --git a/man/convertBCFModelToJson.Rd b/man/convertBCFModelToJson.Rd index 71a7ff73..892be2a2 100644 --- a/man/convertBCFModelToJson.Rd +++ b/man/convertBCFModelToJson.Rd @@ -35,7 +35,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -60,8 +60,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -69,9 +69,9 @@ rfx_term_train <- rfx_term[train_inds] mu_params <- list(sample_sigma_leaf = TRUE) tau_params <- list(sample_sigma_leaf = FALSE) bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - 
Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, diff --git a/man/createBCFModelFromCombinedJsonString.Rd b/man/createBCFModelFromCombinedJsonString.Rd index a8a14194..bce0f98b 100644 --- a/man/createBCFModelFromCombinedJsonString.Rd +++ b/man/createBCFModelFromCombinedJsonString.Rd @@ -38,7 +38,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -63,16 +63,16 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100) # bcf_json_string_list <- list(saveBCFModelToJsonString(bcf_model)) diff --git a/man/createBCFModelFromJson.Rd b/man/createBCFModelFromJson.Rd index 
254dbe74..ee41bc60 100644 --- a/man/createBCFModelFromJson.Rd +++ b/man/createBCFModelFromJson.Rd @@ -37,7 +37,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -62,8 +62,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -71,9 +71,9 @@ rfx_term_train <- rfx_term[train_inds] mu_params <- list(sample_sigma_leaf = TRUE) tau_params <- list(sample_sigma_leaf = FALSE) bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, diff --git a/man/createBCFModelFromJsonFile.Rd b/man/createBCFModelFromJsonFile.Rd index 64b81b16..230d412e 100644 --- a/man/createBCFModelFromJsonFile.Rd +++ b/man/createBCFModelFromJsonFile.Rd @@ -37,7 +37,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs 
<- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -62,8 +62,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -71,9 +71,9 @@ rfx_term_train <- rfx_term[train_inds] mu_params <- list(sample_sigma_leaf = TRUE) tau_params <- list(sample_sigma_leaf = FALSE) bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, diff --git a/man/createBCFModelFromJsonString.Rd b/man/createBCFModelFromJsonString.Rd index 79b79773..e3cff160 100644 --- a/man/createBCFModelFromJsonString.Rd +++ b/man/createBCFModelFromJsonString.Rd @@ -37,7 +37,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -62,16 +62,16 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] 
-group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] rfx_term_train <- rfx_term[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100) # bcf_json <- saveBCFModelToJsonString(bcf_model) diff --git a/man/getRandomEffectSamples.bcf.Rd b/man/getRandomEffectSamples.bcf.Rd index ddfb0300..55b41048 100644 --- a/man/getRandomEffectSamples.bcf.Rd +++ b/man/getRandomEffectSamples.bcf.Rd @@ -39,7 +39,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -64,8 +64,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -73,9 +73,9 @@ rfx_term_train <- rfx_term[train_inds] mu_params <- list(sample_sigma_leaf = TRUE) tau_params <- list(sample_sigma_leaf = FALSE) bcf_model 
<- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, diff --git a/man/predict.bcf.Rd b/man/predict.bcf.Rd index 2d5f8301..dc74c783 100644 --- a/man/predict.bcf.Rd +++ b/man/predict.bcf.Rd @@ -6,33 +6,33 @@ \usage{ \method{predict}{bcf}( object, - X_test, - Z_test, - pi_test = NULL, - group_ids_test = NULL, - rfx_basis_test = NULL, + X, + Z, + propensity = NULL, + rfx_group_ids = NULL, + rfx_basis = NULL, ... ) } \arguments{ \item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} -\item{X_test}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} +\item{X}{Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe.} -\item{Z_test}{Treatments used for prediction.} +\item{Z}{Treatments used for prediction.} -\item{pi_test}{(Optional) Propensities used for prediction.} +\item{propensity}{(Optional) Propensities used for prediction.} -\item{group_ids_test}{(Optional) Test set group labels used for an additive random effects model. +\item{rfx_group_ids}{(Optional) Test set group labels used for an additive random effects model. 
We do not currently support (but plan to in the near future), test set evaluation for group labels that were not in the training set.} -\item{rfx_basis_test}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} +\item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} \item{...}{(Optional) Other prediction parameters.} } \value{ -List of 3-5 \code{nrow(X_test)} by \code{object$num_samples} matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. +List of 3-5 \code{nrow(X)} by \code{object$num_samples} matrices: prognostic function estimates, treatment effect estimates, (optionally) random effects predictions, (optionally) variance forest predictions, and outcome predictions. } \description{ Predict from a sampled BCF model on new data @@ -78,7 +78,7 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train) +bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train) preds <- predict(bcf_model, X_test, Z_test, pi_test) # plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", # ylab = "actual", main = "Prognostic function") diff --git a/man/saveBCFModelToJsonFile.Rd b/man/saveBCFModelToJsonFile.Rd index 5a0f1512..f6dc04af 100644 --- a/man/saveBCFModelToJsonFile.Rd +++ b/man/saveBCFModelToJsonFile.Rd @@ -37,7 +37,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) 
@@ -62,8 +62,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -71,9 +71,9 @@ rfx_term_train <- rfx_term[train_inds] mu_params <- list(sample_sigma_leaf = TRUE) tau_params <- list(sample_sigma_leaf = FALSE) bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, diff --git a/man/saveBCFModelToJsonString.Rd b/man/saveBCFModelToJsonString.Rd index be405f5f..4ea4e4ee 100644 --- a/man/saveBCFModelToJsonString.Rd +++ b/man/saveBCFModelToJsonString.Rd @@ -35,7 +35,7 @@ pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x snr <- 3 -group_ids <- rep(c(1,2), n \%/\% 2) +rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -60,8 +60,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- 
rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -69,9 +69,9 @@ rfx_term_train <- rfx_term[train_inds] mu_params <- list(sample_sigma_leaf = TRUE) tau_params <- list(sample_sigma_leaf = FALSE) bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, group_ids_train = group_ids_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, X_test = X_test, - Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, diff --git a/test/R/testthat/test-serialization.R b/test/R/testthat/test-serialization.R index fcce0bb3..e640d3f8 100644 --- a/test/R/testthat/test-serialization.R +++ b/test/R/testthat/test-serialization.R @@ -80,7 +80,7 @@ test_that("BCF Serialization", { # Sample a BCF model bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, - pi_train = pi_train, num_gfr = 100, num_burnin = 0, num_mcmc = 100) + propensity_train = pi_train, num_gfr = 100, num_burnin = 0, num_mcmc = 100) bcf_preds_orig <- predict(bcf_model, X_test, Z_test, pi_test) mu_hat_orig <- rowMeans(bcf_preds_orig[["mu_hat"]]) tau_hat_orig <- rowMeans(bcf_preds_orig[["tau_hat"]]) diff --git a/tools/debug/debug.R b/tools/debug/debug.R index 9c0ac947..e0c1979e 100644 --- a/tools/debug/debug.R +++ b/tools/debug/debug.R @@ -82,8 +82,8 @@ wrapped_bart_stochtree_analysis <- function(resid_train, resid_test, y_train, y_ # Run BART bart_model <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, - X_test = X_test, W_test = W_test, num_trees = 200, num_gfr = num_gfr, + X_train = X_train, leaf_basis_train = W_train, y_train = y_train, + X_test = X_test, leaf_basis_test = W_test, num_trees = 200, num_gfr = 
num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc_retained, sample_sigma = T, sample_tau = F, random_seed = 1234, nu = 3 ) diff --git a/vignettes/BayesianSupervisedLearning.Rmd b/vignettes/BayesianSupervisedLearning.Rmd index 9d98d2a2..d444ac2d 100644 --- a/vignettes/BayesianSupervisedLearning.Rmd +++ b/vignettes/BayesianSupervisedLearning.Rmd @@ -54,8 +54,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] ``` @@ -133,12 +131,12 @@ p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) -W <- matrix(runif(n*p_w), ncol = p_w) +leaf_basis <- matrix(runif(n*p_w), ncol = p_w) f_XW <- ( - ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + - ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + - ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + - ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*leaf_basis[,1]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*leaf_basis[,1]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*leaf_basis[,1]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*leaf_basis[,1]) ) noise_sd <- sd(f_XW) / snr y <- f_XW + rnorm(n, 0, 1)*noise_sd @@ -151,8 +149,8 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- W[test_inds,] -W_train <- W[train_inds,] +leaf_basis_test <- leaf_basis[test_inds,] +leaf_basis_train <- leaf_basis[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] ``` @@ -173,7 +171,8 @@ num_samples <- num_gfr + num_burnin + num_mcmc general_params <- list(sample_sigma2_global = T) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = 100) bart_model_warmstart <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = 
y_train, X_test = X_test, W_test = W_test, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, + X_test = X_test, leaf_basis_test = leaf_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params ) @@ -201,7 +200,8 @@ num_samples <- num_gfr + num_burnin + num_mcmc general_params <- list(sample_sigma2_global = T) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = 100) bart_model_root <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, + X_test = X_test, leaf_basis_test = leaf_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params ) @@ -231,15 +231,15 @@ p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) -W <- matrix(runif(n*p_w), ncol = p_w) -group_ids <- rep(c(1,2), n %/% 2) +leaf_basis <- matrix(runif(n*p_w), ncol = p_w) +rfx_group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-5, -3, 5, 3), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) f_XW <- ( - ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + - ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + - ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + - ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*leaf_basis[,1]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*leaf_basis[,1]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*leaf_basis[,1]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*leaf_basis[,1]) ) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) noise_sd <- sd(f_XW) / snr @@ -253,12 +253,12 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- 
W[test_inds,] -W_train <- W[train_inds,] +leaf_basis_test <- leaf_basis[test_inds,] +leaf_basis_train <- leaf_basis[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] ``` @@ -279,8 +279,8 @@ num_samples <- num_gfr + num_burnin + num_mcmc general_params <- list(sample_sigma2_global = T) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = 100) bart_model_warmstart <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, group_ids_train = group_ids_train, - rfx_basis_train = rfx_basis_train, X_test = X_test, W_test = W_test, group_ids_test = group_ids_test, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, rfx_group_ids_train = rfx_group_ids_train, + rfx_basis_train = rfx_basis_train, X_test = X_test, leaf_basis_test = leaf_basis_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params ) @@ -308,9 +308,11 @@ num_samples <- num_gfr + num_burnin + num_mcmc general_params <- list(sample_sigma2_global = T) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = 100) bart_model_root <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, group_ids_train = group_ids_train, - rfx_basis_train = rfx_basis_train, X_test = X_test, W_test = W_test, group_ids_test = group_ids_test, - rfx_basis_test = rfx_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, + rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, + X_test = X_test, leaf_basis_test 
= leaf_basis_test, + rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, + num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params ) ``` diff --git a/vignettes/CausalInference.Rmd b/vignettes/CausalInference.Rmd index 9086b759..42b520d1 100644 --- a/vignettes/CausalInference.Rmd +++ b/vignettes/CausalInference.Rmd @@ -116,8 +116,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -166,8 +166,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_root <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -285,8 +285,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = 
Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -335,8 +335,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_root <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -454,8 +454,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -504,8 +504,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_root <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train 
= y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -621,8 +621,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -671,8 +671,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_root <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -731,7 +731,7 @@ tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x -group_ids <- rep(c(1,2), n %/% 2) +rfx_group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -758,8 +758,8 @@ mu_test <- mu_x[test_inds] mu_train <- 
mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -781,9 +781,9 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -898,8 +898,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_mcmc <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -960,8 +960,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- 
list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F, keep_vars = c("x1","x2")) bcf_model_mcmc <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -1022,8 +1022,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -1084,8 +1084,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F, keep_vars = c("x1","x2")) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -1212,8 +1212,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- 
list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_warmstart <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params @@ -1262,8 +1262,8 @@ general_params <- list(keep_every = 5) mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model_root <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - X_test = X_test, Z_test = Z_test, pi_test = pi_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params diff --git a/vignettes/Heteroskedasticity.Rmd b/vignettes/Heteroskedasticity.Rmd index 0ffe8f72..30350410 100644 --- a/vignettes/Heteroskedasticity.Rmd +++ b/vignettes/Heteroskedasticity.Rmd @@ -68,8 +68,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] f_x_test <- f_XW[test_inds] @@ -183,8 +181,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] f_x_test 
<- f_XW[test_inds] @@ -312,8 +308,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] f_x_test <- f_XW[test_inds] @@ -447,8 +441,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] f_x_test <- f_XW[test_inds] diff --git a/vignettes/ModelSerialization.Rmd b/vignettes/ModelSerialization.Rmd index c36ced79..9b98d6a8 100644 --- a/vignettes/ModelSerialization.Rmd +++ b/vignettes/ModelSerialization.Rmd @@ -58,7 +58,7 @@ tau_x <- tau2(X) pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 Z <- rbinom(n,1,pi_x) E_XZ <- mu_x + Z*tau_x -group_ids <- rep(c(1,2), n %/% 2) +rfx_group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) @@ -85,8 +85,8 @@ mu_test <- mu_x[test_inds] mu_train <- mu_x[train_inds] tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] -group_ids_test <- group_ids[test_inds] -group_ids_train <- group_ids[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] rfx_basis_test <- rfx_basis[test_inds,] rfx_basis_train <- rfx_basis[train_inds,] rfx_term_test <- rfx_term[test_inds] @@ -103,10 +103,10 @@ num_samples <- num_gfr + num_burnin + num_mcmc mu_forest_params <- list(sample_sigma2_leaf = F) tau_forest_params <- list(sample_sigma2_leaf = F) bcf_model <- bcf( - X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, - group_ids_train = group_ids_train, rfx_basis_train = rfx_basis_train, - X_test = X_test, 
Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test, - rfx_basis_test = rfx_basis_test, + X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, + rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, + X_test = X_test, Z_test = Z_test, propensity_test = pi_test, + rfx_group_ids_test = rfx_group_ids_test, rfx_basis_test = rfx_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, mu_forest_params = mu_forest_params, tau_forest_params = tau_forest_params ) @@ -131,7 +131,7 @@ bcf_model_reload <- createBCFModelFromJsonFile("bcf.json") Check that the predictions align with those of the original model. ```{r bcf_roundtrip} -bcf_preds_reload <- predict(bcf_model_reload, X_train, Z_train, pi_train, group_ids_train, rfx_basis_train) +bcf_preds_reload <- predict(bcf_model_reload, X_train, Z_train, pi_train, rfx_group_ids_train, rfx_basis_train) plot(rowMeans(bcf_model$mu_hat_train), rowMeans(bcf_preds_reload$mu_hat), xlab = "Original", ylab = "Deserialized", main = "Prognostic forest") abline(0,1,col="red",lwd=3,lty=3) @@ -174,8 +174,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] f_x_test <- f_XW[test_inds] diff --git a/vignettes/MultiChain.Rmd b/vignettes/MultiChain.Rmd index 425f771f..8d2b4dd9 100644 --- a/vignettes/MultiChain.Rmd +++ b/vignettes/MultiChain.Rmd @@ -55,12 +55,12 @@ p_x <- 10 p_w <- 1 snr <- 3 X <- matrix(runif(n*p_x), ncol = p_x) -W <- matrix(runif(n*p_w), ncol = p_w) +leaf_basis <- matrix(runif(n*p_w), ncol = p_w) f_XW <- ( - ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + - ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + - ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + - ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) + ((0 <= X[,1]) & (0.25 
> X[,1])) * (-7.5*leaf_basis[,1]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*leaf_basis[,1]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*leaf_basis[,1]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*leaf_basis[,1]) ) noise_sd <- sd(f_XW) / snr y <- f_XW + rnorm(n, 0, 1)*noise_sd @@ -73,8 +73,8 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- W[test_inds,] -W_train <- W[train_inds,] +leaf_basis_test <- leaf_basis[test_inds,] +leaf_basis_train <- leaf_basis[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] ``` @@ -101,8 +101,8 @@ general_params <- list(sample_sigma2_global = T) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = num_trees) for (i in 1:num_chains) { bart_models[[i]] <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, - W_test = W_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, X_test = X_test, + leaf_basis_test = leaf_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params ) } @@ -122,7 +122,7 @@ combined_bart <- createBARTModelFromCombinedJsonString(json_string_list) We can predict from this combined forest as follows ```{r} -yhat_combined <- predict(combined_bart, X_test, W_test)$y_hat +yhat_combined <- predict(combined_bart, X_test, leaf_basis_test)$y_hat ``` Compare to the original $\hat{y}$ values @@ -181,7 +181,8 @@ bart_model_outputs <- foreach (i = 1:num_chains) %dopar% { general_params <- list(sample_sigma2_global = T, random_seed = random_seed) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = num_trees) bart_model <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, + 
X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, + X_test = X_test, leaf_basis_test = leaf_basis_test, num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params ) @@ -213,7 +214,7 @@ combined_bart <- createBARTModelFromCombinedJsonString(bart_model_strings) We can predict from this combined forest as follows ```{r} -yhat_combined <- predict(combined_bart, X_test, W_test)$y_hat +yhat_combined <- predict(combined_bart, X_test, leaf_basis_test)$y_hat ``` Compare average predictions from each chain to the original predictions. @@ -272,7 +273,8 @@ for several iterations (we will use these forests to see independent parallel ch general_params <- list(sample_sigma2_global = T) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = num_trees) xbart_model <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, + X_test = X_test, leaf_basis_test = leaf_basis_test, num_gfr = num_gfr, num_burnin = 0, num_mcmc = 0, general_params = general_params, mean_forest_params = mean_forest_params ) @@ -309,10 +311,11 @@ bart_model_outputs <- foreach (i = 1:num_chains) %dopar% { general_params <- list(sample_sigma2_global = T, random_seed = random_seed) mean_forest_params <- list(sample_sigma2_leaf = T, num_trees = num_trees) bart_model <- stochtree::bart( - X_train = X_train, W_train = W_train, y_train = y_train, X_test = X_test, W_test = W_test, + X_train = X_train, leaf_basis_train = leaf_basis_train, y_train = y_train, + X_test = X_test, leaf_basis_test = leaf_basis_test, num_gfr = 0, num_burnin = num_burnin, num_mcmc = num_mcmc, general_params = general_params, mean_forest_params = mean_forest_params, - previous_model_json = xbart_model_string, warmstart_sample_num = num_gfr - i + 1, + previous_model_json = xbart_model_string, 
previous_model_warmstart_sample_num = num_gfr - i + 1, ) bart_model_string <- stochtree::saveBARTModelToJsonString(bart_model) y_hat_test <- bart_model$y_hat_test @@ -342,7 +345,7 @@ combined_bart <- createBARTModelFromCombinedJsonString(bart_model_strings) We can predict from this combined forest as follows ```{r} -yhat_combined <- predict(combined_bart, X_test, W_test)$y_hat +yhat_combined <- predict(combined_bart, X_test, leaf_basis_test)$y_hat ``` Compare average predictions from each chain to the original predictions. diff --git a/vignettes/TreeInspection.Rmd b/vignettes/TreeInspection.Rmd index 66140d67..9b47ff7c 100644 --- a/vignettes/TreeInspection.Rmd +++ b/vignettes/TreeInspection.Rmd @@ -52,8 +52,6 @@ test_inds <- sort(sample(1:n, n_test, replace = FALSE)) train_inds <- (1:n)[!((1:n) %in% test_inds)] X_test <- as.data.frame(X[test_inds,]) X_train <- as.data.frame(X[train_inds,]) -W_test <- NULL -W_train <- NULL y_test <- y[test_inds] y_train <- y[train_inds] ``` From 23f7eee5dac720f7408a1cebb988314c7358f559 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Fri, 24 Jan 2025 01:02:16 -0500 Subject: [PATCH 06/24] Updated parameter names in examples and vignettes and added to inst/COPYRIGHTS --- R/bart.R | 2 +- R/bcf.R | 16 +- inst/COPYRIGHTS | 1133 ++++++++++++++++++- man/convertBCFModelToJson.Rd | 2 +- man/createBCFModelFromCombinedJsonString.Rd | 2 +- man/createBCFModelFromJson.Rd | 2 +- man/createBCFModelFromJsonFile.Rd | 2 +- man/createBCFModelFromJsonString.Rd | 2 +- man/getRandomEffectSamples.bcf.Rd | 2 +- man/predict.bartmodel.Rd | 4 +- man/saveBCFModelToJsonFile.Rd | 2 +- man/saveBCFModelToJsonString.Rd | 2 +- vignettes/BayesianSupervisedLearning.Rmd | 2 +- vignettes/CausalInference.Rmd | 2 +- vignettes/ModelSerialization.Rmd | 2 +- 15 files changed, 1153 insertions(+), 24 deletions(-) diff --git a/R/bart.R b/R/bart.R index 02c77b2d..a22fab35 100644 --- a/R/bart.R +++ b/R/bart.R @@ -913,7 +913,7 @@ bart <- function(X_train, y_train, leaf_basis_train 
= NULL, rfx_group_ids_train #' @param rfx_group_ids (Optional) Test set group labels used for an additive random effects model. #' We do not currently support (but plan to in the near future), test set evaluation for group labels #' that were not in the training set. -#' @param rfx_basis_tes (Optional) Test set basis for "random-slope" regression in additive random effects model. +#' @param rfx_basis (Optional) Test set basis for "random-slope" regression in additive random effects model. #' @param ... (Optional) Other prediction parameters. #' #' @return List of prediction matrices. If model does not have random effects, the list has one element -- the predictions from the forest. diff --git a/R/bcf.R b/R/bcf.R index 621b1fd9..b38b86aa 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -1502,7 +1502,7 @@ predict.bcf <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, r #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) @@ -1591,7 +1591,7 @@ getRandomEffectSamples.bcf <- function(object, ...){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) @@ -1750,7 +1750,7 @@ convertBCFModelToJson <- function(object){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) 
+#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) @@ -1826,7 +1826,7 @@ saveBCFModelToJsonFile <- function(object, filename){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) @@ -1904,7 +1904,7 @@ saveBCFModelToJsonString <- function(object){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) @@ -2067,7 +2067,7 @@ createBCFModelFromJson <- function(json_object){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) @@ -2148,7 +2148,7 @@ createBCFModelFromJsonFile <- function(json_filename){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- 
factor(X$x4, ordered = TRUE) @@ -2226,7 +2226,7 @@ createBCFModelFromJsonString <- function(json_string){ #' rfx_group_ids <- rep(c(1,2), n %/% 2) #' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) #' rfx_basis <- cbind(1, runif(n, -1, 1)) -#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) #' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) #' X <- as.data.frame(X) #' X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/inst/COPYRIGHTS b/inst/COPYRIGHTS index 40b8dc54..6d246808 100644 --- a/inst/COPYRIGHTS +++ b/inst/COPYRIGHTS @@ -7,5 +7,1134 @@ This project includes software from the xgboost project (Apache, 2.0). This project includes software from the LightGBM project (MIT). * Copyright (c) 2016 Microsoft Corporation -This project includes software from the scikit-learn project (BSD, 3-clause). -* Copyright (c) 2007-2024 The scikit-learn developers \ No newline at end of file +This project includes software from the fast_double_parser project (Apache, 2.0). +* Copyright (c) Daniel Lemire + +This project includes software from the Eigen project (MPL, 2.0), whose headers carry the following copyrights: +File: Eigen/Core +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2007-2011 Benoit Jacob + +File: Eigen/SparseCholesky +Copyright (C) 2008-2013 Gael Guennebaud + +File: Eigen/SparseLU +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/StdDeque +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2009 Hauke Heibel + +File: Eigen/StdList +Copyright (C) 2009 Hauke Heibel + +File: Eigen/StdVector +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2009 Hauke Heibel + +File: Eigen/src/Cholesky/LDLT.h +Copyright (C) 2008-2011 Gael Guennebaud +Copyright (C) 2009 Keir Mierle +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2011 Timothy E. Holy + +File: Eigen/src/Cholesky/LLT_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/Cholesky/LLT.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/CholmodSupport/CholmodSupport.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/ArithmeticSequence.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/Array.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/ArrayBase.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/ArrayWrapper.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/Assign_MKL.h +Copyright (c) 2011, Intel Corporation. All rights reserved. +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/Core/Assign.h +Copyright (C) 2007 Michael Olbrich +Copyright (C) 2006-2010 Benoit Jacob +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/AssignEvaluator.h +Copyright (C) 2011 Benoit Jacob +Copyright (C) 2011-2014 Gael Guennebaud +Copyright (C) 2011-2012 Jitse Niesen + +File: Eigen/src/Core/BandMatrix.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/Block.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2010 Benoit Jacob + +File: Eigen/src/Core/BooleanRedux.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/CommaInitializer.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/ConditionEstimator.h +Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) + +File: Eigen/src/Core/CoreEvaluators.h +Copyright (C) 2011 Benoit Jacob +Copyright (C) 2011-2014 Gael Guennebaud +Copyright (C) 2011-2012 Jitse Niesen + +File: Eigen/src/Core/CoreIterators.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/Core/CwiseBinaryOp.h +Copyright (C) 2008-2014 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/CwiseNullaryOp.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/CwiseTernaryOp.h +Copyright (C) 2008-2014 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2016 Eugene Brevdo + +File: Eigen/src/Core/CwiseUnaryOp.h 
+Copyright (C) 2008-2014 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/CwiseUnaryView.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/DenseBase.h +Copyright (C) 2007-2010 Benoit Jacob +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/DenseCoeffsBase.h +Copyright (C) 2006-2010 Benoit Jacob + +File: Eigen/src/Core/DenseStorage.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2009 Benoit Jacob +Copyright (C) 2010-2013 Hauke Heibel + +File: Eigen/src/Core/Diagonal.h +Copyright (C) 2007-2009 Benoit Jacob +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/DiagonalMatrix.h +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2007-2009 Benoit Jacob + +File: Eigen/src/Core/DiagonalProduct.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2007-2009 Benoit Jacob + +File: Eigen/src/Core/Dot.h +Copyright (C) 2006-2008, 2010 Benoit Jacob + +File: Eigen/src/Core/EigenBase.h +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/ForceAlignedAccess.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/Fuzzy.h +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/GeneralProduct.h +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2008-2011 Gael Guennebaud + +File: Eigen/src/Core/GenericPacketMath.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/GlobalFunctions.h +Copyright (C) 2010-2016 Gael Guennebaud +Copyright (C) 2010 Benoit Jacob + +File: Eigen/src/Core/IndexedView.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/Inverse.h +Copyright (C) 2014-2019 Gael Guennebaud + +File: Eigen/src/Core/IO.h +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/Map.h +Copyright (C) 2007-2010 Benoit Jacob +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/MapBase.h +Copyright (C) 2007-2010 Benoit 
Jacob +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/MathFunctions.h +Copyright (C) 2006-2010 Benoit Jacob +Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + +File: Eigen/src/Core/MathFunctionsImpl.h +Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +Copyright (C) 2016 Gael Guennebaud + +File: Eigen/src/Core/Matrix.h +Copyright (C) 2006-2010 Benoit Jacob +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/MatrixBase.h +Copyright (C) 2006-2009 Benoit Jacob +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/NestByValue.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/NoAlias.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/NumTraits.h +Copyright (C) 2006-2010 Benoit Jacob + +File: Eigen/src/Core/PartialReduxEvaluator.h +Copyright (C) 2011-2018 Gael Guennebaud + +File: Eigen/src/Core/PermutationMatrix.h +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2009-2015 Gael Guennebaud + +File: Eigen/src/Core/PlainObjectBase.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/Product.h +Copyright (C) 2008-2011 Gael Guennebaud + +File: Eigen/src/Core/ProductEvaluators.h +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2011 Jitse Niesen + +File: Eigen/src/Core/Random.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/Redux.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/Ref.h +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/src/Core/Replicate.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/Reshaped.h +Copyright (C) 2008-2017 Gael Guennebaud +Copyright (C) 2014 yoco + +File: Eigen/src/Core/ReturnByValue.h +Copyright (C) 2009-2010 Gael Guennebaud +Copyright (C) 2009-2010 Benoit Jacob + +File: Eigen/src/Core/Reverse.h +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2009 Ricard 
Marxer +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/Select.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/SelfAdjointView.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/SelfCwiseBinaryOp.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/Solve.h +Copyright (C) 2014 Gael Guennebaud + +File: Eigen/src/Core/SolverBase.h +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/Core/SolveTriangular.h +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/StableNorm.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/StlIterators.h +Copyright (C) 2018 Gael Guennebaud + +File: Eigen/src/Core/Stride.h +Copyright (C) 2010 Benoit Jacob + +File: Eigen/src/Core/Swap.h +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/Transpose.h +Copyright (C) 2006-2008 Benoit Jacob +Copyright (C) 2009-2014 Gael Guennebaud + +File: Eigen/src/Core/Transpositions.h +Copyright (C) 2010-2011 Gael Guennebaud + +File: Eigen/src/Core/TriangularMatrix.h +Copyright (C) 2008 Benoit Jacob +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/VectorBlock.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/VectorwiseOp.h +Copyright (C) 2008-2019 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/Visitor.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Core/arch/AltiVec/Complex.h +Copyright (C) 2010 Gael Guennebaud +Copyright (C) 2010-2016 Konstantinos Margaritis + +File: Eigen/src/Core/arch/AltiVec/MathFunctions.h +Copyright (C) 2007 Julien Pommier +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2016 Konstantinos Margaritis + +File: Eigen/src/Core/arch/AltiVec/MatrixProduct.h +Copyright (C) 2020 Everton Constantino (everton.constantino@ibm.com) +Copyright (C) 2021 Chip Kerchner (chip.kerchner@ibm.com) + +File: Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +Copyright (C) 2020 Everton Constantino 
(everton.constantino@ibm.com) +Copyright (C) 2021 Chip Kerchner (chip.kerchner@ibm.com) + +File: Eigen/src/Core/arch/AltiVec/PacketMath.h +Copyright (C) 2008-2016 Konstantinos Margaritis + +File: Eigen/src/Core/arch/AVX/Complex.h +Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com) + +File: Eigen/src/Core/arch/AVX/MathFunctions.h +Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) + +File: Eigen/src/Core/arch/AVX/PacketMath.h +Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com) + +File: Eigen/src/Core/arch/AVX/TypeCasting.h +Copyright (C) 2015 Benoit Steiner + +File: Eigen/src/Core/arch/AVX512/Complex.h +Copyright (C) 2018 Gael Guennebaud + +File: Eigen/src/Core/arch/AVX512/MathFunctions.h +Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com) + +File: Eigen/src/Core/arch/AVX512/PacketMath.h +Copyright (C) 2016 Benoit Steiner (benoit.steiner.goog@gmail.com) + +File: Eigen/src/Core/arch/AVX512/TypeCasting.h +Copyright (C) 2019 Rasmus Munk Larsen + +File: Eigen/src/Core/arch/CUDA/Complex.h +Copyright (C) 2014 Benoit Steiner +Copyright (C) 2021 C. Antonio Sanchez + +File: Eigen/src/Core/arch/Default/BFloat16.h +Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +File: Eigen/src/Core/arch/Default/ConjHelper.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +Copyright (C) 2007 Julien Pommier +Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +Copyright (C) 2009-2019 Gael Guennebaud + +File: Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +Copyright (C) 2019 Gael Guennebaud + +File: Eigen/src/Core/arch/Default/Half.h +Copyright (c) Fabian Giesen, 2016. 
+ +File: Eigen/src/Core/arch/Default/Half.h +Copyright (c) Fabian Giesen, 2016 + +File: Eigen/src/Core/arch/Default/Settings.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/arch/Default/TypeCasting.h +Copyright (C) 2016 Benoit Steiner +Copyright (C) 2019 Rasmus Munk Larsen + +File: Eigen/src/Core/arch/GPU/MathFunctions.h +Copyright (C) 2014 Benoit Steiner + +File: Eigen/src/Core/arch/GPU/PacketMath.h +Copyright (C) 2014 Benoit Steiner + +File: Eigen/src/Core/arch/GPU/TypeCasting.h +Copyright (C) 2016 Benoit Steiner + +File: Eigen/src/Core/arch/MSA/Complex.h +Copyright (C) 2018 Wave Computing, Inc. + +File: Eigen/src/Core/arch/MSA/MathFunctions.h +Copyright (C) 2007 Julien Pommier +Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +Copyright (C) 2016 Gael Guennebaud +File: Eigen/src/Core/arch/MSA/MathFunctions.h +Copyright (C) 2018 Wave Computing, Inc. + +File: Eigen/src/Core/arch/MSA/PacketMath.h +Copyright (C) 2018 Wave Computing, Inc. 
+ +File: Eigen/src/Core/arch/NEON/Complex.h +Copyright (C) 2010 Gael Guennebaud +Copyright (C) 2010 Konstantinos Margaritis + +File: Eigen/src/Core/arch/NEON/PacketMath.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2010 Konstantinos Margaritis + +File: Eigen/src/Core/arch/NEON/TypeCasting.h +Copyright (C) 2018 Rasmus Munk Larsen +Copyright (C) 2020 Antonio Sanchez + +File: Eigen/src/Core/arch/SSE/Complex.h +Copyright (C) 2010 Gael Guennebaud + +File: Eigen/src/Core/arch/SSE/MathFunctions.h +Copyright (C) 2007 Julien Pommier +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/arch/SSE/PacketMath.h +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/arch/SSE/TypeCasting.h +Copyright (C) 2015 Benoit Steiner + +File: Eigen/src/Core/arch/SVE/MathFunctions.h +Copyright (C) 2020, Arm Limited and Contributors + +File: Eigen/src/Core/arch/SVE/PacketMath.h +Copyright (C) 2020, Arm Limited and Contributors + +File: Eigen/src/Core/arch/SVE/TypeCasting.h +Copyright (C) 2020, Arm Limited and Contributors + +File: Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +Copyright (C) 2017 Codeplay Software Limited + +File: Eigen/src/Core/arch/ZVector/Complex.h +Copyright (C) 2010 Gael Guennebaud +Copyright (C) 2016 Konstantinos Margaritis + +File: Eigen/src/Core/arch/ZVector/MathFunctions.h +Copyright (C) 2007 Julien Pommier +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2016 Konstantinos Margaritis + +File: Eigen/src/Core/arch/ZVector/PacketMath.h +Copyright (C) 2016 Konstantinos Margaritis + +File: Eigen/src/Core/functors/AssignmentFunctors.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/functors/BinaryFunctors.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/functors/NullaryFunctors.h +Copyright (C) 2008-2016 Gael Guennebaud + +File: Eigen/src/Core/functors/StlFunctors.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/functors/TernaryFunctors.h +Copyright (C) 2016 Eugene Brevdo + +File: 
Eigen/src/Core/functors/UnaryFunctors.h +Copyright (C) 2008-2016 Gael Guennebaud + +File: Eigen/src/Core/products/GeneralBlockPanelKernel.h +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/GeneralMatrixMatrix.h +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/GeneralMatrixVector.h +Copyright (C) 2008-2016 Gael Guennebaud + +File: Eigen/src/Core/products/Parallelizer.h +Copyright (C) 2010 Gael Guennebaud + +File: Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/SelfadjointMatrixMatrix.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/SelfadjointMatrixVector.h +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/products/SelfadjointProduct.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/products/SelfadjointRank2Update.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/TriangularMatrixMatrix.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/Core/products/TriangularMatrixVector.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Core/products/TriangularSolverMatrix.h +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Core/products/TriangularSolverVector.h +Copyright (C) 2008-2010 Gael Guennebaud + +File: Eigen/src/Core/util/BlasUtil.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Core/util/ConfigureVectorization.h +Copyright (C) 2008-2018 Gael Guennebaud +Copyright (C) 2020, Arm Limited and Contributors + +File: Eigen/src/Core/util/Constants.h +Copyright (C) 2008-2015 Gael Guennebaud +Copyright (C) 2007-2009 Benoit Jacob +Copyright (C) 2020, Arm Limited and Contributors + +File: Eigen/src/Core/util/ForwardDeclarations.h +Copyright (C) 2007-2010 Benoit Jacob +Copyright (C) 2008-2009 Gael Guennebaud + +File: Eigen/src/Core/util/IndexedViewHelper.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/util/IntegralConstant.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/util/Macros.h +Copyright (C) 2008-2015 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/util/Memory.h +Copyright (C) 2008-2015 Gael Guennebaud +Copyright (C) 2008-2009 Benoit Jacob +Copyright (C) 2009 Kenneth Riddile +Copyright (C) 2010 Hauke Heibel +Copyright (C) 2010 Thomas Capricelli +Copyright (C) 2013 Pavel Holoborodko + +File: Eigen/src/Core/util/Meta.h +Copyright (C) 2008-2015 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Core/util/MKL_support.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/Core/util/ReshapedHelper.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/util/StaticAssert.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2008 Benoit Jacob + +File: Eigen/src/Core/util/SymbolicIndex.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/Core/util/XprHelper.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Eigenvalues/ComplexEigenSolver.h +Copyright (C) 2009 Claire Maurice +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2010,2012 Jitse Niesen + +File: Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Eigenvalues/ComplexSchur.h +Copyright (C) 2009 Claire Maurice +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2010,2012 Jitse Niesen + +File: Eigen/src/Eigenvalues/EigenSolver.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2010,2012 Jitse Niesen + +File: Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +Copyright (C) 2012-2016 Gael Guennebaud +Copyright (C) 2010,2012 Jitse Niesen +Copyright (C) 2016 Tobias Wood + +File: Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2010 Jitse Niesen + +File: Eigen/src/Eigenvalues/HessenbergDecomposition.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2010 Jitse Niesen + +File: Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2010 Jitse Niesen + +File: Eigen/src/Eigenvalues/RealQZ.h +Copyright (C) 2012 Alexey Korepanov + +File: Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/Eigenvalues/RealSchur.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2010,2012 Jitse Niesen + +File: Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2010 Jitse Niesen + +File: Eigen/src/Eigenvalues/Tridiagonalization.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2010 Jitse Niesen + +File: Eigen/src/Geometry/AlignedBox.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/AlignedBox.h +Copyright (c) 2011-2014, Willow Garage, Inc. +Copyright (c) 2014-2015, Open Source Robotics Foundation + +File: Eigen/src/Geometry/AngleAxis.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/EulerAngles.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/Homogeneous.h +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Geometry/Hyperplane.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2008 Benoit Jacob + +File: Eigen/src/Geometry/OrthoMethods.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/Geometry/ParametrizedLine.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2008 Benoit Jacob + +File: Eigen/src/Geometry/Quaternion.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2009 Mathieu Gautier + +File: Eigen/src/Geometry/Rotation2D.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/RotationBase.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/Scaling.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/Transform.h +Copyright (C) 2008 Gael Guennebaud +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2010 Hauke Heibel + +File: Eigen/src/Geometry/Translation.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/Geometry/Umeyama.h +Copyright (C) 2009 Hauke Heibel + +File: Eigen/src/Geometry/arch/Geometry_SIMD.h +Copyright (C) 2009 Rohit Garg +Copyright (C) 2009-2010 Gael Guennebaud + +File: Eigen/src/Householder/BlockHouseholder.h +Copyright (C) 2010 Vincent Lejeune +Copyright (C) 2010 Gael Guennebaud + +File: Eigen/src/Householder/Householder.h +Copyright (C) 
2010 Benoit Jacob +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/Householder/HouseholderSequence.h +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2010 Benoit Jacob + +File: Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +Copyright (C) 2011-2014 Gael Guennebaud + +File: Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +Copyright (C) 2011-2014 Gael Guennebaud +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +Copyright (C) 2011-2014 Gael Guennebaud + +File: Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2014 Gael Guennebaud + +File: Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +Copyright (C) 2005, the Regents of the University of Minnesota + +File: Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +Copyright (C) 2011-2014 Gael Guennebaud + +File: Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +Copyright (C) 2014 Gael Guennebaud + +File: Eigen/src/Jacobi/Jacobi.h +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/KLUSupport/KLUSupport.h +Copyright (C) 2017 Kyle Macfarlan + +File: Eigen/src/LU/Determinant.h +Copyright (C) 2008 Benoit Jacob + +File: Eigen/src/LU/FullPivLU.h +Copyright (C) 2006-2009 Benoit Jacob + +File: Eigen/src/LU/InverseImpl.h +Copyright (C) 2008-2010 Benoit Jacob +Copyright (C) 2014 Gael Guennebaud + +File: Eigen/src/LU/PartialPivLU_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/LU/PartialPivLU.h +Copyright (C) 2006-2009 Benoit Jacob +Copyright (C) 2009 Gael Guennebaud + +File: Eigen/src/LU/arch/InverseSize4.h +Copyright (C) 2001 Intel Corporation +Copyright (C) 2010 Gael Guennebaud +Copyright (C) 2009 Benoit Jacob + +File: Eigen/src/LU/arch/InverseSize4.h +Copyright (c) 2001 Intel Corporation. + +File: Eigen/src/MetisSupport/MetisSupport.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/misc/Image.h +Copyright (C) 2009 Benoit Jacob + +File: Eigen/src/misc/Kernel.h +Copyright (C) 2009 Benoit Jacob + +File: Eigen/src/misc/lapacke.h +Copyright (c) 2010, Intel Corp. + +File: Eigen/src/misc/RealSvd2x2.h +Copyright (C) 2009-2010 Benoit Jacob +Copyright (C) 2013-2016 Gael Guennebaud + +File: Eigen/src/OrderingMethods/Amd.h +Copyright (C) 2010 Gael Guennebaud + +File: Eigen/src/OrderingMethods/Amd.h +Copyright (c) 2006, Timothy A. Davis. + +File: Eigen/src/OrderingMethods/Eigen_Colamd.h +Copyright (C) 2012 Desire Nuentsa Wakam + +File: Eigen/src/OrderingMethods/Eigen_Colamd.h +Copyright (c) 1998-2003 by the University of Florida. + +File: Eigen/src/OrderingMethods/Eigen_Colamd.h +Copyright, this License, and the + +File: Eigen/src/OrderingMethods/Ordering.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/PardisoSupport/PardisoSupport.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/PaStiXSupport/PaStiXSupport.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/plugins/BlockMethods.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2006-2010 Benoit Jacob + +File: Eigen/src/plugins/CommonCwiseBinaryOps.h +Copyright (C) 2008-2016 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/plugins/CommonCwiseUnaryOps.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/plugins/IndexedViewMethods.h +Copyright (C) 2017 Gael Guennebaud + +File: Eigen/src/plugins/MatrixCwiseBinaryOps.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/plugins/MatrixCwiseUnaryOps.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2006-2008 Benoit Jacob + +File: Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/QR/ColPivHouseholderQR.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2009 Benoit Jacob + +File: Eigen/src/QR/CompleteOrthogonalDecomposition.h +Copyright (C) 2016 Rasmus Munk Larsen + +File: Eigen/src/QR/FullPivHouseholderQR.h +Copyright (C) 2008-2009 Gael Guennebaud +Copyright (C) 2009 Benoit Jacob + +File: Eigen/src/QR/HouseholderQR_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. + +File: Eigen/src/QR/HouseholderQR.h +Copyright (C) 2008-2010 Gael Guennebaud +Copyright (C) 2009 Benoit Jacob +Copyright (C) 2010 Vincent Lejeune + +File: Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +Copyright (C) 2008-2012 Gael Guennebaud + +File: Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved. 
+ +File: Eigen/src/SparseCholesky/SimplicialCholesky.h +Copyright (C) 2008-2012 Gael Guennebaud + +File: Eigen/src/SparseCore/AmbiVector.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/SparseCore/CompressedStorage.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/MappedSparseMatrix.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseAssign.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseBlock.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseColEtree.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseCore/SparseColEtree.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseCore/SparseCompressedBase.h +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseCwiseBinaryOp.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseCwiseUnaryOp.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseDenseProduct.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseDiagonalProduct.h +Copyright (C) 2009-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseDot.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseFuzzy.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseMap.h +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseMatrix.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseMatrixBase.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparsePermutation.h +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseProduct.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseRedux.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: 
Eigen/src/SparseCore/SparseRef.h +Copyright (C) 2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseSelfAdjointView.h +Copyright (C) 2009-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseSolverBase.h +Copyright (C) 2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseSparseProductWithPruning.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseTranspose.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseTriangularView.h +Copyright (C) 2009-2015 Gael Guennebaud +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseCore/SparseUtil.h +Copyright (C) 2008-2014 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseVector.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SparseCore/SparseView.h +Copyright (C) 2011-2014 Gael Guennebaud +Copyright (C) 2010 Daniel Lowengrub + +File: Eigen/src/SparseCore/TriangularSolver.h +Copyright (C) 2008 Gael Guennebaud + +File: Eigen/src/SparseLU/SparseLU_column_bmod.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/src/SparseLU/SparseLU_column_bmod.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_column_dfs.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_column_dfs.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. 
+ +File: Eigen/src/SparseLU/SparseLU_kernel_bmod.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/src/SparseLU/SparseLU_Memory.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_Memory.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_panel_bmod.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/src/SparseLU/SparseLU_panel_bmod.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_panel_dfs.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_panel_dfs.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_pivotL.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_pivotL.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_pruneL.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_pruneL.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SparseLU/SparseLU_relax_snode.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_relax_snode.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. 
+ +File: Eigen/src/SparseLU/SparseLU_Structs.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2012 Gael Guennebaud + +File: Eigen/src/SparseLU/SparseLU_Utils.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseLU/SparseLU.h +Copyright (C) 2012 Désiré Nuentsa-Wakam +Copyright (C) 2012-2014 Gael Guennebaud + +File: Eigen/src/SparseLU/SparseLUImpl.h +Copyright (C) 2012 Désiré Nuentsa-Wakam + +File: Eigen/src/SparseQR/SparseQR.h +Copyright (C) 2012-2013 Desire Nuentsa +Copyright (C) 2012-2014 Gael Guennebaud + +File: Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +Copyright (C) 2012 Desire Nuentsa +Copyright (C) 2014 Gael Guennebaud + +File: Eigen/src/StlSupport/details.h +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2009 Hauke Heibel + +File: Eigen/src/StlSupport/StdDeque.h +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2009 Hauke Heibel + +File: Eigen/src/StlSupport/StdList.h +Copyright (C) 2009 Hauke Heibel + +File: Eigen/src/StlSupport/StdVector.h +Copyright (C) 2009 Gael Guennebaud +Copyright (C) 2009 Hauke Heibel + +File: Eigen/src/SuperLUSupport/SuperLUSupport.h +Copyright (C) 2008-2015 Gael Guennebaud + +File: Eigen/src/SuperLUSupport/SuperLUSupport.h +Copyright (c) 1994 by Xerox Corporation. All rights reserved. + +File: Eigen/src/SVD/BDCSVD.h +Copyright (C) 2013 Gauthier Brun +Copyright (C) 2013 Nicolas Carre +Copyright (C) 2013 Jean Ceccato +Copyright (C) 2013 Pierre Zoppitelli +Copyright (C) 2013 Jitse Niesen +Copyright (C) 2014-2017 Gael Guennebaud + +File: Eigen/src/SVD/JacobiSVD_LAPACKE.h +Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ +File: Eigen/src/SVD/JacobiSVD.h +Copyright (C) 2009-2010 Benoit Jacob +Copyright (C) 2013-2014 Gael Guennebaud + +File: Eigen/src/SVD/SVDBase.h +Copyright (C) 2009-2010 Benoit Jacob +Copyright (C) 2014 Gael Guennebaud +File: Eigen/src/SVD/SVDBase.h +Copyright (C) 2013 Gauthier Brun +Copyright (C) 2013 Nicolas Carre +Copyright (C) 2013 Jean Ceccato +Copyright (C) 2013 Pierre Zoppitelli + +File: Eigen/src/SVD/UpperBidiagonalization.h +Copyright (C) 2010 Benoit Jacob +Copyright (C) 2013-2014 Gael Guennebaud + +File: Eigen/src/UmfPackSupport/UmfPackSupport.h +Copyright (C) 2008-2011 Gael Guennebaud + +This project includes software from the fmt project, released under the following license and copyright +Copyright (c) 2012 - present, Victor Zverovich and {fmt} contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +--- Optional exception to the license --- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into a machine-executable object form of such +source code, you may redistribute such embedded portions in such object form +without including the above copyright and permission notices. + diff --git a/man/convertBCFModelToJson.Rd b/man/convertBCFModelToJson.Rd index 892be2a2..44880463 100644 --- a/man/convertBCFModelToJson.Rd +++ b/man/convertBCFModelToJson.Rd @@ -38,7 +38,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/createBCFModelFromCombinedJsonString.Rd b/man/createBCFModelFromCombinedJsonString.Rd index bce0f98b..05997993 100644 --- a/man/createBCFModelFromCombinedJsonString.Rd +++ b/man/createBCFModelFromCombinedJsonString.Rd @@ -41,7 +41,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/createBCFModelFromJson.Rd b/man/createBCFModelFromJson.Rd index ee41bc60..49af6eca 100644 --- a/man/createBCFModelFromJson.Rd +++ b/man/createBCFModelFromJson.Rd @@ -40,7 +40,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y 
<- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/createBCFModelFromJsonFile.Rd b/man/createBCFModelFromJsonFile.Rd index 230d412e..2ea9fb8b 100644 --- a/man/createBCFModelFromJsonFile.Rd +++ b/man/createBCFModelFromJsonFile.Rd @@ -40,7 +40,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/createBCFModelFromJsonString.Rd b/man/createBCFModelFromJsonString.Rd index e3cff160..8c5b10fc 100644 --- a/man/createBCFModelFromJsonString.Rd +++ b/man/createBCFModelFromJsonString.Rd @@ -40,7 +40,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/getRandomEffectSamples.bcf.Rd b/man/getRandomEffectSamples.bcf.Rd index 55b41048..9ab74b08 100644 --- a/man/getRandomEffectSamples.bcf.Rd +++ b/man/getRandomEffectSamples.bcf.Rd @@ -42,7 +42,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/predict.bartmodel.Rd b/man/predict.bartmodel.Rd index b5f30a9d..e894d4bb 100644 --- 
a/man/predict.bartmodel.Rd +++ b/man/predict.bartmodel.Rd @@ -24,9 +24,9 @@ We do not currently support (but plan to in the near future), test set evaluation for group labels that were not in the training set.} -\item{...}{(Optional) Other prediction parameters.} +\item{rfx_basis}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} -\item{rfx_basis_tes}{(Optional) Test set basis for "random-slope" regression in additive random effects model.} +\item{...}{(Optional) Other prediction parameters.} } \value{ List of prediction matrices. If model does not have random effects, the list has one element -- the predictions from the forest. diff --git a/man/saveBCFModelToJsonFile.Rd b/man/saveBCFModelToJsonFile.Rd index f6dc04af..321470a5 100644 --- a/man/saveBCFModelToJsonFile.Rd +++ b/man/saveBCFModelToJsonFile.Rd @@ -40,7 +40,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/man/saveBCFModelToJsonString.Rd b/man/saveBCFModelToJsonString.Rd index 4ea4e4ee..85e69b78 100644 --- a/man/saveBCFModelToJsonString.Rd +++ b/man/saveBCFModelToJsonString.Rd @@ -38,7 +38,7 @@ snr <- 3 rfx_group_ids <- rep(c(1,2), n \%/\% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/vignettes/BayesianSupervisedLearning.Rmd b/vignettes/BayesianSupervisedLearning.Rmd index d444ac2d..2b9337c3 100644 --- 
a/vignettes/BayesianSupervisedLearning.Rmd +++ b/vignettes/BayesianSupervisedLearning.Rmd @@ -241,7 +241,7 @@ f_XW <- ( ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*leaf_basis[,1]) + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*leaf_basis[,1]) ) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) noise_sd <- sd(f_XW) / snr y <- f_XW + rfx_term + rnorm(n, 0, 1)*noise_sd diff --git a/vignettes/CausalInference.Rmd b/vignettes/CausalInference.Rmd index 42b520d1..e7a8ff61 100644 --- a/vignettes/CausalInference.Rmd +++ b/vignettes/CausalInference.Rmd @@ -734,7 +734,7 @@ E_XZ <- mu_x + Z*tau_x rfx_group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) diff --git a/vignettes/ModelSerialization.Rmd b/vignettes/ModelSerialization.Rmd index 9b98d6a8..60490671 100644 --- a/vignettes/ModelSerialization.Rmd +++ b/vignettes/ModelSerialization.Rmd @@ -61,7 +61,7 @@ E_XZ <- mu_x + Z*tau_x rfx_group_ids <- rep(c(1,2), n %/% 2) rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) rfx_basis <- cbind(1, runif(n, -1, 1)) -rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) X <- as.data.frame(X) X$x4 <- factor(X$x4, ordered = TRUE) From d7efd5140f8111a8d98edd56d571b28d63328e3a Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Sun, 26 Jan 2025 17:45:35 -0500 Subject: [PATCH 07/24] Updated aspirational README ahead of CRAN submission --- R_README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R_README.md b/R_README.md index ff457184..4b6a52f5 100644 --- a/R_README.md +++ b/R_README.md @@ 
-1,10 +1,16 @@ # stochtree R package -**NOTE**: we are in the process of refactoring this project so that the R, Python, and C++ source code sits in the [same repo](https://github.com/StochasticTree/stochtree/). +Software for building stochastic tree ensembles (i.e. BART, XBART) for supervised learning and causal inference. ## Getting started -The package can be installed in R via +`stochtree` can be installed from CRAN via + +``` +install.packages("stochtree") +``` + +The development version of stochtree can be installed from github via ``` remotes::install_github("StochasticTree/stochtree", ref="r-dev") From 899bebaa1461b35b9e07fa6cec140996a66506d0 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 28 Jan 2025 21:34:28 -0600 Subject: [PATCH 08/24] Updated docs and CRAN bootstrap script --- R/bart.R | 36 +++++++------- cran-bootstrap.R | 50 ++++++++++++++++---- man/convertBARTModelToJson.Rd | 2 +- man/createBARTModelFromCombinedJson.Rd | 4 +- man/createBARTModelFromCombinedJsonString.Rd | 4 +- man/createBARTModelFromJson.Rd | 4 +- man/createBARTModelFromJsonFile.Rd | 6 ++- man/createBARTModelFromJsonString.Rd | 10 ++-- man/saveBARTModelToJsonFile.Rd | 4 +- man/saveBARTModelToJsonString.Rd | 2 +- 10 files changed, 80 insertions(+), 42 deletions(-) diff --git a/R/bart.R b/R/bart.R index a22fab35..18b7d420 100644 --- a/R/bart.R +++ b/R/bart.R @@ -1150,7 +1150,7 @@ getRandomEffectSamples.bartmodel <- function(object, ...){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # bart_json <- convertBARTModelToJson(bart_model) +#' bart_json <- convertBARTModelToJson(bart_model) convertBARTModelToJson <- function(object){ jsonobj <- createCppJson() @@ -1317,7 +1317,9 @@ convertBARTStateToJson <- function(param_list, mean_forest = NULL, variance_fore #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # saveBARTModelToJsonFile(bart_model, 
"test.json") +#' tmpjson <- tempfile(fileext = ".json") +#' saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) +#' unlink(tmpjson) saveBARTModelToJsonFile <- function(object, filename){ # Convert to Json jsonobj <- convertBARTModelToJson(object) @@ -1354,7 +1356,7 @@ saveBARTModelToJsonFile <- function(object, filename){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # saveBARTModelToJsonString(bart_model) +#' bart_json_string <- saveBARTModelToJsonString(bart_model) saveBARTModelToJsonString <- function(object){ # Convert to Json jsonobj <- convertBARTModelToJson(object) @@ -1393,8 +1395,8 @@ saveBARTModelToJsonString <- function(object){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # bart_json <- convertBARTModelToJson(bart_model) -#' # bart_model_roundtrip <- createBARTModelFromJson(bart_json) +#' bart_json <- convertBARTModelToJson(bart_model) +#' bart_model_roundtrip <- createBARTModelFromJson(bart_json) createBARTModelFromJson <- function(json_object){ # Initialize the BCF model output <- list() @@ -1507,8 +1509,10 @@ createBARTModelFromJson <- function(json_object){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # saveBARTModelToJsonFile(bart_model, "test.json") -#' # bart_model_roundtrip <- createBARTModelFromJsonFile("test.json") +#' tmpjson <- tempfile(fileext = ".json") +#' saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) +#' bart_model_roundtrip <- createBARTModelFromJsonFile(file.path(tmpjson)) +#' unlink(tmpjson) createBARTModelFromJsonFile <- function(json_filename){ # Load a `CppJson` object from file bart_json <- createCppJsonFile(json_filename) @@ -1549,11 +1553,11 @@ createBARTModelFromJsonFile <- function(json_filename){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train 
= y_train) -#' # bart_json <- saveBARTModelToJsonString(bart_model) -#' # bart_model_roundtrip <- createBARTModelFromJsonString(bart_json) -#' # y_hat_mean_roundtrip <- rowMeans(predict(bart_model_roundtrip, X_train)$y_hat) -#' # plot(rowMeans(bart_model$y_hat_train), y_hat_mean_roundtrip, -#' # xlab = "original", ylab = "roundtrip") +#' bart_json <- saveBARTModelToJsonString(bart_model) +#' bart_model_roundtrip <- createBARTModelFromJsonString(bart_json) +#' y_hat_mean_roundtrip <- rowMeans(predict(bart_model_roundtrip, X_train)$y_hat) +#' plot(rowMeans(bart_model$y_hat_train), y_hat_mean_roundtrip, +#' xlab = "original", ylab = "roundtrip") createBARTModelFromJsonString <- function(json_string){ # Load a `CppJson` object from string bart_json <- createCppJsonString(json_string) @@ -1594,8 +1598,8 @@ createBARTModelFromJsonString <- function(json_string){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # bart_json <- list(convertBARTModelToJson(bart_model)) -#' # bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) +#' bart_json <- list(convertBARTModelToJson(bart_model)) +#' bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) createBARTModelFromCombinedJson <- function(json_object_list){ # Initialize the BCF model output <- list() @@ -1739,8 +1743,8 @@ createBARTModelFromCombinedJson <- function(json_object_list){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' # bart_json_string_list <- list(saveBARTModelToJsonString(bart_model)) -#' # bart_model_roundtrip <- createBARTModelFromCombinedJsonString(bart_json_string_list) +#' bart_json_string_list <- list(saveBARTModelToJsonString(bart_model)) +#' bart_model_roundtrip <- createBARTModelFromCombinedJsonString(bart_json_string_list) createBARTModelFromCombinedJsonString <- function(json_string_list){ # Initialize the BCF model output <- list() diff 
--git a/cran-bootstrap.R b/cran-bootstrap.R index 6c45e540..54b9aa7b 100644 --- a/cran-bootstrap.R +++ b/cran-bootstrap.R @@ -10,30 +10,39 @@ # which is MIT licensed with the following copyright: # Copyright (c) Microsoft Corporation # -# Includes one command line argument: +# Includes two command line arguments: # include_vignettes : 1 to include the vignettes folder in the R package subfolder -# 0 to exclude vignettes +# 0 to exclude vignettes (overridden to 1 if pkgdown_build = 1 below) +# +# pkgdown_build : 1 to include pkgdown specific files (R_README.md) +# 0 to exclude pkgdown files # # Run this script from the command line via # -# Explicitly include vignettes -# ---------------------------- -# Rscript cran-bootstrap.R 1 +# Explicitly include vignettes and build pkgdown site +# --------------------------------------------------- +# Rscript cran-bootstrap.R 1 1 +# +# Explicitly include vignettes but don't build pkgdown site +# --------------------------------------------------------- +# Rscript cran-bootstrap.R 1 0 # -# Explicitly exclude vignettes -# ---------------------------- -# Rscript cran-bootstrap.R 0 +# Explicitly exclude vignettes and don't build pkgdown site +# --------------------------------------------------------- +# Rscript cran-bootstrap.R 0 0 # -# Exclude vignettes by default -# ---------------------------- +# Exclude vignettes and pkgdown by default +# ---------------------------------------- # Rscript cran-bootstrap.R # Unpack command line arguments args <- commandArgs(trailingOnly = T) if (length(args) > 0){ include_vignettes <- as.logical(as.integer(args[1])) + pkgdown_build <- as.logical(as.integer(args[2])) } else{ include_vignettes <- F + pkgdown_build <- F } # Create the stochtree_cran folder @@ -64,6 +73,27 @@ pkg_core_files_dst <- file.path(cran_dir, pkg_core_files) + +# Handle README separately (change name from R_README.md to README.md) +readme_file_src <- file.path("R_README.md") +readme_file_dst <- 
file.path(cran_dir, c("README.md")) +if (file.copy(readme_file_src, readme_file_dst)) { + cat("Copied R README.md to CRAN subdirectory\n") +} else { + stop("Failed to copy R README.md") +} + +# Copy _pkgdown.yml if requested +if (pkgdown_build) { + pkgdown_yml_src <- file.path("_pkgdown.yml") + pkgdown_yml_dst <- file.path(cran_dir, c("_pkgdown.yml")) + if (file.copy(pkgdown_yml_src, pkgdown_yml_dst)) { + cat("Copied _pkgdown.yml to CRAN subdirectory\n") + } else { + stop("Failed to copy _pkgdown.yml") + } +} + # Handle tests separately (move from test/R/ folder to tests/ folder) test_files_src <- list.files("test/R", recursive = TRUE, full.names = TRUE) test_files_dst <- file.path(cran_dir, gsub("test/R", "tests", test_files_src)) diff --git a/man/convertBARTModelToJson.Rd b/man/convertBARTModelToJson.Rd index de28613a..50382a06 100644 --- a/man/convertBARTModelToJson.Rd +++ b/man/convertBARTModelToJson.Rd @@ -37,5 +37,5 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# bart_json <- convertBARTModelToJson(bart_model) +bart_json <- convertBARTModelToJson(bart_model) } diff --git a/man/createBARTModelFromCombinedJson.Rd b/man/createBARTModelFromCombinedJson.Rd index 72c3e675..f85e9406 100644 --- a/man/createBARTModelFromCombinedJson.Rd +++ b/man/createBARTModelFromCombinedJson.Rd @@ -39,6 +39,6 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# bart_json <- list(convertBARTModelToJson(bart_model)) -# bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) +bart_json <- list(convertBARTModelToJson(bart_model)) +bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) } diff --git a/man/createBARTModelFromCombinedJsonString.Rd b/man/createBARTModelFromCombinedJsonString.Rd index 99c248b7..1ae404b3 100644 --- a/man/createBARTModelFromCombinedJsonString.Rd +++ 
b/man/createBARTModelFromCombinedJsonString.Rd @@ -39,6 +39,6 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# bart_json_string_list <- list(saveBARTModelToJsonString(bart_model)) -# bart_model_roundtrip <- createBARTModelFromCombinedJsonString(bart_json_string_list) +bart_json_string_list <- list(saveBARTModelToJsonString(bart_model)) +bart_model_roundtrip <- createBARTModelFromCombinedJsonString(bart_json_string_list) } diff --git a/man/createBARTModelFromJson.Rd b/man/createBARTModelFromJson.Rd index 0ebea7ee..5dcee7eb 100644 --- a/man/createBARTModelFromJson.Rd +++ b/man/createBARTModelFromJson.Rd @@ -39,6 +39,6 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# bart_json <- convertBARTModelToJson(bart_model) -# bart_model_roundtrip <- createBARTModelFromJson(bart_json) +bart_json <- convertBARTModelToJson(bart_model) +bart_model_roundtrip <- createBARTModelFromJson(bart_json) } diff --git a/man/createBARTModelFromJsonFile.Rd b/man/createBARTModelFromJsonFile.Rd index e776bb6f..5ed802cb 100644 --- a/man/createBARTModelFromJsonFile.Rd +++ b/man/createBARTModelFromJsonFile.Rd @@ -39,6 +39,8 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# saveBARTModelToJsonFile(bart_model, "test.json") -# bart_model_roundtrip <- createBARTModelFromJsonFile("test.json") +tmpjson <- tempfile(fileext = ".json") +saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) +bart_model_roundtrip <- createBARTModelFromJsonFile(file.path(tmpjson)) +unlink(tmpjson) } diff --git a/man/createBARTModelFromJsonString.Rd b/man/createBARTModelFromJsonString.Rd index 735fb48f..50651393 100644 --- a/man/createBARTModelFromJsonString.Rd +++ b/man/createBARTModelFromJsonString.Rd @@ -39,9 +39,9 @@ X_train <- X[train_inds,] y_test <- 
y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# bart_json <- saveBARTModelToJsonString(bart_model) -# bart_model_roundtrip <- createBARTModelFromJsonString(bart_json) -# y_hat_mean_roundtrip <- rowMeans(predict(bart_model_roundtrip, X_train)$y_hat) -# plot(rowMeans(bart_model$y_hat_train), y_hat_mean_roundtrip, -# xlab = "original", ylab = "roundtrip") +bart_json <- saveBARTModelToJsonString(bart_model) +bart_model_roundtrip <- createBARTModelFromJsonString(bart_json) +y_hat_mean_roundtrip <- rowMeans(predict(bart_model_roundtrip, X_train)$y_hat) +plot(rowMeans(bart_model$y_hat_train), y_hat_mean_roundtrip, + xlab = "original", ylab = "roundtrip") } diff --git a/man/saveBARTModelToJsonFile.Rd b/man/saveBARTModelToJsonFile.Rd index 29763e81..869caa44 100644 --- a/man/saveBARTModelToJsonFile.Rd +++ b/man/saveBARTModelToJsonFile.Rd @@ -36,5 +36,7 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# saveBARTModelToJsonFile(bart_model, "test.json") +tmpjson <- tempfile(fileext = ".json") +saveBARTModelToJsonFile(bart_model, file.path(tmpjson)) +unlink(tmpjson) } diff --git a/man/saveBARTModelToJsonString.Rd b/man/saveBARTModelToJsonString.Rd index 7159598e..ec954f7b 100644 --- a/man/saveBARTModelToJsonString.Rd +++ b/man/saveBARTModelToJsonString.Rd @@ -37,5 +37,5 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -# saveBARTModelToJsonString(bart_model) +bart_json_string <- saveBARTModelToJsonString(bart_model) } From 93d93c2eb4c1517cbc2f188b40f75f975db2c140 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 28 Jan 2025 22:31:56 -0600 Subject: [PATCH 09/24] Updated R docstrings --- R/bart.R | 10 +++++----- R/bcf.R | 24 ++++++++++++------------ cran-bootstrap.R | 10 ++++++++++ man/bart.Rd | 4 ++-- man/bcf.Rd | 12 ++++++------ man/predict.bartmodel.Rd | 6 
+++--- man/predict.bcf.Rd | 12 ++++++------ 7 files changed, 44 insertions(+), 34 deletions(-) diff --git a/R/bart.R b/R/bart.R index 18b7d420..1b56070a 100644 --- a/R/bart.R +++ b/R/bart.R @@ -98,8 +98,8 @@ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test) -#' # plot(rowMeans(bart_model$y_hat_test), y_test, xlab = "predicted", ylab = "actual") -#' # abline(0,1,col="red",lty=3,lwd=3) +#' plot(rowMeans(bart_model$y_hat_test), y_test, xlab = "predicted", ylab = "actual") +#' abline(0,1,col="red",lty=3,lwd=3) bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, leaf_basis_test = NULL, rfx_group_ids_test = NULL, rfx_basis_test = NULL, @@ -942,9 +942,9 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' y_hat_test <- predict(bart_model, X_test) -#' # plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") -#' # abline(0,1,col="red",lty=3,lwd=3) +#' y_hat_test <- predict(bart_model, X_test)$y_hat +#' plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") +#' abline(0,1,col="red",lty=3,lwd=3) predict.bartmodel <- function(object, X, leaf_basis = NULL, rfx_group_ids = NULL, rfx_basis = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X)) && (!is.matrix(X))) { diff --git a/R/bcf.R b/R/bcf.R index b38b86aa..856cbf8b 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -138,12 +138,12 @@ #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, #' propensity_train = pi_train, X_test = X_test, Z_test = Z_test, #' propensity_test = pi_test) -#' # plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", -#' # ylab = "actual", main = "Prognostic function") -#' # abline(0,1,col="red",lty=3,lwd=3) -#' # 
plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted", -#' # ylab = "actual", main = "Treatment effect") -#' # abline(0,1,col="red",lty=3,lwd=3) +#' plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", +#' ylab = "actual", main = "Prognostic function") +#' abline(0,1,col="red",lty=3,lwd=3) +#' plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted", +#' ylab = "actual", main = "Treatment effect") +#' abline(0,1,col="red",lty=3,lwd=3) bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_ids_train = NULL, rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, propensity_test = NULL, rfx_group_ids_test = NULL, rfx_basis_test = NULL, @@ -1347,12 +1347,12 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id #' tau_train <- tau_x[train_inds] #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train) #' preds <- predict(bcf_model, X_test, Z_test, pi_test) -#' # plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", -#' # ylab = "actual", main = "Prognostic function") -#' # abline(0,1,col="red",lty=3,lwd=3) -#' # plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", -#' # ylab = "actual", main = "Treatment effect") -#' # abline(0,1,col="red",lty=3,lwd=3) +#' plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", +#' ylab = "actual", main = "Prognostic function") +#' abline(0,1,col="red",lty=3,lwd=3) +#' plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", +#' ylab = "actual", main = "Treatment effect") +#' abline(0,1,col="red",lty=3,lwd=3) predict.bcf <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, rfx_basis = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X)) && (!is.matrix(X))) { diff --git a/cran-bootstrap.R b/cran-bootstrap.R index 54b9aa7b..3a47f19f 100644 --- a/cran-bootstrap.R +++ b/cran-bootstrap.R @@ -142,6 +142,16 @@ if (!include_vignettes) { writeLines(description_lines, 
cran_description) } +# Remove vignettes from _pkgdown.yml if no vignettes +if ((!include_vignettes) & (pkgdown_build)) { + pkgdown_yml <- file.path(cran_dir, "_pkgdown.yml") + pkgdown_yml_lines <- readLines(pkgdown_yml) + articles_begin <- grep("articles:", pkgdown_yml_lines) + articles_end <- length(pkgdown_yml_lines) + pkgdown_yml_lines <- pkgdown_yml_lines[-(articles_begin:articles_end)] + writeLines(pkgdown_yml_lines, pkgdown_yml) +} + # Copy fast_double_parser header to an include/ subdirectory of src/ header_folders <- c("nlohmann", "stochtree") header_files_to_vendor_src <- c() diff --git a/man/bart.Rd b/man/bart.Rd index 98dfd904..3cf6f211 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -143,6 +143,6 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test) -# plot(rowMeans(bart_model$y_hat_test), y_test, xlab = "predicted", ylab = "actual") -# abline(0,1,col="red",lty=3,lwd=3) +plot(rowMeans(bart_model$y_hat_test), y_test, xlab = "predicted", ylab = "actual") +abline(0,1,col="red",lty=3,lwd=3) } diff --git a/man/bcf.Rd b/man/bcf.Rd index c5b15e1f..2fcb3993 100644 --- a/man/bcf.Rd +++ b/man/bcf.Rd @@ -189,10 +189,10 @@ tau_train <- tau_x[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train, X_test = X_test, Z_test = Z_test, propensity_test = pi_test) -# plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", -# ylab = "actual", main = "Prognostic function") -# abline(0,1,col="red",lty=3,lwd=3) -# plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted", -# ylab = "actual", main = "Treatment effect") -# abline(0,1,col="red",lty=3,lwd=3) +plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", + ylab = "actual", main = "Prognostic function") +abline(0,1,col="red",lty=3,lwd=3) +plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted", + ylab = "actual", main = 
"Treatment effect") +abline(0,1,col="red",lty=3,lwd=3) } diff --git a/man/predict.bartmodel.Rd b/man/predict.bartmodel.Rd index e894d4bb..a6fbe26c 100644 --- a/man/predict.bartmodel.Rd +++ b/man/predict.bartmodel.Rd @@ -57,7 +57,7 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -y_hat_test <- predict(bart_model, X_test) -# plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") -# abline(0,1,col="red",lty=3,lwd=3) +y_hat_test <- predict(bart_model, X_test)$y_hat +plot(rowMeans(y_hat_test), y_test, xlab = "predicted", ylab = "actual") +abline(0,1,col="red",lty=3,lwd=3) } diff --git a/man/predict.bcf.Rd b/man/predict.bcf.Rd index dc74c783..b2651af7 100644 --- a/man/predict.bcf.Rd +++ b/man/predict.bcf.Rd @@ -80,10 +80,10 @@ tau_test <- tau_x[test_inds] tau_train <- tau_x[train_inds] bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, propensity_train = pi_train) preds <- predict(bcf_model, X_test, Z_test, pi_test) -# plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", -# ylab = "actual", main = "Prognostic function") -# abline(0,1,col="red",lty=3,lwd=3) -# plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", -# ylab = "actual", main = "Treatment effect") -# abline(0,1,col="red",lty=3,lwd=3) +plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", + ylab = "actual", main = "Prognostic function") +abline(0,1,col="red",lty=3,lwd=3) +plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", + ylab = "actual", main = "Treatment effect") +abline(0,1,col="red",lty=3,lwd=3) } From 4fd36be7badedd76a972dd473186366e3447e0a9 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 28 Jan 2025 22:33:04 -0600 Subject: [PATCH 10/24] Updated calibration function name --- NAMESPACE | 2 +- R/calibration.R | 2 +- _pkgdown.yml | 2 +- ...or_variance.Rd => calibrateInverseGammaErrorVariance.Rd} | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) 
rename man/{calibrate_inverse_gamma_error_variance.Rd => calibrateInverseGammaErrorVariance.Rd} (94%) diff --git a/NAMESPACE b/NAMESPACE index 41fd66a3..4094b693 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,7 @@ S3method(predict,bartmodel) S3method(predict,bcf) export(bart) export(bcf) -export(calibrate_inverse_gamma_error_variance) +export(calibrateInverseGammaErrorVariance) export(computeForestLeafIndices) export(computeForestLeafVariances) export(computeMaxLeafIndex) diff --git a/R/calibration.R b/R/calibration.R index 6df0575c..7b11b9d8 100644 --- a/R/calibration.R +++ b/R/calibration.R @@ -21,7 +21,7 @@ #' lambda <- calibrate_inverse_gamma_error_variance(y, X, nu = nu) #' sigma2hat <- mean(resid(lm(y~X))^2) #' mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat) -calibrate_inverse_gamma_error_variance <- function(y, X, W = NULL, nu = 3, quant = 0.9, standardize = TRUE) { +calibrateInverseGammaErrorVariance <- function(y, X, W = NULL, nu = 3, quant = 0.9, standardize = TRUE) { # Compute regression basis if (!is.null(W)) basis <- cbind(X, W) else basis <- X diff --git a/_pkgdown.yml b/_pkgdown.yml index d6922be3..281aad4c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -91,7 +91,7 @@ reference: - createForestContainer - CppRNG - createRNG - - calibrate_inverse_gamma_error_variance + - calibrateInverseGammaErrorVariance - preprocessParams - computeMaxLeafIndex - computeForestLeafIndices diff --git a/man/calibrate_inverse_gamma_error_variance.Rd b/man/calibrateInverseGammaErrorVariance.Rd similarity index 94% rename from man/calibrate_inverse_gamma_error_variance.Rd rename to man/calibrateInverseGammaErrorVariance.Rd index 4e7aa68e..01dcce92 100644 --- a/man/calibrate_inverse_gamma_error_variance.Rd +++ b/man/calibrateInverseGammaErrorVariance.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calibration.R -\name{calibrate_inverse_gamma_error_variance} -\alias{calibrate_inverse_gamma_error_variance} 
+\name{calibrateInverseGammaErrorVariance} +\alias{calibrateInverseGammaErrorVariance} \title{Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022)} \usage{ -calibrate_inverse_gamma_error_variance( +calibrateInverseGammaErrorVariance( y, X, W = NULL, From 36aa10664570ce34f93b9cdfc83f00f2c8b89529 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Tue, 28 Jan 2025 22:55:11 -0600 Subject: [PATCH 11/24] Updated interface and function names --- NAMESPACE | 4 +-- R/bart.R | 6 ++-- R/bcf.R | 8 ++--- R/calibration.R | 2 +- R/forest.R | 2 +- R/model.R | 2 +- _pkgdown.yml | 4 +-- man/calibrateInverseGammaErrorVariance.Rd | 2 +- man/{createRNG.Rd => createCppRNG.Rd} | 6 ++-- ...estContainer.Rd => createForestSamples.Rd} | 6 ++-- test/R/testthat/test-predict.R | 6 ++-- test/R/testthat/test-residual.R | 4 +-- vignettes/CustomSamplingRoutine.Rmd | 30 +++++++++---------- vignettes/PriorCalibration.Rmd | 4 +-- 14 files changed, 43 insertions(+), 43 deletions(-) rename man/{createRNG.Rd => createCppRNG.Rd} (82%) rename man/{createForestContainer.Rd => createForestSamples.Rd} (87%) diff --git a/NAMESPACE b/NAMESPACE index 4094b693..f18aa8ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,16 +25,16 @@ export(createBCFModelFromJsonString) export(createCppJson) export(createCppJsonFile) export(createCppJsonString) +export(createCppRNG) export(createForest) -export(createForestContainer) export(createForestCovariates) export(createForestCovariatesFromMetadata) export(createForestDataset) export(createForestModel) +export(createForestSamples) export(createOutcome) export(createPreprocessorFromJson) export(createPreprocessorFromJsonString) -export(createRNG) export(createRandomEffectSamples) export(createRandomEffectsDataset) export(createRandomEffectsModel) diff --git a/R/bart.R b/R/bart.R index 1b56070a..9e579539 100644 --- a/R/bart.R +++ b/R/bart.R @@ -536,7 +536,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, 
rfx_group_ids_train # Random number generator (std::mt19937) if (is.null(random_seed)) random_seed = sample(1:10000,1,F) - rng <- createRNG(random_seed) + rng <- createCppRNG(random_seed) # Sampling data structures feature_types <- as.integer(feature_types) @@ -549,11 +549,11 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train # Container of forest samples if (include_mean_forest) { - forest_samples_mean <- createForestContainer(num_trees_mean, output_dimension, is_leaf_constant, FALSE) + forest_samples_mean <- createForestSamples(num_trees_mean, output_dimension, is_leaf_constant, FALSE) active_forest_mean <- createForest(num_trees_mean, output_dimension, is_leaf_constant, FALSE) } if (include_variance_forest) { - forest_samples_variance <- createForestContainer(num_trees_variance, 1, TRUE, TRUE) + forest_samples_variance <- createForestSamples(num_trees_variance, 1, TRUE, TRUE) active_forest_variance <- createForest(num_trees_variance, 1, TRUE, TRUE) } diff --git a/R/bcf.R b/R/bcf.R index 856cbf8b..cd74f950 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -760,7 +760,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id # Random number generator (std::mt19937) if (is.null(random_seed)) random_seed = sample(1:10000,1,F) - rng <- createRNG(random_seed) + rng <- createCppRNG(random_seed) # Sampling data structures forest_model_mu <- createForestModel(forest_dataset_train, feature_types, num_trees_mu, nrow(X_train), alpha_mu, beta_mu, min_samples_leaf_mu, max_depth_mu) @@ -770,12 +770,12 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id } # Container of forest samples - forest_samples_mu <- createForestContainer(num_trees_mu, 1, T) - forest_samples_tau <- createForestContainer(num_trees_tau, 1, F) + forest_samples_mu <- createForestSamples(num_trees_mu, 1, T) + forest_samples_tau <- createForestSamples(num_trees_tau, 1, F) active_forest_mu <- createForest(num_trees_mu, 1, T) 
active_forest_tau <- createForest(num_trees_tau, 1, F) if (include_variance_forest) { - forest_samples_variance <- createForestContainer(num_trees_variance, 1, TRUE, TRUE) + forest_samples_variance <- createForestSamples(num_trees_variance, 1, TRUE, TRUE) active_forest_variance <- createForest(num_trees_variance, 1, TRUE, TRUE) } diff --git a/R/calibration.R b/R/calibration.R index 7b11b9d8..ea91436f 100644 --- a/R/calibration.R +++ b/R/calibration.R @@ -18,7 +18,7 @@ #' X <- matrix(runif(n*p), ncol = p) #' y <- 10*X[,1] - 20*X[,2] + rnorm(n) #' nu <- 3 -#' lambda <- calibrate_inverse_gamma_error_variance(y, X, nu = nu) +#' lambda <- calibrateInverseGammaErrorVariance(y, X, nu = nu) #' sigma2hat <- mean(resid(lm(y~X))^2) #' mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat) calibrateInverseGammaErrorVariance <- function(y, X, W = NULL, nu = 3, quant = 0.9, standardize = TRUE) { diff --git a/R/forest.R b/R/forest.R index 8f2ddd95..2ed73c01 100644 --- a/R/forest.R +++ b/R/forest.R @@ -758,7 +758,7 @@ Forest <- R6::R6Class( #' #' @return `ForestSamples` object #' @export -createForestContainer <- function(num_trees, output_dimension=1, is_leaf_constant=F, is_exponentiated=F) { +createForestSamples <- function(num_trees, output_dimension=1, is_leaf_constant=F, is_exponentiated=F) { return(invisible(( ForestSamples$new(num_trees, output_dimension, is_leaf_constant, is_exponentiated) ))) diff --git a/R/model.R b/R/model.R index 8e34d32b..e8f2ba61 100644 --- a/R/model.R +++ b/R/model.R @@ -170,7 +170,7 @@ ForestModel <- R6::R6Class( #' #' @return `CppRng` object #' @export -createRNG <- function(random_seed = -1){ +createCppRNG <- function(random_seed = -1){ return(invisible(( CppRNG$new(random_seed) ))) diff --git a/_pkgdown.yml b/_pkgdown.yml index 281aad4c..69c37c58 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -88,9 +88,9 @@ reference: - ForestModel - createForestModel - ForestSamples - - createForestContainer + - createForestSamples - CppRNG - - 
createRNG + - createCppRNG - calibrateInverseGammaErrorVariance - preprocessParams - computeMaxLeafIndex diff --git a/man/calibrateInverseGammaErrorVariance.Rd b/man/calibrateInverseGammaErrorVariance.Rd index 01dcce92..db8af0ef 100644 --- a/man/calibrateInverseGammaErrorVariance.Rd +++ b/man/calibrateInverseGammaErrorVariance.Rd @@ -38,7 +38,7 @@ p <- 5 X <- matrix(runif(n*p), ncol = p) y <- 10*X[,1] - 20*X[,2] + rnorm(n) nu <- 3 -lambda <- calibrate_inverse_gamma_error_variance(y, X, nu = nu) +lambda <- calibrateInverseGammaErrorVariance(y, X, nu = nu) sigma2hat <- mean(resid(lm(y~X))^2) mean(var(y)/rgamma(100000, nu, rate = nu*lambda) < sigma2hat) } diff --git a/man/createRNG.Rd b/man/createCppRNG.Rd similarity index 82% rename from man/createRNG.Rd rename to man/createCppRNG.Rd index dd8f595a..45e5ef02 100644 --- a/man/createRNG.Rd +++ b/man/createCppRNG.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/model.R -\name{createRNG} -\alias{createRNG} +\name{createCppRNG} +\alias{createCppRNG} \title{Create an R class that wraps a C++ random number generator} \usage{ -createRNG(random_seed = -1) +createCppRNG(random_seed = -1) } \arguments{ \item{random_seed}{(Optional) random seed for sampling} diff --git a/man/createForestContainer.Rd b/man/createForestSamples.Rd similarity index 87% rename from man/createForestContainer.Rd rename to man/createForestSamples.Rd index 67a33e8d..7789ccb9 100644 --- a/man/createForestContainer.Rd +++ b/man/createForestSamples.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/forest.R -\name{createForestContainer} -\alias{createForestContainer} +\name{createForestSamples} +\alias{createForestSamples} \title{Create a container of forest samples} \usage{ -createForestContainer( +createForestSamples( num_trees, output_dimension = 1, is_leaf_constant = F, diff --git a/test/R/testthat/test-predict.R b/test/R/testthat/test-predict.R index 
aeceaa6f..01b19b53 100644 --- a/test/R/testthat/test-predict.R +++ b/test/R/testthat/test-predict.R @@ -11,7 +11,7 @@ test_that("Prediction from trees with constant leaf", { n <- nrow(X) p <- ncol(X) forest_dataset = createForestDataset(X) - forest_samples <- createForestContainer(num_trees, 1, T) + forest_samples <- createForestSamples(num_trees, 1, T) # Initialize a forest with constant root predictions forest_samples$add_forest_with_constant_leaves(0.) @@ -65,7 +65,7 @@ test_that("Prediction from trees with univariate leaf basis", { n <- nrow(X) p <- ncol(X) forest_dataset = createForestDataset(X,W) - forest_samples <- createForestContainer(num_trees, 1, F) + forest_samples <- createForestSamples(num_trees, 1, F) # Initialize a forest with constant root predictions forest_samples$add_forest_with_constant_leaves(0.) @@ -123,7 +123,7 @@ test_that("Prediction from trees with multivariate leaf basis", { p <- ncol(X) W = matrix(c(1,1,1,1,1,1,-1,-1,-1,1,1,1), byrow=F, nrow=6) forest_dataset = createForestDataset(X,W) - forest_samples <- createForestContainer(num_trees, output_dim, F) + forest_samples <- createForestSamples(num_trees, output_dim, F) # Initialize a forest with constant root predictions forest_samples$add_forest_with_constant_leaves(c(1.,1.)) diff --git a/test/R/testthat/test-residual.R b/test/R/testthat/test-residual.R index 08833585..04165271 100644 --- a/test/R/testthat/test-residual.R +++ b/test/R/testthat/test-residual.R @@ -33,11 +33,11 @@ test_that("Residual updates correctly propagated after forest sampling step", { b_forest = 0 # RNG - cpp_rng = createRNG(-1) + cpp_rng = createCppRNG(-1) # Create forest sampler and forest container forest_model = createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) - forest_samples = createForestContainer(num_trees, 1, F) + forest_samples = createForestSamples(num_trees, 1, F) active_forest = createForest(num_trees, 1, F) # Initialize the leaves of each tree in 
the prognostic forest diff --git a/vignettes/CustomSamplingRoutine.Rmd b/vignettes/CustomSamplingRoutine.Rmd index 5d6acae9..a7b70534 100644 --- a/vignettes/CustomSamplingRoutine.Rmd +++ b/vignettes/CustomSamplingRoutine.Rmd @@ -128,7 +128,7 @@ if (leaf_regression) { outcome <- createOutcome(resid) # Random number generator (std::mt19937) -rng <- createRNG() +rng <- createCppRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, @@ -139,10 +139,10 @@ forest_model <- createForestModel(forest_dataset, feature_types, # container of forest samples (which is written to when # a sample is not discarded due to burn-in / thinning) if (leaf_regression) { - forest_samples <- createForestContainer(num_trees, 1, F) + forest_samples <- createForestSamples(num_trees, 1, F) active_forest <- createForest(num_trees, 1, F) } else { - forest_samples <- createForestContainer(num_trees, 1, T) + forest_samples <- createForestSamples(num_trees, 1, T) active_forest <- createForest(num_trees, 1, T) } ``` @@ -317,7 +317,7 @@ if (leaf_regression) { outcome <- createOutcome(resid) # Random number generator (std::mt19937) -rng <- createRNG() +rng <- createCppRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, @@ -328,10 +328,10 @@ forest_model <- createForestModel(forest_dataset, feature_types, # container of forest samples (which is written to when # a sample is not discarded due to burn-in / thinning) if (leaf_regression) { - forest_samples <- createForestContainer(num_trees, 1, F) + forest_samples <- createForestSamples(num_trees, 1, F) active_forest <- createForest(num_trees, 1, F) } else { - forest_samples <- createForestContainer(num_trees, 1, T) + forest_samples <- createForestSamples(num_trees, 1, T) active_forest <- createForest(num_trees, 1, T) } @@ -552,7 +552,7 @@ if (leaf_regression) { outcome <- createOutcome(resid) # Random number generator (std::mt19937) -rng <- createRNG() +rng <- 
createCppRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, @@ -563,10 +563,10 @@ forest_model <- createForestModel(forest_dataset, feature_types, # container of forest samples (which is written to when # a sample is not discarded due to burn-in / thinning) if (leaf_regression) { - forest_samples <- createForestContainer(num_trees, 1, F) + forest_samples <- createForestSamples(num_trees, 1, F) active_forest <- createForest(num_trees, 1, F) } else { - forest_samples <- createForestContainer(num_trees, 1, T) + forest_samples <- createForestSamples(num_trees, 1, T) active_forest <- createForest(num_trees, 1, T) } @@ -786,7 +786,7 @@ if (leaf_regression) { outcome <- createOutcome(resid) # Random number generator (std::mt19937) -rng <- createRNG() +rng <- createCppRNG() # Sampling data structures forest_model <- createForestModel(forest_dataset, feature_types, @@ -797,10 +797,10 @@ forest_model <- createForestModel(forest_dataset, feature_types, # container of forest samples (which is written to when # a sample is not discarded due to burn-in / thinning) if (leaf_regression) { - forest_samples <- createForestContainer(num_trees, 1, F) + forest_samples <- createForestSamples(num_trees, 1, F) active_forest <- createForest(num_trees, 1, F) } else { - forest_samples <- createForestContainer(num_trees, 1, T) + forest_samples <- createForestSamples(num_trees, 1, T) active_forest <- createForest(num_trees, 1, T) } ``` @@ -1132,7 +1132,7 @@ forest_dataset_tau <- createForestDataset(X_tau, tau_basis) outcome <- createOutcome(resid) # Random number generator (std::mt19937) -rng <- createRNG() +rng <- createCppRNG() # Sampling data structures forest_model_mu <- createForestModel( @@ -1145,9 +1145,9 @@ forest_model_tau <- createForestModel( ) # Container of forest samples -forest_samples_mu <- createForestContainer(num_trees_mu, 1, T) +forest_samples_mu <- createForestSamples(num_trees_mu, 1, T) active_forest_mu <- 
createForest(num_trees_mu, 1, T) -forest_samples_tau <- createForestContainer(num_trees_tau, 1, F) +forest_samples_tau <- createForestSamples(num_trees_tau, 1, F) active_forest_tau <- createForest(num_trees_tau, 1, F) # Initialize the leaves of each tree in the prognostic forest diff --git a/vignettes/PriorCalibration.Rmd b/vignettes/PriorCalibration.Rmd index 87a09e46..cd3f4f0c 100644 --- a/vignettes/PriorCalibration.Rmd +++ b/vignettes/PriorCalibration.Rmd @@ -49,7 +49,7 @@ In this case, $\nu$ is set by default to 3 and $\lambda$ is calibrated as follow 1. An "overestimate," $\hat{\sigma}^2$, of $\sigma^2$ is obtained via simple linear regression of $y$ on $X$ 2. $\lambda$ is chosen to ensure that $p(\sigma^2 < \hat{\sigma}^2) = q$ for some value $q$, typically set to a default value of 0.9. -This is done in `stochtree` via the `calibrate_inverse_gamma_error_variance` function. +This is done in `stochtree` via the `calibrateInverseGammaErrorVariance` function. ```{r} # Load library @@ -81,7 +81,7 @@ y_train <- y[train_inds] # Calibrate the scale parameter for the variance term as in Chipman et al (2010) nu <- 3 -lambda <- calibrate_inverse_gamma_error_variance(y_train, X_train, nu = nu) +lambda <- calibrateInverseGammaErrorVariance(y_train, X_train, nu = nu) ``` Now we run a BART model with this variance parameterization From 0dc3928b1e3cbec24afc47191e234731edaea23b Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 00:21:18 -0600 Subject: [PATCH 12/24] Simplifying preprocessing API (and temporarily disabling deprecated preprocessing unit tests) --- NAMESPACE | 11 - R/utils.R | 27 +- _pkgdown.yml | 11 - man/createForestCovariates.Rd | 37 -- man/createForestCovariatesFromMetadata.Rd | 31 -- man/oneHotEncode.Rd | 33 -- man/oneHotInitializeAndEncode.Rd | 28 -- man/orderedCatInitializeAndPreprocess.Rd | 31 -- man/orderedCatPreprocess.Rd | 37 -- man/preprocessParams.Rd | 19 - man/preprocessPredictionDataFrame.Rd | 29 -- man/preprocessPredictionMatrix.Rd 
| 26 -- man/preprocessTrainDataFrame.Rd | 29 -- man/preprocessTrainMatrix.Rd | 26 -- test/R/testthat/test-data-preprocessing.R | 514 +++++++++++----------- 15 files changed, 268 insertions(+), 621 deletions(-) delete mode 100644 man/createForestCovariates.Rd delete mode 100644 man/createForestCovariatesFromMetadata.Rd delete mode 100644 man/oneHotEncode.Rd delete mode 100644 man/oneHotInitializeAndEncode.Rd delete mode 100644 man/orderedCatInitializeAndPreprocess.Rd delete mode 100644 man/orderedCatPreprocess.Rd delete mode 100644 man/preprocessParams.Rd delete mode 100644 man/preprocessPredictionDataFrame.Rd delete mode 100644 man/preprocessPredictionMatrix.Rd delete mode 100644 man/preprocessTrainDataFrame.Rd delete mode 100644 man/preprocessTrainMatrix.Rd diff --git a/NAMESPACE b/NAMESPACE index f18aa8ad..1a2f145e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,8 +27,6 @@ export(createCppJsonFile) export(createCppJsonString) export(createCppRNG) export(createForest) -export(createForestCovariates) -export(createForestCovariatesFromMetadata) export(createForestDataset) export(createForestModel) export(createForestSamples) @@ -48,17 +46,8 @@ export(loadRandomEffectSamplesCombinedJsonString) export(loadRandomEffectSamplesJson) export(loadScalarJson) export(loadVectorJson) -export(oneHotEncode) -export(oneHotInitializeAndEncode) -export(orderedCatInitializeAndPreprocess) -export(orderedCatPreprocess) -export(preprocessParams) export(preprocessPredictionData) -export(preprocessPredictionDataFrame) -export(preprocessPredictionMatrix) export(preprocessTrainData) -export(preprocessTrainDataFrame) -export(preprocessTrainMatrix) export(resetActiveForest) export(resetForestModel) export(resetRandomEffectsModel) diff --git a/R/utils.R b/R/utils.R index 2f505bca..5d92ba60 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,9 +2,9 @@ #' #' @param default_params List of parameters with default values set. #' @param user_params (Optional) User-supplied overrides to `default_params`. 
+#' @noRd #' #' @return Parameter list with defaults overriden by values supplied in `user_params` -#' @export preprocessParams <- function(default_params, user_params = NULL) { # Override defaults from general_params if (!is.null(user_params)) { @@ -89,7 +89,7 @@ preprocessPredictionData <- function(input_data, metadata) { #' @return List with preprocessed (unmodified) data and details on the number of each type #' of variable, unique categories associated with categorical variables, and the #' vector of feature types needed for calls to BART and BCF. -#' @export +#' @noRd #' #' @examples #' cov_mat <- matrix(1:12, ncol = 3) @@ -137,7 +137,7 @@ preprocessTrainMatrix <- function(input_matrix) { #' categories for categorical variables #' #' @return Preprocessed data with categorical variables appropriately preprocessed -#' @export +#' @noRd #' #' @examples #' cov_mat <- matrix(c(1:5, 5:1, 6:10), ncol = 3) @@ -162,16 +162,11 @@ preprocessPredictionMatrix <- function(input_matrix, metadata) { #' #' @param input_df Dataframe of covariates. Users must pre-process any #' categorical variables as factors (ordered for ordered categorical). +#' @noRd #' #' @return List with preprocessed data and details on the number of each type #' of variable, unique categories associated with categorical variables, and the #' vector of feature types needed for calls to BART and BCF. 
-#' @export -#' -#' @examples -#' cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) -#' preprocess_list <- preprocessTrainDataFrame(cov_df) -#' X <- preprocess_list$X preprocessTrainDataFrame <- function(input_df) { # Input checks / details if (!is.data.frame(input_df)) { @@ -291,7 +286,7 @@ preprocessTrainDataFrame <- function(input_df) { #' categories for categorical variables #' #' @return Preprocessed data with categorical variables appropriately preprocessed -#' @export +#' @noRd #' #' @examples #' cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) @@ -499,7 +494,7 @@ createPreprocessorFromJsonString <- function(json_string){ #' @return List with preprocessed data and details on the number of each type #' of variable, unique categories associated with categorical variables, and the #' vector of feature types needed for calls to BART and BCF. -#' @export +#' @noRd #' #' @examples #' cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) @@ -614,7 +609,7 @@ createForestCovariates <- function(input_data, ordered_cat_vars = NULL, unordere #' categories for categorical variables #' #' @return Preprocessed data with categorical variables appropriately preprocessed -#' @export +#' @noRd #' #' @examples #' cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) @@ -701,7 +696,7 @@ createForestCovariatesFromMetadata <- function(input_data, metadata) { #' #' @return List containing a binary one-hot matrix and the unique levels of the #' input variable. These unique levels are used in the BCF and BART functions. 
-#' @export +#' @noRd #' #' @examples #' x <- c("a","c","b","c","d","a","c","a","b","d") @@ -733,7 +728,7 @@ oneHotInitializeAndEncode <- function(x_input) { #' the initial one-hot matrix (typically a training set) #' #' @return Binary one-hot matrix -#' @export +#' @noRd #' #' @examples #' x <- sample(1:8, 100, TRUE) @@ -771,7 +766,7 @@ oneHotEncode <- function(x_input, unique_levels) { #' @return List containing a preprocessed vector of integer-converted ordered #' categorical observations and the unique level of the original ordered #' categorical feature. -#' @export +#' @noRd #' #' @examples #' x <- c("1. Strongly disagree", "3. Neither agree nor disagree", "2. Disagree", @@ -805,7 +800,7 @@ orderedCatInitializeAndPreprocess <- function(x_input) { #' @return List containing a preprocessed vector of integer-converted ordered #' categorical observations and the unique level of the original ordered #' categorical feature. -#' @export +#' @noRd #' #' @examples #' x_levels <- c("1. Strongly disagree", "2. 
Disagree", diff --git a/_pkgdown.yml b/_pkgdown.yml index 69c37c58..33b212fa 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -64,16 +64,6 @@ reference: - createRandomEffectsDataset - preprocessTrainData - preprocessPredictionData - - preprocessTrainDataFrame - - preprocessPredictionDataFrame - - preprocessTrainMatrix - - preprocessPredictionMatrix - - createForestCovariates - - createForestCovariatesFromMetadata - - oneHotEncode - - oneHotInitializeAndEncode - - orderedCatPreprocess - - orderedCatInitializeAndPreprocess - convertPreprocessorToJson - savePreprocessorToJsonString - createPreprocessorFromJson @@ -92,7 +82,6 @@ reference: - CppRNG - createCppRNG - calibrateInverseGammaErrorVariance - - preprocessParams - computeMaxLeafIndex - computeForestLeafIndices - computeForestLeafVariances diff --git a/man/createForestCovariates.Rd b/man/createForestCovariates.Rd deleted file mode 100644 index f749d504..00000000 --- a/man/createForestCovariates.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{createForestCovariates} -\alias{createForestCovariates} -\title{Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. Returns a list including a -matrix of preprocessed covariate values and associated tracking.} -\usage{ -createForestCovariates( - input_data, - ordered_cat_vars = NULL, - unordered_cat_vars = NULL -) -} -\arguments{ -\item{input_data}{Dataframe or matrix of covariates. 
Users may pre-process any -categorical variables as factors but it is not necessary.} - -\item{ordered_cat_vars}{(Optional) Vector of names of ordered categorical variables, or vector of column indices if \code{input_data} is a matrix.} - -\item{unordered_cat_vars}{(Optional) Vector of names of unordered categorical variables, or vector of column indices if \code{input_data} is a matrix.} -} -\value{ -List with preprocessed data and details on the number of each type -of variable, unique categories associated with categorical variables, and the -vector of feature types needed for calls to BART and BCF. -} -\description{ -Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. Returns a list including a -matrix of preprocessed covariate values and associated tracking. -} -\examples{ -cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) -preprocess_list <- createForestCovariates(cov_df) -X <- preprocess_list$X -} diff --git a/man/createForestCovariatesFromMetadata.Rd b/man/createForestCovariatesFromMetadata.Rd deleted file mode 100644 index 0b34a606..00000000 --- a/man/createForestCovariatesFromMetadata.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{createForestCovariatesFromMetadata} -\alias{createForestCovariatesFromMetadata} -\title{Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. Returns a list including a -matrix of preprocessed covariate values and associated tracking.} -\usage{ -createForestCovariatesFromMetadata(input_data, metadata) -} -\arguments{ -\item{input_data}{Dataframe or matrix of covariates. 
Users may pre-process any -categorical variables as factors but it is not necessary.} - -\item{metadata}{List containing information on variables, including train set -categories for categorical variables} -} -\value{ -Preprocessed data with categorical variables appropriately preprocessed -} -\description{ -Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. Returns a list including a -matrix of preprocessed covariate values and associated tracking. -} -\examples{ -cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) -metadata <- list(num_ordered_cat_vars = 0, num_unordered_cat_vars = 0, - num_numeric_vars = 3, numeric_vars = c("x1", "x2", "x3")) -X_preprocessed <- createForestCovariatesFromMetadata(cov_df, metadata) -} diff --git a/man/oneHotEncode.Rd b/man/oneHotEncode.Rd deleted file mode 100644 index f604a2c4..00000000 --- a/man/oneHotEncode.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{oneHotEncode} -\alias{oneHotEncode} -\title{Convert a vector of unordered categorical data (either numeric or character -labels) to a "one-hot" encoded matrix in which a 1 in a column indicates -the presence of the relevant category.} -\usage{ -oneHotEncode(x_input, unique_levels) -} -\arguments{ -\item{x_input}{Vector of unordered categorical data (typically either strings -integers, but this function also accepts floating point data).} - -\item{unique_levels}{Unique values of the categorical variable used to create -the initial one-hot matrix (typically a training set)} -} -\value{ -Binary one-hot matrix -} -\description{ -This procedure assumes that a reference set of observations for this variable -(typically a training set that was used to sample a forest) has already been -one-hot encoded and that the unique levels of the training set variable are -available (and passed as \code{unique_levels}). 
Test set observations that contain -categories not in \code{unique_levels} will all be mapped to the last column of -this matrix -} -\examples{ -x <- sample(1:8, 100, TRUE) -x_test <- sample(1:9, 10, TRUE) -x_onehot <- oneHotEncode(x_test, levels(factor(x))) -} diff --git a/man/oneHotInitializeAndEncode.Rd b/man/oneHotInitializeAndEncode.Rd deleted file mode 100644 index 7cb20f0f..00000000 --- a/man/oneHotInitializeAndEncode.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{oneHotInitializeAndEncode} -\alias{oneHotInitializeAndEncode} -\title{Convert a vector of unordered categorical data (either numeric or character -labels) to a "one-hot" encoded matrix in which a 1 in a column indicates -the presence of the relevant category.} -\usage{ -oneHotInitializeAndEncode(x_input) -} -\arguments{ -\item{x_input}{Vector of unordered categorical data (typically either strings -integers, but this function also accepts floating point data).} -} -\value{ -List containing a binary one-hot matrix and the unique levels of the -input variable. These unique levels are used in the BCF and BART functions. -} -\description{ -To allow for prediction on "unseen" categories in a test dataset, this -procedure pads the one-hot matrix with a blank "other" column. -Test set observations that contain categories not in \code{levels(factor(x_input))} -will all be mapped to this column. 
-} -\examples{ -x <- c("a","c","b","c","d","a","c","a","b","d") -x_onehot <- oneHotInitializeAndEncode(x) -} diff --git a/man/orderedCatInitializeAndPreprocess.Rd b/man/orderedCatInitializeAndPreprocess.Rd deleted file mode 100644 index 7996808c..00000000 --- a/man/orderedCatInitializeAndPreprocess.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{orderedCatInitializeAndPreprocess} -\alias{orderedCatInitializeAndPreprocess} -\title{Run some simple preprocessing of ordered categorical variables, converting -ordered levels to integers if necessary, and storing the unique levels of a -variable.} -\usage{ -orderedCatInitializeAndPreprocess(x_input) -} -\arguments{ -\item{x_input}{Vector of ordered categorical data. If the data is not already -stored as an ordered factor, it will be converted to one using the default -sort order.} -} -\value{ -List containing a preprocessed vector of integer-converted ordered -categorical observations and the unique level of the original ordered -categorical feature. -} -\description{ -Run some simple preprocessing of ordered categorical variables, converting -ordered levels to integers if necessary, and storing the unique levels of a -variable. -} -\examples{ -x <- c("1. Strongly disagree", "3. Neither agree nor disagree", "2. Disagree", - "4. Agree", "3. Neither agree nor disagree", "5. Strongly agree", "4. 
Agree") -preprocess_list <- orderedCatInitializeAndPreprocess(x) -x_preprocessed <- preprocess_list$x_preprocessed -} diff --git a/man/orderedCatPreprocess.Rd b/man/orderedCatPreprocess.Rd deleted file mode 100644 index 12011e6a..00000000 --- a/man/orderedCatPreprocess.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{orderedCatPreprocess} -\alias{orderedCatPreprocess} -\title{Run some simple preprocessing of ordered categorical variables, converting -ordered levels to integers if necessary, and storing the unique levels of a -variable.} -\usage{ -orderedCatPreprocess(x_input, unique_levels, var_name = NULL) -} -\arguments{ -\item{x_input}{Vector of ordered categorical data. If the data is not already -stored as an ordered factor, it will be converted to one using the default -sort order.} - -\item{unique_levels}{Vector of unique levels for a categorical feature.} - -\item{var_name}{(Optional) Name of variable.} -} -\value{ -List containing a preprocessed vector of integer-converted ordered -categorical observations and the unique level of the original ordered -categorical feature. -} -\description{ -Run some simple preprocessing of ordered categorical variables, converting -ordered levels to integers if necessary, and storing the unique levels of a -variable. -} -\examples{ -x_levels <- c("1. Strongly disagree", "2. Disagree", - "3. Neither agree nor disagree", - "4. Agree", "5. Strongly agree") -x <- c("1. Strongly disagree", "3. Neither agree nor disagree", "2. Disagree", - "4. Agree", "3. Neither agree nor disagree", "5. Strongly agree", "4. 
Agree") -x_processed <- orderedCatPreprocess(x, x_levels) -} diff --git a/man/preprocessParams.Rd b/man/preprocessParams.Rd deleted file mode 100644 index 9e1732d4..00000000 --- a/man/preprocessParams.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{preprocessParams} -\alias{preprocessParams} -\title{Preprocess a parameter list, overriding defaults with any provided parameters.} -\usage{ -preprocessParams(default_params, user_params = NULL) -} -\arguments{ -\item{default_params}{List of parameters with default values set.} - -\item{user_params}{(Optional) User-supplied overrides to \code{default_params}.} -} -\value{ -Parameter list with defaults overriden by values supplied in \code{user_params} -} -\description{ -Preprocess a parameter list, overriding defaults with any provided parameters. -} diff --git a/man/preprocessPredictionDataFrame.Rd b/man/preprocessPredictionDataFrame.Rd deleted file mode 100644 index 27b87743..00000000 --- a/man/preprocessPredictionDataFrame.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{preprocessPredictionDataFrame} -\alias{preprocessPredictionDataFrame} -\title{Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be.} -\usage{ -preprocessPredictionDataFrame(input_df, metadata) -} -\arguments{ -\item{input_df}{Dataframe of covariates. Users must pre-process any -categorical variables as factors (ordered for ordered categorical).} - -\item{metadata}{List containing information on variables, including train set -categories for categorical variables} -} -\value{ -Preprocessed data with categorical variables appropriately preprocessed -} -\description{ -Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. 
-} -\examples{ -cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) -metadata <- list(num_ordered_cat_vars = 0, num_unordered_cat_vars = 0, - num_numeric_vars = 3, numeric_vars = c("x1", "x2", "x3")) -X_preprocessed <- preprocessPredictionDataFrame(cov_df, metadata) -} diff --git a/man/preprocessPredictionMatrix.Rd b/man/preprocessPredictionMatrix.Rd deleted file mode 100644 index 3a150069..00000000 --- a/man/preprocessPredictionMatrix.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{preprocessPredictionMatrix} -\alias{preprocessPredictionMatrix} -\title{Preprocess a matrix of covariate values, assuming all columns are numeric.} -\usage{ -preprocessPredictionMatrix(input_matrix, metadata) -} -\arguments{ -\item{input_matrix}{Covariate matrix.} - -\item{metadata}{List containing information on variables, including train set -categories for categorical variables} -} -\value{ -Preprocessed data with categorical variables appropriately preprocessed -} -\description{ -Preprocess a matrix of covariate values, assuming all columns are numeric. -} -\examples{ -cov_mat <- matrix(c(1:5, 5:1, 6:10), ncol = 3) -metadata <- list(num_ordered_cat_vars = 0, num_unordered_cat_vars = 0, - num_numeric_vars = 3, numeric_vars = c("x1", "x2", "x3")) -X_preprocessed <- preprocessPredictionMatrix(cov_mat, metadata) -} diff --git a/man/preprocessTrainDataFrame.Rd b/man/preprocessTrainDataFrame.Rd deleted file mode 100644 index 66cbe41c..00000000 --- a/man/preprocessTrainDataFrame.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{preprocessTrainDataFrame} -\alias{preprocessTrainDataFrame} -\title{Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. 
Returns a list including a -matrix of preprocessed covariate values and associated tracking.} -\usage{ -preprocessTrainDataFrame(input_df) -} -\arguments{ -\item{input_df}{Dataframe of covariates. Users must pre-process any -categorical variables as factors (ordered for ordered categorical).} -} -\value{ -List with preprocessed data and details on the number of each type -of variable, unique categories associated with categorical variables, and the -vector of feature types needed for calls to BART and BCF. -} -\description{ -Preprocess a dataframe of covariate values, converting categorical variables -to integers and one-hot encoding if need be. Returns a list including a -matrix of preprocessed covariate values and associated tracking. -} -\examples{ -cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) -preprocess_list <- preprocessTrainDataFrame(cov_df) -X <- preprocess_list$X -} diff --git a/man/preprocessTrainMatrix.Rd b/man/preprocessTrainMatrix.Rd deleted file mode 100644 index b90f7afe..00000000 --- a/man/preprocessTrainMatrix.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{preprocessTrainMatrix} -\alias{preprocessTrainMatrix} -\title{Preprocess a matrix of covariate values, assuming all columns are numeric. -Returns a list including a matrix of preprocessed covariate values and associated tracking.} -\usage{ -preprocessTrainMatrix(input_matrix) -} -\arguments{ -\item{input_matrix}{Covariate matrix.} -} -\value{ -List with preprocessed (unmodified) data and details on the number of each type -of variable, unique categories associated with categorical variables, and the -vector of feature types needed for calls to BART and BCF. -} -\description{ -Preprocess a matrix of covariate values, assuming all columns are numeric. -Returns a list including a matrix of preprocessed covariate values and associated tracking. 
-} -\examples{ -cov_mat <- matrix(1:12, ncol = 3) -preprocess_list <- preprocessTrainMatrix(cov_mat) -X <- preprocess_list$X -} diff --git a/test/R/testthat/test-data-preprocessing.R b/test/R/testthat/test-data-preprocessing.R index 75e100bd..662037f2 100644 --- a/test/R/testthat/test-data-preprocessing.R +++ b/test/R/testthat/test-data-preprocessing.R @@ -1,257 +1,257 @@ -test_that("Preprocessing of all-numeric covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_mat <- matrix(c( - 1,2,3,4,5, - 5,4,3,2,1, - 6,7,8,9,10 - ), ncol = 3, byrow = F) - preprocess_list <- createForestCovariates(cov_df) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, rep(0,3)) - expect_equal(preprocess_list$metadata$num_numeric_vars, 3) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$numeric_vars, c("x1","x2","x3")) -}) - -test_that("Preprocessing of all-unordered-categorical covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_mat <- matrix(c( - 1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, - 0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, - 0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0, - 0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0, - 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0 - ), nrow = 5, byrow = TRUE) - preprocess_list <- createForestCovariates(cov_df, unordered_cat_vars = c("x1","x2","x3")) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, rep(1,18)) - expect_equal(preprocess_list$metadata$num_numeric_vars, 0) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 3) - expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x1","x2","x3")) - expect_equal(preprocess_list$metadata$unordered_unique_levels, - list(x1=c("1","2","3","4","5"), - 
x2=c("1","2","3","4","5"), - x3=c("6","7","8","9","10")) - ) -}) - -test_that("Preprocessing of all-ordered-categorical covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_mat <- matrix(c( - 1,2,3,4,5, - 5,4,3,2,1, - 1,2,3,4,5 - ), ncol = 3, byrow = F) - preprocess_list <- createForestCovariates(cov_df, ordered_cat_vars = c("x1","x2","x3")) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, rep(1,3)) - expect_equal(preprocess_list$metadata$num_numeric_vars, 0) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 3) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x1","x2","x3")) - expect_equal(preprocess_list$metadata$ordered_unique_levels, - list(x1=c("1","2","3","4","5"), - x2=c("1","2","3","4","5"), - x3=c("6","7","8","9","10")) - ) -}) - -test_that("Preprocessing of mixed-covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_mat <- matrix(c( - 1,5,1,0,0,0,0,0, - 2,4,0,1,0,0,0,0, - 3,3,0,0,1,0,0,0, - 4,2,0,0,0,1,0,0, - 5,1,0,0,0,0,1,0 - ), nrow = 5, byrow = TRUE) - preprocess_list <- createForestCovariates(cov_df, ordered_cat_vars = c("x2"), unordered_cat_vars = "x3") - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, c(0, rep(1,7))) - expect_equal(preprocess_list$metadata$num_numeric_vars, 1) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 1) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 1) - expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x2")) - expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x3")) - expect_equal(preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) - expect_equal(preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) -}) - -test_that("Preprocessing of mixed-covariate matrix 
works", { - cov_input <- matrix(c(1:5,5:1,6:10),ncol=3,byrow=F) - cov_mat <- matrix(c( - 1,5,1,0,0,0,0,0, - 2,4,0,1,0,0,0,0, - 3,3,0,0,1,0,0,0, - 4,2,0,0,0,1,0,0, - 5,1,0,0,0,0,1,0 - ), nrow = 5, byrow = TRUE) - preprocess_list <- createForestCovariates(cov_input, ordered_cat_vars = 2, unordered_cat_vars = 3) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, c(0, rep(1,7))) - expect_equal(preprocess_list$metadata$num_numeric_vars, 1) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 1) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 1) - expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x2")) - expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x3")) - expect_equal(preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) - expect_equal(preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) - - alt_preprocess_list <- createForestCovariates(cov_input, ordered_cat_vars = "x2", unordered_cat_vars = "x3") - expect_equal(alt_preprocess_list$data, cov_mat) - expect_equal(alt_preprocess_list$metadata$feature_types, c(0, rep(1,7))) - expect_equal(alt_preprocess_list$metadata$num_numeric_vars, 1) - expect_equal(alt_preprocess_list$metadata$num_ordered_cat_vars, 1) - expect_equal(alt_preprocess_list$metadata$num_unordered_cat_vars, 1) - expect_equal(alt_preprocess_list$metadata$ordered_cat_vars, c("x2")) - expect_equal(alt_preprocess_list$metadata$unordered_cat_vars, c("x3")) - expect_equal(alt_preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) - expect_equal(alt_preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) -}) - -test_that("Preprocessing of out-of-sample mixed-covariate dataset works", { - metadata <- list( - num_numeric_vars = 1, - num_ordered_cat_vars = 1, - num_unordered_cat_vars = 1, - numeric_vars = c("x1"), - ordered_cat_vars = c("x2"), - 
unordered_cat_vars = c("x3"), - ordered_unique_levels = list(x2=c("1","2","3","4","5")), - unordered_unique_levels = list(x3=c("6","7","8","9","10")) - ) - cov_df <- data.frame(x1 = c(1:5,1), x2 = c(5:1,5), x3 = 6:11) - cov_mat <- matrix(c( - 1,5,1,0,0,0,0,0, - 2,4,0,1,0,0,0,0, - 3,3,0,0,1,0,0,0, - 4,2,0,0,0,1,0,0, - 5,1,0,0,0,0,1,0, - 1,5,0,0,0,0,0,1 - ), nrow = 6, byrow = TRUE) - X_preprocessed <- createForestCovariatesFromMetadata(cov_df, metadata) - expect_equal(X_preprocessed, cov_mat) -}) - -test_that("Preprocessing of all-numeric covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_mat <- matrix(c( - 1,2,3,4,5, - 5,4,3,2,1, - 6,7,8,9,10 - ), ncol = 3, byrow = F) - preprocess_list <- preprocessTrainDataFrame(cov_df) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, rep(0,3)) - expect_equal(preprocess_list$metadata$num_numeric_vars, 3) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$original_var_indices, 1:3) - expect_equal(preprocess_list$metadata$numeric_vars, c("x1","x2","x3")) -}) - -test_that("Preprocessing of all-unordered-categorical covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_df$x1 <- factor(cov_df$x1) - cov_df$x2 <- factor(cov_df$x2) - cov_df$x3 <- factor(cov_df$x3) - cov_mat <- matrix(c( - 1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, - 0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, - 0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0, - 0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0, - 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0 - ), nrow = 5, byrow = TRUE) - preprocess_list <- preprocessTrainDataFrame(cov_df) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, rep(1,18)) - expect_equal(preprocess_list$metadata$num_numeric_vars, 0) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) - 
expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 3) - expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x1","x2","x3")) - expected_var_indices <- c(rep(1,6),rep(2,6),rep(3,6)) - expect_equal(preprocess_list$metadata$original_var_indices, expected_var_indices) - expect_equal(preprocess_list$metadata$unordered_unique_levels, - list(x1=c("1","2","3","4","5"), - x2=c("1","2","3","4","5"), - x3=c("6","7","8","9","10")) - ) -}) - -test_that("Preprocessing of all-ordered-categorical covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_df$x1 <- factor(cov_df$x1, ordered = TRUE) - cov_df$x2 <- factor(cov_df$x2, ordered = TRUE) - cov_df$x3 <- factor(cov_df$x3, ordered = TRUE) - cov_mat <- matrix(c( - 1,2,3,4,5, - 5,4,3,2,1, - 1,2,3,4,5 - ), ncol = 3, byrow = F) - preprocess_list <- preprocessTrainDataFrame(cov_df) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, rep(1,3)) - expect_equal(preprocess_list$metadata$num_numeric_vars, 0) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 3) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) - expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x1","x2","x3")) - expect_equal(preprocess_list$metadata$original_var_indices, 1:3) - expect_equal(preprocess_list$metadata$ordered_unique_levels, - list(x1=c("1","2","3","4","5"), - x2=c("1","2","3","4","5"), - x3=c("6","7","8","9","10")) - ) -}) - -test_that("Preprocessing of mixed-covariate dataset works", { - cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) - cov_df$x2 <- factor(cov_df$x2, ordered = TRUE) - cov_df$x3 <- factor(cov_df$x3) - cov_mat <- matrix(c( - 1,5,1,0,0,0,0,0, - 2,4,0,1,0,0,0,0, - 3,3,0,0,1,0,0,0, - 4,2,0,0,0,1,0,0, - 5,1,0,0,0,0,1,0 - ), nrow = 5, byrow = TRUE) - preprocess_list <- preprocessTrainDataFrame(cov_df) - expect_equal(preprocess_list$data, cov_mat) - expect_equal(preprocess_list$metadata$feature_types, c(0, 
rep(1,7))) - expect_equal(preprocess_list$metadata$num_numeric_vars, 1) - expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 1) - expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 1) - expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x2")) - expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x3")) - expected_var_indices <- c(1,2,rep(3,6)) - expect_equal(preprocess_list$metadata$original_var_indices, expected_var_indices) - expect_equal(preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) - expect_equal(preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) -}) - -test_that("Preprocessing of out-of-sample mixed-covariate dataset works", { - metadata <- list( - num_numeric_vars = 1, - num_ordered_cat_vars = 1, - num_unordered_cat_vars = 1, - original_var_indices = c(1, 2, 3, 3, 3, 3, 3, 3), - numeric_vars = c("x1"), - ordered_cat_vars = c("x2"), - unordered_cat_vars = c("x3"), - ordered_unique_levels = list(x2=c("1","2","3","4","5")), - unordered_unique_levels = list(x3=c("6","7","8","9","10")) - ) - cov_df <- data.frame(x1 = c(1:5,1), x2 = c(5:1,5), x3 = 6:11) - var_weights <- rep(1./3., 3) - cov_mat <- matrix(c( - 1,5,1,0,0,0,0,0, - 2,4,0,1,0,0,0,0, - 3,3,0,0,1,0,0,0, - 4,2,0,0,0,1,0,0, - 5,1,0,0,0,0,1,0, - 1,5,0,0,0,0,0,1 - ), nrow = 6, byrow = TRUE) - X_preprocessed <- preprocessPredictionDataFrame(cov_df, metadata) - expect_equal(X_preprocessed, cov_mat) -}) +# test_that("Preprocessing of all-numeric covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_mat <- matrix(c( +# 1,2,3,4,5, +# 5,4,3,2,1, +# 6,7,8,9,10 +# ), ncol = 3, byrow = F) +# preprocess_list <- createForestCovariates(cov_df) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, rep(0,3)) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 3) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) +# 
expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$numeric_vars, c("x1","x2","x3")) +# }) +# +# test_that("Preprocessing of all-unordered-categorical covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_mat <- matrix(c( +# 1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, +# 0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +# 0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0, +# 0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0, +# 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0 +# ), nrow = 5, byrow = TRUE) +# preprocess_list <- createForestCovariates(cov_df, unordered_cat_vars = c("x1","x2","x3")) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, rep(1,18)) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 0) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 3) +# expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x1","x2","x3")) +# expect_equal(preprocess_list$metadata$unordered_unique_levels, +# list(x1=c("1","2","3","4","5"), +# x2=c("1","2","3","4","5"), +# x3=c("6","7","8","9","10")) +# ) +# }) +# +# test_that("Preprocessing of all-ordered-categorical covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_mat <- matrix(c( +# 1,2,3,4,5, +# 5,4,3,2,1, +# 1,2,3,4,5 +# ), ncol = 3, byrow = F) +# preprocess_list <- createForestCovariates(cov_df, ordered_cat_vars = c("x1","x2","x3")) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, rep(1,3)) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 0) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 3) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x1","x2","x3")) +# expect_equal(preprocess_list$metadata$ordered_unique_levels, +# 
list(x1=c("1","2","3","4","5"), +# x2=c("1","2","3","4","5"), +# x3=c("6","7","8","9","10")) +# ) +# }) +# +# test_that("Preprocessing of mixed-covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_mat <- matrix(c( +# 1,5,1,0,0,0,0,0, +# 2,4,0,1,0,0,0,0, +# 3,3,0,0,1,0,0,0, +# 4,2,0,0,0,1,0,0, +# 5,1,0,0,0,0,1,0 +# ), nrow = 5, byrow = TRUE) +# preprocess_list <- createForestCovariates(cov_df, ordered_cat_vars = c("x2"), unordered_cat_vars = "x3") +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, c(0, rep(1,7))) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 1) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 1) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 1) +# expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x2")) +# expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x3")) +# expect_equal(preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) +# expect_equal(preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) +# }) +# +# test_that("Preprocessing of mixed-covariate matrix works", { +# cov_input <- matrix(c(1:5,5:1,6:10),ncol=3,byrow=F) +# cov_mat <- matrix(c( +# 1,5,1,0,0,0,0,0, +# 2,4,0,1,0,0,0,0, +# 3,3,0,0,1,0,0,0, +# 4,2,0,0,0,1,0,0, +# 5,1,0,0,0,0,1,0 +# ), nrow = 5, byrow = TRUE) +# preprocess_list <- createForestCovariates(cov_input, ordered_cat_vars = 2, unordered_cat_vars = 3) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, c(0, rep(1,7))) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 1) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 1) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 1) +# expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x2")) +# expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x3")) +# 
expect_equal(preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) +# expect_equal(preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) +# +# alt_preprocess_list <- createForestCovariates(cov_input, ordered_cat_vars = "x2", unordered_cat_vars = "x3") +# expect_equal(alt_preprocess_list$data, cov_mat) +# expect_equal(alt_preprocess_list$metadata$feature_types, c(0, rep(1,7))) +# expect_equal(alt_preprocess_list$metadata$num_numeric_vars, 1) +# expect_equal(alt_preprocess_list$metadata$num_ordered_cat_vars, 1) +# expect_equal(alt_preprocess_list$metadata$num_unordered_cat_vars, 1) +# expect_equal(alt_preprocess_list$metadata$ordered_cat_vars, c("x2")) +# expect_equal(alt_preprocess_list$metadata$unordered_cat_vars, c("x3")) +# expect_equal(alt_preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) +# expect_equal(alt_preprocess_list$metadata$unordered_unique_levels, list(x3=c("6","7","8","9","10"))) +# }) +# +# test_that("Preprocessing of out-of-sample mixed-covariate dataset works", { +# metadata <- list( +# num_numeric_vars = 1, +# num_ordered_cat_vars = 1, +# num_unordered_cat_vars = 1, +# numeric_vars = c("x1"), +# ordered_cat_vars = c("x2"), +# unordered_cat_vars = c("x3"), +# ordered_unique_levels = list(x2=c("1","2","3","4","5")), +# unordered_unique_levels = list(x3=c("6","7","8","9","10")) +# ) +# cov_df <- data.frame(x1 = c(1:5,1), x2 = c(5:1,5), x3 = 6:11) +# cov_mat <- matrix(c( +# 1,5,1,0,0,0,0,0, +# 2,4,0,1,0,0,0,0, +# 3,3,0,0,1,0,0,0, +# 4,2,0,0,0,1,0,0, +# 5,1,0,0,0,0,1,0, +# 1,5,0,0,0,0,0,1 +# ), nrow = 6, byrow = TRUE) +# X_preprocessed <- createForestCovariatesFromMetadata(cov_df, metadata) +# expect_equal(X_preprocessed, cov_mat) +# }) +# +# test_that("Preprocessing of all-numeric covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_mat <- matrix(c( +# 1,2,3,4,5, +# 5,4,3,2,1, +# 6,7,8,9,10 +# ), ncol = 3, byrow = F) +# 
preprocess_list <- preprocessTrainDataFrame(cov_df) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, rep(0,3)) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 3) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$original_var_indices, 1:3) +# expect_equal(preprocess_list$metadata$numeric_vars, c("x1","x2","x3")) +# }) +# +# test_that("Preprocessing of all-unordered-categorical covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_df$x1 <- factor(cov_df$x1) +# cov_df$x2 <- factor(cov_df$x2) +# cov_df$x3 <- factor(cov_df$x3) +# cov_mat <- matrix(c( +# 1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, +# 0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0, +# 0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0, +# 0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0, +# 0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0 +# ), nrow = 5, byrow = TRUE) +# preprocess_list <- preprocessTrainDataFrame(cov_df) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, rep(1,18)) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 0) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 3) +# expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x1","x2","x3")) +# expected_var_indices <- c(rep(1,6),rep(2,6),rep(3,6)) +# expect_equal(preprocess_list$metadata$original_var_indices, expected_var_indices) +# expect_equal(preprocess_list$metadata$unordered_unique_levels, +# list(x1=c("1","2","3","4","5"), +# x2=c("1","2","3","4","5"), +# x3=c("6","7","8","9","10")) +# ) +# }) +# +# test_that("Preprocessing of all-ordered-categorical covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_df$x1 <- factor(cov_df$x1, ordered = TRUE) +# cov_df$x2 <- factor(cov_df$x2, ordered 
= TRUE) +# cov_df$x3 <- factor(cov_df$x3, ordered = TRUE) +# cov_mat <- matrix(c( +# 1,2,3,4,5, +# 5,4,3,2,1, +# 1,2,3,4,5 +# ), ncol = 3, byrow = F) +# preprocess_list <- preprocessTrainDataFrame(cov_df) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, rep(1,3)) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 0) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 3) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 0) +# expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x1","x2","x3")) +# expect_equal(preprocess_list$metadata$original_var_indices, 1:3) +# expect_equal(preprocess_list$metadata$ordered_unique_levels, +# list(x1=c("1","2","3","4","5"), +# x2=c("1","2","3","4","5"), +# x3=c("6","7","8","9","10")) +# ) +# }) +# +# test_that("Preprocessing of mixed-covariate dataset works", { +# cov_df <- data.frame(x1 = 1:5, x2 = 5:1, x3 = 6:10) +# cov_df$x2 <- factor(cov_df$x2, ordered = TRUE) +# cov_df$x3 <- factor(cov_df$x3) +# cov_mat <- matrix(c( +# 1,5,1,0,0,0,0,0, +# 2,4,0,1,0,0,0,0, +# 3,3,0,0,1,0,0,0, +# 4,2,0,0,0,1,0,0, +# 5,1,0,0,0,0,1,0 +# ), nrow = 5, byrow = TRUE) +# preprocess_list <- preprocessTrainDataFrame(cov_df) +# expect_equal(preprocess_list$data, cov_mat) +# expect_equal(preprocess_list$metadata$feature_types, c(0, rep(1,7))) +# expect_equal(preprocess_list$metadata$num_numeric_vars, 1) +# expect_equal(preprocess_list$metadata$num_ordered_cat_vars, 1) +# expect_equal(preprocess_list$metadata$num_unordered_cat_vars, 1) +# expect_equal(preprocess_list$metadata$ordered_cat_vars, c("x2")) +# expect_equal(preprocess_list$metadata$unordered_cat_vars, c("x3")) +# expected_var_indices <- c(1,2,rep(3,6)) +# expect_equal(preprocess_list$metadata$original_var_indices, expected_var_indices) +# expect_equal(preprocess_list$metadata$ordered_unique_levels, list(x2=c("1","2","3","4","5"))) +# expect_equal(preprocess_list$metadata$unordered_unique_levels, 
list(x3=c("6","7","8","9","10"))) +# }) +# +# test_that("Preprocessing of out-of-sample mixed-covariate dataset works", { +# metadata <- list( +# num_numeric_vars = 1, +# num_ordered_cat_vars = 1, +# num_unordered_cat_vars = 1, +# original_var_indices = c(1, 2, 3, 3, 3, 3, 3, 3), +# numeric_vars = c("x1"), +# ordered_cat_vars = c("x2"), +# unordered_cat_vars = c("x3"), +# ordered_unique_levels = list(x2=c("1","2","3","4","5")), +# unordered_unique_levels = list(x3=c("6","7","8","9","10")) +# ) +# cov_df <- data.frame(x1 = c(1:5,1), x2 = c(5:1,5), x3 = 6:11) +# var_weights <- rep(1./3., 3) +# cov_mat <- matrix(c( +# 1,5,1,0,0,0,0,0, +# 2,4,0,1,0,0,0,0, +# 3,3,0,0,1,0,0,0, +# 4,2,0,0,0,1,0,0, +# 5,1,0,0,0,0,1,0, +# 1,5,0,0,0,0,0,1 +# ), nrow = 6, byrow = TRUE) +# X_preprocessed <- preprocessPredictionDataFrame(cov_df, metadata) +# expect_equal(X_preprocessed, cov_mat) +# }) From 0ff1c7fb22cd6cdea18c2867864d7423c3734e17 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 16:18:27 -0600 Subject: [PATCH 13/24] Removed convertBARTStateToJson function --- R/bart.R | 62 ----------------------------------- _pkgdown.yml | 1 - man/convertBARTStateToJson.Rd | 36 -------------------- 3 files changed, 99 deletions(-) delete mode 100644 man/convertBARTStateToJson.Rd diff --git a/R/bart.R b/R/bart.R index 9e579539..11a5b0f3 100644 --- a/R/bart.R +++ b/R/bart.R @@ -1225,68 +1225,6 @@ convertBARTModelToJson <- function(object){ return(jsonobj) } -#' Convert in-memory BART model objects (forests, random effects, vectors) to in-memory JSON. -#' This function is primarily a convenience function for serialization / deserialization in a parallel BART sampler. -#' -#' @param param_list List containing high-level model state parameters -#' @param mean_forest Container of conditional mean forest samples (optional). Default: `NULL`. -#' @param variance_forest Container of conditional variance forest samples (optional). Default: `NULL`. 
-#' @param rfx_samples Container of random effect samples (optional). Default: `NULL`. -#' @param global_variance_samples Vector of global error variance samples (optional). Default: `NULL`. -#' @param local_variance_samples Vector of leaf scale samples (optional). Default: `NULL`. -#' -#' @return Object of type `CppJson` -convertBARTStateToJson <- function(param_list, mean_forest = NULL, variance_forest = NULL, - rfx_samples = NULL, global_variance_samples = NULL, - local_variance_samples = NULL) { - # Initialize JSON object - jsonobj <- createCppJson() - - # Add global parameters - jsonobj$add_scalar("outcome_scale", param_list$outcome_scale) - jsonobj$add_scalar("outcome_mean", param_list$outcome_mean) - jsonobj$add_boolean("standardize", param_list$standardize) - jsonobj$add_scalar("sigma2_init", param_list$sigma2_init) - jsonobj$add_boolean("sample_sigma_global", param_list$sample_sigma_global) - jsonobj$add_boolean("sample_sigma_leaf", param_list$sample_sigma_leaf) - jsonobj$add_boolean("include_mean_forest", param_list$include_mean_forest) - jsonobj$add_boolean("include_variance_forest", param_list$include_variance_forest) - jsonobj$add_boolean("has_rfx", param_list$has_rfx) - jsonobj$add_boolean("has_rfx_basis", param_list$has_rfx_basis) - jsonobj$add_scalar("num_rfx_basis", param_list$num_rfx_basis) - jsonobj$add_scalar("num_gfr", param_list$num_gfr) - jsonobj$add_scalar("num_burnin", param_list$num_burnin) - jsonobj$add_scalar("num_mcmc", param_list$num_mcmc) - jsonobj$add_scalar("num_covariates", param_list$num_covariates) - jsonobj$add_scalar("num_basis", param_list$num_basis) - jsonobj$add_scalar("keep_every", param_list$keep_every) - jsonobj$add_boolean("requires_basis", param_list$requires_basis) - - # Add the forests - if (param_list$include_mean_forest) { - jsonobj$add_forest(mean_forest) - } - if (param_list$include_variance_forest) { - jsonobj$add_forest(variance_forest) - } - - # Add sampled parameters - if (param_list$sample_sigma_global) { - 
jsonobj$add_vector("sigma2_global_samples", global_variance_samples, "parameters") - } - if (param_list$sample_sigma_leaf) { - jsonobj$add_vector("sigma2_leaf_samples", local_variance_samples, "parameters") - } - - # Add random effects - if (param_list$has_rfx) { - jsonobj$add_random_effects(rfx_samples) - jsonobj$add_string_vector("rfx_unique_group_ids", param_list$rfx_unique_group_ids) - } - - return(jsonobj) -} - #' Convert the persistent aspects of a BART model to (in-memory) JSON and save to a file #' #' @param object Object of type `bartmodel` containing draws of a BART model and associated sampling outputs. diff --git a/_pkgdown.yml b/_pkgdown.yml index 33b212fa..cb3665db 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -34,7 +34,6 @@ reference: - loadVectorJson - loadScalarJson - convertBARTModelToJson - - convertBARTStateToJson - createBARTModelFromCombinedJson - createBARTModelFromCombinedJsonString - createBARTModelFromJson diff --git a/man/convertBARTStateToJson.Rd b/man/convertBARTStateToJson.Rd deleted file mode 100644 index 004d8014..00000000 --- a/man/convertBARTStateToJson.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/bart.R -\name{convertBARTStateToJson} -\alias{convertBARTStateToJson} -\title{Convert in-memory BART model objects (forests, random effects, vectors) to in-memory JSON. -This function is primarily a convenience function for serialization / deserialization in a parallel BART sampler.} -\usage{ -convertBARTStateToJson( - param_list, - mean_forest = NULL, - variance_forest = NULL, - rfx_samples = NULL, - global_variance_samples = NULL, - local_variance_samples = NULL -) -} -\arguments{ -\item{param_list}{List containing high-level model state parameters} - -\item{mean_forest}{Container of conditional mean forest samples (optional). Default: \code{NULL}.} - -\item{variance_forest}{Container of conditional variance forest samples (optional). 
Default: \code{NULL}.} - -\item{rfx_samples}{Container of random effect samples (optional). Default: \code{NULL}.} - -\item{global_variance_samples}{Vector of global error variance samples (optional). Default: \code{NULL}.} - -\item{local_variance_samples}{Vector of leaf scale samples (optional). Default: \code{NULL}.} -} -\value{ -Object of type \code{CppJson} -} -\description{ -Convert in-memory BART model objects (forests, random effects, vectors) to in-memory JSON. -This function is primarily a convenience function for serialization / deserialization in a parallel BART sampler. -} From 4ce1510a597e12523cdc0c9a873e093fec55a935 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 16:30:55 -0600 Subject: [PATCH 14/24] Switched from convert to save for JSON model --- NAMESPACE | 4 ++-- R/bart.R | 12 ++++++------ R/bcf.R | 12 ++++++------ _pkgdown.yml | 12 ++++++------ man/createBARTModelFromCombinedJson.Rd | 2 +- man/createBARTModelFromJson.Rd | 2 +- man/createBCFModelFromJson.Rd | 4 ++-- ...vertBARTModelToJson.Rd => saveBARTModelToJson.Rd} | 8 ++++---- ...onvertBCFModelToJson.Rd => saveBCFModelToJson.Rd} | 8 ++++---- 9 files changed, 32 insertions(+), 32 deletions(-) rename man/{convertBARTModelToJson.Rd => saveBARTModelToJson.Rd} (88%) rename man/{convertBCFModelToJson.Rd => saveBCFModelToJson.Rd} (95%) diff --git a/NAMESPACE b/NAMESPACE index 1a2f145e..cdade319 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,8 +10,6 @@ export(calibrateInverseGammaErrorVariance) export(computeForestLeafIndices) export(computeForestLeafVariances) export(computeMaxLeafIndex) -export(convertBARTModelToJson) -export(convertBCFModelToJson) export(convertPreprocessorToJson) export(createBARTModelFromCombinedJson) export(createBARTModelFromCombinedJsonString) @@ -57,8 +55,10 @@ export(rootResetRandomEffectsModel) export(rootResetRandomEffectsTracker) export(sample_sigma2_one_iteration) export(sample_tau_one_iteration) +export(saveBARTModelToJson) 
export(saveBARTModelToJsonFile) export(saveBARTModelToJsonString) +export(saveBCFModelToJson) export(saveBCFModelToJsonFile) export(saveBCFModelToJsonString) export(savePreprocessorToJsonString) diff --git a/R/bart.R b/R/bart.R index 11a5b0f3..fb30cb17 100644 --- a/R/bart.R +++ b/R/bart.R @@ -1150,8 +1150,8 @@ getRandomEffectSamples.bartmodel <- function(object, ...){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' bart_json <- convertBARTModelToJson(bart_model) -convertBARTModelToJson <- function(object){ +#' bart_json <- saveBARTModelToJson(bart_model) +saveBARTModelToJson <- function(object){ jsonobj <- createCppJson() if (is.null(object$model_params)) { @@ -1260,7 +1260,7 @@ convertBARTModelToJson <- function(object){ #' unlink(tmpjson) saveBARTModelToJsonFile <- function(object, filename){ # Convert to Json - jsonobj <- convertBARTModelToJson(object) + jsonobj <- saveBARTModelToJson(object) # Save to file jsonobj$save_file(filename) @@ -1297,7 +1297,7 @@ saveBARTModelToJsonFile <- function(object, filename){ #' bart_json_string <- saveBARTModelToJsonString(bart_model) saveBARTModelToJsonString <- function(object){ # Convert to Json - jsonobj <- convertBARTModelToJson(object) + jsonobj <- saveBARTModelToJson(object) # Dump to string return(jsonobj$return_json_string()) @@ -1333,7 +1333,7 @@ saveBARTModelToJsonString <- function(object){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' bart_json <- convertBARTModelToJson(bart_model) +#' bart_json <- saveBARTModelToJson(bart_model) #' bart_model_roundtrip <- createBARTModelFromJson(bart_json) createBARTModelFromJson <- function(json_object){ # Initialize the BCF model @@ -1536,7 +1536,7 @@ createBARTModelFromJsonString <- function(json_string){ #' y_test <- y[test_inds] #' y_train <- y[train_inds] #' bart_model <- bart(X_train = X_train, y_train = y_train) -#' bart_json <- 
list(convertBARTModelToJson(bart_model)) +#' bart_json <- list(saveBARTModelToJson(bart_model)) #' bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) createBARTModelFromCombinedJson <- function(json_object_list){ # Initialize the BCF model diff --git a/R/bcf.R b/R/bcf.R index cd74f950..bbfb42c0 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -1629,8 +1629,8 @@ getRandomEffectSamples.bcf <- function(object, ...){ #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, #' tau_forest_params = tau_params) -#' # bcf_json <- convertBCFModelToJson(bcf_model) -convertBCFModelToJson <- function(object){ +#' # bcf_json <- saveBCFModelToJson(bcf_model) +saveBCFModelToJson <- function(object){ jsonobj <- createCppJson() if (is.null(object$model_params)) { @@ -1791,7 +1791,7 @@ convertBCFModelToJson <- function(object){ #' # saveBCFModelToJsonFile(bcf_model, "test.json") saveBCFModelToJsonFile <- function(object, filename){ # Convert to Json - jsonobj <- convertBCFModelToJson(object) + jsonobj <- saveBCFModelToJson(object) # Save to file jsonobj$save_file(filename) @@ -1867,7 +1867,7 @@ saveBCFModelToJsonFile <- function(object, filename){ #' # saveBCFModelToJsonString(bcf_model) saveBCFModelToJsonString <- function(object){ # Convert to Json - jsonobj <- convertBCFModelToJson(object) + jsonobj <- saveBCFModelToJson(object) # Dump to string return(jsonobj$return_json_string()) @@ -1942,8 +1942,8 @@ saveBCFModelToJsonString <- function(object){ #' num_gfr = 100, num_burnin = 0, num_mcmc = 100, #' mu_forest_params = mu_params, #' tau_forest_params = tau_params) -#' # bcf_json <- convertBCFModelToJson(bcf_model) -#' # bcf_model_roundtrip <- createBCFModelFromJson(bcf_json) +#' bcf_json <- saveBCFModelToJson(bcf_model) +#' bcf_model_roundtrip <- createBCFModelFromJson(bcf_json) createBCFModelFromJson <- function(json_object){ # Initialize the BCF model output <- list() diff --git a/_pkgdown.yml b/_pkgdown.yml index cb3665db..ed2d8165 100644 --- 
a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,7 +33,12 @@ reference: - loadRandomEffectSamplesJson - loadVectorJson - loadScalarJson - - convertBARTModelToJson + - saveBARTModelToJson + - saveBARTModelToJsonFile + - saveBARTModelToJsonString + - saveBCFModelToJson + - saveBCFModelToJsonFile + - saveBCFModelToJsonString - createBARTModelFromCombinedJson - createBARTModelFromCombinedJsonString - createBARTModelFromJson @@ -41,13 +46,8 @@ reference: - createBARTModelFromJsonString - loadRandomEffectSamplesCombinedJson - loadRandomEffectSamplesCombinedJsonString - - saveBARTModelToJsonFile - - saveBARTModelToJsonString - - saveBCFModelToJsonFile - - saveBCFModelToJsonString - createBCFModelFromJsonFile - createBCFModelFromJsonString - - convertBCFModelToJson - createBCFModelFromJson - createBCFModelFromCombinedJsonString diff --git a/man/createBARTModelFromCombinedJson.Rd b/man/createBARTModelFromCombinedJson.Rd index f85e9406..b39421f3 100644 --- a/man/createBARTModelFromCombinedJson.Rd +++ b/man/createBARTModelFromCombinedJson.Rd @@ -39,6 +39,6 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -bart_json <- list(convertBARTModelToJson(bart_model)) +bart_json <- list(saveBARTModelToJson(bart_model)) bart_model_roundtrip <- createBARTModelFromCombinedJson(bart_json) } diff --git a/man/createBARTModelFromJson.Rd b/man/createBARTModelFromJson.Rd index 5dcee7eb..bfd62f91 100644 --- a/man/createBARTModelFromJson.Rd +++ b/man/createBARTModelFromJson.Rd @@ -39,6 +39,6 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -bart_json <- convertBARTModelToJson(bart_model) +bart_json <- saveBARTModelToJson(bart_model) bart_model_roundtrip <- createBARTModelFromJson(bart_json) } diff --git a/man/createBCFModelFromJson.Rd b/man/createBCFModelFromJson.Rd index 49af6eca..97141d6f 100644 --- a/man/createBCFModelFromJson.Rd +++ 
b/man/createBCFModelFromJson.Rd @@ -78,6 +78,6 @@ bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, tau_forest_params = tau_params) -# bcf_json <- convertBCFModelToJson(bcf_model) -# bcf_model_roundtrip <- createBCFModelFromJson(bcf_json) +bcf_json <- saveBCFModelToJson(bcf_model) +bcf_model_roundtrip <- createBCFModelFromJson(bcf_json) } diff --git a/man/convertBARTModelToJson.Rd b/man/saveBARTModelToJson.Rd similarity index 88% rename from man/convertBARTModelToJson.Rd rename to man/saveBARTModelToJson.Rd index 50382a06..d06323d3 100644 --- a/man/convertBARTModelToJson.Rd +++ b/man/saveBARTModelToJson.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/bart.R -\name{convertBARTModelToJson} -\alias{convertBARTModelToJson} +\name{saveBARTModelToJson} +\alias{saveBARTModelToJson} \title{Convert the persistent aspects of a BART model to (in-memory) JSON} \usage{ -convertBARTModelToJson(object) +saveBARTModelToJson(object) } \arguments{ \item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} @@ -37,5 +37,5 @@ X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] bart_model <- bart(X_train = X_train, y_train = y_train) -bart_json <- convertBARTModelToJson(bart_model) +bart_json <- saveBARTModelToJson(bart_model) } diff --git a/man/convertBCFModelToJson.Rd b/man/saveBCFModelToJson.Rd similarity index 95% rename from man/convertBCFModelToJson.Rd rename to man/saveBCFModelToJson.Rd index 44880463..c7383044 100644 --- a/man/convertBCFModelToJson.Rd +++ b/man/saveBCFModelToJson.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/bcf.R -\name{convertBCFModelToJson} -\alias{convertBCFModelToJson} +\name{saveBCFModelToJson} +\alias{saveBCFModelToJson} \title{Convert the persistent aspects of a BCF model to 
(in-memory) JSON} \usage{ -convertBCFModelToJson(object) +saveBCFModelToJson(object) } \arguments{ \item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} @@ -76,5 +76,5 @@ bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, num_gfr = 100, num_burnin = 0, num_mcmc = 100, mu_forest_params = mu_params, tau_forest_params = tau_params) -# bcf_json <- convertBCFModelToJson(bcf_model) +# bcf_json <- saveBCFModelToJson(bcf_model) } From 5096080976a243a31a7c0e5eec4f8accc3171f5d Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 17:55:20 -0600 Subject: [PATCH 15/24] Adding createBCFModelFromCombinedJson function --- NAMESPACE | 1 + R/bcf.R | 217 +++++++++++++++++++- _pkgdown.yml | 17 +- man/createBCFModelFromCombinedJson.Rd | 80 ++++++++ man/createBCFModelFromCombinedJsonString.Rd | 8 +- 5 files changed, 307 insertions(+), 16 deletions(-) create mode 100644 man/createBCFModelFromCombinedJson.Rd diff --git a/NAMESPACE b/NAMESPACE index cdade319..4a0ffaff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(createBARTModelFromCombinedJsonString) export(createBARTModelFromJson) export(createBARTModelFromJsonFile) export(createBARTModelFromJsonString) +export(createBCFModelFromCombinedJson) export(createBCFModelFromCombinedJsonString) export(createBCFModelFromJson) export(createBCFModelFromJsonFile) diff --git a/R/bcf.R b/R/bcf.R index bbfb42c0..9de48daf 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -2194,12 +2194,222 @@ createBCFModelFromJsonString <- function(json_string){ return(bcf_object) } -#' Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object +#' Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object #' which can be used for prediction, etc... 
#' -#' @param json_string_list List of JSON strings which can be parsed to objects of type `CppJson` containing Json representation of a BART model +#' @param json_object_list List of objects of type `CppJson` containing Json representation of a BCF model #' -#' @return Object of type `bartmodel` +#' @return Object of type `bcf` +#' @export +#' +#' @examples +#' n <- 100 +#' p <- 5 +#' x1 <- rnorm(n) +#' x2 <- rnorm(n) +#' x3 <- rnorm(n) +#' x4 <- rnorm(n) +#' x5 <- rnorm(n) +#' X <- cbind(x1,x2,x3,x4,x5) +#' p <- ncol(X) +#' g <- function(x) {ifelse(x[,5] < -0.44,2,ifelse(x[,5] < 0.44,-1,4))} +#' mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} +#' mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} +#' tau1 <- function(x) {rep(3,nrow(x))} +#' tau2 <- function(x) {1+2*x[,2]*(x[,4] > 0)} +#' mu_x <- mu1(X) +#' tau_x <- tau2(X) +#' pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 +#' Z <- rbinom(n,1,pi_x) +#' E_XZ <- mu_x + Z*tau_x +#' snr <- 3 +#' rfx_group_ids <- rep(c(1,2), n %/% 2) +#' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) +#' rfx_basis <- cbind(1, runif(n, -1, 1)) +#' rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) +#' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) +#' X <- as.data.frame(X) +#' X$x4 <- factor(X$x4, ordered = TRUE) +#' X$x5 <- factor(X$x5, ordered = TRUE) +#' test_set_pct <- 0.2 +#' n_test <- round(test_set_pct*n) +#' n_train <- n - n_test +#' test_inds <- sort(sample(1:n, n_test, replace = FALSE)) +#' train_inds <- (1:n)[!((1:n) %in% test_inds)] +#' X_test <- X[test_inds,] +#' X_train <- X[train_inds,] +#' pi_test <- pi_x[test_inds] +#' pi_train <- pi_x[train_inds] +#' Z_test <- Z[test_inds] +#' Z_train <- Z[train_inds] +#' y_test <- y[test_inds] +#' y_train <- y[train_inds] +#' mu_test <- mu_x[test_inds] +#' mu_train <- mu_x[train_inds] +#' tau_test <- tau_x[test_inds] +#' tau_train <- tau_x[train_inds] +#' rfx_group_ids_test <- rfx_group_ids[test_inds] +#' rfx_group_ids_train <- rfx_group_ids[train_inds] 
+#' rfx_basis_test <- rfx_basis[test_inds,] +#' rfx_basis_train <- rfx_basis[train_inds,] +#' rfx_term_test <- rfx_term[test_inds] +#' rfx_term_train <- rfx_term[train_inds] +#' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, +#' propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, +#' rfx_basis_train = rfx_basis_train, X_test = X_test, +#' Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, +#' rfx_basis_test = rfx_basis_test, +#' num_gfr = 100, num_burnin = 0, num_mcmc = 100) +#' # bcf_json_list <- list(saveBCFModelToJson(bcf_model)) +#' # bcf_model_roundtrip <- createBCFModelFromCombinedJson(bcf_json_list) +createBCFModelFromCombinedJson <- function(json_string_list){ + # Initialize the BCF model + output <- list() + + # For scalar / preprocessing details which aren't sample-dependent, + # defer to the first json + json_object_default <- json_object_list[[1]] + + # Unpack the forests + output[["forests_mu"]] <- loadForestContainerCombinedJson(json_object_list, "forest_0") + output[["forests_tau"]] <- loadForestContainerCombinedJson(json_object_list, "forest_1") + include_variance_forest <- json_object_default$get_boolean("include_variance_forest") + if (include_variance_forest) { + output[["forests_variance"]] <- loadForestContainerCombinedJson(json_object_list, "forest_2") + } + + # Unpack metadata + train_set_metadata = list() + train_set_metadata[["num_numeric_vars"]] <- json_object_default$get_scalar("num_numeric_vars") + train_set_metadata[["num_ordered_cat_vars"]] <- json_object_default$get_scalar("num_ordered_cat_vars") + train_set_metadata[["num_unordered_cat_vars"]] <- json_object_default$get_scalar("num_unordered_cat_vars") + if (train_set_metadata[["num_numeric_vars"]] > 0) { + train_set_metadata[["numeric_vars"]] <- json_object_default$get_string_vector("numeric_vars") + } + if (train_set_metadata[["num_ordered_cat_vars"]] > 0) { + train_set_metadata[["ordered_cat_vars"]] 
<- json_object_default$get_string_vector("ordered_cat_vars") + train_set_metadata[["ordered_unique_levels"]] <- json_object_default$get_string_list("ordered_unique_levels", train_set_metadata[["ordered_cat_vars"]]) + } + if (train_set_metadata[["num_unordered_cat_vars"]] > 0) { + train_set_metadata[["unordered_cat_vars"]] <- json_object_default$get_string_vector("unordered_cat_vars") + train_set_metadata[["unordered_unique_levels"]] <- json_object_default$get_string_list("unordered_unique_levels", train_set_metadata[["unordered_cat_vars"]]) + } + output[["train_set_metadata"]] <- train_set_metadata + + # Unpack model params + model_params = list() + model_params[["outcome_scale"]] <- json_object_default$get_scalar("outcome_scale") + model_params[["outcome_mean"]] <- json_object_default$get_scalar("outcome_mean") + model_params[["standardize"]] <- json_object_default$get_boolean("standardize") + model_params[["initial_sigma2"]] <- json_object_default$get_scalar("initial_sigma2") + model_params[["sample_sigma_global"]] <- json_object_default$get_boolean("sample_sigma_global") + model_params[["sample_sigma_leaf_mu"]] <- json_object_default$get_boolean("sample_sigma_leaf_mu") + model_params[["sample_sigma_leaf_tau"]] <- json_object_default$get_boolean("sample_sigma_leaf_tau") + model_params[["include_variance_forest"]] <- include_variance_forest + model_params[["propensity_covariate"]] <- json_object_default$get_string("propensity_covariate") + model_params[["has_rfx"]] <- json_object_default$get_boolean("has_rfx") + model_params[["has_rfx_basis"]] <- json_object_default$get_boolean("has_rfx_basis") + model_params[["num_rfx_basis"]] <- json_object_default$get_scalar("num_rfx_basis") + model_params[["num_covariates"]] <- json_object_default$get_scalar("num_covariates") + model_params[["num_chains"]] <- json_object_default$get_scalar("num_chains") + model_params[["keep_every"]] <- json_object_default$get_scalar("keep_every") + model_params[["adaptive_coding"]] <- 
json_object_default$get_boolean("adaptive_coding") + model_params[["internal_propensity_model"]] <- json_object_default$get_boolean("internal_propensity_model") + + # Combine values that are sample-specific + for (i in 1:length(json_object_list)) { + json_object <- json_object_list[[i]] + if (i == 1) { + model_params[["num_gfr"]] <- json_object$get_scalar("num_gfr") + model_params[["num_burnin"]] <- json_object$get_scalar("num_burnin") + model_params[["num_mcmc"]] <- json_object$get_scalar("num_mcmc") + model_params[["num_samples"]] <- json_object$get_scalar("num_samples") + } else { + prev_json <- json_object_list[[i-1]] + model_params[["num_gfr"]] <- model_params[["num_gfr"]] + json_object$get_scalar("num_gfr") + model_params[["num_burnin"]] <- model_params[["num_burnin"]] + json_object$get_scalar("num_burnin") + model_params[["num_mcmc"]] <- model_params[["num_mcmc"]] + json_object$get_scalar("num_mcmc") + model_params[["num_samples"]] <- model_params[["num_samples"]] + json_object$get_scalar("num_samples") + } + } + output[["model_params"]] <- model_params + + # Unpack sampled parameters + if (model_params[["sample_sigma_global"]]) { + for (i in 1:length(json_object_list)) { + json_object <- json_object_list[[i]] + if (i == 1) { + output[["sigma2_samples"]] <- json_object$get_vector("sigma2_samples", "parameters") + } else { + output[["sigma2_samples"]] <- c(output[["sigma2_samples"]], json_object$get_vector("sigma2_samples", "parameters")) + } + } + } + if (model_params[["sample_sigma_leaf_mu"]]) { + for (i in 1:length(json_object_list)) { + json_object <- json_object_list[[i]] + if (i == 1) { + output[["sigma_leaf_mu_samples"]] <- json_object$get_vector("sigma_leaf_mu_samples", "parameters") + } else { + output[["sigma_leaf_mu_samples"]] <- c(output[["sigma_leaf_mu_samples"]], json_object$get_vector("sigma_leaf_mu_samples", "parameters")) + } + } + } + if (model_params[["sample_sigma_leaf_tau"]]) { + for (i in 1:length(json_object_list)) { + json_object <- 
json_object_list[[i]] + if (i == 1) { + output[["sigma_leaf_tau_samples"]] <- json_object$get_vector("sigma_leaf_tau_samples", "parameters") + } else { + output[["sigma_leaf_tau_samples"]] <- c(output[["sigma_leaf_tau_samples"]], json_object$get_vector("sigma_leaf_tau_samples", "parameters")) + } + } + } + if (model_params[["sample_sigma_leaf_tau"]]) { + for (i in 1:length(json_object_list)) { + json_object <- json_object_list[[i]] + if (i == 1) { + output[["sigma_leaf_tau_samples"]] <- json_object$get_vector("sigma_leaf_tau_samples", "parameters") + } else { + output[["sigma_leaf_tau_samples"]] <- c(output[["sigma_leaf_tau_samples"]], json_object$get_vector("sigma_leaf_tau_samples", "parameters")) + } + } + } + if (model_params[["adaptive_coding"]]) { + for (i in 1:length(json_object_list)) { + json_object <- json_object_list[[i]] + if (i == 1) { + output[["b_1_samples"]] <- json_object$get_vector("b_1_samples", "parameters") + output[["b_0_samples"]] <- json_object$get_vector("b_0_samples", "parameters") + } else { + output[["b_1_samples"]] <- c(output[["b_1_samples"]], json_object$get_vector("b_1_samples", "parameters")) + output[["b_0_samples"]] <- c(output[["b_0_samples"]], json_object$get_vector("b_0_samples", "parameters")) + } + } + } + + # Unpack random effects + if (model_params[["has_rfx"]]) { + output[["rfx_unique_group_ids"]] <- json_object_default$get_string_vector("rfx_unique_group_ids") + output[["rfx_samples"]] <- loadRandomEffectSamplesCombinedJson(json_object_list, 0) + } + + # Unpack covariate preprocessor + preprocessor_metadata_string <- json_object_default$get_string("preprocessor_metadata") + output[["train_set_metadata"]] <- createPreprocessorFromJsonString( + preprocessor_metadata_string + ) + + class(output) <- "bcf" + return(output) +} + +#' Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object +#' which can be used for prediction, etc... 
+#' +#' @param json_string_list List of JSON strings which can be parsed to objects of type `CppJson` containing Json representation of a BCF model +#' +#' @return Object of type `bcf` #' @export #' #' @examples @@ -2416,4 +2626,3 @@ createBCFModelFromCombinedJsonString <- function(json_string_list){ class(output) <- "bcf" return(output) } - diff --git a/_pkgdown.yml b/_pkgdown.yml index ed2d8165..65116269 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -30,25 +30,26 @@ reference: - loadForestContainerJson - loadForestContainerCombinedJson - loadForestContainerCombinedJsonString - - loadRandomEffectSamplesJson - loadVectorJson - loadScalarJson + - loadRandomEffectSamplesJson + - loadRandomEffectSamplesCombinedJson + - loadRandomEffectSamplesCombinedJsonString - saveBARTModelToJson - saveBARTModelToJsonFile - saveBARTModelToJsonString - - saveBCFModelToJson - - saveBCFModelToJsonFile - - saveBCFModelToJsonString - - createBARTModelFromCombinedJson - - createBARTModelFromCombinedJsonString - createBARTModelFromJson - createBARTModelFromJsonFile - createBARTModelFromJsonString - - loadRandomEffectSamplesCombinedJson - - loadRandomEffectSamplesCombinedJsonString + - createBARTModelFromCombinedJson + - createBARTModelFromCombinedJsonString + - saveBCFModelToJson + - saveBCFModelToJsonFile + - saveBCFModelToJsonString - createBCFModelFromJsonFile - createBCFModelFromJsonString - createBCFModelFromJson + - createBCFModelFromCombinedJson - createBCFModelFromCombinedJsonString - subtitle: Data diff --git a/man/createBCFModelFromCombinedJson.Rd b/man/createBCFModelFromCombinedJson.Rd new file mode 100644 index 00000000..378dfaef --- /dev/null +++ b/man/createBCFModelFromCombinedJson.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bcf.R +\name{createBCFModelFromCombinedJson} +\alias{createBCFModelFromCombinedJson} +\title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model 
object +which can be used for prediction, etc...} +\usage{ +createBCFModelFromCombinedJson(json_string_list) +} +\arguments{ +\item{json_object_list}{List of objects of type \code{CppJson} containing Json representation of a BCF model} +} +\value{ +Object of type \code{bcf} +} +\description{ +Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object +which can be used for prediction, etc... +} +\examples{ +n <- 100 +p <- 5 +x1 <- rnorm(n) +x2 <- rnorm(n) +x3 <- rnorm(n) +x4 <- rnorm(n) +x5 <- rnorm(n) +X <- cbind(x1,x2,x3,x4,x5) +p <- ncol(X) +g <- function(x) {ifelse(x[,5] < -0.44,2,ifelse(x[,5] < 0.44,-1,4))} +mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} +mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)} +tau1 <- function(x) {rep(3,nrow(x))} +tau2 <- function(x) {1+2*x[,2]*(x[,4] > 0)} +mu_x <- mu1(X) +tau_x <- tau2(X) +pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 +Z <- rbinom(n,1,pi_x) +E_XZ <- mu_x + Z*tau_x +snr <- 3 +rfx_group_ids <- rep(c(1,2), n \%/\% 2) +rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) +rfx_basis <- cbind(1, runif(n, -1, 1)) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) +y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) +X <- as.data.frame(X) +X$x4 <- factor(X$x4, ordered = TRUE) +X$x5 <- factor(X$x5, ordered = TRUE) +test_set_pct <- 0.2 +n_test <- round(test_set_pct*n) +n_train <- n - n_test +test_inds <- sort(sample(1:n, n_test, replace = FALSE)) +train_inds <- (1:n)[!((1:n) \%in\% test_inds)] +X_test <- X[test_inds,] +X_train <- X[train_inds,] +pi_test <- pi_x[test_inds] +pi_train <- pi_x[train_inds] +Z_test <- Z[test_inds] +Z_train <- Z[train_inds] +y_test <- y[test_inds] +y_train <- y[train_inds] +mu_test <- mu_x[test_inds] +mu_train <- mu_x[train_inds] +tau_test <- tau_x[test_inds] +tau_train <- tau_x[train_inds] +rfx_group_ids_test <- rfx_group_ids[test_inds] +rfx_group_ids_train <- rfx_group_ids[train_inds] +rfx_basis_test <- 
rfx_basis[test_inds,] +rfx_basis_train <- rfx_basis[train_inds,] +rfx_term_test <- rfx_term[test_inds] +rfx_term_train <- rfx_term[train_inds] +bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, + propensity_train = pi_train, rfx_group_ids_train = rfx_group_ids_train, + rfx_basis_train = rfx_basis_train, X_test = X_test, + Z_test = Z_test, propensity_test = pi_test, rfx_group_ids_test = rfx_group_ids_test, + rfx_basis_test = rfx_basis_test, + num_gfr = 100, num_burnin = 0, num_mcmc = 100) +# bcf_json_list <- list(saveBCFModelToJson(bcf_model)) +# bcf_model_roundtrip <- createBCFModelFromCombinedJson(bcf_json_list) +} diff --git a/man/createBCFModelFromCombinedJsonString.Rd b/man/createBCFModelFromCombinedJsonString.Rd index 05997993..6b4c3091 100644 --- a/man/createBCFModelFromCombinedJsonString.Rd +++ b/man/createBCFModelFromCombinedJsonString.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/bcf.R \name{createBCFModelFromCombinedJsonString} \alias{createBCFModelFromCombinedJsonString} -\title{Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object +\title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object which can be used for prediction, etc...} \usage{ createBCFModelFromCombinedJsonString(json_string_list) } \arguments{ -\item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BART model} +\item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BCF model} } \value{ -Object of type \code{bartmodel} +Object of type \code{bcf} } \description{ -Convert a list of (in-memory) JSON strings that represent BART models to a single combined BART model object +Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object which can be used 
for prediction, etc... } \examples{ From 5167b646461c2226742ec8569abbe8a57bb53e97 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 19:04:19 -0600 Subject: [PATCH 16/24] Updated R interface --- NAMESPACE | 4 +- R/bart.R | 6 +-- R/bcf.R | 38 ++++++++++--------- R/kernel.R | 12 +++--- _pkgdown.yml | 4 +- man/computeForestLeafIndices.Rd | 2 +- man/computeForestLeafVariances.Rd | 2 +- man/computeMaxLeafIndex.Rd | 2 +- man/createBCFModelFromCombinedJson.Rd | 4 +- man/createBCFModelFromCombinedJsonString.Rd | 2 +- man/createBCFModelFromJson.Rd | 2 +- man/createBCFModelFromJsonFile.Rd | 2 +- man/createBCFModelFromJsonString.Rd | 2 +- man/getRandomEffectSamples.bartmodel.Rd | 2 +- ....Rd => getRandomEffectSamples.bcfmodel.Rd} | 8 ++-- man/{predict.bcf.Rd => predict.bcfmodel.Rd} | 8 ++-- man/saveBCFModelToJson.Rd | 2 +- man/saveBCFModelToJsonFile.Rd | 2 +- man/saveBCFModelToJsonString.Rd | 2 +- 19 files changed, 55 insertions(+), 51 deletions(-) rename man/{getRandomEffectSamples.bcf.Rd => getRandomEffectSamples.bcfmodel.Rd} (92%) rename man/{predict.bcf.Rd => predict.bcfmodel.Rd} (93%) diff --git a/NAMESPACE b/NAMESPACE index 4a0ffaff..2e65b9c6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,9 @@ # Generated by roxygen2: do not edit by hand S3method(getRandomEffectSamples,bartmodel) -S3method(getRandomEffectSamples,bcf) +S3method(getRandomEffectSamples,bcfmodel) S3method(predict,bartmodel) -S3method(predict,bcf) +S3method(predict,bcfmodel) export(bart) export(bcf) export(calibrateInverseGammaErrorVariance) diff --git a/R/bart.R b/R/bart.R index fb30cb17..a2dc1b61 100644 --- a/R/bart.R +++ b/R/bart.R @@ -1057,7 +1057,7 @@ predict.bartmodel <- function(object, X, leaf_basis = NULL, rfx_group_ids = NULL #' Extract raw sample values for each of the random effect parameter terms. #' -#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. 
+#' @param object Object of type `bartmodel` containing draws of a BART model and associated sampling outputs. #' @param ... Other parameters to be used in random effects extraction #' @return List of arrays. The alpha array has dimension (`num_components`, `num_samples`) and is simply a vector if `num_components = 1`. #' The xi and beta arrays have dimension (`num_components`, `num_groups`, `num_samples`) and is simply a matrix if `num_components = 1`. @@ -1582,8 +1582,8 @@ createBARTModelFromCombinedJson <- function(json_object_list){ model_params[["outcome_mean"]] <- json_object_default$get_scalar("outcome_mean") model_params[["standardize"]] <- json_object_default$get_boolean("standardize") model_params[["sigma2_init"]] <- json_object_default$get_scalar("sigma2_init") - model_params[["sample_sigma_global"]] <- json_object$get_boolean("sample_sigma_global") - model_params[["sample_sigma_leaf"]] <- json_object$get_boolean("sample_sigma_leaf") + model_params[["sample_sigma_global"]] <- json_object_default$get_boolean("sample_sigma_global") + model_params[["sample_sigma_leaf"]] <- json_object_default$get_boolean("sample_sigma_leaf") model_params[["include_mean_forest"]] <- include_mean_forest model_params[["include_variance_forest"]] <- include_variance_forest model_params[["has_rfx"]] <- json_object_default$get_boolean("has_rfx") diff --git a/R/bcf.R b/R/bcf.R index 9de48daf..ee5e6a7d 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -1284,14 +1284,14 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id if (internal_propensity_model) { result[["bart_propensity_model"]] = bart_model_propensity } - class(result) <- "bcf" + class(result) <- "bcfmodel" return(result) } #' Predict from a sampled BCF model on new data #' -#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. 
+#' @param object Object of type `bcfmodel` containing draws of a Bayesian causal forest model and associated sampling outputs. #' @param X Covariates used to determine tree leaf predictions for each observation. Must be passed as a matrix or dataframe. #' @param Z Treatments used for prediction. #' @param propensity (Optional) Propensities used for prediction. @@ -1353,7 +1353,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id #' plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", #' ylab = "actual", main = "Treatment effect") #' abline(0,1,col="red",lty=3,lwd=3) -predict.bcf <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, rfx_basis = NULL, ...){ +predict.bcfmodel <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, rfx_basis = NULL, ...){ # Preprocess covariates if ((!is.data.frame(X)) && (!is.matrix(X))) { stop("X must be a matrix or dataframe") @@ -1472,7 +1472,7 @@ predict.bcf <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, r #' Extract raw sample values for each of the random effect parameter terms. #' -#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. +#' @param object Object of type `bcfmodel` containing draws of a Bayesian causal forest model and associated sampling outputs. #' @param ... Other parameters to be used in random effects extraction #' @return List of arrays. The alpha array has dimension (`num_components`, `num_samples`) and is simply a vector if `num_components = 1`. #' The xi and beta arrays have dimension (`num_components`, `num_groups`, `num_samples`) and is simply a matrix if `num_components = 1`. 
@@ -1541,7 +1541,7 @@ predict.bcf <- function(object, X, Z, propensity = NULL, rfx_group_ids = NULL, r #' mu_forest_params = mu_params, #' tau_forest_params = tau_params) #' rfx_samples <- getRandomEffectSamples(bcf_model) -getRandomEffectSamples.bcf <- function(object, ...){ +getRandomEffectSamples.bcfmodel <- function(object, ...){ result = list() if (!object$model_params$has_rfx) { @@ -1563,7 +1563,7 @@ getRandomEffectSamples.bcf <- function(object, ...){ #' Convert the persistent aspects of a BCF model to (in-memory) JSON #' -#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. +#' @param object Object of type `bcfmodel` containing draws of a Bayesian causal forest model and associated sampling outputs. #' #' @return Object of type `CppJson` #' @export @@ -1633,6 +1633,10 @@ getRandomEffectSamples.bcf <- function(object, ...){ saveBCFModelToJson <- function(object){ jsonobj <- createCppJson() + if (class(object) != "bcfmodel") { + stop("`object` must be a BCF model") + } + if (is.null(object$model_params)) { stop("This BCF model has not yet been sampled") } @@ -1721,7 +1725,7 @@ saveBCFModelToJson <- function(object){ #' Convert the persistent aspects of a BCF model to (in-memory) JSON and save to a file #' -#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. +#' @param object Object of type `bcfmodel` containing draws of a Bayesian causal forest model and associated sampling outputs. #' @param filename String of filepath, must end in ".json" #' #' @return in-memory JSON string @@ -1799,7 +1803,7 @@ saveBCFModelToJsonFile <- function(object, filename){ #' Convert the persistent aspects of a BCF model to (in-memory) JSON string #' -#' @param object Object of type `bcf` containing draws of a Bayesian causal forest model and associated sampling outputs. 
+#' @param object Object of type `bcfmodel` containing draws of a Bayesian causal forest model and associated sampling outputs. #' @return JSON string #' @export #' @@ -1878,7 +1882,7 @@ saveBCFModelToJsonString <- function(object){ #' #' @param json_object Object of type `CppJson` containing Json representation of a BCF model #' -#' @return Object of type `bcf` +#' @return Object of type `bcfmodel` #' @export #' #' @examples @@ -2032,7 +2036,7 @@ createBCFModelFromJson <- function(json_object){ preprocessor_metadata_string ) - class(output) <- "bcf" + class(output) <- "bcfmodel" return(output) } @@ -2041,7 +2045,7 @@ createBCFModelFromJson <- function(json_object){ #' #' @param json_filename String of filepath, must end in ".json" #' -#' @return Object of type `bcf` +#' @return Object of type `bcfmodel` #' @export #' #' @examples @@ -2122,7 +2126,7 @@ createBCFModelFromJsonFile <- function(json_filename){ #' #' @param json_string JSON string dump #' -#' @return Object of type `bcf` +#' @return Object of type `bcfmodel` #' @export #' #' @examples @@ -2199,7 +2203,7 @@ createBCFModelFromJsonString <- function(json_string){ #' #' @param json_object_list List of objects of type `CppJson` containing Json representation of a BCF model #' -#' @return Object of type `bcf` +#' @return Object of type `bcfmodel` #' @export #' #' @examples @@ -2262,7 +2266,7 @@ createBCFModelFromJsonString <- function(json_string){ #' num_gfr = 100, num_burnin = 0, num_mcmc = 100) #' # bcf_json_list <- list(saveBCFModelToJson(bcf_model)) #' # bcf_model_roundtrip <- createBCFModelFromCombinedJson(bcf_json_list) -createBCFModelFromCombinedJson <- function(json_string_list){ +createBCFModelFromCombinedJson <- function(json_object_list){ # Initialize the BCF model output <- list() @@ -2400,7 +2404,7 @@ createBCFModelFromCombinedJson <- function(json_string_list){ preprocessor_metadata_string ) - class(output) <- "bcf" + class(output) <- "bcfmodel" return(output) } @@ -2409,7 +2413,7 @@ 
createBCFModelFromCombinedJson <- function(json_string_list){ #' #' @param json_string_list List of JSON strings which can be parsed to objects of type `CppJson` containing Json representation of a BCF model #' -#' @return Object of type `bcf` +#' @return Object of type `bcfmodel` #' @export #' #' @examples @@ -2623,6 +2627,6 @@ createBCFModelFromCombinedJsonString <- function(json_string_list){ preprocessor_metadata_string ) - class(output) <- "bcf" + class(output) <- "bcfmodel" return(output) } diff --git a/R/kernel.R b/R/kernel.R index fe21f8f1..0850d6ba 100644 --- a/R/kernel.R +++ b/R/kernel.R @@ -12,7 +12,7 @@ #' if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's #' leaf indices begin at 3, etc...). #' -#' @param model_object Object of type `bartmodel` or `bcf` corresponding to a BART / BCF model with at least one forest sample +#' @param model_object Object of type `bartmodel` or `bcfmodel` corresponding to a BART / BCF model with at least one forest sample #' @param covariates Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest. #' @param forest_type Which forest to use from `model_object`. #' Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. @@ -36,7 +36,7 @@ #' @export computeForestLeafIndices <- function(model_object, covariates, forest_type, forest_inds=NULL) { # Extract relevant forest container - stopifnot(class(model_object) %in% c("bartmodel", "bcf")) + stopifnot(class(model_object) %in% c("bartmodel", "bcfmodel")) model_type <- ifelse(class(model_object)=="bartmodel", "bart", "bcf") if (model_type == "bart") { stopifnot(forest_type %in% c("mean", "variance")) @@ -98,7 +98,7 @@ computeForestLeafIndices <- function(model_object, covariates, forest_type, fore #' If leaf scale is not sampled for the forest in question, throws an error that the #' leaf model does not have a stochastic scale parameter. 
#' -#' @param model_object Object of type `bartmodel` or `bcf` corresponding to a BART / BCF model with at least one forest sample +#' @param model_object Object of type `bartmodel` or `bcfmodel` corresponding to a BART / BCF model with at least one forest sample #' @param forest_type Which forest to use from `model_object`. #' Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. #' @@ -120,7 +120,7 @@ computeForestLeafIndices <- function(model_object, covariates, forest_type, fore #' @export computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NULL) { # Extract relevant forest container - stopifnot(class(model_object) %in% c("bartmodel", "bcf")) + stopifnot(class(model_object) %in% c("bartmodel", "bcfmodel")) model_type <- ifelse(class(model_object)=="bartmodel", "bart", "bcf") if (model_type == "bart") { stopifnot(forest_type %in% c("mean", "variance")) @@ -176,7 +176,7 @@ computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NU #' Compute and return the largest possible leaf index computable by `computeForestLeafIndices` for the forests in a designated forest sample container. #' -#' @param model_object Object of type `bartmodel` or `bcf` corresponding to a BART / BCF model with at least one forest sample +#' @param model_object Object of type `bartmodel` or `bcfmodel` corresponding to a BART / BCF model with at least one forest sample #' @param covariates Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest. #' @param forest_type Which forest to use from `model_object`. 
#' Valid inputs depend on the model type, and whether or not a @@ -199,7 +199,7 @@ computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NU #' @export computeMaxLeafIndex <- function(model_object, covariates, forest_type, forest_inds=NULL) { # Extract relevant forest container - stopifnot(class(model_object) %in% c("bartmodel", "bcf")) + stopifnot(class(model_object) %in% c("bartmodel", "bcfmodel")) model_type <- ifelse(class(model_object)=="bartmodel", "bart", "bcf") if (model_type == "bart") { stopifnot(forest_type %in% c("mean", "variance")) diff --git a/_pkgdown.yml b/_pkgdown.yml index 65116269..e82043e9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -15,7 +15,7 @@ reference: High-level functionality for estimating causal effects using Bayesian tree ensembles (BCF, XBCF) contents: - bcf - - predict.bcf + - predict.bcfmodel - title: Low-level functionality @@ -101,7 +101,7 @@ reference: - createRandomEffectsTracker - getRandomEffectSamples - getRandomEffectSamples.bartmodel - - getRandomEffectSamples.bcf + - getRandomEffectSamples.bcfmodel - sample_sigma2_one_iteration - sample_tau_one_iteration - resetRandomEffectsModel diff --git a/man/computeForestLeafIndices.Rd b/man/computeForestLeafIndices.Rd index c5e5bfc0..c7b2147a 100644 --- a/man/computeForestLeafIndices.Rd +++ b/man/computeForestLeafIndices.Rd @@ -12,7 +12,7 @@ computeForestLeafIndices( ) } \arguments{ -\item{model_object}{Object of type \code{bartmodel} or \code{bcf} corresponding to a BART / BCF model with at least one forest sample} +\item{model_object}{Object of type \code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} \item{covariates}{Covariates to use for prediction. 
Must have the same dimensions / column types as the data used to train a forest.} diff --git a/man/computeForestLeafVariances.Rd b/man/computeForestLeafVariances.Rd index 06335cf4..15e2dc5f 100644 --- a/man/computeForestLeafVariances.Rd +++ b/man/computeForestLeafVariances.Rd @@ -7,7 +7,7 @@ computeForestLeafVariances(model_object, forest_type, forest_inds = NULL) } \arguments{ -\item{model_object}{Object of type \code{bartmodel} or \code{bcf} corresponding to a BART / BCF model with at least one forest sample} +\item{model_object}{Object of type \code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} \item{forest_type}{Which forest to use from \code{model_object}. Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. diff --git a/man/computeMaxLeafIndex.Rd b/man/computeMaxLeafIndex.Rd index c941acb4..2dde866b 100644 --- a/man/computeMaxLeafIndex.Rd +++ b/man/computeMaxLeafIndex.Rd @@ -7,7 +7,7 @@ computeMaxLeafIndex(model_object, covariates, forest_type, forest_inds = NULL) } \arguments{ -\item{model_object}{Object of type \code{bartmodel} or \code{bcf} corresponding to a BART / BCF model with at least one forest sample} +\item{model_object}{Object of type \code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} \item{covariates}{Covariates to use for prediction. 
Must have the same dimensions / column types as the data used to train a forest.} diff --git a/man/createBCFModelFromCombinedJson.Rd b/man/createBCFModelFromCombinedJson.Rd index 378dfaef..e374c311 100644 --- a/man/createBCFModelFromCombinedJson.Rd +++ b/man/createBCFModelFromCombinedJson.Rd @@ -5,13 +5,13 @@ \title{Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object which can be used for prediction, etc...} \usage{ -createBCFModelFromCombinedJson(json_string_list) +createBCFModelFromCombinedJson(json_object_list) } \arguments{ \item{json_object_list}{List of objects of type \code{CppJson} containing Json representation of a BCF model} } \value{ -Object of type \code{bcf} +Object of type \code{bcfmodel} } \description{ Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object diff --git a/man/createBCFModelFromCombinedJsonString.Rd b/man/createBCFModelFromCombinedJsonString.Rd index 6b4c3091..f1853d7f 100644 --- a/man/createBCFModelFromCombinedJsonString.Rd +++ b/man/createBCFModelFromCombinedJsonString.Rd @@ -11,7 +11,7 @@ createBCFModelFromCombinedJsonString(json_string_list) \item{json_string_list}{List of JSON strings which can be parsed to objects of type \code{CppJson} containing Json representation of a BCF model} } \value{ -Object of type \code{bcf} +Object of type \code{bcfmodel} } \description{ Convert a list of (in-memory) JSON strings that represent BCF models to a single combined BCF model object diff --git a/man/createBCFModelFromJson.Rd b/man/createBCFModelFromJson.Rd index 97141d6f..602db813 100644 --- a/man/createBCFModelFromJson.Rd +++ b/man/createBCFModelFromJson.Rd @@ -11,7 +11,7 @@ createBCFModelFromJson(json_object) \item{json_object}{Object of type \code{CppJson} containing Json representation of a BCF model} } \value{ -Object of type \code{bcf} +Object of type \code{bcfmodel} } \description{ Convert an (in-memory) JSON representation of a 
BCF model to a BCF model object diff --git a/man/createBCFModelFromJsonFile.Rd b/man/createBCFModelFromJsonFile.Rd index 2ea9fb8b..2f9be821 100644 --- a/man/createBCFModelFromJsonFile.Rd +++ b/man/createBCFModelFromJsonFile.Rd @@ -11,7 +11,7 @@ createBCFModelFromJsonFile(json_filename) \item{json_filename}{String of filepath, must end in ".json"} } \value{ -Object of type \code{bcf} +Object of type \code{bcfmodel} } \description{ Convert a JSON file containing sample information on a trained BCF model diff --git a/man/createBCFModelFromJsonString.Rd b/man/createBCFModelFromJsonString.Rd index 8c5b10fc..7e27f9bb 100644 --- a/man/createBCFModelFromJsonString.Rd +++ b/man/createBCFModelFromJsonString.Rd @@ -11,7 +11,7 @@ createBCFModelFromJsonString(json_string) \item{json_string}{JSON string dump} } \value{ -Object of type \code{bcf} +Object of type \code{bcfmodel} } \description{ Convert a JSON string containing sample information on a trained BCF model diff --git a/man/getRandomEffectSamples.bartmodel.Rd b/man/getRandomEffectSamples.bartmodel.Rd index 2c03e1ba..2ff00687 100644 --- a/man/getRandomEffectSamples.bartmodel.Rd +++ b/man/getRandomEffectSamples.bartmodel.Rd @@ -7,7 +7,7 @@ \method{getRandomEffectSamples}{bartmodel}(object, ...) 
} \arguments{ -\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bartmodel} containing draws of a BART model and associated sampling outputs.} \item{...}{Other parameters to be used in random effects extraction} } diff --git a/man/getRandomEffectSamples.bcf.Rd b/man/getRandomEffectSamples.bcfmodel.Rd similarity index 92% rename from man/getRandomEffectSamples.bcf.Rd rename to man/getRandomEffectSamples.bcfmodel.Rd index 9ab74b08..ca03ffe4 100644 --- a/man/getRandomEffectSamples.bcf.Rd +++ b/man/getRandomEffectSamples.bcfmodel.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/bcf.R -\name{getRandomEffectSamples.bcf} -\alias{getRandomEffectSamples.bcf} +\name{getRandomEffectSamples.bcfmodel} +\alias{getRandomEffectSamples.bcfmodel} \title{Extract raw sample values for each of the random effect parameter terms.} \usage{ -\method{getRandomEffectSamples}{bcf}(object, ...) +\method{getRandomEffectSamples}{bcfmodel}(object, ...) 
} \arguments{ -\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} \item{...}{Other parameters to be used in random effects extraction} } diff --git a/man/predict.bcf.Rd b/man/predict.bcfmodel.Rd similarity index 93% rename from man/predict.bcf.Rd rename to man/predict.bcfmodel.Rd index b2651af7..3fd2f1a4 100644 --- a/man/predict.bcf.Rd +++ b/man/predict.bcfmodel.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/bcf.R -\name{predict.bcf} -\alias{predict.bcf} +\name{predict.bcfmodel} +\alias{predict.bcfmodel} \title{Predict from a sampled BCF model on new data} \usage{ -\method{predict}{bcf}( +\method{predict}{bcfmodel}( object, X, Z, @@ -15,7 +15,7 @@ ) } \arguments{ -\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} \item{X}{Covariates used to determine tree leaf predictions for each observation. 
Must be passed as a matrix or dataframe.} diff --git a/man/saveBCFModelToJson.Rd b/man/saveBCFModelToJson.Rd index c7383044..89598334 100644 --- a/man/saveBCFModelToJson.Rd +++ b/man/saveBCFModelToJson.Rd @@ -7,7 +7,7 @@ saveBCFModelToJson(object) } \arguments{ -\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} } \value{ Object of type \code{CppJson} diff --git a/man/saveBCFModelToJsonFile.Rd b/man/saveBCFModelToJsonFile.Rd index 321470a5..14417564 100644 --- a/man/saveBCFModelToJsonFile.Rd +++ b/man/saveBCFModelToJsonFile.Rd @@ -7,7 +7,7 @@ saveBCFModelToJsonFile(object, filename) } \arguments{ -\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} \item{filename}{String of filepath, must end in ".json"} } diff --git a/man/saveBCFModelToJsonString.Rd b/man/saveBCFModelToJsonString.Rd index 85e69b78..e1d6769c 100644 --- a/man/saveBCFModelToJsonString.Rd +++ b/man/saveBCFModelToJsonString.Rd @@ -7,7 +7,7 @@ saveBCFModelToJsonString(object) } \arguments{ -\item{object}{Object of type \code{bcf} containing draws of a Bayesian causal forest model and associated sampling outputs.} +\item{object}{Object of type \code{bcfmodel} containing draws of a Bayesian causal forest model and associated sampling outputs.} } \value{ JSON string From 3c9721c3737fcede5727be82d20265cee98d3b8e Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 19:25:26 -0600 Subject: [PATCH 17/24] Updated R interface --- NAMESPACE | 2 +- R/kernel.R | 38 +++++++++++++------ _pkgdown.yml | 2 +- man/computeForestLeafIndices.Rd | 9 ++++- ...fIndex.Rd => computeForestMaxLeafIndex.Rd} | 22 
++++++++--- 5 files changed, 51 insertions(+), 22 deletions(-) rename man/{computeMaxLeafIndex.Rd => computeForestMaxLeafIndex.Rd} (67%) diff --git a/NAMESPACE b/NAMESPACE index 2e65b9c6..c722507b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,7 +9,7 @@ export(bcf) export(calibrateInverseGammaErrorVariance) export(computeForestLeafIndices) export(computeForestLeafVariances) -export(computeMaxLeafIndex) +export(computeForestMaxLeafIndex) export(convertPreprocessorToJson) export(createBARTModelFromCombinedJson) export(createBARTModelFromCombinedJsonString) diff --git a/R/kernel.R b/R/kernel.R index 0850d6ba..be2277a1 100644 --- a/R/kernel.R +++ b/R/kernel.R @@ -12,7 +12,7 @@ #' if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's #' leaf indices begin at 3, etc...). #' -#' @param model_object Object of type `bartmodel` or `bcfmodel` corresponding to a BART / BCF model with at least one forest sample +#' @param model_object Object of type `bartmodel`, `bcfmodel`, or `ForestSamples` corresponding to a BART / BCF model with at least one forest sample, or a low-level `ForestSamples` object. #' @param covariates Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest. #' @param forest_type Which forest to use from `model_object`. #' Valid inputs depend on the model type, and whether or not a given forest was sampled in that model. @@ -28,16 +28,21 @@ #' - `'treatment'`: Extracts leaf indices for the treatment effect forest #' - `'variance'`: Extracts leaf indices for the variance forest #' +#' **3. ForestSamples** +#' +#' - `NULL`: It is not necessary to disambiguate when this function is called directly on a `ForestSamples` object. This is the default value of this +#' #' @param forest_inds (Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, #' this function will return leaf indices for every sample of a forest. 
#' This function uses 1-indexing, so the first forest sample corresponds to `forest_num = 1`, and so on. #' @return List of vectors. Each vector is of size `num_obs * num_trees`, where `num_obs = nrow(covariates)` #' and `num_trees` is the number of trees in the relevant forest of `model_object`. #' @export -computeForestLeafIndices <- function(model_object, covariates, forest_type, forest_inds=NULL) { +computeForestLeafIndices <- function(model_object, covariates, forest_type=NULL, forest_inds=NULL) { # Extract relevant forest container - stopifnot(class(model_object) %in% c("bartmodel", "bcfmodel")) - model_type <- ifelse(class(model_object)=="bartmodel", "bart", "bcf") + object_name <- class(model_object)[1] + stopifnot(object_name %in% c("bartmodel", "bcfmodel", "ForestSamples")) + model_type <- ifelse(object_name=="bartmodel", "bart", ifelse(object_name=="bcfmodel", "bcf", "forest_samples")) if (model_type == "bart") { stopifnot(forest_type %in% c("mean", "variance")) if (forest_type=="mean") { @@ -51,7 +56,7 @@ computeForestLeafIndices <- function(model_object, covariates, forest_type, fore } forest_container <- model_object$variance_forests } - } else { + } else if (model_type == "bcf") { stopifnot(forest_type %in% c("prognostic", "treatment", "variance")) if (forest_type=="prognostic") { forest_container <- model_object$forests_mu @@ -63,6 +68,8 @@ computeForestLeafIndices <- function(model_object, covariates, forest_type, fore } forest_container <- model_object$variance_forests } + } else { + forest_container <- model_object } # Preprocess covariates @@ -176,7 +183,7 @@ computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NU #' Compute and return the largest possible leaf index computable by `computeForestLeafIndices` for the forests in a designated forest sample container. 
#' -#' @param model_object Object of type `bartmodel` or `bcfmodel` corresponding to a BART / BCF model with at least one forest sample +#' @param model_object Object of type `bartmodel`, `bcfmodel`, or `ForestSamples` corresponding to a BART / BCF model with at least one forest sample, or a low-level `ForestSamples` object. #' @param covariates Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest. #' @param forest_type Which forest to use from `model_object`. #' Valid inputs depend on the model type, and whether or not a @@ -192,15 +199,20 @@ computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NU #' - `'treatment'`: Extracts leaf indices for the treatment effect forest #' - `'variance'`: Extracts leaf indices for the variance forest #' -#' @param forest_inds (Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, -#' this function will return leaf indices for every sample of a forest. +#' **3. ForestSamples** +#' +#' - `NULL`: It is not necessary to disambiguate when this function is called directly on a `ForestSamples` object. This is the default value of this +#' +#' @param forest_inds (Optional) Indices of the forest sample(s) for which to compute max leaf indices. If not provided, +#' this function will return max leaf indices for every sample of a forest. #' This function uses 1-indexing, so the first forest sample corresponds to `forest_num = 1`, and so on. #' @return Vector containing the largest possible leaf index computable by `computeForestLeafIndices` for the forests in a designated forest sample container. 
#' @export -computeMaxLeafIndex <- function(model_object, covariates, forest_type, forest_inds=NULL) { +computeForestMaxLeafIndex <- function(model_object, covariates, forest_type=NULL, forest_inds=NULL) { # Extract relevant forest container - stopifnot(class(model_object) %in% c("bartmodel", "bcfmodel")) - model_type <- ifelse(class(model_object)=="bartmodel", "bart", "bcf") + object_name <- class(model_object)[1] + stopifnot(object_name %in% c("bartmodel", "bcfmodel", "ForestSamples")) + model_type <- ifelse(object_name=="bartmodel", "bart", ifelse(object_name=="bcfmodel", "bcf", "forest_samples")) if (model_type == "bart") { stopifnot(forest_type %in% c("mean", "variance")) if (forest_type=="mean") { @@ -214,7 +226,7 @@ computeMaxLeafIndex <- function(model_object, covariates, forest_type, forest_in } forest_container <- model_object$variance_forests } - } else { + } else if (model_type == "bcf") { stopifnot(forest_type %in% c("prognostic", "treatment", "variance")) if (forest_type=="prognostic") { forest_container <- model_object$forests_mu @@ -226,6 +238,8 @@ computeMaxLeafIndex <- function(model_object, covariates, forest_type, forest_in } forest_container <- model_object$variance_forests } + } else { + forest_container <- model_object } # Preprocess forest indices diff --git a/_pkgdown.yml b/_pkgdown.yml index e82043e9..187efcf7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -82,7 +82,7 @@ reference: - CppRNG - createCppRNG - calibrateInverseGammaErrorVariance - - computeMaxLeafIndex + - computeForestMaxLeafIndex - computeForestLeafIndices - computeForestLeafVariances - resetActiveForest diff --git a/man/computeForestLeafIndices.Rd b/man/computeForestLeafIndices.Rd index c7b2147a..ae9c0100 100644 --- a/man/computeForestLeafIndices.Rd +++ b/man/computeForestLeafIndices.Rd @@ -7,12 +7,12 @@ computeForestLeafIndices( model_object, covariates, - forest_type, + forest_type = NULL, forest_inds = NULL ) } \arguments{ -\item{model_object}{Object of type 
\code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} +\item{model_object}{Object of type \code{bartmodel}, \code{bcfmodel}, or \code{ForestSamples} corresponding to a BART / BCF model with at least one forest sample, or a low-level \code{ForestSamples} object.} \item{covariates}{Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest.} @@ -30,6 +30,11 @@ Valid inputs depend on the model type, and whether or not a given forest was sam \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest \item \code{'variance'}: Extracts leaf indices for the variance forest +} + +\strong{3. ForestSamples} +\itemize{ +\item \code{NULL}: It is not necessary to disambiguate when this function is called directly on a \code{ForestSamples} object. This is the default value of this parameter. }} \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. 
If not provided, diff --git a/man/computeMaxLeafIndex.Rd b/man/computeForestMaxLeafIndex.Rd similarity index 67% rename from man/computeMaxLeafIndex.Rd rename to man/computeForestMaxLeafIndex.Rd index 2dde866b..79f00c03 100644 --- a/man/computeMaxLeafIndex.Rd +++ b/man/computeForestMaxLeafIndex.Rd @@ -1,13 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/kernel.R -\name{computeMaxLeafIndex} -\alias{computeMaxLeafIndex} +\name{computeForestMaxLeafIndex} +\alias{computeForestMaxLeafIndex} \title{Compute and return the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container.} \usage{ -computeMaxLeafIndex(model_object, covariates, forest_type, forest_inds = NULL) +computeForestMaxLeafIndex( + model_object, + covariates, + forest_type = NULL, + forest_inds = NULL +) } \arguments{ -\item{model_object}{Object of type \code{bartmodel} or \code{bcfmodel} corresponding to a BART / BCF model with at least one forest sample} +\item{model_object}{Object of type \code{bartmodel}, \code{bcfmodel}, or \code{ForestSamples} corresponding to a BART / BCF model with at least one forest sample, or a low-level \code{ForestSamples} object.} \item{covariates}{Covariates to use for prediction. Must have the same dimensions / column types as the data used to train a forest.} @@ -25,10 +30,15 @@ Valid inputs depend on the model type, and whether or not a \item \code{'prognostic'}: Extracts leaf indices for the prognostic forest \item \code{'treatment'}: Extracts leaf indices for the treatment effect forest \item \code{'variance'}: Extracts leaf indices for the variance forest +} + +\strong{3. ForestSamples} +\itemize{ +\item \code{NULL}: It is not necessary to disambiguate when this function is called directly on a \code{ForestSamples} object. This is the default value of this parameter. }} -\item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. 
If not provided, -this function will return leaf indices for every sample of a forest. +\item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute max leaf indices. If not provided, +this function will return max leaf indices for every sample of a forest. This function uses 1-indexing, so the first forest sample corresponds to \code{forest_num = 1}, and so on.} } \value{ From 0cfe8c46c82ce5f8728a09b6f5780e2954eab6e4 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 19:37:24 -0600 Subject: [PATCH 18/24] Updated R interface --- NAMESPACE | 1 - R/bart.R | 4 ++-- R/bcf.R | 6 +++--- R/forest.R | 28 +++++++++++++--------------- _pkgdown.yml | 1 - man/resetActiveForest.Rd | 12 +++++++----- man/rootResetActiveForest.Rd | 17 ----------------- 7 files changed, 25 insertions(+), 44 deletions(-) delete mode 100644 man/rootResetActiveForest.Rd diff --git a/NAMESPACE b/NAMESPACE index c722507b..9767cded 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,7 +51,6 @@ export(resetActiveForest) export(resetForestModel) export(resetRandomEffectsModel) export(resetRandomEffectsTracker) -export(rootResetActiveForest) export(rootResetRandomEffectsModel) export(rootResetRandomEffectsTracker) export(sample_sigma2_one_iteration) diff --git a/R/bart.R b/R/bart.R index a2dc1b61..74c10200 100644 --- a/R/bart.R +++ b/R/bart.R @@ -700,7 +700,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train } } else { if (include_mean_forest) { - rootResetActiveForest(active_forest_mean) + resetActiveForest(active_forest_mean) active_forest_mean$set_root_leaves(init_values_mean_forest / num_trees_mean) resetForestModel(forest_model_mean, active_forest_mean, forest_dataset_train, outcome_train, TRUE) if (sample_sigma_leaf) { @@ -708,7 +708,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train } } if (include_variance_forest) { - rootResetActiveForest(active_forest_variance) + 
resetActiveForest(active_forest_variance) active_forest_variance$set_root_leaves(log(variance_forest_init) / num_trees_variance) resetForestModel(forest_model_variance, active_forest_variance, forest_dataset_train, outcome_train, FALSE) } diff --git a/R/bcf.R b/R/bcf.R index ee5e6a7d..e6bda883 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -976,10 +976,10 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id } } } else { - rootResetActiveForest(active_forest_mu) + resetActiveForest(active_forest_mu) active_forest_mu$set_root_leaves(init_mu / num_trees_mu) resetForestModel(forest_model_mu, active_forest_mu, forest_dataset_train, outcome_train, TRUE) - rootResetActiveForest(active_forest_tau) + resetActiveForest(active_forest_tau) active_forest_tau$set_root_leaves(init_tau / num_trees_tau) resetForestModel(forest_model_tau, active_forest_tau, forest_dataset_train, outcome_train, TRUE) if (sample_sigma_leaf_mu) { @@ -989,7 +989,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id current_leaf_scale_tau <- as.matrix(sigma_leaf_tau) } if (include_variance_forest) { - rootResetActiveForest(active_forest_variance) + resetActiveForest(active_forest_variance) active_forest_variance$set_root_leaves(log(variance_forest_init) / num_trees_variance) resetForestModel(forest_model_variance, active_forest_variance, forest_dataset_train, outcome_train, FALSE) } diff --git a/R/forest.R b/R/forest.R index 2ed73c01..dce4814e 100644 --- a/R/forest.R +++ b/R/forest.R @@ -779,14 +779,22 @@ createForest <- function(num_trees, output_dimension=1, is_leaf_constant=F, is_e ))) } -#' Re-initialize an active forest from a specific forest in a `ForestContainer` +#' Reset an active forest, either from a specific forest in a `ForestContainer` +#' or to an ensemble of single-node (i.e. 
root) trees #' #' @param active_forest Current active forest -#' @param forest_samples Container of forest samples from which to re-initialize active forest -#' @param forest_num Index of forest samples from which to initialize active forest +#' @param forest_samples (Optional) Container of forest samples from which to re-initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees. +#' @param forest_num (Optional) Index of forest samples from which to initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees. #' @export -resetActiveForest <- function(active_forest, forest_samples, forest_num) { - reset_active_forest_cpp(active_forest$forest_ptr, forest_samples$forest_container_ptr, forest_num) +resetActiveForest <- function(active_forest, forest_samples=NULL, forest_num=NULL) { + if (is.null(forest_samples)) { + root_reset_active_forest_cpp(active_forest$forest_ptr) + } else { + if (is.null(forest_num)) { + stop("`forest_num` must be specified if `forest_samples` is provided") + } + reset_active_forest_cpp(active_forest$forest_ptr, forest_samples$forest_container_ptr, forest_num) + } } #' Re-initialize a forest model (tracking data structures) from a specific forest in a `ForestContainer` @@ -800,13 +808,3 @@ resetActiveForest <- function(active_forest, forest_samples, forest_num) { resetForestModel <- function(forest_model, forest, dataset, residual, is_mean_model) { reset_forest_model_cpp(forest_model$tracker_ptr, forest$forest_ptr, dataset$data_ptr, residual$data_ptr, is_mean_model) } - -#' Reset an active forest to an ensemble of single-node (i.e. 
root) trees -#' -#' @param active_forest Current active forest -#' -#' @return `Forest` object -#' @export -rootResetActiveForest <- function(active_forest) { - root_reset_active_forest_cpp(active_forest$forest_ptr) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index 187efcf7..ec2a6836 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -87,7 +87,6 @@ reference: - computeForestLeafVariances - resetActiveForest - resetForestModel - - rootResetActiveForest - subtitle: Random Effects desc: > diff --git a/man/resetActiveForest.Rd b/man/resetActiveForest.Rd index 58d418df..90a26b39 100644 --- a/man/resetActiveForest.Rd +++ b/man/resetActiveForest.Rd @@ -2,17 +2,19 @@ % Please edit documentation in R/forest.R \name{resetActiveForest} \alias{resetActiveForest} -\title{Re-initialize an active forest from a specific forest in a \code{ForestContainer}} +\title{Reset an active forest, either from a specific forest in a \code{ForestContainer} +or to an ensemble of single-node (i.e. root) trees} \usage{ -resetActiveForest(active_forest, forest_samples, forest_num) +resetActiveForest(active_forest, forest_samples = NULL, forest_num = NULL) } \arguments{ \item{active_forest}{Current active forest} -\item{forest_samples}{Container of forest samples from which to re-initialize active forest} +\item{forest_samples}{(Optional) Container of forest samples from which to re-initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees.} -\item{forest_num}{Index of forest samples from which to initialize active forest} +\item{forest_num}{(Optional) Index of forest samples from which to initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees.} } \description{ -Re-initialize an active forest from a specific forest in a \code{ForestContainer} +Reset an active forest, either from a specific forest in a \code{ForestContainer} +or to an ensemble of single-node (i.e. 
root) trees } diff --git a/man/rootResetActiveForest.Rd b/man/rootResetActiveForest.Rd deleted file mode 100644 index 1767c7b5..00000000 --- a/man/rootResetActiveForest.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/forest.R -\name{rootResetActiveForest} -\alias{rootResetActiveForest} -\title{Reset an active forest to an ensemble of single-node (i.e. root) trees} -\usage{ -rootResetActiveForest(active_forest) -} -\arguments{ -\item{active_forest}{Current active forest} -} -\value{ -\code{Forest} object -} -\description{ -Reset an active forest to an ensemble of single-node (i.e. root) trees -} From 78be2e5766711de5495fa623e384a8d66c89db7a Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 19:50:26 -0600 Subject: [PATCH 19/24] Updated R interface to make variance samplers consistent with function naming convention and to rename output_dimension to leaf_dimension --- NAMESPACE | 4 +- R/bart.R | 20 ++++---- R/bcf.R | 16 +++---- R/cpp11.R | 8 ++-- R/forest.R | 46 +++++++++---------- R/variance.R | 4 +- _pkgdown.yml | 4 +- man/Forest.Rd | 14 +++--- man/ForestSamples.Rd | 14 +++--- man/createForest.Rd | 4 +- man/createForestSamples.Rd | 4 +- ... 
sampleGlobalErrorVarianceOneIteration.Rd} | 6 +-- ...n.Rd => sampleLeafVarianceOneIteration.Rd} | 6 +-- src/cpp11.cpp | 16 +++---- src/forest.cpp | 4 +- vignettes/CustomSamplingRoutine.Rmd | 36 +++++++-------- 16 files changed, 103 insertions(+), 103 deletions(-) rename man/{sample_sigma2_one_iteration.Rd => sampleGlobalErrorVarianceOneIteration.Rd} (74%) rename man/{sample_tau_one_iteration.Rd => sampleLeafVarianceOneIteration.Rd} (79%) diff --git a/NAMESPACE b/NAMESPACE index 9767cded..bcb23c2d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,8 +53,8 @@ export(resetRandomEffectsModel) export(resetRandomEffectsTracker) export(rootResetRandomEffectsModel) export(rootResetRandomEffectsTracker) -export(sample_sigma2_one_iteration) -export(sample_tau_one_iteration) +export(sampleGlobalErrorVarianceOneIteration) +export(sampleLeafVarianceOneIteration) export(saveBARTModelToJson) export(saveBARTModelToJsonFile) export(saveBARTModelToJsonString) diff --git a/R/bart.R b/R/bart.R index 74c10200..e70cbdda 100644 --- a/R/bart.R +++ b/R/bart.R @@ -502,19 +502,19 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train # Unpack model type info if (leaf_model_mean_forest == 0) { - output_dimension = 1 + leaf_dimension = 1 is_leaf_constant = T leaf_regression = F } else if (leaf_model_mean_forest == 1) { stopifnot(has_basis) stopifnot(ncol(leaf_basis_train) == 1) - output_dimension = 1 + leaf_dimension = 1 is_leaf_constant = F leaf_regression = T } else if (leaf_model_mean_forest == 2) { stopifnot(has_basis) stopifnot(ncol(leaf_basis_train) > 1) - output_dimension = ncol(leaf_basis_train) + leaf_dimension = ncol(leaf_basis_train) is_leaf_constant = F leaf_regression = T if (sample_sigma_leaf) { @@ -549,8 +549,8 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train # Container of forest samples if (include_mean_forest) { - forest_samples_mean <- createForestSamples(num_trees_mean, output_dimension, is_leaf_constant, FALSE) - 
active_forest_mean <- createForest(num_trees_mean, output_dimension, is_leaf_constant, FALSE) + forest_samples_mean <- createForestSamples(num_trees_mean, leaf_dimension, is_leaf_constant, FALSE) + active_forest_mean <- createForest(num_trees_mean, leaf_dimension, is_leaf_constant, FALSE) } if (include_variance_forest) { forest_samples_variance <- createForestSamples(num_trees_variance, 1, TRUE, TRUE) @@ -637,11 +637,11 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train ) } if (sample_sigma_global) { - current_sigma2 <- sample_sigma2_one_iteration(outcome_train, forest_dataset_train, rng, a_global, b_global) + current_sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome_train, forest_dataset_train, rng, a_global, b_global) if (keep_sample) global_var_samples[sample_counter] <- current_sigma2 } if (sample_sigma_leaf) { - leaf_scale_double <- sample_tau_one_iteration(active_forest_mean, rng, a_leaf, b_leaf) + leaf_scale_double <- sampleLeafVarianceOneIteration(active_forest_mean, rng, a_leaf, b_leaf) current_leaf_scale <- as.matrix(leaf_scale_double) if (keep_sample) leaf_scale_samples[sample_counter] <- leaf_scale_double } @@ -759,11 +759,11 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train ) } if (sample_sigma_global) { - current_sigma2 <- sample_sigma2_one_iteration(outcome_train, forest_dataset_train, rng, a_global, b_global) + current_sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome_train, forest_dataset_train, rng, a_global, b_global) if (keep_sample) global_var_samples[sample_counter] <- current_sigma2 } if (sample_sigma_leaf) { - leaf_scale_double <- sample_tau_one_iteration(active_forest_mean, rng, a_leaf, b_leaf) + leaf_scale_double <- sampleLeafVarianceOneIteration(active_forest_mean, rng, a_leaf, b_leaf) current_leaf_scale <- as.matrix(leaf_scale_double) if (keep_sample) leaf_scale_samples[sample_counter] <- leaf_scale_double } @@ -848,7 +848,7 @@ bart <- function(X_train, y_train, 
leaf_basis_train = NULL, rfx_group_ids_train "outcome_mean" = y_bar_train, "outcome_scale" = y_std_train, "standardize" = standardize, - "output_dimension" = output_dimension, + "leaf_dimension" = leaf_dimension, "is_leaf_constant" = is_leaf_constant, "leaf_regression" = leaf_regression, "requires_basis" = requires_basis, diff --git a/R/bcf.R b/R/bcf.R index e6bda883..49fa908a 100644 --- a/R/bcf.R +++ b/R/bcf.R @@ -817,10 +817,10 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id # Sample variance parameters (if requested) if (sample_sigma_global) { - current_sigma2 <- sample_sigma2_one_iteration(outcome_train, forest_dataset_train, rng, a_global, b_global) + current_sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome_train, forest_dataset_train, rng, a_global, b_global) } if (sample_sigma_leaf_mu) { - leaf_scale_mu_double <- sample_tau_one_iteration(active_forest_mu, rng, a_leaf_mu, b_leaf_mu) + leaf_scale_mu_double <- sampleLeafVarianceOneIteration(active_forest_mu, rng, a_leaf_mu, b_leaf_mu) current_leaf_scale_mu <- as.matrix(leaf_scale_mu_double) if (keep_sample) leaf_scale_mu_samples[sample_counter] <- leaf_scale_mu_double } @@ -878,11 +878,11 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id ) } if (sample_sigma_global) { - current_sigma2 <- sample_sigma2_one_iteration(outcome_train, forest_dataset_train, rng, a_global, b_global) + current_sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome_train, forest_dataset_train, rng, a_global, b_global) if (keep_sample) global_var_samples[sample_counter] <- current_sigma2 } if (sample_sigma_leaf_tau) { - leaf_scale_tau_double <- sample_tau_one_iteration(active_forest_tau, rng, a_leaf_tau, b_leaf_tau) + leaf_scale_tau_double <- sampleLeafVarianceOneIteration(active_forest_tau, rng, a_leaf_tau, b_leaf_tau) current_leaf_scale_tau <- as.matrix(leaf_scale_tau_double) if (keep_sample) leaf_scale_tau_samples[sample_counter] <- leaf_scale_tau_double } 
@@ -1045,10 +1045,10 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id # Sample variance parameters (if requested) if (sample_sigma_global) { - current_sigma2 <- sample_sigma2_one_iteration(outcome_train, forest_dataset_train, rng, a_global, b_global) + current_sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome_train, forest_dataset_train, rng, a_global, b_global) } if (sample_sigma_leaf_mu) { - leaf_scale_mu_double <- sample_tau_one_iteration(active_forest_mu, rng, a_leaf_mu, b_leaf_mu) + leaf_scale_mu_double <- sampleLeafVarianceOneIteration(active_forest_mu, rng, a_leaf_mu, b_leaf_mu) current_leaf_scale_mu <- as.matrix(leaf_scale_mu_double) if (keep_sample) leaf_scale_mu_samples[sample_counter] <- leaf_scale_mu_double } @@ -1106,11 +1106,11 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id ) } if (sample_sigma_global) { - current_sigma2 <- sample_sigma2_one_iteration(outcome_train, forest_dataset_train, rng, a_global, b_global) + current_sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome_train, forest_dataset_train, rng, a_global, b_global) if (keep_sample) global_var_samples[sample_counter] <- current_sigma2 } if (sample_sigma_leaf_tau) { - leaf_scale_tau_double <- sample_tau_one_iteration(active_forest_tau, rng, a_leaf_tau, b_leaf_tau) + leaf_scale_tau_double <- sampleLeafVarianceOneIteration(active_forest_tau, rng, a_leaf_tau, b_leaf_tau) current_leaf_scale_tau <- as.matrix(leaf_scale_tau_double) if (keep_sample) leaf_scale_tau_samples[sample_counter] <- leaf_scale_tau_double } diff --git a/R/cpp11.R b/R/cpp11.R index bc411e89..8ad8ba24 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -292,8 +292,8 @@ json_load_forest_container_cpp <- function(forest_samples, json_filename) { invisible(.Call(`_stochtree_json_load_forest_container_cpp`, forest_samples, json_filename)) } -output_dimension_forest_container_cpp <- function(forest_samples) { - 
.Call(`_stochtree_output_dimension_forest_container_cpp`, forest_samples) +leaf_dimension_forest_container_cpp <- function(forest_samples) { + .Call(`_stochtree_leaf_dimension_forest_container_cpp`, forest_samples) } is_leaf_constant_forest_container_cpp <- function(forest_samples) { @@ -464,8 +464,8 @@ predict_raw_active_forest_cpp <- function(active_forest, dataset) { .Call(`_stochtree_predict_raw_active_forest_cpp`, active_forest, dataset) } -output_dimension_active_forest_cpp <- function(active_forest) { - .Call(`_stochtree_output_dimension_active_forest_cpp`, active_forest) +leaf_dimension_active_forest_cpp <- function(active_forest) { + .Call(`_stochtree_leaf_dimension_active_forest_cpp`, active_forest) } average_max_depth_active_forest_cpp <- function(active_forest) { diff --git a/R/forest.R b/R/forest.R index dce4814e..c7a96653 100644 --- a/R/forest.R +++ b/R/forest.R @@ -14,12 +14,12 @@ ForestSamples <- R6::R6Class( #' @description #' Create a new ForestContainer object. #' @param num_trees Number of trees - #' @param output_dimension Dimensionality of the outcome model + #' @param leaf_dimension Dimensionality of the outcome model #' @param is_leaf_constant Whether leaf is constant #' @param is_exponentiated Whether forest predictions should be exponentiated before being returned #' @return A new `ForestContainer` object. 
- initialize = function(num_trees, output_dimension=1, is_leaf_constant=F, is_exponentiated=F) { - self$forest_container_ptr <- forest_container_cpp(num_trees, output_dimension, is_leaf_constant, is_exponentiated) + initialize = function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exponentiated=F) { + self$forest_container_ptr <- forest_container_cpp(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) }, #' @description @@ -81,7 +81,7 @@ ForestSamples <- R6::R6Class( predict_raw = function(forest_dataset) { stopifnot(!is.null(forest_dataset$data_ptr)) # Unpack dimensions - output_dim <- output_dimension_forest_container_cpp(self$forest_container_ptr) + output_dim <- leaf_dimension_forest_container_cpp(self$forest_container_ptr) num_samples <- num_samples_forest_container_cpp(self$forest_container_ptr) n <- dataset_num_rows_cpp(forest_dataset$data_ptr) @@ -105,7 +105,7 @@ ForestSamples <- R6::R6Class( predict_raw_single_forest = function(forest_dataset, forest_num) { stopifnot(!is.null(forest_dataset$data_ptr)) # Unpack dimensions - output_dim <- output_dimension_forest_container_cpp(self$forest_container_ptr) + output_dim <- leaf_dimension_forest_container_cpp(self$forest_container_ptr) n <- dataset_num_rows_cpp(forest_dataset$data_ptr) # Predict leaf values from forest @@ -139,10 +139,10 @@ ForestSamples <- R6::R6Class( # Set leaf values if (length(leaf_value) == 1) { - stopifnot(output_dimension_forest_container_cpp(self$forest_container_ptr) == 1) + stopifnot(leaf_dimension_forest_container_cpp(self$forest_container_ptr) == 1) set_leaf_value_forest_container_cpp(self$forest_container_ptr, leaf_value) } else if (length(leaf_value) > 1) { - stopifnot(output_dimension_forest_container_cpp(self$forest_container_ptr) == length(leaf_value)) + stopifnot(leaf_dimension_forest_container_cpp(self$forest_container_ptr) == length(leaf_value)) set_leaf_vector_forest_container_cpp(self$forest_container_ptr, leaf_value) } else { stop("leaf_value must be a 
numeric value or vector of length >= 1") @@ -225,8 +225,8 @@ ForestSamples <- R6::R6Class( #' @description #' Return output dimension of trees in a `ForestContainer` object #' @return Leaf node parameter size - output_dimension = function() { - return(output_dimension_forest_container_cpp(self$forest_container_ptr)) + leaf_dimension = function() { + return(leaf_dimension_forest_container_cpp(self$forest_container_ptr)) }, #' @description @@ -561,12 +561,12 @@ Forest <- R6::R6Class( #' @description #' Create a new Forest object. #' @param num_trees Number of trees in the forest - #' @param output_dimension Dimensionality of the outcome model + #' @param leaf_dimension Dimensionality of the outcome model #' @param is_leaf_constant Whether leaf is constant #' @param is_exponentiated Whether forest predictions should be exponentiated before being returned #' @return A new `Forest` object. - initialize = function(num_trees, output_dimension=1, is_leaf_constant=F, is_exponentiated=F) { - self$forest_ptr <- active_forest_cpp(num_trees, output_dimension, is_leaf_constant, is_exponentiated) + initialize = function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exponentiated=F) { + self$forest_ptr <- active_forest_cpp(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) }, #' @description @@ -592,7 +592,7 @@ Forest <- R6::R6Class( predict_raw = function(forest_dataset) { stopifnot(!is.null(forest_dataset$data_ptr)) # Unpack dimensions - output_dim <- output_dimension_active_forest_cpp(self$forest_ptr) + output_dim <- leaf_dimension_active_forest_cpp(self$forest_ptr) n <- dataset_num_rows_cpp(forest_dataset$data_ptr) # Predict leaf values from forest @@ -613,10 +613,10 @@ Forest <- R6::R6Class( # Set leaf values if (length(leaf_value) == 1) { - stopifnot(output_dimension_active_forest_cpp(self$forest_ptr) == 1) + stopifnot(leaf_dimension_active_forest_cpp(self$forest_ptr) == 1) set_leaf_value_active_forest_cpp(self$forest_ptr, leaf_value) } else if 
(length(leaf_value) > 1) { - stopifnot(output_dimension_active_forest_cpp(self$forest_ptr) == length(leaf_value)) + stopifnot(leaf_dimension_active_forest_cpp(self$forest_ptr) == length(leaf_value)) set_leaf_vector_active_forest_cpp(self$forest_ptr, leaf_value) } else { stop("leaf_value must be a numeric value or vector of length >= 1") @@ -676,8 +676,8 @@ Forest <- R6::R6Class( #' @description #' Return output dimension of trees in a `Forest` object #' @return Leaf node parameter size - output_dimension = function() { - return(output_dimension_active_forest_cpp(self$forest_ptr)) + leaf_dimension = function() { + return(leaf_dimension_active_forest_cpp(self$forest_ptr)) }, #' @description @@ -752,30 +752,30 @@ Forest <- R6::R6Class( #' Create a container of forest samples #' #' @param num_trees Number of trees -#' @param output_dimension Dimensionality of the outcome model +#' @param leaf_dimension Dimensionality of the outcome model #' @param is_leaf_constant Whether leaf is constant #' @param is_exponentiated Whether forest predictions should be exponentiated before being returned #' #' @return `ForestSamples` object #' @export -createForestSamples <- function(num_trees, output_dimension=1, is_leaf_constant=F, is_exponentiated=F) { +createForestSamples <- function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exponentiated=F) { return(invisible(( - ForestSamples$new(num_trees, output_dimension, is_leaf_constant, is_exponentiated) + ForestSamples$new(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) ))) } #' Create a forest #' #' @param num_trees Number of trees in the forest -#' @param output_dimension Dimensionality of the outcome model +#' @param leaf_dimension Dimensionality of the outcome model #' @param is_leaf_constant Whether leaf is constant #' @param is_exponentiated Whether forest predictions should be exponentiated before being returned #' #' @return `Forest` object #' @export -createForest <- function(num_trees, output_dimension=1, 
is_leaf_constant=F, is_exponentiated=F) { +createForest <- function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exponentiated=F) { return(invisible(( - Forest$new(num_trees, output_dimension, is_leaf_constant, is_exponentiated) + Forest$new(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) ))) } diff --git a/R/variance.R b/R/variance.R index b12bc89e..1ae75137 100644 --- a/R/variance.R +++ b/R/variance.R @@ -7,7 +7,7 @@ #' @param b Global variance scale parameter #' #' @export -sample_sigma2_one_iteration <- function(residual, dataset, rng, a, b) { +sampleGlobalErrorVarianceOneIteration <- function(residual, dataset, rng, a, b) { return(sample_sigma2_one_iteration_cpp(residual$data_ptr, dataset$data_ptr, rng$rng_ptr, a, b)) } @@ -19,6 +19,6 @@ sample_sigma2_one_iteration <- function(residual, dataset, rng, a, b) { #' @param b Leaf variance scale parameter #' #' @export -sample_tau_one_iteration <- function(forest, rng, a, b) { +sampleLeafVarianceOneIteration <- function(forest, rng, a, b) { return(sample_tau_one_iteration_cpp(forest$forest_ptr, rng$rng_ptr, a, b)) } diff --git a/_pkgdown.yml b/_pkgdown.yml index ec2a6836..f0a68688 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -101,8 +101,8 @@ reference: - getRandomEffectSamples - getRandomEffectSamples.bartmodel - getRandomEffectSamples.bcfmodel - - sample_sigma2_one_iteration - - sample_tau_one_iteration + - sampleGlobalErrorVarianceOneIteration + - sampleLeafVarianceOneIteration - resetRandomEffectsModel - resetRandomEffectsTracker - rootResetRandomEffectsModel diff --git a/man/Forest.Rd b/man/Forest.Rd index 6866779b..075460f4 100644 --- a/man/Forest.Rd +++ b/man/Forest.Rd @@ -23,7 +23,7 @@ Wrapper around a C++ tree ensemble \item \href{#method-Forest-prepare_for_sampler}{\code{Forest$prepare_for_sampler()}} \item \href{#method-Forest-adjust_residual}{\code{Forest$adjust_residual()}} \item \href{#method-Forest-num_trees}{\code{Forest$num_trees()}} -\item 
\href{#method-Forest-output_dimension}{\code{Forest$output_dimension()}} +\item \href{#method-Forest-leaf_dimension}{\code{Forest$leaf_dimension()}} \item \href{#method-Forest-is_constant_leaf}{\code{Forest$is_constant_leaf()}} \item \href{#method-Forest-is_exponentiated}{\code{Forest$is_exponentiated()}} \item \href{#method-Forest-add_numeric_split_tree}{\code{Forest$add_numeric_split_tree()}} @@ -42,7 +42,7 @@ Create a new Forest object. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{Forest$new( num_trees, - output_dimension = 1, + leaf_dimension = 1, is_leaf_constant = F, is_exponentiated = F )}\if{html}{\out{
}} @@ -53,7 +53,7 @@ Create a new Forest object. \describe{ \item{\code{num_trees}}{Number of trees in the forest} -\item{\code{output_dimension}}{Dimensionality of the outcome model} +\item{\code{leaf_dimension}}{Dimensionality of the outcome model} \item{\code{is_leaf_constant}}{Whether leaf is constant} @@ -203,12 +203,12 @@ Tree count } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Forest-output_dimension}{}}} -\subsection{Method \code{output_dimension()}}{ +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Forest-leaf_dimension}{}}} +\subsection{Method \code{leaf_dimension()}}{ Return output dimension of trees in a \code{Forest} object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Forest$output_dimension()}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Forest$leaf_dimension()}\if{html}{\out{
}} } \subsection{Returns}{ diff --git a/man/ForestSamples.Rd b/man/ForestSamples.Rd index 6179c274..111b1bd6 100644 --- a/man/ForestSamples.Rd +++ b/man/ForestSamples.Rd @@ -32,7 +32,7 @@ Wrapper around a C++ container of tree ensembles \item \href{#method-ForestSamples-load_json}{\code{ForestSamples$load_json()}} \item \href{#method-ForestSamples-num_samples}{\code{ForestSamples$num_samples()}} \item \href{#method-ForestSamples-num_trees}{\code{ForestSamples$num_trees()}} -\item \href{#method-ForestSamples-output_dimension}{\code{ForestSamples$output_dimension()}} +\item \href{#method-ForestSamples-leaf_dimension}{\code{ForestSamples$leaf_dimension()}} \item \href{#method-ForestSamples-is_constant_leaf}{\code{ForestSamples$is_constant_leaf()}} \item \href{#method-ForestSamples-is_exponentiated}{\code{ForestSamples$is_exponentiated()}} \item \href{#method-ForestSamples-add_forest_with_constant_leaves}{\code{ForestSamples$add_forest_with_constant_leaves()}} @@ -75,7 +75,7 @@ Create a new ForestContainer object. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{ForestSamples$new( num_trees, - output_dimension = 1, + leaf_dimension = 1, is_leaf_constant = F, is_exponentiated = F )}\if{html}{\out{
}} @@ -86,7 +86,7 @@ Create a new ForestContainer object. \describe{ \item{\code{num_trees}}{Number of trees} -\item{\code{output_dimension}}{Dimensionality of the outcome model} +\item{\code{leaf_dimension}}{Dimensionality of the outcome model} \item{\code{is_leaf_constant}}{Whether leaf is constant} @@ -433,12 +433,12 @@ Tree count } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ForestSamples-output_dimension}{}}} -\subsection{Method \code{output_dimension()}}{ +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ForestSamples-leaf_dimension}{}}} +\subsection{Method \code{leaf_dimension()}}{ Return output dimension of trees in a \code{ForestContainer} object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ForestSamples$output_dimension()}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{ForestSamples$leaf_dimension()}\if{html}{\out{
}} } \subsection{Returns}{ diff --git a/man/createForest.Rd b/man/createForest.Rd index 541dc9f3..e10db262 100644 --- a/man/createForest.Rd +++ b/man/createForest.Rd @@ -6,7 +6,7 @@ \usage{ createForest( num_trees, - output_dimension = 1, + leaf_dimension = 1, is_leaf_constant = F, is_exponentiated = F ) @@ -14,7 +14,7 @@ createForest( \arguments{ \item{num_trees}{Number of trees in the forest} -\item{output_dimension}{Dimensionality of the outcome model} +\item{leaf_dimension}{Dimensionality of the outcome model} \item{is_leaf_constant}{Whether leaf is constant} diff --git a/man/createForestSamples.Rd b/man/createForestSamples.Rd index 7789ccb9..f3a9fb47 100644 --- a/man/createForestSamples.Rd +++ b/man/createForestSamples.Rd @@ -6,7 +6,7 @@ \usage{ createForestSamples( num_trees, - output_dimension = 1, + leaf_dimension = 1, is_leaf_constant = F, is_exponentiated = F ) @@ -14,7 +14,7 @@ createForestSamples( \arguments{ \item{num_trees}{Number of trees} -\item{output_dimension}{Dimensionality of the outcome model} +\item{leaf_dimension}{Dimensionality of the outcome model} \item{is_leaf_constant}{Whether leaf is constant} diff --git a/man/sample_sigma2_one_iteration.Rd b/man/sampleGlobalErrorVarianceOneIteration.Rd similarity index 74% rename from man/sample_sigma2_one_iteration.Rd rename to man/sampleGlobalErrorVarianceOneIteration.Rd index f3d687b9..9e44856a 100644 --- a/man/sample_sigma2_one_iteration.Rd +++ b/man/sampleGlobalErrorVarianceOneIteration.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/variance.R -\name{sample_sigma2_one_iteration} -\alias{sample_sigma2_one_iteration} +\name{sampleGlobalErrorVarianceOneIteration} +\alias{sampleGlobalErrorVarianceOneIteration} \title{Sample one iteration of the (inverse gamma) global variance model} \usage{ -sample_sigma2_one_iteration(residual, dataset, rng, a, b) +sampleGlobalErrorVarianceOneIteration(residual, dataset, rng, a, b) } \arguments{ 
\item{residual}{Outcome class} diff --git a/man/sample_tau_one_iteration.Rd b/man/sampleLeafVarianceOneIteration.Rd similarity index 79% rename from man/sample_tau_one_iteration.Rd rename to man/sampleLeafVarianceOneIteration.Rd index 8e6201d5..d2e66db2 100644 --- a/man/sample_tau_one_iteration.Rd +++ b/man/sampleLeafVarianceOneIteration.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/variance.R -\name{sample_tau_one_iteration} -\alias{sample_tau_one_iteration} +\name{sampleLeafVarianceOneIteration} +\alias{sampleLeafVarianceOneIteration} \title{Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!)} \usage{ -sample_tau_one_iteration(forest, rng, a, b) +sampleLeafVarianceOneIteration(forest, rng, a, b) } \arguments{ \item{forest}{C++ forest} diff --git a/src/cpp11.cpp b/src/cpp11.cpp index d9c352c3..2364da8f 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -545,10 +545,10 @@ extern "C" SEXP _stochtree_json_load_forest_container_cpp(SEXP forest_samples, S END_CPP11 } // forest.cpp -int output_dimension_forest_container_cpp(cpp11::external_pointer forest_samples); -extern "C" SEXP _stochtree_output_dimension_forest_container_cpp(SEXP forest_samples) { +int leaf_dimension_forest_container_cpp(cpp11::external_pointer forest_samples); +extern "C" SEXP _stochtree_leaf_dimension_forest_container_cpp(SEXP forest_samples) { BEGIN_CPP11 - return cpp11::as_sexp(output_dimension_forest_container_cpp(cpp11::as_cpp>>(forest_samples))); + return cpp11::as_sexp(leaf_dimension_forest_container_cpp(cpp11::as_cpp>>(forest_samples))); END_CPP11 } // forest.cpp @@ -857,10 +857,10 @@ extern "C" SEXP _stochtree_predict_raw_active_forest_cpp(SEXP active_forest, SEX END_CPP11 } // forest.cpp -int output_dimension_active_forest_cpp(cpp11::external_pointer active_forest); -extern "C" SEXP _stochtree_output_dimension_active_forest_cpp(SEXP active_forest) { +int 
leaf_dimension_active_forest_cpp(cpp11::external_pointer active_forest); +extern "C" SEXP _stochtree_leaf_dimension_active_forest_cpp(SEXP active_forest) { BEGIN_CPP11 - return cpp11::as_sexp(output_dimension_active_forest_cpp(cpp11::as_cpp>>(active_forest))); + return cpp11::as_sexp(leaf_dimension_active_forest_cpp(cpp11::as_cpp>>(active_forest))); END_CPP11 } // forest.cpp @@ -1510,6 +1510,8 @@ static const R_CallMethodDef CallEntries[] = { {"_stochtree_json_load_string_cpp", (DL_FUNC) &_stochtree_json_load_string_cpp, 2}, {"_stochtree_json_save_file_cpp", (DL_FUNC) &_stochtree_json_save_file_cpp, 2}, {"_stochtree_json_save_forest_container_cpp", (DL_FUNC) &_stochtree_json_save_forest_container_cpp, 2}, + {"_stochtree_leaf_dimension_active_forest_cpp", (DL_FUNC) &_stochtree_leaf_dimension_active_forest_cpp, 1}, + {"_stochtree_leaf_dimension_forest_container_cpp", (DL_FUNC) &_stochtree_leaf_dimension_forest_container_cpp, 1}, {"_stochtree_leaf_values_forest_container_cpp", (DL_FUNC) &_stochtree_leaf_values_forest_container_cpp, 4}, {"_stochtree_leaves_forest_container_cpp", (DL_FUNC) &_stochtree_leaves_forest_container_cpp, 3}, {"_stochtree_left_child_node_forest_container_cpp", (DL_FUNC) &_stochtree_left_child_node_forest_container_cpp, 4}, @@ -1523,8 +1525,6 @@ static const R_CallMethodDef CallEntries[] = { {"_stochtree_num_split_nodes_forest_container_cpp", (DL_FUNC) &_stochtree_num_split_nodes_forest_container_cpp, 3}, {"_stochtree_num_trees_active_forest_cpp", (DL_FUNC) &_stochtree_num_trees_active_forest_cpp, 1}, {"_stochtree_num_trees_forest_container_cpp", (DL_FUNC) &_stochtree_num_trees_forest_container_cpp, 1}, - {"_stochtree_output_dimension_active_forest_cpp", (DL_FUNC) &_stochtree_output_dimension_active_forest_cpp, 1}, - {"_stochtree_output_dimension_forest_container_cpp", (DL_FUNC) &_stochtree_output_dimension_forest_container_cpp, 1}, {"_stochtree_overwrite_column_vector_cpp", (DL_FUNC) &_stochtree_overwrite_column_vector_cpp, 2}, 
{"_stochtree_parent_node_forest_container_cpp", (DL_FUNC) &_stochtree_parent_node_forest_container_cpp, 4}, {"_stochtree_predict_active_forest_cpp", (DL_FUNC) &_stochtree_predict_active_forest_cpp, 2}, diff --git a/src/forest.cpp b/src/forest.cpp index fc2a5d63..c12c4f20 100644 --- a/src/forest.cpp +++ b/src/forest.cpp @@ -133,7 +133,7 @@ void json_load_forest_container_cpp(cpp11::external_pointer forest_samples) { +int leaf_dimension_forest_container_cpp(cpp11::external_pointer forest_samples) { return forest_samples->OutputDimension(); } @@ -632,7 +632,7 @@ cpp11::writable::doubles predict_raw_active_forest_cpp(cpp11::external_pointer active_forest) { +int leaf_dimension_active_forest_cpp(cpp11::external_pointer active_forest) { return active_forest->OutputDimension(); } diff --git a/vignettes/CustomSamplingRoutine.Rmd b/vignettes/CustomSamplingRoutine.Rmd index a7b70534..9ed1725c 100644 --- a/vignettes/CustomSamplingRoutine.Rmd +++ b/vignettes/CustomSamplingRoutine.Rmd @@ -169,12 +169,12 @@ for (i in 1:num_warmstart) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` - leaf_scale_samples[i+1] <- sample_tau_one_iteration( + leaf_scale_samples[i+1] <- sampleLeafVarianceOneIteration( active_forest, rng, a_leaf, b_leaf ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] @@ -194,12 +194,12 @@ for (i in (num_warmstart+1):num_samples) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` - leaf_scale_samples[i+1] <- sample_tau_one_iteration( + leaf_scale_samples[i+1] <- sampleLeafVarianceOneIteration( active_forest, rng, a_leaf, 
b_leaf ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] @@ -382,12 +382,12 @@ for (i in 1:num_warmstart) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` - leaf_scale_samples[i+1] <- sample_tau_one_iteration( + leaf_scale_samples[i+1] <- sampleLeafVarianceOneIteration( active_forest, rng, a_leaf, b_leaf ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] @@ -411,12 +411,12 @@ for (i in (num_warmstart+1):num_samples) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` - leaf_scale_samples[i+1] <- sample_tau_one_iteration( + leaf_scale_samples[i+1] <- sampleLeafVarianceOneIteration( active_forest, rng, a_leaf, b_leaf ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] @@ -617,12 +617,12 @@ for (i in 1:num_warmstart) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` - leaf_scale_samples[i+1] <- sample_tau_one_iteration( + leaf_scale_samples[i+1] <- sampleLeafVarianceOneIteration( active_forest, rng, a_leaf, b_leaf ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] @@ -646,12 +646,12 @@ for (i in (num_warmstart+1):num_samples) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) # Sample leaf node variance parameter and update `leaf_prior_scale` - 
leaf_scale_samples[i+1] <- sample_tau_one_iteration( + leaf_scale_samples[i+1] <- sampleLeafVarianceOneIteration( active_forest, rng, a_leaf, b_leaf ) leaf_prior_scale[1,1] <- leaf_scale_samples[i+1] @@ -848,7 +848,7 @@ for (i in 1:num_warmstart) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) } @@ -886,7 +886,7 @@ for (i in (num_warmstart+1):num_samples) { ) # Sample global variance parameter - global_var_samples[i+1] <- sample_sigma2_one_iteration( + global_var_samples[i+1] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset, rng, nu, lambda ) } @@ -1173,7 +1173,7 @@ if (num_gfr > 0){ ) # Sample variance parameters (if requested) - global_var_samples[i] <- sample_sigma2_one_iteration( + global_var_samples[i] <- sampleGlobalErrorVarianceOneIteration( outcome, forest_dataset_mu, rng, nu, lambda ) current_sigma2 <- global_var_samples[i] @@ -1205,7 +1205,7 @@ if (num_gfr > 0){ b_1_samples[i] <- current_b_1 # Sample variance parameters (if requested) - global_var_samples[i] <- sample_sigma2_one_iteration(outcome, forest_dataset_tau, rng, nu, lambda) + global_var_samples[i] <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset_tau, rng, nu, lambda) current_sigma2 <- global_var_samples[i] } } @@ -1224,7 +1224,7 @@ if (num_burnin + num_mcmc > 0) { ) # Sample global variance parameter - global_var_samples[i] <- sample_sigma2_one_iteration(outcome, forest_dataset_mu, rng, nu, lambda) + global_var_samples[i] <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset_mu, rng, nu, lambda) current_sigma2 <- global_var_samples[i] # Sample the treatment forest @@ -1253,7 +1253,7 @@ if (num_burnin + num_mcmc > 0) { b_1_samples[i] <- current_b_1 # Sample global variance parameter - global_var_samples[i] <- sample_sigma2_one_iteration(outcome, forest_dataset_tau, rng, nu, lambda) + 
global_var_samples[i] <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset_tau, rng, nu, lambda) current_sigma2 <- global_var_samples[i] } } From 573265d494477727d0681eaa8c9477f1e6c6ba0f Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Wed, 29 Jan 2025 23:15:43 -0600 Subject: [PATCH 20/24] Updated GHA workflow for R CMD Check --- .github/workflows/r-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r-test.yml b/.github/workflows/r-test.yml index 7260ac0e..cf2fb148 100644 --- a/.github/workflows/r-test.yml +++ b/.github/workflows/r-test.yml @@ -39,7 +39,7 @@ jobs: - name: Create a CRAN-ready version of the R package run: | - Rscript cran-bootstrap.R 0 + Rscript cran-bootstrap.R 0 0 - uses: r-lib/actions/check-r-package@v2 with: From 315fc59d9c7638d34bbefa53ba95d8c6f46fd5b0 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 30 Jan 2025 01:38:59 -0600 Subject: [PATCH 21/24] Updating R docs to include more examples --- R/data.R | 20 ++++++++++ R/forest.R | 64 +++++++++++++++++++++++++++++++ R/generics.R | 11 ++++++ R/kernel.R | 48 +++++++++++++++++------ R/model.R | 17 ++++++++ man/computeForestLeafIndices.Rd | 10 ++++- man/computeForestLeafVariances.Rd | 10 ++++- man/computeForestMaxLeafIndex.Rd | 10 ++++- man/createCppRNG.Rd | 4 ++ man/createForest.Rd | 7 ++++ man/createForestDataset.Rd | 8 ++++ man/createForestModel.Rd | 13 +++++++ man/createForestSamples.Rd | 7 ++++ man/createOutcome.Rd | 5 +++ man/createRandomEffectsDataset.Rd | 7 ++++ man/getRandomEffectSamples.Rd | 11 ++++++ man/resetActiveForest.Rd | 14 +++++++ man/resetForestModel.Rd | 36 +++++++++++++++++ 18 files changed, 287 insertions(+), 15 deletions(-) diff --git a/R/data.R b/R/data.R index 0f9d95eb..d90265ae 100644 --- a/R/data.R +++ b/R/data.R @@ -228,6 +228,14 @@ RandomEffectsDataset <- R6::R6Class( #' #' @return `ForestDataset` object #' @export +#' +#' @examples +#' covariate_matrix <- matrix(runif(10*100), ncol = 10) +#' basis_matrix <- 
matrix(rnorm(3*100), ncol = 3) +#' weight_vector <- rnorm(100) +#' forest_dataset <- createForestDataset(covariate_matrix) +#' forest_dataset <- createForestDataset(covariate_matrix, basis_matrix) +#' forest_dataset <- createForestDataset(covariate_matrix, basis_matrix, weight_vector) createForestDataset <- function(covariates, basis=NULL, variance_weights=NULL){ return(invisible(( ForestDataset$new(covariates, basis, variance_weights) @@ -240,6 +248,11 @@ createForestDataset <- function(covariates, basis=NULL, variance_weights=NULL){ #' #' @return `Outcome` object #' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' outcome <- createOutcome(y) createOutcome <- function(outcome){ return(invisible(( Outcome$new(outcome) @@ -254,6 +267,13 @@ createOutcome <- function(outcome){ #' #' @return `RandomEffectsDataset` object #' @export +#' +#' @examples +#' rfx_group_ids <- sample(1:2, size = 100, replace = TRUE) +#' rfx_basis <- matrix(rnorm(3*100), ncol = 3) +#' weight_vector <- rnorm(100) +#' rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +#' rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis, weight_vector) createRandomEffectsDataset <- function(group_labels, basis, variance_weights=NULL){ return(invisible(( RandomEffectsDataset$new(group_labels, basis, variance_weights) diff --git a/R/forest.R b/R/forest.R index c7a96653..4a0680cb 100644 --- a/R/forest.R +++ b/R/forest.R @@ -758,6 +758,13 @@ Forest <- R6::R6Class( #' #' @return `ForestSamples` object #' @export +#' +#' @examples +#' num_trees <- 100 +#' leaf_dimension <- 2 +#' is_leaf_constant <- FALSE +#' is_exponentiated <- FALSE +#' forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) createForestSamples <- function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exponentiated=F) { return(invisible(( ForestSamples$new(num_trees, leaf_dimension, is_leaf_constant, 
is_exponentiated) @@ -773,6 +780,13 @@ createForestSamples <- function(num_trees, leaf_dimension=1, is_leaf_constant=F, #' #' @return `Forest` object #' @export +#' +#' @examples +#' num_trees <- 100 +#' leaf_dimension <- 2 +#' is_leaf_constant <- FALSE +#' is_exponentiated <- FALSE +#' forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) createForest <- function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exponentiated=F) { return(invisible(( Forest$new(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) @@ -786,6 +800,20 @@ createForest <- function(num_trees, leaf_dimension=1, is_leaf_constant=F, is_exp #' @param forest_samples (Optional) Container of forest samples from which to re-initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees. #' @param forest_num (Optional) Index of forest samples from which to initialize active forest. If not provided, active forest will be reset to an ensemble of single-node (i.e. root) trees. 
#' @export +#' +#' @examples +#' num_trees <- 100 +#' leaf_dimension <- 1 +#' is_leaf_constant <- TRUE +#' is_exponentiated <- FALSE +#' active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +#' forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +#' forest_samples$add_forest_with_constant_leaves(0.0) +#' forest_samples$add_numeric_split_tree(0, 0, 0, 0, 0.5, -1.0, 1.0) +#' forest_samples$add_numeric_split_tree(0, 1, 0, 1, 0.75, 3.4, 0.75) +#' active_forest$set_root_leaves(0.1) +#' resetActiveForest(active_forest, forest_samples, 0) +#' resetActiveForest(active_forest) resetActiveForest <- function(active_forest, forest_samples=NULL, forest_num=NULL) { if (is.null(forest_samples)) { root_reset_active_forest_cpp(active_forest$forest_ptr) @@ -805,6 +833,42 @@ resetActiveForest <- function(active_forest, forest_samples=NULL, forest_num=NUL #' @param residual Residual which will also be updated #' @param is_mean_model Whether the model being updated is a conditional mean model #' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' num_trees <- 100 +#' leaf_dimension <- 1 +#' is_leaf_constant <- TRUE +#' is_exponentiated <- FALSE +#' alpha <- 0.95 +#' beta <- 2.0 +#' min_samples_leaf <- 2 +#' max_depth <- 10 +#' feature_types <- as.integer(rep(0, p)) +#' leaf_model <- 0 +#' sigma2 <- 1.0 +#' leaf_scale <- as.matrix(1.0) +#' variable_weights <- rep(1/p, p) +#' a_forest <- 1 +#' b_forest <- 1 +#' cutpoint_grid_size <- 100 +#' X <- matrix(runif(n*p), ncol = p) +#' forest_dataset <- createForestDataset(X) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(n) +#' outcome <- createOutcome(y) +#' rng <- createCppRNG(1234) +#' forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) +#' active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +#' forest_samples <- createForestSamples(num_trees, 
leaf_dimension, is_leaf_constant, is_exponentiated) +#' forest_model$sample_one_iteration( +#' forest_dataset, outcome, forest_samples, active_forest, +#' rng, feature_types, leaf_model, leaf_scale, variable_weights, +#' a_forest, b_forest, sigma2, cutpoint_grid_size, keep_forest = TRUE, +#' gfr = FALSE, pre_initialized = TRUE +#' ) +#' resetActiveForest(active_forest, forest_samples, 0) +#' resetForestModel(forest_model, active_forest, forest_dataset, outcome, TRUE) resetForestModel <- function(forest_model, forest, dataset, residual, is_mean_model) { reset_forest_model_cpp(forest_model$tracker_ptr, forest$forest_ptr, dataset$data_ptr, residual$data_ptr, is_mean_model) } diff --git a/R/generics.R b/R/generics.R index 8c62f4f2..1c73ad9a 100644 --- a/R/generics.R +++ b/R/generics.R @@ -4,4 +4,15 @@ #' @param ... Other parameters to be used in random effects extraction #' @return List of random effect samples #' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' X <- matrix(runif(n*p), ncol = p) +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- rep(1.0, n) +#' y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, +#' rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +#' rfx_samples <- getRandomEffectSamples(bart_model) getRandomEffectSamples <- function(object, ...) UseMethod("getRandomEffectSamples") diff --git a/R/kernel.R b/R/kernel.R index be2277a1..becbb43b 100644 --- a/R/kernel.R +++ b/R/kernel.R @@ -34,10 +34,18 @@ #' #' @param forest_inds (Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, #' this function will return leaf indices for every sample of a forest. -#' This function uses 1-indexing, so the first forest sample corresponds to `forest_num = 1`, and so on. +#' This function uses 0-indexing, so the first forest sample corresponds to `forest_num = 0`, and so on. 
#' @return List of vectors. Each vector is of size `num_obs * num_trees`, where `num_obs = nrow(covariates)` #' and `num_trees` is the number of trees in the relevant forest of `model_object`. #' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +#' computeForestLeafIndices(bart_model, X, "mean") +#' computeForestLeafIndices(bart_model, X, "mean", 0) +#' computeForestLeafIndices(bart_model, X, "mean", c(1,3,9)) computeForestLeafIndices <- function(model_object, covariates, forest_type=NULL, forest_inds=NULL) { # Extract relevant forest container object_name <- class(model_object)[1] @@ -84,9 +92,9 @@ computeForestLeafIndices <- function(model_object, covariates, forest_type=NULL, if (is.null(forest_inds)) { forest_inds <- as.integer(1:num_forests - 1) } else { - stopifnot(all(forest_inds <= num_forests)) - stopifnot(all(forest_inds >= 1)) - forest_inds <- as.integer(forest_inds - 1) + stopifnot(all(forest_inds <= num_forests-1)) + stopifnot(all(forest_inds >= 0)) + forest_inds <- as.integer(forest_inds) } # Compute leaf indices @@ -122,9 +130,17 @@ computeForestLeafIndices <- function(model_object, covariates, forest_type=NULL, #' #' @param forest_inds (Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, #' this function will return leaf indices for every sample of a forest. -#' This function uses 1-indexing, so the first forest sample corresponds to `forest_num = 1`, and so on. +#' This function uses 0-indexing, so the first forest sample corresponds to `forest_num = 0`, and so on. #' @return Vector of size `length(forest_inds)` with the leaf scale parameter for each requested forest. 
#' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +#' computeForestLeafVariances(bart_model, "mean") +#' computeForestLeafVariances(bart_model, "mean", 0) +#' computeForestLeafVariances(bart_model, "mean", c(1,3,5)) computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NULL) { # Extract relevant forest container stopifnot(class(model_object) %in% c("bartmodel", "bcfmodel")) @@ -170,9 +186,9 @@ computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NU if (is.null(forest_inds)) { forest_inds <- as.integer(1:num_forests) } else { - stopifnot(all(forest_inds <= num_forests)) - stopifnot(all(forest_inds >= 1)) - forest_inds <- as.integer(forest_inds) + stopifnot(all(forest_inds <= num_forests-1)) + stopifnot(all(forest_inds >= 0)) + forest_inds <- as.integer(forest_inds + 1) } # Gather leaf scale parameters @@ -205,9 +221,17 @@ computeForestLeafVariances <- function(model_object, forest_type, forest_inds=NU #' #' @param forest_inds (Optional) Indices of the forest sample(s) for which to compute max leaf indices. If not provided, #' this function will return max leaf indices for every sample of a forest. -#' This function uses 1-indexing, so the first forest sample corresponds to `forest_num = 1`, and so on. +#' This function uses 0-indexing, so the first forest sample corresponds to `forest_num = 0`, and so on. #' @return Vector containing the largest possible leaf index computable by `computeForestLeafIndices` for the forests in a designated forest sample container. 
#' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +#' computeForestMaxLeafIndex(bart_model, X, "mean") +#' computeForestMaxLeafIndex(bart_model, X, "mean", 0) +#' computeForestMaxLeafIndex(bart_model, X, "mean", c(1,3,9)) computeForestMaxLeafIndex <- function(model_object, covariates, forest_type=NULL, forest_inds=NULL) { # Extract relevant forest container object_name <- class(model_object)[1] @@ -247,9 +271,9 @@ computeForestMaxLeafIndex <- function(model_object, covariates, forest_type=NULL if (is.null(forest_inds)) { forest_inds <- as.integer(1:num_forests - 1) } else { - stopifnot(all(forest_inds <= num_forests)) - stopifnot(all(forest_inds >= 1)) - forest_inds <- as.integer(forest_inds - 1) + stopifnot(all(forest_inds <= num_forests-1)) + stopifnot(all(forest_inds >= 0)) + forest_inds <- as.integer(forest_inds) } # Compute leaf indices diff --git a/R/model.R b/R/model.R index e8f2ba61..534cf7bc 100644 --- a/R/model.R +++ b/R/model.R @@ -170,6 +170,10 @@ ForestModel <- R6::R6Class( #' #' @return `CppRng` object #' @export +#' +#' @examples +#' rng <- createCppRNG(1234) +#' rng <- createCppRNG() createCppRNG <- function(random_seed = -1){ return(invisible(( CppRNG$new(random_seed) @@ -189,6 +193,19 @@ createCppRNG <- function(random_seed = -1){ #' #' @return `ForestModel` object #' @export +#' +#' @examples +#' num_trees <- 100 +#' n <- 100 +#' p <- 10 +#' alpha <- 0.95 +#' beta <- 2.0 +#' min_samples_leaf <- 2 +#' max_depth <- 10 +#' feature_types <- as.integer(rep(0, p)) +#' X <- matrix(runif(n*p), ncol = p) +#' forest_dataset <- createForestDataset(X) +#' forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) createForestModel <- function(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) { return(invisible(( ForestModel$new(forest_dataset, 
feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) diff --git a/man/computeForestLeafIndices.Rd b/man/computeForestLeafIndices.Rd index ae9c0100..9733708b 100644 --- a/man/computeForestLeafIndices.Rd +++ b/man/computeForestLeafIndices.Rd @@ -39,7 +39,7 @@ Valid inputs depend on the model type, and whether or not a given forest was sam \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, this function will return leaf indices for every sample of a forest. -This function uses 1-indexing, so the first forest sample corresponds to \code{forest_num = 1}, and so on.} +This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} } \value{ List of vectors. Each vector is of size \code{num_obs * num_trees}, where \code{num_obs = nrow(covariates)} @@ -58,3 +58,11 @@ mapped column index that corresponds to a single leaf of a single tree (i.e. if tree 1 has 3 leaves, its column indices range from 0 to 2, and then tree 2's leaf indices begin at 3, etc...). } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +computeForestLeafIndices(bart_model, X, "mean") +computeForestLeafIndices(bart_model, X, "mean", 0) +computeForestLeafIndices(bart_model, X, "mean", c(1,3,9)) +} diff --git a/man/computeForestLeafVariances.Rd b/man/computeForestLeafVariances.Rd index 15e2dc5f..c2dd6810 100644 --- a/man/computeForestLeafVariances.Rd +++ b/man/computeForestLeafVariances.Rd @@ -27,7 +27,7 @@ Valid inputs depend on the model type, and whether or not a given forest was sam \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute leaf indices. If not provided, this function will return leaf indices for every sample of a forest. 
-This function uses 1-indexing, so the first forest sample corresponds to \code{forest_num = 1}, and so on.} +This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} } \value{ Vector of size \code{length(forest_inds)} with the leaf scale parameter for each requested forest. @@ -38,3 +38,11 @@ Return each forest's leaf node scale parameters. If leaf scale is not sampled for the forest in question, throws an error that the leaf model does not have a stochastic scale parameter. } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +computeForestLeafVariances(bart_model, "mean") +computeForestLeafVariances(bart_model, "mean", 0) +computeForestLeafVariances(bart_model, "mean", c(1,3,5)) +} diff --git a/man/computeForestMaxLeafIndex.Rd b/man/computeForestMaxLeafIndex.Rd index 79f00c03..61b5bd68 100644 --- a/man/computeForestMaxLeafIndex.Rd +++ b/man/computeForestMaxLeafIndex.Rd @@ -39,7 +39,7 @@ Valid inputs depend on the model type, and whether or not a \item{forest_inds}{(Optional) Indices of the forest sample(s) for which to compute max leaf indices. If not provided, this function will return max leaf indices for every sample of a forest. -This function uses 1-indexing, so the first forest sample corresponds to \code{forest_num = 1}, and so on.} +This function uses 0-indexing, so the first forest sample corresponds to \code{forest_num = 0}, and so on.} } \value{ Vector containing the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container. @@ -47,3 +47,11 @@ Vector containing the largest possible leaf index computable by \code{computeFor \description{ Compute and return the largest possible leaf index computable by \code{computeForestLeafIndices} for the forests in a designated forest sample container. 
} +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +computeForestMaxLeafIndex(bart_model, X, "mean") +computeForestMaxLeafIndex(bart_model, X, "mean", 0) +computeForestMaxLeafIndex(bart_model, X, "mean", c(1,3,9)) +} diff --git a/man/createCppRNG.Rd b/man/createCppRNG.Rd index 45e5ef02..c78ad885 100644 --- a/man/createCppRNG.Rd +++ b/man/createCppRNG.Rd @@ -15,3 +15,7 @@ createCppRNG(random_seed = -1) \description{ Create an R class that wraps a C++ random number generator } +\examples{ +rng <- createCppRNG(1234) +rng <- createCppRNG() +} diff --git a/man/createForest.Rd b/man/createForest.Rd index e10db262..71737bef 100644 --- a/man/createForest.Rd +++ b/man/createForest.Rd @@ -26,3 +26,10 @@ createForest( \description{ Create a forest } +\examples{ +num_trees <- 100 +leaf_dimension <- 2 +is_leaf_constant <- FALSE +is_exponentiated <- FALSE +forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +} diff --git a/man/createForestDataset.Rd b/man/createForestDataset.Rd index e2689a5b..f6dc3daf 100644 --- a/man/createForestDataset.Rd +++ b/man/createForestDataset.Rd @@ -19,3 +19,11 @@ createForestDataset(covariates, basis = NULL, variance_weights = NULL) \description{ Create a forest dataset object } +\examples{ +covariate_matrix <- matrix(runif(10*100), ncol = 10) +basis_matrix <- matrix(rnorm(3*100), ncol = 3) +weight_vector <- rnorm(100) +forest_dataset <- createForestDataset(covariate_matrix) +forest_dataset <- createForestDataset(covariate_matrix, basis_matrix) +forest_dataset <- createForestDataset(covariate_matrix, basis_matrix, weight_vector) +} diff --git a/man/createForestModel.Rd b/man/createForestModel.Rd index 0ca6fe16..05263bbb 100644 --- a/man/createForestModel.Rd +++ b/man/createForestModel.Rd @@ -38,3 +38,16 @@ createForestModel( \description{ Create a forest model object } +\examples{ +num_trees <- 100 +n <- 100 +p <- 10 +alpha 
<- 0.95 +beta <- 2.0 +min_samples_leaf <- 2 +max_depth <- 10 +feature_types <- as.integer(rep(0, p)) +X <- matrix(runif(n*p), ncol = p) +forest_dataset <- createForestDataset(X) +forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) +} diff --git a/man/createForestSamples.Rd b/man/createForestSamples.Rd index f3a9fb47..423d3be3 100644 --- a/man/createForestSamples.Rd +++ b/man/createForestSamples.Rd @@ -26,3 +26,10 @@ createForestSamples( \description{ Create a container of forest samples } +\examples{ +num_trees <- 100 +leaf_dimension <- 2 +is_leaf_constant <- FALSE +is_exponentiated <- FALSE +forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +} diff --git a/man/createOutcome.Rd b/man/createOutcome.Rd index fab33a11..fa589872 100644 --- a/man/createOutcome.Rd +++ b/man/createOutcome.Rd @@ -15,3 +15,8 @@ createOutcome(outcome) \description{ Create an outcome object } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +outcome <- createOutcome(y) +} diff --git a/man/createRandomEffectsDataset.Rd b/man/createRandomEffectsDataset.Rd index 4aa9128a..d80c0d3a 100644 --- a/man/createRandomEffectsDataset.Rd +++ b/man/createRandomEffectsDataset.Rd @@ -19,3 +19,10 @@ createRandomEffectsDataset(group_labels, basis, variance_weights = NULL) \description{ Create a random effects dataset object } +\examples{ +rfx_group_ids <- sample(1:2, size = 100, replace = TRUE) +rfx_basis <- matrix(rnorm(3*100), ncol = 3) +weight_vector <- rnorm(100) +rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis, weight_vector) +} diff --git a/man/getRandomEffectSamples.Rd b/man/getRandomEffectSamples.Rd index 9022232a..42f52065 100644 --- a/man/getRandomEffectSamples.Rd +++ b/man/getRandomEffectSamples.Rd @@ -17,3 +17,14 @@ List of random effect samples 
\description{ Generic function for extracting random effect samples from a model object (BCF, BART, etc...) } +\examples{ +n <- 100 +p <- 10 +X <- matrix(runif(n*p), ncol = p) +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- rep(1.0, n) +y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, + rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +rfx_samples <- getRandomEffectSamples(bart_model) +} diff --git a/man/resetActiveForest.Rd b/man/resetActiveForest.Rd index 90a26b39..832b6502 100644 --- a/man/resetActiveForest.Rd +++ b/man/resetActiveForest.Rd @@ -18,3 +18,17 @@ resetActiveForest(active_forest, forest_samples = NULL, forest_num = NULL) Reset an active forest, either from a specific forest in a \code{ForestContainer} or to an ensemble of single-node (i.e. root) trees } +\examples{ +num_trees <- 100 +leaf_dimension <- 1 +is_leaf_constant <- TRUE +is_exponentiated <- FALSE +active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +forest_samples$add_forest_with_constant_leaves(0.0) +forest_samples$add_numeric_split_tree(0, 0, 0, 0, 0.5, -1.0, 1.0) +forest_samples$add_numeric_split_tree(0, 1, 0, 1, 0.75, 3.4, 0.75) +active_forest$set_root_leaves(0.1) +resetActiveForest(active_forest, forest_samples, 0) +resetActiveForest(active_forest) +} diff --git a/man/resetForestModel.Rd b/man/resetForestModel.Rd index 4c730b4a..10633e03 100644 --- a/man/resetForestModel.Rd +++ b/man/resetForestModel.Rd @@ -20,3 +20,39 @@ resetForestModel(forest_model, forest, dataset, residual, is_mean_model) \description{ Re-initialize a forest model (tracking data structures) from a specific forest in a \code{ForestContainer} } +\examples{ +n <- 100 +p <- 10 +num_trees <- 100 +leaf_dimension <- 1 +is_leaf_constant <- TRUE 
+is_exponentiated <- FALSE +alpha <- 0.95 +beta <- 2.0 +min_samples_leaf <- 2 +max_depth <- 10 +feature_types <- as.integer(rep(0, p)) +leaf_model <- 0 +sigma2 <- 1.0 +leaf_scale <- as.matrix(1.0) +variable_weights <- rep(1/p, p) +a_forest <- 1 +b_forest <- 1 +cutpoint_grid_size <- 100 +X <- matrix(runif(n*p), ncol = p) +forest_dataset <- createForestDataset(X) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(n) +outcome <- createOutcome(y) +rng <- createCppRNG(1234) +forest_model <- createForestModel(forest_dataset, feature_types, num_trees, n, alpha, beta, min_samples_leaf, max_depth) +active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +forest_samples <- createForestSamples(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +forest_model$sample_one_iteration( + forest_dataset, outcome, forest_samples, active_forest, + rng, feature_types, leaf_model, leaf_scale, variable_weights, + a_forest, b_forest, sigma2, cutpoint_grid_size, keep_forest = TRUE, + gfr = FALSE, pre_initialized = TRUE +) +resetActiveForest(active_forest, forest_samples, 0) +resetForestModel(forest_model, active_forest, forest_dataset, outcome, TRUE) +} From d58bda8a6898f73de10c2593b5e961100e6ead08 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 30 Jan 2025 02:11:58 -0600 Subject: [PATCH 22/24] Updated RFX docs --- R/random_effects.R | 135 +++++++++++++++++++++++++++ man/createRandomEffectSamples.Rd | 9 ++ man/createRandomEffectsModel.Rd | 8 ++ man/createRandomEffectsTracker.Rd | 8 ++ man/resetRandomEffectsModel.Rd | 22 +++++ man/resetRandomEffectsTracker.Rd | 23 +++++ man/rootResetRandomEffectsModel.Rd | 29 ++++++ man/rootResetRandomEffectsTracker.Rd | 30 ++++++ 8 files changed, 264 insertions(+) diff --git a/R/random_effects.R b/R/random_effects.R index 4604bd9b..cbae7ed1 100644 --- a/R/random_effects.R +++ b/R/random_effects.R @@ -337,6 +337,15 @@ RandomEffectsModel <- R6::R6Class( #' @param random_effects_tracker Object of type 
`RandomEffectsTracker` #' @return `RandomEffectSamples` object #' @export +#' +#' @examples +#' n <- 100 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +#' rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) createRandomEffectSamples <- function(num_components, num_groups, random_effects_tracker) { invisible(output <- RandomEffectSamples$new()) output$load_in_session(num_components, num_groups, random_effects_tracker) @@ -348,6 +357,14 @@ createRandomEffectSamples <- function(num_components, num_groups, random_effects #' @param rfx_group_indices Integer indices indicating groups used to define random effects #' @return `RandomEffectsTracker` object #' @export +#' +#' @examples +#' n <- 100 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) createRandomEffectsTracker <- function(rfx_group_indices) { return(invisible(( RandomEffectsTracker$new(rfx_group_indices) @@ -360,6 +377,14 @@ createRandomEffectsTracker <- function(rfx_group_indices) { #' @param num_groups Number of random effects groups #' @return `RandomEffectsModel` object #' @export +#' +#' @examples +#' n <- 100 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' rfx_model <- createRandomEffectsModel(num_components, num_groups) createRandomEffectsModel <- function(num_components, num_groups) { return(invisible(( RandomEffectsModel$new(num_components, num_groups) @@ -373,7 +398,35 @@ createRandomEffectsModel <- function(num_components, 
num_groups) { #' @param sample_num Index of sample stored in `rfx_samples` from which to reset the state of a random effects model. Zero-indexed, so resetting based on the first sample would require setting `sample_num = 0`. #' @param sigma_alpha_init Initial value of the "working parameter" scale parameter. #' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +#' y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' y_std <- (y-mean(y))/sd(y) +#' outcome <- createOutcome(y_std) +#' rng <- createCppRNG(1234) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' rfx_model <- createRandomEffectsModel(num_components, num_groups) +#' rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +#' rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +#' for (i in 1:3) { +#' rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, +#' rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, +#' keep_sample=TRUE, global_variance=1.0, rng=rng) +#' } +#' resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) resetRandomEffectsModel <- function(rfx_model, rfx_samples, sample_num, sigma_alpha_init) { + if (!is.matrix(sigma_alpha_init)) { + if (!is.double(sigma_alpha_init)) { + stop("`sigma_alpha_init` must be a numeric scalar or matrix") + } + sigma_alpha_init <- as.matrix(sigma_alpha_init) + } reset_rfx_model_cpp(rfx_model$rfx_model_ptr, rfx_samples$rfx_container_ptr, sample_num) rfx_model$set_working_parameter_cov(sigma_alpha_init) } @@ -386,6 +439,29 @@ resetRandomEffectsModel <- function(rfx_model, rfx_samples, sample_num, sigma_al #' @param residual Object of type `Outcome`. #' @param rfx_samples Object of type `RandomEffectSamples`. 
#' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +#' y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' y_std <- (y-mean(y))/sd(y) +#' outcome <- createOutcome(y_std) +#' rng <- createCppRNG(1234) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' rfx_model <- createRandomEffectsModel(num_components, num_groups) +#' rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +#' rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +#' for (i in 1:3) { +#' rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, +#' rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, +#' keep_sample=TRUE, global_variance=1.0, rng=rng) +#' } +#' resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) +#' resetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome, rfx_samples) resetRandomEffectsTracker <- function(rfx_tracker, rfx_model, rfx_dataset, residual, rfx_samples) { reset_rfx_tracker_cpp(rfx_tracker$rfx_tracker_ptr, rfx_dataset$data_ptr, residual$data_ptr, rfx_model$rfx_model_ptr) } @@ -400,6 +476,35 @@ resetRandomEffectsTracker <- function(rfx_tracker, rfx_model, rfx_dataset, resid #' @param sigma_xi_shape Shape parameter for the inverse gamma variance model on the group parameters. #' @param sigma_xi_scale Scale parameter for the inverse gamma variance model on the group parameters. 
#' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +#' y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' y_std <- (y-mean(y))/sd(y) +#' outcome <- createOutcome(y_std) +#' rng <- createCppRNG(1234) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' alpha_init <- c(1) +#' xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) +#' sigma_alpha_init <- diag(1,num_components,num_components) +#' sigma_xi_init <- diag(1,num_components,num_components) +#' sigma_xi_shape <- 1 +#' sigma_xi_scale <- 1 +#' rfx_model <- createRandomEffectsModel(num_components, num_groups) +#' rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +#' rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +#' for (i in 1:3) { +#' rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, +#' rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, +#' keep_sample=TRUE, global_variance=1.0, rng=rng) +#' } +#' rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, +#' sigma_xi_init, sigma_xi_shape, sigma_xi_scale) rootResetRandomEffectsModel <- function(rfx_model, alpha_init, xi_init, sigma_alpha_init, sigma_xi_init, sigma_xi_shape, sigma_xi_scale) { rfx_model$set_working_parameter(alpha_init) @@ -417,6 +522,36 @@ rootResetRandomEffectsModel <- function(rfx_model, alpha_init, xi_init, sigma_al #' @param rfx_dataset Object of type `RandomEffectsDataset`. #' @param residual Object of type `Outcome`. 
#' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- matrix(rep(1.0, n), ncol=1) +#' rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +#' y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' y_std <- (y-mean(y))/sd(y) +#' outcome <- createOutcome(y_std) +#' rng <- createCppRNG(1234) +#' num_groups <- length(unique(rfx_group_ids)) +#' num_components <- ncol(rfx_basis) +#' alpha_init <- c(1) +#' xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) +#' sigma_alpha_init <- diag(1,num_components,num_components) +#' sigma_xi_init <- diag(1,num_components,num_components) +#' sigma_xi_shape <- 1 +#' sigma_xi_scale <- 1 +#' rfx_model <- createRandomEffectsModel(num_components, num_groups) +#' rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +#' rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +#' for (i in 1:3) { +#' rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, +#' rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, +#' keep_sample=TRUE, global_variance=1.0, rng=rng) +#' } +#' rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, +#' sigma_xi_init, sigma_xi_shape, sigma_xi_scale) +#' rootResetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome) rootResetRandomEffectsTracker <- function(rfx_tracker, rfx_model, rfx_dataset, residual) { root_reset_rfx_tracker_cpp(rfx_tracker$rfx_tracker_ptr, rfx_dataset$data_ptr, residual$data_ptr, rfx_model$rfx_model_ptr) } diff --git a/man/createRandomEffectSamples.Rd b/man/createRandomEffectSamples.Rd index 98ee8cf3..871e0340 100644 --- a/man/createRandomEffectSamples.Rd +++ b/man/createRandomEffectSamples.Rd @@ -19,3 +19,12 @@ createRandomEffectSamples(num_components, num_groups, random_effects_tracker) \description{ Create a \code{RandomEffectSamples} object } +\examples{ +n <- 100 +rfx_group_ids <- 
sample(1:2, size = n, replace = TRUE) +rfx_basis <- matrix(rep(1.0, n), ncol=1) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +} diff --git a/man/createRandomEffectsModel.Rd b/man/createRandomEffectsModel.Rd index d71a3c1e..a7ee3ff8 100644 --- a/man/createRandomEffectsModel.Rd +++ b/man/createRandomEffectsModel.Rd @@ -17,3 +17,11 @@ createRandomEffectsModel(num_components, num_groups) \description{ Create a \code{RandomEffectsModel} object } +\examples{ +n <- 100 +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- matrix(rep(1.0, n), ncol=1) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +rfx_model <- createRandomEffectsModel(num_components, num_groups) +} diff --git a/man/createRandomEffectsTracker.Rd b/man/createRandomEffectsTracker.Rd index 3cdf7593..dcf1b4a4 100644 --- a/man/createRandomEffectsTracker.Rd +++ b/man/createRandomEffectsTracker.Rd @@ -15,3 +15,11 @@ createRandomEffectsTracker(rfx_group_indices) \description{ Create a \code{RandomEffectsTracker} object } +\examples{ +n <- 100 +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- matrix(rep(1.0, n), ncol=1) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +} diff --git a/man/resetRandomEffectsModel.Rd b/man/resetRandomEffectsModel.Rd index 4b2c4568..e42797de 100644 --- a/man/resetRandomEffectsModel.Rd +++ b/man/resetRandomEffectsModel.Rd @@ -18,3 +18,25 @@ resetRandomEffectsModel(rfx_model, rfx_samples, sample_num, sigma_alpha_init) \description{ Reset a \code{RandomEffectsModel} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object } +\examples{ +n <- 100 +p <- 10 +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis 
<- matrix(rep(1.0, n), ncol=1) +rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +y_std <- (y-mean(y))/sd(y) +outcome <- createOutcome(y_std) +rng <- createCppRNG(1234) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +rfx_model <- createRandomEffectsModel(num_components, num_groups) +rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +for (i in 1:3) { + rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, + rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, + keep_sample=TRUE, global_variance=1.0, rng=rng) +} +resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) +} diff --git a/man/resetRandomEffectsTracker.Rd b/man/resetRandomEffectsTracker.Rd index 14db8d1a..bae73a66 100644 --- a/man/resetRandomEffectsTracker.Rd +++ b/man/resetRandomEffectsTracker.Rd @@ -26,3 +26,26 @@ resetRandomEffectsTracker( \description{ Reset a \code{RandomEffectsTracker} object based on the parameters indexed by \code{sample_num} in a \code{RandomEffectsSamples} object } +\examples{ +n <- 100 +p <- 10 +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- matrix(rep(1.0, n), ncol=1) +rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +y_std <- (y-mean(y))/sd(y) +outcome <- createOutcome(y_std) +rng <- createCppRNG(1234) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +rfx_model <- createRandomEffectsModel(num_components, num_groups) +rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +for (i in 1:3) { + rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, + rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, + keep_sample=TRUE, 
global_variance=1.0, rng=rng) +} +resetRandomEffectsModel(rfx_model, rfx_samples, 0, 1.0) +resetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome, rfx_samples) +} diff --git a/man/rootResetRandomEffectsModel.Rd b/man/rootResetRandomEffectsModel.Rd index 409ef715..de1b3448 100644 --- a/man/rootResetRandomEffectsModel.Rd +++ b/man/rootResetRandomEffectsModel.Rd @@ -32,3 +32,32 @@ rootResetRandomEffectsModel( \description{ Reset a \code{RandomEffectsModel} object to its "default" state } +\examples{ +n <- 100 +p <- 10 +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- matrix(rep(1.0, n), ncol=1) +rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +y_std <- (y-mean(y))/sd(y) +outcome <- createOutcome(y_std) +rng <- createCppRNG(1234) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +alpha_init <- c(1) +xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) +sigma_alpha_init <- diag(1,num_components,num_components) +sigma_xi_init <- diag(1,num_components,num_components) +sigma_xi_shape <- 1 +sigma_xi_scale <- 1 +rfx_model <- createRandomEffectsModel(num_components, num_groups) +rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +for (i in 1:3) { + rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, + rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, + keep_sample=TRUE, global_variance=1.0, rng=rng) +} +rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, + sigma_xi_init, sigma_xi_shape, sigma_xi_scale) +} diff --git a/man/rootResetRandomEffectsTracker.Rd b/man/rootResetRandomEffectsTracker.Rd index 3fbd1860..1f3c5461 100644 --- a/man/rootResetRandomEffectsTracker.Rd +++ b/man/rootResetRandomEffectsTracker.Rd @@ -18,3 +18,33 @@ rootResetRandomEffectsTracker(rfx_tracker, 
rfx_model, rfx_dataset, residual) \description{ Reset a \code{RandomEffectsTracker} object to its "default" state } +\examples{ +n <- 100 +p <- 10 +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- matrix(rep(1.0, n), ncol=1) +rfx_dataset <- createRandomEffectsDataset(rfx_group_ids, rfx_basis) +y <- (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +y_std <- (y-mean(y))/sd(y) +outcome <- createOutcome(y_std) +rng <- createCppRNG(1234) +num_groups <- length(unique(rfx_group_ids)) +num_components <- ncol(rfx_basis) +alpha_init <- c(1) +xi_init <- matrix(rep(alpha_init, num_groups),num_components,num_groups) +sigma_alpha_init <- diag(1,num_components,num_components) +sigma_xi_init <- diag(1,num_components,num_components) +sigma_xi_shape <- 1 +sigma_xi_scale <- 1 +rfx_model <- createRandomEffectsModel(num_components, num_groups) +rfx_tracker <- createRandomEffectsTracker(rfx_group_ids) +rfx_samples <- createRandomEffectSamples(num_components, num_groups, rfx_tracker) +for (i in 1:3) { + rfx_model$sample_random_effect(rfx_dataset=rfx_dataset, residual=outcome, + rfx_tracker=rfx_tracker, rfx_samples=rfx_samples, + keep_sample=TRUE, global_variance=1.0, rng=rng) +} +rootResetRandomEffectsModel(rfx_model, alpha_init, xi_init, sigma_alpha_init, + sigma_xi_init, sigma_xi_shape, sigma_xi_scale) +rootResetRandomEffectsTracker(rfx_tracker, rfx_model, rfx_dataset, outcome) +} From 86fd50772f9b210896ded69cdb8e0b1a6ef64f4c Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 30 Jan 2025 02:45:38 -0600 Subject: [PATCH 23/24] Updated R serialization docs --- R/random_effects.R | 9 +- R/serialization.R | 96 ++++++++++++++++++- man/createCppJson.Rd | 5 + man/createCppJsonFile.Rd | 9 ++ man/createCppJsonString.Rd | 7 ++ man/loadForestContainerCombinedJson.Rd | 7 ++ man/loadForestContainerCombinedJsonString.Rd | 7 ++ man/loadForestContainerJson.Rd | 7 ++ man/loadRandomEffectSamplesCombinedJson.Rd | 12 +++ ...adRandomEffectSamplesCombinedJsonString.Rd | 12 +++ 
man/loadRandomEffectSamplesJson.Rd | 12 +++ man/loadScalarJson.Rd | 6 ++ man/loadVectorJson.Rd | 6 ++ 13 files changed, 188 insertions(+), 7 deletions(-) diff --git a/R/random_effects.R b/R/random_effects.R index cbae7ed1..38c47166 100644 --- a/R/random_effects.R +++ b/R/random_effects.R @@ -72,9 +72,9 @@ RandomEffectSamples <- R6::R6Class( #' @param json_rfx_groupids_label Label referring to a particular set of rfx group IDs (i.e. "random_effect_groupids_0") in the overall json hierarchy #' @return A new `RandomEffectSamples` object. load_from_json_string = function(json_string, json_rfx_container_label, json_rfx_mapper_label, json_rfx_groupids_label) { - self$rfx_container_ptr <- rfx_container_from_json_string_cpp(json_object$json_ptr, json_rfx_container_label) - self$label_mapper_ptr <- rfx_label_mapper_from_json_string_cpp(json_object$json_ptr, json_rfx_mapper_label) - self$training_group_ids <- rfx_group_ids_from_json_string_cpp(json_object$json_ptr, json_rfx_groupids_label) + self$rfx_container_ptr <- rfx_container_from_json_string_cpp(json_string, json_rfx_container_label) + self$label_mapper_ptr <- rfx_label_mapper_from_json_string_cpp(json_string, json_rfx_mapper_label) + self$training_group_ids <- rfx_group_ids_from_json_string_cpp(json_string, json_rfx_groupids_label) }, #' @description @@ -85,7 +85,8 @@ RandomEffectSamples <- R6::R6Class( #' @param json_rfx_groupids_label Label referring to a particular set of rfx group IDs (i.e. 
"random_effect_groupids_0") in the overall json hierarchy #' @return NULL (updates object in-place) append_from_json_string = function(json_string, json_rfx_container_label, json_rfx_mapper_label, json_rfx_groupids_label) { - rfx_container_append_from_json_string_cpp(self$rfx_container_ptr, json_object$json_ptr, json_rfx_container_label) + # Append RFX objects + rfx_container_append_from_json_string_cpp(self$rfx_container_ptr, json_string, json_rfx_container_label) }, #' @description diff --git a/R/serialization.R b/R/serialization.R index bca7f23f..14463c52 100644 --- a/R/serialization.R +++ b/R/serialization.R @@ -377,6 +377,13 @@ CppJson <- R6::R6Class( #' #' @return `ForestSamples` object #' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +#' bart_json <- saveBARTModelToJson(bart_model) +#' mean_forest <- loadForestContainerJson(bart_json, "forest_0") loadForestContainerJson <- function(json_object, json_forest_label) { invisible(output <- ForestSamples$new(0,1,T)) output$load_from_json(json_object, json_forest_label) @@ -390,6 +397,13 @@ loadForestContainerJson <- function(json_object, json_forest_label) { #' #' @return `ForestSamples` object #' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +#' bart_json <- list(saveBARTModelToJson(bart_model)) +#' mean_forest <- loadForestContainerCombinedJson(bart_json, "forest_0") loadForestContainerCombinedJson <- function(json_object_list, json_forest_label) { invisible(output <- ForestSamples$new(0,1,T)) for (i in 1:length(json_object_list)) { @@ -410,6 +424,13 @@ loadForestContainerCombinedJson <- function(json_object_list, json_forest_label) #' #' @return `ForestSamples` object #' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + 
rnorm(100) +#' bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +#' bart_json_string <- list(saveBARTModelToJsonString(bart_model)) +#' mean_forest <- loadForestContainerCombinedJsonString(bart_json_string, "forest_0") loadForestContainerCombinedJsonString <- function(json_string_list, json_forest_label) { invisible(output <- ForestSamples$new(0,1,T)) for (i in 1:length(json_string_list)) { @@ -430,6 +451,18 @@ loadForestContainerCombinedJsonString <- function(json_string_list, json_forest_ #' #' @return `RandomEffectSamples` object #' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' X <- matrix(runif(n*p), ncol = p) +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- rep(1.0, n) +#' y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, +#' rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +#' bart_json <- saveBARTModelToJson(bart_model) +#' rfx_samples <- loadRandomEffectSamplesJson(bart_json, 0) loadRandomEffectSamplesJson <- function(json_object, json_rfx_num) { json_rfx_container_label <- paste0("random_effect_container_", json_rfx_num) json_rfx_mapper_label <- paste0("random_effect_label_mapper_", json_rfx_num) @@ -446,6 +479,18 @@ loadRandomEffectSamplesJson <- function(json_object, json_rfx_num) { #' #' @return `RandomEffectSamples` object #' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' X <- matrix(runif(n*p), ncol = p) +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- rep(1.0, n) +#' y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, +#' rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +#' bart_json <- list(saveBARTModelToJson(bart_model)) +#' rfx_samples <- loadRandomEffectSamplesCombinedJson(bart_json, 0) loadRandomEffectSamplesCombinedJson <- 
function(json_object_list, json_rfx_num) { json_rfx_container_label <- paste0("random_effect_container_", json_rfx_num) json_rfx_mapper_label <- paste0("random_effect_label_mapper_", json_rfx_num) @@ -469,6 +514,18 @@ loadRandomEffectSamplesCombinedJson <- function(json_object_list, json_rfx_num) #' #' @return `RandomEffectSamples` object #' @export +#' +#' @examples +#' n <- 100 +#' p <- 10 +#' X <- matrix(runif(n*p), ncol = p) +#' rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +#' rfx_basis <- rep(1.0, n) +#' y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +#' bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, +#' rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +#' bart_json_string <- list(saveBARTModelToJsonString(bart_model)) +#' rfx_samples <- loadRandomEffectSamplesCombinedJsonString(bart_json_string, 0) loadRandomEffectSamplesCombinedJsonString <- function(json_string_list, json_rfx_num) { json_rfx_container_label <- paste0("random_effect_container_", json_rfx_num) json_rfx_mapper_label <- paste0("random_effect_label_mapper_", json_rfx_num) @@ -493,9 +550,15 @@ loadRandomEffectSamplesCombinedJsonString <- function(json_string_list, json_rfx #' #' @return R vector #' @export +#' +#' @examples +#' example_vec <- runif(10) +#' example_json <- createCppJson() +#' example_json$add_vector("myvec", example_vec) +#' roundtrip_vec <- loadVectorJson(example_json, "myvec") loadVectorJson <- function(json_object, json_vector_label, subfolder_name = NULL) { if (is.null(subfolder_name)) { - output <- json_object$get_vector(json_vector_label, subfolder_name) + output <- json_object$get_vector(json_vector_label) } else { output <- json_object$get_vector(json_vector_label, subfolder_name) } @@ -510,11 +573,17 @@ loadVectorJson <- function(json_object, json_vector_label, subfolder_name = NULL #' #' @return R vector #' @export +#' +#' @examples +#' example_scalar <- 5.4 +#' example_json <- 
createCppJson() +#' example_json$add_scalar("myscalar", example_scalar) +#' roundtrip_scalar <- loadScalarJson(example_json, "myscalar") loadScalarJson <- function(json_object, json_scalar_label, subfolder_name = NULL) { if (is.null(subfolder_name)) { - output <- json_object$get_vector(json_scalar_label, subfolder_name) + output <- json_object$get_scalar(json_scalar_label) } else { - output <- json_object$get_vector(json_scalar_label, subfolder_name) + output <- json_object$get_scalar(json_scalar_label, subfolder_name) } return(output) } @@ -523,6 +592,11 @@ loadScalarJson <- function(json_object, json_scalar_label, subfolder_name = NULL #' #' @return `CppJson` object #' @export +#' +#' @examples +#' example_vec <- runif(10) +#' example_json <- createCppJson() +#' example_json$add_vector("myvec", example_vec) createCppJson <- function() { return(invisible(( CppJson$new() @@ -534,6 +608,15 @@ createCppJson <- function() { #' @param json_filename Name of file to read. Must end in `.json`. 
#' @return `CppJson` object #' @export +#' +#' @examples +#' example_vec <- runif(10) +#' example_json <- createCppJson() +#' example_json$add_vector("myvec", example_vec) +#' tmpjson <- tempfile(fileext = ".json") +#' example_json$save_file(file.path(tmpjson)) +#' example_json_roundtrip <- createCppJsonFile(file.path(tmpjson)) +#' unlink(tmpjson) createCppJsonFile <- function(json_filename) { invisible(( output <- CppJson$new() @@ -547,6 +630,13 @@ createCppJsonFile <- function(json_filename) { #' @param json_string JSON string dump #' @return `CppJson` object #' @export +#' +#' @examples +#' example_vec <- runif(10) +#' example_json <- createCppJson() +#' example_json$add_vector("myvec", example_vec) +#' example_json_string <- example_json$return_json_string() +#' example_json_roundtrip <- createCppJsonString(example_json_string) createCppJsonString <- function(json_string) { invisible(( output <- CppJson$new() diff --git a/man/createCppJson.Rd b/man/createCppJson.Rd index 92d7550b..12bb3666 100644 --- a/man/createCppJson.Rd +++ b/man/createCppJson.Rd @@ -12,3 +12,8 @@ createCppJson() \description{ Create a new (empty) C++ Json object } +\examples{ +example_vec <- runif(10) +example_json <- createCppJson() +example_json$add_vector("myvec", example_vec) +} diff --git a/man/createCppJsonFile.Rd b/man/createCppJsonFile.Rd index 6809665d..0879a870 100644 --- a/man/createCppJsonFile.Rd +++ b/man/createCppJsonFile.Rd @@ -15,3 +15,12 @@ createCppJsonFile(json_filename) \description{ Create a C++ Json object from a Json file } +\examples{ +example_vec <- runif(10) +example_json <- createCppJson() +example_json$add_vector("myvec", example_vec) +tmpjson <- tempfile(fileext = ".json") +example_json$save_file(file.path(tmpjson)) +example_json_roundtrip <- createCppJsonFile(file.path(tmpjson)) +unlink(tmpjson) +} diff --git a/man/createCppJsonString.Rd b/man/createCppJsonString.Rd index a8215cc6..9f8476e8 100644 --- a/man/createCppJsonString.Rd +++ 
b/man/createCppJsonString.Rd @@ -15,3 +15,10 @@ createCppJsonString(json_string) \description{ Create a C++ Json object from a Json string } +\examples{ +example_vec <- runif(10) +example_json <- createCppJson() +example_json$add_vector("myvec", example_vec) +example_json_string <- example_json$return_json_string() +example_json_roundtrip <- createCppJsonString(example_json_string) +} diff --git a/man/loadForestContainerCombinedJson.Rd b/man/loadForestContainerCombinedJson.Rd index 90d4e051..594b832f 100644 --- a/man/loadForestContainerCombinedJson.Rd +++ b/man/loadForestContainerCombinedJson.Rd @@ -17,3 +17,10 @@ loadForestContainerCombinedJson(json_object_list, json_forest_label) \description{ Combine multiple JSON model objects containing forests (with the same hierarchy / schema) into a single forest_container } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +bart_json <- list(saveBARTModelToJson(bart_model)) +mean_forest <- loadForestContainerCombinedJson(bart_json, "forest_0") +} diff --git a/man/loadForestContainerCombinedJsonString.Rd b/man/loadForestContainerCombinedJsonString.Rd index 7b9a4d82..b0d320ea 100644 --- a/man/loadForestContainerCombinedJsonString.Rd +++ b/man/loadForestContainerCombinedJsonString.Rd @@ -17,3 +17,10 @@ loadForestContainerCombinedJsonString(json_string_list, json_forest_label) \description{ Combine multiple JSON strings representing model objects containing forests (with the same hierarchy / schema) into a single forest_container } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +bart_json_string <- list(saveBARTModelToJsonString(bart_model)) +mean_forest <- loadForestContainerCombinedJsonString(bart_json_string, "forest_0") +} diff --git a/man/loadForestContainerJson.Rd b/man/loadForestContainerJson.Rd index 03ff68d5..2aad798d 100644 --- 
a/man/loadForestContainerJson.Rd +++ b/man/loadForestContainerJson.Rd @@ -17,3 +17,10 @@ loadForestContainerJson(json_object, json_forest_label) \description{ Load a container of forest samples from json } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +bart_model <- bart(X, y, num_gfr=0, num_mcmc=10) +bart_json <- saveBARTModelToJson(bart_model) +mean_forest <- loadForestContainerJson(bart_json, "forest_0") +} diff --git a/man/loadRandomEffectSamplesCombinedJson.Rd b/man/loadRandomEffectSamplesCombinedJson.Rd index d7ef5705..ed327b92 100644 --- a/man/loadRandomEffectSamplesCombinedJson.Rd +++ b/man/loadRandomEffectSamplesCombinedJson.Rd @@ -17,3 +17,15 @@ loadRandomEffectSamplesCombinedJson(json_object_list, json_rfx_num) \description{ Combine multiple JSON model objects containing random effects (with the same hierarchy / schema) into a single container } +\examples{ +n <- 100 +p <- 10 +X <- matrix(runif(n*p), ncol = p) +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- rep(1.0, n) +y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, + rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +bart_json <- list(saveBARTModelToJson(bart_model)) +rfx_samples <- loadRandomEffectSamplesCombinedJson(bart_json, 0) +} diff --git a/man/loadRandomEffectSamplesCombinedJsonString.Rd b/man/loadRandomEffectSamplesCombinedJsonString.Rd index 3531b968..7537d573 100644 --- a/man/loadRandomEffectSamplesCombinedJsonString.Rd +++ b/man/loadRandomEffectSamplesCombinedJsonString.Rd @@ -17,3 +17,15 @@ loadRandomEffectSamplesCombinedJsonString(json_string_list, json_rfx_num) \description{ Combine multiple JSON strings representing model objects containing random effects (with the same hierarchy / schema) into a single container } +\examples{ +n <- 100 +p <- 10 +X <- matrix(runif(n*p), ncol = p) +rfx_group_ids <- 
sample(1:2, size = n, replace = TRUE) +rfx_basis <- rep(1.0, n) +y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, + rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +bart_json_string <- list(saveBARTModelToJsonString(bart_model)) +rfx_samples <- loadRandomEffectSamplesCombinedJsonString(bart_json_string, 0) +} diff --git a/man/loadRandomEffectSamplesJson.Rd b/man/loadRandomEffectSamplesJson.Rd index f5cbade1..670b67cb 100644 --- a/man/loadRandomEffectSamplesJson.Rd +++ b/man/loadRandomEffectSamplesJson.Rd @@ -17,3 +17,15 @@ loadRandomEffectSamplesJson(json_object, json_rfx_num) \description{ Load a container of random effect samples from json } +\examples{ +n <- 100 +p <- 10 +X <- matrix(runif(n*p), ncol = p) +rfx_group_ids <- sample(1:2, size = n, replace = TRUE) +rfx_basis <- rep(1.0, n) +y <- (-5 + 10*(X[,1] > 0.5)) + (-2*(rfx_group_ids==1)+2*(rfx_group_ids==2)) + rnorm(n) +bart_model <- bart(X_train=X, y_train=y, rfx_group_ids_train=rfx_group_ids, + rfx_basis_train = rfx_basis, num_gfr=0, num_mcmc=10) +bart_json <- saveBARTModelToJson(bart_model) +rfx_samples <- loadRandomEffectSamplesJson(bart_json, 0) +} diff --git a/man/loadScalarJson.Rd b/man/loadScalarJson.Rd index a683e147..2057ab1d 100644 --- a/man/loadScalarJson.Rd +++ b/man/loadScalarJson.Rd @@ -19,3 +19,9 @@ R vector \description{ Load a scalar from json } +\examples{ +example_scalar <- 5.4 +example_json <- createCppJson() +example_json$add_scalar("myscalar", example_scalar) +roundtrip_scalar <- loadScalarJson(example_json, "myscalar") +} diff --git a/man/loadVectorJson.Rd b/man/loadVectorJson.Rd index 1ac55946..2d1cf846 100644 --- a/man/loadVectorJson.Rd +++ b/man/loadVectorJson.Rd @@ -19,3 +19,9 @@ R vector \description{ Load a vector from json } +\examples{ +example_vec <- runif(10) +example_json <- createCppJson() +example_json$add_vector("myvec", example_vec) +roundtrip_vec <- 
loadVectorJson(example_json, "myvec") +} From 12299e1d1d2ea96f03005a23ddc0de3953eb0e27 Mon Sep 17 00:00:00 2001 From: Drew Herren Date: Thu, 30 Jan 2025 02:58:44 -0600 Subject: [PATCH 24/24] Updated variance and preprocessing docs --- R/utils.R | 12 ++++++++++ R/variance.R | 23 +++++++++++++++++++- man/createPreprocessorFromJson.Rd | 6 +++++ man/createPreprocessorFromJsonString.Rd | 6 +++++ man/sampleGlobalErrorVarianceOneIteration.Rd | 11 ++++++++++ man/sampleLeafVarianceOneIteration.Rd | 11 ++++++++++ 6 files changed, 68 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 5d92ba60..c7b7bda7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -429,6 +429,12 @@ savePreprocessorToJsonString <- function(object){ #' #' @returns Preprocessor object that can be used with the `preprocessPredictionData` function #' @export +#' +#' @examples +#' cov_mat <- matrix(1:12, ncol = 3) +#' preprocess_list <- preprocessTrainData(cov_mat) +#' preprocessor_json <- convertPreprocessorToJson(preprocess_list$metadata) +#' preprocessor_roundtrip <- createPreprocessorFromJson(preprocessor_json) createPreprocessorFromJson <- function(json_object){ # Initialize the metadata list metadata <- list() @@ -472,6 +478,12 @@ createPreprocessorFromJson <- function(json_object){ #' #' @return Preprocessor object that can be used with the `preprocessPredictionData` function #' @export +#' +#' @examples +#' cov_mat <- matrix(1:12, ncol = 3) +#' preprocess_list <- preprocessTrainData(cov_mat) +#' preprocessor_json_string <- savePreprocessorToJsonString(preprocess_list$metadata) +#' preprocessor_roundtrip <- createPreprocessorFromJsonString(preprocessor_json_string) createPreprocessorFromJsonString <- function(json_string){ # Load a `CppJson` object from string preprocessor_json <- createCppJsonString(json_string) diff --git a/R/variance.R b/R/variance.R index 1ae75137..f2688b15 100644 --- a/R/variance.R +++ b/R/variance.R @@ -5,8 +5,18 @@ #' @param rng C++ random number generator #' @param a 
Global variance shape parameter #' @param b Global variance scale parameter -#' #' @export +#' +#' @examples +#' X <- matrix(runif(10*100), ncol = 10) +#' y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +#' y_std <- (y-mean(y))/sd(y) +#' forest_dataset <- createForestDataset(X) +#' outcome <- createOutcome(y_std) +#' rng <- createCppRNG(1234) +#' a <- 1.0 +#' b <- 1.0 +#' sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset, rng, a, b) sampleGlobalErrorVarianceOneIteration <- function(residual, dataset, rng, a, b) { return(sample_sigma2_one_iteration_cpp(residual$data_ptr, dataset$data_ptr, rng$rng_ptr, a, b)) } @@ -19,6 +29,17 @@ sampleGlobalErrorVarianceOneIteration <- function(residual, dataset, rng, a, b) #' @param b Leaf variance scale parameter #' #' @export +#' +#' @examples +#' num_trees <- 100 +#' leaf_dimension <- 1 +#' is_leaf_constant <- TRUE +#' is_exponentiated <- FALSE +#' active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +#' rng <- createCppRNG(1234) +#' a <- 1.0 +#' b <- 1.0 +#' tau <- sampleLeafVarianceOneIteration(active_forest, rng, a, b) sampleLeafVarianceOneIteration <- function(forest, rng, a, b) { return(sample_tau_one_iteration_cpp(forest$forest_ptr, rng$rng_ptr, a, b)) } diff --git a/man/createPreprocessorFromJson.Rd b/man/createPreprocessorFromJson.Rd index 3edca354..087c0555 100644 --- a/man/createPreprocessorFromJson.Rd +++ b/man/createPreprocessorFromJson.Rd @@ -15,3 +15,9 @@ Preprocessor object that can be used with the \code{preprocessPredictionData} fu \description{ Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor } +\examples{ +cov_mat <- matrix(1:12, ncol = 3) +preprocess_list <- preprocessTrainData(cov_mat) +preprocessor_json <- convertPreprocessorToJson(preprocess_list$metadata) +preprocessor_roundtrip <- createPreprocessorFromJson(preprocessor_json) +} diff --git a/man/createPreprocessorFromJsonString.Rd 
b/man/createPreprocessorFromJsonString.Rd index 00974b83..9d3ff286 100644 --- a/man/createPreprocessorFromJsonString.Rd +++ b/man/createPreprocessorFromJsonString.Rd @@ -15,3 +15,9 @@ Preprocessor object that can be used with the \code{preprocessPredictionData} fu \description{ Reload a covariate preprocessor object from a JSON string containing a serialized preprocessor } +\examples{ +cov_mat <- matrix(1:12, ncol = 3) +preprocess_list <- preprocessTrainData(cov_mat) +preprocessor_json_string <- savePreprocessorToJsonString(preprocess_list$metadata) +preprocessor_roundtrip <- createPreprocessorFromJsonString(preprocessor_json_string) +} diff --git a/man/sampleGlobalErrorVarianceOneIteration.Rd b/man/sampleGlobalErrorVarianceOneIteration.Rd index 9e44856a..cd8eda75 100644 --- a/man/sampleGlobalErrorVarianceOneIteration.Rd +++ b/man/sampleGlobalErrorVarianceOneIteration.Rd @@ -20,3 +20,14 @@ sampleGlobalErrorVarianceOneIteration(residual, dataset, rng, a, b) \description{ Sample one iteration of the (inverse gamma) global variance model } +\examples{ +X <- matrix(runif(10*100), ncol = 10) +y <- -5 + 10*(X[,1] > 0.5) + rnorm(100) +y_std <- (y-mean(y))/sd(y) +forest_dataset <- createForestDataset(X) +outcome <- createOutcome(y_std) +rng <- createCppRNG(1234) +a <- 1.0 +b <- 1.0 +sigma2 <- sampleGlobalErrorVarianceOneIteration(outcome, forest_dataset, rng, a, b) +} diff --git a/man/sampleLeafVarianceOneIteration.Rd b/man/sampleLeafVarianceOneIteration.Rd index d2e66db2..b444a726 100644 --- a/man/sampleLeafVarianceOneIteration.Rd +++ b/man/sampleLeafVarianceOneIteration.Rd @@ -18,3 +18,14 @@ sampleLeafVarianceOneIteration(forest, rng, a, b) \description{ Sample one iteration of the leaf parameter variance model (only for univariate basis and constant leaf!) 
} +\examples{ +num_trees <- 100 +leaf_dimension <- 1 +is_leaf_constant <- TRUE +is_exponentiated <- FALSE +active_forest <- createForest(num_trees, leaf_dimension, is_leaf_constant, is_exponentiated) +rng <- createCppRNG(1234) +a <- 1.0 +b <- 1.0 +tau <- sampleLeafVarianceOneIteration(active_forest, rng, a, b) +}