diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..663ccb93
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,16 @@
+* text=auto
+
+*.c text eol=lf
+*.h text eol=lf
+*.cc text eol=lf
+*.cuh text eol=lf
+*.cu text eol=lf
+*.py text eol=lf
+*.txt text eol=lf
+*.R text eol=lf
+
+*.sh text eol=lf
+*.ac text eol=lf
+
+*.md text eol=lf
+*.csv text eol=lf
\ No newline at end of file
diff --git a/.github/workflows/cpp-test.yml b/.github/workflows/cpp-test.yml
index 84648b5d..5f129e0a 100644
--- a/.github/workflows/cpp-test.yml
+++ b/.github/workflows/cpp-test.yml
@@ -56,6 +56,12 @@ jobs:
       shell: bash
       run: |
         echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
+
+    - name: Set up dependencies (linux clang)
+      # Set up OpenMP on ubuntu-latest for the clang toolchain (OpenMP doesn't ship with clang the way it does with GCC and MSVC)
+      if: matrix.os == 'ubuntu-latest' && matrix.cpp_compiler == 'clang++'
+      run: |
+        sudo apt-get update && sudo apt-get install -y libomp-dev

     - name: Configure CMake
       # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
@@ -69,6 +75,7 @@
         -DUSE_SANITIZER=OFF
         -DBUILD_TEST=ON
         -DBUILD_DEBUG_TARGETS=OFF
+        -DUSE_OPENMP=ON
         -S ${{ github.workspace }}

     - name: Build
diff --git a/.github/workflows/pypi-wheels.yml b/.github/workflows/pypi-wheels.yml
index 29e15aa1..1772f6fd 100644
--- a/.github/workflows/pypi-wheels.yml
+++ b/.github/workflows/pypi-wheels.yml
@@ -21,26 +21,37 @@ jobs:
       include:
         - os: ubuntu-latest
           cibw_archs: "x86_64"
+          macos_deployment_target: "10.13" # Unused, just setting the variable as a placeholder
         - os: ubuntu-24.04-arm
           cibw_archs: "aarch64"
+          macos_deployment_target: "10.13" # Unused, just setting the variable as a placeholder
         - os: windows-latest
           cibw_archs: "auto64"
+          macos_deployment_target: "10.13" # Unused, just setting the variable as a placeholder
         - os: macos-13
           cibw_archs: "x86_64"
+          macos_deployment_target: "13.0"
         - os: macos-14
           cibw_archs: "arm64"
+          macos_deployment_target: "14.0"

     steps:
       - uses: actions/checkout@v4
         with:
           submodules: 'recursive'
+
+      - name: Set up OpenMP (macOS)
+        # Set up OpenMP on macOS since it doesn't ship with the Apple Clang compiler suite
+        if: matrix.os == 'macos-13' || matrix.os == 'macos-14'
+        run: |
+          brew install libomp

       - name: Build wheels
         uses: pypa/cibuildwheel@v2.23.2
         env:
           CIBW_SKIP: "pp* *-musllinux_* *-win32"
           CIBW_ARCHS: ${{ matrix.cibw_archs }}
-          MACOSX_DEPLOYMENT_TARGET: "10.13"
+          MACOSX_DEPLOYMENT_TARGET: ${{ matrix.macos_deployment_target }}

       - uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
index 1b665fb6..719160a6 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/python-test.yml
@@ -30,6 +30,12 @@ jobs:
       with:
         python-version: "3.10"
         cache: "pip"
+
+    - name: Set up OpenMP (macOS)
+      # Set up OpenMP on macOS since it doesn't ship with the Apple Clang compiler suite
+      if: matrix.os == 'macos-latest'
+      run: |
+        brew install libomp

     - name: Install Package with Relevant Dependencies
       run: |
diff --git a/.github/workflows/r-test.yml b/.github/workflows/r-test.yml
index 0a8464bc..f00938a0 100644
--- a/.github/workflows/r-test.yml
+++ b/.github/workflows/r-test.yml
@@ -22,6 +22,11 @@ jobs:
         os: [ubuntu-latest, windows-latest, macos-latest]

     steps:
+      - name: Prevent conversion of line endings on Windows
+        if: startsWith(matrix.os, 'windows')
+        shell: pwsh
+        run: git config --global core.autocrlf false
+
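+      # The step above runs before actions/checkout so that the fresh clone
+      # honors the eol=lf rules in the new .gitattributes rather than CRLF
+      # conversion. A quick local sanity check, not run in CI (`git ls-files --eol`
+      # is standard git; the worktree column should read "w/lf" for these files):
+      #   git ls-files --eol -- '*.R' '*.sh'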
       - uses: actions/checkout@v4
         with:
           submodules: 'recursive'
diff --git a/.github/workflows/regression-test.yml b/.github/workflows/regression-test.yml
new file mode 100644
index 00000000..cdc00621
--- /dev/null
+++ b/.github/workflows/regression-test.yml
@@ -0,0 +1,59 @@
+on:
+  workflow_dispatch:
+
+name: Running stochtree on benchmark datasets
+
+jobs:
+  stochtree_r:
+    name: stochtree-r-bart-regression-test
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout stochtree repo
+        uses: actions/checkout@v4
+        with:
+          submodules: 'recursive'
+
+      - name: Setup pandoc
+        uses: r-lib/actions/setup-pandoc@v2
+
+      - name: Setup R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - name: Create a properly formatted version of the stochtree R package in a subfolder
+        run: |
+          Rscript cran-bootstrap.R 0 0 1
+
+      - name: Setup R dependencies
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::testthat, any::decor, local::stochtree_cran
+
+      - name: Create output directory for BART regression test results
+        run: |
+          mkdir -p tools/regression/bart/stochtree_bart_r_results
+          mkdir -p tools/regression/bcf/stochtree_bcf_r_results
+
+      - name: Run the BART regression test benchmark suite
+        run: |
+          Rscript tools/regression/bart/regression_test_dispatch_bart.R
+          Rscript tools/regression/bcf/regression_test_dispatch_bcf.R
+
+      - name: Collate and analyze regression test results
+        run: |
+          Rscript tools/regression/bart/regression_test_analysis_bart.R
+          Rscript tools/regression/bcf/regression_test_analysis_bcf.R
+
+      - name: Store BART benchmark test results as an artifact of the run
+        uses: actions/upload-artifact@v4
+        with:
+          name: stochtree-r-bart-summary
+          path: tools/regression/bart/stochtree_bart_r_results/stochtree_bart_r_summary.csv
+
+      - name: Store BCF benchmark test results as an artifact of the run
+        uses: actions/upload-artifact@v4
+        with:
+          name: stochtree-r-bcf-summary
+          path: tools/regression/bcf/stochtree_bcf_r_results/stochtree_bcf_r_summary.csv
diff --git a/.gitignore b/.gitignore
index 3fa818c1..e305023a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,6 +70,11 @@ po/*~
 # RStudio Connect folder
 rsconnect/

+# Configuration files generated by R build
+config.status
+config.log
+src/Makevars
+
 ## Python gitignore

 # Byte-compiled / optimized / DLL files
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c8f1796..1d1efe55 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,8 @@
 # Build options
-option(USE_DEBUG "Set to ON for Debug mode" OFF)
+option(USE_DEBUG "Build with debug symbols and without optimization" OFF)
 option(USE_SANITIZER "Use santizer flags" OFF)
+option(USE_OPENMP "Use OpenMP" ON)
+option(USE_HOMEBREW_FALLBACK "(macOS-only) also look in 'brew --prefix' for libraries (e.g. OpenMP)" ON)
 option(BUILD_TEST "Build C++ tests with Google Test" OFF)
 option(BUILD_DEBUG_TARGETS "Build Standalone C++ Programs for Debugging" ON)
 option(BUILD_PYTHON "Build Shared Library for Python Package" OFF)
@@ -9,8 +11,8 @@
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

-# Default to CMake 3.16
-cmake_minimum_required(VERSION 3.16)
+# Default to CMake 3.20
+cmake_minimum_required(VERSION 3.20)

 # Define the project
 project(stochtree LANGUAGES C CXX)
@@ -34,6 +36,13 @@
 if(USE_DEBUG)
     add_definitions(-DDEBUG)
 endif()

+# Linker flags (empty by default, updated if using OpenMP)
+set(
+    STOCHTREE_LINK_FLAGS
+    ""
+)
+
+# Unix / MinGW compiler flags
 if(UNIX OR MINGW OR CYGWIN)
     set(
         CMAKE_CXX_FLAGS
@@ -42,11 +51,12 @@
     if(USE_DEBUG)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
     else()
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3")
     endif()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-unused-private-field")
 endif()

+# MSVC compiler flags
 if(MSVC)
     set(
         variables
@@ -72,6 +82,33 @@
     endif()
 endif()

+# OpenMP
+if(USE_OPENMP)
+    add_definitions(-DSTOCHTREE_OPENMP_AVAILABLE)
+    if(APPLE)
+        find_package(OpenMP)
+        if(NOT OpenMP_FOUND)
+            if(USE_HOMEBREW_FALLBACK)
+                execute_process(COMMAND brew --prefix libomp
+                                OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
+                                OUTPUT_STRIP_TRAILING_WHITESPACE)
+                set(OpenMP_C_FLAGS "-Xclang -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+                set(OpenMP_CXX_FLAGS "-Xclang -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+                set(OpenMP_C_INCLUDE_DIR "")
+                set(OpenMP_CXX_INCLUDE_DIR "")
+                set(OpenMP_C_LIB_NAMES libomp)
+                set(OpenMP_CXX_LIB_NAMES libomp)
+                set(OpenMP_libomp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
+            endif()
+            find_package(OpenMP REQUIRED)
+        endif()
+    else()
+        find_package(OpenMP REQUIRED)
+    endif()
+    # Update flags with OpenMP
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
+
 # Header file directory
 set(StochTree_HEADER_DIR ${PROJECT_SOURCE_DIR}/include)
@@ -80,6 +117,8 @@
 # Eigen header file directory
 set(EIGEN_HEADER_DIR ${PROJECT_SOURCE_DIR}/deps/eigen)
+add_definitions(-DEIGEN_MPL2_ONLY)
+add_definitions(-DEIGEN_DONT_PARALLELIZE)

 # fast_double_parser header file directory
 set(FAST_DOUBLE_PARSER_HEADER_DIR ${PROJECT_SOURCE_DIR}/deps/fast_double_parser/include)
@@ -109,10 +148,11 @@
 add_library(stochtree_objs OBJECT ${SOURCES})

 # Include the headers in the source library
-target_include_directories(stochtree_objs PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
-
-if(APPLE)
-    set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
+if(USE_OPENMP)
+    target_include_directories(stochtree_objs PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR} ${OpenMP_CXX_INCLUDE_DIR})
+    target_link_libraries(stochtree_objs PRIVATE ${OpenMP_libomp_LIBRARY})
+else()
+    target_include_directories(stochtree_objs PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
 endif()

 # Python shared library
@@ -122,8 +162,13 @@
 if (BUILD_PYTHON)
     pybind11_add_module(stochtree_cpp src/py_stochtree.cpp)

     # Link to C++ source and headers
-    target_include_directories(stochtree_cpp PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
-    target_link_libraries(stochtree_cpp PRIVATE stochtree_objs)
+    if(USE_OPENMP)
+        target_include_directories(stochtree_cpp PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR} ${OpenMP_CXX_INCLUDE_DIR})
+        target_link_libraries(stochtree_cpp PRIVATE stochtree_objs ${OpenMP_libomp_LIBRARY})
+    else()
+        target_include_directories(stochtree_cpp PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
+        target_link_libraries(stochtree_cpp PRIVATE stochtree_objs)
+    endif()

     # EXAMPLE_VERSION_INFO is defined by setup.py and passed into the C++ code as a
     # define (VERSION_INFO) here.
@@ -154,8 +199,13 @@ if(BUILD_TEST)
     file(GLOB CPP_TEST_SOURCES test/cpp/*.cpp)
     add_executable(teststochtree ${CPP_TEST_SOURCES})
     set(STOCHTREE_TEST_HEADER_DIR ${PROJECT_SOURCE_DIR}/test/cpp)
-    target_include_directories(teststochtree PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${STOCHTREE_TEST_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
-    target_link_libraries(teststochtree PRIVATE stochtree_objs GTest::gtest_main)
+    if(USE_OPENMP)
+        target_include_directories(teststochtree PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${STOCHTREE_TEST_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR} ${OpenMP_CXX_INCLUDE_DIR})
+        target_link_libraries(teststochtree PRIVATE stochtree_objs GTest::gtest_main ${OpenMP_libomp_LIBRARY})
+    else()
+        target_include_directories(teststochtree PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${STOCHTREE_TEST_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
+        target_link_libraries(teststochtree PRIVATE stochtree_objs GTest::gtest_main)
+    endif()
     gtest_discover_tests(teststochtree)
 endif()
@@ -164,7 +214,12 @@ if(BUILD_DEBUG_TARGETS)
     # Build test suite
     add_executable(debugstochtree debug/api_debug.cpp)
     set(StochTree_DEBUG_HEADER_DIR ${PROJECT_SOURCE_DIR}/debug)
-    target_include_directories(debugstochtree PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${StochTree_DEBUG_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
-    target_link_libraries(debugstochtree PRIVATE stochtree_objs)
+    if(USE_OPENMP)
+        target_include_directories(debugstochtree PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${StochTree_DEBUG_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR} ${OpenMP_CXX_INCLUDE_DIR})
+        target_link_libraries(debugstochtree PRIVATE stochtree_objs ${OpenMP_libomp_LIBRARY})
+    else()
+        target_include_directories(debugstochtree PRIVATE ${StochTree_HEADER_DIR} ${BOOSTMATH_HEADER_DIR} ${EIGEN_HEADER_DIR} ${StochTree_DEBUG_HEADER_DIR} ${FAST_DOUBLE_PARSER_HEADER_DIR} ${FMT_HEADER_DIR})
+        target_link_libraries(debugstochtree PRIVATE stochtree_objs)
+    endif()
 endif()
diff --git a/R/bart.R b/R/bart.R
index 691fda7a..2f334682 100644
--- a/R/bart.R
+++ b/R/bart.R
@@ -51,6 +51,7 @@
 #' - `rfx_group_parameter_prior_cov` Prior covariance matrix for the random effects "group parameters." Default: `NULL`. Must be a square matrix whose dimension matches the number of random effects bases, or a scalar value that will be expanded to a diagonal matrix.
 #' - `rfx_variance_prior_shape` Shape parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`.
 #' - `rfx_variance_prior_scale` Scale parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`.
+#' - `num_threads` Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's system, this will default to `1`, otherwise to the maximum number of available threads.
 #'
 #' @param mean_forest_params (Optional) A list of mean forest model parameters, each of which has a default value processed internally, so this argument list is optional.
 #'
@@ -130,7 +131,8 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train
         rfx_working_parameter_prior_cov = NULL,
         rfx_group_parameter_prior_cov = NULL,
         rfx_variance_prior_shape = 1,
-        rfx_variance_prior_scale = 1
+        rfx_variance_prior_scale = 1,
+        num_threads = -1
     )
     general_params_updated <- preprocessParams(
         general_params_default, general_params
     )
@@ -186,6 +188,7 @@ bart <- function(X_train, y_train, leaf_basis_train = NULL, rfx_group_ids_train
     rfx_group_parameter_prior_cov <- general_params_updated$rfx_group_parameter_prior_cov
     rfx_variance_prior_shape <- general_params_updated$rfx_variance_prior_shape
     rfx_variance_prior_scale <- general_params_updated$rfx_variance_prior_scale
+    num_threads <- general_params_updated$num_threads

     # 2. Mean forest parameters
     num_trees_mean <- mean_forest_params_updated$num_trees
@@ -795,7 +798,8 @@
             forest_model_mean$sample_one_iteration(
                 forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_mean,
                 active_forest = active_forest_mean, rng = rng, forest_model_config = forest_model_config_mean,
-                global_model_config = global_model_config, keep_forest = keep_sample, gfr = TRUE
+                global_model_config = global_model_config, num_threads = num_threads,
+                keep_forest = keep_sample, gfr = TRUE
             )

             # Cache train set predictions since they are already computed during sampling
@@ -1272,15 +1276,23 @@ predict.bartmodel <- function(object, X, leaf_basis = NULL, rfx_group_ids = NULL
     result <- list()
     if ((object$model_params$has_rfx) || (object$model_params$include_mean_forest)) {
         result[["y_hat"]] = y_hat
+    } else {
+        result[["y_hat"]] <- NULL
     }
     if (object$model_params$include_mean_forest) {
         result[["mean_forest_predictions"]] = mean_forest_predictions
+    } else {
+        result[["mean_forest_predictions"]] <- NULL
     }
     if (object$model_params$has_rfx) {
         result[["rfx_predictions"]] = rfx_predictions
+    } else {
+        result[["rfx_predictions"]] <- NULL
     }
     if (object$model_params$include_variance_forest) {
         result[["variance_forest_predictions"]] = variance_forest_predictions
+    } else {
+        result[["variance_forest_predictions"]] <- NULL
     }
     return(result)
 }
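For reference, the new `num_threads` option above is consumed through the `general_params`
list of both bart() and bcf(). A minimal sketch of the user-facing call (the toy data
here is illustrative only and not part of this changeset):

    library(stochtree)

    # Toy regression problem (illustrative)
    n <- 500
    p <- 5
    X <- matrix(runif(n * p), ncol = p)
    y <- sin(4 * pi * X[, 1]) + rnorm(n, sd = 0.1)

    # Request four threads for the GFR/MCMC sweeps; the default (-1) uses all
    # available threads when OpenMP is present and falls back to 1 otherwise
    bart_fit <- bart(X_train = X, y_train = y,
                     general_params = list(num_threads = 4))

bcf() accepts the same entry in its general_params list, as the R/bcf.R diff below shows.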
diff --git a/R/bcf.R b/R/bcf.R
index 053f8b21..86aef8cd 100644
--- a/R/bcf.R
+++ b/R/bcf.R
@@ -53,6 +53,7 @@
 #' - `rfx_group_parameter_prior_cov` Prior covariance matrix for the random effects "group parameters." Default: `NULL`. Must be a square matrix whose dimension matches the number of random effects bases, or a scalar value that will be expanded to a diagonal matrix.
 #' - `rfx_variance_prior_shape` Shape parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`.
 #' - `rfx_variance_prior_scale` Scale parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`.
+#' - `num_threads` Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's system, this will default to `1`, otherwise to the maximum number of available threads.
 #'
 #' @param prognostic_forest_params (Optional) A list of prognostic forest model parameters, each of which has a default value processed internally, so this argument list is optional.
 #'
@@ -174,7 +175,8 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id
         rfx_working_parameter_prior_cov = NULL,
         rfx_group_parameter_prior_cov = NULL,
         rfx_variance_prior_shape = 1,
-        rfx_variance_prior_scale = 1
+        rfx_variance_prior_scale = 1,
+        num_threads = -1
     )
     general_params_updated <- preprocessParams(
         general_params_default, general_params
     )
@@ -248,6 +250,7 @@ bcf <- function(X_train, Z_train, y_train, propensity_train = NULL, rfx_group_id
     rfx_group_parameter_prior_cov <- general_params_updated$rfx_group_parameter_prior_cov
     rfx_variance_prior_shape <- general_params_updated$rfx_variance_prior_shape
     rfx_variance_prior_scale <- general_params_updated$rfx_variance_prior_scale
+    num_threads <- general_params_updated$num_threads

     # 2. Mu forest parameters
     num_trees_mu <- prognostic_forest_params_updated$num_trees
@@ -1041,7 +1044,7 @@
             forest_model_mu$sample_one_iteration(
                 forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_mu,
                 active_forest = active_forest_mu, rng = rng, forest_model_config = forest_model_config_mu,
-                global_model_config = global_model_config, keep_forest = keep_sample, gfr = TRUE
+                global_model_config = global_model_config, num_threads = num_threads, keep_forest = keep_sample, gfr = TRUE
             )

             # Cache train set predictions since they are already computed during sampling
@@ -1065,7 +1068,7 @@
             forest_model_tau$sample_one_iteration(
                 forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_tau,
                 active_forest = active_forest_tau, rng = rng, forest_model_config = forest_model_config_tau,
-                global_model_config = global_model_config, keep_forest = keep_sample, gfr = TRUE
+                global_model_config = global_model_config, num_threads = num_threads, keep_forest = keep_sample, gfr = TRUE
             )

             # Cannot cache train set predictions for tau because the cached predictions in the
@@ -1114,7 +1117,7 @@
                 forest_model_variance$sample_one_iteration(
                     forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_variance,
                     active_forest = active_forest_variance, rng = rng, forest_model_config = forest_model_config_variance,
-                    global_model_config = global_model_config, keep_forest = keep_sample, gfr = TRUE
+                    global_model_config = global_model_config, num_threads = num_threads, keep_forest = keep_sample, gfr = TRUE
                 )

                 # Cache train set predictions since they are already computed during sampling
@@ -1321,7 +1324,7 @@
             forest_model_mu$sample_one_iteration(
                 forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_mu,
                 active_forest = active_forest_mu, rng = rng, forest_model_config = forest_model_config_mu,
-                global_model_config = global_model_config, keep_forest = keep_sample, gfr = FALSE
+                global_model_config = global_model_config, num_threads = num_threads, keep_forest = keep_sample, gfr = FALSE
             )

             # Cache train set predictions since they are already computed during sampling
@@ -1345,7 +1348,7 @@
             forest_model_tau$sample_one_iteration(
                 forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_tau,
                 active_forest = active_forest_tau, rng = rng, forest_model_config = forest_model_config_tau,
-                global_model_config = global_model_config, keep_forest = keep_sample, gfr = FALSE
+                global_model_config = global_model_config, num_threads = num_threads, keep_forest = keep_sample, gfr = FALSE
             )

             # Cannot cache train set predictions for tau because the cached predictions in the
@@ -1394,7 +1397,7 @@
                 forest_model_variance$sample_one_iteration(
                     forest_dataset = forest_dataset_train, residual = outcome_train, forest_samples = forest_samples_variance,
                     active_forest = active_forest_variance, rng = rng, forest_model_config = forest_model_config_variance,
-                    global_model_config = global_model_config, keep_forest = keep_sample, gfr = FALSE
+                    global_model_config = global_model_config, num_threads = num_threads, keep_forest = keep_sample, gfr = FALSE
                 )

                 # Cache train set predictions since they are already computed during sampling
@@ -1797,10 +1800,14 @@ predict.bcfmodel <- function(object, X, Z, propensity = NULL, rfx_group_ids = NU
         "y_hat" = y_hat
     )
     if (object$model_params$has_rfx) {
-        result[["rfx_predictions"]] = rfx_predictions
+        result[["rfx_predictions"]] <- rfx_predictions
+    } else {
+        result[["rfx_predictions"]] <- NULL
     }
     if (object$model_params$include_variance_forest) {
-        result[["variance_forest_predictions"]] = variance_forest_predictions
+        result[["variance_forest_predictions"]] <- variance_forest_predictions
+    } else {
+        result[["variance_forest_predictions"]] <- NULL
     }
     return(result)
 }
diff --git a/R/cpp11.R b/R/cpp11.R
index 39802efe..a71a7722 100644
--- a/R/cpp11.R
+++ b/R/cpp11.R
@@ -580,12 +580,12 @@ compute_leaf_indices_cpp <- function(forest_container, covariates, forest_nums)
     .Call(`_stochtree_compute_leaf_indices_cpp`, forest_container, covariates, forest_nums)
 }

-sample_gfr_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_features_subsample) {
-  invisible(.Call(`_stochtree_sample_gfr_one_iteration_cpp`, data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_features_subsample))
+sample_gfr_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_features_subsample, num_threads) {
+  invisible(.Call(`_stochtree_sample_gfr_one_iteration_cpp`, data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_features_subsample, num_threads))
 }

-sample_mcmc_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest) {
-  invisible(.Call(`_stochtree_sample_mcmc_one_iteration_cpp`, data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest))
+sample_mcmc_one_iteration_cpp <- function(data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_threads) {
+  invisible(.Call(`_stochtree_sample_mcmc_one_iteration_cpp`, data, residual, forest_samples, active_forest, tracker, split_prior, rng, sweep_indices, feature_types, cutpoint_grid_size, leaf_model_scale_input, variable_weights, a_forest, b_forest, global_variance, leaf_model_int, keep_forest, num_threads))
 }

 sample_sigma2_one_iteration_cpp <- function(residual, dataset, rng, a, b) {
diff --git a/R/model.R b/R/model.R
index 5b003055..57523c0d 100644
--- a/R/model.R
+++ b/R/model.R
@@ -67,10 +67,11 @@ ForestModel <- R6::R6Class(
     #' @param rng Wrapper around C++ random number generator
     #' @param forest_model_config ForestModelConfig object containing forest model parameters and settings
     #' @param global_model_config GlobalModelConfig object containing global model parameters and settings
+    #' @param num_threads Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's system, this will default to `1`, otherwise to the maximum number of available threads.
    #' @param keep_forest (Optional) Whether the updated forest sample should be saved to `forest_samples`. Default: `TRUE`.
    #' @param gfr (Optional) Whether or not the forest should be sampled using the "grow-from-root" (GFR) algorithm. Default: `TRUE`.
     sample_one_iteration = function(forest_dataset, residual, forest_samples, active_forest,
-                                    rng, forest_model_config, global_model_config,
+                                    rng, forest_model_config, global_model_config, num_threads = -1,
                                     keep_forest = TRUE, gfr = TRUE) {
         if (active_forest$is_empty()) {
             stop("`active_forest` has not yet been initialized, which is necessary to run the sampler.
Please set constant values for `active_forest`'s leaves using either the `set_root_leaves` or `prepare_for_sampler` methods.") @@ -114,14 +115,15 @@ ForestModel <- R6::R6Class( forest_dataset$data_ptr, residual$data_ptr, forest_samples$forest_container_ptr, active_forest$forest_ptr, self$tracker_ptr, self$tree_prior_ptr, rng$rng_ptr, sweep_update_indices, feature_types, cutpoint_grid_size, leaf_model_scale, - variable_weights, a_forest, b_forest, global_scale, leaf_model_int, keep_forest, num_features_subsample + variable_weights, a_forest, b_forest, global_scale, leaf_model_int, keep_forest, num_features_subsample, + num_threads ) } else { sample_mcmc_one_iteration_cpp( forest_dataset$data_ptr, residual$data_ptr, forest_samples$forest_container_ptr, active_forest$forest_ptr, self$tracker_ptr, self$tree_prior_ptr, rng$rng_ptr, sweep_update_indices, feature_types, cutpoint_grid_size, leaf_model_scale, - variable_weights, a_forest, b_forest, global_scale, leaf_model_int, keep_forest + variable_weights, a_forest, b_forest, global_scale, leaf_model_int, keep_forest, num_threads ) } }, diff --git a/configure b/configure new file mode 100755 index 00000000..02737aa7 --- /dev/null +++ b/configure @@ -0,0 +1,3002 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.72 for stochtree 0.1.1. +# +# +# Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation, +# Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else case e in #( + e) case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. 
+if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as 'sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed 'exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else case e in #( + e) case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : + +else case e in #( + e) exitcode=1; echo positional parameters were not saved. 
;; +esac +fi +test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1" + if (eval "$as_required") 2>/dev/null +then : + as_have_required=yes +else case e in #( + e) as_have_required=no ;; +esac +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : + +else case e in #( + e) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$as_shell as_have_required=yes + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : + break 2 +fi +fi + done;; + esac + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else case e in #( + e) if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi ;; +esac +fi + + + if test "x$CONFIG_SHELL" != x +then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed 'exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." + else + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi ;; +esac +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. 
+as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else case e in #( + e) as_fn_append () + { + eval $1=\$$1\$2 + } ;; +esac +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else case e in #( + e) as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } ;; +esac +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + t clear + :clear + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. 
+ # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated + +# Sed expression to map a string onto a valid variable name. +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='stochtree' +PACKAGE_TARNAME='stochtree' +PACKAGE_VERSION='0.1.1' +PACKAGE_STRING='stochtree 0.1.1' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +ac_subst_vars='LTLIBOBJS +LIBOBJS +STOCHTREE_CPPFLAGS +OPENMP_AVAILABILITY_FLAGS +OPENMP_LIB +OPENMP_CXXFLAGS +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +' + ac_precious_vars='build_alias +host_alias +target_alias' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) 
+bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + 
ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: '$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: '$ac_option' +Try '$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: '$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. 
+ printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: '$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but 'cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. 
+# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +'configure' configures stochtree 0.1.1 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print 'checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for '--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or '..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, 'make install' will install all the files in +'$ac_default_prefix/bin', '$ac_default_prefix/lib' etc. You can specify +an installation prefix other than '$ac_default_prefix' using '--prefix', +for instance '--prefix=\$HOME'. + +For better control, use the options below. + +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/stochtree] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of stochtree 0.1.1:";; + esac + cat <<\_ACEOF + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. 
+ for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +stochtree configure 0.1.1 +generated by GNU Autoconf 2.72 + +Copyright (C) 2023 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by stochtree $as_me 0.1.1, which was +generated by GNU Autoconf 2.72. Invocation command line was + + $ $0$ac_configure_args_raw + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" + # Save into config.log some information that might help in debugging. + { + echo + + printf "%s\n" "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + printf "%s\n" "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + printf "%s\n" "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + printf "%s\n" "## ----------- ## +## confdefs.h. ## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +printf "%s\n" "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. 
+if test -n "$CONFIG_SITE"; then + ac_site_files="$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" +else + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" +fi + +for ac_site_file in $ac_site_files +do + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See 'config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was set to '$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: '$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: '$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: '$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in '$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: '$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: '$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: '$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: '$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. 
+ if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run '${MAKE-make} distclean' and/or 'rm $cache_file' + and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Note: consider making version number dynamic as in +# https://github.com/microsoft/LightGBM/blob/195c26fc7b00eb0fec252dfe841e2e66d6833954/build-cran-package.sh + +########################### +# find compiler and flags # +########################### + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking location of R" >&5 +printf %s "checking location of R... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${R_HOME}" >&5 +printf "%s\n" "${R_HOME}" >&6; } + +# set up CPP flags +# find the compiler and compiler flags used by R. +: ${R_HOME=`R HOME`} +if test -z "${R_HOME}"; then + echo "could not determine R_HOME" + exit 1 +fi +CXX17=`"${R_HOME}/bin/R" CMD config CXX17` +CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD` +CXX="${CXX17} ${CXX17STD}" +CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS` +CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXX17FLAGS` +LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS` +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + +# Stochtree-specific flags +STOCHTREE_CPPFLAGS="" + +######### +# Eigen # +######### + +STOCHTREE_CPPFLAGS=" ${STOCHTREE_CPPFLAGS} -DEIGEN_MPL2_ONLY -DEIGEN_DONT_PARALLELIZE" + +########## +# OpenMP # +########## + +OPENMP_CXXFLAGS="" + +if test `uname -s` = "Linux" +then + OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CXXFLAGS)" + OPENMP_AVAILABILITY_FLAGS='-DSTOCHTREE_OPENMP_AVAILABLE' +fi + +if test `uname -s` = "Darwin" +then + OPENMP_CXXFLAGS='-Xclang -fopenmp' + OPENMP_LIB='-lomp' + OPENMP_AVAILABILITY_FLAGS='-DSTOCHTREE_OPENMP_AVAILABLE' + + # libomp 15.0+ from brew is keg-only (i.e. not symlinked into the standard paths search by the linker), + # so need to search in other locations. + # See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927. + # + # If Homebrew is found and libomp was installed with it, this code adds the necessary + # flags for the compiler to find libomp headers and for the linker to find libomp.dylib. + HOMEBREW_LIBOMP_PREFIX="" + if command -v brew >/dev/null 2>&1; then + ac_brew_openmp=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP was installed via Homebrew" >&5 +printf %s "checking whether OpenMP was installed via Homebrew... 
" >&6; } + brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_brew_openmp}" >&5 +printf "%s\n" "${ac_brew_openmp}" >&6; } + if test "${ac_brew_openmp}" = yes; then + HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp` + OPENMP_CXXFLAGS="${OPENMP_CXXFLAGS} -I${HOMEBREW_LIBOMP_PREFIX}/include" + OPENMP_LIB="${OPENMP_LIB} -L${HOMEBREW_LIBOMP_PREFIX}/lib" + fi + fi + ac_pkg_openmp=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5 +printf %s "checking whether OpenMP will work in a package... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + + #include + +int +main (void) +{ + + return (omp_get_max_threads() <= 1); + + + ; + return 0; +} + + +_ACEOF + ${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} ${OPENMP_CXXFLAGS} ${OPENMP_LIB} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_pkg_openmp=yes + + # -Xclang is not portable (it is clang-specific) + # if compilation above failed, try without that flag + if test "${ac_pkg_openmp}" = no; then + if test -f "./conftest"; then + rm ./conftest + fi + OPENMP_CXXFLAGS="-fopenmp" + ${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} ${OPENMP_CXXFLAGS} ${OPENMP_LIB} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_pkg_openmp=yes + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}" >&5 +printf "%s\n" "${ac_pkg_openmp}" >&6; } + if test "${ac_pkg_openmp}" = no; then + OPENMP_CXXFLAGS='' + OPENMP_LIB='' + OPENMP_AVAILABILITY_FLAGS='' + echo '***********************************************************************************************' + echo ' OpenMP is unavailable on this macOS system. stochtree code will run single-threaded as a result.' + echo ' To use all CPU cores for training jobs, you should install OpenMP by running' + echo '' + echo ' brew install libomp' + echo '***********************************************************************************************' + fi +fi + +# substitute variables from this script into Makevars.in + + + + +ac_config_files="$ac_config_files src/Makevars" + + +# write out Autoconf output +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# 'ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* 'ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # 'set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # 'set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[][ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. 
+ as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+ as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1
+then :
+ emulate sh
+ NULLCMD=:
+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+ # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else case e in #(
+ e) case `(set -o) 2>/dev/null` in #(
+ *posix*) :
+ set -o posix ;; #(
+ *) :
+ ;;
+esac ;;
+esac
+fi
+
+
+
+# Reset variables that may have inherited troublesome values from
+# the environment.
+
+# IFS needs to be set, to space, tab, and newline, in precisely that order.
+# (If _AS_PATH_WALK were called with IFS unset, it would have the
+# side effect of setting IFS to empty, thus disabling word splitting.)
+# Quoting is to prevent editors from complaining about space-tab.
+as_nl='
+'
+export as_nl
+IFS=" "" $as_nl"
+
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# Ensure predictable behavior from utilities with locale-dependent output.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# We cannot yet rely on "unset" to work, but we need these variables
+# to be unset--not just set to an empty or harmless value--now, to
+# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct
+# also avoids known problems related to "unset" and subshell syntax
+# in other old shells (e.g. bash 2.01 and pdksh 5.2.14).
+for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH
+do eval test \${$as_var+y} \
+ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+
+# Ensure that fds 0, 1, and 2 are open.
+if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi
+if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi
+if (exec 3>&2) ; then :; else exec 2>/dev/null; fi
+
+# The user is always right.
+if ${PATH_SEPARATOR+false} :; then
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+ (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+ PATH_SEPARATOR=';'
+ }
+fi
+
+
+# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+ *[\\/]* ) as_myself=$0 ;;
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ case $as_dir in #(((
+ '') as_dir=./ ;;
+ */) ;;
+ *) as_dir=$as_dir/ ;;
+ esac
+ test -r "$as_dir$0" && as_myself=$as_dir$0 && break
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+# We did not find ourselves, most probably we were run as 'sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+ as_myself=$0
+fi
+if test ! 
-f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else case e in #( + e) as_fn_append () + { + eval $1=\$$1\$2 + } ;; +esac +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else case e in #( + e) as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } ;; +esac +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both 'ln -s file dir' and 'ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; 'ln -s' creates a wrapper executable. + # In both cases, we have to default to 'cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_sed_cpp="y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g" +as_tr_cpp="eval sed '$as_sed_cpp'" # deprecated + +# Sed expression to map a string onto a valid variable name. +as_sed_sh="y%*+%pp%;s%[^_$as_cr_alnum]%_%g" +as_tr_sh="eval sed '$as_sed_sh'" # deprecated + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by stochtree $as_me 0.1.1, which was +generated by GNU Autoconf 2.72. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +'$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to the package provider." + +_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config='$ac_cs_config_escaped' +ac_cs_version="\\ +stochtree config.status 0.1.1 +configured by $0, generated by GNU Autoconf 2.72, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2023 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + printf "%s\n" "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + printf "%s\n" "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + printf "%s\n" "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: '$1' +Try '$0 --help' for more information." 
;;
+
+ *) as_fn_append ac_config_targets " $1"
+ ac_need_defaults=false ;;
+
+ esac
+ shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+ exec 6>/dev/null
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+ set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+ shift
+ \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6
+ CONFIG_SHELL='$SHELL'
+ export CONFIG_SHELL
+ exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+ echo
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+ printf "%s\n" "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+ case $ac_config_target in
+ "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;;
+
+ *) as_fn_error $? "invalid argument: '$ac_config_target'" "$LINENO" 5;;
+ esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+ test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files
+fi
+
+# Have a temporary directory for convenience. Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to '$tmp'.
+$debug ||
+{
+ tmp= ac_tmp=
+ trap 'exit_status=$?
+ : "${ac_tmp:=$tmp}"
+ { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+ trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+ tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+ test -d "$tmp"
+} ||
+{
+ tmp=./conf$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with './config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+ eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+ ac_cs_awk_cr='\\r'
+else
+ ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+ echo "cat >conf$$subs.awk <<_ACEOF" &&
+ echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+ echo "_ACEOF"
+} >conf$$subs.sh ||
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ . ./conf$$subs.sh ||
+ as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag '$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain ':'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: '$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is 'configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. 
Generated by config.status. */ + configure_input='Generated from '` + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`printf "%s\n" "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when '$srcdir' = '.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable 'datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 'datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/configure.ac b/configure.ac new file mode 100644 index 00000000..6d59e3b0 --- /dev/null +++ b/configure.ac @@ -0,0 +1,125 @@ +### configure.ac -*- Autoconf -*- +# Template used by Autoconf to generate 'configure' script. 
Based on:
+# https://github.com/microsoft/LightGBM/blob/master/R-package/configure.ac
+
+AC_PREREQ(2.69)
+AC_INIT([stochtree], [0.1.1], [], [stochtree], [])
+# Note: consider making version number dynamic as in
+# https://github.com/microsoft/LightGBM/blob/195c26fc7b00eb0fec252dfe841e2e66d6833954/build-cran-package.sh
+
+###########################
+# find compiler and flags #
+###########################
+
+AC_MSG_CHECKING([location of R])
+AC_MSG_RESULT([${R_HOME}])
+
+# set up CPP flags
+# find the compiler and compiler flags used by R.
+: ${R_HOME=`R HOME`}
+if test -z "${R_HOME}"; then
+    echo "could not determine R_HOME"
+    exit 1
+fi
+CXX17=`"${R_HOME}/bin/R" CMD config CXX17`
+CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD`
+CXX="${CXX17} ${CXX17STD}"
+CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS`
+CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXX17FLAGS`
+LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS`
+AC_LANG(C++)
+
+# Stochtree-specific flags
+STOCHTREE_CPPFLAGS=""
+
+#########
+# Eigen #
+#########
+
+STOCHTREE_CPPFLAGS=" ${STOCHTREE_CPPFLAGS} -DEIGEN_MPL2_ONLY -DEIGEN_DONT_PARALLELIZE"
+
+##########
+# OpenMP #
+##########
+
+OPENMP_CXXFLAGS=""
+
+if test `uname -s` = "Linux"
+then
+    OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CXXFLAGS)"
+    OPENMP_AVAILABILITY_FLAGS='-DSTOCHTREE_OPENMP_AVAILABLE'
+fi
+
+if test `uname -s` = "Darwin"
+then
+    OPENMP_CXXFLAGS='-Xclang -fopenmp'
+    OPENMP_LIB='-lomp'
+    OPENMP_AVAILABILITY_FLAGS='-DSTOCHTREE_OPENMP_AVAILABLE'
+
+    # libomp 15.0+ from brew is keg-only (i.e. not symlinked into the standard paths searched by the linker),
+    # so we need to search in other locations.
+    # See https://github.com/Homebrew/homebrew-core/issues/112107#issuecomment-1278042927.
+    #
+    # If Homebrew is found and libomp was installed with it, this code adds the necessary
+    # flags for the compiler to find libomp headers and for the linker to find libomp.dylib.
+    HOMEBREW_LIBOMP_PREFIX=""
+    if command -v brew >/dev/null 2>&1; then
+        ac_brew_openmp=no
+        AC_MSG_CHECKING([whether OpenMP was installed via Homebrew])
+        brew --prefix libomp >/dev/null 2>&1 && ac_brew_openmp=yes
+        AC_MSG_RESULT([${ac_brew_openmp}])
+        if test "${ac_brew_openmp}" = yes; then
+            HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp`
+            OPENMP_CXXFLAGS="${OPENMP_CXXFLAGS} -I${HOMEBREW_LIBOMP_PREFIX}/include"
+            OPENMP_LIB="${OPENMP_LIB} -L${HOMEBREW_LIBOMP_PREFIX}/lib"
+        fi
+    fi
+    ac_pkg_openmp=no
+    AC_MSG_CHECKING([whether OpenMP will work in a package])
+    AC_LANG_CONFTEST(
+        [
+            AC_LANG_PROGRAM(
+                [[
+                    #include <omp.h>
+                ]],
+                [[
+                    return (omp_get_max_threads() <= 1);
+                ]]
+            )
+        ]
+    )
+    ${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} ${OPENMP_CXXFLAGS} ${OPENMP_LIB} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_pkg_openmp=yes
+
+    # -Xclang is not portable (it is clang-specific);
+    # if the compilation above failed, try again without that flag
+    if test "${ac_pkg_openmp}" = no; then
+        if test -f "./conftest"; then
+            rm ./conftest
+        fi
+        OPENMP_CXXFLAGS="-fopenmp"
+        ${CXX} ${CPPFLAGS} ${CXXFLAGS} ${LDFLAGS} ${OPENMP_CXXFLAGS} ${OPENMP_LIB} -o conftest conftest.cpp 2>/dev/null && ./conftest && ac_pkg_openmp=yes
+    fi
+
+    AC_MSG_RESULT([${ac_pkg_openmp}])
+    if test "${ac_pkg_openmp}" = no; then
+        OPENMP_CXXFLAGS=''
+        OPENMP_LIB=''
+        OPENMP_AVAILABILITY_FLAGS=''
+        echo '***********************************************************************************************'
+        echo ' OpenMP is unavailable on this macOS system. stochtree code will run single-threaded as a result.'
+        echo ' To use all CPU cores for training jobs, you should install OpenMP by running'
+        echo ''
+        echo '    brew install libomp'
+        echo '***********************************************************************************************'
+    fi
+fi
+
+# substitute variables from this script into Makevars.in
+AC_SUBST(OPENMP_CXXFLAGS)
+AC_SUBST(OPENMP_LIB)
+AC_SUBST(OPENMP_AVAILABILITY_FLAGS)
+AC_SUBST(STOCHTREE_CPPFLAGS)
+AC_CONFIG_FILES([src/Makevars])
+
+# write out Autoconf output
+AC_OUTPUT
diff --git a/configure.win b/configure.win
new file mode 100644
index 00000000..4bc2a539
--- /dev/null
+++ b/configure.win
@@ -0,0 +1,39 @@
+# Script used to generate `Makevars.win` from `Makevars.win.in` on Windows
+# Adapted from LightGBM https://github.com/microsoft/LightGBM/blob/master/R-package/configure.win
+
+###########################
+# find compiler and flags #
+###########################
+
+R_EXE="${R_HOME}/bin${R_ARCH_BIN}/R"
+CXX17=`"${R_EXE}" CMD config CXX17`
+CXX17STD=`"${R_EXE}" CMD config CXX17STD`
+CXX="${CXX17} ${CXX17STD}"
+CXXFLAGS=`"${R_EXE}" CMD config CXX17FLAGS`
+CXX_STD="CXX17"
+CPPFLAGS=`"${R_EXE}" CMD config CPPFLAGS`
+
+# Stochtree-specific flags
+STOCHTREE_CPPFLAGS=""
+
+#########
+# Eigen #
+#########
+
+STOCHTREE_CPPFLAGS="${STOCHTREE_CPPFLAGS} -DEIGEN_MPL2_ONLY -DEIGEN_DONT_PARALLELIZE"
+
+##########
+# OpenMP #
+##########
+
+STOCHTREE_CPPFLAGS="${STOCHTREE_CPPFLAGS} -DSTOCHTREE_OPENMP_AVAILABLE"
+
+#########################
+# Generate Makevars.win #
+#########################
+
+# Apply both substitutions in a single sed invocation; two separate seds reading
+# from Makevars.win.in would have the second overwrite the first's output.
+sed -e "s/@CXX_STD@/$CXX_STD/" \
+    -e "s/@STOCHTREE_CPPFLAGS@/$STOCHTREE_CPPFLAGS/" \
+    < src/Makevars.win.in > src/Makevars.win
diff --git a/cran-bootstrap.R b/cran-bootstrap.R
index e06eba11..0e865877 100644
--- a/cran-bootstrap.R
+++ b/cran-bootstrap.R
@@ -60,8 +60,13 @@ if (!dir.exists(cran_dir)) {
 src_files <- list.files("src", pattern = ".[^o]$", recursive = TRUE, full.names = TRUE)
 pybind_src_files <- list.files("src", pattern = "^(py_)", recursive = TRUE, full.names = TRUE)
 r_src_files <- src_files[!(src_files %in% pybind_src_files)]
+r_src_files <- r_src_files[!(r_src_files %in% c("src/Makevars", "src/Makevars.win"))]
+cat(r_src_files)
 pkg_core_files <- c(
   ".Rbuildignore",
+  "configure",
+  "configure.ac",
+  "configure.win",
   "cran-comments.md",
   "DESCRIPTION",
   "inst/COPYRIGHTS",
@@ -135,12 +140,28 @@ if (all(file.exists(pkg_core_files))) {
   }
 }
 
-# Overwrite PKG_CPPFLAGS in src/Makevars
-cran_makevars <- file.path(cran_dir, "src/Makevars")
+# Overwrite PKG_CPPFLAGS in src/Makevars.in
+cran_makevars <- file.path(cran_dir, "src/Makevars.in")
 makevars_lines <- readLines(cran_makevars)
-makevars_lines[grep("^(PKG_CPPFLAGS)", makevars_lines)] <- "PKG_CPPFLAGS= -I$(PKGROOT)/src/include $(STOCHTREE_CPPFLAGS)"
+makevars_lines[grep(" -I$(PKGROOT)/include \\", makevars_lines, fixed = T)] <- " -I$(PKGROOT)/src/include \\"
+makevars_lines <- makevars_lines[-c(
+    grep(" -I$(PKGROOT)/deps/eigen \\", makevars_lines, fixed = T),
+    grep(" -I$(PKGROOT)/deps/fmt/include \\", makevars_lines, fixed = T),
+    grep(" -I$(PKGROOT)/deps/fast_double_parser/include \\", makevars_lines, fixed = T)
+)]
 writeLines(makevars_lines, cran_makevars)
 
+# Overwrite PKG_CPPFLAGS in src/Makevars.win.in
+cran_makevars_win <- file.path(cran_dir, "src/Makevars.win.in")
+makevars_win_lines <- readLines(cran_makevars_win)
+makevars_win_lines[grep(" -I$(PKGROOT)/include \\", makevars_win_lines, fixed = T)] <- " -I$(PKGROOT)/src/include \\"
+makevars_win_lines <-
makevars_win_lines[-c(
+    grep(" -I$(PKGROOT)/deps/eigen \\", makevars_win_lines, fixed = T),
+    grep(" -I$(PKGROOT)/deps/fmt/include \\", makevars_win_lines, fixed = T),
+    grep(" -I$(PKGROOT)/deps/fast_double_parser/include \\", makevars_win_lines, fixed = T)
+)]
+writeLines(makevars_win_lines, cran_makevars_win)
+
 # Remove vignette deps from DESCRIPTION if no vignettes
 if (!include_vignettes) {
   cran_description <- file.path(cran_dir, "DESCRIPTION")
diff --git a/debug/README.md b/debug/README.md
index 8dc5a15c..905999fe 100644
--- a/debug/README.md
+++ b/debug/README.md
@@ -4,7 +4,7 @@ This subdirectory contains a debug program for the C++ codebase.
 
 The program takes several command line arguments (in order):
 
 1. Which data-generating process (DGP) to run (integer-coded, see below for a detailed description)
-1. Which leaf model to sample (integer-coded, see below for a detailed description)
+2. Which leaf model to sample (integer-coded, see below for a detailed description)
 3. Whether or not to include random effects (0 = no, 1 = yes)
 4. Number of grow-from-root (GFR) samples
 5. Number of MCMC samples
@@ -13,6 +13,7 @@ The program takes several command line arguments (in order):
 8. [Optional] index of outcome column in data file (leave this blank as `0`)
 9. [Optional] comma-delimited string of column indices of covariates (leave this blank as `""`)
 10. [Optional] comma-delimited string of column indices of leaf regression bases (leave this blank as `""`)
+11. [Optional] number of threads to use in the GFR sampler (leave this blank as `-1`)
 
 The DGPs are numbered as follows:
 
@@ -30,6 +31,6 @@ The models are numbered as follows:
 
 For an example of how to run this program for DGP 0, leaf model 1, no random effects, 10 GFR samples, 100 MCMC samples and a default seed (`-1`), run
 
-`./build/debugstochtree 0 1 0 10 100 -1 "" 0 "" ""`
+`./build/debugstochtree 0 1 0 10 100 -1 "" 0 "" "" -1`
 
 from the main `stochtree` project directory after building with `BUILD_DEBUG_TARGETS` set to `ON`.
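The `num_threads` argument documented above is forwarded through the samplers to `StochTree::ParallelFor`, which this patch introduces in `include/stochtree/openmp_utils.h`. The following is a minimal standalone sketch of that dispatch pattern; it uses the standard `_OPENMP` macro rather than the patch's `STOCHTREE_OPENMP_AVAILABLE` flag, and the function and file names are illustrative, not code from the patch:

    // parallel_for_sketch.cpp (illustrative only, not part of this patch)
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    template <typename Func>
    void ParallelForSketch(int start, int end, int num_threads, Func func) {
    #ifdef _OPENMP
      // OpenMP build: the runtime divides [start, end) across num_threads threads
      #pragma omp parallel for num_threads(num_threads)
      for (int i = start; i < end; ++i) {
        func(i);
      }
    #else
      // Serial build: same loop, no threading, so callers need no #ifdef of their own
      (void)num_threads;
      for (int i = start; i < end; ++i) {
        func(i);
      }
    #endif
    }

    int main() {
      double acc[8] = {0.0};
      // Each iteration writes its own slot, so the loop body needs no synchronization
      ParallelForSketch(0, 8, 4, [&acc](int i) { acc[i] = 0.5 * i; });
      return static_cast<int>(acc[0]);  // acc[0] == 0.0, so exit code 0
    }

In the patch itself, `StochTree::ParallelFor` additionally resolves a non-positive `num_threads` via `GetOptimalThreadCount` before dispatching, as shown in `openmp_utils.h` below.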
diff --git a/debug/api_debug.cpp b/debug/api_debug.cpp index 8958bd93..54457df1 100644 --- a/debug/api_debug.cpp +++ b/debug/api_debug.cpp @@ -423,7 +423,7 @@ void OutcomeOffsetScale(ColumnVector& residual, double& outcome_offset, double& void RunDebug(int dgp_num = 0, const ModelType model_type = kConstantLeafGaussian, bool rfx_included = false, int num_gfr = 10, int num_mcmc = 100, int random_seed = -1, std::string dataset_filename = "", int outcome_col = -1, std::string covariate_cols = "", - std::string basis_cols = "") { + std::string basis_cols = "", int num_threads = -1) { // Flag the data as row-major bool row_major = true; @@ -688,13 +688,13 @@ void RunDebug(int dgp_num = 0, const ModelType model_type = kConstantLeafGaussia // Sample tree ensemble if (model_type == ModelType::kConstantLeafGaussian) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample, num_threads); } else if (model_type == ModelType::kUnivariateRegressionLeafGaussian) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample, num_threads); } else if (model_type == ModelType::kMultivariateRegressionLeafGaussian) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample, omega_cols); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, true, num_features_subsample, num_threads, omega_cols); } else if (model_type == ModelType::kLogLinearVariance) { - GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, false, num_features_subsample); + GFRSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, feature_types, cutpoint_grid_size, true, true, false, num_features_subsample, num_threads); } if (rfx_included) { @@ -725,13 +725,13 @@ void RunDebug(int dgp_num = 0, const ModelType model_type = kConstantLeafGaussia // Sample tree ensemble if (model_type == ModelType::kConstantLeafGaussian) { - MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, true); + MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, 
residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, true, num_threads);
     } else if (model_type == ModelType::kUnivariateRegressionLeafGaussian) {
-      MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, true);
+      MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, true, num_threads);
     } else if (model_type == ModelType::kMultivariateRegressionLeafGaussian) {
-      MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, true, omega_cols);
+      MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, true, num_threads, omega_cols);
     } else if (model_type == ModelType::kLogLinearVariance) {
-      MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, false);
+      MCMCSampleOneIter(active_forest, tracker, forest_samples, std::get(leaf_model), dataset, residual, tree_prior, gen, variable_weights, sweep_indices, global_variance, true, true, false, num_threads);
     }
 
     if (rfx_included) {
@@ -804,8 +804,9 @@ int main(int argc, char* argv[]) {
   int outcome_col = std::stoi(argv[8]);
   std::string covariate_cols = argv[9];
   std::string basis_cols = argv[10];
+  int num_threads = std::stoi(argv[11]);
 
   // Run the debug program
   StochTree::RunDebug(dgp_num, model_type, rfx_included, num_gfr, num_mcmc, random_seed, 
-                      dataset_filename, outcome_col, covariate_cols, basis_cols);
+                      dataset_filename, outcome_col, covariate_cols, basis_cols, num_threads);
 }
diff --git a/demo/debug/multi_chain.py b/demo/debug/multi_chain.py
index 6d8aef68..e93577eb 100644
--- a/demo/debug/multi_chain.py
+++ b/demo/debug/multi_chain.py
@@ -118,11 +118,14 @@ def outcome_mean(X, W):
 )
 
 # Inspect the model outputs
-y_hat_mcmc_2 = bart_model_2.predict(X_test, basis_test)
+bart_preds_2 = bart_model_2.predict(X_test, basis_test)
+y_hat_mcmc_2 = bart_preds_2['y_hat']
 y_avg_mcmc_2 = np.squeeze(y_hat_mcmc_2).mean(axis=1, keepdims=True)
-y_hat_mcmc_3 = bart_model_3.predict(X_test, basis_test)
+bart_preds_3 = bart_model_3.predict(X_test, basis_test)
+y_hat_mcmc_3 = bart_preds_3['y_hat']
 y_avg_mcmc_3 = np.squeeze(y_hat_mcmc_3).mean(axis=1, keepdims=True)
-y_hat_mcmc_4 = bart_model_4.predict(X_test, basis_test)
+bart_preds_4 = bart_model_4.predict(X_test, basis_test)
+y_hat_mcmc_4 = bart_preds_4['y_hat']
 y_avg_mcmc_4 = np.squeeze(y_hat_mcmc_4).mean(axis=1, keepdims=True)
 y_df = pd.DataFrame(
     np.concatenate(
diff --git a/demo/debug/parallel_multi_chain.py b/demo/debug/parallel_multi_chain.py
index ee114aee..ee618df5 100644
--- a/demo/debug/parallel_multi_chain.py
+++ b/demo/debug/parallel_multi_chain.py
@@ -145,7 +145,8 @@ def outcome_mean(X, W):
     )
 
     # Inspect the model outputs
-    y_hat_mcmc = combined_bart.predict(X_test, basis_test)
+    bart_preds = combined_bart.predict(X_test, basis_test)
+    y_hat_mcmc = bart_preds['y_hat']
     y_avg_mcmc = np.squeeze(y_hat_mcmc).mean(axis=1, keepdims=True)
     y_df = pd.DataFrame(
         np.concatenate((y_avg_mcmc, np.expand_dims(y_test, axis=1)),
axis=1), diff --git a/demo/debug/random_effects.py b/demo/debug/random_effects.py index 9a3c4350..d6bcc358 100644 --- a/demo/debug/random_effects.py +++ b/demo/debug/random_effects.py @@ -75,7 +75,8 @@ def outcome_mean(group_labels, basis): rfx_model.sample(rfx_dataset, outcome, rfx_tracker, rfx_container, True, 1.0, cpp_rng) # Inspect the samples -rfx_preds = rfx_container.predict(group_labels, basis) * y_std + y_bar +bart_preds = rfx_container.predict(group_labels, basis) +rfx_preds = bart_preds['y_hat'] * y_std + y_bar rfx_comparison_df = pd.DataFrame( np.concatenate((rfx_preds, np.expand_dims(rfx_term, axis=1)), axis=1), columns=["Predicted", "Actual"], diff --git a/demo/debug/rfx_serialization.py b/demo/debug/rfx_serialization.py index db80edea..fec857b6 100644 --- a/demo/debug/rfx_serialization.py +++ b/demo/debug/rfx_serialization.py @@ -60,11 +60,13 @@ def rfx_mean(group_labels, basis): rfx_basis_train=basis, num_gfr=10, num_mcmc=10) # Extract predictions from the sampler -y_hat_orig = bart_orig.predict(X, W, group_labels, basis) +bart_preds_orig = bart_orig.predict(X, W, group_labels, basis) +y_hat_orig = bart_preds_orig['y_hat'] # "Round-trip" the model to JSON string and back and check that the predictions agree bart_json_string = bart_orig.to_json() bart_reloaded = BARTModel() bart_reloaded.from_json(bart_json_string) -y_hat_reloaded = bart_reloaded.predict(X, W, group_labels, basis) +bart_preds_reloaded = bart_reloaded.predict(X, W, group_labels, basis) +y_hat_reloaded = bart_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) \ No newline at end of file diff --git a/demo/debug/serialization.py b/demo/debug/serialization.py index 4ee14cee..75c14679 100644 --- a/demo/debug/serialization.py +++ b/demo/debug/serialization.py @@ -98,11 +98,13 @@ def outcome_mean(X, W): global_var_samples[i+1] = global_var_model.sample_one_iteration(residual, cpp_rng, a_global, b_global) # Extract predictions from the sampler -y_hat_orig = forest_container.predict(dataset) +bart_preds_orig = forest_container.predict(dataset) +y_hat_orig = bart_preds_orig['y_hat'] # "Round-trip" the forest to JSON string and back and check that the predictions agree forest_json_string = forest_container.dump_json_string() forest_container_reloaded = ForestContainer(num_trees, W.shape[1], False, False) forest_container_reloaded.load_from_json_string(forest_json_string) -y_hat_reloaded = forest_container_reloaded.predict(dataset) +bart_preds_reloaded = forest_container_reloaded.predict(dataset) +y_hat_reloaded = bart_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) diff --git a/demo/notebooks/prototype_interface.ipynb b/demo/notebooks/prototype_interface.ipynb index 064ce9b6..1810241a 100644 --- a/demo/notebooks/prototype_interface.ipynb +++ b/demo/notebooks/prototype_interface.ipynb @@ -341,7 +341,8 @@ "outputs": [], "source": [ "# Forest predictions\n", - "forest_preds = forest_container.predict(dataset) * y_std + y_bar\n", + "bart_preds = forest_container.predict(dataset)\n", + "forest_preds = bart_preds['y_hat'] * y_std + y_bar\n", "forest_preds_gfr = forest_preds[:, :num_warmstart]\n", "forest_preds_mcmc = forest_preds[:, num_warmstart:num_samples]\n", "\n", @@ -1101,7 +1102,7 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "venv (3.12.9)", "language": "python", "name": "python3" }, diff --git a/demo/notebooks/serialization.ipynb b/demo/notebooks/serialization.ipynb index 3e023acf..e9b0f4c1 100644 --- 
a/demo/notebooks/serialization.ipynb +++ b/demo/notebooks/serialization.ipynb @@ -241,7 +241,8 @@ "metadata": {}, "outputs": [], "source": [ - "y_hat_deserialized = bart_model_deserialized.predict(X_test, basis_test)\n", + "bart_preds_deserialized = bart_model_deserialized.predict(X_test, basis_test)\n", + "y_hat_deserialized = bart_preds_deserialized['y_hat']\n", "y_avg_mcmc_deserialized = np.squeeze(y_hat_deserialized).mean(axis=1, keepdims=True)\n", "y_df = pd.DataFrame(\n", " np.concatenate((y_avg_mcmc, y_avg_mcmc_deserialized), axis=1),\n", @@ -325,7 +326,8 @@ "metadata": {}, "outputs": [], "source": [ - "y_hat_file_deserialized = bart_model_file_deserialized.predict(X_test, basis_test)\n", + "bart_preds_file_deserialized = bart_model_file_deserialized.predict(X_test, basis_test)\n", + "y_hat_file_deserialized = bart_preds_file_deserialized['y_hat']\n", "y_avg_mcmc_file_deserialized = np.squeeze(y_hat_file_deserialized).mean(\n", " axis=1, keepdims=True\n", ")\n", @@ -381,7 +383,7 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "venv (3.12.9)", "language": "python", "name": "python3" }, diff --git a/include/stochtree/category_tracker.h b/include/stochtree/category_tracker.h index e5817419..2ce44635 100644 --- a/include/stochtree/category_tracker.h +++ b/include/stochtree/category_tracker.h @@ -29,12 +29,8 @@ #include #include -#include #include #include -#include -#include -#include #include namespace StochTree { diff --git a/include/stochtree/common.h b/include/stochtree/common.h index c7aab3df..cd57eea2 100644 --- a/include/stochtree/common.h +++ b/include/stochtree/common.h @@ -8,22 +8,18 @@ #include #include -#include #include #include #include #include #include -#include #include #include #include -#include #include #include #include #include -#include #include #include diff --git a/include/stochtree/container.h b/include/stochtree/container.h index bb0e7849..4b75ef2f 100644 --- a/include/stochtree/container.h +++ b/include/stochtree/container.h @@ -11,12 +11,7 @@ #include #include -#include -#include #include -#include -#include -#include namespace StochTree { diff --git a/include/stochtree/cutpoint_candidates.h b/include/stochtree/cutpoint_candidates.h index 8c19013a..76f1df4c 100644 --- a/include/stochtree/cutpoint_candidates.h +++ b/include/stochtree/cutpoint_candidates.h @@ -42,8 +42,6 @@ #include #include -#include - namespace StochTree { /*! 
\brief Computing and tracking cutpoints available for a given feature at a given node diff --git a/include/stochtree/data.h b/include/stochtree/data.h index 47b4fb9b..cc62ab06 100644 --- a/include/stochtree/data.h +++ b/include/stochtree/data.h @@ -9,7 +9,6 @@ #include #include #include -#include namespace StochTree { diff --git a/include/stochtree/ensemble.h b/include/stochtree/ensemble.h index 4624b5a4..4f6ddf42 100644 --- a/include/stochtree/ensemble.h +++ b/include/stochtree/ensemble.h @@ -14,12 +14,6 @@ #include #include -#include -#include -#include -#include -#include - using json = nlohmann::json; namespace StochTree { diff --git a/include/stochtree/io.h b/include/stochtree/io.h index 3bc277fb..55963946 100644 --- a/include/stochtree/io.h +++ b/include/stochtree/io.h @@ -28,12 +28,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include diff --git a/include/stochtree/leaf_model.h b/include/stochtree/leaf_model.h index 0e5234b5..5359775d 100644 --- a/include/stochtree/leaf_model.h +++ b/include/stochtree/leaf_model.h @@ -13,12 +13,12 @@ #include #include #include +#include #include #include #include #include -#include #include namespace StochTree { @@ -396,6 +396,16 @@ class GaussianConstantSuffStat { sum_w = 0.0; sum_yw = 0.0; } + /*! + * \brief Increment the value of each sufficient statistic by the values provided by `suff_stat` + * + * \param suff_stat Sufficient statistic to be added to the current sufficient statistics + */ + void AddSuffStatInplace(GaussianConstantSuffStat& suff_stat) { + n += suff_stat.n; + sum_w += suff_stat.sum_w; + sum_yw += suff_stat.sum_yw; + } /*! * \brief Set the value of each sufficient statistic to the sum of the values provided by `lhs` and `rhs` * @@ -550,6 +560,16 @@ class GaussianUnivariateRegressionSuffStat { sum_xxw = 0.0; sum_yxw = 0.0; } + /*! + * \brief Increment the value of each sufficient statistic by the values provided by `suff_stat` + * + * \param suff_stat Sufficient statistic to be added to the current sufficient statistics + */ + void AddSuffStatInplace(GaussianUnivariateRegressionSuffStat& suff_stat) { + n += suff_stat.n; + sum_xxw += suff_stat.sum_xxw; + sum_yxw += suff_stat.sum_yxw; + } /*! * \brief Set the value of each sufficient statistic to the sum of the values provided by `lhs` and `rhs` * @@ -695,6 +715,16 @@ class GaussianMultivariateRegressionSuffStat { XtWX = Eigen::MatrixXd::Zero(p, p); ytWX = Eigen::MatrixXd::Zero(1, p); } + /*! + * \brief Increment the value of each sufficient statistic by the values provided by `suff_stat` + * + * \param suff_stat Sufficient statistic to be added to the current sufficient statistics + */ + void AddSuffStatInplace(GaussianMultivariateRegressionSuffStat& suff_stat) { + n += suff_stat.n; + XtWX += suff_stat.XtWX; + ytWX += suff_stat.ytWX; + } /*! * \brief Set the value of each sufficient statistic to the sum of the values provided by `lhs` and `rhs` * @@ -829,6 +859,15 @@ class LogLinearVarianceSuffStat { n = 0; weighted_sum_ei = 0.0; } + /*! + * \brief Increment the value of each sufficient statistic by the values provided by `suff_stat` + * + * \param suff_stat Sufficient statistic to be added to the current sufficient statistics + */ + void AddSuffStatInplace(LogLinearVarianceSuffStat& suff_stat) { + n += suff_stat.n; + weighted_sum_ei += suff_stat.weighted_sum_ei; + } /*! 
* \brief Set the value of each sufficient statistic to the sum of the values provided by `lhs` and `rhs`
   *
@@ -1005,22 +1044,73 @@ static inline LeafModelVariant leafModelFactory(ModelType model_type, double tau
   }
 }
 
-template <typename SuffStatType>
-static inline void AccumulateSuffStatProposed(SuffStatType& node_suff_stat, SuffStatType& left_suff_stat, SuffStatType& right_suff_stat, ForestDataset& dataset, ForestTracker& tracker, 
-                                              ColumnVector& residual, double global_variance, TreeSplit& split, int tree_num, int leaf_num, int split_feature) {
-  // Acquire iterators
-  auto node_begin_iter = tracker.UnsortedNodeBeginIterator(tree_num, leaf_num);
-  auto node_end_iter = tracker.UnsortedNodeEndIterator(tree_num, leaf_num);
+template <typename SuffStatType, typename... SuffStatConstructorArgs>
+static inline void AccumulateSuffStatProposed(
+    SuffStatType& node_suff_stat, SuffStatType& left_suff_stat, SuffStatType& right_suff_stat, ForestDataset& dataset, ForestTracker& tracker, 
+    ColumnVector& residual, double global_variance, TreeSplit& split, int tree_num, int leaf_num, int split_feature, int num_threads, 
+    SuffStatConstructorArgs&... suff_stat_args
+) {
+  // Determine the position of the node's indices in the forest tracking data structure
+  int node_begin_index = tracker.UnsortedNodeBegin(tree_num, leaf_num);
+  int node_end_index = tracker.UnsortedNodeEnd(tree_num, leaf_num);
 
-  // Accumulate sufficient statistics
-  for (auto i = node_begin_iter; i != node_end_iter; i++) {
-    auto idx = *i;
-    double feature_value = dataset.CovariateValue(idx, split_feature);
-    node_suff_stat.IncrementSuffStat(dataset, residual.GetData(), tracker, idx, tree_num);
-    if (split.SplitTrue(feature_value)) {
-      left_suff_stat.IncrementSuffStat(dataset, residual.GetData(), tracker, idx, tree_num);
-    } else {
-      right_suff_stat.IncrementSuffStat(dataset, residual.GetData(), tracker, idx, tree_num);
+  // Extract pointer to the feature partition for tree_num
+  UnsortedNodeSampleTracker* unsorted_node_sample_tracker = tracker.GetUnsortedNodeSampleTracker();
+  FeatureUnsortedPartition* feature_partition = unsorted_node_sample_tracker->GetFeaturePartition(tree_num);
+
+  // Determine the number of threads to use; guard against a non-positive request 
+  // (e.g. the -1 "auto" sentinel) before dividing by it
+  if (num_threads <= 0) {
+    num_threads = 1;
+  }
+  int chunk_size = (node_end_index - node_begin_index) / num_threads;
+  if (chunk_size < 100) {
+    num_threads = 1;
+    chunk_size = node_end_index - node_begin_index;
+  }
+
+  if (num_threads > 1) {
+    // Split the work into num_threads chunks
+    std::vector<std::pair<int, int>> thread_ranges(num_threads);
+    std::vector<SuffStatType> thread_suff_stats_node;
+    std::vector<SuffStatType> thread_suff_stats_left;
+    std::vector<SuffStatType> thread_suff_stats_right;
+    for (int i = 0; i < num_threads; i++) {
+      // The last chunk absorbs any remainder left over by the integer division above
+      int chunk_end = (i == num_threads - 1) ? node_end_index : node_begin_index + (i + 1) * chunk_size;
+      thread_ranges[i] = std::make_pair(node_begin_index + i * chunk_size, chunk_end);
+      thread_suff_stats_node.emplace_back(suff_stat_args...);
+      thread_suff_stats_left.emplace_back(suff_stat_args...);
+      thread_suff_stats_right.emplace_back(suff_stat_args...);
+    }
+
+    // Accumulate sufficient statistics
+    StochTree::ParallelFor(0, num_threads, num_threads, [&](int i) {
+      int start_idx = thread_ranges[i].first;
+      int end_idx = thread_ranges[i].second;
+      for (int idx = start_idx; idx < end_idx; idx++) {
+        int obs_num = feature_partition->indices_[idx];
+        double feature_value = dataset.CovariateValue(obs_num, split_feature);
+        thread_suff_stats_node[i].IncrementSuffStat(dataset, residual.GetData(), tracker, obs_num, tree_num);
+        if (split.SplitTrue(feature_value)) {
+          thread_suff_stats_left[i].IncrementSuffStat(dataset, residual.GetData(), tracker, obs_num, tree_num);
+        } else {
+          thread_suff_stats_right[i].IncrementSuffStat(dataset, residual.GetData(), tracker, obs_num, tree_num);
+        }
+      }
+    });
+
+    // Combine the thread-local sufficient statistics
+    for (int i = 0; i < num_threads; i++) {
+      node_suff_stat.AddSuffStatInplace(thread_suff_stats_node[i]);
+      left_suff_stat.AddSuffStatInplace(thread_suff_stats_left[i]);
+      right_suff_stat.AddSuffStatInplace(thread_suff_stats_right[i]);
+    }
+  } else {
+    for (int idx = node_begin_index; idx < node_end_index; idx++) {
+      int obs_num = feature_partition->indices_[idx];
+      double feature_value = dataset.CovariateValue(obs_num, split_feature);
+      node_suff_stat.IncrementSuffStat(dataset, residual.GetData(), tracker, obs_num, tree_num);
+      if (split.SplitTrue(feature_value)) {
+        left_suff_stat.IncrementSuffStat(dataset, residual.GetData(), tracker, obs_num, tree_num);
+      } else {
+        right_suff_stat.IncrementSuffStat(dataset, residual.GetData(), tracker, obs_num, tree_num);
+      }
     }
   }
 }
diff --git a/include/stochtree/log.h b/include/stochtree/log.h
index 3a4c5600..9f64c31b 100644
--- a/include/stochtree/log.h
+++ b/include/stochtree/log.h
@@ -15,8 +15,6 @@
 #include
 #include
 #include
-#include
-#include
 #include
 #include
diff --git a/include/stochtree/meta.h b/include/stochtree/meta.h
index 991c254f..d0aa4049 100644
--- a/include/stochtree/meta.h
+++ b/include/stochtree/meta.h
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/include/stochtree/openmp_utils.h b/include/stochtree/openmp_utils.h
new file mode 100644
index 00000000..28ed31fb
--- /dev/null
+++ b/include/stochtree/openmp_utils.h
@@ -0,0 +1,114 @@
+#ifndef STOCHTREE_OPENMP_UTILS_H
+#define STOCHTREE_OPENMP_UTILS_H
+
+#include <algorithm>
+#include
+
+namespace StochTree {
+
+#ifdef STOCHTREE_OPENMP_AVAILABLE
+
+#include <omp.h>
+#define STOCHTREE_HAS_OPENMP 1
+
+// OpenMP thread management
+inline int get_max_threads() {
+    return omp_get_max_threads();
+}
+
+inline int get_thread_num() {
+    return omp_get_thread_num();
+}
+
+inline int get_num_threads() {
+    return omp_get_num_threads();
+}
+
+inline void set_num_threads(int num_threads) {
+    omp_set_num_threads(num_threads);
+}
+
+// Note: _Pragma takes a string literal, so `num_threads` inside it refers to a
+// variable of that exact name in the enclosing scope, not to the macro argument
+#define STOCHTREE_PARALLEL_FOR(num_threads) \
+    _Pragma("omp parallel for num_threads(num_threads)")
+
+#define STOCHTREE_REDUCTION_ADD(var) \
+    _Pragma("omp reduction(+:var)")
+
+#define STOCHTREE_CRITICAL \
+    _Pragma("omp critical")
+
+#else
+#define STOCHTREE_HAS_OPENMP 0
+
+// Fallback implementations when OpenMP is not available
+inline int get_max_threads() {return 1;}
+
+inline int get_thread_num() {return 0;}
+
+inline int get_num_threads() {return 1;}
+
+inline void set_num_threads(int num_threads) {}
+
+#define STOCHTREE_PARALLEL_FOR(num_threads)
+
+#define STOCHTREE_REDUCTION_ADD(var)
+
+#define STOCHTREE_CRITICAL
+
+#endif
+
+static int GetMaxThreads() {
+    return get_max_threads();
+}
+
+static int GetCurrentThreadNum() {
+    return get_thread_num();
+}
+
+static int GetNumThreads() {
+    return get_num_threads();
+}
+
+static void SetNumThreads(int num_threads) {
+    set_num_threads(num_threads);
+}
+
+static bool IsOpenMPAvailable() {
+    return STOCHTREE_HAS_OPENMP;
+}
+
+static int GetOptimalThreadCount(int workload_size, int min_work_per_thread = 1000) {
+    if (!IsOpenMPAvailable()) {
+        return 1;
+    }
+
+    int max_threads = GetMaxThreads();
+    int optimal_threads = workload_size / min_work_per_thread;
+
+    // Clamp to at least one thread so small workloads do not request zero threads
+    return std::max(1, std::min(optimal_threads, max_threads));
+}
+
+// Parallel execution utilities
+template <typename Func>
+void ParallelFor(int start, int end, int num_threads, Func func) {
+    if (num_threads <= 0) {
+        num_threads =
GetOptimalThreadCount(end - start); + } + + if (num_threads == 1 || !STOCHTREE_HAS_OPENMP) { + // Sequential execution + for (int i = start; i < end; ++i) { + func(i); + } + } else { + // Parallel execution + STOCHTREE_PARALLEL_FOR(num_threads) + for (int i = start; i < end; ++i) { + func(i); + } + } +} + +} // namespace StochTree + +#endif // STOCHTREE_OPENMP_UTILS_H \ No newline at end of file diff --git a/include/stochtree/partition_tracker.h b/include/stochtree/partition_tracker.h index 6546b593..0790d87a 100644 --- a/include/stochtree/partition_tracker.h +++ b/include/stochtree/partition_tracker.h @@ -28,13 +28,10 @@ #include #include #include +#include #include -#include #include -#include -#include -#include #include namespace StochTree { @@ -68,7 +65,7 @@ class ForestTracker { void UpdateSampleTrackers(TreeEnsemble& forest, ForestDataset& dataset); void UpdateSampleTrackersResidual(TreeEnsemble& forest, ForestDataset& dataset, ColumnVector& residual, bool is_mean_model); void ResetRoot(Eigen::MatrixXd& covariates, std::vector& feature_types, int32_t tree_num); - void AddSplit(Eigen::MatrixXd& covariates, TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted = false); + void AddSplit(Eigen::MatrixXd& covariates, TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted = false, int num_threads = -1); void RemoveSplit(Eigen::MatrixXd& covariates, Tree* tree, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted = false); double GetSamplePrediction(data_size_t sample_id); double GetTreeSamplePrediction(data_size_t sample_id, int tree_id); @@ -434,7 +431,7 @@ class UnsortedNodeSampleTracker { /*! \brief Number of trees */ int NumTrees() { return num_trees_; } - /*! \brief Number of trees */ + /*! \brief Return a pointer to the feature partition tracking tree i */ FeatureUnsortedPartition* GetFeaturePartition(int i) { return feature_partitions_[i].get(); } private: @@ -615,24 +612,24 @@ class SortedNodeSampleTracker { } /*! \brief Partition a node based on a new split rule */ - void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, TreeSplit& split) { - for (int i = 0; i < num_features_; i++) { + void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, TreeSplit& split, int num_threads = -1) { + StochTree::ParallelFor(0, num_features_, num_threads, [&](int i) { feature_partitions_[i]->SplitFeature(covariates, node_id, feature_split, split); - } + }); } /*! \brief Partition a node based on a new split rule */ - void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, double split_value) { - for (int i = 0; i < num_features_; i++) { + void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, double split_value, int num_threads = -1) { + StochTree::ParallelFor(0, num_features_, num_threads, [&](int i) { feature_partitions_[i]->SplitFeatureNumeric(covariates, node_id, feature_split, split_value); - } + }); } /*! 
\brief Partition a node based on a new split rule */
-  void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, std::vector const& category_list) {
-    for (int i = 0; i < num_features_; i++) {
+  void PartitionNode(Eigen::MatrixXd& covariates, int node_id, int feature_split, std::vector const& category_list, int num_threads = -1) {
+    StochTree::ParallelFor(0, num_features_, num_threads, [&](int i) {
       feature_partitions_[i]->SplitFeatureCategorical(covariates, node_id, feature_split, category_list);
-    }
+    });
   }
 
   /*! \brief First index of data points contained in node_id */
diff --git a/include/stochtree/random.h b/include/stochtree/random.h
index a841f396..3d39b647 100644
--- a/include/stochtree/random.h
+++ b/include/stochtree/random.h
@@ -5,7 +5,6 @@
 #ifndef STOCHTREE_RANDOM_H_
 #define STOCHTREE_RANDOM_H_
 
-#include
 #include
 #include
 #include
diff --git a/include/stochtree/random_effects.h b/include/stochtree/random_effects.h
index 701ebeaa..b322a560 100644
--- a/include/stochtree/random_effects.h
+++ b/include/stochtree/random_effects.h
@@ -17,14 +17,11 @@
 #include
 #include
-#include
 #include
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 
 namespace StochTree {
diff --git a/include/stochtree/tree.h b/include/stochtree/tree.h
index 85ce7191..3810e3cb 100644
--- a/include/stochtree/tree.h
+++ b/include/stochtree/tree.h
@@ -13,9 +13,6 @@
 #include
 #include
-#include
-#include
-#include
 #include
 #include
diff --git a/include/stochtree/tree_sampler.h b/include/stochtree/tree_sampler.h
index 8810b938..68c9c15a 100644
--- a/include/stochtree/tree_sampler.h
+++ b/include/stochtree/tree_sampler.h
@@ -8,18 +8,13 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-#include
-#include
 #include
 #include
-#include
-#include
-#include
-#include
 #include
 
 namespace StochTree {
@@ -27,7 +22,7 @@ namespace StochTree {
 /*!
  * \defgroup sampling_group Forest Sampler API
  *
- * \brief Functions for sampling from a forest. The core interfce of these functions,
+ * \brief Functions for sampling from a forest. The core interface of these functions,
  * as used by the R, Python, and standalone C++ program, is defined by
 * \ref MCMCSampleOneIter, which runs one iteration of the MCMC sampler for a
 * given forest, and \ref GFRSampleOneIter, which runs one iteration of the
@@ -152,7 +147,7 @@ static inline bool NodeNonConstant(ForestDataset& dataset, ForestTracker& tracke
 }
 
 static inline void AddSplitToModel(ForestTracker& tracker, ForestDataset& dataset, TreePrior& tree_prior, TreeSplit& split, std::mt19937& gen, Tree* tree, 
-                                   int tree_num, int leaf_node, int feature_split, bool keep_sorted = false) {
+                                   int tree_num, int leaf_node, int feature_split, bool keep_sorted = false, int num_threads = -1) {
   // Use zeros as a "temporary" leaf values since we draw leaf parameters after tree sampling is complete
   if (tree->OutputDimension() > 1) {
     std::vector<double> temp_leaf_values(tree->OutputDimension(), 0.);
@@ -165,7 +160,7 @@ static inline void AddSplitToModel(ForestTracker& tracker, ForestDataset& datase
   int right_node = tree->RightChild(leaf_node);
 
   // Update the ForestTracker
-  tracker.AddSplit(dataset.GetCovariates(), split, feature_split, tree_num, leaf_node, left_node, right_node, keep_sorted);
+  tracker.AddSplit(dataset.GetCovariates(), split, feature_split, tree_num, leaf_node, left_node, right_node, keep_sorted, num_threads);
 }
 
 static inline void RemoveSplitFromModel(ForestTracker& tracker, ForestDataset& dataset, TreePrior& tree_prior, std::mt19937& gen, Tree* tree, 
@@ -403,7 +398,7 @@ template <typename LeafModel, typename LeafSuffStat, typename... LeafSuffStatConstructorArgs>
 static inline std::tuple<double, double, data_size_t, data_size_t> EvaluateProposedSplit(
     ForestDataset& dataset, ForestTracker& tracker, ColumnVector& residual, LeafModel& leaf_model, 
     TreeSplit& split, int tree_num, int leaf_num, int split_feature, double global_variance, 
-    LeafSuffStatConstructorArgs&... leaf_suff_stat_args
+    int num_threads, LeafSuffStatConstructorArgs&... leaf_suff_stat_args
 ) {
   // Initialize sufficient statistics
   LeafSuffStat node_suff_stat = LeafSuffStat(leaf_suff_stat_args...);
@@ -411,8 +406,11 @@ static inline std::tuple EvaluatePropo
   LeafSuffStat right_suff_stat = LeafSuffStat(leaf_suff_stat_args...);
 
   // Accumulate sufficient statistics
-  AccumulateSuffStatProposed(node_suff_stat, left_suff_stat, right_suff_stat, dataset, tracker, 
-                             residual, global_variance, split, tree_num, leaf_num, split_feature);
+  AccumulateSuffStatProposed(
+      node_suff_stat, left_suff_stat, right_suff_stat, dataset, tracker, 
+      residual, global_variance, split, tree_num, leaf_num, split_feature, num_threads, 
+      leaf_suff_stat_args...
+  );
   data_size_t left_n = left_suff_stat.n;
   data_size_t right_n = right_suff_stat.n;
@@ -469,140 +467,12 @@ static inline void AdjustStateAfterTreeSampling(ForestTracker& tracker, LeafMode
   }
 }
 
-template
-static inline void EvaluateAllPossibleSplits(
-    ForestDataset& dataset, ForestTracker& tracker, ColumnVector& residual, TreePrior& tree_prior, std::mt19937& gen, LeafModel& leaf_model, double global_variance, int tree_num, int split_node_id, 
-    std::vector& log_cutpoint_evaluations, std::vector& cutpoint_features, std::vector& cutpoint_values, std::vector& cutpoint_feature_types, 
-    data_size_t& valid_cutpoint_count, CutpointGridContainer& cutpoint_grid_container, data_size_t node_begin, data_size_t node_end, std::vector& variable_weights, 
-    std::vector& feature_types, std::vector& feature_subset, LeafSuffStatConstructorArgs&...
leaf_suff_stat_args -) { - // Initialize sufficient statistics - LeafSuffStat node_suff_stat = LeafSuffStat(leaf_suff_stat_args...); - LeafSuffStat left_suff_stat = LeafSuffStat(leaf_suff_stat_args...); - LeafSuffStat right_suff_stat = LeafSuffStat(leaf_suff_stat_args...); - - // Accumulate aggregate sufficient statistic for the node to be split - AccumulateSingleNodeSuffStat(node_suff_stat, dataset, tracker, residual, tree_num, split_node_id); - - // Compute the "no split" log marginal likelihood - double no_split_log_ml = leaf_model.NoSplitLogMarginalLikelihood(node_suff_stat, global_variance); - - // Unpack data - Eigen::MatrixXd covariates = dataset.GetCovariates(); - Eigen::VectorXd outcome = residual.GetData(); - Eigen::VectorXd var_weights; - bool has_weights = dataset.HasVarWeights(); - if (has_weights) var_weights = dataset.GetVarWeights(); - - // Minimum size of newly created leaf nodes (used to rule out invalid splits) - int32_t min_samples_in_leaf = tree_prior.GetMinSamplesLeaf(); - - // Compute sufficient statistics for each possible split - data_size_t num_cutpoints = 0; - bool valid_split = false; - data_size_t node_row_iter; - data_size_t current_bin_begin, current_bin_size, next_bin_begin; - data_size_t feature_sort_idx; - data_size_t row_iter_idx; - double outcome_val, outcome_val_sq; - FeatureType feature_type; - double feature_value = 0.0; - double cutoff_value = 0.0; - double log_split_eval = 0.0; - double split_log_ml; - for (int j = 0; j < covariates.cols(); j++) { - - if ((std::abs(variable_weights.at(j)) > kEpsilon) && (feature_subset[j])) { - // Enumerate cutpoint strides - cutpoint_grid_container.CalculateStrides(covariates, outcome, tracker.GetSortedNodeSampleTracker(), split_node_id, node_begin, node_end, j, feature_types); - - // Reset sufficient statistics - left_suff_stat.ResetSuffStat(); - right_suff_stat.ResetSuffStat(); - - // Iterate through possible cutpoints - int32_t num_feature_cutpoints = cutpoint_grid_container.NumCutpoints(j); - feature_type = feature_types[j]; - // Since we partition an entire cutpoint bin to the left, we must stop one bin before the total number of cutpoint bins - for (data_size_t cutpoint_idx = 0; cutpoint_idx < (num_feature_cutpoints - 1); cutpoint_idx++) { - current_bin_begin = cutpoint_grid_container.BinStartIndex(cutpoint_idx, j); - current_bin_size = cutpoint_grid_container.BinLength(cutpoint_idx, j); - next_bin_begin = cutpoint_grid_container.BinStartIndex(cutpoint_idx + 1, j); - - // Accumulate sufficient statistics for the left node - AccumulateCutpointBinSuffStat(left_suff_stat, tracker, cutpoint_grid_container, dataset, residual, - global_variance, tree_num, split_node_id, j, cutpoint_idx); - - // Compute the corresponding right node sufficient statistics - right_suff_stat.SubtractSuffStat(node_suff_stat, left_suff_stat); - - // Store the bin index as the "cutpoint value" - we can use this to query the actual split - // value or the set of split categories later on once a split is chose - cutoff_value = cutpoint_idx; - - // Only include cutpoint for consideration if it defines a valid split in the training data - valid_split = (left_suff_stat.SampleGreaterThanEqual(min_samples_in_leaf) && - right_suff_stat.SampleGreaterThanEqual(min_samples_in_leaf)); - if (valid_split) { - num_cutpoints++; - // Add to split rule vector - cutpoint_feature_types.push_back(feature_type); - cutpoint_features.push_back(j); - cutpoint_values.push_back(cutoff_value); - // Add the log marginal likelihood of the split to the split eval vector 
- split_log_ml = leaf_model.SplitLogMarginalLikelihood(left_suff_stat, right_suff_stat, global_variance); - log_cutpoint_evaluations.push_back(split_log_ml); - } - } - } - - } - - // Add the log marginal likelihood of the "no-split" option (adjusted for tree prior and cutpoint size per the XBART paper) - cutpoint_features.push_back(-1); - cutpoint_values.push_back(std::numeric_limits::max()); - cutpoint_feature_types.push_back(FeatureType::kNumeric); - log_cutpoint_evaluations.push_back(no_split_log_ml); - - // Update valid cutpoint count - valid_cutpoint_count = num_cutpoints; -} - -template -static inline void EvaluateCutpoints(Tree* tree, ForestTracker& tracker, LeafModel& leaf_model, ForestDataset& dataset, ColumnVector& residual, TreePrior& tree_prior, - std::mt19937& gen, int tree_num, double global_variance, int cutpoint_grid_size, int node_id, data_size_t node_begin, data_size_t node_end, - std::vector& log_cutpoint_evaluations, std::vector& cutpoint_features, std::vector& cutpoint_values, - std::vector& cutpoint_feature_types, data_size_t& valid_cutpoint_count, std::vector& variable_weights, - std::vector& feature_types, CutpointGridContainer& cutpoint_grid_container, - std::vector& feature_subset, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { - // Evaluate all possible cutpoints according to the leaf node model, - // recording their log-likelihood and other split information in a series of vectors. - // The last element of these vectors concerns the "no-split" option. - EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, tree_num, node_id, log_cutpoint_evaluations, - cutpoint_features, cutpoint_values, cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, - node_begin, node_end, variable_weights, feature_types, feature_subset, leaf_suff_stat_args... - ); - - // Compute an adjustment to reflect the no split prior probability and the number of cutpoints - double bart_prior_no_split_adj; - double alpha = tree_prior.GetAlpha(); - double beta = tree_prior.GetBeta(); - int node_depth = tree->GetDepth(node_id); - if (valid_cutpoint_count == 0) { - bart_prior_no_split_adj = std::log(((std::pow(1+node_depth, beta))/alpha) - 1.0); - } else { - bart_prior_no_split_adj = std::log(((std::pow(1+node_depth, beta))/alpha) - 1.0) + std::log(valid_cutpoint_count); - } - log_cutpoint_evaluations[log_cutpoint_evaluations.size()-1] += bart_prior_no_split_adj; -} - template static inline void SampleSplitRule(Tree* tree, ForestTracker& tracker, LeafModel& leaf_model, ForestDataset& dataset, ColumnVector& residual, TreePrior& tree_prior, std::mt19937& gen, int tree_num, double global_variance, int cutpoint_grid_size, std::unordered_map>& node_index_map, std::deque& split_queue, int node_id, data_size_t node_begin, data_size_t node_end, std::vector& variable_weights, - std::vector& feature_types, std::vector feature_subset, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + std::vector& feature_types, std::vector feature_subset, int num_threads, LeafSuffStatConstructorArgs&... 
leaf_suff_stat_args) { // Leaf depth int leaf_depth = tree->GetDepth(node_id); @@ -610,41 +480,153 @@ static inline void SampleSplitRule(Tree* tree, ForestTracker& tracker, LeafModel int32_t max_depth = tree_prior.GetMaxDepth(); if ((max_depth == -1) || (leaf_depth < max_depth)) { - - // Cutpoint enumeration - std::vector log_cutpoint_evaluations; - std::vector cutpoint_features; - std::vector cutpoint_values; - std::vector cutpoint_feature_types; + + // Vector of vectors to store results for each feature + int p = dataset.NumCovariates(); + std::vector> feature_log_cutpoint_evaluations(p+1); + std::vector> feature_cutpoint_values(p+1); + std::vector feature_cutpoint_counts(p+1, 0); StochTree::data_size_t valid_cutpoint_count; - CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); - EvaluateCutpoints( - tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, global_variance, - cutpoint_grid_size, node_id, node_begin, node_end, log_cutpoint_evaluations, cutpoint_features, - cutpoint_values, cutpoint_feature_types, valid_cutpoint_count, variable_weights, feature_types, - cutpoint_grid_container, feature_subset, leaf_suff_stat_args... - ); - // TODO: maybe add some checks here? - // Convert log marginal likelihood to marginal likelihood, normalizing by the maximum log-likelihood - double largest_mll = *std::max_element(log_cutpoint_evaluations.begin(), log_cutpoint_evaluations.end()); - std::vector cutpoint_evaluations(log_cutpoint_evaluations.size()); - for (data_size_t i = 0; i < log_cutpoint_evaluations.size(); i++){ - cutpoint_evaluations[i] = std::exp(log_cutpoint_evaluations[i] - largest_mll); + // Evaluate all possible cutpoints according to the leaf node model, + // recording their log-likelihood and other split information in a series of vectors. 
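+    // Implementation note: cutpoints are enumerated feature by feature, so the
+    // per-feature result vectors below can be filled in parallel without locking;
+    // slot p (one past the last feature index) is reserved for the "no split" option.
+    // A split is then drawn in two stages: first a feature (or "no split") with
+    // probability proportional to its total marginal likelihood, then a cutpoint
+    // within that feature, which is distributionally equivalent to sampling from
+    // the flat list of (feature, cutpoint) pairs used previously.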
+
+    // Initialize node sufficient statistics
+    LeafSuffStat node_suff_stat = LeafSuffStat(leaf_suff_stat_args...);
+
+    // Accumulate aggregate sufficient statistic for the node to be split
+    AccumulateSingleNodeSuffStat(node_suff_stat, dataset, tracker, residual, tree_num, node_id);
+
+    // Compute the "no split" log marginal likelihood
+    double no_split_log_ml = leaf_model.NoSplitLogMarginalLikelihood(node_suff_stat, global_variance);
+
+    // Unpack data
+    Eigen::MatrixXd& covariates = dataset.GetCovariates();
+    Eigen::VectorXd& outcome = residual.GetData();
+    Eigen::VectorXd var_weights;
+    bool has_weights = dataset.HasVarWeights();
+    if (has_weights) var_weights = dataset.GetVarWeights();
+
+    // Minimum size of newly created leaf nodes (used to rule out invalid splits)
+    int32_t min_samples_in_leaf = tree_prior.GetMinSamplesLeaf();
+
+    // Compute sufficient statistics for each possible split
+    data_size_t num_cutpoints = 0;
+    if (num_threads == -1) {
+      num_threads = GetOptimalThreadCount(static_cast<int>(covariates.cols() * covariates.rows()));
+    }
+
+    // Initialize cutpoint grid container
     CutpointGridContainer cutpoint_grid_container(covariates, outcome, cutpoint_grid_size);
 
-    // Sample the split (including a "no split" option)
-    std::discrete_distribution split_dist(cutpoint_evaluations.begin(), cutpoint_evaluations.end());
-    data_size_t split_chosen = split_dist(gen);
+    // Evaluate all possible splits for each feature in parallel
+    StochTree::ParallelFor(0, covariates.cols(), num_threads, [&](int j) {
+      if ((std::abs(variable_weights.at(j)) > kEpsilon) && (feature_subset[j])) {
+        // Enumerate cutpoint strides
+        cutpoint_grid_container.CalculateStrides(covariates, outcome, tracker.GetSortedNodeSampleTracker(), node_id, node_begin, node_end, j, feature_types);
+
+        // Left and right node sufficient statistics
+        LeafSuffStat left_suff_stat = LeafSuffStat(leaf_suff_stat_args...);
+        LeafSuffStat right_suff_stat = LeafSuffStat(leaf_suff_stat_args...);
+
+        // Iterate through possible cutpoints
+        int32_t num_feature_cutpoints = cutpoint_grid_container.NumCutpoints(j);
+        FeatureType feature_type = feature_types[j];
+        // Since we partition an entire cutpoint bin to the left, we must stop one bin before the total number of cutpoint bins
+        for (data_size_t cutpoint_idx = 0; cutpoint_idx < (num_feature_cutpoints - 1); cutpoint_idx++) {
+          data_size_t current_bin_begin = cutpoint_grid_container.BinStartIndex(cutpoint_idx, j);
+          data_size_t current_bin_size = cutpoint_grid_container.BinLength(cutpoint_idx, j);
+          data_size_t next_bin_begin = cutpoint_grid_container.BinStartIndex(cutpoint_idx + 1, j);
+
+          // Accumulate sufficient statistics for the left node
+          AccumulateCutpointBinSuffStat(left_suff_stat, tracker, cutpoint_grid_container, dataset, residual, 
+                                        global_variance, tree_num, node_id, j, cutpoint_idx);
+
+          // Compute the corresponding right node sufficient statistics
+          right_suff_stat.SubtractSuffStat(node_suff_stat, left_suff_stat);
+
+          // Store the bin index as the "cutpoint value" - we can use this to query the actual split
+          // value or the set of split categories later on once a split is chosen
+          double cutoff_value = cutpoint_idx;
+
+          // Only include cutpoint for consideration if it defines a valid split in the training data
+          bool valid_split = (left_suff_stat.SampleGreaterThanEqual(min_samples_in_leaf) && 
+                              right_suff_stat.SampleGreaterThanEqual(min_samples_in_leaf));
+          if (valid_split) {
+            feature_cutpoint_counts[j]++;
+            // Add to split rule vector
+            feature_cutpoint_values[j].push_back(cutoff_value);
+            // Add the log marginal likelihood of the split to the split eval vector
+            double split_log_ml = leaf_model.SplitLogMarginalLikelihood(left_suff_stat, right_suff_stat, global_variance);
+            feature_log_cutpoint_evaluations[j].push_back(split_log_ml);
+          }
+        }
+      }
+    });
+
+    // Compute total number of cutpoints
+    valid_cutpoint_count = std::accumulate(feature_cutpoint_counts.begin(), feature_cutpoint_counts.end(), 0);
+
+    // Add the log marginal likelihood of the "no-split" option (adjusted for tree prior and cutpoint size per the XBART paper)
+    feature_log_cutpoint_evaluations[covariates.cols()].push_back(no_split_log_ml);
 
-    if (split_chosen == valid_cutpoint_count){
+    // Compute an adjustment to reflect the no split prior probability and the number of cutpoints
+    double bart_prior_no_split_adj;
+    double alpha = tree_prior.GetAlpha();
+    double beta = tree_prior.GetBeta();
+    int node_depth = tree->GetDepth(node_id);
+    if (valid_cutpoint_count == 0) {
+      bart_prior_no_split_adj = std::log(((std::pow(1+node_depth, beta))/alpha) - 1.0);
+    } else {
+      bart_prior_no_split_adj = std::log(((std::pow(1+node_depth, beta))/alpha) - 1.0) + std::log(valid_cutpoint_count);
+    }
+    feature_log_cutpoint_evaluations[covariates.cols()][0] += bart_prior_no_split_adj;
+
+    // Convert log marginal likelihood to marginal likelihood, normalizing by the maximum log-likelihood
+    double largest_ml = -std::numeric_limits<double>::infinity();
+    for (int j = 0; j < p + 1; j++) {
+      if (feature_log_cutpoint_evaluations[j].size() > 0) {
+        double feature_max_ml = *std::max_element(feature_log_cutpoint_evaluations[j].begin(), feature_log_cutpoint_evaluations[j].end());
+        largest_ml = std::max(largest_ml, feature_max_ml);
+      }
+    }
+    std::vector<std::vector<double>> feature_cutpoint_evaluations(p+1);
+    for (int j = 0; j < p + 1; j++) {
+      if (feature_log_cutpoint_evaluations[j].size() > 0) {
+        feature_cutpoint_evaluations[j].resize(feature_log_cutpoint_evaluations[j].size());
+        for (int i = 0; i < feature_log_cutpoint_evaluations[j].size(); i++) {
+          feature_cutpoint_evaluations[j][i] = std::exp(feature_log_cutpoint_evaluations[j][i] - largest_ml);
+        }
+      }
+    }
+
+    // Compute sum of marginal likelihoods for each feature
+    std::vector<double> feature_total_cutpoint_evaluations(p+1, 0.0);
+    for (int j = 0; j < p + 1; j++) {
+      if (feature_log_cutpoint_evaluations[j].size() > 0) {
+        feature_total_cutpoint_evaluations[j] = std::accumulate(feature_cutpoint_evaluations[j].begin(), feature_cutpoint_evaluations[j].end(), 0.0);
+      } else {
+        feature_total_cutpoint_evaluations[j] = 0.0;
+      }
+    }
+
+    // First, sample a feature according to feature_total_cutpoint_evaluations
+    std::discrete_distribution<int> feature_dist(feature_total_cutpoint_evaluations.begin(), feature_total_cutpoint_evaluations.end());
+    int feature_chosen = feature_dist(gen);
+
+    // Then, sample a cutpoint according to feature_cutpoint_evaluations[feature_chosen]
+    std::discrete_distribution<data_size_t> cutpoint_dist(feature_cutpoint_evaluations[feature_chosen].begin(), feature_cutpoint_evaluations[feature_chosen].end());
+    data_size_t cutpoint_chosen = cutpoint_dist(gen);
+
+    if (feature_chosen == p){
       // "No split" sampled, don't split or add any nodes to split queue
       return;
     } else {
       // Split sampled
-      int feature_split = cutpoint_features[split_chosen];
-      FeatureType feature_type = cutpoint_feature_types[split_chosen];
-      double split_value = cutpoint_values[split_chosen];
+      int feature_split = feature_chosen;
+      FeatureType feature_type = feature_types[feature_split];
+
double split_value = feature_cutpoint_values[feature_split][cutpoint_chosen]; // Perform all of the relevant "split" operations in the model, tree and training dataset // Compute node sample size @@ -679,7 +661,7 @@ static inline void SampleSplitRule(Tree* tree, ForestTracker& tracker, LeafModel } // Add split to tree and trackers - AddSplitToModel(tracker, dataset, tree_prior, tree_split, gen, tree, tree_num, node_id, feature_split, true); + AddSplitToModel(tracker, dataset, tree_prior, tree_split, gen, tree, tree_num, node_id, feature_split, true, num_threads); // Determine the number of observation in the newly created left node int left_node = tree->LeftChild(node_id); @@ -705,7 +687,7 @@ template & variable_weights, int tree_num, double global_variance, std::vector& feature_types, int cutpoint_grid_size, - int num_features_subsample, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + int num_features_subsample, int num_threads, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { int root_id = Tree::kRoot; int curr_node_id; data_size_t curr_node_begin; @@ -761,7 +743,8 @@ static inline void GFRSampleTreeOneIter(Tree* tree, ForestTracker& tracker, Fore SampleSplitRule( tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, global_variance, cutpoint_grid_size, node_index_map, split_queue, curr_node_id, curr_node_begin, curr_node_end, variable_weights, feature_types, - feature_subset, leaf_suff_stat_args...); + feature_subset, num_threads, leaf_suff_stat_args... + ); } } @@ -799,7 +782,7 @@ template & variable_weights, std::vector& sweep_update_indices, double global_variance, std::vector& feature_types, int cutpoint_grid_size, - bool keep_forest, bool pre_initialized, bool backfitting, int num_features_subsample, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + bool keep_forest, bool pre_initialized, bool backfitting, int num_features_subsample, int num_threads, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Run the GFR algorithm for each tree int num_trees = forests.NumTrees(); for (const int& i : sweep_update_indices) { @@ -819,7 +802,7 @@ static inline void GFRSampleOneIter(TreeEnsemble& active_forest, ForestTracker& GFRSampleTreeOneIter( tree, tracker, forests, leaf_model, dataset, residual, tree_prior, gen, variable_weights, i, global_variance, feature_types, cutpoint_grid_size, - num_features_subsample, leaf_suff_stat_args... + num_features_subsample, num_threads, leaf_suff_stat_args... ); // Sample leaf parameters for tree i @@ -841,7 +824,7 @@ static inline void GFRSampleOneIter(TreeEnsemble& active_forest, ForestTracker& template static inline void MCMCGrowTreeOneIter(Tree* tree, ForestTracker& tracker, LeafModel& leaf_model, ForestDataset& dataset, ColumnVector& residual, TreePrior& tree_prior, std::mt19937& gen, int tree_num, std::vector& variable_weights, - double global_variance, double prob_grow_old, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + double global_variance, double prob_grow_old, int num_threads, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Extract dataset information data_size_t n = dataset.GetCovariates().rows(); @@ -886,7 +869,7 @@ static inline void MCMCGrowTreeOneIter(Tree* tree, ForestTracker& tracker, LeafM // Compute the marginal likelihood of split and no split, given the leaf prior std::tuple split_eval = EvaluateProposedSplit( - dataset, tracker, residual, leaf_model, split, tree_num, leaf_chosen, var_chosen, global_variance, leaf_suff_stat_args... 
+ dataset, tracker, residual, leaf_model, split, tree_num, leaf_chosen, var_chosen, global_variance, num_threads, leaf_suff_stat_args... ); double split_log_marginal_likelihood = std::get<0>(split_eval); double no_split_log_marginal_likelihood = std::get<1>(split_eval); @@ -936,7 +919,7 @@ static inline void MCMCGrowTreeOneIter(Tree* tree, ForestTracker& tracker, LeafM double log_acceptance_prob = std::log(mh_accept(gen)); if (log_acceptance_prob <= log_mh_ratio) { accept = true; - AddSplitToModel(tracker, dataset, tree_prior, split, gen, tree, tree_num, leaf_chosen, var_chosen, false); + AddSplitToModel(tracker, dataset, tree_prior, split, gen, tree, tree_num, leaf_chosen, var_chosen, false, num_threads); } else { accept = false; } @@ -949,7 +932,7 @@ static inline void MCMCGrowTreeOneIter(Tree* tree, ForestTracker& tracker, LeafM template <typename LeafModel, typename LeafSuffStat, typename... LeafSuffStatConstructorArgs> static inline void MCMCPruneTreeOneIter(Tree* tree, ForestTracker& tracker, LeafModel& leaf_model, ForestDataset& dataset, ColumnVector& residual, - TreePrior& tree_prior, std::mt19937& gen, int tree_num, double global_variance, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + TreePrior& tree_prior, std::mt19937& gen, int tree_num, double global_variance, int num_threads, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Choose a "leaf parent" node at random int num_leaves = tree->NumLeaves(); int num_leaf_parents = tree->NumLeafParents(); @@ -1028,7 +1011,7 @@ static inline void MCMCPruneTreeOneIter(Tree* tree, ForestTracker& tracker, Leaf template <typename LeafModel, typename LeafSuffStat, typename... LeafSuffStatConstructorArgs> static inline void MCMCSampleTreeOneIter(Tree* tree, ForestTracker& tracker, ForestContainer& forests, LeafModel& leaf_model, ForestDataset& dataset, ColumnVector& residual, TreePrior& tree_prior, std::mt19937& gen, std::vector<double>& variable_weights, - int tree_num, double global_variance, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { + int tree_num, double global_variance, int num_threads, LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Determine whether it is possible to grow any of the leaves bool grow_possible = false; std::vector<int> leaves = tree->GetLeaves(); @@ -1068,11 +1051,11 @@ static inline void MCMCSampleTreeOneIter(Tree* tree, ForestTracker& tracker, For if (step_chosen == 0) { MCMCGrowTreeOneIter<LeafModel, LeafSuffStat, LeafSuffStatConstructorArgs...>( - tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, variable_weights, global_variance, prob_grow, leaf_suff_stat_args... + tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, variable_weights, global_variance, prob_grow, num_threads, leaf_suff_stat_args... ); } else { MCMCPruneTreeOneIter<LeafModel, LeafSuffStat, LeafSuffStatConstructorArgs...>( - tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, global_variance, leaf_suff_stat_args... + tree, tracker, leaf_model, dataset, residual, tree_prior, gen, tree_num, global_variance, num_threads, leaf_suff_stat_args... ); } } @@ -1107,7 +1090,8 @@ static inline void MCMCSampleTreeOneIter(Tree* tree, ForestTracker& tracker, For template <typename LeafModel, typename LeafSuffStat, typename... LeafSuffStatConstructorArgs> static inline void MCMCSampleOneIter(TreeEnsemble& active_forest, ForestTracker& tracker, ForestContainer& forests, LeafModel& leaf_model, ForestDataset& dataset, ColumnVector& residual, TreePrior& tree_prior, std::mt19937& gen, std::vector<double>& variable_weights, - std::vector<int>& sweep_update_indices, double global_variance, bool keep_forest, bool pre_initialized, bool backfitting, LeafSuffStatConstructorArgs&... 
leaf_suff_stat_args) { + std::vector<int>& sweep_update_indices, double global_variance, bool keep_forest, bool pre_initialized, bool backfitting, int num_threads, + LeafSuffStatConstructorArgs&... leaf_suff_stat_args) { // Run the MCMC algorithm for each tree int num_trees = forests.NumTrees(); for (const int& i : sweep_update_indices) { @@ -1122,7 +1106,7 @@ static inline void MCMCSampleOneIter(TreeEnsemble& active_forest, ForestTracker& tree = active_forest.GetTree(i); MCMCSampleTreeOneIter<LeafModel, LeafSuffStat, LeafSuffStatConstructorArgs...>( tree, tracker, forests, leaf_model, dataset, residual, tree_prior, gen, variable_weights, i, - global_variance, leaf_suff_stat_args... + global_variance, num_threads, leaf_suff_stat_args... ); // Sample leaf parameters for tree i diff --git a/include/stochtree/variance_model.h b/include/stochtree/variance_model.h index 79b8831f..b1c2dabe 100644 --- a/include/stochtree/variance_model.h +++ b/include/stochtree/variance_model.h @@ -12,11 +12,7 @@ #include #include -#include #include -#include -#include -#include namespace StochTree { diff --git a/man/ForestModel.Rd b/man/ForestModel.Rd index 3bb7a1db..bec10621 100644 --- a/man/ForestModel.Rd +++ b/man/ForestModel.Rd @@ -92,6 +92,7 @@ Run a single iteration of the forest sampling algorithm (MCMC or GFR) rng, forest_model_config, global_model_config, + num_threads = -1, keep_forest = TRUE, gfr = TRUE )}\if{html}{\out{</div>}} @@ -114,6 +115,8 @@ Run a single iteration of the forest sampling algorithm (MCMC or GFR) \item{\code{global_model_config}}{GlobalModelConfig object containing global model parameters and settings} +\item{\code{num_threads}}{Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's setup, this will default to \code{1}, otherwise to the maximum number of available threads.} + \item{\code{keep_forest}}{(Optional) Whether the updated forest sample should be saved to \code{forest_samples}. Default: \code{TRUE}.} \item{\code{gfr}}{(Optional) Whether or not the forest should be sampled using the "grow-from-root" (GFR) algorithm. Default: \code{TRUE}.} diff --git a/man/bart.Rd b/man/bart.Rd index 78da3eda..66a9b9ad 100644 --- a/man/bart.Rd +++ b/man/bart.Rd @@ -90,6 +90,7 @@ that were not in the training set.} \item \code{rfx_group_parameter_prior_cov} Prior covariance matrix for the random effects "group parameters." Default: \code{NULL}. Must be a square matrix whose dimension matches the number of random effects bases, or a scalar value that will be expanded to a diagonal matrix. \item \code{rfx_variance_prior_shape} Shape parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: \code{1}. \item \code{rfx_variance_prior_scale} Scale parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: \code{1}. +\item \code{num_threads} Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's setup, this will default to \code{1}, otherwise to the maximum number of available threads. }} \item{mean_forest_params}{(Optional) A list of mean forest model parameters, each of which has a default value processed internally, so this argument list is optional. 
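Both the GFR and MCMC drivers above share the same backfitting bookkeeping around each tree update: tree i's current fit is added back into the residual, the tree and its leaf parameters are re-sampled against that partial residual, and the refreshed fit is subtracted again. A runnable toy that keeps this loop structure but shrinks each "tree" to a single root leaf with a conjugate normal update (all names, priors, and values here are illustrative, not the library's API):

```python
import numpy as np

rng = np.random.default_rng(0)
n, num_trees, tau, sigma2 = 100, 20, 0.05, 1.0
y = rng.normal(size=n)

leaf_values = np.zeros(num_trees)   # each "tree" is a single root leaf
resid = y - leaf_values.sum()       # residual net of all trees

for sweep in range(50):
    for j in range(num_trees):
        resid += leaf_values[j]     # add tree j's fit back in ("unpartial")
        # Conjugate update: resid_i ~ N(mu_j, sigma2), prior mu_j ~ N(0, tau)
        post_var = 1.0 / (1.0 / tau + n / sigma2)
        post_mean = post_var * resid.sum() / sigma2
        leaf_values[j] = rng.normal(post_mean, np.sqrt(post_var))
        resid -= leaf_values[j]     # subtract the refreshed fit
```

The `backfitting` flag threaded through these samplers gates exactly this add-back/subtract step; note that the log-linear variance model passes `false` for it, presumably because its residual enters through a variance term rather than additively.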
diff --git a/setup.py b/setup.py index c91678f5..affbefa0 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,8 @@ def build_extension(self, ext: CMakeExtension) -> None: "-DBUILD_TEST=OFF", "-DBUILD_DEBUG_TARGETS=OFF", "-DBUILD_PYTHON=ON", + "-DUSE_OPENMP=ON", + "-DUSE_HOMEBREW_FALLBACK=ON", ] build_args = [] # Adding CMake arguments set as environment variable diff --git a/src/Makevars b/src/Makevars deleted file mode 100644 index 83bd4627..00000000 --- a/src/Makevars +++ /dev/null @@ -1,26 +0,0 @@ -# package root -PKGROOT=.. - -STOCHTREE_CPPFLAGS = -DSTOCHTREE_R_BUILD - -# PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/deps/eigen -I$(PKGROOT)/deps/fmt/include -I$(PKGROOT)/deps/fast_double_parser/include -I$(PKGROOT)/deps/boost_math/include $(STOCHTREE_CPPFLAGS) -PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/deps/eigen -I$(PKGROOT)/deps/fmt/include -I$(PKGROOT)/deps/fast_double_parser/include $(STOCHTREE_CPPFLAGS) - -CXX_STD=CXX17 - -OBJECTS = \ - forest.o \ - kernel.o \ - R_data.o \ - R_random_effects.o \ - sampler.o \ - serialization.o \ - cpp11.o \ - container.o \ - cutpoint_candidates.o \ - data.o \ - io.o \ - leaf_model.o \ - partition_tracker.o \ - random_effects.o \ - tree.o diff --git a/src/Makevars.in b/src/Makevars.in new file mode 100644 index 00000000..4eb970cb --- /dev/null +++ b/src/Makevars.in @@ -0,0 +1,39 @@ +CXX_STD=CXX17 + +PKGROOT=.. + +STOCHTREE_CPPFLAGS = \ + @STOCHTREE_CPPFLAGS@ \ + @OPENMP_AVAILABILITY_FLAGS@ \ + -DSTOCHTREE_R_BUILD + +PKG_CPPFLAGS = \ + -I$(PKGROOT)/include \ + -I$(PKGROOT)/deps/eigen \ + -I$(PKGROOT)/deps/fmt/include \ + -I$(PKGROOT)/deps/fast_double_parser/include \ + $(STOCHTREE_CPPFLAGS) + +PKG_CXXFLAGS = \ + @OPENMP_CXXFLAGS@ + +PKG_LIBS = \ + @OPENMP_CXXFLAGS@ \ + @OPENMP_LIB@ + +OBJECTS = \ + forest.o \ + kernel.o \ + R_data.o \ + R_random_effects.o \ + sampler.o \ + serialization.o \ + cpp11.o \ + container.o \ + cutpoint_candidates.o \ + data.o \ + io.o \ + leaf_model.o \ + partition_tracker.o \ + random_effects.o \ + tree.o diff --git a/src/Makevars.win.in b/src/Makevars.win.in new file mode 100644 index 00000000..95bff1dd --- /dev/null +++ b/src/Makevars.win.in @@ -0,0 +1,39 @@ +CXX_STD = @CXX_STD@ + +PKGROOT=.. 
+ +STOCHTREE_CPPFLAGS = \ + @STOCHTREE_CPPFLAGS@ \ + -DSTOCHTREE_R_BUILD + +PKG_CPPFLAGS = \ + -I$(PKGROOT)/include \ + -I$(PKGROOT)/deps/eigen \ + -I$(PKGROOT)/deps/fmt/include \ + -I$(PKGROOT)/deps/fast_double_parser/include \ + $(STOCHTREE_CPPFLAGS) + +PKG_CXXFLAGS = \ + ${SHLIB_OPENMP_CXXFLAGS} \ + ${SHLIB_PTHREAD_FLAGS} + +PKG_LIBS = \ + ${SHLIB_OPENMP_CXXFLAGS} \ + ${SHLIB_PTHREAD_FLAGS} + +OBJECTS = \ + forest.o \ + kernel.o \ + R_data.o \ + R_random_effects.o \ + sampler.o \ + serialization.o \ + cpp11.o \ + container.o \ + cutpoint_candidates.o \ + data.o \ + io.o \ + leaf_model.o \ + partition_tracker.o \ + random_effects.o \ + tree.o diff --git a/src/R_data.cpp b/src/R_data.cpp index c6c75c29..0f495436 100644 --- a/src/R_data.cpp +++ b/src/R_data.cpp @@ -4,7 +4,6 @@ #include #include #include -#include [[cpp11::register]] cpp11::external_pointer create_forest_dataset_cpp() { diff --git a/src/R_random_effects.cpp b/src/R_random_effects.cpp index f627b3c5..e291121c 100644 --- a/src/R_random_effects.cpp +++ b/src/R_random_effects.cpp @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include [[cpp11::register]] cpp11::external_pointer rfx_container_cpp(int num_components, int num_groups) { diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 873b0c25..67f79ab2 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -1076,18 +1076,18 @@ extern "C" SEXP _stochtree_compute_leaf_indices_cpp(SEXP forest_container, SEXP END_CPP11 } // sampler.cpp -void sample_gfr_one_iteration_cpp(cpp11::external_pointer data, cpp11::external_pointer residual, cpp11::external_pointer forest_samples, cpp11::external_pointer active_forest, cpp11::external_pointer tracker, cpp11::external_pointer split_prior, cpp11::external_pointer rng, cpp11::integers sweep_indices, cpp11::integers feature_types, int cutpoint_grid_size, cpp11::doubles_matrix<> leaf_model_scale_input, cpp11::doubles variable_weights, double a_forest, double b_forest, double global_variance, int leaf_model_int, bool keep_forest, int num_features_subsample); -extern "C" SEXP _stochtree_sample_gfr_one_iteration_cpp(SEXP data, SEXP residual, SEXP forest_samples, SEXP active_forest, SEXP tracker, SEXP split_prior, SEXP rng, SEXP sweep_indices, SEXP feature_types, SEXP cutpoint_grid_size, SEXP leaf_model_scale_input, SEXP variable_weights, SEXP a_forest, SEXP b_forest, SEXP global_variance, SEXP leaf_model_int, SEXP keep_forest, SEXP num_features_subsample) { +void sample_gfr_one_iteration_cpp(cpp11::external_pointer data, cpp11::external_pointer residual, cpp11::external_pointer forest_samples, cpp11::external_pointer active_forest, cpp11::external_pointer tracker, cpp11::external_pointer split_prior, cpp11::external_pointer rng, cpp11::integers sweep_indices, cpp11::integers feature_types, int cutpoint_grid_size, cpp11::doubles_matrix<> leaf_model_scale_input, cpp11::doubles variable_weights, double a_forest, double b_forest, double global_variance, int leaf_model_int, bool keep_forest, int num_features_subsample, int num_threads); +extern "C" SEXP _stochtree_sample_gfr_one_iteration_cpp(SEXP data, SEXP residual, SEXP forest_samples, SEXP active_forest, SEXP tracker, SEXP split_prior, SEXP rng, SEXP sweep_indices, SEXP feature_types, SEXP cutpoint_grid_size, SEXP leaf_model_scale_input, SEXP variable_weights, SEXP a_forest, SEXP b_forest, SEXP global_variance, SEXP leaf_model_int, SEXP keep_forest, SEXP num_features_subsample, SEXP num_threads) { BEGIN_CPP11 - sample_gfr_one_iteration_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>>(residual), 
cpp11::as_cpp>>(forest_samples), cpp11::as_cpp>>(active_forest), cpp11::as_cpp>>(tracker), cpp11::as_cpp>>(split_prior), cpp11::as_cpp>>(rng), cpp11::as_cpp>(sweep_indices), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(cutpoint_grid_size), cpp11::as_cpp>>(leaf_model_scale_input), cpp11::as_cpp>(variable_weights), cpp11::as_cpp>(a_forest), cpp11::as_cpp>(b_forest), cpp11::as_cpp>(global_variance), cpp11::as_cpp>(leaf_model_int), cpp11::as_cpp>(keep_forest), cpp11::as_cpp>(num_features_subsample)); + sample_gfr_one_iteration_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>>(residual), cpp11::as_cpp>>(forest_samples), cpp11::as_cpp>>(active_forest), cpp11::as_cpp>>(tracker), cpp11::as_cpp>>(split_prior), cpp11::as_cpp>>(rng), cpp11::as_cpp>(sweep_indices), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(cutpoint_grid_size), cpp11::as_cpp>>(leaf_model_scale_input), cpp11::as_cpp>(variable_weights), cpp11::as_cpp>(a_forest), cpp11::as_cpp>(b_forest), cpp11::as_cpp>(global_variance), cpp11::as_cpp>(leaf_model_int), cpp11::as_cpp>(keep_forest), cpp11::as_cpp>(num_features_subsample), cpp11::as_cpp>(num_threads)); return R_NilValue; END_CPP11 } // sampler.cpp -void sample_mcmc_one_iteration_cpp(cpp11::external_pointer data, cpp11::external_pointer residual, cpp11::external_pointer forest_samples, cpp11::external_pointer active_forest, cpp11::external_pointer tracker, cpp11::external_pointer split_prior, cpp11::external_pointer rng, cpp11::integers sweep_indices, cpp11::integers feature_types, int cutpoint_grid_size, cpp11::doubles_matrix<> leaf_model_scale_input, cpp11::doubles variable_weights, double a_forest, double b_forest, double global_variance, int leaf_model_int, bool keep_forest); -extern "C" SEXP _stochtree_sample_mcmc_one_iteration_cpp(SEXP data, SEXP residual, SEXP forest_samples, SEXP active_forest, SEXP tracker, SEXP split_prior, SEXP rng, SEXP sweep_indices, SEXP feature_types, SEXP cutpoint_grid_size, SEXP leaf_model_scale_input, SEXP variable_weights, SEXP a_forest, SEXP b_forest, SEXP global_variance, SEXP leaf_model_int, SEXP keep_forest) { +void sample_mcmc_one_iteration_cpp(cpp11::external_pointer data, cpp11::external_pointer residual, cpp11::external_pointer forest_samples, cpp11::external_pointer active_forest, cpp11::external_pointer tracker, cpp11::external_pointer split_prior, cpp11::external_pointer rng, cpp11::integers sweep_indices, cpp11::integers feature_types, int cutpoint_grid_size, cpp11::doubles_matrix<> leaf_model_scale_input, cpp11::doubles variable_weights, double a_forest, double b_forest, double global_variance, int leaf_model_int, bool keep_forest, int num_threads); +extern "C" SEXP _stochtree_sample_mcmc_one_iteration_cpp(SEXP data, SEXP residual, SEXP forest_samples, SEXP active_forest, SEXP tracker, SEXP split_prior, SEXP rng, SEXP sweep_indices, SEXP feature_types, SEXP cutpoint_grid_size, SEXP leaf_model_scale_input, SEXP variable_weights, SEXP a_forest, SEXP b_forest, SEXP global_variance, SEXP leaf_model_int, SEXP keep_forest, SEXP num_threads) { BEGIN_CPP11 - sample_mcmc_one_iteration_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>>(residual), cpp11::as_cpp>>(forest_samples), cpp11::as_cpp>>(active_forest), cpp11::as_cpp>>(tracker), cpp11::as_cpp>>(split_prior), cpp11::as_cpp>>(rng), cpp11::as_cpp>(sweep_indices), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(cutpoint_grid_size), cpp11::as_cpp>>(leaf_model_scale_input), cpp11::as_cpp>(variable_weights), cpp11::as_cpp>(a_forest), cpp11::as_cpp>(b_forest), cpp11::as_cpp>(global_variance), 
cpp11::as_cpp>(leaf_model_int), cpp11::as_cpp>(keep_forest)); + sample_mcmc_one_iteration_cpp(cpp11::as_cpp>>(data), cpp11::as_cpp>>(residual), cpp11::as_cpp>>(forest_samples), cpp11::as_cpp>>(active_forest), cpp11::as_cpp>>(tracker), cpp11::as_cpp>>(split_prior), cpp11::as_cpp>>(rng), cpp11::as_cpp>(sweep_indices), cpp11::as_cpp>(feature_types), cpp11::as_cpp>(cutpoint_grid_size), cpp11::as_cpp>>(leaf_model_scale_input), cpp11::as_cpp>(variable_weights), cpp11::as_cpp>(a_forest), cpp11::as_cpp>(b_forest), cpp11::as_cpp>(global_variance), cpp11::as_cpp>(leaf_model_int), cpp11::as_cpp>(keep_forest), cpp11::as_cpp>(num_threads)); return R_NilValue; END_CPP11 } @@ -1684,8 +1684,8 @@ static const R_CallMethodDef CallEntries[] = { {"_stochtree_rng_cpp", (DL_FUNC) &_stochtree_rng_cpp, 1}, {"_stochtree_root_reset_active_forest_cpp", (DL_FUNC) &_stochtree_root_reset_active_forest_cpp, 1}, {"_stochtree_root_reset_rfx_tracker_cpp", (DL_FUNC) &_stochtree_root_reset_rfx_tracker_cpp, 4}, - {"_stochtree_sample_gfr_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_gfr_one_iteration_cpp, 18}, - {"_stochtree_sample_mcmc_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_mcmc_one_iteration_cpp, 17}, + {"_stochtree_sample_gfr_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_gfr_one_iteration_cpp, 19}, + {"_stochtree_sample_mcmc_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_mcmc_one_iteration_cpp, 18}, {"_stochtree_sample_sigma2_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_sigma2_one_iteration_cpp, 5}, {"_stochtree_sample_tau_one_iteration_cpp", (DL_FUNC) &_stochtree_sample_tau_one_iteration_cpp, 4}, {"_stochtree_sample_without_replacement_integer_cpp", (DL_FUNC) &_stochtree_sample_without_replacement_integer_cpp, 3}, diff --git a/src/cutpoint_candidates.cpp b/src/cutpoint_candidates.cpp index 4a0845c7..e43b8219 100644 --- a/src/cutpoint_candidates.cpp +++ b/src/cutpoint_candidates.cpp @@ -2,7 +2,6 @@ #include #include -#include namespace StochTree { diff --git a/src/data.cpp b/src/data.cpp index cd2913cf..e48e9255 100644 --- a/src/data.cpp +++ b/src/data.cpp @@ -1,7 +1,6 @@ /*! 
Copyright (c) 2024 by stochtree authors */ #include #include -#include namespace StochTree { diff --git a/src/forest.cpp b/src/forest.cpp index 02757aa7..968fe95c 100644 --- a/src/forest.cpp +++ b/src/forest.cpp @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include [[cpp11::register]] cpp11::external_pointer active_forest_cpp(int num_trees, int output_dimension = 1, bool is_leaf_constant = true, bool is_exponentiated = false) { diff --git a/src/io.cpp b/src/io.cpp index 1324957f..50774d9b 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -7,9 +7,7 @@ #include #include -#include #include -#include namespace StochTree { diff --git a/src/kernel.cpp b/src/kernel.cpp index 6b5867bb..88f12c53 100644 --- a/src/kernel.cpp +++ b/src/kernel.cpp @@ -3,8 +3,6 @@ #include #include #include -#include -#include typedef Eigen::Map> DoubleMatrixType; typedef Eigen::Map> IntMatrixType; diff --git a/src/partition_tracker.cpp b/src/partition_tracker.cpp index cda214be..9d643380 100644 --- a/src/partition_tracker.cpp +++ b/src/partition_tracker.cpp @@ -6,12 +6,6 @@ #include #include -#include -#include -#include -#include -#include - namespace StochTree { ForestTracker::ForestTracker(Eigen::MatrixXd& covariates, std::vector& feature_types, int num_trees, int num_observations) { @@ -282,11 +276,11 @@ void ForestTracker::UpdatePredictions(TreeEnsemble* ensemble, ForestDataset& dat } } -void ForestTracker::AddSplit(Eigen::MatrixXd& covariates, TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted) { +void ForestTracker::AddSplit(Eigen::MatrixXd& covariates, TreeSplit& split, int32_t split_feature, int32_t tree_id, int32_t split_node_id, int32_t left_node_id, int32_t right_node_id, bool keep_sorted, int num_threads) { sample_node_mapper_->AddSplit(covariates, split, split_feature, tree_id, split_node_id, left_node_id, right_node_id); unsorted_node_sample_tracker_->PartitionTreeNode(covariates, tree_id, split_node_id, left_node_id, right_node_id, split_feature, split); if (keep_sorted) { - sorted_node_sample_tracker_->PartitionNode(covariates, split_node_id, split_feature, split); + sorted_node_sample_tracker_->PartitionNode(covariates, split_node_id, split_feature, split, num_threads); } } diff --git a/src/py_stochtree.cpp b/src/py_stochtree.cpp index f90f5cc6..32bbd707 100644 --- a/src/py_stochtree.cpp +++ b/src/py_stochtree.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #define STRINGIFY(x) #x #define MACRO_STRINGIFY(x) STRINGIFY(x) @@ -1028,7 +1027,7 @@ class ForestSamplerCpp { void SampleOneIteration(ForestContainerCpp& forest_samples, ForestCpp& forest, ForestDatasetCpp& dataset, ResidualCpp& residual, RngCpp& rng, py::array_t feature_types, py::array_t sweep_update_indices, int cutpoint_grid_size, py::array_t leaf_model_scale_input, py::array_t variable_weights, double a_forest, double b_forest, double global_variance, - int leaf_model_int, int num_features_subsample, bool keep_forest = true, bool gfr = true) { + int leaf_model_int, int num_features_subsample, bool keep_forest = true, bool gfr = true, int num_threads = -1) { // Refactoring completely out of the Python interface. // Intention to refactor out of the C++ interface in the future. 
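These bindings expose `num_threads` (default `-1`) all the way up to the Python package, where it travels from `general_params` through `sample_one_iteration` into the C++ samplers. A usage sketch, assuming the `stochtree` Python package is installed; the data is synthetic, and the returned dict keys follow the `predict()` change made later in this patch:

```python
import numpy as np
from stochtree import BARTModel

rng = np.random.default_rng(2024)
X = rng.uniform(size=(500, 5))
y = np.sin(2 * np.pi * X[:, 0]) + 0.5 * rng.normal(size=500)

# num_threads = -1 uses all available threads when OpenMP is present,
# otherwise it falls back to a single thread
model = BARTModel()
model.sample(X_train=X, y_train=y, num_gfr=10, num_mcmc=100,
             general_params={"num_threads": 4})

# predict() now returns a dict rather than a bare array or tuple
preds = model.predict(covariates=X)
y_hat = preds["y_hat"]                          # draws from the mean forest (plus any rfx)
var_hat = preds["variance_forest_predictions"]  # None unless a variance forest was fit
```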
bool pre_initialized = true; @@ -1090,23 +1089,23 @@ class ForestSamplerCpp { std::mt19937* rng_ptr = rng.GetRng(); if (gfr) { if (model_type == StochTree::ModelType::kConstantLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_threads); } else if (model_type == StochTree::ModelType::kUnivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_threads); } else if (model_type == StochTree::ModelType::kMultivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_basis); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_threads, num_basis); } else if (model_type == StochTree::ModelType::kLogLinearVariance) { - StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false, num_features_subsample); + StochTree::GFRSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false, num_features_subsample, num_threads); } } else { if (model_type == StochTree::ModelType::kConstantLeafGaussian) { - StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, 
pre_initialized, true); + StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, true, num_threads); } else if (model_type == StochTree::ModelType::kUnivariateRegressionLeafGaussian) { - StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, true); + StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, true, num_threads); } else if (model_type == StochTree::ModelType::kMultivariateRegressionLeafGaussian) { - StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, true, num_basis); + StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, true, num_threads, num_basis); } else if (model_type == StochTree::ModelType::kLogLinearVariance) { - StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, false); + StochTree::MCMCSampleOneIter(*active_forest_ptr, *(tracker_.get()), *forest_sample_ptr, std::get(leaf_model), *forest_data_ptr, *residual_data_ptr, *(split_prior_.get()), *rng_ptr, var_weights_vector, sweep_update_indices_, global_variance, keep_forest, pre_initialized, false, num_threads); } } } diff --git a/src/sampler.cpp b/src/sampler.cpp index af45d6d6..212ccb42 100644 --- a/src/sampler.cpp +++ b/src/sampler.cpp @@ -7,10 +7,7 @@ #include #include #include -#include #include -#include -#include [[cpp11::register]] void sample_gfr_one_iteration_cpp(cpp11::external_pointer data, @@ -26,7 +23,8 @@ void sample_gfr_one_iteration_cpp(cpp11::external_pointer(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_threads); } else if (model_type == StochTree::ModelType::kUnivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, 
pre_initialized, true, num_features_subsample); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_threads); } else if (model_type == StochTree::ModelType::kMultivariateRegressionLeafGaussian) { - StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_basis); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, true, num_features_subsample, num_threads, num_basis); } else if (model_type == StochTree::ModelType::kLogLinearVariance) { - StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false, num_features_subsample); + StochTree::GFRSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, feature_types_, cutpoint_grid_size, keep_forest, pre_initialized, false, num_features_subsample, num_threads); } } @@ -108,7 +106,7 @@ void sample_mcmc_one_iteration_cpp(cpp11::external_pointer(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, true); + StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, true, num_threads); } else if (model_type == StochTree::ModelType::kUnivariateRegressionLeafGaussian) { - StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, true); + StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, true, num_threads); } else if (model_type == StochTree::ModelType::kMultivariateRegressionLeafGaussian) { - StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, true, num_basis); + StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, true, num_threads, num_basis); } else if (model_type == StochTree::ModelType::kLogLinearVariance) { - StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, 
global_variance, keep_forest, pre_initialized, false); + StochTree::MCMCSampleOneIter(*active_forest, *tracker, *forest_samples, std::get(leaf_model), *data, *residual, *split_prior, *rng, var_weights_vector, sweep_indices_, global_variance, keep_forest, pre_initialized, false, num_threads); } } diff --git a/src/serialization.cpp b/src/serialization.cpp index 749395e8..fb248f62 100644 --- a/src/serialization.cpp +++ b/src/serialization.cpp @@ -8,9 +8,6 @@ #include #include #include -#include -#include -#include [[cpp11::register]] cpp11::external_pointer init_json_cpp() { diff --git a/src/tree.cpp b/src/tree.cpp index fa6fd8f8..3bc85c74 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -8,9 +8,6 @@ #include #include -#include -#include -#include namespace StochTree { diff --git a/stochtree/bart.py b/stochtree/bart.py index 8554d64e..1f65ea17 100644 --- a/stochtree/bart.py +++ b/stochtree/bart.py @@ -138,6 +138,7 @@ def sample( * `rfx_group_parameter_prior_cov`: Prior covariance matrix for the random effects "group parameters." Default: `None`. Must be a square numpy matrix whose dimension matches the number of random effects bases, or a scalar value that will be expanded to a diagonal matrix. * `rfx_variance_prior_shape`: Shape parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`. * `rfx_variance_prior_scale`: Scale parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`. + * `num_threads`: Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's setup, this will default to `1`, otherwise to the maximum number of available threads. mean_forest_params : dict, optional Dictionary of mean forest model parameters, each of which has a default value processed internally, so this argument is optional. @@ -202,6 +203,7 @@ def sample( "rfx_group_parameter_prior_cov": None, "rfx_variance_prior_shape": 1.0, "rfx_variance_prior_scale": 1.0, + "num_threads": -1, } general_params_updated = _preprocess_params( general_params_default, general_params @@ -266,6 +268,7 @@ def sample( rfx_group_parameter_prior_cov = general_params_updated["rfx_group_parameter_prior_cov"] rfx_variance_prior_shape = general_params_updated["rfx_variance_prior_shape"] rfx_variance_prior_scale = general_params_updated["rfx_variance_prior_scale"] + num_threads = general_params_updated["num_threads"] # 2. 
Mean forest parameters num_trees_mean = mean_forest_params_updated["num_trees"] @@ -1226,6 +1229,7 @@ def sample( forest_model_config_mean, keep_sample, True, + num_threads, ) # Cache train set predictions since they are already computed during sampling @@ -1244,6 +1248,7 @@ def sample( forest_model_config_variance, keep_sample, True, + num_threads, ) # Cache train set predictions since they are already computed during sampling @@ -1426,6 +1431,7 @@ def sample( forest_model_config_mean, keep_sample, False, + num_threads, ) if keep_sample: @@ -1443,6 +1449,7 @@ def sample( forest_model_config_variance, keep_sample, False, + num_threads, ) if keep_sample: @@ -1684,11 +1691,11 @@ def predict( has_mean_predictions = self.include_mean_forest or self.has_rfx if has_mean_predictions and self.include_variance_forest: - return (mean_pred, variance_pred) + return {"y_hat": mean_pred, "variance_forest_predictions": variance_pred} elif has_mean_predictions and not self.include_variance_forest: - return mean_pred + return {"y_hat": mean_pred, "variance_forest_predictions": None} elif not has_mean_predictions and self.include_variance_forest: - return variance_pred + return {"y_hat": None, "variance_forest_predictions": variance_pred} def predict_mean( self, diff --git a/stochtree/bcf.py b/stochtree/bcf.py index 0ccd9e31..bfe9cc34 100644 --- a/stochtree/bcf.py +++ b/stochtree/bcf.py @@ -158,7 +158,7 @@ def sample( * `rfx_group_parameter_prior_cov`: Prior covariance matrix for the random effects "group parameters." Default: `None`. Must be a square numpy matrix whose dimension matches the number of random effects bases, or a scalar value that will be expanded to a diagonal matrix. * `rfx_variance_prior_shape`: Shape parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`. * `rfx_variance_prior_scale`: Scale parameter for the inverse gamma prior on the variance of the random effects "group parameter." Default: `1`. - + * `num_threads`: Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's setup, this will default to `1`, otherwise to the maximum number of available threads. prognostic_forest_params : dict, optional Dictionary of prognostic forest model parameters, each of which has a default value processed internally, so this argument is optional. @@ -240,6 +240,7 @@ def sample( "rfx_group_parameter_prior_cov": None, "rfx_variance_prior_shape": 1.0, "rfx_variance_prior_scale": 1.0, + "num_threads": -1, } general_params_updated = _preprocess_params( general_params_default, general_params @@ -328,6 +329,7 @@ def sample( rfx_group_parameter_prior_cov = general_params_updated["rfx_group_parameter_prior_cov"] rfx_variance_prior_shape = general_params_updated["rfx_variance_prior_shape"] rfx_variance_prior_scale = general_params_updated["rfx_variance_prior_scale"] + num_threads = general_params_updated["num_threads"] # 2. 
Mu forest parameters num_trees_mu = prognostic_forest_params_updated["num_trees"] @@ -1735,6 +1737,7 @@ def sample( forest_model_config_mu, keep_sample, True, + num_threads, ) # Cache train set predictions since they are already computed during sampling @@ -1772,6 +1775,7 @@ def sample( forest_model_config_tau, keep_sample, True, + num_threads, ) # Cannot cache train set predictions for tau because the cached predictions in the @@ -1833,6 +1837,7 @@ def sample( forest_model_config_variance, keep_sample, True, + num_threads, ) # Cache train set predictions since they are already computed during sampling @@ -1928,6 +1933,7 @@ def sample( forest_model_config_mu, keep_sample, False, + num_threads, ) # Cache train set predictions since they are already computed during sampling @@ -1965,6 +1971,7 @@ def sample( forest_model_config_tau, keep_sample, False, + num_threads, ) # Cannot cache train set predictions for tau because the cached predictions in the @@ -2026,6 +2033,7 @@ def sample( forest_model_config_variance, keep_sample, True, + num_threads, ) # Cache train set predictions since they are already computed during sampling @@ -2231,23 +2239,6 @@ def predict_tau( # Data checks if Z.shape[0] != X.shape[0]: raise ValueError("X and Z must have the same number of rows") - if propensity is not None: - if propensity.shape[0] != X.shape[0]: - raise ValueError("X and propensity must have the same number of rows") - else: - if self.propensity_covariate == "tau": - if not self.internal_propensity_model: - raise ValueError( - "Propensity scores not provided, but no propensity model was trained during sampling" - ) - else: - propensity = np.mean( - self.bart_propensity_model.predict(X), axis=1, keepdims=True - ) - else: - # Dummy propensities if not provided but also not needed - propensity = np.ones(X.shape[0]) - propensity = np.expand_dims(propensity, 1) # Covariate preprocessing if not self._covariate_preprocessor._check_is_fitted(): @@ -2269,6 +2260,22 @@ def predict_tau( covariates_processed = X else: covariates_processed = self._covariate_preprocessor.transform(X) + + # Handle propensities + if propensity is not None: + if propensity.shape[0] != X.shape[0]: + raise ValueError("X and propensity must have the same number of rows") + else: + if self.propensity_covariate != "none": + if not self.internal_propensity_model: + raise ValueError( + "Propensity scores not provided, but no propensity model was trained during sampling" + ) + else: + internal_propensity_preds = self.bart_propensity_model.predict(covariates_processed) + propensity = np.mean( + internal_propensity_preds['y_hat'], axis=1, keepdims=True + ) # Update covariates to include propensities if requested if self.propensity_covariate == "none": @@ -2354,6 +2361,22 @@ def predict_variance( covariates_processed = covariates else: covariates_processed = self._covariate_preprocessor.transform(covariates) + + # Handle propensities + if propensity is not None: + if propensity.shape[0] != covariates.shape[0]: + raise ValueError("covariates and propensity must have the same number of rows") + else: + if self.propensity_covariate != "none": + if not self.internal_propensity_model: + raise ValueError( + "Propensity scores not provided, but no propensity model was trained during sampling" + ) + else: + internal_propensity_preds = self.bart_propensity_model.predict(covariates_processed) + propensity = np.mean( + internal_propensity_preds['y_hat'], axis=1, keepdims=True + ) # Update covariates to include propensities if requested if self.propensity_covariate == 
"none": @@ -2394,7 +2417,7 @@ def predict( propensity: np.array = None, rfx_group_ids: np.array = None, rfx_basis: np.array = None, - ) -> tuple: + ) -> dict: """Predict outcome model components (CATE function and prognostic function) as well as overall outcome for every provided observation. Predicted outcomes are computed as `yhat = mu_x + Z*tau_x` where mu_x is a sample of the prognostic function and tau_x is a sample of the treatment effect (CATE) function. @@ -2447,19 +2470,6 @@ def predict( # Data checks if Z.shape[0] != X.shape[0]: raise ValueError("X and Z must have the same number of rows") - if propensity is not None: - if propensity.shape[0] != X.shape[0]: - raise ValueError("X and propensity must have the same number of rows") - else: - if self.propensity_covariate != "none": - if not self.internal_propensity_model: - raise ValueError( - "Propensity scores not provided, but no propensity model was trained during sampling" - ) - else: - propensity = np.mean( - self.bart_propensity_model.predict(X), axis=1, keepdims=True - ) # Covariate preprocessing if not self._covariate_preprocessor._check_is_fitted(): @@ -2481,6 +2491,22 @@ def predict( covariates_processed = X else: covariates_processed = self._covariate_preprocessor.transform(X) + + # Handle propensities + if propensity is not None: + if propensity.shape[0] != X.shape[0]: + raise ValueError("X and propensity must have the same number of rows") + else: + if self.propensity_covariate != "none": + if not self.internal_propensity_model: + raise ValueError( + "Propensity scores not provided, but no propensity model was trained during sampling" + ) + else: + internal_propensity_preds = self.bart_propensity_model.predict(covariates_processed) + propensity = np.mean( + internal_propensity_preds['y_hat'], axis=1, keepdims=True + ) # Update covariates to include propensities if requested if self.propensity_covariate == "none": @@ -2535,13 +2561,13 @@ def predict( # Return result matrices as a tuple if self.has_rfx and self.include_variance_forest: - return (tau_x, mu_x, rfx_preds, yhat_x, sigma2_x) + return {"mu_hat": mu_x, "tau_hat": tau_x, "y_hat": yhat_x, "rfx_predictions": rfx_preds, "variance_forest_predictions": sigma2_x} elif not self.has_rfx and self.include_variance_forest: - return (tau_x, mu_x, yhat_x, sigma2_x) + return {"mu_hat": mu_x, "tau_hat": tau_x, "y_hat": yhat_x, "rfx_predictions": None, "variance_forest_predictions": sigma2_x} elif self.has_rfx and not self.include_variance_forest: - return (tau_x, mu_x, rfx_preds, yhat_x) + return {"mu_hat": mu_x, "tau_hat": tau_x, "y_hat": yhat_x, "rfx_predictions": rfx_preds, "variance_forest_predictions": None} else: - return (tau_x, mu_x, yhat_x) + return {"mu_hat": mu_x, "tau_hat": tau_x, "y_hat": yhat_x, "rfx_predictions": None, "variance_forest_predictions": None} def to_json(self) -> str: """ diff --git a/stochtree/sampler.py b/stochtree/sampler.py index 8ac4f013..cbab9ce6 100644 --- a/stochtree/sampler.py +++ b/stochtree/sampler.py @@ -103,6 +103,7 @@ def sample_one_iteration( forest_config: ForestModelConfig, keep_forest: bool, gfr: bool, + num_threads: int = -1, ) -> None: """ Sample one iteration of a forest using the specified model and tree sampling algorithm @@ -127,6 +128,8 @@ def sample_one_iteration( Whether or not the resulting forest should be retained in `forest_container` or discarded (due to burnin or thinning for example) gfr : bool Whether or not the "grow-from-root" (GFR) sampler is run (if this is `True` and `leaf_model_int=0` this is equivalent to 
XBART, if this is `FALSE` and `leaf_model_int=0` this is equivalent to the original BART) + num_threads : int + Number of threads to use in the GFR and MCMC algorithms, as well as prediction. If OpenMP is not available on a user's system, this will default to `1`, otherwise to the maximum number of available threads. """ # Ensure forest has been initialized if forest.is_empty(): @@ -173,6 +176,7 @@ def sample_one_iteration( forest_config.get_num_features_subsample(), keep_forest, gfr, + num_threads, ) def prepare_for_sampler( diff --git a/test/cpp/test_model.cpp b/test/cpp/test_model.cpp index dbe85b7e..184c2552 100644 --- a/test/cpp/test_model.cpp +++ b/test/cpp/test_model.cpp @@ -1,259 +1,259 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -TEST(LeafConstantModel, FullEnumeration) { - // Load test data - StochTree::TestUtils::TestDataset test_dataset; - test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); - std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); - std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); - std::vector feature_subset(test_dataset.x_cols, true); - std::random_device rd; - std::mt19937 gen(rd()); - - // Construct datasets - using data_size_t = StochTree::data_size_t; - data_size_t n = test_dataset.n; - int p = test_dataset.x_cols; - StochTree::ForestDataset dataset = StochTree::ForestDataset(); - dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); - StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); - - // Construct a ForestTracker - int num_trees = 1; - StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); - - // Set sampling parameters - double alpha = 0.95; - double beta = 1.25; - int min_samples_leaf = 1; - int max_depth = -1; - double global_variance = 1.; - double tau = 1.; - int cutpoint_grid_size = n; - StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); - - // Construct temporary data structures needed to enumerate splits - std::vector log_cutpoint_evaluations; - std::vector cutpoint_features; - std::vector cutpoint_values; - std::vector cutpoint_feature_types; - StochTree::data_size_t valid_cutpoint_count = 0; - StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); - - // Initialize a leaf model - StochTree::GaussianConstantLeafModel leaf_model = StochTree::GaussianConstantLeafModel(tau); - - // Evaluate all possible cutpoints - StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset - ); - - // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered - ASSERT_EQ(log_cutpoint_evaluations.size(), (n - 2*min_samples_leaf + 1)*p + 1); - - // Check the values of the cutpoint evaluations - std::vector expected_split_evals{3.773828, 3.349927, 3.001568, 3.085074, 2.989927, 3.101841, 2.980939, 3.068029, 3.822045, 3.663843, 3.710592, 3.354912, 3.135288, - 3.553728, 2.969388, 3.540838, 3.961885, 3.822045, 4.908861, 4.032006, 4.083473, 4.442268, 5.023573, 4.171735, 3.353457, 3.862124, - 3.323620, 3.998112, 
3.425777, 3.096926, 3.131347, 2.947921, 2.935892, 3.224115, 3.144767, 3.213065, 3.863427, 3.792850, 3.146056, - 3.348693, 3.487161, 4.600861, 4.226219, 4.879161, 3.773828, 3.940111}; - for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { - ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); - } -} - -TEST(LeafConstantModel, CutpointThinning) { - // Load test data - StochTree::TestUtils::TestDataset test_dataset; - test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); - std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); - std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); - std::vector feature_subset(test_dataset.x_cols, true); - std::random_device rd; - std::mt19937 gen(rd()); - - // Construct datasets - using data_size_t = StochTree::data_size_t; - data_size_t n = test_dataset.n; - int p = test_dataset.x_cols; - StochTree::ForestDataset dataset = StochTree::ForestDataset(); - dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); - StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); - - // Construct a ForestTracker - int num_trees = 1; - StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); - - // Set sampling parameters - double alpha = 0.95; - double beta = 1.25; - int min_samples_leaf = 1; - int max_depth = -1; - double global_variance = 1.; - double tau = 1.; - int cutpoint_grid_size = 5; - StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); - - // Construct temporary data structures needed to enumerate splits - std::vector log_cutpoint_evaluations; - std::vector cutpoint_features; - std::vector cutpoint_values; - std::vector cutpoint_feature_types; - StochTree::data_size_t valid_cutpoint_count = 0; - StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); - - // Initialize a leaf model - StochTree::GaussianConstantLeafModel leaf_model = StochTree::GaussianConstantLeafModel(tau); - - // Evaluate all possible cutpoints - StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset - ); - - // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered - ASSERT_EQ(log_cutpoint_evaluations.size(), (cutpoint_grid_size - 1)*p + 1); - - // Check the values of the cutpoint evaluations - std::vector expected_split_evals{3.349927, 3.085074, 3.101841, 3.068029, 3.710592, 3.135288, 2.969388, 3.961885, 4.032006, - 4.442268, 4.171735, 3.862124, 3.425777, 3.131347, 2.935892, 3.144767, 3.792850, 3.348693, - 4.600861, 4.879161, 3.940111}; - for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { - ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); - } -} - -TEST(LeafUnivariateRegressionModel, FullEnumeration) { - // Load test data - StochTree::TestUtils::TestDataset test_dataset; - test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); - std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); - std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); - std::vector feature_subset(test_dataset.x_cols, 
true); - std::random_device rd; - std::mt19937 gen(rd()); - - // Construct datasets - using data_size_t = StochTree::data_size_t; - data_size_t n = test_dataset.n; - int p = test_dataset.x_cols; - StochTree::ForestDataset dataset = StochTree::ForestDataset(); - dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); - dataset.AddBasis(test_dataset.omega.data(), test_dataset.n, test_dataset.omega_cols, test_dataset.row_major); - StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); - - // Construct a ForestTracker - int num_trees = 1; - StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); - - // Set sampling parameters - double alpha = 0.95; - double beta = 1.25; - int min_samples_leaf = 1; - int max_depth = -1; - double global_variance = 1.; - double tau = 1.; - int cutpoint_grid_size = n; - StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); - - // Construct temporary data structures needed to enumerate splits - std::vector log_cutpoint_evaluations; - std::vector cutpoint_features; - std::vector cutpoint_values; - std::vector cutpoint_feature_types; - StochTree::data_size_t valid_cutpoint_count = 0; - StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); - - // Initialize a leaf model - StochTree::GaussianUnivariateRegressionLeafModel leaf_model = StochTree::GaussianUnivariateRegressionLeafModel(tau); - - // Evaluate all possible cutpoints - StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset - ); - - // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered - ASSERT_EQ(log_cutpoint_evaluations.size(), (n - 2*min_samples_leaf + 1)*p + 1); - - // Check the values of the cutpoint evaluations - std::vector expected_split_evals{4.978556, 4.067172, 3.823266, 3.850415, 3.796388, 3.791759, 3.864699, 3.970411, 5.105565, 4.886562, 4.812292, 4.450645, 4.180200, - 4.625754, 3.983956, 4.906961, 5.307099, 5.105565, 6.057032, 5.463854, 5.312733, 5.504701, 5.872222, 4.936127, 4.203568, 4.192258, - 4.633795, 4.060248, 4.032323, 4.040458, 4.176712, 3.809356, 3.854872, 4.404108, 4.243114, 4.116230, 5.167773, 5.031023, 4.203335, - 4.094302, 4.280394, 5.557678, 5.394644, 5.945185, 4.978556, 5.069763}; - for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { - ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); - } -} - -TEST(LeafUnivariateRegressionModel, CutpointThinning) { - // Load test data - StochTree::TestUtils::TestDataset test_dataset; - test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); - std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); - std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); - std::vector feature_subset(test_dataset.x_cols, true); - std::random_device rd; - std::mt19937 gen(rd()); - - // Construct datasets - using data_size_t = StochTree::data_size_t; - data_size_t n = test_dataset.n; - int p = test_dataset.x_cols; - StochTree::ForestDataset dataset = StochTree::ForestDataset(); - dataset.AddCovariates(test_dataset.covariates.data(), n, 
test_dataset.x_cols, test_dataset.row_major); - dataset.AddBasis(test_dataset.omega.data(), test_dataset.n, test_dataset.omega_cols, test_dataset.row_major); - StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); - - // Construct a ForestTracker - int num_trees = 1; - StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); - - // Set sampling parameters - double alpha = 0.95; - double beta = 1.25; - int min_samples_leaf = 1; - int max_depth = -1; - double global_variance = 1.; - double tau = 1.; - int cutpoint_grid_size = 5; - StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); - - // Construct temporary data structures needed to enumerate splits - std::vector log_cutpoint_evaluations; - std::vector cutpoint_features; - std::vector cutpoint_values; - std::vector cutpoint_feature_types; - StochTree::data_size_t valid_cutpoint_count = 0; - StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); - - // Initialize a leaf model - StochTree::GaussianUnivariateRegressionLeafModel leaf_model = StochTree::GaussianUnivariateRegressionLeafModel(tau); - - // Evaluate all possible cutpoints - StochTree::EvaluateAllPossibleSplits( - dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, - cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset - ); - - - // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered - ASSERT_EQ(log_cutpoint_evaluations.size(), (cutpoint_grid_size - 1)*p + 1); - - // Check the values of the cutpoint evaluations - std::vector expected_split_evals{4.067172, 3.850415, 3.791759, 3.970411, 4.812292, 4.180200, 3.983956, 5.307099, 5.463854, - 5.504701, 4.936127, 4.192258, 4.032323, 4.176712, 3.854872, 4.243114, 5.031023, 4.094302, - 5.557678, 5.945185, 5.069763}; - for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { - ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); - } -} +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include + +// TEST(LeafConstantModel, FullEnumeration) { +// // Load test data +// StochTree::TestUtils::TestDataset test_dataset; +// test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); +// std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); +// std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); +// std::vector feature_subset(test_dataset.x_cols, true); +// std::random_device rd; +// std::mt19937 gen(rd()); + +// // Construct datasets +// using data_size_t = StochTree::data_size_t; +// data_size_t n = test_dataset.n; +// int p = test_dataset.x_cols; +// StochTree::ForestDataset dataset = StochTree::ForestDataset(); +// dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); +// StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); + +// // Construct a ForestTracker +// int num_trees = 1; +// StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); + +// // Set sampling parameters +// double alpha = 0.95; +// double beta = 1.25; +// int min_samples_leaf = 1; +// int 
max_depth = -1; +// double global_variance = 1.; +// double tau = 1.; +// int cutpoint_grid_size = n; +// StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); + +// // Construct temporary data structures needed to enumerate splits +// std::vector log_cutpoint_evaluations; +// std::vector cutpoint_features; +// std::vector cutpoint_values; +// std::vector cutpoint_feature_types; +// StochTree::data_size_t valid_cutpoint_count = 0; +// StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); + +// // Initialize a leaf model +// StochTree::GaussianConstantLeafModel leaf_model = StochTree::GaussianConstantLeafModel(tau); + +// // Evaluate all possible cutpoints +// StochTree::EvaluateAllPossibleSplits( +// dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, +// cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset +// ); + +// // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered +// ASSERT_EQ(log_cutpoint_evaluations.size(), (n - 2*min_samples_leaf + 1)*p + 1); + +// // Check the values of the cutpoint evaluations +// std::vector expected_split_evals{3.773828, 3.349927, 3.001568, 3.085074, 2.989927, 3.101841, 2.980939, 3.068029, 3.822045, 3.663843, 3.710592, 3.354912, 3.135288, +// 3.553728, 2.969388, 3.540838, 3.961885, 3.822045, 4.908861, 4.032006, 4.083473, 4.442268, 5.023573, 4.171735, 3.353457, 3.862124, +// 3.323620, 3.998112, 3.425777, 3.096926, 3.131347, 2.947921, 2.935892, 3.224115, 3.144767, 3.213065, 3.863427, 3.792850, 3.146056, +// 3.348693, 3.487161, 4.600861, 4.226219, 4.879161, 3.773828, 3.940111}; +// for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { +// ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); +// } +// } + +// TEST(LeafConstantModel, CutpointThinning) { +// // Load test data +// StochTree::TestUtils::TestDataset test_dataset; +// test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); +// std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); +// std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); +// std::vector feature_subset(test_dataset.x_cols, true); +// std::random_device rd; +// std::mt19937 gen(rd()); + +// // Construct datasets +// using data_size_t = StochTree::data_size_t; +// data_size_t n = test_dataset.n; +// int p = test_dataset.x_cols; +// StochTree::ForestDataset dataset = StochTree::ForestDataset(); +// dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); +// StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); + +// // Construct a ForestTracker +// int num_trees = 1; +// StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); + +// // Set sampling parameters +// double alpha = 0.95; +// double beta = 1.25; +// int min_samples_leaf = 1; +// int max_depth = -1; +// double global_variance = 1.; +// double tau = 1.; +// int cutpoint_grid_size = 5; +// StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); + +// // Construct temporary data structures needed to enumerate splits +// std::vector log_cutpoint_evaluations; +// std::vector cutpoint_features; +// std::vector 
cutpoint_values; +// std::vector cutpoint_feature_types; +// StochTree::data_size_t valid_cutpoint_count = 0; +// StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); + +// // Initialize a leaf model +// StochTree::GaussianConstantLeafModel leaf_model = StochTree::GaussianConstantLeafModel(tau); + +// // Evaluate all possible cutpoints +// StochTree::EvaluateAllPossibleSplits( +// dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, +// cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset +// ); + +// // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered +// ASSERT_EQ(log_cutpoint_evaluations.size(), (cutpoint_grid_size - 1)*p + 1); + +// // Check the values of the cutpoint evaluations +// std::vector expected_split_evals{3.349927, 3.085074, 3.101841, 3.068029, 3.710592, 3.135288, 2.969388, 3.961885, 4.032006, +// 4.442268, 4.171735, 3.862124, 3.425777, 3.131347, 2.935892, 3.144767, 3.792850, 3.348693, +// 4.600861, 4.879161, 3.940111}; +// for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { +// ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); +// } +// } + +// TEST(LeafUnivariateRegressionModel, FullEnumeration) { +// // Load test data +// StochTree::TestUtils::TestDataset test_dataset; +// test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); +// std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); +// std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); +// std::vector feature_subset(test_dataset.x_cols, true); +// std::random_device rd; +// std::mt19937 gen(rd()); + +// // Construct datasets +// using data_size_t = StochTree::data_size_t; +// data_size_t n = test_dataset.n; +// int p = test_dataset.x_cols; +// StochTree::ForestDataset dataset = StochTree::ForestDataset(); +// dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); +// dataset.AddBasis(test_dataset.omega.data(), test_dataset.n, test_dataset.omega_cols, test_dataset.row_major); +// StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); + +// // Construct a ForestTracker +// int num_trees = 1; +// StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); + +// // Set sampling parameters +// double alpha = 0.95; +// double beta = 1.25; +// int min_samples_leaf = 1; +// int max_depth = -1; +// double global_variance = 1.; +// double tau = 1.; +// int cutpoint_grid_size = n; +// StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); + +// // Construct temporary data structures needed to enumerate splits +// std::vector log_cutpoint_evaluations; +// std::vector cutpoint_features; +// std::vector cutpoint_values; +// std::vector cutpoint_feature_types; +// StochTree::data_size_t valid_cutpoint_count = 0; +// StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); + +// // Initialize a leaf model +// StochTree::GaussianUnivariateRegressionLeafModel leaf_model = StochTree::GaussianUnivariateRegressionLeafModel(tau); + +// // Evaluate all possible cutpoints +// StochTree::EvaluateAllPossibleSplits( +// dataset, tracker, residual, 
tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, +// cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, feature_types, feature_subset +// ); + +// // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered +// ASSERT_EQ(log_cutpoint_evaluations.size(), (n - 2*min_samples_leaf + 1)*p + 1); + +// // Check the values of the cutpoint evaluations +// std::vector expected_split_evals{4.978556, 4.067172, 3.823266, 3.850415, 3.796388, 3.791759, 3.864699, 3.970411, 5.105565, 4.886562, 4.812292, 4.450645, 4.180200, +// 4.625754, 3.983956, 4.906961, 5.307099, 5.105565, 6.057032, 5.463854, 5.312733, 5.504701, 5.872222, 4.936127, 4.203568, 4.192258, +// 4.633795, 4.060248, 4.032323, 4.040458, 4.176712, 3.809356, 3.854872, 4.404108, 4.243114, 4.116230, 5.167773, 5.031023, 4.203335, +// 4.094302, 4.280394, 5.557678, 5.394644, 5.945185, 4.978556, 5.069763}; +// for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { +// ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); +// } +// } + +// TEST(LeafUnivariateRegressionModel, CutpointThinning) { +// // Load test data +// StochTree::TestUtils::TestDataset test_dataset; +// test_dataset = StochTree::TestUtils::LoadSmallDatasetUnivariateBasis(); +// std::vector feature_types(test_dataset.x_cols, StochTree::FeatureType::kNumeric); +// std::vector variable_weights(test_dataset.x_cols, 1./test_dataset.x_cols); +// std::vector feature_subset(test_dataset.x_cols, true); +// std::random_device rd; +// std::mt19937 gen(rd()); + +// // Construct datasets +// using data_size_t = StochTree::data_size_t; +// data_size_t n = test_dataset.n; +// int p = test_dataset.x_cols; +// StochTree::ForestDataset dataset = StochTree::ForestDataset(); +// dataset.AddCovariates(test_dataset.covariates.data(), n, test_dataset.x_cols, test_dataset.row_major); +// dataset.AddBasis(test_dataset.omega.data(), test_dataset.n, test_dataset.omega_cols, test_dataset.row_major); +// StochTree::ColumnVector residual = StochTree::ColumnVector(test_dataset.outcome.data(), n); + +// // Construct a ForestTracker +// int num_trees = 1; +// StochTree::ForestTracker tracker = StochTree::ForestTracker(dataset.GetCovariates(), feature_types, num_trees, n); + +// // Set sampling parameters +// double alpha = 0.95; +// double beta = 1.25; +// int min_samples_leaf = 1; +// int max_depth = -1; +// double global_variance = 1.; +// double tau = 1.; +// int cutpoint_grid_size = 5; +// StochTree::TreePrior tree_prior = StochTree::TreePrior(alpha, beta, min_samples_leaf, max_depth); + +// // Construct temporary data structures needed to enumerate splits +// std::vector log_cutpoint_evaluations; +// std::vector cutpoint_features; +// std::vector cutpoint_values; +// std::vector cutpoint_feature_types; +// StochTree::data_size_t valid_cutpoint_count = 0; +// StochTree::CutpointGridContainer cutpoint_grid_container(dataset.GetCovariates(), residual.GetData(), cutpoint_grid_size); + +// // Initialize a leaf model +// StochTree::GaussianUnivariateRegressionLeafModel leaf_model = StochTree::GaussianUnivariateRegressionLeafModel(tau); + +// // Evaluate all possible cutpoints +// StochTree::EvaluateAllPossibleSplits( +// dataset, tracker, residual, tree_prior, gen, leaf_model, global_variance, 0, 0, log_cutpoint_evaluations, cutpoint_features, cutpoint_values, +// cutpoint_feature_types, valid_cutpoint_count, cutpoint_grid_container, 0, n, variable_weights, 
feature_types, feature_subset +// ); + + +// // Check that there are (n - 2*min_samples_leaf + 1)*p + 1 cutpoints considered +// ASSERT_EQ(log_cutpoint_evaluations.size(), (cutpoint_grid_size - 1)*p + 1); + +// // Check the values of the cutpoint evaluations +// std::vector expected_split_evals{4.067172, 3.850415, 3.791759, 3.970411, 4.812292, 4.180200, 3.983956, 5.307099, 5.463854, +// 5.504701, 4.936127, 4.192258, 4.032323, 4.176712, 3.854872, 4.243114, 5.031023, 4.094302, +// 5.557678, 5.945185, 5.069763}; +// for (int i = 0; i < log_cutpoint_evaluations.size(); i++) { +// ASSERT_NEAR(log_cutpoint_evaluations[i], expected_split_evals[i], 0.01); +// } +// } diff --git a/test/python/test_bart.py b/test/python/test_bart.py index 3c20bb76..4b22ab7b 100644 --- a/test/python/test_bart.py +++ b/test/python/test_bart.py @@ -83,7 +83,8 @@ def outcome_mean(X): bart_model_3.from_json_string_list(bart_models_json) # Assertions - y_hat_train_combined = bart_model_3.predict(covariates=X_train) + bart_preds_combined = bart_model_3.predict(covariates=X_train) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train @@ -188,9 +189,10 @@ def outcome_mean(X, W): bart_model_3.from_json_string_list(bart_models_json) # Assertions - y_hat_train_combined = bart_model_3.predict( + bart_preds_combined = bart_model_3.predict( covariates=X_train, basis=basis_train ) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train @@ -295,9 +297,10 @@ def outcome_mean(X, W): bart_model_3.from_json_string_list(bart_models_json) # Assertions - y_hat_train_combined = bart_model_3.predict( + bart_preds_combined = bart_model_3.predict( covariates=X_train, basis=basis_train ) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train @@ -407,7 +410,8 @@ def conditional_stddev(X): bart_model_3.from_json_string_list(bart_models_json) # Assertions - y_hat_train_combined, sigma2_x_train_combined = bart_model_3.predict(covariates=X_train) + bart_preds_combined = bart_model_3.predict(covariates=X_train) + y_hat_train_combined, sigma2_x_train_combined = bart_preds_combined['y_hat'], bart_preds_combined['variance_forest_predictions'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) assert sigma2_x_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( @@ -536,9 +540,10 @@ def conditional_stddev(X): bart_model_3.from_json_string_list(bart_models_json) # Assertions - y_hat_train_combined, _ = bart_model_3.predict( + bart_preds_combined = bart_model_3.predict( covariates=X_train, basis=basis_train ) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train @@ -660,9 +665,10 @@ def conditional_stddev(X): bart_model_3.from_json_string_list(bart_models_json) # Assertions - y_hat_train_combined, _ = bart_model_3.predict( + bart_preds_combined = bart_model_3.predict( covariates=X_train, basis=basis_train ) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) 
np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train @@ -814,11 +820,12 @@ def rfx_term(group_labels, basis): ) # Assertions - y_hat_train_combined, _ = bart_model_3.predict( + bart_preds_combined = bart_model_3.predict( covariates=X_train, rfx_group_ids=group_labels_train, rfx_basis=rfx_basis_train, ) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train @@ -986,12 +993,13 @@ def conditional_stddev(X): ) # Assertions - y_hat_train_combined, _ = bart_model_3.predict( + bart_preds_combined = bart_model_3.predict( covariates=X_train, basis=basis_train, rfx_group_ids=group_labels_train, rfx_basis=rfx_basis_train, ) + y_hat_train_combined = bart_preds_combined['y_hat'] assert y_hat_train_combined.shape == (n_train, num_mcmc * 2) np.testing.assert_allclose( y_hat_train_combined[:, 0:num_mcmc], bart_model.y_hat_train diff --git a/test/python/test_bcf.py b/test/python/test_bcf.py index 28f221ca..2e2f7fbf 100644 --- a/test/python/test_bcf.py +++ b/test/python/test_bcf.py @@ -70,7 +70,8 @@ def test_binary_bcf(self): assert bcf_model.tau_hat_test.shape == (n_test, num_mcmc) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -99,11 +100,11 @@ def test_binary_bcf(self): assert bcf_model.tau_hat_train.shape == (n_train, num_mcmc) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) - # Check treatment effect prediction method tau_hat = bcf_model.predict_tau(X_test, Z_test, pi_test) assert tau_hat.shape == (n_test, num_mcmc) @@ -134,7 +135,8 @@ def test_binary_bcf(self): assert bcf_model.bart_propensity_model.y_hat_test.shape == (n_test, 10) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test) + bcf_preds = bcf_model.predict(X_test, Z_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -163,7 +165,8 @@ def test_binary_bcf(self): assert bcf_model.bart_propensity_model.y_hat_train.shape == (n_train, 10) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test) + bcf_preds = bcf_model.predict(X_test, Z_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -235,7 +238,8 @@ def test_continuous_univariate_bcf(self): assert bcf_model.tau_hat_test.shape == (n_test, num_mcmc) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = 
bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -270,7 +274,8 @@ def test_continuous_univariate_bcf(self): assert bcf_model_2.tau_hat_test.shape == (n_test, num_mcmc) # Check overall prediction method - tau_hat_2, mu_hat_2, y_hat_2 = bcf_model_2.predict(X_test, Z_test, pi_test) + bcf_preds_2 = bcf_model_2.predict(X_test, Z_test, pi_test) + tau_hat_2, mu_hat_2, y_hat_2 = bcf_preds_2['tau_hat'], bcf_preds_2['mu_hat'], bcf_preds_2['y_hat'] assert tau_hat_2.shape == (n_test, num_mcmc) assert mu_hat_2.shape == (n_test, num_mcmc) assert y_hat_2.shape == (n_test, num_mcmc) @@ -285,7 +290,8 @@ def test_continuous_univariate_bcf(self): bcf_model_3.from_json_string_list(bcf_models_json) # Assertions - tau_hat_3, mu_hat_3, y_hat_3 = bcf_model_3.predict(X_test, Z_test, pi_test) + bcf_preds_3 = bcf_model_3.predict(X_test, Z_test, pi_test) + tau_hat_3, mu_hat_3, y_hat_3 = bcf_preds_3['tau_hat'], bcf_preds_3['mu_hat'], bcf_preds_3['y_hat'] assert tau_hat_3.shape == (n_train, num_mcmc * 2) assert mu_hat_3.shape == (n_train, num_mcmc * 2) assert y_hat_3.shape == (n_train, num_mcmc * 2) @@ -323,7 +329,8 @@ def test_continuous_univariate_bcf(self): assert bcf_model.tau_hat_train.shape == (n_train, num_mcmc) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -358,7 +365,8 @@ def test_continuous_univariate_bcf(self): assert bcf_model.bart_propensity_model.y_hat_test.shape == (n_test, 10) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -387,7 +395,8 @@ def test_continuous_univariate_bcf(self): assert bcf_model.bart_propensity_model.y_hat_train.shape == (n_train, 10) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test) + bcf_preds = bcf_model.predict(X_test, Z_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -414,7 +423,8 @@ def test_continuous_univariate_bcf(self): assert bcf_model_2.tau_hat_train.shape == (n_train, num_mcmc) # Check overall prediction method - tau_hat_2, mu_hat_2, y_hat_2 = bcf_model_2.predict(X_test, Z_test) + bcf_preds_2 = bcf_model_2.predict(X_test, Z_test) + tau_hat_2, mu_hat_2, y_hat_2 = bcf_preds_2['tau_hat'], bcf_preds_2['mu_hat'], bcf_preds_2['y_hat'] assert tau_hat_2.shape == (n_test, num_mcmc) assert mu_hat_2.shape == (n_test, num_mcmc) assert y_hat_2.shape == (n_test, num_mcmc) @@ -429,7 +439,8 @@ def test_continuous_univariate_bcf(self): bcf_model_3.from_json_string_list(bcf_models_json) # Assertions - tau_hat_3, mu_hat_3, y_hat_3 = bcf_model_3.predict(X_test, Z_test) + bcf_preds_3 = bcf_model_3.predict(X_test, Z_test) + tau_hat_3, mu_hat_3, y_hat_3 = bcf_preds_3['tau_hat'], 
bcf_preds_3['mu_hat'], bcf_preds_3['y_hat'] assert tau_hat_3.shape == (n_train, num_mcmc * 2) assert mu_hat_3.shape == (n_train, num_mcmc * 2) assert y_hat_3.shape == (n_train, num_mcmc * 2) @@ -510,7 +521,8 @@ def test_multivariate_bcf(self): assert bcf_model.tau_hat_test.shape == (n_test, num_mcmc, treatment_dim) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc, treatment_dim) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -539,7 +551,8 @@ def test_multivariate_bcf(self): assert bcf_model.tau_hat_train.shape == (n_train, num_mcmc, treatment_dim) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'] assert tau_hat.shape == (n_test, num_mcmc, treatment_dim) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) @@ -644,11 +657,12 @@ def test_binary_bcf_heteroskedastic(self): assert bcf_model.sigma2_x_test.shape == (n_train, num_mcmc) # Check overall prediction method - tau_hat, mu_hat, y_hat, sigma2_hat = bcf_model.predict(X_test, Z_test, pi_test) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + tau_hat, mu_hat, y_hat, sigma2_x_hat = bcf_preds['tau_hat'], bcf_preds['mu_hat'], bcf_preds['y_hat'], bcf_preds['variance_forest_predictions'] assert tau_hat.shape == (n_test, num_mcmc) assert mu_hat.shape == (n_test, num_mcmc) assert y_hat.shape == (n_test, num_mcmc) - assert sigma2_hat.shape == (n_test, num_mcmc) + assert sigma2_x_hat.shape == (n_test, num_mcmc) # Check treatment effect prediction method tau_hat = bcf_model.predict_tau(X_test, Z_test, pi_test) @@ -675,29 +689,29 @@ def test_binary_bcf_heteroskedastic(self): assert bcf_model.sigma2_x_train.shape == (n_train, num_mcmc) # Check overall prediction method - tau_hat, mu_hat, y_hat, sigma2_hat = bcf_model.predict(X_test, Z_test, pi_test) - assert tau_hat.shape == (n_test, num_mcmc) - assert mu_hat.shape == (n_test, num_mcmc) - assert y_hat.shape == (n_test, num_mcmc) - assert sigma2_hat.shape == (n_test, num_mcmc) + bcf_preds = bcf_model.predict(X_test, Z_test, pi_test) + assert bcf_preds['tau_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['mu_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['y_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['variance_forest_predictions'].shape == (n_test, num_mcmc) # Check predictions match - tau_hat, mu_hat, y_hat, sigma2_hat = bcf_model.predict(X_train, Z_train, pi_train) - assert tau_hat.shape == (n_train, num_mcmc) - assert mu_hat.shape == (n_train, num_mcmc) - assert y_hat.shape == (n_train, num_mcmc) - assert sigma2_hat.shape == (n_train, num_mcmc) + bcf_preds = bcf_model.predict(X_train, Z_train, pi_train) + assert bcf_preds['tau_hat'].shape == (n_train, num_mcmc) + assert bcf_preds['mu_hat'].shape == (n_train, num_mcmc) + assert bcf_preds['y_hat'].shape == (n_train, num_mcmc) + assert bcf_preds['variance_forest_predictions'].shape == (n_train, num_mcmc) np.testing.assert_allclose( - y_hat, bcf_model.y_hat_train + bcf_preds['y_hat'], bcf_model.y_hat_train ) np.testing.assert_allclose( - mu_hat, bcf_model.mu_hat_train + bcf_preds['mu_hat'], bcf_model.mu_hat_train ) 
np.testing.assert_allclose( - tau_hat, bcf_model.tau_hat_train + bcf_preds['tau_hat'], bcf_model.tau_hat_train ) np.testing.assert_allclose( - sigma2_hat, bcf_model.sigma2_x_train + bcf_preds['variance_forest_predictions'], bcf_model.sigma2_x_train ) # Check treatment effect prediction method @@ -731,11 +745,11 @@ def test_binary_bcf_heteroskedastic(self): assert bcf_model.bart_propensity_model.y_hat_test.shape == (n_test, 10) # Check overall prediction method - tau_hat, mu_hat, y_hat, sigma2_hat = bcf_model.predict(X_test, Z_test) - assert tau_hat.shape == (n_test, num_mcmc) - assert mu_hat.shape == (n_test, num_mcmc) - assert y_hat.shape == (n_test, num_mcmc) - assert sigma2_hat.shape == (n_test, num_mcmc) + bcf_preds = bcf_model.predict(X_test, Z_test) + assert bcf_preds['tau_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['mu_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['y_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['variance_forest_predictions'].shape == (n_test, num_mcmc) # Check treatment effect prediction method tau_hat = bcf_model.predict_tau(X_test, Z_test) @@ -761,10 +775,10 @@ def test_binary_bcf_heteroskedastic(self): assert bcf_model.bart_propensity_model.y_hat_train.shape == (n_train, 10) # Check overall prediction method - tau_hat, mu_hat, y_hat = bcf_model.predict(X_test, Z_test) - assert tau_hat.shape == (n_test, num_mcmc) - assert mu_hat.shape == (n_test, num_mcmc) - assert y_hat.shape == (n_test, num_mcmc) + bcf_preds = bcf_model.predict(X_test, Z_test) + assert bcf_preds['tau_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['mu_hat'].shape == (n_test, num_mcmc) + assert bcf_preds['y_hat'].shape == (n_test, num_mcmc) # Check treatment effect prediction method tau_hat = bcf_model.predict_tau(X_test, Z_test) diff --git a/test/python/test_json.py b/test/python/test_json.py index 1e3d9c5a..1d0dc66d 100644 --- a/test/python/test_json.py +++ b/test/python/test_json.py @@ -130,7 +130,8 @@ def outcome_mean(X): forest_preds_y_mcmc_cached = bart_model.y_hat_train # Extract original predictions - forest_preds_y_mcmc_retrieved = bart_model.predict(X) + bart_preds = bart_model.predict(X) + forest_preds_y_mcmc_retrieved = bart_preds['y_hat'] # Roundtrip to / from JSON json_test = JSONSerializer() @@ -336,13 +337,15 @@ def outcome_mean(X, W): bart_orig.sample(X_train=X, y_train=y, leaf_basis_train=W, num_gfr=10, num_mcmc=10) # Extract predictions from the sampler - y_hat_orig = bart_orig.predict(X, W) + bart_preds_orig = bart_orig.predict(X, W) + y_hat_orig = bart_preds_orig['y_hat'] # "Round-trip" the model to JSON string and back and check that the predictions agree bart_json_string = bart_orig.to_json() bart_reloaded = BARTModel() bart_reloaded.from_json(bart_json_string) - y_hat_reloaded = bart_reloaded.predict(X, W) + bart_preds_reloaded = bart_reloaded.predict(X, W) + y_hat_reloaded = bart_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) def test_bart_rfx_string(self): @@ -408,13 +411,15 @@ def rfx_mean(group_labels, basis): rfx_basis_train=basis, num_gfr=10, num_mcmc=10) # Extract predictions from the sampler - y_hat_orig = bart_orig.predict(X, W, group_labels, basis) + bart_preds_orig = bart_orig.predict(X, W, group_labels, basis) + y_hat_orig = bart_preds_orig['y_hat'] # "Round-trip" the model to JSON string and back and check that the predictions agree bart_json_string = bart_orig.to_json() bart_reloaded = BARTModel() bart_reloaded.from_json(bart_json_string) - y_hat_reloaded = bart_reloaded.predict(X, W, 
group_labels, basis) + bart_preds_reloaded = bart_reloaded.predict(X, W, group_labels, basis) + y_hat_reloaded = bart_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) def test_bcf_string(self): @@ -444,15 +449,17 @@ def test_bcf_string(self): ) # Extract predictions from the sampler - mu_hat_orig, tau_hat_orig, y_hat_orig = bcf_orig.predict(X, Z, pi_X) + bcf_preds_orig = bcf_orig.predict(X, Z, pi_X) + mu_hat_orig, tau_hat_orig, y_hat_orig = bcf_preds_orig['mu_hat'], bcf_preds_orig['tau_hat'], bcf_preds_orig['y_hat'] # "Round-trip" the model to JSON string and back and check that the predictions agree bcf_json_string = bcf_orig.to_json() bcf_reloaded = BCFModel() bcf_reloaded.from_json(bcf_json_string) - mu_hat_reloaded, tau_hat_reloaded, y_hat_reloaded = bcf_reloaded.predict( + bcf_preds_reloaded = bcf_reloaded.predict( X, Z, pi_X ) + mu_hat_reloaded, tau_hat_reloaded, y_hat_reloaded = bcf_preds_reloaded['mu_hat'], bcf_preds_reloaded['tau_hat'], bcf_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) np.testing.assert_almost_equal(tau_hat_orig, tau_hat_reloaded) np.testing.assert_almost_equal(mu_hat_orig, mu_hat_reloaded) @@ -508,15 +515,17 @@ def rfx_mean(group_labels, basis): ) # Extract predictions from the sampler - mu_hat_orig, tau_hat_orig, rfx_hat_orig, y_hat_orig = bcf_orig.predict(X, Z, pi_X, group_labels, basis) + bcf_preds_orig = bcf_orig.predict(X, Z, pi_X, group_labels, basis) + mu_hat_orig, tau_hat_orig, rfx_hat_orig, y_hat_orig = bcf_preds_orig['mu_hat'], bcf_preds_orig['tau_hat'], bcf_preds_orig['rfx_predictions'], bcf_preds_orig['y_hat'] # "Round-trip" the model to JSON string and back and check that the predictions agree bcf_json_string = bcf_orig.to_json() bcf_reloaded = BCFModel() bcf_reloaded.from_json(bcf_json_string) - mu_hat_reloaded, tau_hat_reloaded, rfx_hat_reloaded, y_hat_reloaded = bcf_reloaded.predict( + bcf_preds_reloaded = bcf_reloaded.predict( X, Z, pi_X, group_labels, basis ) + mu_hat_reloaded, tau_hat_reloaded, rfx_hat_reloaded, y_hat_reloaded = bcf_preds_reloaded['mu_hat'], bcf_preds_reloaded['tau_hat'], bcf_preds_reloaded['rfx_predictions'], bcf_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) np.testing.assert_almost_equal(tau_hat_orig, tau_hat_reloaded) np.testing.assert_almost_equal(mu_hat_orig, mu_hat_reloaded) @@ -547,15 +556,17 @@ def test_bcf_propensity_string(self): bcf_orig.sample(X_train=X, Z_train=Z, y_train=y, num_gfr=10, num_mcmc=10) # Extract predictions from the sampler - mu_hat_orig, tau_hat_orig, y_hat_orig = bcf_orig.predict(X, Z, pi_X) + bcf_preds_orig = bcf_orig.predict(X, Z, pi_X) + mu_hat_orig, tau_hat_orig, y_hat_orig = bcf_preds_orig['mu_hat'], bcf_preds_orig['tau_hat'], bcf_preds_orig['y_hat'] # "Round-trip" the model to JSON string and back and check that the predictions agree bcf_json_string = bcf_orig.to_json() bcf_reloaded = BCFModel() bcf_reloaded.from_json(bcf_json_string) - mu_hat_reloaded, tau_hat_reloaded, y_hat_reloaded = bcf_reloaded.predict( + bcf_preds_reloaded = bcf_reloaded.predict( X, Z, pi_X ) + mu_hat_reloaded, tau_hat_reloaded, y_hat_reloaded = bcf_preds_reloaded['mu_hat'], bcf_preds_reloaded['tau_hat'], bcf_preds_reloaded['y_hat'] np.testing.assert_almost_equal(y_hat_orig, y_hat_reloaded) np.testing.assert_almost_equal(tau_hat_orig, tau_hat_reloaded) np.testing.assert_almost_equal(mu_hat_orig, mu_hat_reloaded) diff --git a/tools/debug/bart_profile.R b/tools/debug/bart_profile.R index 2b33211e..a63331b9 100644 
--- a/tools/debug/bart_profile.R +++ b/tools/debug/bart_profile.R @@ -6,7 +6,7 @@ library(stochtree) Rprof() start_time <- Sys.time() -n <- 10000 +n <- 50000 p <- 50 X <- matrix(runif(n*p), ncol = p) f_XW <- ( @@ -26,7 +26,10 @@ X_test <- X[test_inds,] X_train <- X[train_inds,] y_test <- y[test_inds] y_train <- y[train_inds] -bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test) +general_params <- list(num_threads = 10) +bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, + num_gfr = 100, num_mcmc = 100, + general_params = general_params) end_time <- Sys.time() print(paste("runtime:", end_time - start_time)) diff --git a/tools/perf/bart_profiling_script.R b/tools/perf/bart_profiling_script.R index 7a60eed2..04fef961 100644 --- a/tools/perf/bart_profiling_script.R +++ b/tools/perf/bart_profiling_script.R @@ -9,6 +9,7 @@ if (length(args) > 0){ num_gfr <- as.integer(args[3]) num_mcmc <- as.integer(args[4]) snr <- as.numeric(args[5]) + num_threads <- as.numeric(args[6]) } else{ # Default arguments n <- 1000 @@ -16,9 +17,11 @@ if (length(args) > 0){ num_gfr <- 10 num_mcmc <- 100 snr <- 3.0 + num_threads <- -1 } cat("n = ", n, "\np = ", p, "\nnum_gfr = ", num_gfr, - "\nnum_mcmc = ", num_mcmc, "\nsnr = ", snr, "\n", sep = "") + "\nnum_mcmc = ", num_mcmc, "\nsnr = ", snr, + "\nnum_threads = ", num_threads, "\n", sep = "") # Generate data needed to train BART model X <- matrix(runif(n*p), ncol = p) @@ -49,8 +52,9 @@ y_train <- y[train_inds] system.time({ # Sample BART model + general_params <- list(num_threads = num_threads) bart_model <- bart(X_train = X_train, y_train = y_train, X_test = X_test, - num_gfr = num_gfr, num_mcmc = num_mcmc) + num_gfr = num_gfr, num_mcmc = num_mcmc, general_params = general_params) # Predict on the test set test_preds <- predict(bart_model, X = X_test) diff --git a/tools/perf/bart_profiling_script.py b/tools/perf/bart_profiling_script.py new file mode 100644 index 00000000..a54fc575 --- /dev/null +++ b/tools/perf/bart_profiling_script.py @@ -0,0 +1,68 @@ +# Load libraries +import argparse +import numpy as np +from sklearn.model_selection import train_test_split +from stochtree import BARTModel +import time + +def outcome_mean(X: np.ndarray) -> np.ndarray: + plm_term = np.where( + (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * X[:,1], + np.where( + (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * X[:,1], + np.where( + (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * X[:,1], + 7.5 * X[:,1] + ) + ) + ) + trig_term = 2*np.sin(X[:,2]*2*np.pi) - 1.5*np.cos(X[:,3]*2*np.pi) + return plm_term + trig_term + +if __name__ == "__main__": + # Handle optional command line arguments + parser = argparse.ArgumentParser( + prog='bart_profiling_script', + description='Runs BART on synthetic data, following user-provided parameters') + parser.add_argument("--n", action='store', default=1000, type=int) + parser.add_argument("--p", action='store', default=5, type=int) + parser.add_argument("--num_gfr", action='store', default=10, type=int) + parser.add_argument("--num_mcmc", action='store', default=100, type=int) + parser.add_argument("--snr", action='store', default=2.0, type=float) + parser.add_argument("--num_threads", action='store', default=-1, type=int) + args = parser.parse_args() + n = args.n + p = args.p + num_gfr = args.num_gfr + num_mcmc = args.num_mcmc + snr = args.snr + num_threads = args.num_threads + print(f"n = {n:d}\np = {p:d}\nnum_gfr = {num_gfr:d}\nnum_mcmc = {num_mcmc:d}\nsnr = {snr:.2f}\nnum_threads = {num_threads:d}") + + # Generate synthetic data 
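+    # Covariates are drawn uniformly on [0, 1]; the Gaussian noise scale is
+    # set below so that sd(f_X) / noise_sd equals the requested SNR.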
+ rng = np.random.default_rng() + X = rng.uniform(0, 1, (n, p)) + f_X = outcome_mean(X) + noise_sd = np.std(f_X)/snr + epsilon = rng.normal(loc=0., scale=noise_sd, size=n) + y = f_X + epsilon + + # Test train split + sample_inds = np.arange(n) + train_inds, test_inds = train_test_split(sample_inds, test_size=0.2) + X_train = X[train_inds,:] + X_test = X[test_inds,:] + y_train = y[train_inds] + y_test = y[test_inds] + + # Time the BART model and prediction + start_time = time.time() + general_params = {'num_threads': num_threads} + bart_model = BARTModel() + bart_model.sample(X_train=X_train, y_train=y_train, X_test=X_test, + num_gfr=num_gfr, num_mcmc=num_mcmc, general_params=general_params) + bart_preds = bart_model.predict(covariates=X_test) + test_preds = bart_preds['y_hat'] + end_time = time.time() + total_runtime = end_time - start_time + print(f"Total runtime: {total_runtime:.3f} seconds") diff --git a/tools/perf/bcf_profiling_script.R b/tools/perf/bcf_profiling_script.R new file mode 100644 index 00000000..9e1d60ef --- /dev/null +++ b/tools/perf/bcf_profiling_script.R @@ -0,0 +1,80 @@ +# Load libraries +library(stochtree) + +# Capture command line arguments +args <- commandArgs(trailingOnly = T) +if (length(args) > 0){ + n <- as.integer(args[1]) + p <- as.integer(args[2]) + num_gfr <- as.integer(args[3]) + num_mcmc <- as.integer(args[4]) + snr <- as.numeric(args[5]) + num_threads <- as.numeric(args[6]) +} else{ + # Default arguments + n <- 1000 + p <- 5 + num_gfr <- 10 + num_mcmc <- 100 + snr <- 3.0 + num_threads <- -1 +} +cat("n = ", n, "\np = ", p, "\nnum_gfr = ", num_gfr, + "\nnum_mcmc = ", num_mcmc, "\nsnr = ", snr, + "\nnum_threads = ", num_threads, "\n", sep = "") + +# Generate data needed to train BCF model +g <- function(x) {ifelse(x[,5]==1,2,ifelse(x[,5]==2,-1,-4))} +mu1 <- function(x) {1+g(x)+x[,1]*x[,3]} +tau1 <- function(x) {1+2*x[,2]*x[,4]} +x1 <- rnorm(n) +x2 <- rnorm(n) +x3 <- rnorm(n) +x4 <- as.numeric(rbinom(n,1,0.5)) +x5 <- as.numeric(sample(1:3,n,replace=TRUE)) +X <- cbind(x1,x2,x3,x4,x5) +p_remaining <- p - 5 +if (p_remaining > 0) { + X_remaining <- matrix(rnorm(n*p_remaining), ncol = p_remaining) + X <- cbind(X, X_remaining) +} +mu_x <- mu1(X) +tau_x <- tau1(X) +pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 +Z <- rbinom(n,1,pi_x) +E_XZ <- mu_x + Z*tau_x +rfx_group_ids <- rep(c(1,2), n %/% 2) +rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE) +rfx_basis <- cbind(1, runif(n, -1, 1)) +rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) +y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr) +X <- as.data.frame(X) +X$x4 <- factor(X$x4, ordered = TRUE) +X$x5 <- factor(X$x5, ordered = TRUE) + +# Split into train and test sets +test_set_pct <- 0.2 +n_test <- round(test_set_pct*n) +n_train <- n - n_test +test_inds <- sort(sample(1:n, n_test, replace = FALSE)) +train_inds <- (1:n)[!((1:n) %in% test_inds)] +X_test <- X[test_inds,] +X_train <- X[train_inds,] +Z_test <- Z[test_inds] +Z_train <- Z[train_inds] +pi_test <- pi_x[test_inds] +pi_train <- pi_x[train_inds] +y_test <- y[test_inds] +y_train <- y[train_inds] + +system.time({ + # Sample BCF model + general_params <- list(num_threads = num_threads) + bcf_model <- bcf(X_train = X_train, Z_train = Z_train, propensity_train = pi_train, + y_train = y_train, X_test = X_test, Z_test = Z_test, + propensity_test = pi_test, num_gfr = num_gfr, + num_mcmc = num_mcmc, general_params = general_params) + + # Predict on the test set + test_preds <- predict(bcf_model, X = X_test, Z = Z_test, 
propensity = pi_test)
+})
diff --git a/tools/regression/README.md b/tools/regression/README.md
new file mode 100644
index 00000000..2ab7e37b
--- /dev/null
+++ b/tools/regression/README.md
@@ -0,0 +1,141 @@
+# Regression Testing for BART and BCF
+
+This directory contains scripts that constitute a lightweight "regression testing" procedure for `stochtree`'s BART and BCF models. The goal is to ensure that the models are functioning correctly and that any changes made to the code do not introduce new bugs.
+
+## Overview
+
+`stochtree` is by its nature a stochastic software tool, meaning that it generates different results each time it is run. This complicates regression testing slightly, as we cannot always distinguish between a genuine performance regression and an expected variation in results.
+
+For this reason, the "regression testing" setup is somewhat informal. It is designed to catch any strikingly obvious issues. Both BART and BCF are run on a set of example datasets, and the results can then be compared against previously saved outputs.
+
+## Usage
+
+The primary scripts for running the regression tests are located in the `tools/regression/bart` and `tools/regression/bcf` directories. These directories contain scripts that run each model on a variety of datasets and then combine and summarize the results (RMSE, coverage, runtime).
+
+This section documents how to run the end-to-end regression test suite for both the R and Python packages of `stochtree`. For more information on the individual tests, see the following section for documentation on the data generating processes tested and the test script parameters.
+
+### R Package
+
+To run the regression tests for the R package, first navigate to the `stochtree` repository in your terminal and then run:
+
+```bash
+Rscript tools/regression/bart/regression_test_dispatch_bart.R
+Rscript tools/regression/bcf/regression_test_dispatch_bcf.R
+```
+
+Then, to combine and analyze the results, run:
+
+```bash
+Rscript tools/regression/bart/regression_test_analysis_bart.R
+Rscript tools/regression/bcf/regression_test_analysis_bcf.R
+```
+
+These summaries can be compared against any previously saved results. We are currently figuring out a solution for hosting previous regression results, so for now, you will need to manually save and compare outputs of the tests locally before and after making your changes.
+
+### Python Package
+
+To run the regression tests for the Python package, first navigate to the `stochtree` repository in your terminal and then run:
+
+```bash
+python tools/regression/bart/regression_test_dispatch_bart.py
+python tools/regression/bcf/regression_test_dispatch_bcf.py
+```
+
+Then, to combine and analyze the results, run:
+
+```bash
+python tools/regression/bart/regression_test_analysis_bart.py
+python tools/regression/bcf/regression_test_analysis_bcf.py
+```
+
+## Individual Regression Tests
+
+### BART
+
+#### Data-generating processes (DGPs):
+
+1. **DGP 1**: Basic BART without basis or random effects
+2. **DGP 2**: BART with basis but no random effects
+3. **DGP 3**: BART with random effects but no basis
+4. **DGP 4**: BART with both basis and random effects
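+
+To make these concrete, the sketch below condenses DGP 1 from `bart/individual_regression_test_bart.R` (given `n`, `p`, and `snr` as defined in the parameter list below): a piecewise linear term in `X[,1]`/`X[,2]` plus a trigonometric term in `X[,3]`/`X[,4]`, with Gaussian noise scaled to the target signal-to-noise ratio.
+
+```r
+X <- matrix(runif(n*p), ncol = p)
+plm_term <- (
+  ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*X[,2]) +
+  ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*X[,2]) +
+  ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*X[,2]) +
+  ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*X[,2])
+)
+trig_term <- 2*sin(X[,3]*2*pi) - 1.5*cos(X[,4]*2*pi)
+f_X <- plm_term + trig_term
+y <- f_X + rnorm(n, 0, sd(f_X)/snr)  # noise scaled so sd(f_X)/noise_sd = snr
+```
+
+DGP 2 replaces `X[,2]` in the piecewise term with a leaf basis `W[,1]`, and DGPs 3 and 4 add a group-level random effects term to the conditional mean.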
+
+#### Script
+
+The individual regression tests are implemented in the `tools/regression/bart/individual_regression_test_bart` scripts for R and Python and dispatched over a grid of settings by the corresponding `regression_test_dispatch_bart` script. Both versions accept the following (optional) positional command line arguments:
+
+- `n_iter`: Number of iterations (default: 5)
+- `n`: Sample size (default: 1000)
+- `p`: Number of covariates (default: 5)
+- `num_gfr`: Number of GFR iterations (default: 10)
+- `num_mcmc`: Number of MCMC iterations (default: 100)
+- `dgp_num`: Data generating process number 1-4 (default: 1)
+- `snr`: Signal-to-noise ratio (default: 2.0)
+- `test_set_pct`: Test set percentage (default: 0.2)
+- `num_threads`: Number of threads, -1 for all available (default: -1)
+
+Run this script in Python:
+
+```bash
+python tools/regression/bart/individual_regression_test_bart.py [n_iter] [n] [p] [num_gfr] [num_mcmc] [dgp_num] [snr] [test_set_pct] [num_threads]
+```
+
+or in R:
+
+```bash
+Rscript tools/regression/bart/individual_regression_test_bart.R [n_iter] [n] [p] [num_gfr] [num_mcmc] [dgp_num] [snr] [test_set_pct] [num_threads]
+```
+
+For example, `python tools/regression/bart/individual_regression_test_bart.py 5 10000 20 10 100 1 2.0 0.2 4` runs five iterations of DGP 1 with n = 10000, p = 20, and four threads.
+
+#### Output
+
+BART results are saved to CSV files in the `tools/regression/bart/stochtree_bart_python_results/` or `tools/regression/bart/stochtree_bart_r_results/` directory with filenames that encode the parameter values (for example, the Python script writes `stochtree_bart_python_n_1000_p_5_num_gfr_10_num_mcmc_100_dgp_num_1_snr_2_test_set_pct_20_num_threads_0.csv` under the default settings). Each file contains:
+
+- Parameter values (n, p, num_gfr, num_mcmc, dgp_num, snr, test_set_pct, num_threads)
+- Iteration number
+- RMSE on test set
+- Coverage of 95% prediction intervals
+- Runtime in seconds
+
+### BCF
+
+#### Data-generating processes (DGPs):
+
+1. **DGP 1**: Basic BCF without random effects
+2. **DGP 2**: BCF with multivariate treatment but no random effects
+3. **DGP 3**: BCF with random effects but univariate treatment
+4. **DGP 4**: BCF with both multivariate treatment and random effects
+
+#### Script
+
+The individual regression tests are implemented in the `tools/regression/bcf/individual_regression_test_bcf` scripts for R and Python and dispatched by the corresponding `regression_test_dispatch_bcf` script. Both versions accept the following (optional) positional command line arguments:
+
+- `n_iter`: Number of iterations (default: 5)
+- `n`: Sample size (default: 1000)
+- `p`: Number of covariates (default: 5)
+- `num_gfr`: Number of GFR iterations (default: 10)
+- `num_mcmc`: Number of MCMC iterations (default: 100)
+- `dgp_num`: Data generating process number 1-4 (default: 1)
+- `snr`: Signal-to-noise ratio (default: 2.0)
+- `test_set_pct`: Test set percentage (default: 0.2)
+- `num_threads`: Number of threads, -1 for all available (default: -1)
+
+Run this script in Python:
+
+```bash
+python tools/regression/bcf/individual_regression_test_bcf.py [n_iter] [n] [p] [num_gfr] [num_mcmc] [dgp_num] [snr] [test_set_pct] [num_threads]
+```
+
+or in R:
+
+```bash
+Rscript tools/regression/bcf/individual_regression_test_bcf.R [n_iter] [n] [p] [num_gfr] [num_mcmc] [dgp_num] [snr] [test_set_pct] [num_threads]
+```
+
+#### Output
+
+BCF results are saved to CSV files in the `tools/regression/bcf/stochtree_bcf_python_results/` or `tools/regression/bcf/stochtree_bcf_r_results/` directory with filenames that encode the parameter values.
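+For example, with the default parameters the Python script would produce a file named something like `stochtree_bcf_python_n_1000_p_5_num_gfr_10_num_mcmc_100_dgp_num_1_snr_2_test_set_pct_20_num_threads_0.csv` (the exact pattern is assumed by analogy with the BART scripts' naming scheme).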
Each file contains: + +- Parameter values (n, p, num_gfr, num_mcmc, dgp_num, snr, test_set_pct, num_threads) +- Iteration number +- Outcome RMSE and coverage +- Treatment effect RMSE and coverage +- Runtime in seconds diff --git a/tools/regression/bart/individual_regression_test_bart.R b/tools/regression/bart/individual_regression_test_bart.R new file mode 100644 index 00000000..3f71e8cc --- /dev/null +++ b/tools/regression/bart/individual_regression_test_bart.R @@ -0,0 +1,235 @@ +# Load libraries +library(stochtree) + +# Define DGPs +dgp1 <- function(n, p, snr) { + X <- matrix(runif(n*p), ncol = p) + plm_term <- ( + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*X[,2]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*X[,2]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*X[,2]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*X[,2]) + ) + trig_term <- ( + 2*sin(X[,3]*2*pi) - + 1.5*cos(X[,4]*2*pi) + ) + f_XW <- plm_term + trig_term + noise_sd <- sd(f_XW)/snr + y <- f_XW + rnorm(n, 0, noise_sd) + return(list(covariates = X, basis = NULL, outcome = y, conditional_mean = f_XW, + rfx_group_ids = NULL, rfx_basis = NULL)) +} +dgp2 <- function(n, p, snr) { + X <- matrix(runif(n*p), ncol = p) + W <- matrix(runif(n*2), ncol = 2) + plm_term <- ( + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) + ) + trig_term <- ( + 2*sin(X[,3]*2*pi) - + 1.5*cos(X[,4]*2*pi) + ) + f_XW <- plm_term + trig_term + noise_sd <- sd(f_XW)/snr + y <- f_XW + rnorm(n, 0, noise_sd) + return(list(covariates = X, basis = W, outcome = y, conditional_mean = f_XW, + rfx_group_ids = NULL, rfx_basis = NULL)) +} +dgp3 <- function(n, p, snr) { + X <- matrix(runif(n*p), ncol = p) + plm_term <- ( + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*X[,2]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*X[,2]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*X[,2]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*X[,2]) + ) + trig_term <- ( + 2*sin(X[,3]*2*pi) - + 1.5*cos(X[,4]*2*pi) + ) + rfx_group_ids <- sample(1:3, size = n, replace = T) + rfx_coefs <- t(matrix(c(-5, -3, -1, 5, 3, 1), nrow=2, byrow=TRUE)) + rfx_basis <- cbind(1, runif(n, -1, 1)) + rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) + f_XW <- plm_term + trig_term + rfx_term + noise_sd <- sd(f_XW)/snr + y <- f_XW + rnorm(n, 0, noise_sd) + return(list(covariates = X, basis = NULL, outcome = y, conditional_mean = f_XW, + rfx_group_ids = rfx_group_ids, rfx_basis = rfx_basis)) +} +dgp4 <- function(n, p, snr) { + X <- matrix(runif(n*p), ncol = p) + W <- matrix(runif(n*2), ncol = 2) + plm_term <- ( + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*W[,1]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*W[,1]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*W[,1]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*W[,1]) + ) + trig_term <- ( + 2*sin(X[,3]*2*pi) - + 1.5*cos(X[,4]*2*pi) + ) + rfx_group_ids <- sample(1:3, size = n, replace = T) + rfx_coefs <- t(matrix(c(-5, -3, -1, 5, 3, 1), nrow=2, byrow=TRUE)) + rfx_basis <- cbind(1, runif(n, -1, 1)) + rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) + f_XW <- plm_term + trig_term + rfx_term + noise_sd <- sd(f_XW)/snr + y <- f_XW + rnorm(n, 0, noise_sd) + return(list(covariates = X, basis = W, outcome = y, conditional_mean = f_XW, + rfx_group_ids = rfx_group_ids, rfx_basis = rfx_basis)) +} + +# Test / train split utilities +compute_test_train_indices <- function(n, test_set_pct) { + n_test <- round(test_set_pct*n) + n_train 
<- n - n_test + test_inds <- sort(sample(1:n, n_test, replace = FALSE)) + train_inds <- (1:n)[!((1:n) %in% test_inds)] + return(list(test_inds = test_inds, train_inds = train_inds)) +} +subset_data <- function(data, subset_inds) { + if (is.matrix(data)) { + return(data[subset_inds,]) + } else { + return(data[subset_inds]) + } +} + +# Capture command line arguments +args <- commandArgs(trailingOnly = T) +if (length(args) > 0){ + n_iter <- as.integer(args[1]) + n <- as.integer(args[2]) + p <- as.integer(args[3]) + num_gfr <- as.integer(args[4]) + num_mcmc <- as.integer(args[5]) + dgp_num <- as.integer(args[6]) + snr <- as.numeric(args[7]) + test_set_pct <- as.numeric(args[8]) + num_threads <- as.integer(args[9]) +} else{ + # Default arguments + n_iter <- 5 + n <- 1000 + p <- 5 + num_gfr <- 10 + num_mcmc <- 100 + dgp_num <- 1 + snr <- 2.0 + test_set_pct <- 0.2 + num_threads <- -1 +} +cat("n_iter = ", n_iter, "\nn = ", n, "\np = ", p, "\nnum_gfr = ", num_gfr, + "\nnum_mcmc = ", num_mcmc, "\ndgp_num = ", dgp_num, "\nsnr = ", snr, + "\ntest_set_pct = ", test_set_pct, "\nnum_threads = ", num_threads, "\n", sep = "") + +# Run the performance evaluation +results <- matrix(NA, nrow = n_iter, ncol = 4) +colnames(results) <- c("iter", "rmse", "coverage", "runtime") +for (i in 1:n_iter) { + # Generate data + if (dgp_num == 1) { + data_list <- dgp1(n = n, p = p, snr = snr) + } else if (dgp_num == 2) { + data_list <- dgp2(n = n, p = p, snr = snr) + } else if (dgp_num == 3) { + data_list <- dgp3(n = n, p = p, snr = snr) + } else if (dgp_num == 4) { + data_list <- dgp4(n = n, p = p, snr = snr) + } else { + stop("Invalid DGP input") + } + covariates <- data_list[['covariates']] + basis <- data_list[['basis']] + conditional_mean <- data_list[['conditional_mean']] + outcome <- data_list[['outcome']] + rfx_group_ids <- data_list[['rfx_group_ids']] + rfx_basis <- data_list[['rfx_basis']] + + # Split into train / test sets + subset_inds_list <- compute_test_train_indices(n, test_set_pct) + test_inds <- subset_inds_list$test_inds + train_inds <- subset_inds_list$train_inds + covariates_train <- subset_data(covariates, train_inds) + covariates_test <- subset_data(covariates, test_inds) + outcome_train <- subset_data(outcome, train_inds) + outcome_test <- subset_data(outcome, test_inds) + conditional_mean_train <- subset_data(conditional_mean, train_inds) + conditional_mean_test <- subset_data(conditional_mean, test_inds) + has_basis <- !is.null(basis) + has_rfx <- !is.null(rfx_group_ids) + if (has_basis) { + basis_train <- subset_data(basis, train_inds) + basis_test <- subset_data(basis, test_inds) + } else { + basis_train <- NULL + basis_test <- NULL + } + if (has_rfx) { + rfx_group_ids_train <- subset_data(rfx_group_ids, train_inds) + rfx_group_ids_test <- subset_data(rfx_group_ids, test_inds) + rfx_basis_train <- subset_data(rfx_basis, train_inds) + rfx_basis_test <- subset_data(rfx_basis, test_inds) + } else { + rfx_group_ids_train <- NULL + rfx_group_ids_test <- NULL + rfx_basis_train <- NULL + rfx_basis_test <- NULL + } + + # Run (and time) BART + bart_timing <- system.time({ + # Sample BART model + general_params <- list(num_threads = num_threads) + bart_model <- stochtree::bart( + X_train = covariates_train, y_train = outcome_train, leaf_basis_train = basis_train, + rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train, + num_gfr = num_gfr, num_mcmc = num_mcmc, general_params = general_params + ) + + # Predict on the test set + test_preds <- predict( + bart_model, X = 
covariates_test, leaf_basis = basis_test,
+      rfx_group_ids = rfx_group_ids_test, rfx_basis = rfx_basis_test
+    )
+  })[3]
+
+  # Compute test set evals
+  y_hat_posterior <- test_preds$y_hat
+  y_hat_posterior_mean <- rowMeans(y_hat_posterior)
+  rmse_test <- sqrt(mean((y_hat_posterior_mean - outcome_test)^2))
+  y_hat_posterior_quantile_025 <- apply(y_hat_posterior, 1, function(x) quantile(x, 0.025))
+  y_hat_posterior_quantile_975 <- apply(y_hat_posterior, 1, function(x) quantile(x, 0.975))
+  covered <- rep(NA, nrow(y_hat_posterior))
+  for (j in 1:nrow(y_hat_posterior)) {
+    covered[j] <- (
+      (conditional_mean_test[j] >= y_hat_posterior_quantile_025[j]) &
+      (conditional_mean_test[j] <= y_hat_posterior_quantile_975[j])
+    )
+  }
+  coverage_test <- mean(covered)
+
+  # Store evaluations
+  results[i,] <- c(i, rmse_test, coverage_test, bart_timing)
+}
+
+# Wrangle and save results to CSV
+results_df <- data.frame(
+  cbind(n, p, num_gfr, num_mcmc, dgp_num, snr, test_set_pct, num_threads, results)
+)
+snr_rounded <- as.integer(snr)
+test_set_pct_rounded <- as.integer(test_set_pct*100)
+num_threads_clean <- ifelse(num_threads < 0, 0, num_threads)
+filename <- paste(
+  "stochtree", "bart", "r", "n", n, "p", p, "num_gfr", num_gfr, "num_mcmc", num_mcmc,
+  "dgp_num", dgp_num, "snr", snr_rounded, "test_set_pct", test_set_pct_rounded,
+  "num_threads", num_threads_clean, sep = "_"
+)
+filename_full <- paste0("tools/regression/bart/stochtree_bart_r_results/", filename, ".csv")
+write.csv(x = results_df, file = filename_full, row.names = F)
diff --git a/tools/regression/bart/individual_regression_test_bart.py b/tools/regression/bart/individual_regression_test_bart.py
new file mode 100644
index 00000000..6fb14c47
--- /dev/null
+++ b/tools/regression/bart/individual_regression_test_bart.py
@@ -0,0 +1,344 @@
+import numpy as np
+import pandas as pd
+import time
+import sys
+import os
+from typing import Dict
+from stochtree import BARTModel
+from sklearn.model_selection import train_test_split
+
+def dgp1(n: int, p: int, snr: float) -> Dict:
+    rng = np.random.default_rng()
+
+    # Covariates
+    X = rng.uniform(0, 1, size=(n, p))
+
+    # Piecewise linear term
+    plm_term = np.where(
+        (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * X[:,1],
+        np.where(
+            (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * X[:,1],
+            np.where(
+                (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * X[:,1],
+                7.5 * X[:,1]
+            )
+        )
+    )
+
+    # Trigonometric term
+    trig_term = 2 * np.sin(X[:, 2] * 2 * np.pi) - 1.5 * np.cos(X[:, 3] * 2 * np.pi)
+
+    # Outcome
+    f_XW = plm_term + trig_term
+    noise_sd = np.std(f_XW) / snr
+    y = f_XW + rng.normal(0, noise_sd, n)
+
+    return {
+        'covariates': X,
+        'basis': None,
+        'outcome': y,
+        'conditional_mean': f_XW,
+        'rfx_group_ids': None,
+        'rfx_basis': None
+    }
+
+
+def dgp2(n: int, p: int, snr: float) -> Dict:
+    rng = np.random.default_rng()
+
+    # Covariates and basis
+    X = rng.uniform(0, 1, (n, p))
+    W = rng.uniform(0, 1, (n, 2))
+
+    # Piecewise linear term using basis W
+    plm_term = np.where(
+        (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * W[:,0],
+        np.where(
+            (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * W[:,0],
+            np.where(
+                (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * W[:,0],
+                7.5 * W[:,0]
+            )
+        )
+    )
+
+    # Trigonometric term
+    trig_term = 2 * np.sin(X[:, 2] * 2 * np.pi) - 1.5 * np.cos(X[:, 3] * 2 * np.pi)
+
+    # Outcome
+    f_XW = plm_term + trig_term
+    noise_sd = np.std(f_XW) / snr
+    y = f_XW + rng.normal(0, noise_sd, n)
+
+    return {
+        'covariates': X,
+        'basis': W,
+        'outcome': y,
+        'conditional_mean': f_XW,
+        'rfx_group_ids': None,
+        'rfx_basis': None
+ } + + +def dgp3(n: int, p: int, snr: float) -> Dict: + rng = np.random.default_rng() + + # Covariates + X = rng.uniform(0, 1, size=(n, p)) + + # Piecewise linear term + plm_term = np.where( + (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * X[:,1], + np.where( + (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * X[:,1], + np.where( + (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * X[:,1], + 7.5 * X[:,1] + ) + ) + ) + + # Trigonometric term + trig_term = 2 * np.sin(X[:, 2] * 2 * np.pi) - 1.5 * np.cos(X[:, 3] * 2 * np.pi) + + # Random effects + num_groups = 3 + rfx_group_ids = rng.choice(num_groups, size=n) + rfx_coefs = np.array([[-5, -3, -1], [5, 3, 1]]).T + rfx_basis = np.column_stack([np.ones(n), np.random.uniform(-1, 1, n)]) + rfx_term = np.sum(rfx_coefs[rfx_group_ids] * rfx_basis, axis=1) + + # Outcome + f_XW = plm_term + trig_term + rfx_term + noise_sd = np.std(f_XW) / snr + y = f_XW + rng.normal(0, noise_sd, n) + + return { + 'covariates': X, + 'basis': None, + 'outcome': y, + 'conditional_mean': f_XW, + 'rfx_group_ids': rfx_group_ids, + 'rfx_basis': rfx_basis + } + + +def dgp4(n: int, p: int, snr: float) -> Dict: + rng = np.random.default_rng() + + # Covariates and basis + X = rng.uniform(0, 1, (n, p)) + W = rng.uniform(0, 1, (n, 2)) + + # Piecewise linear term using basis W + plm_term = np.where( + (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * W[:,0], + np.where( + (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * W[:,0], + np.where( + (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * W[:,0], + 7.5 * W[:,0] + ) + ) + ) + + # Trigonometric term + trig_term = 2 * np.sin(X[:, 2] * 2 * np.pi) - 1.5 * np.cos(X[:, 3] * 2 * np.pi) + + # Random effects + num_groups = 3 + rfx_group_ids = rng.choice(num_groups, size=n) + rfx_coefs = np.array([[-5, -3, -1], [5, 3, 1]]).T + rfx_basis = np.column_stack([np.ones(n), np.random.uniform(-1, 1, n)]) + rfx_term = np.sum(rfx_coefs[rfx_group_ids] * rfx_basis, axis=1) + + # Outcome + f_XW = plm_term + trig_term + rfx_term + noise_sd = np.std(f_XW) / snr + y = f_XW + np.random.normal(0, noise_sd, n) + + return { + 'covariates': X, + 'basis': W, + 'outcome': y, + 'conditional_mean': f_XW, + 'rfx_group_ids': rfx_group_ids, + 'rfx_basis': rfx_basis + } + + +def compute_test_train_indices(n: int, test_set_pct: float) -> Dict[str, np.ndarray]: + sample_inds = np.arange(n) + train_inds, test_inds = train_test_split(sample_inds, test_size=test_set_pct) + return {'test_inds': test_inds, 'train_inds': train_inds} + + +def subset_data(data: np.ndarray, subset_inds: np.ndarray) -> np.ndarray: + if isinstance(data, np.ndarray): + if data.ndim == 1: + return data[subset_inds] + else: + return data[subset_inds, :] + else: + raise ValueError("Data must be a numpy array") + + +def main(): + # Parse command line arguments + if len(sys.argv) > 1: + n_iter = int(sys.argv[1]) + n = int(sys.argv[2]) + p = int(sys.argv[3]) + num_gfr = int(sys.argv[4]) + num_mcmc = int(sys.argv[5]) + dgp_num = int(sys.argv[6]) + snr = float(sys.argv[7]) + test_set_pct = float(sys.argv[8]) + num_threads = int(sys.argv[9]) + else: + # Default arguments + n_iter = 5 + n = 1000 + p = 5 + num_gfr = 10 + num_mcmc = 100 + dgp_num = 1 + snr = 2.0 + test_set_pct = 0.2 + num_threads = -1 + + print(f"n_iter = {n_iter}") + print(f"n = {n}") + print(f"p = {p}") + print(f"num_gfr = {num_gfr}") + print(f"num_mcmc = {num_mcmc}") + print(f"dgp_num = {dgp_num}") + print(f"snr = {snr}") + print(f"test_set_pct = {test_set_pct}") + print(f"num_threads = {num_threads}") + + # Run the performance evaluation + results = np.empty((n_iter, 4), dtype=float) + + 
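+    # Columns of `results`: iteration number, test-set RMSE, coverage of the
+    # 95% posterior intervals, and runtime in seconds; the same quantities
+    # the R version stores under c("iter", "rmse", "coverage", "runtime").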
for i in range(n_iter): + print(f"Running iteration {i+1}/{n_iter}") + + # Generate data + if dgp_num == 1: + data_dict = dgp1(n=n, p=p, snr=snr) + elif dgp_num == 2: + data_dict = dgp2(n=n, p=p, snr=snr) + elif dgp_num == 3: + data_dict = dgp3(n=n, p=p, snr=snr) + elif dgp_num == 4: + data_dict = dgp4(n=n, p=p, snr=snr) + else: + raise ValueError("Invalid DGP input") + + covariates = data_dict['covariates'] + basis = data_dict['basis'] + conditional_mean = data_dict['conditional_mean'] + outcome = data_dict['outcome'] + rfx_group_ids = data_dict['rfx_group_ids'] + rfx_basis = data_dict['rfx_basis'] + + # Split into train / test sets + subset_inds_dict = compute_test_train_indices(n, test_set_pct) + test_inds = subset_inds_dict['test_inds'] + train_inds = subset_inds_dict['train_inds'] + covariates_train = subset_data(covariates, train_inds) + covariates_test = subset_data(covariates, test_inds) + outcome_train = subset_data(outcome, train_inds) + outcome_test = subset_data(outcome, test_inds) + conditional_mean_train = subset_data(conditional_mean, train_inds) + conditional_mean_test = subset_data(conditional_mean, test_inds) + has_basis = basis is not None + has_rfx = rfx_group_ids is not None + if has_basis: + basis_train = subset_data(basis, train_inds) + basis_test = subset_data(basis, test_inds) + else: + basis_train = None + basis_test = None + if has_rfx: + rfx_group_ids_train = subset_data(rfx_group_ids, train_inds) + rfx_group_ids_test = subset_data(rfx_group_ids, test_inds) + rfx_basis_train = subset_data(rfx_basis, train_inds) + rfx_basis_test = subset_data(rfx_basis, test_inds) + else: + rfx_group_ids_train = None + rfx_group_ids_test = None + rfx_basis_train = None + rfx_basis_test = None + + # Run (and time) BART + start_time = time.time() + + # Sample BART model + general_params = {'num_threads': num_threads} + bart_model = BARTModel() + bart_model.sample( + X_train=covariates_train, + y_train=outcome_train, + leaf_basis_train=basis_train, + rfx_group_ids_train=rfx_group_ids_train, + rfx_basis_train=rfx_basis_train, + num_gfr=num_gfr, + num_mcmc=num_mcmc, + general_params=general_params + ) + + # Predict on the test set + test_preds = bart_model.predict( + covariates=covariates_test, + basis=basis_test, + rfx_group_ids=rfx_group_ids_test, + rfx_basis=rfx_basis_test + ) + + bart_timing = time.time() - start_time + + # Compute test set evaluations + y_hat_posterior = test_preds['y_hat'] + y_hat_posterior_mean = np.mean(y_hat_posterior, axis=1) + rmse_test = np.sqrt(np.mean((y_hat_posterior_mean - conditional_mean_test) ** 2)) + + y_hat_posterior_quantile_025 = np.percentile(y_hat_posterior, 2.5, axis=1) + y_hat_posterior_quantile_975 = np.percentile(y_hat_posterior, 97.5, axis=1) + + covered = np.logical_and( + conditional_mean_test >= y_hat_posterior_quantile_025, + conditional_mean_test <= y_hat_posterior_quantile_975 + ) + coverage_test = np.mean(covered) + + # Store evaluations + results[i, :] = [i+1, rmse_test, coverage_test, bart_timing] + + # Wrangle and save results to CSV + results_df = pd.DataFrame({ + 'n': n, + 'p': p, + 'num_gfr': num_gfr, + 'num_mcmc': num_mcmc, + 'dgp_num': dgp_num, + 'snr': snr, + 'test_set_pct': test_set_pct, + 'num_threads': num_threads, + 'iter': results[:, 0], + 'rmse': results[:, 1], + 'coverage': results[:, 2], + 'runtime': results[:, 3] + }) + + snr_rounded = int(snr) + test_set_pct_rounded = int(test_set_pct * 100) + num_threads_clean = 0 if num_threads < 0 else num_threads + filename = 
f"stochtree_bart_python_n_{n}_p_{p}_num_gfr_{num_gfr}_num_mcmc_{num_mcmc}_dgp_num_{dgp_num}_snr_{snr_rounded}_test_set_pct_{test_set_pct_rounded}_num_threads_{num_threads_clean}.csv" + output_dir = "tools/regression/bart/stochtree_bart_python_results" + filename_full = os.path.join(output_dir, filename) + results_df.to_csv(filename_full, index=False) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/regression/bart/regression_test_analysis_bart.R b/tools/regression/bart/regression_test_analysis_bart.R new file mode 100644 index 00000000..ce754ba1 --- /dev/null +++ b/tools/regression/bart/regression_test_analysis_bart.R @@ -0,0 +1,16 @@ +reg_test_dir <- "tools/regression/bart/stochtree_bart_r_results" +reg_test_files <- list.files(reg_test_dir, pattern = ".csv", full.names = T) + +reg_test_df <- data.frame() +for (file in reg_test_files) { + temp_df <- read.csv(file) + reg_test_df <- rbind(reg_test_df, temp_df) +} + +summary_df <- aggregate( + cbind(rmse, coverage, runtime) ~ n + p + num_gfr + num_mcmc + dgp_num + snr + test_set_pct + num_threads, + data = reg_test_df, FUN = median, drop = TRUE +) + +summary_file_output <- file.path(reg_test_dir, "stochtree_bart_r_summary.csv") +write.csv(summary_df, summary_file_output, row.names = F) diff --git a/tools/regression/bart/regression_test_analysis_bart.py b/tools/regression/bart/regression_test_analysis_bart.py new file mode 100644 index 00000000..203631bd --- /dev/null +++ b/tools/regression/bart/regression_test_analysis_bart.py @@ -0,0 +1,44 @@ +import pandas as pd +import os +import glob + + +def main(): + # Define the directory containing results + reg_test_dir = "tools/regression/bart/stochtree_bart_python_results" + + # Get all CSV files in the directory + reg_test_files = glob.glob(os.path.join(reg_test_dir, "*.csv")) + + # Read and combine all results + reg_test_df = pd.DataFrame() + for file in reg_test_files: + temp_df = pd.read_csv(file) + reg_test_df = pd.concat([reg_test_df, temp_df], ignore_index=True) + + # Create summary by aggregating results + summary_df = reg_test_df.groupby([ + 'n', 'p', 'num_gfr', 'num_mcmc', 'dgp_num', 'snr', 'test_set_pct', 'num_threads' + ]).agg({ + 'rmse': 'median', + 'coverage': 'median', + 'runtime': 'median' + }).reset_index() + + # Save summary to CSV + summary_file_output = os.path.join(reg_test_dir, "stochtree_bart_python_summary.csv") + summary_df.to_csv(summary_file_output, index=False) + print(f"Summary saved to {summary_file_output}") + + # Print some basic statistics + print(f"Total number of result files: {len(reg_test_files)}") + print(f"Total number of iterations: {len(reg_test_df)}") + print(f"Number of unique parameter combinations: {len(summary_df)}") + + # Print summary statistics + print("\nSummary statistics:") + print(summary_df.describe()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/regression/bart/regression_test_dispatch_bart.R b/tools/regression/bart/regression_test_dispatch_bart.R new file mode 100644 index 00000000..3f62a4ca --- /dev/null +++ b/tools/regression/bart/regression_test_dispatch_bart.R @@ -0,0 +1,28 @@ +# Test case parameters +dgps <- 1:4 +ns <- c(1000, 10000) +ps <- c(5, 20) +threads <- c(-1, 1) +varying_param_grid <- expand.grid(dgps, ns, ps, threads) +test_case_grid <- cbind( + 5, varying_param_grid[,2], varying_param_grid[,3], + 10, 100, varying_param_grid[,1], 2.0, 0.2, varying_param_grid[,4] +) + +# Run script for every case +script_path <- 
"tools/regression/bart/individual_regression_test_bart.R" +for (i in 1:nrow(test_case_grid)) { + n_iter <- test_case_grid[i,1] + n <- test_case_grid[i,2] + p <- test_case_grid[i,3] + num_gfr <- test_case_grid[i,4] + num_mcmc <- test_case_grid[i,5] + dgp_num <- test_case_grid[i,6] + snr <- test_case_grid[i,7] + test_set_pct <- test_case_grid[i,8] + num_threads <- test_case_grid[i,9] + system2( + "Rscript", + args = c(script_path, n_iter, n, p, num_gfr, num_mcmc, dgp_num, snr, test_set_pct, num_threads) + ) +} diff --git a/tools/regression/bart/regression_test_dispatch_bart.py b/tools/regression/bart/regression_test_dispatch_bart.py new file mode 100644 index 00000000..8fddc8cf --- /dev/null +++ b/tools/regression/bart/regression_test_dispatch_bart.py @@ -0,0 +1,69 @@ +import subprocess +import sys + + +def main(): + # Test case parameters + dgps = [1, 2, 3, 4] + ns = [1000, 10000] + ps = [5, 20] + threads = [-1, 1] + + # Create parameter grid + varying_param_grid = [] + for dgp in dgps: + for n in ns: + for p in ps: + for thread in threads: + varying_param_grid.append([dgp, n, p, thread]) + + # Fixed parameters + n_iter = 5 + num_gfr = 10 + num_mcmc = 100 + snr = 2.0 + test_set_pct = 0.2 + + # Script path + script_path = "tools/regression/bart/individual_regression_test_bart.py" + + # Run script for every case + for i, params in enumerate(varying_param_grid): + dgp_num, n, p, num_threads = params + + print(f"Running test case {i+1}/{len(varying_param_grid)}:") + print(f" DGP: {dgp_num}, n: {n}, p: {p}, threads: {num_threads}") + + # Construct command + cmd = [ + sys.executable, # Use current Python interpreter + script_path, + str(n_iter), + str(n), + str(p), + str(num_gfr), + str(num_mcmc), + str(dgp_num), + str(snr), + str(test_set_pct), + str(num_threads) + ] + + # Run the command + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + print(" Completed successfully") + if result.stdout: + print(f" Output:\n{result.stdout.strip()}") + except subprocess.CalledProcessError as e: + print(f" Failed with error code {e.returncode}") + if e.stderr: + print(f" Error: {e.stderr.strip()}") + if e.stdout: + print(f" Output: {e.stdout.strip()}") + + print(f"\nCompleted {len(varying_param_grid)} test cases") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/regression/bart/stochtree_bart_python_results/.gitkeep b/tools/regression/bart/stochtree_bart_python_results/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tools/regression/bart/stochtree_bart_r_results/.gitkeep b/tools/regression/bart/stochtree_bart_r_results/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tools/regression/bcf/individual_regression_test_bcf.R b/tools/regression/bcf/individual_regression_test_bcf.R new file mode 100644 index 00000000..cb186ac4 --- /dev/null +++ b/tools/regression/bcf/individual_regression_test_bcf.R @@ -0,0 +1,283 @@ +# Load libraries +library(stochtree) + +# Define DGPs +dgp1 <- function(n, p, snr) { + X <- matrix(rnorm(n*p), ncol = p) + plm_term <- ( + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*X[,2]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*X[,2]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*X[,2]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*X[,2]) + ) + trig_term <- ( + 2*sin(X[,3]*2*pi) - + 2*cos(X[,4]*2*pi) + ) + mu_x <- plm_term + trig_term + pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 + Z <- rbinom(n,1,pi_x) + tau_x <- 1 + 2*X[,2]*X[,4] + f_XZ <- mu_x + tau_x * Z + noise_sd 
<- sd(f_XZ)/snr + y <- f_XZ + rnorm(n, 0, noise_sd) + return(list(covariates = X, treatment = Z, outcome = y, propensity = pi_x, + prognostic_effect = mu_x, treatment_effect = tau_x, + conditional_mean = f_XZ, rfx_group_ids = NULL, rfx_basis = NULL)) +} +dgp2 <- function(n, p, snr) { + X <- matrix(runif(n*p), ncol = p) + pi_x <- cbind(0.125 + 0.75 * X[, 1], 0.875 - 0.75 * X[, 2]) + mu_x <- pi_x[, 1] * 5 + pi_x[, 2] * 2 + 2 * X[, 3] + tau_x <- cbind(X[, 2], X[, 3]) * 2 + Z <- matrix(NA_real_, nrow = n, ncol = ncol(pi_x)) + for (i in 1:ncol(pi_x)) { + Z[, i] <- rbinom(n, 1, pi_x[, i]) + } + f_XZ <- mu_x + rowSums(Z * tau_x) + noise_sd <- sd(f_XZ)/snr + y <- f_XZ + rnorm(n, 0, noise_sd) + return(list(covariates = X, treatment = Z, outcome = y, propensity = pi_x, + prognostic_effect = mu_x, treatment_effect = tau_x, + conditional_mean = f_XZ, rfx_group_ids = NULL, rfx_basis = NULL)) +} +dgp3 <- function(n, p, snr) { + X <- matrix(rnorm(n*p), ncol = p) + plm_term <- ( + ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5*X[,2]) + + ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5*X[,2]) + + ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5*X[,2]) + + ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5*X[,2]) + ) + trig_term <- ( + 2*sin(X[,3]*2*pi) - + 2*cos(X[,4]*2*pi) + ) + mu_x <- plm_term + trig_term + pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10 + Z <- rbinom(n,1,pi_x) + tau_x <- 1 + 2*X[,2]*X[,4] + rfx_group_ids <- sample(1:3, size = n, replace = T) + rfx_coefs <- t(matrix(c(-5, -3, -1, 5, 3, 1), nrow=2, byrow=TRUE)) + rfx_basis <- cbind(1, runif(n, -1, 1)) + rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) + f_XZ <- mu_x + tau_x * Z + rfx_term + noise_sd <- sd(f_XZ)/snr + y <- f_XZ + rnorm(n, 0, noise_sd) + return(list(covariates = X, treatment = Z, outcome = y, propensity = pi_x, + prognostic_effect = mu_x, treatment_effect = tau_x, + conditional_mean = f_XZ, rfx_group_ids = rfx_group_ids, rfx_basis = rfx_basis)) +} +dgp4 <- function(n, p, snr) { + X <- matrix(runif(n*p), ncol = p) + pi_x <- cbind(0.125 + 0.75 * X[, 1], 0.875 - 0.75 * X[, 2]) + mu_x <- pi_x[, 1] * 5 + pi_x[, 2] * 2 + 2 * X[, 3] + tau_x <- cbind(X[, 2], X[, 3]) * 2 + Z <- matrix(NA_real_, nrow = n, ncol = ncol(pi_x)) + for (i in 1:ncol(pi_x)) { + Z[, i] <- rbinom(n, 1, pi_x[, i]) + } + rfx_group_ids <- sample(1:3, size = n, replace = T) + rfx_coefs <- t(matrix(c(-5, -3, -1, 5, 3, 1), nrow=2, byrow=TRUE)) + rfx_basis <- cbind(1, runif(n, -1, 1)) + rfx_term <- rowSums(rfx_coefs[rfx_group_ids,] * rfx_basis) + f_XZ <- mu_x + rowSums(Z * tau_x) + rfx_term + noise_sd <- sd(f_XZ)/snr + y <- f_XZ + rnorm(n, 0, noise_sd) + return(list(covariates = X, treatment = Z, outcome = y, propensity = pi_x, + prognostic_effect = mu_x, treatment_effect = tau_x, + conditional_mean = f_XZ, rfx_group_ids = rfx_group_ids, rfx_basis = rfx_basis)) +} + +# Test / train split utilities +compute_test_train_indices <- function(n, test_set_pct) { + n_test <- round(test_set_pct*n) + n_train <- n - n_test + test_inds <- sort(sample(1:n, n_test, replace = FALSE)) + train_inds <- (1:n)[!((1:n) %in% test_inds)] + return(list(test_inds = test_inds, train_inds = train_inds)) +} +subset_data <- function(data, subset_inds) { + if (is.matrix(data)) { + return(data[subset_inds,]) + } else { + return(data[subset_inds]) + } +} + +# Capture command line arguments +args <- commandArgs(trailingOnly = T) +if (length(args) > 0){ + n_iter <- as.integer(args[1]) + n <- as.integer(args[2]) + p <- as.integer(args[3]) + num_gfr <- as.integer(args[4]) + num_mcmc <- 
as.integer(args[5]) + dgp_num <- as.integer(args[6]) + snr <- as.numeric(args[7]) + test_set_pct <- as.numeric(args[8]) + num_threads <- as.integer(args[9]) +} else{ + # Default arguments + n_iter <- 5 + n <- 1000 + p <- 5 + num_gfr <- 10 + num_mcmc <- 100 + dgp_num <- 1 + snr <- 2.0 + test_set_pct <- 0.2 + num_threads <- -1 +} +cat("n_iter = ", n_iter, "\nn = ", n, "\np = ", p, "\nnum_gfr = ", num_gfr, + "\nnum_mcmc = ", num_mcmc, "\ndgp_num = ", dgp_num, "\nsnr = ", snr, + "\ntest_set_pct = ", test_set_pct, "\nnum_threads = ", num_threads, "\n", sep = "") + +# Run the performance evaluation +results <- matrix(NA, nrow = n_iter, ncol = 6) +colnames(results) <- c("iter", "outcome_rmse", "outcome_coverage", "treatment_effect_rmse", "treatment_effect_coverage", "runtime") +for (i in 1:n_iter) { + # Generate data + if (dgp_num == 1) { + data_list <- dgp1(n = n, p = p, snr = snr) + } else if (dgp_num == 2) { + data_list <- dgp2(n = n, p = p, snr = snr) + } else if (dgp_num == 3) { + data_list <- dgp3(n = n, p = p, snr = snr) + } else if (dgp_num == 4) { + data_list <- dgp4(n = n, p = p, snr = snr) + } else { + stop("Invalid DGP input") + } + covariates <- data_list[['covariates']] + treatment <- data_list[['treatment']] + propensity <- data_list[['propensity']] + prognostic_effect <- data_list[['prognostic_effect']] + treatment_effect <- data_list[['treatment_effect']] + conditional_mean <- data_list[['conditional_mean']] + outcome <- data_list[['outcome']] + rfx_group_ids <- data_list[['rfx_group_ids']] + rfx_basis <- data_list[['rfx_basis']] + if (dgp_num %in% c(2,4)) { + has_multivariate_treatment <- T + } else { + has_multivariate_treatment <- F + } + + # Split into train / test sets + subset_inds_list <- compute_test_train_indices(n, test_set_pct) + test_inds <- subset_inds_list$test_inds + train_inds <- subset_inds_list$train_inds + covariates_train <- subset_data(covariates, train_inds) + covariates_test <- subset_data(covariates, test_inds) + treatment_train <- subset_data(treatment, train_inds) + treatment_test <- subset_data(treatment, test_inds) + propensity_train <- subset_data(propensity, train_inds) + propensity_test <- subset_data(propensity, test_inds) + outcome_train <- subset_data(outcome, train_inds) + outcome_test <- subset_data(outcome, test_inds) + prognostic_effect_train <- subset_data(prognostic_effect, train_inds) + prognostic_effect_test <- subset_data(prognostic_effect, test_inds) + treatment_effect_train <- subset_data(treatment_effect, train_inds) + treatment_effect_test <- subset_data(treatment_effect, test_inds) + conditional_mean_train <- subset_data(conditional_mean, train_inds) + conditional_mean_test <- subset_data(conditional_mean, test_inds) + has_rfx <- !is.null(rfx_group_ids) + if (has_rfx) { + rfx_group_ids_train <- subset_data(rfx_group_ids, train_inds) + rfx_group_ids_test <- subset_data(rfx_group_ids, test_inds) + rfx_basis_train <- subset_data(rfx_basis, train_inds) + rfx_basis_test <- subset_data(rfx_basis, test_inds) + } else { + rfx_group_ids_train <- NULL + rfx_group_ids_test <- NULL + rfx_basis_train <- NULL + rfx_basis_test <- NULL + } + + # Run (and time) BCF + bcf_timing <- system.time({ + # Sample BCF model + general_params <- list(num_threads = num_threads, adaptive_coding = F) + prognostic_forest_params <- list(sample_sigma2_leaf = F) + treatment_effect_forest_params <- list(sample_sigma2_leaf = F) + bcf_model <- stochtree::bcf( + X_train = covariates_train, Z_train = treatment_train, + propensity_train = propensity_train, y_train = 
outcome_train,
+        rfx_group_ids_train = rfx_group_ids_train, rfx_basis_train = rfx_basis_train,
+        num_gfr = num_gfr, num_mcmc = num_mcmc, general_params = general_params,
+        prognostic_forest_params = prognostic_forest_params,
+        treatment_effect_forest_params = treatment_effect_forest_params
+    )
+    
+    # Predict on the test set
+    test_preds <- predict(
+        bcf_model, X = covariates_test, Z = treatment_test, propensity = propensity_test,
+        rfx_group_ids = rfx_group_ids_test, rfx_basis = rfx_basis_test
+    )
+  })[3]
+
+  # Compute test set evals
+  y_hat_posterior <- test_preds$y_hat
+  y_hat_posterior_mean <- rowMeans(y_hat_posterior)
+  tau_hat_posterior <- test_preds$tau_hat
+  if (has_multivariate_treatment) {
+    tau_hat_posterior_mean <- apply(tau_hat_posterior, c(1,2), mean)
+  } else {
+    tau_hat_posterior_mean <- apply(tau_hat_posterior, 1, mean)
+  }
+  # RMSE is measured against the true conditional mean, matching the Python
+  # benchmark and the coverage computation below
+  y_hat_rmse_test <- sqrt(mean((y_hat_posterior_mean - conditional_mean_test)^2))
+  tau_hat_rmse_test <- sqrt(mean((tau_hat_posterior_mean - treatment_effect_test)^2))
+  y_hat_posterior_quantile_025 <- apply(y_hat_posterior, 1, function(x) quantile(x, 0.025))
+  y_hat_posterior_quantile_975 <- apply(y_hat_posterior, 1, function(x) quantile(x, 0.975))
+  if (has_multivariate_treatment) {
+    tau_hat_posterior_quantile_025 <- apply(tau_hat_posterior, c(1,2), function(x) quantile(x, 0.025))
+    tau_hat_posterior_quantile_975 <- apply(tau_hat_posterior, c(1,2), function(x) quantile(x, 0.975))
+  } else {
+    tau_hat_posterior_quantile_025 <- apply(tau_hat_posterior, 1, function(x) quantile(x, 0.025))
+    tau_hat_posterior_quantile_975 <- apply(tau_hat_posterior, 1, function(x) quantile(x, 0.975))
+  }
+  y_hat_covered <- rep(NA, nrow(y_hat_posterior))
+  for (j in 1:nrow(y_hat_posterior)) {
+    y_hat_covered[j] <- (
+      (conditional_mean_test[j] >= y_hat_posterior_quantile_025[j]) &
+      (conditional_mean_test[j] <= y_hat_posterior_quantile_975[j])
+    )
+  }
+  y_hat_coverage_test <- mean(y_hat_covered)
+  if (has_multivariate_treatment) {
+    tau_hat_covered <- matrix(NA_real_, nrow(tau_hat_posterior_mean), ncol(tau_hat_posterior_mean))
+    for (j in 1:nrow(tau_hat_covered)) {
+      for (k in 1:ncol(tau_hat_covered)) {
+        tau_hat_covered[j,k] <- (
+          (treatment_effect_test[j,k] >= tau_hat_posterior_quantile_025[j,k]) &
+          (treatment_effect_test[j,k] <= tau_hat_posterior_quantile_975[j,k])
+        )
+      }
+    }
+  } else {
+    tau_hat_covered <- rep(NA, nrow(tau_hat_posterior))
+    for (j in 1:nrow(tau_hat_posterior)) {
+      tau_hat_covered[j] <- (
+        (treatment_effect_test[j] >= tau_hat_posterior_quantile_025[j]) &
+        (treatment_effect_test[j] <= tau_hat_posterior_quantile_975[j])
+      )
+    }
+  }
+  tau_hat_coverage_test <- mean(tau_hat_covered)
+
+  # Store evaluations
+  results[i,] <- c(i, y_hat_rmse_test, y_hat_coverage_test, tau_hat_rmse_test, tau_hat_coverage_test, bcf_timing)
+}
+
+# Wrangle and save results to CSV
+results_df <- data.frame(
+  cbind(n, p, num_gfr, num_mcmc, dgp_num, snr, test_set_pct, num_threads, results)
+)
+snr_rounded <- as.integer(snr)
+test_set_pct_rounded <- as.integer(test_set_pct*100)
+num_threads_clean <- ifelse(num_threads < 0, 0, num_threads)
+filename <- paste(
+  "stochtree", "bcf", "r", "n", n, "p", p, "num_gfr", num_gfr, "num_mcmc", num_mcmc,
+  "dgp_num", dgp_num, "snr", snr_rounded, "test_set_pct", test_set_pct_rounded,
+  "num_threads", num_threads_clean, sep = "_"
+)
+filename_full <- paste0("tools/regression/bcf/stochtree_bcf_r_results/", filename, ".csv")
+write.csv(x = results_df, file = filename_full, row.names = F)
diff --git a/tools/regression/bcf/individual_regression_test_bcf.py
b/tools/regression/bcf/individual_regression_test_bcf.py new file mode 100644 index 00000000..7a1ce9a8 --- /dev/null +++ b/tools/regression/bcf/individual_regression_test_bcf.py @@ -0,0 +1,441 @@ +import numpy as np +import pandas as pd +import time +import sys +import os +from typing import Dict +from stochtree import BCFModel +from sklearn.model_selection import train_test_split +from scipy.stats import norm + + +def dgp1(n: int, p: int, snr: float) -> Dict: + rng = np.random.default_rng() + + # Covariates + X = rng.normal(0, 1, size=(n, p)) + + # Piecewise linear term + plm_term = np.where( + (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * X[:,1], + np.where( + (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * X[:,1], + np.where( + (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * X[:,1], + 7.5 * X[:,1] + ) + ) + ) + + # Trigonometric term + trig_term = 2 * np.sin(X[:, 2] * 2 * np.pi) - 2 * np.cos(X[:, 3] * 2 * np.pi) + + # Prognostic effect + mu_x = plm_term + trig_term + + # Propensity score + pi_x = 0.8 * norm.cdf((3 * mu_x / np.std(mu_x)) - 0.5 * X[:, 0]) + 0.05 + rng.uniform(0, 1, n) / 10 + + # Treatment assignment + Z = rng.binomial(1, pi_x, n) + + # Treatment effect + tau_x = 1 + 2 * X[:, 1] * X[:, 3] + + # Outcome + f_XZ = mu_x + tau_x * Z + noise_sd = np.std(f_XZ) / snr + y = f_XZ + rng.normal(0, noise_sd, n) + + return { + 'covariates': X, + 'treatment': Z, + 'outcome': y, + 'propensity': pi_x, + 'prognostic_effect': mu_x, + 'treatment_effect': tau_x, + 'conditional_mean': f_XZ, + 'rfx_group_ids': None, + 'rfx_basis': None + } + + +def dgp2(n: int, p: int, snr: float) -> Dict: + rng = np.random.default_rng() + + # Covariates + X = rng.uniform(0, 1, size=(n, p)) + + # Propensity scores (multivariate) + pi_x = np.column_stack([ + 0.125 + 0.75 * X[:, 0], + 0.875 - 0.75 * X[:, 1] + ]) + + # Prognostic effect + mu_x = pi_x[:, 0] * 5 + pi_x[:, 1] * 2 + 2 * X[:, 2] + + # Treatment effects (multivariate) + tau_x = np.column_stack([ + X[:, 1] * 2, + X[:, 2] * 2 + ]) + + # Treatment assignment (multivariate) + Z = np.column_stack([ + rng.binomial(1, pi_x[:, 0], n), + rng.binomial(1, pi_x[:, 1], n) + ]) + + # Outcome + f_XZ = mu_x + np.sum(Z * tau_x, axis=1) + noise_sd = np.std(f_XZ) / snr + y = f_XZ + rng.normal(0, noise_sd, n) + + return { + 'covariates': X, + 'treatment': Z, + 'outcome': y, + 'propensity': pi_x, + 'prognostic_effect': mu_x, + 'treatment_effect': tau_x, + 'conditional_mean': f_XZ, + 'rfx_group_ids': None, + 'rfx_basis': None + } + + +def dgp3(n: int, p: int, snr: float) -> Dict: + rng = np.random.default_rng() + + # Covariates + X = rng.normal(0, 1, size=(n, p)) + + # Piecewise linear term + plm_term = np.where( + (X[:,0] >= 0.0) & (X[:,0] < 0.25), -7.5 * X[:,1], + np.where( + (X[:,0] >= 0.25) & (X[:,0] < 0.5), -2.5 * X[:,1], + np.where( + (X[:,0] >= 0.5) & (X[:,0] < 0.75), 2.5 * X[:,1], + 7.5 * X[:,1] + ) + ) + ) + + # Trigonometric term + trig_term = 2 * np.sin(X[:, 2] * 2 * np.pi) - 2 * np.cos(X[:, 3] * 2 * np.pi) + + # Prognostic effect + mu_x = plm_term + trig_term + + # Propensity score + pi_x = 0.8 * norm.cdf((3 * mu_x / np.std(mu_x)) - 0.5 * X[:, 0]) + 0.05 + rng.uniform(0, 1, n) / 10 + + # Treatment assignment + Z = rng.binomial(1, pi_x, n) + + # Treatment effect + tau_x = 1 + 2 * X[:, 1] * X[:, 3] + + # Random effects + rfx_group_ids = rng.choice([0, 1, 2], size=n, replace=True) + rfx_coefs = np.array([[-5, -3, -1], [5, 3, 1]]).T + rfx_basis = np.column_stack([np.ones(n), rng.uniform(-1, 1, n)]) + rfx_term = np.sum(rfx_coefs[rfx_group_ids] * rfx_basis, axis=1) + + # Outcome + f_XZ 
= mu_x + tau_x * Z + rfx_term + noise_sd = np.std(f_XZ) / snr + y = f_XZ + rng.normal(0, noise_sd, n) + + return { + 'covariates': X, + 'treatment': Z, + 'outcome': y, + 'propensity': pi_x, + 'prognostic_effect': mu_x, + 'treatment_effect': tau_x, + 'conditional_mean': f_XZ, + 'rfx_group_ids': rfx_group_ids, + 'rfx_basis': rfx_basis + } + + +def dgp4(n: int, p: int, snr: float) -> Dict: + rng = np.random.default_rng() + + # Covariates + X = rng.uniform(0, 1, size=(n, p)) + + # Propensity scores (multivariate) + pi_x = np.column_stack([ + 0.125 + 0.75 * X[:, 0], + 0.875 - 0.75 * X[:, 1] + ]) + + # Prognostic effect + mu_x = pi_x[:, 0] * 5 + pi_x[:, 1] * 2 + 2 * X[:, 2] + + # Treatment effects (multivariate) + tau_x = np.column_stack([ + X[:, 1] * 2, + X[:, 2] * 2 + ]) + + # Treatment assignment (multivariate) + Z = np.column_stack([ + rng.binomial(1, pi_x[:, 0], n), + rng.binomial(1, pi_x[:, 1], n) + ]) + + # Random effects + rfx_group_ids = rng.choice([0, 1, 2], size=n, replace=True) + rfx_coefs = np.array([[-5, -3, -1], [5, 3, 1]]).T + rfx_basis = np.column_stack([np.ones(n), rng.uniform(-1, 1, n)]) + rfx_term = np.sum(rfx_coefs[rfx_group_ids] * rfx_basis, axis=1) + + # Outcome + f_XZ = mu_x + np.sum(Z * tau_x, axis=1) + rfx_term + noise_sd = np.std(f_XZ) / snr + y = f_XZ + rng.normal(0, noise_sd, n) + + return { + 'covariates': X, + 'treatment': Z, + 'outcome': y, + 'propensity': pi_x, + 'prognostic_effect': mu_x, + 'treatment_effect': tau_x, + 'conditional_mean': f_XZ, + 'rfx_group_ids': rfx_group_ids, + 'rfx_basis': rfx_basis + } + + +def compute_test_train_indices(n: int, test_set_pct: float) -> Dict[str, np.ndarray]: + sample_inds = np.arange(n) + train_inds, test_inds = train_test_split(sample_inds, test_size=test_set_pct) + return {'test_inds': test_inds, 'train_inds': train_inds} + + +def subset_data(data: np.ndarray, subset_inds: np.ndarray) -> np.ndarray: + if isinstance(data, np.ndarray): + if data.ndim == 1: + return data[subset_inds] + else: + return data[subset_inds, :] + else: + raise ValueError("Data must be a numpy array") + + +def main(): + # Parse command line arguments + if len(sys.argv) > 1: + n_iter = int(sys.argv[1]) + n = int(sys.argv[2]) + p = int(sys.argv[3]) + num_gfr = int(sys.argv[4]) + num_mcmc = int(sys.argv[5]) + dgp_num = int(sys.argv[6]) + snr = float(sys.argv[7]) + test_set_pct = float(sys.argv[8]) + num_threads = int(sys.argv[9]) + else: + # Default arguments + n_iter = 5 + n = 1000 + p = 5 + num_gfr = 10 + num_mcmc = 100 + dgp_num = 1 + snr = 2.0 + test_set_pct = 0.2 + num_threads = -1 + + print(f"n_iter = {n_iter}") + print(f"n = {n}") + print(f"p = {p}") + print(f"num_gfr = {num_gfr}") + print(f"num_mcmc = {num_mcmc}") + print(f"dgp_num = {dgp_num}") + print(f"snr = {snr}") + print(f"test_set_pct = {test_set_pct}") + print(f"num_threads = {num_threads}") + + # Run the performance evaluation + results = np.empty((n_iter, 6), dtype=float) + + for i in range(n_iter): + print(f"Running iteration {i+1}/{n_iter}") + + # Generate data + if dgp_num == 1: + data_dict = dgp1(n=n, p=p, snr=snr) + elif dgp_num == 2: + data_dict = dgp2(n=n, p=p, snr=snr) + elif dgp_num == 3: + data_dict = dgp3(n=n, p=p, snr=snr) + elif dgp_num == 4: + data_dict = dgp4(n=n, p=p, snr=snr) + else: + raise ValueError("Invalid DGP input") + + covariates = data_dict['covariates'] + treatment = data_dict['treatment'] + propensity = data_dict['propensity'] + prognostic_effect = data_dict['prognostic_effect'] + treatment_effect = data_dict['treatment_effect'] + conditional_mean = 
data_dict['conditional_mean']
+        outcome = data_dict['outcome']
+        rfx_group_ids = data_dict['rfx_group_ids']
+        rfx_basis = data_dict['rfx_basis']
+
+        # Check if multivariate treatment
+        has_multivariate_treatment = dgp_num in [2, 4]
+
+        # Split into train / test sets
+        subset_inds_dict = compute_test_train_indices(n, test_set_pct)
+        test_inds = subset_inds_dict['test_inds']
+        train_inds = subset_inds_dict['train_inds']
+        covariates_train = subset_data(covariates, train_inds)
+        covariates_test = subset_data(covariates, test_inds)
+        treatment_train = subset_data(treatment, train_inds)
+        treatment_test = subset_data(treatment, test_inds)
+        propensity_train = subset_data(propensity, train_inds)
+        propensity_test = subset_data(propensity, test_inds)
+        outcome_train = subset_data(outcome, train_inds)
+        outcome_test = subset_data(outcome, test_inds)
+        prognostic_effect_train = subset_data(prognostic_effect, train_inds)
+        prognostic_effect_test = subset_data(prognostic_effect, test_inds)
+        treatment_effect_train = subset_data(treatment_effect, train_inds)
+        treatment_effect_test = subset_data(treatment_effect, test_inds)
+        conditional_mean_train = subset_data(conditional_mean, train_inds)
+        conditional_mean_test = subset_data(conditional_mean, test_inds)
+        has_rfx = rfx_group_ids is not None
+        if has_rfx:
+            rfx_group_ids_train = subset_data(rfx_group_ids, train_inds)
+            rfx_group_ids_test = subset_data(rfx_group_ids, test_inds)
+            rfx_basis_train = subset_data(rfx_basis, train_inds)
+            rfx_basis_test = subset_data(rfx_basis, test_inds)
+        else:
+            rfx_group_ids_train = None
+            rfx_group_ids_test = None
+            rfx_basis_train = None
+            rfx_basis_test = None
+
+        # Run (and time) BCF
+        start_time = time.time()
+
+        # Sample BCF model
+        general_params = {'num_threads': num_threads, 'adaptive_coding': False}
+        prognostic_forest_params = {'sample_sigma2_leaf': False}
+        treatment_effect_forest_params = {'sample_sigma2_leaf': False}
+
+        bcf_model = BCFModel()
+        bcf_model.sample(
+            X_train=covariates_train,
+            Z_train=treatment_train,
+            y_train=outcome_train,
+            pi_train=propensity_train,
+            rfx_group_ids_train=rfx_group_ids_train,
+            rfx_basis_train=rfx_basis_train,
+            num_gfr=num_gfr,
+            num_mcmc=num_mcmc,
+            general_params=general_params,
+            prognostic_forest_params=prognostic_forest_params,
+            treatment_effect_forest_params=treatment_effect_forest_params
+        )
+
+        # Predict on the test set
+        test_preds = bcf_model.predict(
+            X=covariates_test,
+            Z=treatment_test,
+            propensity=propensity_test,
+            rfx_group_ids=rfx_group_ids_test,
+            rfx_basis=rfx_basis_test
+        )
+
+        bcf_timing = time.time() - start_time
+
+        # Compute test set evaluations
+        y_hat_posterior = test_preds['y_hat']
+        tau_hat_posterior = test_preds['tau_hat']
+
+        # Averaging over the sample dimension (axis 1) handles both treatment types:
+        # univariate tau_hat_posterior has shape (n_test, n_samples) -> (n_test,),
+        # multivariate has shape (n_test, n_samples, n_treatments) -> (n_test, n_treatments)
+        y_hat_posterior_mean = np.mean(y_hat_posterior, axis=1)
+        tau_hat_posterior_mean = np.mean(tau_hat_posterior, axis=1)
+
+        # Outcome RMSE and coverage
+        y_hat_rmse_test = np.sqrt(np.mean((y_hat_posterior_mean - conditional_mean_test) ** 2))
+        y_hat_posterior_quantile_025 = np.percentile(y_hat_posterior, 2.5, axis=1)
+        y_hat_posterior_quantile_975 = np.percentile(y_hat_posterior, 97.5, axis=1)
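+
+        # Outcome coverage: the fraction of test points whose true conditional
+        # mean lies inside the pointwise central 95% posterior interval
+        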
y_hat_covered = np.logical_and(
+            conditional_mean_test >= y_hat_posterior_quantile_025,
+            conditional_mean_test <= y_hat_posterior_quantile_975
+        )
+        y_hat_coverage_test = np.mean(y_hat_covered)
+
+        # Treatment effect RMSE and coverage
+        tau_hat_rmse_test = np.sqrt(np.mean((tau_hat_posterior_mean - treatment_effect_test) ** 2))
+
+        # As with the posterior means above, percentiles over the sample
+        # dimension (axis 1) work for both univariate and multivariate treatment
+        tau_hat_posterior_quantile_025 = np.percentile(tau_hat_posterior, 2.5, axis=1)
+        tau_hat_posterior_quantile_975 = np.percentile(tau_hat_posterior, 97.5, axis=1)
+        tau_hat_covered = np.logical_and(
+            treatment_effect_test >= tau_hat_posterior_quantile_025,
+            treatment_effect_test <= tau_hat_posterior_quantile_975
+        )
+
+        tau_hat_coverage_test = np.mean(tau_hat_covered)
+
+        # Store evaluations
+        results[i, :] = [i+1, y_hat_rmse_test, y_hat_coverage_test, tau_hat_rmse_test, tau_hat_coverage_test, bcf_timing]
+
+    # Wrangle and save results to CSV
+    results_df = pd.DataFrame({
+        'n': n,
+        'p': p,
+        'num_gfr': num_gfr,
+        'num_mcmc': num_mcmc,
+        'dgp_num': dgp_num,
+        'snr': snr,
+        'test_set_pct': test_set_pct,
+        'num_threads': num_threads,
+        'iter': results[:, 0],
+        'outcome_rmse': results[:, 1],
+        'outcome_coverage': results[:, 2],
+        'treatment_effect_rmse': results[:, 3],
+        'treatment_effect_coverage': results[:, 4],
+        'runtime': results[:, 5]
+    })
+
+    snr_rounded = int(snr)
+    test_set_pct_rounded = int(test_set_pct * 100)
+    num_threads_clean = 0 if num_threads < 0 else num_threads
+
+    filename = f"stochtree_bcf_python_n_{n}_p_{p}_num_gfr_{num_gfr}_num_mcmc_{num_mcmc}_dgp_num_{dgp_num}_snr_{snr_rounded}_test_set_pct_{test_set_pct_rounded}_num_threads_{num_threads_clean}.csv"
+    output_dir = "tools/regression/bcf/stochtree_bcf_python_results"
+    filename_full = os.path.join(output_dir, filename)
+    results_df.to_csv(filename_full, index=False)
+    print(f"Results saved to {filename_full}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/tools/regression/bcf/regression_test_analysis_bcf.R b/tools/regression/bcf/regression_test_analysis_bcf.R
new file mode 100644
index 00000000..935832bb
--- /dev/null
+++ b/tools/regression/bcf/regression_test_analysis_bcf.R
@@ -0,0 +1,16 @@
+reg_test_dir <- "tools/regression/bcf/stochtree_bcf_r_results"
+reg_test_files <- list.files(reg_test_dir, pattern = ".csv", full.names = T)
+
+reg_test_df <- data.frame()
+for (file in reg_test_files) {
+  temp_df <- read.csv(file)
+  reg_test_df <- rbind(reg_test_df, temp_df)
+}
+
+summary_df <- aggregate(
+  cbind(outcome_rmse, outcome_coverage, treatment_effect_rmse, treatment_effect_coverage, runtime) ~ n + p + num_gfr + num_mcmc + dgp_num + snr + test_set_pct + num_threads,
+  data = reg_test_df, FUN = median, drop = TRUE
+)
+
+summary_file_output <- file.path(reg_test_dir, "stochtree_bcf_r_summary.csv")
+write.csv(summary_df, summary_file_output, row.names = F)
diff --git a/tools/regression/bcf/regression_test_analysis_bcf.py b/tools/regression/bcf/regression_test_analysis_bcf.py
new file mode 100644
index 00000000..4cabb82f
--- /dev/null
+++
b/tools/regression/bcf/regression_test_analysis_bcf.py @@ -0,0 +1,46 @@ +import pandas as pd +import os +import glob + + +def main(): + # Define the directory containing results + reg_test_dir = "tools/regression/bcf/stochtree_bcf_python_results" + + # Get all CSV files in the directory + reg_test_files = glob.glob(os.path.join(reg_test_dir, "*.csv")) + + # Read and combine all results + reg_test_df = pd.DataFrame() + for file in reg_test_files: + temp_df = pd.read_csv(file) + reg_test_df = pd.concat([reg_test_df, temp_df], ignore_index=True) + + # Create summary by aggregating results + summary_df = reg_test_df.groupby([ + 'n', 'p', 'num_gfr', 'num_mcmc', 'dgp_num', 'snr', 'test_set_pct', 'num_threads' + ]).agg({ + 'outcome_rmse': 'median', + 'outcome_coverage': 'median', + 'treatment_effect_rmse': 'median', + 'treatment_effect_coverage': 'median', + 'runtime': 'median' + }).reset_index() + + # Save summary to CSV + summary_file_output = os.path.join(reg_test_dir, "stochtree_bcf_python_summary.csv") + summary_df.to_csv(summary_file_output, index=False) + print(f"Summary saved to {summary_file_output}") + + # Print some basic statistics + print(f"Total number of result files: {len(reg_test_files)}") + print(f"Total number of iterations: {len(reg_test_df)}") + print(f"Number of unique parameter combinations: {len(summary_df)}") + + # Print summary statistics + print("\nSummary statistics:") + print(summary_df.describe()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/regression/bcf/regression_test_dispatch_bcf.R b/tools/regression/bcf/regression_test_dispatch_bcf.R new file mode 100644 index 00000000..6e5abf7b --- /dev/null +++ b/tools/regression/bcf/regression_test_dispatch_bcf.R @@ -0,0 +1,28 @@ +# Test case parameters +dgps <- 1:4 +ns <- c(1000, 10000) +ps <- c(5, 20) +threads <- c(-1, 1) +varying_param_grid <- expand.grid(dgps, ns, ps, threads) +test_case_grid <- cbind( + 5, varying_param_grid[,2], varying_param_grid[,3], + 10, 100, varying_param_grid[,1], 2.0, 0.2, varying_param_grid[,4] +) + +# Run script for every case +script_path <- "tools/regression/bcf/individual_regression_test_bcf.R" +for (i in 1:nrow(test_case_grid)) { + n_iter <- test_case_grid[i,1] + n <- test_case_grid[i,2] + p <- test_case_grid[i,3] + num_gfr <- test_case_grid[i,4] + num_mcmc <- test_case_grid[i,5] + dgp_num <- test_case_grid[i,6] + snr <- test_case_grid[i,7] + test_set_pct <- test_case_grid[i,8] + num_threads <- test_case_grid[i,9] + system2( + "Rscript", + args = c(script_path, n_iter, n, p, num_gfr, num_mcmc, dgp_num, snr, test_set_pct, num_threads) + ) +} diff --git a/tools/regression/bcf/regression_test_dispatch_bcf.py b/tools/regression/bcf/regression_test_dispatch_bcf.py new file mode 100644 index 00000000..ddf7d64c --- /dev/null +++ b/tools/regression/bcf/regression_test_dispatch_bcf.py @@ -0,0 +1,69 @@ +import subprocess +import sys + + +def main(): + # Test case parameters + dgps = [1, 2, 3, 4] + ns = [1000, 10000] + ps = [5, 20] + threads = [-1, 1] + + # Create parameter grid + varying_param_grid = [] + for dgp in dgps: + for n in ns: + for p in ps: + for thread in threads: + varying_param_grid.append([dgp, n, p, thread]) + + # Fixed parameters + n_iter = 5 + num_gfr = 10 + num_mcmc = 100 + snr = 2.0 + test_set_pct = 0.2 + + # Script path + script_path = "tools/regression/bcf/individual_regression_test_bcf.py" + + # Run script for every case + for i, params in enumerate(varying_param_grid): + dgp_num, n, p, num_threads = params + + print(f"Running test 
case {i+1}/{len(varying_param_grid)}:") + print(f" DGP: {dgp_num}, n: {n}, p: {p}, threads: {num_threads}") + + # Construct command + cmd = [ + sys.executable, # Use current Python interpreter + script_path, + str(n_iter), + str(n), + str(p), + str(num_gfr), + str(num_mcmc), + str(dgp_num), + str(snr), + str(test_set_pct), + str(num_threads) + ] + + # Run the command + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + print(" Completed successfully") + if result.stdout: + print(f" Output: {result.stdout.strip()}") + except subprocess.CalledProcessError as e: + print(f" Failed with error code {e.returncode}") + if e.stderr: + print(f" Error: {e.stderr.strip()}") + if e.stdout: + print(f" Output: {e.stdout.strip()}") + + print(f"\nCompleted {len(varying_param_grid)} test cases") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/regression/bcf/stochtree_bcf_python_results/.gitkeep b/tools/regression/bcf/stochtree_bcf_python_results/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tools/regression/bcf/stochtree_bcf_r_results/.gitkeep b/tools/regression/bcf/stochtree_bcf_r_results/.gitkeep new file mode 100644 index 00000000..e69de29b