From 7b1f17164be9c5f506ff0594850da3c9100e7000 Mon Sep 17 00:00:00 2001 From: BehramUlukir Date: Wed, 30 Jul 2025 20:42:36 +0300 Subject: [PATCH 1/3] adding discrete option to ppc_rootogram --- R/ppc-discrete.R | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R index 199c394b..28193664 100644 --- a/R/ppc-discrete.R +++ b/R/ppc-discrete.R @@ -237,7 +237,7 @@ ppc_bars_grouped <- #' ppc_rootogram <- function(y, yrep, - style = c("standing", "hanging", "suspended"), + style = c("standing", "hanging", "suspended", "discrete"), ..., prob = 0.9, size = 1) { @@ -266,6 +266,44 @@ ppc_rootogram <- function(y, } tyrep <- do.call(rbind, tyrep) tyrep[is.na(tyrep)] <- 0 + + #Discrete style + pred_mean <- colMeans(tyrep) + pred_quantile <- t(apply(tyrep, 2, quantile, probs = probs)) + colnames(pred_quantile) <- c("lower", "upper") + + # prepare a table for y + ty <- table(y) + y_count <- as.numeric(ty[match(xpos, rownames(ty))]) + y_count[is.na(y_count)] <- 0 + + if (style == "discrete") { + obs_color <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], "blue", "red") + + data <- data.frame( + xpos = xpos, + obs = y_count, + pred_mean = pred_mean, + lower = pred_quantile[, "lower"], + upper = pred_quantile[, "upper"], + obs_color = obs_color + ) + + graph <- ggplot(data, aes(x = xpos)) + + geom_point(aes(y = obs, fill = "Observed"), size = size * 3.5, color = obs_color, shape=18) + + geom_pointrange(aes(y = pred_mean, ymin = lower + (pred_mean - lower)*0.5, ymax = upper - (upper - pred_mean)*0.5, color = "Expected"), linewidth = size, size = size, fatten = 2, alpha = 0.6) + + geom_linerange(aes(y = pred_mean, ymin = lower, ymax = upper, color = "Expected"), linewidth = size, size = size, alpha = 0.4) + + scale_y_sqrt() + + scale_fill_manual("", values = get_color("l")) + + scale_color_manual("", values = get_color("dh")) + + labs(x = expression(italic(y)), y = "Count") + + bayesplot_theme_get() + + reduce_legend_spacing(0.25) + return(graph) + } + + + #Standing, hanging, and suspended styles tyexp <- sqrt(colMeans(tyrep)) tyquantile <- sqrt(t(apply(tyrep, 2, quantile, probs = probs))) colnames(tyquantile) <- c("tylower", "tyupper") @@ -395,7 +433,7 @@ ppc_bars_data <- data <- reshape2::melt(tmp_data, id.vars = "group") %>% count(.data$group, .data$value, .data$variable) %>% - tidyr::complete(.data$group, .data$value, .data$variable, fill = list(n = 0)) %>% + tidyr::complete(.data$group, .data$value, .data$variable, fill = list(n = 0)) %>% group_by(.data$variable, .data$group) %>% mutate(proportion = .data$n / sum(.data$n)) %>% ungroup() %>% From a0034adb287b046d653cd69b573eb966ae79d976 Mon Sep 17 00:00:00 2001 From: BehramUlukir Date: Thu, 31 Jul 2025 16:13:37 +0300 Subject: [PATCH 2/3] updated visuals for discrete rootogram --- R/ppc-discrete.R | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R index 28193664..973274e4 100644 --- a/R/ppc-discrete.R +++ b/R/ppc-discrete.R @@ -198,16 +198,16 @@ ppc_bars_grouped <- fatten = 2.5, linewidth = 1, freq = TRUE) { - check_ignored_arguments(...) - call <- match.call(expand.dots = FALSE) - g <- eval(ungroup_call("ppc_bars", call), parent.frame()) - if (fixed_y(facet_args)) { - g <- g + expand_limits(y = 1.05 * max(g$data[["h"]], na.rm = TRUE)) + check_ignored_arguments(...) + call <- match.call(expand.dots = FALSE) + g <- eval(ungroup_call("ppc_bars", call), parent.frame()) + if (fixed_y(facet_args)) { + g <- g + expand_limits(y = 1.05 * max(g$data[["h"]], na.rm = TRUE)) + } + g + + bars_group_facets(facet_args) + + force_axes_in_facets() } - g + - bars_group_facets(facet_args) + - force_axes_in_facets() -} #' @rdname PPC-discrete @@ -278,7 +278,7 @@ ppc_rootogram <- function(y, y_count[is.na(y_count)] <- 0 if (style == "discrete") { - obs_color <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], "blue", "red") + obs_shape <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], 24, 25) data <- data.frame( xpos = xpos, @@ -286,19 +286,20 @@ ppc_rootogram <- function(y, pred_mean = pred_mean, lower = pred_quantile[, "lower"], upper = pred_quantile[, "upper"], - obs_color = obs_color + obs_shape = obs_shape ) - + # Create the graph graph <- ggplot(data, aes(x = xpos)) + - geom_point(aes(y = obs, fill = "Observed"), size = size * 3.5, color = obs_color, shape=18) + - geom_pointrange(aes(y = pred_mean, ymin = lower + (pred_mean - lower)*0.5, ymax = upper - (upper - pred_mean)*0.5, color = "Expected"), linewidth = size, size = size, fatten = 2, alpha = 0.6) + - geom_linerange(aes(y = pred_mean, ymin = lower, ymax = upper, color = "Expected"), linewidth = size, size = size, alpha = 0.4) + + geom_pointrange(aes(y = pred_mean, ymin = lower, ymax = upper, color = "Expected"), fill = get_color("d"), linewidth = size, size = size, fatten = 2, alpha = 0.65) + + geom_point(aes(y = obs, shape=ifelse(obs_shape==24, "In", "Out")), size = size * 2, color = get_color("lh"), fill = get_color("lh")) + scale_y_sqrt() + - scale_fill_manual("", values = get_color("l")) + + scale_fill_manual("", values = get_color("lh"), guide="none") + scale_color_manual("", values = get_color("dh")) + labs(x = expression(italic(y)), y = "Count") + bayesplot_theme_get() + - reduce_legend_spacing(0.25) + reduce_legend_spacing(0.25) + + scale_shape_manual(values = c("Out"=24, "In"=25), guide = "legend") + + guides(shape = guide_legend(" Observation \n within bounds")) return(graph) } From 8abdf26fea6acbb243d75e595ea53f4a11549d38 Mon Sep 17 00:00:00 2001 From: BehramUlukir Date: Mon, 4 Aug 2025 19:56:38 +0300 Subject: [PATCH 3/3] updating discrete style, documentation, and adding tests --- R/ppc-discrete.R | 45 ++++++++--- man/PPC-discrete.Rd | 18 +++-- ...iscrete-prob-size-bound-distinct-false.svg | 79 ++++++++++++++++++ ...ppc-rootogram-style-discrete-prob-size.svg | 81 +++++++++++++++++++ tests/testthat/test-ppc-discrete.R | 26 ++++++ 5 files changed, 231 insertions(+), 18 deletions(-) create mode 100644 tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg create mode 100644 tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R index 973274e4..5fe8170f 100644 --- a/R/ppc-discrete.R +++ b/R/ppc-discrete.R @@ -21,9 +21,12 @@ #' @param size,fatten,linewidth For bar plots, `size`, `fatten`, and `linewidth` #' are passed to [ggplot2::geom_pointrange()] to control the appearance of the #' `yrep` points and intervals. For rootograms `size` is passed to -#' [ggplot2::geom_line()]. +#' [ggplot2::geom_line()] and [ggplot2::geom_pointrange()]. #' @param freq For bar plots only, if `TRUE` (the default) the y-axis will #' display counts. Setting `freq=FALSE` will put proportions on the y-axis. +#' @param bound_distinct For `ppc_rootogram(style = "discrete)`, +#' if `TRUE` then the observed counts will be plotted with different shapes +#' depending on whether they are within the bounds of the expected quantiles. #' #' @template return-ggplot-or-data #' @@ -52,10 +55,12 @@ #' style can be adjusted to focus on different aspects of the data: #' * _Standing_: basic histogram of observed counts with curve #' showing expected counts. -#' * _Hanging_: observed counts counts hanging from the curve +#' * _Hanging_: observed counts hanging from the curve #' representing expected counts. #' * _Suspended_: histogram of the differences between expected and #' observed counts. +#' * _Discrete_: a dot-and-whisker plot of the expected counts and dots +#' representing observed counts #' #' **All of the rootograms are plotted on the square root scale**. See Kleiber #' and Zeileis (2016) for advice on interpreting rootograms and selecting @@ -213,7 +218,7 @@ ppc_bars_grouped <- #' @rdname PPC-discrete #' @export #' @param style For `ppc_rootogram`, a string specifying the rootogram -#' style. The options are `"standing"`, `"hanging"`, and +#' style. The options are `"discrete", "standing"`, `"hanging"`, and #' `"suspended"`. See the **Plot Descriptions** section, below, for #' details on the different styles. #' @@ -234,13 +239,15 @@ ppc_bars_grouped <- #' #' ppc_rootogram(y, yrep, style = "hanging", prob = 0.8) #' ppc_rootogram(y, yrep, style = "suspended") +#' ppc_rootogram(y, yrep, style = "discrete") #' ppc_rootogram <- function(y, yrep, style = c("standing", "hanging", "suspended", "discrete"), ..., prob = 0.9, - size = 1) { + size = 1, + bound_distinct = TRUE) { check_ignored_arguments(...) style <- match.arg(style) y <- validate_y(y) @@ -268,7 +275,7 @@ ppc_rootogram <- function(y, tyrep[is.na(tyrep)] <- 0 #Discrete style - pred_mean <- colMeans(tyrep) + pred_median <- apply(tyrep, 2, median) pred_quantile <- t(apply(tyrep, 2, quantile, probs = probs)) colnames(pred_quantile) <- c("lower", "upper") @@ -278,28 +285,40 @@ ppc_rootogram <- function(y, y_count[is.na(y_count)] <- 0 if (style == "discrete") { - obs_shape <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], 24, 25) + if (bound_distinct) { + # If the observed count is within the bounds of the predicted quantiles, + # use a different shape for the point + obs_shape <- obs_shape <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], "In", "Out") + } else { + obs_shape <- rep("Observed", length(y_count)) # all points are the same shape for obsved + } data <- data.frame( xpos = xpos, obs = y_count, - pred_mean = pred_mean, + pred_median = pred_median, lower = pred_quantile[, "lower"], upper = pred_quantile[, "upper"], obs_shape = obs_shape ) # Create the graph graph <- ggplot(data, aes(x = xpos)) + - geom_pointrange(aes(y = pred_mean, ymin = lower, ymax = upper, color = "Expected"), fill = get_color("d"), linewidth = size, size = size, fatten = 2, alpha = 0.65) + - geom_point(aes(y = obs, shape=ifelse(obs_shape==24, "In", "Out")), size = size * 2, color = get_color("lh"), fill = get_color("lh")) + + geom_pointrange(aes(y = pred_median, ymin = lower, ymax = upper, color = "Expected"), fill = get_color("lh"), linewidth = size, size = size, fatten = 2, alpha = 1) + + geom_point(aes(y = obs, shape = obs_shape), size = size * 1.5, color = get_color("d"), fill = get_color("d")) + scale_y_sqrt() + - scale_fill_manual("", values = get_color("lh"), guide="none") + - scale_color_manual("", values = get_color("dh")) + + scale_fill_manual("", values = get_color("d"), guide="none") + + scale_color_manual("", values = get_color("lh")) + labs(x = expression(italic(y)), y = "Count") + bayesplot_theme_get() + reduce_legend_spacing(0.25) + - scale_shape_manual(values = c("Out"=24, "In"=25), guide = "legend") + - guides(shape = guide_legend(" Observation \n within bounds")) + scale_shape_manual(values = c("In" = 22, "Out" = 23, "Observed" = 22), guide = "legend") + if (bound_distinct) { + graph <- graph + + guides(shape = guide_legend(" Observation \n within bounds")) + } else { + graph <- graph + + guides(shape = guide_legend("")) + } return(graph) } diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd index 434ba7bd..bff46d34 100644 --- a/man/PPC-discrete.Rd +++ b/man/PPC-discrete.Rd @@ -37,10 +37,11 @@ ppc_bars_grouped( ppc_rootogram( y, yrep, - style = c("standing", "hanging", "suspended"), + style = c("standing", "hanging", "suspended", "discrete"), ..., prob = 0.9, - size = 1 + size = 1, + bound_distinct = TRUE ) ppc_bars_data(y, yrep, group = NULL, prob = 0.9, freq = TRUE) @@ -69,7 +70,7 @@ the bar width.} \item{size, fatten, linewidth}{For bar plots, \code{size}, \code{fatten}, and \code{linewidth} are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}} to control the appearance of the \code{yrep} points and intervals. For rootograms \code{size} is passed to -\code{\link[ggplot2:geom_path]{ggplot2::geom_line()}}.} +\code{\link[ggplot2:geom_path]{ggplot2::geom_line()}} and \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}.} \item{freq}{For bar plots only, if \code{TRUE} (the default) the y-axis will display counts. Setting \code{freq=FALSE} will put proportions on the y-axis.} @@ -83,9 +84,13 @@ to the corresponding observation.} passed to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} to control faceting.} \item{style}{For \code{ppc_rootogram}, a string specifying the rootogram -style. The options are \code{"standing"}, \code{"hanging"}, and +style. The options are \verb{"discrete", "standing"}, \code{"hanging"}, and \code{"suspended"}. See the \strong{Plot Descriptions} section, below, for details on the different styles.} + +\item{bound_distinct}{For \verb{ppc_rootogram(style = "discrete)}, +if \code{TRUE} then the observed counts will be plotted with different shapes +depending on whether they are within the bounds of the expected quantiles.} } \value{ The plotting functions return a ggplot object that can be further @@ -129,10 +134,12 @@ style can be adjusted to focus on different aspects of the data: \itemize{ \item \emph{Standing}: basic histogram of observed counts with curve showing expected counts. -\item \emph{Hanging}: observed counts counts hanging from the curve +\item \emph{Hanging}: observed counts hanging from the curve representing expected counts. \item \emph{Suspended}: histogram of the differences between expected and observed counts. +\item \emph{Discrete}: a dot-and-whisker plot of the expected counts and dots +representing observed counts } \strong{All of the rootograms are plotted on the square root scale}. See Kleiber @@ -206,6 +213,7 @@ ppc_rootogram(y, yrep, prob = 0) ppc_rootogram(y, yrep, style = "hanging", prob = 0.8) ppc_rootogram(y, yrep, style = "suspended") +ppc_rootogram(y, yrep, style = "discrete") } \references{ diff --git a/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg new file mode 100644 index 00000000..dfe3c771 --- /dev/null +++ b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +4 +8 +12 + + + + + + + + + + + +0 +1 +2 +3 +4 +5 +y +Count + +Observed + + +Expected +ppc_rootogram (style='discrete', prob, size, bound_distinct=FALSE) + + diff --git a/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg new file mode 100644 index 00000000..67c9d8a8 --- /dev/null +++ b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +5 +10 + + + + + + + + + + +0 +1 +2 +3 +4 +5 +y +Count + Observation + within bounds + + +In +Out + + +Expected +ppc_rootogram (style='discrete', prob, size) + + diff --git a/tests/testthat/test-ppc-discrete.R b/tests/testthat/test-ppc-discrete.R index a5ddb622..b91f787e 100644 --- a/tests/testthat/test-ppc-discrete.R +++ b/tests/testthat/test-ppc-discrete.R @@ -87,6 +87,7 @@ test_that("ppc_rootogram returns a ggplot object", { expect_gg(ppc_rootogram(y2, yrep2)) expect_gg(ppc_rootogram(y2, yrep3, style = "hanging", prob = 0.5)) expect_gg(ppc_rootogram(y2, yrep3, style = "suspended")) + expect_gg(ppc_rootogram(y2, yrep3, style = "discrete")) }) test_that("ppc_rootogram errors if y/yrep not counts", { @@ -176,5 +177,30 @@ test_that("ppc_rootogram renders correctly", { vdiffr::expect_doppelganger( title = "ppc_rootogram (style='hanging', prob, size)", fig = p_custom_hanging) + + p_discrete <- ppc_rootogram( + y = vdiff_y2, + yrep = vdiff_yrep2, + prob = 0.5, + size = 1, + style = "discrete" + ) + + vdiffr::expect_doppelganger( + title = "ppc_rootogram (style='discrete', prob, size)", + fig = p_discrete) + + p_discrete_nonbound <- ppc_rootogram( + y = vdiff_y2, + yrep = vdiff_yrep2, + prob = 0.8, + size = 1, + style = "discrete", + bound_distinct = FALSE + ) + + vdiffr::expect_doppelganger( + title = "ppc_rootogram (style='discrete', prob, size, bound_distinct=FALSE)", + fig = p_discrete_nonbound) })