diff --git a/NEWS.md b/NEWS.md index 941ef230..7ad30dab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * `ppc_error_scatter_avg_vs_x(x = some_expression)` labels the *x* axis with `some_expression`. * Add `ppc_dots()` and `ppd_dots()` by @behramulukir (#357) * Add `x` argument to `ppc_error_binned` by @behramulukir (#359) +* Add `discrete` style to `ppc_rootogram` by @behramulukir (#362) # bayesplot 1.13.0 diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R index 199c394b..885bc8ff 100644 --- a/R/ppc-discrete.R +++ b/R/ppc-discrete.R @@ -21,9 +21,12 @@ #' @param size,fatten,linewidth For bar plots, `size`, `fatten`, and `linewidth` #' are passed to [ggplot2::geom_pointrange()] to control the appearance of the #' `yrep` points and intervals. For rootograms `size` is passed to -#' [ggplot2::geom_line()]. +#' [ggplot2::geom_line()] and [ggplot2::geom_pointrange()]. #' @param freq For bar plots only, if `TRUE` (the default) the y-axis will #' display counts. Setting `freq=FALSE` will put proportions on the y-axis. +#' @param bound_distinct For `ppc_rootogram(style = "discrete)`, +#' if `TRUE` then the observed counts will be plotted with different shapes +#' depending on whether they are within the bounds of the `y` quantiles. #' #' @template return-ggplot-or-data #' @@ -44,18 +47,26 @@ #' } #' \item{`ppc_rootogram()`}{ #' Rootograms allow for diagnosing problems in count data models such as -#' overdispersion or excess zeros. They consist of a histogram of `y` with the -#' expected counts based on `yrep` overlaid as a line along with uncertainty -#' intervals. The y-axis represents the square roots of the counts to +#' overdispersion or excess zeros. In `standing`, `hanging`, and `suspended` +#' styles, they consist of a histogram of `y` with the expected counts based on +#' `yrep` overlaid as a line along with uncertainty intervals. +#' +#' Meanwhile, in `discrete` style, median counts based on `yrep` are laid +#' as a point range with uncertainty intervals along with dots +#' representing the `y`. +#' +#' The y-axis represents the square roots of the counts to #' approximately adjust for scale differences and thus ease comparison between -#' observed and expected counts. Using the `style` argument, the histogram -#' style can be adjusted to focus on different aspects of the data: +#' observed and expected counts. Using the `style` argument, the rootogram +#' can be adjusted to focus on different aspects of the data: #' * _Standing_: basic histogram of observed counts with curve #' showing expected counts. -#' * _Hanging_: observed counts counts hanging from the curve +#' * _Hanging_: observed counts hanging from the curve #' representing expected counts. #' * _Suspended_: histogram of the differences between expected and #' observed counts. +#' * _Discrete_: a dot-and-whisker plot of the median counts and +#' dots representing observed counts. #' #' **All of the rootograms are plotted on the square root scale**. See Kleiber #' and Zeileis (2016) for advice on interpreting rootograms and selecting @@ -198,22 +209,22 @@ ppc_bars_grouped <- fatten = 2.5, linewidth = 1, freq = TRUE) { - check_ignored_arguments(...) - call <- match.call(expand.dots = FALSE) - g <- eval(ungroup_call("ppc_bars", call), parent.frame()) - if (fixed_y(facet_args)) { - g <- g + expand_limits(y = 1.05 * max(g$data[["h"]], na.rm = TRUE)) + check_ignored_arguments(...) + call <- match.call(expand.dots = FALSE) + g <- eval(ungroup_call("ppc_bars", call), parent.frame()) + if (fixed_y(facet_args)) { + g <- g + expand_limits(y = 1.05 * max(g$data[["h"]], na.rm = TRUE)) + } + g + + bars_group_facets(facet_args) + + force_axes_in_facets() } - g + - bars_group_facets(facet_args) + - force_axes_in_facets() -} #' @rdname PPC-discrete #' @export #' @param style For `ppc_rootogram`, a string specifying the rootogram -#' style. The options are `"standing"`, `"hanging"`, and +#' style. The options are `"discrete", "standing"`, `"hanging"`, and #' `"suspended"`. See the **Plot Descriptions** section, below, for #' details on the different styles. #' @@ -234,13 +245,15 @@ ppc_bars_grouped <- #' #' ppc_rootogram(y, yrep, style = "hanging", prob = 0.8) #' ppc_rootogram(y, yrep, style = "suspended") +#' ppc_rootogram(y, yrep, style = "discrete") #' ppc_rootogram <- function(y, yrep, - style = c("standing", "hanging", "suspended"), + style = c("standing", "hanging", "suspended", "discrete"), ..., prob = 0.9, - size = 1) { + size = 1, + bound_distinct = TRUE) { check_ignored_arguments(...) style <- match.arg(style) y <- validate_y(y) @@ -266,6 +279,57 @@ ppc_rootogram <- function(y, } tyrep <- do.call(rbind, tyrep) tyrep[is.na(tyrep)] <- 0 + + #Discrete style + pred_median <- apply(tyrep, 2, median) + pred_quantile <- t(apply(tyrep, 2, quantile, probs = probs)) + colnames(pred_quantile) <- c("lower", "upper") + + # prepare a table for y + ty <- table(y) + y_count <- as.numeric(ty[match(xpos, rownames(ty))]) + y_count[is.na(y_count)] <- 0 + + if (style == "discrete") { + if (bound_distinct) { + # If the observed count is within the bounds of the predicted quantiles, + # use a different shape for the point + obs_shape <- obs_shape <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], "In", "Out") + } else { + obs_shape <- rep("y", length(y_count)) # all points are the same shape for observed + } + + data <- data.frame( + xpos = xpos, + obs = y_count, + pred_median = pred_median, + lower = pred_quantile[, "lower"], + upper = pred_quantile[, "upper"], + obs_shape = obs_shape + ) + # Create the graph + graph <- ggplot(data, aes(x = xpos)) + + geom_pointrange(aes(y = pred_median, ymin = lower, ymax = upper, color = "y_rep"), fill = get_color("lh"), linewidth = size, size = size, fatten = 2, alpha = 1) + + geom_point(aes(y = obs, shape = obs_shape), size = size * 1.5, color = get_color("d"), fill = get_color("d")) + + scale_y_sqrt() + + scale_fill_manual("", values = get_color("d"), guide="none") + + scale_color_manual("", values = get_color("lh"), labels = yrep_label()) + + labs(x = expression(italic(y)), y = "Count") + + bayesplot_theme_get() + + reduce_legend_spacing(0.25) + + scale_shape_manual(values = c("In" = 22, "Out" = 23, "y" = 22), guide = "legend", labels = c("y" = expression(italic(y)))) + if (bound_distinct) { + graph <- graph + + guides(shape = guide_legend(expression(italic(y)~within~bounds))) + } else { + graph <- graph + + guides(shape = guide_legend(" ")) + } + return(graph) + } + + + #Standing, hanging, and suspended styles tyexp <- sqrt(colMeans(tyrep)) tyquantile <- sqrt(t(apply(tyrep, 2, quantile, probs = probs))) colnames(tyquantile) <- c("tylower", "tyupper") @@ -395,7 +459,7 @@ ppc_bars_data <- data <- reshape2::melt(tmp_data, id.vars = "group") %>% count(.data$group, .data$value, .data$variable) %>% - tidyr::complete(.data$group, .data$value, .data$variable, fill = list(n = 0)) %>% + tidyr::complete(.data$group, .data$value, .data$variable, fill = list(n = 0)) %>% group_by(.data$variable, .data$group) %>% mutate(proportion = .data$n / sum(.data$n)) %>% ungroup() %>% diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd index 434ba7bd..11d26ede 100644 --- a/man/PPC-discrete.Rd +++ b/man/PPC-discrete.Rd @@ -37,10 +37,11 @@ ppc_bars_grouped( ppc_rootogram( y, yrep, - style = c("standing", "hanging", "suspended"), + style = c("standing", "hanging", "suspended", "discrete"), ..., prob = 0.9, - size = 1 + size = 1, + bound_distinct = TRUE ) ppc_bars_data(y, yrep, group = NULL, prob = 0.9, freq = TRUE) @@ -69,7 +70,7 @@ the bar width.} \item{size, fatten, linewidth}{For bar plots, \code{size}, \code{fatten}, and \code{linewidth} are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}} to control the appearance of the \code{yrep} points and intervals. For rootograms \code{size} is passed to -\code{\link[ggplot2:geom_path]{ggplot2::geom_line()}}.} +\code{\link[ggplot2:geom_path]{ggplot2::geom_line()}} and \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}.} \item{freq}{For bar plots only, if \code{TRUE} (the default) the y-axis will display counts. Setting \code{freq=FALSE} will put proportions on the y-axis.} @@ -83,9 +84,13 @@ to the corresponding observation.} passed to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} to control faceting.} \item{style}{For \code{ppc_rootogram}, a string specifying the rootogram -style. The options are \code{"standing"}, \code{"hanging"}, and +style. The options are \verb{"discrete", "standing"}, \code{"hanging"}, and \code{"suspended"}. See the \strong{Plot Descriptions} section, below, for details on the different styles.} + +\item{bound_distinct}{For \verb{ppc_rootogram(style = "discrete)}, +if \code{TRUE} then the observed counts will be plotted with different shapes +depending on whether they are within the bounds of the \code{y} quantiles.} } \value{ The plotting functions return a ggplot object that can be further @@ -120,19 +125,27 @@ level of a grouping variable. } \item{\code{ppc_rootogram()}}{ Rootograms allow for diagnosing problems in count data models such as -overdispersion or excess zeros. They consist of a histogram of \code{y} with the -expected counts based on \code{yrep} overlaid as a line along with uncertainty -intervals. The y-axis represents the square roots of the counts to +overdispersion or excess zeros. In \code{standing}, \code{hanging}, and \code{suspended} +styles, they consist of a histogram of \code{y} with the expected counts based on +\code{yrep} overlaid as a line along with uncertainty intervals. + +Meanwhile, in \code{discrete} style, median counts based on \code{yrep} are laid +as a point range with uncertainty intervals along with dots +representing the \code{y}. + +The y-axis represents the square roots of the counts to approximately adjust for scale differences and thus ease comparison between -observed and expected counts. Using the \code{style} argument, the histogram -style can be adjusted to focus on different aspects of the data: +observed and expected counts. Using the \code{style} argument, the rootogram +can be adjusted to focus on different aspects of the data: \itemize{ \item \emph{Standing}: basic histogram of observed counts with curve showing expected counts. -\item \emph{Hanging}: observed counts counts hanging from the curve +\item \emph{Hanging}: observed counts hanging from the curve representing expected counts. \item \emph{Suspended}: histogram of the differences between expected and observed counts. +\item \emph{Discrete}: a dot-and-whisker plot of the median counts and +dots representing observed counts. } \strong{All of the rootograms are plotted on the square root scale}. See Kleiber @@ -206,6 +219,7 @@ ppc_rootogram(y, yrep, prob = 0) ppc_rootogram(y, yrep, style = "hanging", prob = 0.8) ppc_rootogram(y, yrep, style = "suspended") +ppc_rootogram(y, yrep, style = "discrete") } \references{ diff --git a/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg new file mode 100644 index 00000000..589609dc --- /dev/null +++ b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +4 +8 +12 + + + + + + + + + + + +0 +1 +2 +3 +4 +5 +y +Count + + +y + + +y +r +e +p +ppc_rootogram (style='discrete', prob, size, bound_distinct=FALSE) + + diff --git a/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg new file mode 100644 index 00000000..718cfd76 --- /dev/null +++ b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +5 +10 + + + + + + + + + + +0 +1 +2 +3 +4 +5 +y +Count +y + +w +i +t +h +i +n + +b +o +u +n +d +s + + +In +Out + + +y +r +e +p +ppc_rootogram (style='discrete', prob, size) + + diff --git a/tests/testthat/test-ppc-discrete.R b/tests/testthat/test-ppc-discrete.R index a5ddb622..b91f787e 100644 --- a/tests/testthat/test-ppc-discrete.R +++ b/tests/testthat/test-ppc-discrete.R @@ -87,6 +87,7 @@ test_that("ppc_rootogram returns a ggplot object", { expect_gg(ppc_rootogram(y2, yrep2)) expect_gg(ppc_rootogram(y2, yrep3, style = "hanging", prob = 0.5)) expect_gg(ppc_rootogram(y2, yrep3, style = "suspended")) + expect_gg(ppc_rootogram(y2, yrep3, style = "discrete")) }) test_that("ppc_rootogram errors if y/yrep not counts", { @@ -176,5 +177,30 @@ test_that("ppc_rootogram renders correctly", { vdiffr::expect_doppelganger( title = "ppc_rootogram (style='hanging', prob, size)", fig = p_custom_hanging) + + p_discrete <- ppc_rootogram( + y = vdiff_y2, + yrep = vdiff_yrep2, + prob = 0.5, + size = 1, + style = "discrete" + ) + + vdiffr::expect_doppelganger( + title = "ppc_rootogram (style='discrete', prob, size)", + fig = p_discrete) + + p_discrete_nonbound <- ppc_rootogram( + y = vdiff_y2, + yrep = vdiff_yrep2, + prob = 0.8, + size = 1, + style = "discrete", + bound_distinct = FALSE + ) + + vdiffr::expect_doppelganger( + title = "ppc_rootogram (style='discrete', prob, size, bound_distinct=FALSE)", + fig = p_discrete_nonbound) })