diff --git a/NEWS.md b/NEWS.md
index 941ef230..7ad30dab 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,7 @@
* `ppc_error_scatter_avg_vs_x(x = some_expression)` labels the *x* axis with `some_expression`.
* Add `ppc_dots()` and `ppd_dots()` by @behramulukir (#357)
* Add `x` argument to `ppc_error_binned` by @behramulukir (#359)
+* Add `discrete` style to `ppc_rootogram` by @behramulukir (#362)
# bayesplot 1.13.0
diff --git a/R/ppc-discrete.R b/R/ppc-discrete.R
index 199c394b..885bc8ff 100644
--- a/R/ppc-discrete.R
+++ b/R/ppc-discrete.R
@@ -21,9 +21,12 @@
#' @param size,fatten,linewidth For bar plots, `size`, `fatten`, and `linewidth`
#' are passed to [ggplot2::geom_pointrange()] to control the appearance of the
#' `yrep` points and intervals. For rootograms `size` is passed to
-#' [ggplot2::geom_line()].
+#' [ggplot2::geom_line()] and [ggplot2::geom_pointrange()].
#' @param freq For bar plots only, if `TRUE` (the default) the y-axis will
#' display counts. Setting `freq=FALSE` will put proportions on the y-axis.
+#' @param bound_distinct For `ppc_rootogram(style = "discrete)`,
+#' if `TRUE` then the observed counts will be plotted with different shapes
+#' depending on whether they are within the bounds of the `y` quantiles.
#'
#' @template return-ggplot-or-data
#'
@@ -44,18 +47,26 @@
#' }
#' \item{`ppc_rootogram()`}{
#' Rootograms allow for diagnosing problems in count data models such as
-#' overdispersion or excess zeros. They consist of a histogram of `y` with the
-#' expected counts based on `yrep` overlaid as a line along with uncertainty
-#' intervals. The y-axis represents the square roots of the counts to
+#' overdispersion or excess zeros. In `standing`, `hanging`, and `suspended`
+#' styles, they consist of a histogram of `y` with the expected counts based on
+#' `yrep` overlaid as a line along with uncertainty intervals.
+#'
+#' Meanwhile, in `discrete` style, median counts based on `yrep` are laid
+#' as a point range with uncertainty intervals along with dots
+#' representing the `y`.
+#'
+#' The y-axis represents the square roots of the counts to
#' approximately adjust for scale differences and thus ease comparison between
-#' observed and expected counts. Using the `style` argument, the histogram
-#' style can be adjusted to focus on different aspects of the data:
+#' observed and expected counts. Using the `style` argument, the rootogram
+#' can be adjusted to focus on different aspects of the data:
#' * _Standing_: basic histogram of observed counts with curve
#' showing expected counts.
-#' * _Hanging_: observed counts counts hanging from the curve
+#' * _Hanging_: observed counts hanging from the curve
#' representing expected counts.
#' * _Suspended_: histogram of the differences between expected and
#' observed counts.
+#' * _Discrete_: a dot-and-whisker plot of the median counts and
+#' dots representing observed counts.
#'
#' **All of the rootograms are plotted on the square root scale**. See Kleiber
#' and Zeileis (2016) for advice on interpreting rootograms and selecting
@@ -198,22 +209,22 @@ ppc_bars_grouped <-
fatten = 2.5,
linewidth = 1,
freq = TRUE) {
- check_ignored_arguments(...)
- call <- match.call(expand.dots = FALSE)
- g <- eval(ungroup_call("ppc_bars", call), parent.frame())
- if (fixed_y(facet_args)) {
- g <- g + expand_limits(y = 1.05 * max(g$data[["h"]], na.rm = TRUE))
+ check_ignored_arguments(...)
+ call <- match.call(expand.dots = FALSE)
+ g <- eval(ungroup_call("ppc_bars", call), parent.frame())
+ if (fixed_y(facet_args)) {
+ g <- g + expand_limits(y = 1.05 * max(g$data[["h"]], na.rm = TRUE))
+ }
+ g +
+ bars_group_facets(facet_args) +
+ force_axes_in_facets()
}
- g +
- bars_group_facets(facet_args) +
- force_axes_in_facets()
-}
#' @rdname PPC-discrete
#' @export
#' @param style For `ppc_rootogram`, a string specifying the rootogram
-#' style. The options are `"standing"`, `"hanging"`, and
+#' style. The options are `"discrete", "standing"`, `"hanging"`, and
#' `"suspended"`. See the **Plot Descriptions** section, below, for
#' details on the different styles.
#'
@@ -234,13 +245,15 @@ ppc_bars_grouped <-
#'
#' ppc_rootogram(y, yrep, style = "hanging", prob = 0.8)
#' ppc_rootogram(y, yrep, style = "suspended")
+#' ppc_rootogram(y, yrep, style = "discrete")
#'
ppc_rootogram <- function(y,
yrep,
- style = c("standing", "hanging", "suspended"),
+ style = c("standing", "hanging", "suspended", "discrete"),
...,
prob = 0.9,
- size = 1) {
+ size = 1,
+ bound_distinct = TRUE) {
check_ignored_arguments(...)
style <- match.arg(style)
y <- validate_y(y)
@@ -266,6 +279,57 @@ ppc_rootogram <- function(y,
}
tyrep <- do.call(rbind, tyrep)
tyrep[is.na(tyrep)] <- 0
+
+ #Discrete style
+ pred_median <- apply(tyrep, 2, median)
+ pred_quantile <- t(apply(tyrep, 2, quantile, probs = probs))
+ colnames(pred_quantile) <- c("lower", "upper")
+
+ # prepare a table for y
+ ty <- table(y)
+ y_count <- as.numeric(ty[match(xpos, rownames(ty))])
+ y_count[is.na(y_count)] <- 0
+
+ if (style == "discrete") {
+ if (bound_distinct) {
+ # If the observed count is within the bounds of the predicted quantiles,
+ # use a different shape for the point
+ obs_shape <- obs_shape <- ifelse(y_count >= pred_quantile[, "lower"] & y_count <= pred_quantile[, "upper"], "In", "Out")
+ } else {
+ obs_shape <- rep("y", length(y_count)) # all points are the same shape for observed
+ }
+
+ data <- data.frame(
+ xpos = xpos,
+ obs = y_count,
+ pred_median = pred_median,
+ lower = pred_quantile[, "lower"],
+ upper = pred_quantile[, "upper"],
+ obs_shape = obs_shape
+ )
+ # Create the graph
+ graph <- ggplot(data, aes(x = xpos)) +
+ geom_pointrange(aes(y = pred_median, ymin = lower, ymax = upper, color = "y_rep"), fill = get_color("lh"), linewidth = size, size = size, fatten = 2, alpha = 1) +
+ geom_point(aes(y = obs, shape = obs_shape), size = size * 1.5, color = get_color("d"), fill = get_color("d")) +
+ scale_y_sqrt() +
+ scale_fill_manual("", values = get_color("d"), guide="none") +
+ scale_color_manual("", values = get_color("lh"), labels = yrep_label()) +
+ labs(x = expression(italic(y)), y = "Count") +
+ bayesplot_theme_get() +
+ reduce_legend_spacing(0.25) +
+ scale_shape_manual(values = c("In" = 22, "Out" = 23, "y" = 22), guide = "legend", labels = c("y" = expression(italic(y))))
+ if (bound_distinct) {
+ graph <- graph +
+ guides(shape = guide_legend(expression(italic(y)~within~bounds)))
+ } else {
+ graph <- graph +
+ guides(shape = guide_legend(" "))
+ }
+ return(graph)
+ }
+
+
+ #Standing, hanging, and suspended styles
tyexp <- sqrt(colMeans(tyrep))
tyquantile <- sqrt(t(apply(tyrep, 2, quantile, probs = probs)))
colnames(tyquantile) <- c("tylower", "tyupper")
@@ -395,7 +459,7 @@ ppc_bars_data <-
data <-
reshape2::melt(tmp_data, id.vars = "group") %>%
count(.data$group, .data$value, .data$variable) %>%
- tidyr::complete(.data$group, .data$value, .data$variable, fill = list(n = 0)) %>%
+ tidyr::complete(.data$group, .data$value, .data$variable, fill = list(n = 0)) %>%
group_by(.data$variable, .data$group) %>%
mutate(proportion = .data$n / sum(.data$n)) %>%
ungroup() %>%
diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd
index 434ba7bd..11d26ede 100644
--- a/man/PPC-discrete.Rd
+++ b/man/PPC-discrete.Rd
@@ -37,10 +37,11 @@ ppc_bars_grouped(
ppc_rootogram(
y,
yrep,
- style = c("standing", "hanging", "suspended"),
+ style = c("standing", "hanging", "suspended", "discrete"),
...,
prob = 0.9,
- size = 1
+ size = 1,
+ bound_distinct = TRUE
)
ppc_bars_data(y, yrep, group = NULL, prob = 0.9, freq = TRUE)
@@ -69,7 +70,7 @@ the bar width.}
\item{size, fatten, linewidth}{For bar plots, \code{size}, \code{fatten}, and \code{linewidth}
are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}} to control the appearance of the
\code{yrep} points and intervals. For rootograms \code{size} is passed to
-\code{\link[ggplot2:geom_path]{ggplot2::geom_line()}}.}
+\code{\link[ggplot2:geom_path]{ggplot2::geom_line()}} and \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}.}
\item{freq}{For bar plots only, if \code{TRUE} (the default) the y-axis will
display counts. Setting \code{freq=FALSE} will put proportions on the y-axis.}
@@ -83,9 +84,13 @@ to the corresponding observation.}
passed to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} to control faceting.}
\item{style}{For \code{ppc_rootogram}, a string specifying the rootogram
-style. The options are \code{"standing"}, \code{"hanging"}, and
+style. The options are \verb{"discrete", "standing"}, \code{"hanging"}, and
\code{"suspended"}. See the \strong{Plot Descriptions} section, below, for
details on the different styles.}
+
+\item{bound_distinct}{For \verb{ppc_rootogram(style = "discrete)},
+if \code{TRUE} then the observed counts will be plotted with different shapes
+depending on whether they are within the bounds of the \code{y} quantiles.}
}
\value{
The plotting functions return a ggplot object that can be further
@@ -120,19 +125,27 @@ level of a grouping variable.
}
\item{\code{ppc_rootogram()}}{
Rootograms allow for diagnosing problems in count data models such as
-overdispersion or excess zeros. They consist of a histogram of \code{y} with the
-expected counts based on \code{yrep} overlaid as a line along with uncertainty
-intervals. The y-axis represents the square roots of the counts to
+overdispersion or excess zeros. In \code{standing}, \code{hanging}, and \code{suspended}
+styles, they consist of a histogram of \code{y} with the expected counts based on
+\code{yrep} overlaid as a line along with uncertainty intervals.
+
+Meanwhile, in \code{discrete} style, median counts based on \code{yrep} are laid
+as a point range with uncertainty intervals along with dots
+representing the \code{y}.
+
+The y-axis represents the square roots of the counts to
approximately adjust for scale differences and thus ease comparison between
-observed and expected counts. Using the \code{style} argument, the histogram
-style can be adjusted to focus on different aspects of the data:
+observed and expected counts. Using the \code{style} argument, the rootogram
+can be adjusted to focus on different aspects of the data:
\itemize{
\item \emph{Standing}: basic histogram of observed counts with curve
showing expected counts.
-\item \emph{Hanging}: observed counts counts hanging from the curve
+\item \emph{Hanging}: observed counts hanging from the curve
representing expected counts.
\item \emph{Suspended}: histogram of the differences between expected and
observed counts.
+\item \emph{Discrete}: a dot-and-whisker plot of the median counts and
+dots representing observed counts.
}
\strong{All of the rootograms are plotted on the square root scale}. See Kleiber
@@ -206,6 +219,7 @@ ppc_rootogram(y, yrep, prob = 0)
ppc_rootogram(y, yrep, style = "hanging", prob = 0.8)
ppc_rootogram(y, yrep, style = "suspended")
+ppc_rootogram(y, yrep, style = "discrete")
}
\references{
diff --git a/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg
new file mode 100644
index 00000000..589609dc
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size-bound-distinct-false.svg
@@ -0,0 +1,83 @@
+
+
diff --git a/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg
new file mode 100644
index 00000000..718cfd76
--- /dev/null
+++ b/tests/testthat/_snaps/ppc-discrete/ppc-rootogram-style-discrete-prob-size.svg
@@ -0,0 +1,97 @@
+
+
diff --git a/tests/testthat/test-ppc-discrete.R b/tests/testthat/test-ppc-discrete.R
index a5ddb622..b91f787e 100644
--- a/tests/testthat/test-ppc-discrete.R
+++ b/tests/testthat/test-ppc-discrete.R
@@ -87,6 +87,7 @@ test_that("ppc_rootogram returns a ggplot object", {
expect_gg(ppc_rootogram(y2, yrep2))
expect_gg(ppc_rootogram(y2, yrep3, style = "hanging", prob = 0.5))
expect_gg(ppc_rootogram(y2, yrep3, style = "suspended"))
+ expect_gg(ppc_rootogram(y2, yrep3, style = "discrete"))
})
test_that("ppc_rootogram errors if y/yrep not counts", {
@@ -176,5 +177,30 @@ test_that("ppc_rootogram renders correctly", {
vdiffr::expect_doppelganger(
title = "ppc_rootogram (style='hanging', prob, size)",
fig = p_custom_hanging)
+
+ p_discrete <- ppc_rootogram(
+ y = vdiff_y2,
+ yrep = vdiff_yrep2,
+ prob = 0.5,
+ size = 1,
+ style = "discrete"
+ )
+
+ vdiffr::expect_doppelganger(
+ title = "ppc_rootogram (style='discrete', prob, size)",
+ fig = p_discrete)
+
+ p_discrete_nonbound <- ppc_rootogram(
+ y = vdiff_y2,
+ yrep = vdiff_yrep2,
+ prob = 0.8,
+ size = 1,
+ style = "discrete",
+ bound_distinct = FALSE
+ )
+
+ vdiffr::expect_doppelganger(
+ title = "ppc_rootogram (style='discrete', prob, size, bound_distinct=FALSE)",
+ fig = p_discrete_nonbound)
})