From 9c8a15d4e2cfc5bb4974c61f5a0d29c6c6c8742c Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 13:40:08 +0100 Subject: [PATCH 01/19] capture bin argument fixup in function --- R/bin.R | 39 +++++++++++++++++++++++++++++++ R/stat-bin.R | 24 +------------------ tests/testthat/_snaps/stat-bin.md | 2 +- 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/R/bin.R b/R/bin.R index 055721f0e4..a60ce3b3e4 100644 --- a/R/bin.R +++ b/R/bin.R @@ -186,3 +186,42 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), .size = length(count) ) } + +fix_bin_params = function(params, fun, version) { + + if (!is.null(params$drop)) { + args <- paste0(fun, c("(drop)", "(pad)")) + deprecate_warn0(version, args[1], args[2]) + params$drop <- NULL + } + + if (!is.null(params$origin)) { + args <- paste0(fun, c("(origin)", "(boundary)")) + deprecate_warn0(version, args[1], args[2]) + params$boudnary <- params$origin + params$origin <- NULL + } + + if (!is.null(params$right)) { + args <- paste0(fun, c("(right)", "(closed)")) + deprecate_warn0(version, args[1], args[2]) + params$closed <- if (isTRUE(params$right)) "right" else "left" + params$right <- NULL + } + + if (!is.null(params$boundary) && !is.null(params$center)) { + cli::cli_abort( + "Only one of {.arg boundary} and {.arg center} may be specified \\ + in {.fn {fun}}." + ) + } + + if (is.null(params$breaks %||% params$binwidth %||% params$bins)) { + cli::cli_inform( + "{.fn {fun}} using {.code bins = 30}. Pick better value {.arg binwidth}." + ) + params$bins <- 30 + } + + params +} diff --git a/R/stat-bin.R b/R/stat-bin.R index 9c571ae519..2995a21706 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -118,29 +118,7 @@ StatBin <- ggproto("StatBin", Stat, )) } - if (!is.null(params$drop)) { - deprecate_warn0("2.1.0", "stat_bin(drop)", "stat_bin(pad)") - params$drop <- NULL - } - if (!is.null(params$origin)) { - deprecate_warn0("2.1.0", "stat_bin(origin)", "stat_bin(boundary)") - params$boundary <- params$origin - params$origin <- NULL - } - if (!is.null(params$right)) { - deprecate_warn0("2.1.0", "stat_bin(right)", "stat_bin(closed)") - params$closed <- if (params$right) "right" else "left" - params$right <- NULL - } - if (!is.null(params$boundary) && !is.null(params$center)) { - cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified in {.fn {snake_class(self)}}.") - } - - if (is.null(params$breaks) && is.null(params$binwidth) && is.null(params$bins)) { - cli::cli_inform("{.fn {snake_class(self)}} using {.code bins = 30}. Pick better value with {.arg binwidth}.") - params$bins <- 30 - } - + params <- fix_bin_params(params, fun = snake_class(self), version = "2.1.0") params }, diff --git a/tests/testthat/_snaps/stat-bin.md b/tests/testthat/_snaps/stat-bin.md index 2b5ee05525..1d3f49ad1a 100644 --- a/tests/testthat/_snaps/stat-bin.md +++ b/tests/testthat/_snaps/stat-bin.md @@ -61,7 +61,7 @@ Error in `stat_bin()`: ! Problem while computing stat. i Error occurred in the 1st layer. - Caused by error in `setup_params()`: + Caused by error in `fix_bin_params()`: ! Only one of `boundary` and `center` may be specified in `stat_bin()`. # bin errors at high bin counts From 9702e92c5002b9668a2ef61cbada2ecaf628df28 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 13:41:26 +0100 Subject: [PATCH 02/19] capture binning logic in function --- R/bin.R | 35 +++++++++++++++++++++++++++++++++++ R/stat-bin.R | 23 +++++------------------ 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/R/bin.R b/R/bin.R index a60ce3b3e4..62b1a6cc7b 100644 --- a/R/bin.R +++ b/R/bin.R @@ -128,6 +128,41 @@ bin_breaks_bins <- function(x_range, bins = 30, center = NULL, # Compute bins ------------------------------------------------------------ +compute_bins <- function(x, scale, breaks = NULL, binwidth = NULL, bins = NULL, + center = NULL, boundary = NULL, + closed = c("right", "left")) { + + if (!is.null(breaks)) { + if (is.function(breaks)) { + breaks <- breaks(x) + } + if (!scale$is_discrete()) { + breaks <- scale$transform(breaks) + } + bins <- bin_breaks(breaks, closed) + return(bins) + } + + if (!is.null(binwidth)) { + if (is.function(binwidth)) { + binwidth <- binwidth(x) + } + bins <- bin_breaks_width( + scale$dimension(), binwidth, + center = center, boundary = boundary, closed = closed + ) + return(bins) + } + + if (is.function(bins)) { + bins <- bins(x) + } + bin_breaks_bins( + scale$dimension(), bins, + center = center, boundary = boundary, closed = closed + ) +} + bin_vector <- function(x, bins, weight = NULL, pad = FALSE) { check_object(bins, is_bins, "a {.cls ggplot2_bins} object") diff --git a/R/stat-bin.R b/R/stat-bin.R index 2995a21706..b5bde20c6e 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -132,24 +132,11 @@ StatBin <- ggproto("StatBin", Stat, # be listed so parameters are computed correctly origin = NULL, right = NULL, drop = NULL) { x <- flipped_names(flipped_aes)$x - if (!is.null(breaks)) { - if (is.function(breaks)) { - breaks <- breaks(data[[x]]) - } - if (!scales[[x]]$is_discrete()) { - breaks <- scales[[x]]$transform(breaks) - } - bins <- bin_breaks(breaks, closed) - } else if (!is.null(binwidth)) { - if (is.function(binwidth)) { - binwidth <- binwidth(data[[x]]) - } - bins <- bin_breaks_width(scales[[x]]$dimension(), binwidth, - center = center, boundary = boundary, closed = closed) - } else { - bins <- bin_breaks_bins(scales[[x]]$dimension(), bins, center = center, - boundary = boundary, closed = closed) - } + bins <- compute_bins( + data[[x]], scales[[x]], + breaks = breaks, binwidth = binwidth, bins = bins, + center = center, boundary = boundary, closed = closed + ) bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad) keep <- switch( From 5477d8ade9f19462abc67de7b7ff4306611b3c95 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 14:52:04 +0100 Subject: [PATCH 03/19] helper for cutting bins --- R/bin.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/bin.R b/R/bin.R index 62b1a6cc7b..6e0cef1e48 100644 --- a/R/bin.R +++ b/R/bin.R @@ -176,8 +176,7 @@ bin_vector <- function(x, bins, weight = NULL, pad = FALSE) { weight[is.na(weight)] <- 0 } - bin_idx <- cut(x, bins$fuzzy, right = bins$right_closed, - include.lowest = TRUE) + bin_idx <- bin_cut(x, bins) bin_count <- as.numeric(tapply(weight, bin_idx, sum, na.rm = TRUE)) bin_count[is.na(bin_count)] <- 0 @@ -205,6 +204,10 @@ bin_vector <- function(x, bins, weight = NULL, pad = FALSE) { bin_out(bin_count, bin_x, bin_widths) } +bin_cut <- function(x, bins) { + cut(x, bins$fuzzy, right = bins$right_closed, include.lowest = TRUE) +} + bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), xmin = x - width / 2, xmax = x + width / 2) { density <- count / width / sum(abs(count)) From 4873a0c06fe1bd5bed627e17bb946257f69fc92b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 14:53:58 +0100 Subject: [PATCH 04/19] consistency of `stat_bin2d()` --- R/stat-bin2d.R | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index bdb69db23a..88ef209311 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -52,30 +52,45 @@ StatBin2d <- ggproto("StatBin2d", Stat, default_aes = aes(weight = 1, fill = after_stat(count)), required_aes = c("x", "y"), - compute_group = function(data, scales, binwidth = NULL, bins = 30, - breaks = NULL, origin = NULL, drop = TRUE) { + setup_params = function(self, data, params) { + params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") - origin <- dual_param(origin, list(NULL, NULL)) - binwidth <- dual_param(binwidth, list(NULL, NULL)) - breaks <- dual_param(breaks, list(NULL, NULL)) - bins <- dual_param(bins, list(x = 30, y = 30)) + vars <- c("origin", "binwidth", "breaks", "center", "boundary") + params[vars] <- lapply(params[vars], dual_param) + params$closed <- dual_param(params$closed, list(x = "right", y = "right")) + + params + }, + + compute_group = function(data, scales, binwidth = NULL, + bins = 30, breaks = NULL, + center = NULL, boundary = NULL, closed = NULL, + origin = NULL, drop = TRUE) { - xbreaks <- bin2d_breaks(scales$x, breaks$x, origin$x, binwidth$x, bins$x) - ybreaks <- bin2d_breaks(scales$y, breaks$y, origin$y, binwidth$y, bins$y) + bins <- dual_param(bins, list(x = 30, y = 30)) - xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE) - ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE) + xbin <- compute_bins( + data$x, scales$x, breaks$x, binwidth$x, bins$x, + center$x, boundary$x, closed$x + ) + ybin <- compute_bins( + data$y, scales$y, breaks$y, binwidth$y, bins$y, + center$y, boundary$y, closed$y + ) - if (is.null(data$weight)) - data$weight <- 1 + data$weight <- data$weight %||% 1 - out <- tapply_df(data$weight, list(xbin = xbin, ybin = ybin), sum, drop = drop) + cut_id <- list( + xbin = as.integer(bin_cut(data$x, xbin)), + ybin = as.integer(bin_cut(data$y, ybin)) + ) + out <- tapply_df(data$weight, cut_id, sum, drop = drop) - xdim <- bin_loc(xbreaks, out$xbin) + xdim <- bin_loc(xbin$breaks, out$xbin) out$x <- xdim$mid out$width <- xdim$length - ydim <- bin_loc(ybreaks, out$ybin) + ydim <- bin_loc(ybin$breaks, out$ybin) out$y <- ydim$mid out$height <- ydim$length From b5f303911780bf40e8d210100e0ab5b9ce68e251 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 15:18:28 +0100 Subject: [PATCH 05/19] necromancy: resurrect `stat_bin(drop)` by sacrificing `stat_bin(keep.zeroes)` --- NEWS.md | 2 +- R/bin.R | 6 ------ R/stat-bin.R | 25 +++++++++++++++---------- R/stat-bin2d.R | 6 +++++- tests/testthat/test-stat-bin.R | 8 ++++---- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8214635e8c..e1712ee00a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -225,7 +225,7 @@ * The ellipsis argument is now checked in `fortify()`, `get_alt_text()`, `labs()` and several guides (@teunbrand, #3196). * `stat_summary_bin()` no longer ignores `width` parameter (@teunbrand, #4647). -* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449) +* Reintroduced `drop` argument to `stat_bin()` (@teunbrand, #3449) * (internal) removed barriers for using 2D structures as aesthetics (@teunbrand, #4189). * `coord_sf()` no longer errors when dealing with empty graticules (@teunbrand, #6052) diff --git a/R/bin.R b/R/bin.R index 6e0cef1e48..3a0feba9bb 100644 --- a/R/bin.R +++ b/R/bin.R @@ -227,12 +227,6 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), fix_bin_params = function(params, fun, version) { - if (!is.null(params$drop)) { - args <- paste0(fun, c("(drop)", "(pad)")) - deprecate_warn0(version, args[1], args[2]) - params$drop <- NULL - } - if (!is.null(params$origin)) { args <- paste0(fun, c("(origin)", "(boundary)")) deprecate_warn0(version, args[1], args[2]) diff --git a/R/stat-bin.R b/R/stat-bin.R index b5bde20c6e..711b0c5ad7 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -26,10 +26,11 @@ #' or left edges of bins are included in the bin. #' @param pad If `TRUE`, adds empty bins at either end of x. This ensures #' frequency polygons touch 0. Defaults to `FALSE`. -#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such +#' @param drop Treatment of zero count bins. If `"all"` (default), such #' bins are kept as-is. If `"none"`, all zero count bins are filtered out. #' If `"inner"` only zero count bins at the flanks are filtered out, but not -#' in the middle. +#' in the middle. `TRUE` is shorthand for `"all"` and `FALSE` is shorthand +#' for `"none"`. #' @eval rd_computed_vars( #' count = "number of points in bin.", #' density = "density of points in bin, scaled to integrate to 1.", @@ -59,7 +60,7 @@ stat_bin <- function(mapping = NULL, data = NULL, closed = c("right", "left"), pad = FALSE, na.rm = FALSE, - keep.zeroes = "all", + drop = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE) { @@ -82,7 +83,7 @@ stat_bin <- function(mapping = NULL, data = NULL, pad = pad, na.rm = na.rm, orientation = orientation, - keep.zeroes = keep.zeroes, + drop = drop, ... ) ) @@ -95,9 +96,13 @@ stat_bin <- function(mapping = NULL, data = NULL, StatBin <- ggproto("StatBin", Stat, setup_params = function(self, data, params) { params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE) - params$keep.zeroes <- arg_match0( - params$keep.zeroes %||% "all", - c("all", "none", "inner"), arg_nm = "keep.zeroes" + + if (is.logical(params$drop)) { + params$drop <- if (isTRUE(params$drop)) "all" else "none" + } + params$drop <- arg_match0( + params$drop %||% "all", + c("all", "none", "inner"), arg_nm = "drop" ) has_x <- !(is.null(data$x) && is.null(params$x)) @@ -127,10 +132,10 @@ StatBin <- ggproto("StatBin", Stat, compute_group = function(data, scales, binwidth = NULL, bins = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), pad = FALSE, - breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all", + breaks = NULL, flipped_aes = FALSE, drop = "all", # The following arguments are not used, but must # be listed so parameters are computed correctly - origin = NULL, right = NULL, drop = NULL) { + origin = NULL, right = NULL) { x <- flipped_names(flipped_aes)$x bins <- compute_bins( data[[x]], scales[[x]], @@ -140,7 +145,7 @@ StatBin <- ggproto("StatBin", Stat, bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad) keep <- switch( - keep.zeroes, + drop, none = bins$count != 0, inner = inner_runs(bins$count != 0), TRUE diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 88ef209311..4c730c94da 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -53,8 +53,12 @@ StatBin2d <- ggproto("StatBin2d", Stat, required_aes = c("x", "y"), setup_params = function(self, data, params) { - params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") + if (is.character(params$drop)) { + params$drop <- !identical(params$drop, "none") + } + + params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") vars <- c("origin", "binwidth", "breaks", "center", "boundary") params[vars] <- lapply(params[vars], dual_param) params$closed <- dual_param(params$closed, list(x = "right", y = "right")) diff --git a/tests/testthat/test-stat-bin.R b/tests/testthat/test-stat-bin.R index a114748daf..4dda347182 100644 --- a/tests/testthat/test-stat-bin.R +++ b/tests/testthat/test-stat-bin.R @@ -118,17 +118,17 @@ test_that("stat_bin() provides width (#3522)", { expect_equal(out$xmax - out$xmin, rep(binwidth, 10)) }) -test_that("stat_bin(keep.zeroes) options work as intended", { +test_that("stat_bin(drop) options work as intended", { p <- ggplot(data.frame(x = c(1, 2, 2, 3, 5, 6, 6, 7)), aes(x)) + scale_x_continuous(limits = c(-1, 9)) - ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "all")) + ld <- layer_data(p + geom_histogram(binwidth = 1, drop = "all")) expect_equal(ld$x, -1:9) - ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "inner")) + ld <- layer_data(p + geom_histogram(binwidth = 1, drop = "inner")) expect_equal(ld$x, c(1:7)) - ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "none")) + ld <- layer_data(p + geom_histogram(binwidth = 1, drop = "none")) expect_equal(ld$x, c(1:3, 5:7)) }) From 0b620cfdf2e49ea264d4a8a77ae69fc8dd798de1 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 15:23:15 +0100 Subject: [PATCH 06/19] fix `boundary = 0` --- R/stat-bin2d.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 4c730c94da..fb745175fe 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -16,6 +16,7 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, ..., bins = 30, binwidth = NULL, + boundary = 0, drop = TRUE, na.rm = FALSE, show.legend = NA, @@ -32,6 +33,7 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, bins = bins, binwidth = binwidth, drop = drop, + boundary = boundary, na.rm = na.rm, ... ) @@ -68,7 +70,7 @@ StatBin2d <- ggproto("StatBin2d", Stat, compute_group = function(data, scales, binwidth = NULL, bins = 30, breaks = NULL, - center = NULL, boundary = NULL, closed = NULL, + center = NULL, boundary = 0, closed = NULL, origin = NULL, drop = TRUE) { bins <- dual_param(bins, list(x = 30, y = 30)) From 7aabdb7d348ee37ab2d92aa67124a8798001a4f7 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 15:39:51 +0100 Subject: [PATCH 07/19] same treatment for `stat_summary2d()` --- R/stat-summary-2d.R | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/R/stat-summary-2d.R b/R/stat-summary-2d.R index 60e5e49813..2bf05e1516 100644 --- a/R/stat-summary-2d.R +++ b/R/stat-summary-2d.R @@ -92,31 +92,50 @@ StatSummary2d <- ggproto("StatSummary2d", Stat, required_aes = c("x", "y", "z"), dropped_aes = "z", # z gets dropped during statistical transformation + setup_params = function(self, data, params) { + + if (is.character(params$drop)) { + params$drop <- !identical(params$drop, "none") + } + + params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") + vars <- c("origin", "binwidth", "breaks", "center", "boundary") + params[vars] <- lapply(params[vars], dual_param) + params$closed <- dual_param(params$closed, list(x = "right", y = "right")) + + params + }, + compute_group = function(data, scales, binwidth = NULL, bins = 30, breaks = NULL, origin = NULL, drop = TRUE, - fun = "mean", fun.args = list()) { - origin <- dual_param(origin, list(NULL, NULL)) - binwidth <- dual_param(binwidth, list(NULL, NULL)) - breaks <- dual_param(breaks, list(NULL, NULL)) + fun = "mean", fun.args = list(), + boundary = 0, closed = NULL, center = NULL) { bins <- dual_param(bins, list(x = 30, y = 30)) - xbreaks <- bin2d_breaks(scales$x, breaks$x, origin$x, binwidth$x, bins$x) - ybreaks <- bin2d_breaks(scales$y, breaks$y, origin$y, binwidth$y, bins$y) - - xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE) - ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE) + xbin <- compute_bins( + data$x, scales$x, breaks$x, binwidth$x, bins$x, + center$x, boundary$x, closed$x + ) + ybin <- compute_bins( + data$y, scales$y, breaks$y, binwidth$y, bins$y, + center$y, boundary$y, closed$y + ) + cut_id <- list( + xbin = as.integer(bin_cut(data$x, xbin)), + ybin = as.integer(bin_cut(data$y, ybin)) + ) fun <- as_function(fun) f <- function(x) { inject(fun(x, !!!fun.args)) } - out <- tapply_df(data$z, list(xbin = xbin, ybin = ybin), f, drop = drop) + out <- tapply_df(data$z, cut_id, f, drop = drop) - xdim <- bin_loc(xbreaks, out$xbin) + xdim <- bin_loc(xbin$breaks, out$xbin) out$x <- xdim$mid out$width <- xdim$length - ydim <- bin_loc(ybreaks, out$ybin) + ydim <- bin_loc(ybin$breaks, out$ybin) out$y <- ydim$mid out$height <- ydim$length From d1c963ace2250e52810172a6f0cabf493b4763e5 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 3 Dec 2024 15:54:20 +0100 Subject: [PATCH 08/19] Implement `StatBin2d` as subclass of `StatSummary2d` --- R/stat-bin2d.R | 59 +++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index fb745175fe..7009e3f6b5 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -47,67 +47,34 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, stat_bin2d <- stat_bin_2d #' @rdname ggplot2-ggproto +#' @include stat-summary-2d.R #' @format NULL #' @usage NULL #' @export -StatBin2d <- ggproto("StatBin2d", Stat, +StatBin2d <- ggproto( + "StatBin2d", StatSummary2d, default_aes = aes(weight = 1, fill = after_stat(count)), required_aes = c("x", "y"), - setup_params = function(self, data, params) { + compute_group = function(self, data, scales, binwidth = NULL, bins = 30, + breaks = NULL, origin = NULL, drop = TRUE, + boundary = 0, closed = NULL, center = NULL) { - if (is.character(params$drop)) { - params$drop <- !identical(params$drop, "none") - } - - params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") - vars <- c("origin", "binwidth", "breaks", "center", "boundary") - params[vars] <- lapply(params[vars], dual_param) - params$closed <- dual_param(params$closed, list(x = "right", y = "right")) - - params - }, - - compute_group = function(data, scales, binwidth = NULL, - bins = 30, breaks = NULL, - center = NULL, boundary = 0, closed = NULL, - origin = NULL, drop = TRUE) { + data$z <- data$weight %||% 1 + data$weight <- NULL - bins <- dual_param(bins, list(x = 30, y = 30)) - - xbin <- compute_bins( - data$x, scales$x, breaks$x, binwidth$x, bins$x, - center$x, boundary$x, closed$x - ) - ybin <- compute_bins( - data$y, scales$y, breaks$y, binwidth$y, bins$y, - center$y, boundary$y, closed$y + out <- StatSummary2d$compute_group( + data, scales, binwidth = binwidth, bins = bins, breaks = breaks, + drop = drop, fun = "sum", boundary = boundary, closed = closed, + center = center ) - data$weight <- data$weight %||% 1 - - cut_id <- list( - xbin = as.integer(bin_cut(data$x, xbin)), - ybin = as.integer(bin_cut(data$y, ybin)) - ) - out <- tapply_df(data$weight, cut_id, sum, drop = drop) - - xdim <- bin_loc(xbin$breaks, out$xbin) - out$x <- xdim$mid - out$width <- xdim$length - - ydim <- bin_loc(ybin$breaks, out$ybin) - out$y <- ydim$mid - out$height <- ydim$length - out$count <- out$value out$ncount <- out$count / max(out$count, na.rm = TRUE) out$density <- out$count / sum(out$count, na.rm = TRUE) out$ndensity <- out$density / max(out$density, na.rm = TRUE) out - }, - - dropped_aes = "weight" # No longer available after transformation + } ) dual_param <- function(x, default = list(x = NULL, y = NULL)) { From 4919783b42ce3a2a7ba2e89e7261c5078388906d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 09:19:37 +0100 Subject: [PATCH 09/19] consistency for `stat_summary_bin()` --- R/stat-summary-bin.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/R/stat-summary-bin.R b/R/stat-summary-bin.R index a56bea189e..e3db18b102 100644 --- a/R/stat-summary-bin.R +++ b/R/stat-summary-bin.R @@ -79,16 +79,21 @@ StatSummaryBin <- ggproto("StatSummaryBin", Stat, compute_group = function(data, scales, fun = NULL, bins = 30, binwidth = NULL, breaks = NULL, origin = NULL, right = FALSE, na.rm = FALSE, - flipped_aes = FALSE, width = NULL) { - data <- flip_data(data, flipped_aes) + flipped_aes = FALSE, width = NULL, center = NULL, + boundary = NULL, closed = c("right", "left")) { + x <- flipped_names(flipped_aes)$x - breaks <- bin2d_breaks(scales[[x]], breaks, origin, binwidth, bins, - closed = if (right) "right" else "left") + bins <- compute_bins( + data[[x]], scales[[x]], + breaks = breaks, binwidth = binwidth, bins = bins, + center = center, boundary = boundary, closed = closed + ) + data$bin <- bin_cut(data[[x]], bins) - data$bin <- cut(data$x, breaks, include.lowest = TRUE, labels = FALSE) + data <- flip_data(data, flipped_aes) out <- dapply(data, "bin", fun %||% function(df) mean_se(df$y)) - locs <- bin_loc(breaks, out$bin) + locs <- bin_loc(bins$breaks, out$bin) out$x <- locs$mid out$width <- width %||% if (scales[[x]]$is_discrete()) 0.9 else locs$length out$flipped_aes <- flipped_aes From 3933a923f7a1984c50de1a886af1b38fbefb9a23 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 09:27:14 +0100 Subject: [PATCH 10/19] document --- DESCRIPTION | 2 +- man/geom_bin_2d.Rd | 1 + man/geom_histogram.Rd | 7 ++++--- man/ggplot2-ggproto.Rd | 47 +++++++++++++++++++++--------------------- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 84b7e6357b..4dc60b28e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -242,6 +242,7 @@ Collate: 'scales-.R' 'stat-align.R' 'stat-bin.R' + 'stat-summary-2d.R' 'stat-bin2d.R' 'stat-bindot.R' 'stat-binhex.R' @@ -262,7 +263,6 @@ Collate: 'stat-smooth-methods.R' 'stat-smooth.R' 'stat-sum.R' - 'stat-summary-2d.R' 'stat-summary-bin.R' 'stat-summary-hex.R' 'stat-summary.R' diff --git a/man/geom_bin_2d.Rd b/man/geom_bin_2d.Rd index fa3b32b4ce..c481fd65f7 100644 --- a/man/geom_bin_2d.Rd +++ b/man/geom_bin_2d.Rd @@ -26,6 +26,7 @@ stat_bin_2d( ..., bins = 30, binwidth = NULL, + boundary = 0, drop = TRUE, na.rm = FALSE, show.legend = NA, diff --git a/man/geom_histogram.Rd b/man/geom_histogram.Rd index 32f9c39610..0a27e87c10 100644 --- a/man/geom_histogram.Rd +++ b/man/geom_histogram.Rd @@ -46,7 +46,7 @@ stat_bin( closed = c("right", "left"), pad = FALSE, na.rm = FALSE, - keep.zeroes = "all", + drop = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE @@ -174,10 +174,11 @@ or left edges of bins are included in the bin.} \item{pad}{If \code{TRUE}, adds empty bins at either end of x. This ensures frequency polygons touch 0. Defaults to \code{FALSE}.} -\item{keep.zeroes}{Treatment of zero count bins. If \code{"all"} (default), such +\item{drop}{Treatment of zero count bins. If \code{"all"} (default), such bins are kept as-is. If \code{"none"}, all zero count bins are filtered out. If \code{"inner"} only zero count bins at the flanks are filtered out, but not -in the middle.} +in the middle. \code{TRUE} is shorthand for \code{"all"} and \code{FALSE} is shorthand +for \code{"none"}.} } \description{ Visualise the distribution of a single continuous variable by dividing diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index 602ce35e4f..4d934a1e46 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -4,28 +4,29 @@ % R/geom-raster.R, R/annotation-raster.R, R/axis-secondary.R, R/coord-.R, % R/coord-cartesian-.R, R/coord-fixed.R, R/coord-flip.R, R/coord-map.R, % R/coord-polar.R, R/coord-quickmap.R, R/coord-radial.R, R/coord-transform.R, -% R/facet-.R, R/facet-grid-.R, R/facet-null.R, R/facet-wrap.R, R/stat-.R, -% R/geom-abline.R, R/geom-rect.R, R/geom-bar.R, R/geom-tile.R, R/geom-bin2d.R, -% R/geom-blank.R, R/geom-boxplot.R, R/geom-col.R, R/geom-path.R, -% R/geom-contour.R, R/geom-crossbar.R, R/geom-segment.R, R/geom-curve.R, -% R/geom-ribbon.R, R/geom-density.R, R/geom-density2d.R, R/geom-dotplot.R, -% R/geom-errorbar.R, R/geom-function.R, R/geom-hex.R, R/geom-hline.R, -% R/geom-label.R, R/geom-linerange.R, R/geom-point.R, R/geom-pointrange.R, -% R/geom-quantile.R, R/geom-rug.R, R/geom-smooth.R, R/geom-spoke.R, -% R/geom-text.R, R/geom-violin.R, R/geom-vline.R, R/guide-.R, R/guide-axis.R, -% R/guide-axis-logticks.R, R/guide-axis-stack.R, R/guide-axis-theta.R, -% R/guide-legend.R, R/guide-bins.R, R/guide-colorbar.R, R/guide-colorsteps.R, -% R/guide-custom.R, R/guide-none.R, R/guide-old.R, R/layout.R, R/position-.R, -% R/position-dodge.R, R/position-dodge2.R, R/position-identity.R, -% R/position-jitter.R, R/position-jitterdodge.R, R/position-nudge.R, -% R/position-stack.R, R/scale-.R, R/scale-binned.R, R/scale-continuous.R, -% R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, R/stat-align.R, -% R/stat-bin.R, R/stat-bin2d.R, R/stat-bindot.R, R/stat-binhex.R, -% R/stat-boxplot.R, R/stat-contour.R, R/stat-count.R, R/stat-density-2d.R, -% R/stat-density.R, R/stat-ecdf.R, R/stat-ellipse.R, R/stat-function.R, -% R/stat-identity.R, R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, -% R/stat-smooth.R, R/stat-sum.R, R/stat-summary-2d.R, R/stat-summary-bin.R, -% R/stat-summary-hex.R, R/stat-summary.R, R/stat-unique.R, R/stat-ydensity.R +% R/facet-.R, R/facet-grid-.R, R/facet-null.R, R/facet-wrap.R, +% R/stat-.R, R/geom-abline.R, R/geom-rect.R, R/geom-bar.R, R/geom-tile.R, +% R/geom-bin2d.R, R/geom-blank.R, R/geom-boxplot.R, R/geom-col.R, +% R/geom-path.R, R/geom-contour.R, R/geom-crossbar.R, R/geom-segment.R, +% R/geom-curve.R, R/geom-ribbon.R, R/geom-density.R, R/geom-density2d.R, +% R/geom-dotplot.R, R/geom-errorbar.R, R/geom-function.R, R/geom-hex.R, +% R/geom-hline.R, R/geom-label.R, R/geom-linerange.R, R/geom-point.R, +% R/geom-pointrange.R, R/geom-quantile.R, R/geom-rug.R, R/geom-smooth.R, +% R/geom-spoke.R, R/geom-text.R, R/geom-violin.R, R/geom-vline.R, +% R/guide-.R, R/guide-axis.R, R/guide-axis-logticks.R, R/guide-axis-stack.R, +% R/guide-axis-theta.R, R/guide-legend.R, R/guide-bins.R, R/guide-colorbar.R, +% R/guide-colorsteps.R, R/guide-custom.R, R/guide-none.R, R/guide-old.R, +% R/layout.R, R/position-.R, R/position-dodge.R, R/position-dodge2.R, +% R/position-identity.R, R/position-jitter.R, R/position-jitterdodge.R, +% R/position-nudge.R, R/position-stack.R, R/scale-.R, R/scale-binned.R, +% R/scale-continuous.R, R/scale-date.R, R/scale-discrete-.R, +% R/scale-identity.R, R/stat-align.R, R/stat-bin.R, R/stat-summary-2d.R, +% R/stat-bin2d.R, R/stat-bindot.R, R/stat-binhex.R, R/stat-boxplot.R, +% R/stat-contour.R, R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, +% R/stat-ecdf.R, R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, +% R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, +% R/stat-sum.R, R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, +% R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} \alias{ggplot2-ggproto} @@ -125,6 +126,7 @@ \alias{ScaleContinuousIdentity} \alias{StatAlign} \alias{StatBin} +\alias{StatSummary2d} \alias{StatBin2d} \alias{StatBindot} \alias{StatBinhex} @@ -144,7 +146,6 @@ \alias{StatQuantile} \alias{StatSmooth} \alias{StatSum} -\alias{StatSummary2d} \alias{StatSummaryBin} \alias{StatSummaryHex} \alias{StatSummary} From d0417c3f49e3c5b266a2202717156fe07bd7a0bf Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 09:27:54 +0100 Subject: [PATCH 11/19] collect bin utilities in one place --- R/bin.R | 12 ++++++++++++ R/stat-bin2d.R | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/bin.R b/R/bin.R index 3a0feba9bb..21b0cbe738 100644 --- a/R/bin.R +++ b/R/bin.R @@ -225,6 +225,18 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), ) } +bin_loc <- function(x, id) { + left <- x[-length(x)] + right <- x[-1] + + list( + left = left[id], + right = right[id], + mid = ((left + right) / 2)[id], + length = diff(x)[id] + ) +} + fix_bin_params = function(params, fun, version) { if (!is.null(params$origin)) { diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 7009e3f6b5..0a29abc1b9 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -128,15 +128,3 @@ bin2d_breaks <- function(scale, breaks = NULL, origin = NULL, binwidth = NULL, } bins(breaks, closed)$fuzzy } - -bin_loc <- function(x, id) { - left <- x[-length(x)] - right <- x[-1] - - list( - left = left[id], - right = right[id], - mid = ((left + right) / 2)[id], - length = diff(x)[id] - ) -} From 22690b4a84a4901fed3e9ae4d8089aaa542fdea3 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 09:31:48 +0100 Subject: [PATCH 12/19] remove vestigial `bin2d_breaks()` --- R/stat-bin2d.R | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 0a29abc1b9..678c578690 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -90,41 +90,3 @@ dual_param <- function(x, default = list(x = NULL, y = NULL)) { list(x = x, y = x) } } - -bin2d_breaks <- function(scale, breaks = NULL, origin = NULL, binwidth = NULL, - bins = 30, closed = "right") { - # Bins for categorical data should take the width of one level, - # and should show up centered over their tick marks. All other parameters - # are ignored. - if (scale$is_discrete()) { - breaks <- scale$get_breaks() - return(-0.5 + seq_len(length(breaks) + 1)) - } else { - if (!is.null(breaks)) { - breaks <- scale$transform(breaks) - } - } - - if (!is.null(breaks)) - return(breaks) - - range <- scale$get_limits() - - if (is.null(binwidth) || identical(binwidth, NA)) { - binwidth <- diff(range) / bins - } - check_number_decimal(binwidth) - - if (is.null(origin) || identical(origin, NA)) { - origin <- round_any(range[1], binwidth, floor) - } - check_number_decimal(origin) - - breaks <- seq(origin, range[2] + binwidth, binwidth) - - # Check if the last bin lies fully outside the range - if (length(breaks) > 1 && breaks[length(breaks) - 1] >= range[2]) { - breaks <- breaks[-length(breaks)] - } - bins(breaks, closed)$fuzzy -} From 11f5a841b46b1c0b3d0576b1661e1ad6f97c61a3 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 09:45:08 +0100 Subject: [PATCH 13/19] discard superfluous `self` --- R/stat-bin2d.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 678c578690..032f810647 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -56,7 +56,7 @@ StatBin2d <- ggproto( default_aes = aes(weight = 1, fill = after_stat(count)), required_aes = c("x", "y"), - compute_group = function(self, data, scales, binwidth = NULL, bins = 30, + compute_group = function(data, scales, binwidth = NULL, bins = 30, breaks = NULL, origin = NULL, drop = TRUE, boundary = 0, closed = NULL, center = NULL) { From bb65b8e8691edc6c44beba7a150b49bab3e19d04 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 10:02:13 +0100 Subject: [PATCH 14/19] bring `stat_bindot()` into the fold --- R/stat-bindot.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/stat-bindot.R b/R/stat-bindot.R index 85eecc4d54..66184a527c 100644 --- a/R/stat-bindot.R +++ b/R/stat-bindot.R @@ -77,13 +77,11 @@ StatBindot <- ggproto("StatBindot", Stat, } if (method == "histodot") { - closed <- if (right) "right" else "left" - if (!is.null(binwidth)) { - bins <- bin_breaks_width(range, binwidth, boundary = origin, closed = closed) - } else { - bins <- bin_breaks_bins(range, 30, boundary = origin, closed = closed) - } - + bins <- compute_bins( + values, scales[[binaxis]], + breaks = NULL, binwidth = binwidth, bins = 30, center = NULL, + boundary = origin, closed = if (right) "right" else "left" + ) data <- bin_vector(values, bins, weight = data$weight, pad = FALSE) # Change "width" column to "binwidth" for consistency From 917045af020631e903517da806f6d3a67984b53e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 10:23:25 +0100 Subject: [PATCH 15/19] centralise argument checking in `compute_bins()` --- R/bin.R | 41 ++++++++++++----------------- tests/testthat/_snaps/stat-bin.md | 37 ++++++-------------------- tests/testthat/_snaps/stat-bin2d.md | 6 ++--- tests/testthat/test-stat-bin.R | 23 +++++++--------- tests/testthat/test-stat-bin2d.R | 2 +- 5 files changed, 39 insertions(+), 70 deletions(-) diff --git a/R/bin.R b/R/bin.R index 21b0cbe738..91062288d5 100644 --- a/R/bin.R +++ b/R/bin.R @@ -54,19 +54,12 @@ bin_breaks <- function(breaks, closed = c("right", "left")) { bin_breaks_width <- function(x_range, width = NULL, center = NULL, boundary = NULL, closed = c("right", "left")) { - check_length(x_range, 2L) - # binwidth seems to be the argument name supplied to width. (stat-bin and stat-bindot) - check_number_decimal(width, min = 0, allow_infinite = FALSE, arg = "binwidth") - - if (!is.null(boundary) && !is.null(center)) { - cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified.") - } else if (is.null(boundary)) { + if (is.null(boundary)) { if (is.null(center)) { # If neither edge nor center given, compute both using tile layer's # algorithm. This puts min and max of data in outer half of their bins. boundary <- width / 2 - } else { # If center given but not boundary, compute boundary. boundary <- center - width / 2 @@ -75,9 +68,6 @@ bin_breaks_width <- function(x_range, width = NULL, center = NULL, # Find the left side of left-most bin: inputs could be Dates or POSIXct, so # coerce to numeric first. - x_range <- as.numeric(x_range) - width <- as.numeric(width) - boundary <- as.numeric(boundary) shift <- floor((x_range[1] - boundary) / width) origin <- boundary + shift * width @@ -104,9 +94,7 @@ bin_breaks_width <- function(x_range, width = NULL, center = NULL, bin_breaks_bins <- function(x_range, bins = 30, center = NULL, boundary = NULL, closed = c("right", "left")) { - check_length(x_range, 2L) - check_number_whole(bins, min = 1) if (zero_range(x_range)) { # 0.1 is the same width as the expansion `default_expansion()` gives for 0-width data width <- 0.1 @@ -128,27 +116,38 @@ bin_breaks_bins <- function(x_range, bins = 30, center = NULL, # Compute bins ------------------------------------------------------------ -compute_bins <- function(x, scale, breaks = NULL, binwidth = NULL, bins = NULL, +compute_bins <- function(x, scale = NULL, breaks = NULL, binwidth = NULL, bins = NULL, center = NULL, boundary = NULL, closed = c("right", "left")) { + range <- if (is.scale(scale)) scale$dimension() else range(x) + check_length(range, 2L) + if (!is.null(breaks)) { if (is.function(breaks)) { breaks <- breaks(x) } - if (!scale$is_discrete()) { + if (is.scale(scale) && !scale$is_discrete()) { breaks <- scale$transform(breaks) } + check_numeric(breaks) bins <- bin_breaks(breaks, closed) return(bins) } + check_number_decimal(boundary, allow_infinite = FALSE, allow_null = TRUE) + check_number_decimal(center, allow_infinite = FALSE, allow_null = TRUE) + if (!is.null(boundary) && !is.null(center)) { + cli::cli_abort("Only one of {.arg boundary} and {.arg center} may be specified.") + } + if (!is.null(binwidth)) { if (is.function(binwidth)) { binwidth <- binwidth(x) } + check_number_decimal(binwidth, min = 0, allow_infinite = FALSE) bins <- bin_breaks_width( - scale$dimension(), binwidth, + range, binwidth, center = center, boundary = boundary, closed = closed ) return(bins) @@ -157,8 +156,9 @@ compute_bins <- function(x, scale, breaks = NULL, binwidth = NULL, bins = NULL, if (is.function(bins)) { bins <- bins(x) } + check_number_whole(bins, min = 1, allow_infinite = FALSE) bin_breaks_bins( - scale$dimension(), bins, + range, bins, center = center, boundary = boundary, closed = closed ) } @@ -253,13 +253,6 @@ fix_bin_params = function(params, fun, version) { params$right <- NULL } - if (!is.null(params$boundary) && !is.null(params$center)) { - cli::cli_abort( - "Only one of {.arg boundary} and {.arg center} may be specified \\ - in {.fn {fun}}." - ) - } - if (is.null(params$breaks %||% params$binwidth %||% params$bins)) { cli::cli_inform( "{.fn {fun}} using {.code bins = 30}. Pick better value {.arg binwidth}." diff --git a/tests/testthat/_snaps/stat-bin.md b/tests/testthat/_snaps/stat-bin.md index 1d3f49ad1a..db0b8f44c0 100644 --- a/tests/testthat/_snaps/stat-bin.md +++ b/tests/testthat/_snaps/stat-bin.md @@ -23,51 +23,30 @@ # inputs to binning are checked - Computation failed in `stat_bin()`. - Caused by error in `bins()`: - ! `breaks` must be a vector, not a character vector. + `breaks` must be a vector, not a character vector. --- - `x_range` must be a vector of length 2, not length 1. + `binwidth` must be a number, not a character vector. --- - Computation failed in `stat_bin()`. - Caused by error in `bin_breaks_width()`: - ! `binwidth` must be a number, not a character vector. + `binwidth` must be a number larger than or equal to 0, not the number -4. --- - Computation failed in `stat_bin()`. - Caused by error in `bin_breaks_width()`: - ! `binwidth` must be a number larger than or equal to 0, not the number -4. - ---- - - `x_range` must be a vector of length 2, not length 1. - ---- - - Computation failed in `stat_bin()`. - Caused by error in `bin_breaks_bins()`: - ! `bins` must be a whole number larger than or equal to 1, not the number -4. + `bins` must be a whole number larger than or equal to 1, not the number -4. # setting boundary and center - Code - comp_bin(df, boundary = 5, center = 0) - Condition - Error in `stat_bin()`: - ! Problem while computing stat. - i Error occurred in the 1st layer. - Caused by error in `fix_bin_params()`: - ! Only one of `boundary` and `center` may be specified in `stat_bin()`. + Computation failed in `stat_bin()`. + Caused by error in `compute_bins()`: + ! Only one of `boundary` and `center` may be specified. # bin errors at high bin counts Code - bin_breaks_width(c(1, 2e+06), 1) + compute_bins(c(1, 2e+06), binwidth = 1) Condition Error in `bin_breaks_width()`: ! The number of histogram bins must be less than 1,000,000. diff --git a/tests/testthat/_snaps/stat-bin2d.md b/tests/testthat/_snaps/stat-bin2d.md index ffc60d7f92..a0bb2eebc7 100644 --- a/tests/testthat/_snaps/stat-bin2d.md +++ b/tests/testthat/_snaps/stat-bin2d.md @@ -1,12 +1,12 @@ # binwidth is respected Computation failed in `stat_bin2d()`. - Caused by error in `bin2d_breaks()`: + Caused by error in `compute_bins()`: ! `binwidth` must be a number, not a double vector. --- Computation failed in `stat_bin2d()`. - Caused by error in `bin2d_breaks()`: - ! `origin` must be a number, not a double vector. + Caused by error in `compute_bins()`: + ! `boundary` must be a number or `NULL`, not a double vector. diff --git a/tests/testthat/test-stat-bin.R b/tests/testthat/test-stat-bin.R index 4dda347182..3df87821b8 100644 --- a/tests/testthat/test-stat-bin.R +++ b/tests/testthat/test-stat-bin.R @@ -147,19 +147,19 @@ test_that("bins is strictly adhered to", { # Default case nbreaks <- vapply(nbins, function(bins) { - length(bin_breaks_bins(c(0, 10), bins)$breaks) + length(compute_bins(c(0, 10), bins = bins)$breaks) }, numeric(1)) expect_equal(nbreaks, nbins + 1) # Center is provided nbreaks <- vapply(nbins, function(bins) { - length(bin_breaks_bins(c(0, 10), bins, center = 0)$breaks) + length(compute_bins(c(0, 10), bins = bins, center = 0)$breaks) }, numeric(1)) expect_equal(nbreaks, nbins + 1) # Boundary is provided nbreaks <- vapply(nbins, function(bins) { - length(bin_breaks_bins(c(0, 10), bins, boundary = 0)$breaks) + length(compute_bins(c(0, 10), bins = bins, boundary = 0)$breaks) }, numeric(1)) expect_equal(nbreaks, nbins + 1) @@ -172,13 +172,10 @@ comp_bin <- function(df, ...) { test_that("inputs to binning are checked", { dat <- data_frame(x = c(0, 10)) - expect_snapshot_error(comp_bin(dat, breaks = letters)) - expect_snapshot_error(bin_breaks_width(3)) - expect_snapshot_error(comp_bin(dat, binwidth = letters)) - expect_snapshot_error(comp_bin(dat, binwidth = -4)) - - expect_snapshot_error(bin_breaks_bins(3)) - expect_snapshot_error(comp_bin(dat, bins = -4)) + expect_snapshot_error(compute_bins(dat, breaks = letters)) + expect_snapshot_error(compute_bins(dat, binwidth = letters)) + expect_snapshot_error(compute_bins(dat, binwidth = -4)) + expect_snapshot_error(compute_bins(dat, bins = -4)) }) test_that("closed left or right", { @@ -208,14 +205,14 @@ test_that("setting boundary and center", { df <- data_frame(x = c(0, 30)) # Error if both boundary and center are specified - expect_snapshot(comp_bin(df, boundary = 5, center = 0), error = TRUE) + expect_snapshot_warning(comp_bin(df, boundary = 5, center = 0, bins = 30)) res <- comp_bin(df, binwidth = 10, boundary = 0, pad = FALSE) expect_identical(res$count, c(1, 0, 1)) expect_identical(res$xmin[1], 0) expect_identical(res$xmax[3], 30) - res <- comp_bin(df, binwidth = 10, center = 0, pad = FALSE) + res <- comp_bin(df, binwidth = 10, center = 0, boundary = NULL, pad = FALSE) expect_identical(res$count, c(1, 0, 0, 1)) expect_identical(res$xmin[1], df$x[1] - 5) expect_identical(res$xmax[4], df$x[2] + 5) @@ -230,7 +227,7 @@ test_that("weights are added", { }) test_that("bin errors at high bin counts", { - expect_snapshot(bin_breaks_width(c(1, 2e6), 1), error = TRUE) + expect_snapshot(compute_bins(c(1, 2e6), binwidth = 1), error = TRUE) }) # stat_count -------------------------------------------------------------- diff --git a/tests/testthat/test-stat-bin2d.R b/tests/testthat/test-stat-bin2d.R index 54d95679c9..6d83448956 100644 --- a/tests/testthat/test-stat-bin2d.R +++ b/tests/testthat/test-stat-bin2d.R @@ -14,7 +14,7 @@ test_that("binwidth is respected", { expect_snapshot_warning(ggplot_build(p)) p <- ggplot(df, aes(x, y)) + - stat_bin_2d(geom = "tile", origin = c(0.25, 0.5, 0.75)) + stat_bin_2d(geom = "tile", boundary = c(0.25, 0.5, 0.75)) expect_snapshot_warning(ggplot_build(p)) }) From b08c93dc07d3fdbbaba61fdc29511e6b5844cb9e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 11:28:55 +0100 Subject: [PATCH 16/19] `stat_bin_2d(boundary)` internally defaults to 0 --- R/stat-bin2d.R | 7 ++++--- R/stat-summary-2d.R | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 032f810647..58060b8c45 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -16,7 +16,6 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, ..., bins = 30, binwidth = NULL, - boundary = 0, drop = TRUE, na.rm = FALSE, show.legend = NA, @@ -33,7 +32,6 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, bins = bins, binwidth = binwidth, drop = drop, - boundary = boundary, na.rm = na.rm, ... ) @@ -58,11 +56,14 @@ StatBin2d <- ggproto( compute_group = function(data, scales, binwidth = NULL, bins = 30, breaks = NULL, origin = NULL, drop = TRUE, - boundary = 0, closed = NULL, center = NULL) { + boundary = NULL, closed = NULL, center = NULL) { data$z <- data$weight %||% 1 data$weight <- NULL + # For backward compatibility, boundary defaults to 0 + boundary <- boundary %||% if (is.null(center)) list(x = 0, y = 0) + out <- StatSummary2d$compute_group( data, scales, binwidth = binwidth, bins = bins, breaks = breaks, drop = drop, fun = "sum", boundary = boundary, closed = closed, diff --git a/R/stat-summary-2d.R b/R/stat-summary-2d.R index 2bf05e1516..1a388f6c1d 100644 --- a/R/stat-summary-2d.R +++ b/R/stat-summary-2d.R @@ -100,7 +100,7 @@ StatSummary2d <- ggproto("StatSummary2d", Stat, params <- fix_bin_params(params, fun = snake_class(self), version = "3.5.2") vars <- c("origin", "binwidth", "breaks", "center", "boundary") - params[vars] <- lapply(params[vars], dual_param) + params[vars] <- lapply(params[vars], dual_param, default = NULL) params$closed <- dual_param(params$closed, list(x = "right", y = "right")) params From eb80474401c9997b8adf1a107d6b6c33393812be Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 11:45:26 +0100 Subject: [PATCH 17/19] add news bullets --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index dffd4d2fdd..8b47eaa921 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # ggplot2 (development version) +* All binning stats now use the `boundary`/`center` parametrisation rather + than `origin`, following in `stat_bin()`'s footsteps (@teunbrand). +* `stat_summary_2d()` and `stat_bin_2d()` now deal with zero-range data + more elegantly (@teunbrand, #6207). * New stat: `stat_manual()` for arbitrary computations (@teunbrand, #3501) * Reversal of a dimension, typically 'x' or 'y', is now controlled by the `reverse` argument in `coord_cartesian()`, `coord_fixed()`, `coord_radial()` From 653410fb6a40448b71f83ad8d3c7cf34e33c6d83 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Dec 2024 12:47:58 +0100 Subject: [PATCH 18/19] allow lambda notation for breaks/binwidth/bins --- R/bin.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/bin.R b/R/bin.R index 91062288d5..f45fe90090 100644 --- a/R/bin.R +++ b/R/bin.R @@ -124,6 +124,7 @@ compute_bins <- function(x, scale = NULL, breaks = NULL, binwidth = NULL, bins = check_length(range, 2L) if (!is.null(breaks)) { + breaks <- allow_lambda(breaks) if (is.function(breaks)) { breaks <- breaks(x) } @@ -142,6 +143,7 @@ compute_bins <- function(x, scale = NULL, breaks = NULL, binwidth = NULL, bins = } if (!is.null(binwidth)) { + binwidth <- allow_lambda(binwidth) if (is.function(binwidth)) { binwidth <- binwidth(x) } @@ -153,6 +155,7 @@ compute_bins <- function(x, scale = NULL, breaks = NULL, binwidth = NULL, bins = return(bins) } + bins <- allow_lambda(bins) if (is.function(bins)) { bins <- bins(x) } From d15151f53f6f3ea35fc29646acd7d264be469111 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 8 Jan 2025 10:54:15 +0100 Subject: [PATCH 19/19] improve docs --- R/geom-bin2d.R | 2 +- R/stat-bin2d.R | 18 ++++++++++++---- R/stat-binhex.R | 7 +++++++ R/stat-summary-2d.R | 1 + man/geom_bin_2d.Rd | 47 ++++++++++++++++++++++++++++++++++++------ man/geom_hex.Rd | 28 ++++++++++++++++++++----- man/stat_summary_2d.Rd | 26 +++++++++++++++++++---- 7 files changed, 109 insertions(+), 20 deletions(-) diff --git a/R/geom-bin2d.R b/R/geom-bin2d.R index 5a143df51d..e0c78ab5e4 100644 --- a/R/geom-bin2d.R +++ b/R/geom-bin2d.R @@ -25,7 +25,7 @@ NULL #' # You can control the size of the bins by specifying the number of #' # bins in each direction: #' d + geom_bin_2d(bins = 10) -#' d + geom_bin_2d(bins = 30) +#' d + geom_bin_2d(bins = list(x = 30, y = 10)) #' #' # Or by specifying the width of the bins #' d + geom_bin_2d(binwidth = c(0.1, 0.1)) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 58060b8c45..fe27a41162 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -1,7 +1,4 @@ -#' @param bins numeric vector giving number of bins in both vertical and -#' horizontal directions. Set to 30 by default. -#' @param binwidth Numeric vector giving bin width in both vertical and -#' horizontal directions. Overrides `bins` if both set. +#' @inheritParams stat_bin #' @param drop if `TRUE` removes all cells with 0 counts. #' @export #' @rdname geom_bin_2d @@ -11,11 +8,21 @@ #' ncount = "count, scaled to maximum of 1.", #' ndensity = "density, scaled to a maximum of 1." #' ) +#' @section Controlling binning parameters for the x and y directions: +#' The arguments `bins`, `binwidth`, `breaks`, `center`, and `boundary` can +#' be set separately for the x and y directions. When given as a scalar, one +#' value applies to both directions. When given as a vector of length two, +#' the first is applied to the x direction and the second to the y direction. +#' Alternatively, these can be a named list containing `x` and `y` elements, +#' for example `list(x = 10, y = 20)`. stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "tile", position = "identity", ..., bins = 30, binwidth = NULL, + center = NULL, + boundary = NULL, + breaks = NULL, drop = TRUE, na.rm = FALSE, show.legend = NA, @@ -31,6 +38,9 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, params = list2( bins = bins, binwidth = binwidth, + center = center, + boundary = boundary, + breaks = breaks, drop = drop, na.rm = na.rm, ... diff --git a/R/stat-binhex.R b/R/stat-binhex.R index 0b5d3991c6..be5b61daf7 100644 --- a/R/stat-binhex.R +++ b/R/stat-binhex.R @@ -7,6 +7,13 @@ #' ncount = "count, scaled to maximum of 1.", #' ndensity = "density, scaled to maximum of 1." #' ) +#' @section Controlling binning parameters for the x and y directions: +#' The arguments `bins` and `binwidth` can +#' be set separately for the x and y directions. When given as a scalar, one +#' value applies to both directions. When given as a vector of length two, +#' the first is applied to the x direction and the second to the y direction. +#' Alternatively, these can be a named list containing `x` and `y` elements, +#' for example `list(x = 10, y = 20)`. stat_bin_hex <- function(mapping = NULL, data = NULL, geom = "hex", position = "identity", ..., diff --git a/R/stat-summary-2d.R b/R/stat-summary-2d.R index 1a388f6c1d..41d0c5b588 100644 --- a/R/stat-summary-2d.R +++ b/R/stat-summary-2d.R @@ -28,6 +28,7 @@ #' @param drop drop if the output of `fun` is `NA`. #' @param fun function for summary. #' @param fun.args A list of extra arguments to pass to `fun` +#' @inheritSection stat_bin_2d Controlling binning parameters for the x and y directions #' @export #' @examples #' d <- ggplot(diamonds, aes(carat, depth, z = price)) diff --git a/man/geom_bin_2d.Rd b/man/geom_bin_2d.Rd index fa3b32b4ce..dc0b9ce082 100644 --- a/man/geom_bin_2d.Rd +++ b/man/geom_bin_2d.Rd @@ -26,6 +26,9 @@ stat_bin_2d( ..., bins = 30, binwidth = NULL, + center = NULL, + boundary = NULL, + breaks = NULL, drop = TRUE, na.rm = FALSE, show.legend = NA, @@ -115,11 +118,33 @@ the default plot specification, e.g. \code{\link[=borders]{borders()}}.} these connections, see how the \link[=layer_stats]{stat} and \link[=layer_geoms]{geom} arguments work.} -\item{bins}{numeric vector giving number of bins in both vertical and -horizontal directions. Set to 30 by default.} - -\item{binwidth}{Numeric vector giving bin width in both vertical and -horizontal directions. Overrides \code{bins} if both set.} +\item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} + +\item{binwidth}{The width of the bins. Can be specified as a numeric value +or as a function that takes x after scale transformation as input and +returns a single numeric value. When specifying a function along with a +grouping structure, the function will be called once per group. +The default is to use the number of bins in \code{bins}, +covering the range of the data. You should always override +this value, exploring multiple widths to find the best to illustrate the +stories in your data. + +The bin width of a date variable is the number of days in each time; the +bin width of a time variable is the number of seconds.} + +\item{center, boundary}{bin position specifiers. Only one, \code{center} or +\code{boundary}, may be specified for a single plot. \code{center} specifies the +center of one of the bins. \code{boundary} specifies the boundary between two +bins. Note that if either is above or below the range of the data, things +will be shifted by the appropriate integer multiple of \code{binwidth}. +For example, to center on integers use \code{binwidth = 1} and \code{center = 0}, even +if \code{0} is outside the range of the data. Alternatively, this same alignment +can be specified with \code{binwidth = 1} and \code{boundary = 0.5}, even if \code{0.5} is +outside the range of the data.} + +\item{breaks}{Alternatively, you can supply a numeric vector giving +the bin boundaries. Overrides \code{binwidth}, \code{bins}, \code{center}, +and \code{boundary}. Can also be a function that takes group-wise values as input and returns bin boundaries.} \item{drop}{if \code{TRUE} removes all cells with 0 counts.} } @@ -153,6 +178,16 @@ These are calculated by the 'stat' part of layers and can be accessed with \link } } +\section{Controlling binning parameters for the x and y directions}{ + +The arguments \code{bins}, \code{binwidth}, \code{breaks}, \code{center}, and \code{boundary} can +be set separately for the x and y directions. When given as a scalar, one +value applies to both directions. When given as a vector of length two, +the first is applied to the x direction and the second to the y direction. +Alternatively, these can be a named list containing \code{x} and \code{y} elements, +for example \code{list(x = 10, y = 20)}. +} + \examples{ d <- ggplot(diamonds, aes(x, y)) + xlim(4, 10) + ylim(4, 10) d + geom_bin_2d() @@ -160,7 +195,7 @@ d + geom_bin_2d() # You can control the size of the bins by specifying the number of # bins in each direction: d + geom_bin_2d(bins = 10) -d + geom_bin_2d(bins = 30) +d + geom_bin_2d(bins = list(x = 30, y = 10)) # Or by specifying the width of the bins d + geom_bin_2d(binwidth = c(0.1, 0.1)) diff --git a/man/geom_hex.Rd b/man/geom_hex.Rd index 553787761b..da103b0e3c 100644 --- a/man/geom_hex.Rd +++ b/man/geom_hex.Rd @@ -112,11 +112,19 @@ the default plot specification, e.g. \code{\link[=borders]{borders()}}.} \code{stat_bin_hex()}. For more information about overriding these connections, see how the \link[=layer_stats]{stat} and \link[=layer_geoms]{geom} arguments work.} -\item{bins}{numeric vector giving number of bins in both vertical and -horizontal directions. Set to 30 by default.} - -\item{binwidth}{Numeric vector giving bin width in both vertical and -horizontal directions. Overrides \code{bins} if both set.} +\item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} + +\item{binwidth}{The width of the bins. Can be specified as a numeric value +or as a function that takes x after scale transformation as input and +returns a single numeric value. When specifying a function along with a +grouping structure, the function will be called once per group. +The default is to use the number of bins in \code{bins}, +covering the range of the data. You should always override +this value, exploring multiple widths to find the best to illustrate the +stories in your data. + +The bin width of a date variable is the number of days in each time; the +bin width of a time variable is the number of seconds.} } \description{ Divides the plane into regular hexagons, counts the number of cases in @@ -162,6 +170,16 @@ These are calculated by the 'stat' part of layers and can be accessed with \link } } +\section{Controlling binning parameters for the x and y directions}{ + +The arguments \code{bins} and \code{binwidth} can +be set separately for the x and y directions. When given as a scalar, one +value applies to both directions. When given as a vector of length two, +the first is applied to the x direction and the second to the y direction. +Alternatively, these can be a named list containing \code{x} and \code{y} elements, +for example \code{list(x = 10, y = 20)}. +} + \examples{ d <- ggplot(diamonds, aes(carat, price)) d + geom_hex() diff --git a/man/stat_summary_2d.Rd b/man/stat_summary_2d.Rd index 9ee4604b65..da62dd0a15 100644 --- a/man/stat_summary_2d.Rd +++ b/man/stat_summary_2d.Rd @@ -113,11 +113,19 @@ lists which parameters it can accept. \link[=draw_key]{key glyphs}, to change the display of the layer in the legend. }} -\item{bins}{numeric vector giving number of bins in both vertical and -horizontal directions. Set to 30 by default.} +\item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} -\item{binwidth}{Numeric vector giving bin width in both vertical and -horizontal directions. Overrides \code{bins} if both set.} +\item{binwidth}{The width of the bins. Can be specified as a numeric value +or as a function that takes x after scale transformation as input and +returns a single numeric value. When specifying a function along with a +grouping structure, the function will be called once per group. +The default is to use the number of bins in \code{bins}, +covering the range of the data. You should always override +this value, exploring multiple widths to find the best to illustrate the +stories in your data. + +The bin width of a date variable is the number of days in each time; the +bin width of a time variable is the number of seconds.} \item{drop}{drop if the output of \code{fun} is \code{NA}.} @@ -173,6 +181,16 @@ These are calculated by the 'stat' part of layers and can be accessed with \link } } +\section{Controlling binning parameters for the x and y directions}{ + +The arguments \code{bins}, \code{binwidth}, \code{breaks}, \code{center}, and \code{boundary} can +be set separately for the x and y directions. When given as a scalar, one +value applies to both directions. When given as a vector of length two, +the first is applied to the x direction and the second to the y direction. +Alternatively, these can be a named list containing \code{x} and \code{y} elements, +for example \code{list(x = 10, y = 20)}. +} + \examples{ d <- ggplot(diamonds, aes(carat, depth, z = price)) d + stat_summary_2d()