Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# tidyr (development version)

* `pivot_wider()` no longer accidentally retains `values_from` when pivoting
a zero row data frame (#1249).

* `pivot_wider_spec()` now works correctly with a 0-row data frame and a `spec`
that doesn't identify any rows (#1250, #1252).

Expand Down
77 changes: 64 additions & 13 deletions R/pivot-wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,9 @@ pivot_wider.data.frame <- function(data,
) {
names_from <- enquo(names_from)
values_from <- enquo(values_from)
spec <- build_wider_spec(data,

spec <- build_wider_spec(
data = data,
names_from = !!names_from,
values_from = !!values_from,
names_prefix = names_prefix,
Expand All @@ -142,8 +144,17 @@ pivot_wider.data.frame <- function(data,
names_sort = names_sort
)

id_cols <- enquo(id_cols)
pivot_wider_spec(data, spec, !!id_cols,
id_cols <- build_wider_id_cols_expr(
data = data,
id_cols = {{id_cols}},
names_from = !!names_from,
values_from = !!values_from
)

pivot_wider_spec(
data = data,
spec = spec,
id_cols = !!id_cols,
names_repair = names_repair,
values_fill = values_fill,
values_fn = values_fn
Expand All @@ -168,6 +179,12 @@ pivot_wider.data.frame <- function(data,
#' pivoted from the wide format.
#' The special `.seq` variable is used to disambiguate rows internally;
#' it is automatically removed after pivoting.
#' @param id_cols <[`tidy-select`][tidyr_tidy_select]> A set of columns that
#' uniquely identifies each observation. Defaults to all columns in `data`
#' except for the columns specified in `spec$.value` and the columns of the
#' `spec` that aren't named `.name` or `.value`. Typically used when you have
#' redundant variables, i.e. variables whose values are perfectly correlated
#' with existing variables.
#'
#' @examples
#' # See vignette("pivot") for examples and explanation
Expand Down Expand Up @@ -218,22 +235,20 @@ pivot_wider_spec <- function(data,
abort("`values_fill` must be NULL, a scalar, or a named list")
}

values <- vec_unique(spec$.value)
spec_cols <- c(names(spec)[-(1:2)], values)
non_id_cols <- vec_unique(spec$.value)
non_id_cols <- c(names(spec)[-(1:2)], non_id_cols)

id_cols <- enquo(id_cols)
if (!quo_is_null(id_cols)) {
key_vars <- names(tidyselect::eval_select(enquo(id_cols), data))
} else {
key_vars <- tbl_vars(data)
}
key_vars <- setdiff(key_vars, spec_cols)
id_cols <- select_wider_id_cols(
data = data,
id_cols = {{id_cols}},
non_id_cols = non_id_cols
)

# Figure out rows in output.
# Early conversion to tibble because data.table returns zero rows if
# zero cols are selected.
rows <- as_tibble(data)
rows <- rows[key_vars]
rows <- rows[id_cols]
row_id <- vec_group_id(rows)
nrow <- attr(row_id, "n")
rows <- vec_slice(rows, vec_unique_loc(row_id))
Expand Down Expand Up @@ -339,6 +354,42 @@ build_wider_spec <- function(data,
out
}

# Build an unevaluated `c(...)` call naming the id columns of `data`.
#
# The columns selected by `names_from` and `values_from` are resolved against
# `data` and handed to `select_wider_id_cols()` as known non-id columns. The
# column names it returns are spliced into a `c()` call, which is returned
# as an expression.
build_wider_id_cols_expr <- function(data,
                                     id_cols = NULL,
                                     names_from = name,
                                     values_from = value) {
  # TODO: Use `allow_rename = FALSE`.
  # Requires https://github.com/r-lib/tidyselect/issues/225.
  names_from_loc <- tidyselect::eval_select(enquo(names_from), data)
  values_from_loc <- tidyselect::eval_select(enquo(values_from), data)

  # Neither the `names_from` nor the `values_from` columns can be id columns
  excluded <- c(names(names_from_loc), names(values_from_loc))

  selected <- select_wider_id_cols(
    data = data,
    id_cols = {{ id_cols }},
    non_id_cols = excluded
  )

  expr(c(!!!selected))
}

# Resolve the names of the id columns of `data`.
#
# `data`        Data frame to select from.
# `id_cols`     Tidy-select expression choosing the id columns, or `NULL`
#               to use every column that is not listed in `non_id_cols`.
# `non_id_cols` Character vector of column names that are dropped from `data`
#               before the selection is evaluated.
#
# Returns a character vector of column names.
select_wider_id_cols <- function(data,
                                 id_cols = NULL,
                                 non_id_cols = character()) {
  id_cols <- enquo(id_cols)

  # Remove known non-id-cols so they are never selected
  data <- data[setdiff(names(data), non_id_cols)]

  if (quo_is_null(id_cols)) {
    # No explicit selection: every remaining column is an id column
    return(names(data))
  }

  # `id_cols` already holds the captured quosure, so it is passed on directly
  # instead of being captured a second time with `enquo()`.
  # TODO: Use `allow_rename = FALSE`.
  # Requires https://github.com/r-lib/tidyselect/issues/225.
  names(tidyselect::eval_select(id_cols, data))
}

# Helpers -----------------------------------------------------------------

# Not a great name as it now also casts
Expand Down
7 changes: 4 additions & 3 deletions man/pivot_wider_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions tests/testthat/test-pivot-wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,60 @@ test_that("can override default keys", {
expect_equal(nrow(pv), 2)
})

test_that("`id_cols = everything()` excludes `names_from` and `values_from`", {
  df <- tibble(key = "x", name = "a", value = 1L)
  expected <- tibble(key = "x", a = 1L)

  # Neither `names_from` nor `values_from` is supplied, so the defaults apply
  out <- pivot_wider(df, id_cols = everything())
  expect_identical(out, expected)

  # Same expectation when going through an explicitly built spec
  spec <- build_wider_spec(df)
  out <- pivot_wider_spec(df, spec, id_cols = everything())
  expect_identical(out, expected)
})

test_that("pivoting a zero row data frame drops `names_from` and `values_from` (#1249)", {
  df <- tibble(key = character(), name = character(), value = integer())

  out <- pivot_wider(df, names_from = name, values_from = value)

  # Only the `key` column is expected to remain
  expect_identical(out, tibble(key = character()))
})

test_that("known bug - building a wider spec with a zero row data frame loses `values_from` info (#1249)", {
  # This behavior can't currently be changed in `pivot_wider_spec()`,
  # for fear of breaking backwards compatibility

  df <- tibble(key = character(), name = character(), value = integer())

  key_only <- tibble(key = character())
  key_and_value <- tibble(key = character(), value = integer())

  # The spec built here no longer records that `value` was specified as
  # `values_from`; that information would normally live in `spec$.value`
  spec <- build_wider_spec(df, names_from = name, values_from = value)

  # As a result, pivoting with this spec accidentally keeps `value` around
  out <- pivot_wider_spec(df, spec)
  expect_identical(out, key_and_value)

  # Explicitly restricting `id_cols` to the `key` column works right
  out <- pivot_wider_spec(df, spec, id_cols = key)
  expect_identical(out, key_only)

  # `id_cols = everything()` won't work as intended, though, because there is
  # no way to know that `value` should be removed from `names(data)` before
  # the tidy-selection is computed
  out <- pivot_wider_spec(df, spec, id_cols = everything())
  expect_identical(out, key_and_value)
})

# non-unique keys ---------------------------------------------------------

Expand Down