tidyverse · DavisVaughan · Dec 13, 2021 · Dec 3, 2021 · Dec 3, 2021 · Dec 3, 2021
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,8 @@
 # tidyr (development version)
 
+* `pivot_wider()` no longer accidentally retains `values_from` when pivoting
+  a zero-row data frame (#1249).
+
 * `pivot_wider_spec()` now works correctly with a 0-row data frame and a `spec`
   that doesn't identify any rows (#1250, #1252).
 

diff --git a/R/pivot-wide.R b/R/pivot-wide.R
@@ -133,7 +133,9 @@ pivot_wider.data.frame <- function(data,
                                    ) {
   names_from <- enquo(names_from)
   values_from <- enquo(values_from)
-  spec <- build_wider_spec(data,
+
+  spec <- build_wider_spec(
+    data = data,
     names_from = !!names_from,
     values_from = !!values_from,
     names_prefix = names_prefix,
@@ -142,8 +144,17 @@ pivot_wider.data.frame <- function(data,
     names_sort = names_sort
   )
 
-  id_cols <- enquo(id_cols)
-  pivot_wider_spec(data, spec, !!id_cols,
+  id_cols <- build_wider_id_cols_expr(
+    data = data,
+    id_cols = {{id_cols}},
+    names_from = !!names_from,
+    values_from = !!values_from
+  )
+
+  pivot_wider_spec(
+    data = data,
+    spec = spec,
+    id_cols = !!id_cols,
     names_repair = names_repair,
     values_fill = values_fill,
     values_fn = values_fn
@@ -168,6 +179,12 @@ pivot_wider.data.frame <- function(data,
 #'   pivoted from the wide format.
 #'   The special `.seq` variable is used to disambiguate rows internally;
 #'   it is automatically removed after pivotting.
+#' @param id_cols <[`tidy-select`][tidyr_tidy_select]> A set of columns that
+#'   uniquely identifies each observation. Defaults to all columns in `data`
+#'   except for the columns specified in `spec$.value` and the columns of the
+#'   `spec` that aren't named `.name` or `.value`. Typically used when you have
+#'   redundant variables, i.e. variables whose values are perfectly correlated
+#'   with existing variables.
 #'
 #' @examples
 #' # See vignette("pivot") for examples and explanation
@@ -218,22 +235,20 @@ pivot_wider_spec <- function(data,
     abort("`values_fill` must be NULL, a scalar, or a named list")
   }
 
-  values <- vec_unique(spec$.value)
-  spec_cols <- c(names(spec)[-(1:2)], values)
+  non_id_cols <- vec_unique(spec$.value)
+  non_id_cols <- c(names(spec)[-(1:2)], non_id_cols)
 
-  id_cols <- enquo(id_cols)
-  if (!quo_is_null(id_cols)) {
-    key_vars <- names(tidyselect::eval_select(enquo(id_cols), data))
-  } else {
-    key_vars <- tbl_vars(data)
-  }
-  key_vars <- setdiff(key_vars, spec_cols)
+  id_cols <- select_wider_id_cols(
+    data = data,
+    id_cols = {{id_cols}},
+    non_id_cols = non_id_cols
+  )
 
   # Figure out rows in output.
   # Early conversion to tibble because data.table returns zero rows if
   # zero cols are selected.
   rows <- as_tibble(data)
-  rows <- rows[key_vars]
+  rows <- rows[id_cols]
   row_id <- vec_group_id(rows)
   nrow <- attr(row_id, "n")
   rows <- vec_slice(rows, vec_unique_loc(row_id))
@@ -339,6 +354,42 @@ build_wider_spec <- function(data,
   out
 }
 
+# Builds the expression `c(<id column names>)`, excluding from the selection
+# any column picked by `names_from` or `values_from`.
+build_wider_id_cols_expr <- function(data,
+                                     id_cols = NULL,
+                                     names_from = name,
+                                     values_from = value) {
+  # TODO: Use `allow_rename = FALSE`.
+  # Requires https://github.com/r-lib/tidyselect/issues/225.
+  names_loc <- tidyselect::eval_select(enquo(names_from), data)
+  values_loc <- tidyselect::eval_select(enquo(values_from), data)
+  selected <- select_wider_id_cols(
+    data = data,
+    id_cols = {{ id_cols }},
+    non_id_cols = c(names(names_loc), names(values_loc))
+  )
+
+  expr(c(!!!selected))
+}
+
+# Names of the id columns: the tidy-selection `id_cols` evaluated on `data`
+# without `non_id_cols`, or every remaining column when `id_cols` is `NULL`.
+select_wider_id_cols <- function(data,
+                                 id_cols = NULL,
+                                 non_id_cols = character()) {
+  id_cols <- enquo(id_cols)
+  # Remove known non-id-cols so they are never selected
+  data <- data[setdiff(names(data), non_id_cols)]
+
+  if (quo_is_null(id_cols)) {
+    return(names(data))
+  }
+  # TODO: Use `allow_rename = FALSE`.
+  # Requires https://github.com/r-lib/tidyselect/issues/225.
+  names(tidyselect::eval_select(id_cols, data))  # already a quosure
+}
+
 # Helpers -----------------------------------------------------------------
 
 # Not a great name as it now also casts

diff --git a/man/pivot_wider_spec.Rd b/man/pivot_wider_spec.Rd
diff --git a/tests/testthat/test-pivot-wide.R b/tests/testthat/test-pivot-wide.R
@@ -202,6 +202,60 @@ test_that("can override default keys", {
   expect_equal(nrow(pv), 2)
 })
 
+test_that("`id_cols = everything()` excludes `names_from` and `values_from`", {
+  # No `names_from`/`values_from` are passed, so their defaults apply
+  input <- tibble(key = "x", name = "a", value = 1L)
+  expected <- tibble(key = "x", a = 1L)
+
+  # Directly through `pivot_wider()`
+  wide <- pivot_wider(input, id_cols = everything())
+  expect_identical(wide, expected)
+
+  # And through a spec built from the same data
+  wide_spec <- pivot_wider_spec(
+    input, build_wider_spec(input), id_cols = everything()
+  )
+  expect_identical(wide_spec, expected)
+})
+
+test_that("pivoting a zero row data frame drops `names_from` and `values_from` (#1249)", {
+  empty <- tibble(key = character(), name = character(), value = integer())
+
+  # Only the `key` column is expected in the result
+  wide <- pivot_wider(empty, names_from = name, values_from = value)
+
+  expect_identical(wide, tibble(key = character()))
+})
+
+test_that("known bug - building a wider spec with a zero row data frame loses `values_from` info (#1249)", {
+  # This behavior of `pivot_wider_spec()` can't currently be changed,
+  # for fear of breaking backwards compatibility
+
+  empty <- tibble(key = character(), name = character(), value = integer())
+
+  # The correct result, and the one the known bug produces
+  key_only <- tibble(key = character())
+  key_and_value <- tibble(key = character(), value = integer())
+
+  # Building the spec loses the fact that `value` was specified as
+  # `values_from`, which would normally be in the `spec$.value` column
+  empty_spec <- build_wider_spec(empty, names_from = name, values_from = value)
+
+  # So pivoting with this spec accidentally keeps `value` around
+  kept_value <- pivot_wider_spec(empty, empty_spec)
+  expect_identical(kept_value, key_and_value)
+
+  # If you specify `id_cols` to be the `key` column, it works right
+  explicit_key <- pivot_wider_spec(empty, empty_spec, id_cols = key)
+  expect_identical(explicit_key, key_only)
+
+  # But `id_cols = everything()` won't work as intended, because we can't know
+  # to remove `value` from `names(data)` before computing the tidy-selection
+  expect_identical(
+    pivot_wider_spec(empty, empty_spec, id_cols = everything()),
+    key_and_value
+  )
+})
 
 # non-unique keys ---------------------------------------------------------