Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# tidyr (development version)

* `unnest_wider()` now generates automatic names for _partially_ unnamed
vectors. Previously it only generated them for fully unnamed vectors,
resulting in a strange mix of automatic names and name-repaired names (#1367).

* `unnest_wider()` now errors if any values being unnested are unnamed and
`names_sep` is not provided (#1367).

* `nest()` has gained a new argument, `.by`, which allows you to specify the
columns to nest by (rather than the columns to nest, i.e. through `...`).
Additionally, the `.key` argument is no longer deprecated, and is used
Expand Down
52 changes: 28 additions & 24 deletions R/unnest-wider.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
#' as is. If a string, the outer and inner names will be pasted together using
#' `names_sep` as a separator.
#'
#' If the values being unnested are unnamed and `names_sep` is supplied, the
#' inner names will be automatically generated as an increasing sequence of
#' integers.
#' If any values being unnested are unnamed, then `names_sep` must be
#' supplied, otherwise an error is thrown. When `names_sep` is supplied,
#' names are automatically generated for unnamed values as an increasing
#' sequence of integers.
#' @param strict A single logical specifying whether or not to apply strict
#' vctrs typing rules. If `FALSE`, typed empty values (like `list()` or
#' `integer()`) nested within list-columns will be treated like `NULL` and
Expand Down Expand Up @@ -58,7 +59,7 @@
#' x = 1:3,
#' y = list(NULL, 1:3, 4:5)
#' )
#' # where you'll usually want to provide names_sep:
#' # but you must supply `names_sep` to do so, which generates automatic names:
#' df %>% unnest_wider(y, names_sep = "_")
#'
#' # 0-length elements ---------------------------------------------------------
Expand Down Expand Up @@ -205,6 +206,8 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
# which we want to treat like lists where we know the type of each element
x <- tidyr_new_list(x)
x <- map(x, list_of)
names <- names2(x)
x <- set_names(x, NULL)
} else {
if (!strict && vec_is_list(x)) {
empty <- list_sizes(x) == 0L
Expand All @@ -214,34 +217,35 @@ elt_to_wide <- function(x, name, strict, names_sep, error_call = caller_env()) {
}
}

names <- vec_names(x)

if (is.null(names)) {
x <- vec_chop(x)
} else {
# Promote names to column names
x <- vec_set_names(x, NULL)
x <- vec_chop(x)
x <- vec_set_names(x, names)
}
names <- vec_names2(x)
x <- vec_set_names(x, NULL)
x <- vec_chop(x)
}

empty <- names == ""
any_empty <- any(empty)

if (is.null(names_sep)) {
names(x) <- vec_as_names(names2(x), repair = "unique", call = error_call)
if (any_empty) {
stop_use_names_sep(error_call = error_call)
}
} else {
outer <- name

inner <- names(x)
if (is.null(inner)) {
inner <- as.character(seq_along(x))
} else {
inner <- vec_as_names(inner, repair = "unique", call = error_call)
if (any_empty) {
names[empty] <- as.character(which(empty))
}

names(x) <- apply_names_sep(outer, inner, names_sep)
names <- apply_names_sep(name, names, names_sep)
}

x <- set_names(x, names)
x <- new_data_frame(x, n = 1L)

x
}

# Abort because at least one value being unnested has no name and the
# caller did not supply `names_sep`.
#
# `error_call` is forwarded to `cli::cli_abort()` as `call`, so the error is
# attributed to that call rather than to this helper. This function does not
# return; it always throws.
stop_use_names_sep <- function(error_call = caller_env()) {
  cli::cli_abort(
    c(
      "Can't unnest elements with missing names.",
      i = "Supply {.arg names_sep} to generate automatic names."
    ),
    call = error_call
  )
}
9 changes: 5 additions & 4 deletions man/unnest_wider.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 37 additions & 26 deletions tests/testthat/_snaps/unnest-wider.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,61 @@
Error in `unnest_wider()`:
! List-column `y` must contain only vectors.

# can unnest a vector with a mix of named/unnamed elements (#1200 comment)
# can't unnest unnamed elements without `names_sep` (#1367)

Code
out <- unnest_wider(df, x, names_sep = "_")
Message
New names:
* `` -> `...1`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

# unique name repair is done on the elements before applying `names_sep` (#1200 comment)
---

Code
out <- unnest_wider(df, col, names_sep = "_")
Message
New names:
* `` -> `...1`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

---

Code
out <- unnest_wider(df, col, names_sep = "_")
Message
New names:
* `` -> `...1`
* `` -> `...2`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

# output structure is the same whether or not `names_sep` is applied (#1200 comment)
---

Code
out1 <- unnest_wider(df, col)
Message
New names:
* `` -> `...1`
New names:
* `` -> `...1`
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Can't unnest elements with missing names.
i Supply `names_sep` to generate automatic names.

# catches duplicate inner names in the same vector

Code
unnest_wider(df, col)
Condition
Error in `unnest_wider()`:
! Names must be unique.
x These names are duplicated:
* "a" at locations 1 and 2.
i Use argument `names_repair` to specify repair strategy.

---

Code
out2 <- unnest_wider(df, col, names_sep = "_")
out <- unnest_wider(df, col, names_repair = "unique")
Message
New names:
* `` -> `...1`
New names:
* `` -> `...1`
* `a` -> `a...1`
* `a` -> `a...2`

# unnest_wider() advises on outer / inner name duplication (#1367)

Expand Down
81 changes: 63 additions & 18 deletions tests/testthat/test-unnest-wider.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,11 @@ test_that("can unnest multiple columns wider at once (#740)", {
)
})

test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment)", {
test_that("can unnest a vector with a mix of named/unnamed elements (#1200 comment, #1367)", {
df <- tibble(x = c(a = 1L, 2L))
expect_snapshot(out <- unnest_wider(df, x, names_sep = "_"))
out <- unnest_wider(df, x, names_sep = "_")
expect_identical(out$x_a, c(1L, NA))
expect_identical(out$x_...1, c(NA, 2L))
expect_identical(out$x_1, c(NA, 2L))
})

test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment)", {
Expand All @@ -196,34 +196,43 @@ test_that("can unnest a list with a mix of named/unnamed elements (#1200 comment
expect_identical(out$x_2, c(2L, 4L))
})

test_that("unique name repair is done on the elements before applying `names_sep` (#1200 comment)", {
test_that("integer names are generated before applying `names_sep` (#1200 comment, #1367)", {
df <- tibble(col = list(set_names(1, "")))
expect_snapshot(out <- unnest_wider(df, col, names_sep = "_"))
expect_named(out, "col_...1")
out <- unnest_wider(df, col, names_sep = "_")
expect_named(out, "col_1")

df <- tibble(col = list(set_names(1:2, c("", ""))))
expect_snapshot(out <- unnest_wider(df, col, names_sep = "_"))
expect_named(out, c("col_...1", "col_...2"))
out <- unnest_wider(df, col, names_sep = "_")
expect_named(out, c("col_1", "col_2"))
})

test_that("output structure is the same whether or not `names_sep` is applied (#1200 comment)", {
test_that("integer names are generated for partially named vectors (#1367)", {
  elt1 <- set_names(1:4, c("x", "", "z", ""))
  elt2 <- set_names(5:8, c("", "", "z", ""))

  # A single partially named element: each blank name is replaced by the
  # position of that value within the element.
  one <- unnest_wider(tibble(col = list(elt1)), col, names_sep = "_")
  expect_named(one, c("col_x", "col_2", "col_z", "col_4"))

  # Two elements with different blanks: the second element has a blank in
  # position 1, which adds a `col_1` column after those from the first.
  two <- unnest_wider(tibble(col = list(elt1, elt2)), col, names_sep = "_")
  expect_named(two, c("col_x", "col_2", "col_z", "col_4", "col_1"))
  expect_identical(two$col_x, c(1L, NA))
  expect_identical(two$col_1, c(NA, 5L))
})

test_that("`NA_character_` name is treated like the empty string (#1200 comment)", {
col <- list(
set_names(1, "a"),
set_names(1, NA_character_),
set_names(1, "")
)
df <- tibble(col = col)

# Column structure between these two must be the same,
# we consider an `NA_character_` name as identical to `""`.
expect_snapshot(out1 <- unnest_wider(df, col))
expect_snapshot(out2 <- unnest_wider(df, col, names_sep = "_"))

expect_identical(out1$a, c(1, NA, NA))
expect_identical(out1$...1, c(NA, 1, 1))
out <- unnest_wider(df, col, names_sep = "_")

expect_identical(out2$col_a, c(1, NA, NA))
expect_identical(out2$col_...1, c(NA, 1, 1))
expect_identical(out$col_a, c(1, NA, NA))
expect_identical(out$col_1, c(NA, 1, 1))
})

test_that("can combine `<list> + <list_of<ptype>>`", {
Expand All @@ -232,6 +241,42 @@ test_that("can combine `<list> + <list_of<ptype>>`", {
expect_identical(out$a, list(1:2, 1L))
})

# Every case below omits `names_sep` and contains at least one value without
# a usable name, so each call is snapshotted as an error
# ("Can't unnest elements with missing names."). The snapshots record the
# code inside `expect_snapshot()` verbatim, so keep those calls unchanged.
test_that("can't unnest unnamed elements without `names_sep` (#1367)", {
# Element has no names at all
df <- tibble(col = list(1))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

# Element's only name is the empty string
df <- tibble(col = list(set_names(1, "")))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

# Element's only name is `NA_character_`
df <- tibble(col = list(set_names(1, NA_character_)))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

# Partially missing within an element: the first element is fully named,
# the second has one named and one unnamed value
df <- tibble(col = list(c(a = 1), c(a = 1, 2)))
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})
})

# Duplicate (non-missing) inner names within one element are a separate
# failure mode from missing names: they go through name repair rather than
# the `names_sep` check.
test_that("catches duplicate inner names in the same vector", {
# A single element whose two values share the inner name `a`
df <- tibble(col = list(c(a = 1, a = 2)))

# With the default name repair, the duplicate is snapshotted as an error
# ("Names must be unique.")
expect_snapshot(error = TRUE, {
unnest_wider(df, col)
})

# With `names_repair = "unique"` the call succeeds; the "New names" message
# is captured in the snapshot and the repaired names are checked below
expect_snapshot({
out <- unnest_wider(df, col, names_repair = "unique")
})
expect_named(out, c("a...1", "a...2"))
})

test_that("unnest_wider() advises on outer / inner name duplication (#1367)", {
df <- tibble(x = 1, y = list(list(x = 2)))

Expand Down