Improve pivot_wider() performance (#790)

DavisVaughan · hadley · commit cb512471f2a3 · 2019-11-13T15:16:50.000-06:00
Use `vec_group_id()` and `vec_unique_loc()` to improve performance
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -37,7 +37,7 @@ Imports:
     tibble (>= 2.1.1),
     tidyselect (>= 0.2.5),
     utils,
-    vctrs (>= 0.2.0),
+    vctrs (>= 0.2.0.9007),
     lifecycle
 Suggests: 
     covr,
@@ -55,3 +55,5 @@ Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.0.0
+Remotes:
+  r-lib/vctrs
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,8 @@
 # tidyr (development version)
 
+* `pivot_wider()` and `pivot_longer()` are considerably more performant, thanks
+  largely to improvements in the underlying vctrs code (#790, @DavisVaughan).
+
 * `unnest_wider()` and `unnest_longer()` can now unnest `list_of` columns. This
   is important for unnesting columns created from `nest()`, which are always
   `list_of` columns, and for usage after `pivot_wider()`, which, by default, 
diff --git a/R/pivot-wide.R b/R/pivot-wide.R
@@ -131,10 +131,12 @@ pivot_wider_spec <- function(data,
   df_rows <- data[key_vars]
   if (ncol(df_rows) == 0) {
     rows <- tibble(.rows = 1)
+    nrow <- 1L
     row_id <- rep(1L, nrow(df_rows))
   } else {
-    rows <- vec_unique(df_rows)
-    row_id <- vec_match(df_rows, rows)
+    row_id <- vec_group_id(df_rows)
+    nrow <- attr(row_id, "n")
+    rows <- vec_slice(df_rows, vec_unique_loc(row_id))
   }
 
   value_specs <- unname(split(spec, spec$.value))
@@ -158,7 +160,6 @@ pivot_wider_spec <- function(data,
     val_id <- dedup$key
     val <- dedup$val
 
-    nrow <- nrow(rows)
     ncol <- nrow(spec_i)
 
     fill <- values_fill[[value]]