Skip to content

Commit f0bcfd9

Browse files
authored
Remove stringi dependency #936 (#986)
Fixes #936
2 parents 9c4f908 + 35c11a6 commit f0bcfd9

File tree

10 files changed

+92
-14
lines changed

10 files changed

+92
-14
lines changed

DESCRIPTION

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ Imports:
3333
purrr,
3434
Rcpp,
3535
rlang,
36-
stringi,
3736
tibble (>= 2.1.1),
3837
tidyselect (>= 1.1.0),
3938
utils,

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# tidyr (development version)
22

3+
* stringi dependency has been removed; this was a substantial dependency that
4+
made tidyr hard to compile in resource-constrained environments
5+
(@rjpat, #936).
6+
37
# tidyr 1.1.0
48

59
## General features

R/extract.R

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,14 @@ str_extract <- function(x, into, regex, convert = FALSE) {
5555
is_character(into)
5656
)
5757

58-
matches <- stringi::stri_match_first_regex(x, regex)[, -1, drop = FALSE]
59-
60-
if (ncol(matches) != length(into)) {
58+
out <- str_match_first(x, regex)
59+
if (length(out) != length(into)) {
6160
stop(
6261
"`regex` should define ", length(into), " groups; ", ncol(matches), " found.",
6362
call. = FALSE
6463
)
6564
}
6665

67-
out <- as_tibble(matches, .name_repair = "minimal")
68-
out <- as.list(out)
69-
7066
# Handle duplicated names
7167
if (anyDuplicated(into)) {
7268
pieces <- split(out, into)
@@ -88,3 +84,27 @@ str_extract <- function(x, into, regex, convert = FALSE) {
8884

8985
out
9086
}
87+
88+
# Helpers -----------------------------------------------------------------
89+
90+
# Extract the capture groups of the first match of `regex` in each element of
# `string`.
#
# Returns a list with one character vector per capture group, each as long as
# `string`. The whole-match column is computed and then dropped, so a pattern
# without capture groups yields an empty list.
str_match_first <- function(string, regex) {
  first_match <- regexpr(regex, string, perl = TRUE)
  bounds <- group_loc(first_match)

  extract_column <- function(col) {
    substr(string, bounds$start[, col], bounds$end[, col])
  }
  pieces <- lapply(seq_len(bounds$matches), extract_column)

  # Column 1 describes the overall match; callers only want the groups.
  pieces[-1]
}
100+
101+
# Turn the match object `x` (as produced by `regexpr(..., perl = TRUE)`) into
# matrices of start and end positions.
#
# Column 1 holds the overall match and the remaining columns hold the capture
# groups. Positions whose start is -1 are replaced by NA in both matrices.
# Returns a list with `matches` (the number of columns), `start` and `end`.
group_loc <- function(x) {
  starts <- cbind(as.vector(x), attr(x, "capture.start"))
  widths <- cbind(attr(x, "match.length"), attr(x, "capture.length"))
  ends <- starts + widths - 1L

  unmatched <- starts == -1L
  starts <- replace(starts, unmatched, NA)
  ends <- replace(ends, unmatched, NA)

  list(matches = ncol(starts), start = starts, end = ends)
}

R/pivot-long.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ build_longer_spec <- function(data, cols,
281281
if (is.null(names_prefix)) {
282282
names <- names(cols)
283283
} else {
284-
names <- stringi::stri_replace_all_regex(names(cols), paste0("^", names_prefix), "")
284+
names <- gsub(paste0("^", names_prefix), "", names(cols))
285285
}
286286

287287
if (length(names_to) > 1) {

R/separate-rows.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ separate_rows.data.frame <- function(data,
3131
convert = FALSE) {
3232
vars <- tidyselect::eval_select(expr(c(...)), data)
3333

34-
out <- purrr::modify_at(data, vars, stringi::stri_split_regex, pattern = sep)
34+
out <- purrr::modify_at(data, vars, strsplit, split = sep, perl = TRUE)
3535
out <- unchop(as_tibble(out), any_of(vars))
3636
if (convert) {
3737
out[vars] <- map(out[vars], type.convert, as.is = TRUE)

R/separate.R

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,18 @@ str_separate <- function(x, into, sep, convert = FALSE, extra = "warn", fill = "
108108
}
109109

110110
# Split each string in `x` at the character positions given by `sep`.
#
# Non-negative positions count from the start of the string; negative
# positions count back from the end and are floored at 0. Returns a list with
# `length(sep) + 1` character vectors, one per piece.
strsep <- function(x, sep) {
  width <- nchar(x)
  cuts <- lapply(sep, function(p) {
    if (p >= 0) p else pmax(0, width + p)
  })
  bounds <- c(list(0), cuts, list(width))

  lapply(seq_len(length(bounds) - 1), function(k) {
    substr(x, bounds[[k]] + 1, bounds[[k + 1]])
  })
}
122+
122123
str_split_fixed <- function(value, sep, n, extra = "warn", fill = "warn") {
123124
if (extra == "error") {
124125
warn(glue(
@@ -132,7 +133,7 @@ str_split_fixed <- function(value, sep, n, extra = "warn", fill = "warn") {
132133
fill <- arg_match(fill, c("warn", "left", "right"))
133134

134135
n_max <- if (extra == "merge") n else -1L
135-
pieces <- stringi::stri_split_regex(value, sep, n_max)
136+
pieces <- str_split_n(value, sep, n_max = n_max)
136137

137138
simp <- simplifyPieces(pieces, n, fill == "left")
138139

@@ -151,6 +152,24 @@ str_split_fixed <- function(value, sep, n, extra = "warn", fill = "warn") {
151152
simp$strings
152153
}
153154

155+
# Split each element of `x` on the regular expression `pattern` (PCRE).
#
# x:       vector of strings to split; anything that is not already a
#          character vector (e.g. a factor or a bare NA) is coerced first.
# pattern: regular expression describing the separator.
# n_max:   if positive, only the first `n_max - 1` separators are used, so
#          each element yields at most `n_max` pieces; otherwise every
#          separator is used.
#
# Returns a list with one character vector of pieces per element of `x`.
str_split_n <- function(x, pattern, n_max = -1) {
  # regmatches() works on the elements of `x` as given; coerce up front so
  # that factors are split by their labels and NA comes back as NA_character_.
  if (!is.character(x)) {
    x <- as.character(x)
  }

  m <- gregexpr(pattern, x, perl = TRUE)
  if (n_max > 0) {
    # Keep only the leading separators; the rest stay inside the last piece.
    m <- lapply(m, function(x) slice_match(x, seq_along(x) < n_max))
  }
  regmatches(x, m, invert = TRUE)
}
162+
163+
# Subset the match object `x` with the index `i`.
#
# The "match.length" attribute is subset with the same index, and the
# "index.type" and "useBytes" attributes are copied over unchanged.
slice_match <- function(x, i) {
  kept <- x[i]
  attr(kept, "match.length") <- attr(x, "match.length")[i]
  attr(kept, "index.type") <- attr(x, "index.type")
  attr(kept, "useBytes") <- attr(x, "useBytes")
  kept
}
171+
172+
154173
list_indices <- function(x, max = 20) {
155174
if (length(x) > max) {
156175
x <- c(x[seq_len(max)], "...")

data-raw/population.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ pop <- read_csv("data-raw/TB_burden_countries_2014-11-07.csv",
1111
population <- pop %>%
1212
select(country, year, population = e_pop_num) %>%
1313
filter(year >= 1995) %>%
14-
mutate(country = stringi::stri_trans_general(country, "latin-ascii"))
14+
mutate(country = iconv(country, from = "UTF-8", to = "ASCII//TRANSLIT"))
1515

1616
write_csv(population, "data-raw/population.csv")
1717
save(population, file = "data/population.rdata")

data-raw/who.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ who <- who_raw %>%
1010
new_sn_m014:new_sn_m65, new_sn_f014:new_sn_f65, new_ep_m014:new_ep_m65,
1111
new_ep_f014:new_ep_f65, newrel_m014:newrel_m65, newrel_f014:newrel_f65
1212
) %>%
13-
mutate(country = stringi::stri_trans_general(country, "latin-ascii"))
13+
mutate(country = iconv(country, from = "UTF-8", to = "ASCII//TRANSLIT"))
1414

1515
write_csv(who, "data-raw/who.csv")
1616
save(who, file = "data/who.rdata")

tests/testthat/test-extract.R

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,26 @@ test_that("informative error message if wrong number of groups", {
5252
expect_error(extract(df, x, "y", "."), "should define 1 groups")
5353
expect_error(extract(df, x, c("y", "z"), "."), "should define 2 groups")
5454
})
55+
56+
test_that("str_match_first handles edge cases", {
  two_groups <- "(.)-(.)"

  # Only the first match in each string contributes its groups.
  first_only <- str_match_first(c("r-2", "d-2-3-4"), two_groups)
  expect_identical(first_only, list(c("r", "d"), c("2", "2")))

  # NA input with a pattern that has no capture groups.
  no_groups <- str_match_first(NA, "test")
  expect_identical(no_groups, list())

  # A group that matches empty and whitespace-only strings.
  whole_string <- str_match_first(c("", " "), "^(.*)$")
  expect_equivalent(whole_string, list(c("", " ")))

  # No match: one NA per group.
  unmatched <- str_match_first("", two_groups)
  expect_equivalent(unmatched, list(NA_character_, NA_character_))

  # Zero-length input: one empty vector per group.
  empty_input <- str_match_first(character(), two_groups)
  expect_equivalent(empty_input, list(character(), character()))
})

tests/testthat/test-separate.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,19 @@ test_that("checks type of `into` and `sep`", {
119119
)
120120
})
121121

122+
# helpers -----------------------------------------------------------------
123+
124+
test_that("str_split_n can cap number of splits", {
  input <- "x,x"
  both_pieces <- list(c("x", "x"))

  # A cap of one piece leaves the string unsplit.
  expect_equal(str_split_n(input, ",", 1), list("x,x"))
  # A cap at or above the number of available pieces changes nothing.
  expect_equal(str_split_n(input, ",", 2), both_pieces)
  expect_equal(str_split_n(input, ",", 3), both_pieces)
})
129+
130+
test_that("str_split_n handles edge cases", {
  # Zero-length input gives an empty list.
  empty_result <- str_split_n(character(), ",", 1)
  expect_equal(empty_result, list())

  # Missing input stays missing.
  missing_result <- str_split_n(NA, ",", 1)
  expect_equal(missing_result, list(NA_character_))
})
134+
122135
test_that("list_indices truncates long warnings", {
123136
expect_equal(list_indices(letters, max = 3), "a, b, c, ...")
124137
})

0 commit comments

Comments
 (0)