Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ make_timestamp <- function(input) {
# TODO: make sure this is the right timestamp format
return(input)
}
safe_format(input, "%Y-%m-%dT%H:%M:%SZ")

# In the call to `safe_format`:
# - The format specifier adds a literal "Z" to the end of the timestamp, which
# tells Connect "This is UTC".
# - The `tz` argument tells R to produce times in the UTC time zone.
# - The `usetz` argument says "Don't concatenate ' UTC' to the end of the string".
safe_format(input, "%Y-%m-%dT%H:%M:%SZ", tz = "UTC", usetz = FALSE)
}

ensure_columns <- function(.data, ptype) {
Expand Down Expand Up @@ -107,8 +113,7 @@ coerce_datetime <- function(x, to, ...) {
} else if (is.numeric(x)) {
vctrs::new_datetime(as.double(x), tzone = tzone(to))
} else if (is.character(x)) {
# Parse as ISO8601
as.POSIXct(strptime(x, format = "%Y-%m-%dT%H:%M:%SZ"), tz = tzone(to))
parse_connect_rfc3339(x)
} else if (inherits(x, "POSIXct")) {
x
} else if (all(is.logical(x) & is.na(x)) && length(is.logical(x) & is.na(x)) > 0) {
Expand All @@ -118,6 +123,52 @@ coerce_datetime <- function(x, to, ...) {
}
}

# Parse character timestamps received from Connect, which use RFC 3339.
#
# `strptime()`'s `%z` only accepts UTC offsets written without a colon
# (`-1400` to `+1400`). Connect returns RFC 3339 timestamps, which write
# offsets as `-14:00` to `+14:00` and indicate zero offset with `Z`.
# https://github.com/golang/go/blob/54fe0fd43fcf8609666c16ae6d15ed92873b1564/src/time/format.go#L86
#
# To bridge the two, the colon is removed from numeric offsets before parsing,
# and zero-offset timestamps are matched against a literal `Z`. (The original
# author noted that replacing `Z` with `+0000` and parsing everything with
# `%z` did not work for them, so the literal-`Z` format is kept.)
#
# Seconds are read with `%OS`, so RFC 3339 fractional seconds are accepted as
# well as whole seconds.
#
# For example:
# - "2023-08-22T14:13:14Z"
# - "2023-08-22T14:13:14.5Z"
# - "2023-08-22T15:13:14+01:00"
# - "2020-01-01T00:02:03-01:00"
#
# Arguments:
#   x: A character vector of RFC 3339 timestamps.
#
# Returns a `POSIXct` vector with time zone "UTC", the same length as `x`
# (zero-length for zero-length input). Elements that are `NA` or cannot be
# parsed come back as `NA`.
parse_connect_rfc3339 <- function(x) {
  # Convert any timestamps with offsets to a format recognized by `strptime()`.
  x <- gsub("([+-]\\d\\d):(\\d\\d)$", "\\1\\2", x)

  # Times with and without numeric offsets require different formats.
  has_z <- grepl("Z$", x)

  # Parse with an inner call to `strptime()`; convert the resulting `POSIXlt`
  # object to `POSIXct`.
  #
  # We must specify `tz` in the inner call to correctly compute date math.
  # Specifying `tz` only in `as.POSIXct()` just relabels the time zone without
  # doing any date math!
  #
  # > xlt
  # [1] "2024-08-29 16:36:33 EDT"
  # > tzone(xlt)
  # [1] "America/New_York"
  # > as.POSIXct(xlt, tz = "UTC")
  # [1] "2024-08-29 16:36:33 UTC"
  #
  # First pass: parse everything with the numeric-offset format. `Z`-suffixed
  # elements do not match `%z` and are `NA` after this step.
  parsed <- as.POSIXct(
    strptime(x, format = "%Y-%m-%dT%H:%M:%OS%z", tz = "UTC")
  )

  # Second pass: fill in the `Z`-suffixed elements using the literal-`Z`
  # format. Both passes are vectorised, so no per-element mapping is needed.
  parsed[has_z] <- as.POSIXct(
    strptime(x[has_z], format = "%Y-%m-%dT%H:%M:%OSZ", tz = "UTC")
  )

  # Carry over any names on the input.
  names(parsed) <- names(x)
  parsed
}

vec_cast.POSIXct.double <- function(x, to, ...) {
warn_experimental("vec_cast.POSIXct.double")
vctrs::new_datetime(x, tzone = tzone(to))
Expand Down
62 changes: 58 additions & 4 deletions tests/testthat/test-parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,67 @@ test_that("coerce_datetime fills the void", {
expect_error(coerce_datetime(NA_complex_, NA_datetime_, name = "complexity"), class = "vctrs_error_incompatible_type")
})

test_that("make_timestamp works with POSIXct", {
outcome <- "2020-01-01T01:02:03Z"
test_that("parse_connect_rfc3339 parses timestamps we expect from Connect", {
  # A mix of zero-offset (`Z`) and numeric-offset timestamps.
  xs <- c(
    "2023-08-22T14:13:14Z",
    "2020-01-01T01:02:03Z",
    "2023-08-22T15:13:14+01:00",
    "2020-01-01T00:02:03-01:00"
  )

  # The same instants written with colon-free offsets, which `strptime()`
  # parses directly via `%z`.
  expected <- as.POSIXct(strptime(c(
    "2023-08-22T14:13:14+0000",
    "2020-01-01T01:02:03+0000",
    "2023-08-22T15:13:14+0100",
    "2020-01-01T00:02:03-0100"
  ), format = "%Y-%m-%dT%H:%M:%S%z", tz = "UTC"))

  # The result must not depend on the session time zone. `with_envvar()`
  # restores `TZ` afterwards, and unsets it again if it was unset before
  # (restoring via `Sys.setenv(TZ = Sys.getenv("TZ"))` would leave `TZ = ""`).
  withr::with_envvar(c(TZ = "America/New_York"), {
    expect_identical(parse_connect_rfc3339(xs), expected)
  })

  withr::with_envvar(c(TZ = "UTC"), {
    expect_identical(parse_connect_rfc3339(xs), expected)
  })
})

test_that("make_timestamp produces expected output", {
  # Timestamps as they may arrive: zero-offset and numeric-offset forms.
  inputs <- c(
    "2023-08-22T14:13:14Z",
    "2020-01-01T01:02:03Z",
    "2023-08-22T15:13:14+01:00",
    "2020-01-01T00:02:03-01:00"
  )
  # The same instants, as `make_timestamp()` should render them in UTC.
  outcome <- c(
    "2023-08-22T14:13:14Z",
    "2020-01-01T01:02:03Z",
    "2023-08-22T14:13:14Z",
    "2020-01-01T01:02:03Z"
  )

  # Run the same checks under two session time zones. `with_envvar()` restores
  # `TZ` afterwards, and unsets it again if it was unset before.
  withr::with_envvar(c(TZ = "America/New_York"), {
    # Coerce `inputs` (not `outcome`) so the offset-bearing timestamps are
    # actually exercised.
    ts <- coerce_datetime(inputs, NA_datetime_)
    expect_equal(make_timestamp(ts), outcome)

    # Works on a single item
    expect_equal(make_timestamp(ts[1]), outcome[1])

    # Idempotent
    expect_equal(make_timestamp(make_timestamp(ts)), outcome)
  })

  withr::with_envvar(c(TZ = "UTC"), {
    ts <- coerce_datetime(inputs, NA_datetime_)
    expect_equal(make_timestamp(ts), outcome)
    expect_equal(make_timestamp(rep(ts, 10)), rep(outcome, 10))

    # Works on a single item
    expect_equal(make_timestamp(ts[1]), outcome[1])

    # Idempotent
    expect_equal(make_timestamp(make_timestamp(ts)), outcome)
  })
})

Expand Down
Loading