diff --git a/cpp11test/src/test-r_vector.cpp b/cpp11test/src/test-r_vector.cpp index b665c533..c73f48a8 100644 --- a/cpp11test/src/test-r_vector.cpp +++ b/cpp11test/src/test-r_vector.cpp @@ -325,4 +325,47 @@ context("r_vector-C++") { expect_true(x.data() != R_NilValue); expect_true(x.size() == 3); } + + test_that( + "writable vector truncation resizes names and retains attributes (but not dim or " + "dim names)") { + cpp11::writable::integers x(2); + x[0] = 1; + x[1] = 2; + + // Doubles the capacity from 2 to 4, meaning the underlying SEXP has length 4 now. + x.push_back(3); + expect_true(Rf_xlength(x.data()) == 4); + + // Set some names + SEXP names = PROTECT(Rf_allocVector(STRSXP, 3)); + SET_STRING_ELT(names, 0, Rf_mkCharCE("x", CE_UTF8)); + SET_STRING_ELT(names, 1, Rf_mkCharCE("y", CE_UTF8)); + SET_STRING_ELT(names, 2, Rf_mkCharCE("z", CE_UTF8)); + x.names() = names; + + // Length of names SEXP is actually 4 now, extended by `setAttrib()` to match + // the internal capacity + expect_true(Rf_xlength(Rf_getAttrib(x.data(), R_NamesSymbol)) == 4); + + // Set an attribute + SEXP bar = PROTECT(Rf_ScalarInteger(1)); + x.attr("foo") = bar; + + // Extract out the underlying SEXP using the operator: + // - This truncates to size 3 + // - This truncates and keeps names + // - This copies over attributes like `"foo"` + // - This updates the internal SEXP in `x` to the one in `x_sexp` (gross but users + // probably expect this at this point) + SEXP x_sexp = x; + + expect_true(Rf_xlength(x_sexp) == 3); + expect_true(Rf_xlength(Rf_getAttrib(x_sexp, R_NamesSymbol)) == 3); + expect_true(Rf_getAttrib(x_sexp, Rf_install("foo")) == bar); + + expect_true(x.data() == x_sexp); + + UNPROTECT(2); + } } diff --git a/inst/include/cpp11/doubles.hpp b/inst/include/cpp11/doubles.hpp index eb5de79e..5a5cd12f 100644 --- a/inst/include/cpp11/doubles.hpp +++ b/inst/include/cpp11/doubles.hpp @@ -37,6 +37,12 @@ inline typename r_vector::underlying_type* r_vector::get_p(bool } } +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return REAL_OR_NULL(data); +} + template <> inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, typename r_vector::underlying_type* buf) { diff --git a/inst/include/cpp11/integers.hpp b/inst/include/cpp11/integers.hpp index 559ecdf5..ff82cda5 100644 --- a/inst/include/cpp11/integers.hpp +++ b/inst/include/cpp11/integers.hpp @@ -38,6 +38,12 @@ inline typename r_vector::underlying_type* r_vector::get_p(bool is_alt } } +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return INTEGER_OR_NULL(data); +} + template <> inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, typename r_vector::underlying_type* buf) { diff --git a/inst/include/cpp11/list.hpp b/inst/include/cpp11/list.hpp index dbb6937d..b480374b 100644 --- a/inst/include/cpp11/list.hpp +++ b/inst/include/cpp11/list.hpp @@ -30,6 +30,18 @@ inline typename r_vector::underlying_type* r_vector::get_p(bool, SEX return nullptr; } +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + // No `VECTOR_PTR_OR_NULL()` + if (is_altrep) { + return nullptr; + } else { + // TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released + return static_cast(DATAPTR_RO(data)); + } +} + /// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level template <> inline SEXP r_vector::get_oob() { diff --git a/inst/include/cpp11/logicals.hpp b/inst/include/cpp11/logicals.hpp index e3305010..0b197373 100644 --- a/inst/include/cpp11/logicals.hpp +++ b/inst/include/cpp11/logicals.hpp @@ -37,6 +37,12 @@ inline typename r_vector::underlying_type* r_vector::get_p(bool } } +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return LOGICAL_OR_NULL(data); +} + template <> inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, typename r_vector::underlying_type* buf) { diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index c399a87d..3d15e5d1 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -5,6 +5,7 @@ #include // for max #include // for array #include // for snprintf +#include // for memcpy #include // for exception #include // for initializer_list #include // for forward_iterator_tag, random_ac... @@ -145,6 +146,8 @@ class r_vector { /// Implemented in specialization static underlying_type* get_p(bool is_altrep, SEXP data); /// Implemented in specialization + static underlying_type const* get_const_p(bool is_altrep, SEXP data); + /// Implemented in specialization static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf); /// Implemented in specialization static SEXPTYPE get_sexptype(); @@ -311,8 +314,13 @@ class r_vector : public cpp11::r_vector { /// Implemented in specialization static void set_elt(SEXP x, R_xlen_t i, underlying_type value); + static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size); + static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size); + static SEXP resize_names(SEXP x, R_xlen_t size); + using cpp11::r_vector::get_elt; using cpp11::r_vector::get_p; + using cpp11::r_vector::get_const_p; using cpp11::r_vector::get_sexptype; using cpp11::r_vector::valid_type; using cpp11::r_vector::valid_length; @@ -759,8 +767,25 @@ inline r_vector::r_vector(SEXP&& data, bool is_altrep) : cpp11::r_vector(data, is_altrep), capacity_(length_) {} template -inline r_vector::r_vector(const r_vector& rhs) - : cpp11::r_vector(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {} +inline r_vector::r_vector(const r_vector& rhs) { + // We don't want to just pass through to the read-only constructor because we'd + // have to convert to `SEXP` first, which could truncate, and then we'd still have + // to shallow duplicate after that to ensure we have a duplicate, which can result in + // too many copies (#369). + // + // Instead we take control of setting all fields to try and only duplicate 1 time. + // We try and reclaim unused capacity during the duplication by only reserving up to + // the `rhs.length_`. This is nice because if the user returns this object, the + // truncation has already been done and they don't have to pay for another allocation. + // Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity, + // which ensures we have our own copy. + data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = get_p(is_altrep_, data_); + length_ = rhs.length_; + capacity_ = rhs.length_; +} template inline r_vector::r_vector(r_vector&& rhs) { @@ -1048,7 +1073,7 @@ inline void r_vector::reserve(R_xlen_t new_capacity) { SEXP old_protect = protect_; data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) - : safe[Rf_xlengthgets](data_, new_capacity); + : reserve_data(data_, is_altrep_, new_capacity); protect_ = detail::store::insert(data_); is_altrep_ = ALTREP(data_); data_p_ = get_p(is_altrep_, data_); @@ -1249,6 +1274,83 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t return it; } +// Compared to `Rf_xlengthgets()`: +// - This always allocates, even if it is the same size, which is important when we use +// it in a constructor and need to ensure that it duplicates on the way in. +// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we +// use it in constructors and when we truncate right before returning from the `SEXP` +// operator. +// - This is more friendly to ALTREP `x`. +template +inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { + // Resize core data + SEXP out = PROTECT(resize_data(x, is_altrep, size)); + + // Resize names, if required + SEXP names = Rf_getAttrib(x, R_NamesSymbol); + if (names != R_NilValue) { + names = resize_names(names, size); + Rf_setAttrib(out, R_NamesSymbol, names); + } + + // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. + // Does not copy over names, dim, or dim names. + // Names are handled already. Dim and dim names should not be applicable, + // as this is a vector. + // Does not look like it would ever error in our use cases, so no `safe[]`. + Rf_copyMostAttrib(x, out); + + UNPROTECT(1); + return out; +} + +template +inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { + underlying_type const* v_x = get_const_p(is_altrep, x); + + SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); + underlying_type* v_out = get_p(ALTREP(out), out); + + const R_xlen_t x_size = Rf_xlength(x); + const R_xlen_t copy_size = (x_size > size) ? size : x_size; + + // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly + // copy everything from `x`) + if (v_x != nullptr && v_out != nullptr) { + std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); + } else { + // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP + for (R_xlen_t i = 0; i < copy_size; ++i) { + set_elt(out, i, get_elt(x, i)); + } + } + + UNPROTECT(1); + return out; +} + +template +inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { + const SEXP* v_x = STRING_PTR_RO(x); + + SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); + + const R_xlen_t x_size = Rf_xlength(x); + const R_xlen_t copy_size = (x_size > size) ? size : x_size; + + for (R_xlen_t i = 0; i < copy_size; ++i) { + SET_STRING_ELT(out, i, v_x[i]); + } + + // Ensure remaining names are initialized to `""` + for (R_xlen_t i = copy_size; i < size; ++i) { + SET_STRING_ELT(out, i, R_BlankString); + } + + UNPROTECT(1); + return out; +} + } // namespace writable // TODO: is there a better condition we could use, e.g. assert something true diff --git a/inst/include/cpp11/raws.hpp b/inst/include/cpp11/raws.hpp index 8da99e58..60baf74f 100644 --- a/inst/include/cpp11/raws.hpp +++ b/inst/include/cpp11/raws.hpp @@ -35,6 +35,12 @@ inline typename r_vector::underlying_type r_vector::get_elt( return RAW_ELT(x, i); } +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return RAW_OR_NULL(data); +} + template <> inline typename r_vector::underlying_type* r_vector::get_p( bool is_altrep, SEXP data) { diff --git a/inst/include/cpp11/strings.hpp b/inst/include/cpp11/strings.hpp index f84b0edb..dabe2950 100644 --- a/inst/include/cpp11/strings.hpp +++ b/inst/include/cpp11/strings.hpp @@ -34,6 +34,17 @@ inline typename r_vector::underlying_type* r_vector::get_p(b return nullptr; } +template <> +inline typename r_vector::underlying_type const* +r_vector::get_const_p(bool is_altrep, SEXP data) { + // No `STRING_PTR_OR_NULL()` + if (is_altrep) { + return nullptr; + } else { + return STRING_PTR_RO(data); + } +} + template <> inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, typename r_vector::underlying_type* buf) {