Skip to content

Take control over resizing to enforce required invariants #383

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions cpp11test/src/test-r_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,4 +325,47 @@ context("r_vector-C++") {
expect_true(x.data() != R_NilValue);
expect_true(x.size() == 3);
}

test_that(
"writable vector truncation resizes names and retains attributes (but not dim or "
"dim names)") {
cpp11::writable::integers x(2);
x[0] = 1;
x[1] = 2;

// Doubles the capacity from 2 to 4, meaning the underlying SEXP has length 4 now.
x.push_back(3);
expect_true(Rf_xlength(x.data()) == 4);

// Set some names
SEXP names = PROTECT(Rf_allocVector(STRSXP, 3));
SET_STRING_ELT(names, 0, Rf_mkCharCE("x", CE_UTF8));
SET_STRING_ELT(names, 1, Rf_mkCharCE("y", CE_UTF8));
SET_STRING_ELT(names, 2, Rf_mkCharCE("z", CE_UTF8));
x.names() = names;

// Length of names SEXP is actually 4 now, extended by `setAttrib()` to match
// the internal capacity
expect_true(Rf_xlength(Rf_getAttrib(x.data(), R_NamesSymbol)) == 4);

// Set an attribute
SEXP bar = PROTECT(Rf_ScalarInteger(1));
x.attr("foo") = bar;

// Extract out the underlying SEXP using the operator:
// - This truncates to size 3
// - This truncates and keeps names
// - This copies over attributes like `"foo"`
// - This updates the internal SEXP in `x` to the one in `x_sexp` (gross but users
// probably expect this at this point)
SEXP x_sexp = x;

expect_true(Rf_xlength(x_sexp) == 3);
expect_true(Rf_xlength(Rf_getAttrib(x_sexp, R_NamesSymbol)) == 3);
expect_true(Rf_getAttrib(x_sexp, Rf_install("foo")) == bar);

expect_true(x.data() == x_sexp);

UNPROTECT(2);
}
}
6 changes: 6 additions & 0 deletions inst/include/cpp11/doubles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ inline typename r_vector<double>::underlying_type* r_vector<double>::get_p(bool
}
}

template <>
inline typename r_vector<double>::underlying_type const* r_vector<double>::get_const_p(
bool is_altrep, SEXP data) {
return REAL_OR_NULL(data);
}
Comment on lines +40 to +44
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using utilities like REAL_OR_NULL() gives ALTREP types that can cheaply return a const pointer to an underlying buffer a chance to do so (like ALTREP views)


template <>
inline void r_vector<double>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
6 changes: 6 additions & 0 deletions inst/include/cpp11/integers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ inline typename r_vector<int>::underlying_type* r_vector<int>::get_p(bool is_alt
}
}

template <>
inline typename r_vector<int>::underlying_type const* r_vector<int>::get_const_p(
bool is_altrep, SEXP data) {
return INTEGER_OR_NULL(data);
}

template <>
inline void r_vector<int>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
12 changes: 12 additions & 0 deletions inst/include/cpp11/list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ inline typename r_vector<SEXP>::underlying_type* r_vector<SEXP>::get_p(bool, SEX
return nullptr;
}

template <>
inline typename r_vector<SEXP>::underlying_type const* r_vector<SEXP>::get_const_p(
bool is_altrep, SEXP data) {
// No `VECTOR_PTR_OR_NULL()`
if (is_altrep) {
return nullptr;
} else {
// TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released
return static_cast<SEXP const*>(DATAPTR_RO(data));
}
}

/// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level
template <>
inline SEXP r_vector<SEXP>::get_oob() {
Expand Down
6 changes: 6 additions & 0 deletions inst/include/cpp11/logicals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ inline typename r_vector<r_bool>::underlying_type* r_vector<r_bool>::get_p(bool
}
}

template <>
inline typename r_vector<r_bool>::underlying_type const* r_vector<r_bool>::get_const_p(
bool is_altrep, SEXP data) {
return LOGICAL_OR_NULL(data);
}

template <>
inline void r_vector<r_bool>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
108 changes: 105 additions & 3 deletions inst/include/cpp11/r_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <algorithm> // for max
#include <array> // for array
#include <cstdio> // for snprintf
#include <cstring> // for memcpy
#include <exception> // for exception
#include <initializer_list> // for initializer_list
#include <iterator> // for forward_iterator_tag, random_ac...
Expand Down Expand Up @@ -145,6 +146,8 @@ class r_vector {
/// Implemented in specialization
static underlying_type* get_p(bool is_altrep, SEXP data);
/// Implemented in specialization
static underlying_type const* get_const_p(bool is_altrep, SEXP data);
/// Implemented in specialization
static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf);
/// Implemented in specialization
static SEXPTYPE get_sexptype();
Expand Down Expand Up @@ -311,8 +314,13 @@ class r_vector : public cpp11::r_vector<T> {
/// Implemented in specialization
static void set_elt(SEXP x, R_xlen_t i, underlying_type value);

static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size);
static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size);
static SEXP resize_names(SEXP x, R_xlen_t size);

using cpp11::r_vector<T>::get_elt;
using cpp11::r_vector<T>::get_p;
using cpp11::r_vector<T>::get_const_p;
using cpp11::r_vector<T>::get_sexptype;
using cpp11::r_vector<T>::valid_type;
using cpp11::r_vector<T>::valid_length;
Expand Down Expand Up @@ -759,8 +767,25 @@ inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep)
: cpp11::r_vector<T>(data, is_altrep), capacity_(length_) {}

template <typename T>
inline r_vector<T>::r_vector(const r_vector& rhs)
: cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {}
inline r_vector<T>::r_vector(const r_vector& rhs) {
// We don't want to just pass through to the read-only constructor because we'd
// have to convert to `SEXP` first, which could truncate, and then we'd still have
// to shallow duplicate after that to ensure we have a duplicate, which can result in
// too many copies (#369).
//
// Instead we take control of setting all fields to try and only duplicate 1 time.
// We try and reclaim unused capacity during the duplication by only reserving up to
// the `rhs.length_`. This is nice because if the user returns this object, the
// truncation has already been done and they don't have to pay for another allocation.
// Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity,
// which ensures we have our own copy.
data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_);
protect_ = detail::store::insert(data_);
is_altrep_ = ALTREP(data_);
data_p_ = get_p(is_altrep_, data_);
length_ = rhs.length_;
capacity_ = rhs.length_;
}
Comment on lines -762 to +788
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This avoids the double copy from #369

Before

> profmem::profmem(test_())
Rprofmem memory profiling of:
test_()

Memory allocations:
       what   bytes              calls
1     alloc  400048 test_() -> .Call()
2     alloc  800048 test_() -> .Call()
3     alloc  400056 test_() -> .Call()
4     alloc  400056 test_() -> .Call() # <- this one is extra
total       2000208   

After

> profmem::profmem(test_())
Rprofmem memory profiling of:
test_()

Memory allocations:
       what   bytes              calls
1     alloc  400048 test_() -> .Call()
2     alloc  800048 test_() -> .Call()
3     alloc  400056 test_() -> .Call()
total       1600152  


template <typename T>
inline r_vector<T>::r_vector(r_vector&& rhs) {
Expand Down Expand Up @@ -1048,7 +1073,7 @@ inline void r_vector<T>::reserve(R_xlen_t new_capacity) {
SEXP old_protect = protect_;

data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity)
: safe[Rf_xlengthgets](data_, new_capacity);
: reserve_data(data_, is_altrep_, new_capacity);
protect_ = detail::store::insert(data_);
is_altrep_ = ALTREP(data_);
data_p_ = get_p(is_altrep_, data_);
Expand Down Expand Up @@ -1249,6 +1274,83 @@ inline typename r_vector<T>::iterator r_vector<T>::iterator::operator+(R_xlen_t
return it;
}

// Compared to `Rf_xlengthgets()`:
// - This always allocates, even if it is the same size, which is important when we use
// it in a constructor and need to ensure that it duplicates on the way in.
// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we
// use it in constructors and when we truncate right before returning from the `SEXP`
// operator.
// - This is more friendly to ALTREP `x`.
template <typename T>
inline SEXP r_vector<T>::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) {
Comment on lines +1277 to +1285
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Important invariants of reserve_data() vs Rf_xlengthgets()

Useful for both:

  • Usage in constructors, when we need to enforce a duplication to occur
  • Usage during truncation, when we need to copy attributes too

// Resize core data
SEXP out = PROTECT(resize_data(x, is_altrep, size));

// Resize names, if required
SEXP names = Rf_getAttrib(x, R_NamesSymbol);
if (names != R_NilValue) {
names = resize_names(names, size);
Rf_setAttrib(out, R_NamesSymbol, names);
}

// Copy over "most" attributes, and set OBJECT bit and S4 bit as needed.
// Does not copy over names, dim, or dim names.
// Names are handled already. Dim and dim names should not be applicable,
// as this is a vector.
// Does not look like it would ever error in our use cases, so no `safe[]`.
Rf_copyMostAttrib(x, out);

UNPROTECT(1);
return out;
}

template <typename T>
inline SEXP r_vector<T>::resize_data(SEXP x, bool is_altrep, R_xlen_t size) {
underlying_type const* v_x = get_const_p(is_altrep, x);

SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size));
underlying_type* v_out = get_p(ALTREP(out), out);

const R_xlen_t x_size = Rf_xlength(x);
const R_xlen_t copy_size = (x_size > size) ? size : x_size;

// Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly
// copy everything from `x`)
if (v_x != nullptr && v_out != nullptr) {
std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type));
} else {
// Handles ALTREP `x` with no const pointer, VECSXP, STRSXP
for (R_xlen_t i = 0; i < copy_size; ++i) {
set_elt(out, i, get_elt(x, i));
}
}

UNPROTECT(1);
return out;
}

template <typename T>
inline SEXP r_vector<T>::resize_names(SEXP x, R_xlen_t size) {
const SEXP* v_x = STRING_PTR_RO(x);

SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size));

const R_xlen_t x_size = Rf_xlength(x);
const R_xlen_t copy_size = (x_size > size) ? size : x_size;

for (R_xlen_t i = 0; i < copy_size; ++i) {
SET_STRING_ELT(out, i, v_x[i]);
}

// Ensure remaining names are initialized to `""`
for (R_xlen_t i = copy_size; i < size; ++i) {
SET_STRING_ELT(out, i, R_BlankString);
}

UNPROTECT(1);
return out;
}

} // namespace writable

// TODO: is there a better condition we could use, e.g. assert something true
Expand Down
6 changes: 6 additions & 0 deletions inst/include/cpp11/raws.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ inline typename r_vector<uint8_t>::underlying_type r_vector<uint8_t>::get_elt(
return RAW_ELT(x, i);
}

template <>
inline typename r_vector<uint8_t>::underlying_type const* r_vector<uint8_t>::get_const_p(
bool is_altrep, SEXP data) {
return RAW_OR_NULL(data);
}

template <>
inline typename r_vector<uint8_t>::underlying_type* r_vector<uint8_t>::get_p(
bool is_altrep, SEXP data) {
Expand Down
11 changes: 11 additions & 0 deletions inst/include/cpp11/strings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ inline typename r_vector<r_string>::underlying_type* r_vector<r_string>::get_p(b
return nullptr;
}

template <>
inline typename r_vector<r_string>::underlying_type const*
r_vector<r_string>::get_const_p(bool is_altrep, SEXP data) {
// No `STRING_PTR_OR_NULL()`
if (is_altrep) {
return nullptr;
} else {
return STRING_PTR_RO(data);
}
}

template <>
inline void r_vector<r_string>::get_region(SEXP x, R_xlen_t i, R_xlen_t n,
typename r_vector::underlying_type* buf) {
Expand Down
Loading