Skip to content

Commit fbdeb5a

Browse files
Add prefetching to arena allocations.
PiperOrigin-RevId: 565061139
1 parent 6c121f5 commit fbdeb5a

File tree

4 files changed

+74
-11
lines changed

4 files changed

+74
-11
lines changed

protobuf_deps.bzl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def protobuf_deps():
149149
if not native.existing_rule("upb"):
150150
http_archive(
151151
name = "upb",
152-
url = "https://github.com/protocolbuffers/protobuf/archive/f85a338d79f05938d1725fba3b2c603a8d06462e.zip",
153-
strip_prefix = "protobuf-f85a338d79f05938d1725fba3b2c603a8d06462e/upb",
154-
sha256 = "cd28ae63e40a146ec1a2d41e96f53e637aaa5d6c746e7120d013aafc65092882",
152+
url = "https://github.com/protocolbuffers/protobuf/archive/7242c3619c6db9843614b2c865681bf397261be8.zip",
153+
strip_prefix = "protobuf-7242c3619c6db9843614b2c865681bf397261be8/upb",
154+
sha256 = "0fc581f5e5caaf30c7119a73f2cff5d45424e4a4f23a52ebba73e3df031ad1c6",
155155
)

src/google/protobuf/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ cc_library(
331331
":arena_cleanup",
332332
":string_block",
333333
"//src/google/protobuf/stubs:lite",
334+
"@com_google_absl//absl/base:prefetch",
334335
"@com_google_absl//absl/container:layout",
335336
"@com_google_absl//absl/log:absl_check",
336337
"@com_google_absl//absl/log:absl_log",

src/google/protobuf/arena.cc

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ class GetDeallocator {
114114
SerialArena::SerialArena(ArenaBlock* b, ThreadSafeArena& parent)
115115
: ptr_{b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)},
116116
limit_{b->Limit()},
117+
prefetch_ptr_(
118+
b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)),
119+
prefetch_limit_(b->Limit()),
117120
head_{b},
118121
space_allocated_{b->size},
119122
parent_{parent} {
@@ -130,9 +133,7 @@ SerialArena::SerialArena(FirstSerialArena, ArenaBlock* b,
130133
ThreadSafeArena& parent)
131134
: head_{b}, space_allocated_{b->size}, parent_{parent} {
132135
if (b->IsSentry()) return;
133-
134-
set_ptr(b->Pointer(kBlockHeaderSize));
135-
limit_ = b->Limit();
136+
set_range(b->Pointer(kBlockHeaderSize), b->Limit());
136137
}
137138

138139
std::vector<void*> SerialArena::PeekCleanupListForTesting() {
@@ -159,8 +160,7 @@ std::vector<void*> ThreadSafeArena::PeekCleanupListForTesting() {
159160
}
160161

161162
void SerialArena::Init(ArenaBlock* b, size_t offset) {
162-
set_ptr(b->Pointer(offset));
163-
limit_ = b->Limit();
163+
set_range(b->Pointer(offset), b->Limit());
164164
head_.store(b, std::memory_order_relaxed);
165165
space_used_.store(0, std::memory_order_relaxed);
166166
space_allocated_.store(b->size, std::memory_order_relaxed);
@@ -268,8 +268,7 @@ void SerialArena::AllocateNewBlock(size_t n) {
268268
/*used=*/used,
269269
/*allocated=*/mem.n, wasted);
270270
auto* new_head = new (mem.p) ArenaBlock{old_head, mem.n};
271-
set_ptr(new_head->Pointer(kBlockHeaderSize));
272-
limit_ = new_head->Limit();
271+
set_range(new_head->Pointer(kBlockHeaderSize), new_head->Limit());
273272
// Previous writes must take effect before writing new head.
274273
head_.store(new_head, std::memory_order_release);
275274

src/google/protobuf/serial_arena.h

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <algorithm>
1414
#include <atomic>
15+
#include <cstddef>
1516
#include <cstdint>
1617
#include <string>
1718
#include <type_traits>
@@ -21,6 +22,8 @@
2122

2223
#include "google/protobuf/stubs/common.h"
2324
#include "absl/base/attributes.h"
25+
#include "absl/base/optimization.h"
26+
#include "absl/base/prefetch.h"
2427
#include "absl/log/absl_check.h"
2528
#include "absl/numeric/bits.h"
2629
#include "google/protobuf/arena_align.h"
@@ -29,7 +32,6 @@
2932
#include "google/protobuf/port.h"
3033
#include "google/protobuf/string_block.h"
3134

32-
3335
// Must be included last.
3436
#include "google/protobuf/port_def.inc"
3537

@@ -225,6 +227,7 @@ class PROTOBUF_EXPORT SerialArena {
225227
PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
226228
*out = ret;
227229
set_ptr(reinterpret_cast<char*>(next));
230+
MaybePrefetchForwards(reinterpret_cast<char*>(next));
228231
return true;
229232
}
230233

@@ -251,6 +254,7 @@ class PROTOBUF_EXPORT SerialArena {
251254
set_ptr(reinterpret_cast<char*>(next));
252255
AddCleanupFromExisting(ret, destructor);
253256
ABSL_DCHECK_GE(limit_, ptr());
257+
MaybePrefetchForwards(reinterpret_cast<char*>(next));
254258
return ret;
255259
}
256260

@@ -279,10 +283,58 @@ class PROTOBUF_EXPORT SerialArena {
279283

280284
PROTOBUF_UNPOISON_MEMORY_REGION(limit_ - n, n);
281285
limit_ -= n;
286+
MaybePrefetchBackwards(limit_);
282287
ABSL_DCHECK_GE(limit_, ptr());
283288
cleanup::CreateNode(tag, limit_, elem, destructor);
284289
}
285290

291+
static constexpr ptrdiff_t kPrefetchForwardsDegree = ABSL_CACHELINE_SIZE * 16;
292+
static constexpr ptrdiff_t kPrefetchBackwardsDegree = ABSL_CACHELINE_SIZE * 6;
293+
294+
// Prefetch the next kPrefetchForwardsDegree bytes after `prefetch_ptr_` and
295+
// up to `prefetch_limit_`, if `next` is within kPrefetchForwardsDegree bytes
296+
// of `prefetch_ptr_`.
297+
PROTOBUF_ALWAYS_INLINE
298+
void MaybePrefetchForwards(const char* next) {
299+
ABSL_DCHECK(static_cast<const void*>(prefetch_ptr_) == nullptr ||
300+
static_cast<const void*>(prefetch_ptr_) >= head());
301+
if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ - next > kPrefetchForwardsDegree))
302+
return;
303+
if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ < prefetch_limit_)) {
304+
const char* prefetch_ptr = std::max(next, prefetch_ptr_);
305+
ABSL_DCHECK(prefetch_ptr != nullptr);
306+
const char* end =
307+
std::min(prefetch_limit_, prefetch_ptr + ABSL_CACHELINE_SIZE * 16);
308+
for (; prefetch_ptr < end; prefetch_ptr += ABSL_CACHELINE_SIZE) {
309+
absl::PrefetchToLocalCacheForWrite(prefetch_ptr);
310+
}
311+
prefetch_ptr_ = prefetch_ptr;
312+
}
313+
}
314+
315+
PROTOBUF_ALWAYS_INLINE
316+
// Prefetch up to kPrefetchBackwardsDegree before `prefetch_limit_` and after
317+
// `prefetch_ptr_`, if `limit` is within kPrefetchBackwardsDegree of
318+
// `prefetch_limit_`.
319+
void MaybePrefetchBackwards(const char* limit) {
320+
ABSL_DCHECK(prefetch_limit_ == nullptr ||
321+
static_cast<const void*>(prefetch_limit_) <=
322+
static_cast<const void*>(head()->Limit()));
323+
if (PROTOBUF_PREDICT_TRUE(limit - prefetch_limit_ >
324+
kPrefetchBackwardsDegree))
325+
return;
326+
if (PROTOBUF_PREDICT_TRUE(prefetch_limit_ > prefetch_ptr_)) {
327+
const char* prefetch_limit = std::min(limit, prefetch_limit_);
328+
ABSL_DCHECK_NE(prefetch_limit, nullptr);
329+
const char* end =
330+
std::max(prefetch_ptr_, prefetch_limit - kPrefetchBackwardsDegree);
331+
for (; prefetch_limit > end; prefetch_limit -= ABSL_CACHELINE_SIZE) {
332+
absl::PrefetchToLocalCacheForWrite(prefetch_limit);
333+
}
334+
prefetch_limit_ = prefetch_limit;
335+
}
336+
}
337+
286338
private:
287339
friend class ThreadSafeArena;
288340

@@ -319,6 +371,11 @@ class PROTOBUF_EXPORT SerialArena {
319371
std::atomic<char*> ptr_{nullptr};
320372
// Limiting address up to which memory can be allocated from the head block.
321373
char* limit_ = nullptr;
374+
// Current prefetch positions. Data from `ptr_` up to but not including
375+
// `prefetch_ptr_` is software prefetched. Similarly, data from `limit_` down
376+
// to but not including `prefetch_limit_` is software prefetched.
377+
const char* prefetch_ptr_ = nullptr;
378+
const char* prefetch_limit_ = nullptr;
322379

323380
// The active string block.
324381
std::atomic<StringBlock*> string_block_{nullptr};
@@ -356,6 +413,12 @@ class PROTOBUF_EXPORT SerialArena {
356413
char* ptr() { return ptr_.load(std::memory_order_relaxed); }
357414
const char* ptr() const { return ptr_.load(std::memory_order_relaxed); }
358415
void set_ptr(char* ptr) { return ptr_.store(ptr, std::memory_order_relaxed); }
416+
// Points this arena at a fresh allocation range [ptr, limit) and resets both
// prefetch positions to the ends of that range, so that nothing in the new
// range is considered prefetched yet.
PROTOBUF_ALWAYS_INLINE void set_range(char* ptr, char* limit) {
  // Start of the range: allocation pointer and forward prefetch position.
  ptr_.store(ptr, std::memory_order_relaxed);
  prefetch_ptr_ = ptr;

  // End of the range: allocation limit and backward prefetch position.
  limit_ = limit;
  prefetch_limit_ = limit;
}
359422

360423
// Constructor is private as only New() should be used.
361424
inline SerialArena(ArenaBlock* b, ThreadSafeArena& parent);

0 commit comments

Comments
 (0)