1212
1313#include < algorithm>
1414#include < atomic>
15+ #include < cstddef>
1516#include < cstdint>
1617#include < string>
1718#include < type_traits>
2122
2223#include " google/protobuf/stubs/common.h"
2324#include " absl/base/attributes.h"
25+ #include " absl/base/optimization.h"
26+ #include " absl/base/prefetch.h"
2427#include " absl/log/absl_check.h"
2528#include " absl/numeric/bits.h"
2629#include " google/protobuf/arena_align.h"
2932#include " google/protobuf/port.h"
3033#include " google/protobuf/string_block.h"
3134
32-
3335// Must be included last.
3436#include " google/protobuf/port_def.inc"
3537
@@ -225,6 +227,7 @@ class PROTOBUF_EXPORT SerialArena {
225227 PROTOBUF_UNPOISON_MEMORY_REGION (ret, n);
226228 *out = ret;
227229 set_ptr (reinterpret_cast <char *>(next));
230+ MaybePrefetchForwards (reinterpret_cast <char *>(next));
228231 return true ;
229232 }
230233
@@ -251,6 +254,7 @@ class PROTOBUF_EXPORT SerialArena {
251254 set_ptr (reinterpret_cast <char *>(next));
252255 AddCleanupFromExisting (ret, destructor);
253256 ABSL_DCHECK_GE (limit_, ptr ());
257+ MaybePrefetchForwards (reinterpret_cast <char *>(next));
254258 return ret;
255259 }
256260
@@ -279,10 +283,58 @@ class PROTOBUF_EXPORT SerialArena {
279283
280284 PROTOBUF_UNPOISON_MEMORY_REGION (limit_ - n, n);
281285 limit_ -= n;
286+ MaybePrefetchBackwards (limit_);
282287 ABSL_DCHECK_GE (limit_, ptr ());
283288 cleanup::CreateNode (tag, limit_, elem, destructor);
284289 }
285290
291+ static constexpr ptrdiff_t kPrefetchForwardsDegree = ABSL_CACHELINE_SIZE * 16 ;
292+ static constexpr ptrdiff_t kPrefetchBackwardsDegree = ABSL_CACHELINE_SIZE * 6 ;
293+
294+ // Prefetch the next kPrefetchForwardsDegree bytes after `prefetch_ptr_` and
295+ // up to `prefetch_limit_`, if `next` is within kPrefetchForwardsDegree bytes
296+ // of `prefetch_ptr_`.
297+ PROTOBUF_ALWAYS_INLINE
298+ void MaybePrefetchForwards (const char * next) {
299+ ABSL_DCHECK (static_cast <const void *>(prefetch_ptr_) == nullptr ||
300+ static_cast <const void *>(prefetch_ptr_) >= head ());
301+ if (PROTOBUF_PREDICT_TRUE (prefetch_ptr_ - next > kPrefetchForwardsDegree ))
302+ return ;
303+ if (PROTOBUF_PREDICT_TRUE (prefetch_ptr_ < prefetch_limit_)) {
304+ const char * prefetch_ptr = std::max (next, prefetch_ptr_);
305+ ABSL_DCHECK (prefetch_ptr != nullptr );
306+ const char * end =
307+ std::min (prefetch_limit_, prefetch_ptr + ABSL_CACHELINE_SIZE * 16 );
308+ for (; prefetch_ptr < end; prefetch_ptr += ABSL_CACHELINE_SIZE) {
309+ absl::PrefetchToLocalCacheForWrite (prefetch_ptr);
310+ }
311+ prefetch_ptr_ = prefetch_ptr;
312+ }
313+ }
314+
315+ PROTOBUF_ALWAYS_INLINE
316+ // Prefetch up to kPrefetchBackwardsDegree before `prefetch_limit_` and after
317+ // `prefetch_ptr_`, if `limit` is within kPrefetchBackwardsDegree of
318+ // `prefetch_limit_`.
319+ void MaybePrefetchBackwards (const char * limit) {
320+ ABSL_DCHECK (prefetch_limit_ == nullptr ||
321+ static_cast <const void *>(prefetch_limit_) <=
322+ static_cast <const void *>(head ()->Limit ()));
323+ if (PROTOBUF_PREDICT_TRUE (limit - prefetch_limit_ >
324+ kPrefetchBackwardsDegree ))
325+ return ;
326+ if (PROTOBUF_PREDICT_TRUE (prefetch_limit_ > prefetch_ptr_)) {
327+ const char * prefetch_limit = std::min (limit, prefetch_limit_);
328+ ABSL_DCHECK_NE (prefetch_limit, nullptr );
329+ const char * end =
330+ std::max (prefetch_ptr_, prefetch_limit - kPrefetchBackwardsDegree );
331+ for (; prefetch_limit > end; prefetch_limit -= ABSL_CACHELINE_SIZE) {
332+ absl::PrefetchToLocalCacheForWrite (prefetch_limit);
333+ }
334+ prefetch_limit_ = prefetch_limit;
335+ }
336+ }
337+
286338 private:
287339 friend class ThreadSafeArena ;
288340
@@ -319,6 +371,11 @@ class PROTOBUF_EXPORT SerialArena {
319371 std::atomic<char *> ptr_{nullptr };
320372 // Limiting address up to which memory can be allocated from the head block.
321373 char * limit_ = nullptr ;
// Forward prefetch position: data from `ptr_` up to, but not including,
// `prefetch_ptr_` has been software prefetched.
const char* prefetch_ptr_ = nullptr;
// Backward prefetch position: data from `limit_` down to, but not including,
// `prefetch_limit_` has been software prefetched.
const char* prefetch_limit_ = nullptr;
322379
323380 // The active string block.
324381 std::atomic<StringBlock*> string_block_{nullptr };
@@ -356,6 +413,12 @@ class PROTOBUF_EXPORT SerialArena {
356413 char * ptr () { return ptr_.load (std::memory_order_relaxed); }
357414 const char * ptr () const { return ptr_.load (std::memory_order_relaxed); }
358415 void set_ptr (char * ptr) { return ptr_.store (ptr, std::memory_order_relaxed); }
416+ PROTOBUF_ALWAYS_INLINE void set_range (char * ptr, char * limit) {
417+ set_ptr (ptr);
418+ prefetch_ptr_ = ptr;
419+ limit_ = limit;
420+ prefetch_limit_ = limit;
421+ }
359422
360423 // Constructor is private as only New() should be used.
361424 inline SerialArena (ArenaBlock* b, ThreadSafeArena& parent);
0 commit comments