@@ -60,27 +60,43 @@ ArenaBlock* SentryArenaBlock() {
6060}
6161#endif
6262
63- SizedPtr AllocateMemory (const AllocationPolicy* policy_ptr, size_t last_size,
64- size_t min_bytes) {
// Returns the size to request for the next allocation in a geometrically
// growing sequence.
//
// `last_size` is the size of the previous allocation, or 0 when there is none,
// in which case `start_size` is returned unchanged. Otherwise the previous size
// is doubled and capped at `max_size`.
inline size_t AllocationSize(size_t last_size, size_t start_size,
                             size_t max_size) {
  if (last_size == 0) return start_size;
  // Double the current block size, up to a limit. The comparison is made
  // against half of the limit instead of computing `2 * last_size` first: that
  // product wraps around for very large `last_size`, which would produce a
  // tiny (possibly zero) size rather than `max_size`.
  if (last_size > max_size / 2) return max_size;
  return 2 * last_size;
}
69+
70+ SizedPtr AllocateMemory (const AllocationPolicy& policy, size_t size) {
71+ if (policy.block_alloc == nullptr ) {
72+ return AllocateAtLeast (size);
73+ }
74+ return {policy.block_alloc (size), size};
75+ }
76+
77+ SizedPtr AllocateBlock (const AllocationPolicy* policy_ptr, size_t last_size,
78+ size_t min_bytes) {
6579 AllocationPolicy policy; // default policy
6680 if (policy_ptr) policy = *policy_ptr;
67- size_t size;
68- if (last_size != 0 ) {
69- // Double the current block size, up to a limit.
70- auto max_size = policy.max_block_size ;
71- size = std::min (2 * last_size, max_size);
72- } else {
73- size = policy.start_block_size ;
74- }
81+ size_t size =
82+ AllocationSize (last_size, policy.start_block_size , policy.max_block_size );
7583 // Verify that min_bytes + kBlockHeaderSize won't overflow.
7684 ABSL_CHECK_LE (min_bytes, std::numeric_limits<size_t >::max () -
7785 SerialArena::kBlockHeaderSize );
7886 size = std::max (size, SerialArena::kBlockHeaderSize + min_bytes);
7987
80- if (policy.block_alloc == nullptr ) {
81- return AllocateAtLeast (size);
82- }
83- return {policy.block_alloc (size), size};
88+ return AllocateMemory (policy, size);
89+ }
90+
91+ SizedPtr AllocateCleanupChunk (const AllocationPolicy* policy_ptr,
92+ size_t last_size) {
93+ constexpr size_t kStartSize = 64 ;
94+ constexpr size_t kMaxSize = 4 << 10 ;
95+ static_assert (kStartSize % sizeof (cleanup::CleanupNode) == 0 , " " );
96+
97+ const size_t size = AllocationSize (last_size, kStartSize , kMaxSize );
98+ if (policy_ptr == nullptr ) return AllocateAtLeast (size);
99+ return AllocateMemory (*policy_ptr, size);
84100}
85101
86102class GetDeallocator {
@@ -102,14 +118,95 @@ class GetDeallocator {
102118
103119} // namespace
104120
121+ namespace cleanup {
122+ struct ChunkList ::Chunk {
123+ CleanupNode* First () { return reinterpret_cast <CleanupNode*>(this + 1 ); }
124+ CleanupNode* Last () { return First () + Capacity () - 1 ; }
125+ static size_t Capacity (size_t size) {
126+ return (size - sizeof (Chunk)) / sizeof (CleanupNode);
127+ }
128+ size_t Capacity () const { return Capacity (size); }
129+
130+ Chunk* next;
131+ size_t size;
132+ // Cleanup nodes follow.
133+ };
134+
135+ void ChunkList::AddFallback (void * elem, void (*destructor)(void *),
136+ SerialArena& arena) {
137+ ABSL_DCHECK_EQ (next_, limit_);
138+ SizedPtr mem = AllocateCleanupChunk (arena.parent_ .AllocPolicy (),
139+ head_ == nullptr ? 0 : head_->size );
140+ arena.AddSpaceAllocated (mem.n );
141+ head_ = new (mem.p ) Chunk{head_, mem.n };
142+ next_ = head_->First ();
143+ prefetch_ptr_ = reinterpret_cast <char *>(next_);
144+ limit_ = next_ + Chunk::Capacity (mem.n );
145+ AddFromExisting (elem, destructor);
146+ }
147+
148+ void ChunkList::Cleanup (const SerialArena& arena) {
149+ Chunk* c = head_;
150+ if (c == nullptr ) return ;
151+ GetDeallocator deallocator (arena.parent_ .AllocPolicy ());
152+
153+ // Iterate backwards in order to destroy in the right order.
154+ CleanupNode* it = next_ - 1 ;
155+ while (true ) {
156+ CleanupNode* first = c->First ();
157+ // A prefetch distance of 8 here was chosen arbitrarily.
158+ constexpr int kPrefetchDistance = 8 ;
159+ CleanupNode* prefetch = it;
160+ // Prefetch the first kPrefetchDistance nodes.
161+ for (int i = 0 ; prefetch >= first && i < kPrefetchDistance ;
162+ --prefetch, ++i) {
163+ prefetch->Prefetch ();
164+ }
165+ // For the middle nodes, run destructor and prefetch the node
166+ // kPrefetchDistance after the current one.
167+ for (; prefetch >= first; --it, --prefetch) {
168+ it->Destroy ();
169+ prefetch->Prefetch ();
170+ }
171+ // Note: we could consider prefetching `next` chunk earlier.
172+ absl::PrefetchToLocalCacheNta (c->next );
173+ // Destroy the rest without prefetching.
174+ for (; it >= first; --it) {
175+ it->Destroy ();
176+ }
177+ Chunk* next = c->next ;
178+ deallocator ({c, c->size });
179+ if (next == nullptr ) return ;
180+ c = next;
181+ it = c->Last ();
182+ };
183+ }
184+
185+ std::vector<void *> ChunkList::PeekForTesting () {
186+ std::vector<void *> ret;
187+ Chunk* c = head_;
188+ if (c == nullptr ) return ret;
189+ // Iterate backwards to match destruction order.
190+ CleanupNode* it = next_ - 1 ;
191+ while (true ) {
192+ CleanupNode* first = c->First ();
193+ for (; it >= first; --it) {
194+ ret.push_back (it->elem );
195+ }
196+ c = c->next ;
197+ if (c == nullptr ) return ret;
198+ it = c->Last ();
199+ };
200+ }
201+ } // namespace cleanup
202+
105203// It is guaranteed that this is constructed in `b`. IOW, this is not the first
106204// arena and `b` cannot be sentry.
107205SerialArena::SerialArena (ArenaBlock* b, ThreadSafeArena& parent)
108206 : ptr_{b->Pointer (kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize )},
109207 limit_{b->Limit ()},
110208 prefetch_ptr_ (
111209 b->Pointer (kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize )),
112- prefetch_limit_(b->Limit ()),
113210 head_{b},
114211 space_allocated_{b->size },
115212 parent_{parent} {
@@ -130,22 +227,7 @@ SerialArena::SerialArena(FirstSerialArena, ArenaBlock* b,
130227}
131228
132229std::vector<void *> SerialArena::PeekCleanupListForTesting () {
133- std::vector<void *> res;
134-
135- ArenaBlock* b = head ();
136- if (b->IsSentry ()) return res;
137-
138- const auto peek_list = [&](char * pos, char * end) {
139- for (; pos != end; pos += cleanup::Size ()) {
140- cleanup::PeekNode (pos, res);
141- }
142- };
143-
144- peek_list (limit_, b->Limit ());
145- for (b = b->next ; b; b = b->next ) {
146- peek_list (reinterpret_cast <char *>(b->cleanup_nodes ), b->Limit ());
147- }
148- return res;
230+ return cleanup_list_.PeekForTesting ();
149231}
150232
151233std::vector<void *> ThreadSafeArena::PeekCleanupListForTesting () {
@@ -223,25 +305,16 @@ void* SerialArena::AllocateFromStringBlockFallback() {
223305PROTOBUF_NOINLINE
224306void * SerialArena::AllocateAlignedWithCleanupFallback (
225307 size_t n, size_t align, void (*destructor)(void *)) {
226- size_t required = AlignUpTo (n, align) + cleanup::Size () ;
308+ size_t required = AlignUpTo (n, align);
227309 AllocateNewBlock (required);
228310 return AllocateAlignedWithCleanup (n, align, destructor);
229311}
230312
231- PROTOBUF_NOINLINE
232- void SerialArena::AddCleanupFallback (void * elem, void (*destructor)(void *)) {
233- AllocateNewBlock (cleanup::Size ());
234- AddCleanupFromExisting (elem, destructor);
235- }
236-
237313void SerialArena::AllocateNewBlock (size_t n) {
238314 size_t used = 0 ;
239315 size_t wasted = 0 ;
240316 ArenaBlock* old_head = head ();
241317 if (!old_head->IsSentry ()) {
242- // Sync limit to block
243- old_head->cleanup_nodes = limit_;
244-
245318 // Record how much used in this block.
246319 used = static_cast <size_t >(ptr () - old_head->Pointer (kBlockHeaderSize ));
247320 wasted = old_head->size - used - kBlockHeaderSize ;
@@ -253,7 +326,7 @@ void SerialArena::AllocateNewBlock(size_t n) {
253326 // but with a CPU regression. The regression might have been an artifact of
254327 // the microbenchmark.
255328
256- auto mem = AllocateMemory (parent_.AllocPolicy (), old_head->size , n);
329+ auto mem = AllocateBlock (parent_.AllocPolicy (), old_head->size , n);
257330 AddSpaceAllocated (mem.n );
258331 ThreadSafeArenaStats::RecordAllocateStats (parent_.arena_stats_ .MutableStats (),
259332 /* used=*/ used,
@@ -314,34 +387,6 @@ size_t SerialArena::FreeStringBlocks(StringBlock* string_block,
314387 return deallocated;
315388}
316389
317- void SerialArena::CleanupList () {
318- ArenaBlock* b = head ();
319- if (b->IsSentry ()) return ;
320-
321- b->cleanup_nodes = limit_;
322- do {
323- char * limit = b->Limit ();
324- char * it = reinterpret_cast <char *>(b->cleanup_nodes );
325- ABSL_DCHECK (!b->IsSentry () || it == limit);
326- // A prefetch distance of 8 here was chosen arbitrarily.
327- char * prefetch = it;
328- int prefetch_dist = 8 ;
329- for (; prefetch < limit && --prefetch_dist; prefetch += cleanup::Size ()) {
330- cleanup::PrefetchNode (prefetch);
331- }
332- for (; prefetch < limit;
333- it += cleanup::Size (), prefetch += cleanup::Size ()) {
334- cleanup::DestroyNode (it);
335- cleanup::PrefetchNode (prefetch);
336- }
337- absl::PrefetchToLocalCacheNta (b->next );
338- for (; it < limit; it += cleanup::Size ()) {
339- cleanup::DestroyNode (it);
340- }
341- b = b->next ;
342- } while (b);
343- }
344-
345390// Stores arrays of void* and SerialArena* instead of linked list of
346391// SerialArena* to speed up traversing all SerialArena. The cost of walk is non
347392// trivial when there are many nodes. Separately storing "ids" minimizes cache
@@ -550,7 +595,7 @@ ArenaBlock* ThreadSafeArena::FirstBlock(void* buf, size_t size,
550595
551596 SizedPtr mem;
552597 if (buf == nullptr || size < kBlockHeaderSize + kAllocPolicySize ) {
553- mem = AllocateMemory (&policy, 0 , kAllocPolicySize );
598+ mem = AllocateBlock (&policy, 0 , kAllocPolicySize );
554599 } else {
555600 mem = {buf, size};
556601 // Record user-owned block.
@@ -734,6 +779,8 @@ uint64_t ThreadSafeArena::Reset() {
734779 // Have to do this in a first pass, because some of the destructors might
735780 // refer to memory in other blocks.
736781 CleanupList ();
782+ // Reset the first arena's cleanup list.
783+ first_arena_.cleanup_list_ = cleanup::ChunkList ();
737784
738785 // Discard all blocks except the first one. Whether it is user-provided or
739786 // allocated, always reuse the first block for the first arena.
@@ -913,7 +960,7 @@ SerialArena* ThreadSafeArena::GetSerialArenaFallback(size_t n) {
913960 // have any blocks yet. So we'll allocate its first block now. It must be
914961 // big enough to host SerialArena and the pending request.
915962 serial = SerialArena::New (
916- AllocateMemory (alloc_policy_.get (), 0 , n + kSerialArenaSize ), *this );
963+ AllocateBlock (alloc_policy_.get (), 0 , n + kSerialArenaSize ), *this );
917964
918965 AddSerialArena (id, serial);
919966 }
0 commit comments