Skip to content

Commit 9879f42

Browse files
committed
Ruby <2.7now uses WeakMap too, which prevents memory leaks.
Ruby <2.7 does not allow non-finalizable objects to be WeakMap keys: https://bugs.ruby-lang.org/issues/16035 We work around this by using a secondary map for Ruby <2.7 which maps the non-finalizable integer to a distinct object. For now we accept that the entries in the secondary map wil never be collected. If this becomes a problem we can perform a GC pass every so often that looks at the contents of the object cache to decide what can be deleted from the secondary map.
1 parent d7e943b commit 9879f42

File tree

6 files changed

+103
-118
lines changed

6 files changed

+103
-118
lines changed

ruby/ext/google/protobuf_c/defs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ static VALUE DescriptorPool_alloc(VALUE klass) {
295295

296296
self->def_to_descriptor = rb_hash_new();
297297
self->symtab = upb_symtab_new();
298-
ObjectCache_Add(self->symtab, ret, _upb_symtab_arena(self->symtab));
298+
ObjectCache_Add(self->symtab, ret);
299299

300300
return ret;
301301
}

ruby/ext/google/protobuf_c/map.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ VALUE Map_GetRubyWrapper(upb_map* map, upb_fieldtype_t key_type,
9393
if (val == Qnil) {
9494
val = Map_alloc(cMap);
9595
Map* self;
96-
ObjectCache_Add(map, val, Arena_get(arena));
96+
ObjectCache_Add(map, val);
9797
TypedData_Get_Struct(val, Map, &Map_type, self);
9898
self->map = map;
9999
self->arena = arena;
@@ -318,7 +318,7 @@ static VALUE Map_init(int argc, VALUE* argv, VALUE _self) {
318318

319319
self->map = upb_map_new(Arena_get(self->arena), self->key_type,
320320
self->value_type_info.type);
321-
ObjectCache_Add(self->map, _self, Arena_get(self->arena));
321+
ObjectCache_Add(self->map, _self);
322322

323323
if (init_arg != Qnil) {
324324
Map_merge_into_self(_self, init_arg);
@@ -590,9 +590,10 @@ VALUE Map_eq(VALUE _self, VALUE _other) {
590590
*/
591591
static VALUE Map_freeze(VALUE _self) {
592592
Map* self = ruby_to_Map(_self);
593-
594-
ObjectCache_Pin(self->map, _self, Arena_get(self->arena));
595-
RB_OBJ_FREEZE(_self);
593+
if (!RB_OBJ_FROZEN(_self)) {
594+
Arena_Pin(self->arena, _self);
595+
RB_OBJ_FREEZE(_self);
596+
}
596597
return _self;
597598
}
598599

ruby/ext/google/protobuf_c/message.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ void Message_InitPtr(VALUE self_, upb_msg *msg, VALUE arena) {
105105
Message* self = ruby_to_Message(self_);
106106
self->msg = msg;
107107
self->arena = arena;
108-
ObjectCache_Add(msg, self_, Arena_get(arena));
108+
ObjectCache_Add(msg, self_);
109109
}
110110

111111
VALUE Message_GetArena(VALUE msg_rb) {
@@ -855,8 +855,10 @@ static VALUE Message_to_h(VALUE _self) {
855855
*/
856856
static VALUE Message_freeze(VALUE _self) {
857857
Message* self = ruby_to_Message(_self);
858-
ObjectCache_Pin(self->msg, _self, Arena_get(self->arena));
859-
RB_OBJ_FREEZE(_self);
858+
if (!RB_OBJ_FROZEN(_self)) {
859+
Arena_Pin(self->arena, _self);
860+
RB_OBJ_FREEZE(_self);
861+
}
860862
return _self;
861863
}
862864

ruby/ext/google/protobuf_c/protobuf.c

Lines changed: 77 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -167,30 +167,55 @@ void StringBuilder_PrintMsgval(StringBuilder* b, upb_msgval val,
167167
// Arena
168168
// -----------------------------------------------------------------------------
169169

170-
void Arena_free(void* data) { upb_arena_free(data); }
170+
typedef struct {
171+
upb_arena *arena;
172+
VALUE pinned_objs;
173+
} Arena;
174+
175+
static void Arena_mark(void *data) {
176+
Arena *arena = data;
177+
rb_gc_mark(arena->pinned_objs);
178+
}
179+
180+
static void Arena_free(void *data) {
181+
Arena *arena = data;
182+
upb_arena_free(arena->arena);
183+
}
171184

172185
static VALUE cArena;
173186

174187
const rb_data_type_t Arena_type = {
175188
"Google::Protobuf::Internal::Arena",
176-
{ NULL, Arena_free, NULL },
189+
{ Arena_mark, Arena_free, NULL },
190+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
177191
};
178192

179193
static VALUE Arena_alloc(VALUE klass) {
180-
upb_arena *arena = upb_arena_new();
194+
Arena *arena = ALLOC(Arena);
195+
arena->arena = upb_arena_new();
196+
arena->pinned_objs = Qnil;
181197
return TypedData_Wrap_Struct(klass, &Arena_type, arena);
182198
}
183199

184200
upb_arena *Arena_get(VALUE _arena) {
185-
upb_arena *arena;
186-
TypedData_Get_Struct(_arena, upb_arena, &Arena_type, arena);
187-
return arena;
201+
Arena *arena;
202+
TypedData_Get_Struct(_arena, Arena, &Arena_type, arena);
203+
return arena->arena;
188204
}
189205

190206
VALUE Arena_new() {
191207
return Arena_alloc(cArena);
192208
}
193209

210+
void Arena_Pin(VALUE _arena, VALUE obj) {
211+
Arena *arena;
212+
TypedData_Get_Struct(_arena, Arena, &Arena_type, arena);
213+
if (arena->pinned_objs == Qnil) {
214+
arena->pinned_objs = rb_ary_new();
215+
}
216+
rb_ary_push(arena->pinned_objs, obj);
217+
}
218+
194219
void Arena_register(VALUE module) {
195220
VALUE internal = rb_define_module_under(module, "Internal");
196221
VALUE klass = rb_define_class_under(internal, "Arena", rb_cObject);
@@ -209,122 +234,79 @@ void Arena_register(VALUE module) {
209234
// different wrapper objects for the same C object, which saves memory and
210235
// preserves object identity.
211236
//
212-
// We use Hash and/or WeakMap for the cache. WeakMap is faster overall
213-
// (probably due to removal being integrated with GC) but doesn't work for Ruby
214-
// <2.7 (see note below). We need Hash for Ruby <2.7 and for cases where we
215-
// need to GC-root the object (notably when the object has been frozen).
237+
// We use WeakMap for the cache. For Ruby <2.7 we also need a secondary Hash
238+
// to store WeakMap keys because Ruby <2.7 WeakMap doesn't allow non-finalizable
239+
// keys.
216240

217241
#if RUBY_API_VERSION_CODE >= 20700
218-
#define USE_WEAK_MAP 1
242+
#define USE_SECONDARY_MAP 0
219243
#else
220-
#define USE_WEAK_MAP 0
244+
#define USE_SECONDARY_MAP 1
221245
#endif
222246

223-
static VALUE ObjectCache_GetKey(const void* key) {
224-
char buf[sizeof(key)];
225-
memcpy(&buf, &key, sizeof(key));
226-
intptr_t key_int = (intptr_t)key;
227-
PBRUBY_ASSERT((key_int & 3) == 0);
228-
return LL2NUM(key_int >> 2);
229-
}
247+
#if USE_SECONDARY_MAP
230248

231-
// Strong object cache, uses regular Hash and GC-roots objects.
232-
// - For Ruby <2.7, used for all objects.
233-
// - For Ruby >=2.7, used only for frozen objects, so we preserve the "frozen"
234-
// bit (since this information is not preserved at the upb level).
249+
// Maps Numeric -> Object. The object is then used as a key into the WeakMap.
250+
// This is needed for Ruby <2.7 where a number cannot be a key to WeakMap.
251+
// The object is used only for its identity; it does not contain any data.
252+
VALUE secondary_map = Qnil;
235253

236-
VALUE strong_obj_cache = Qnil;
237-
238-
static void StrongObjectCache_Init() {
239-
rb_gc_register_address(&strong_obj_cache);
240-
strong_obj_cache = rb_hash_new();
254+
static void SecondaryMap_Init() {
255+
rb_gc_register_address(&secondary_map);
256+
secondary_map = rb_hash_new();
241257
}
242258

243-
static void StrongObjectCache_Remove(void* key) {
244-
VALUE key_rb = ObjectCache_GetKey(key);
245-
PBRUBY_ASSERT(rb_hash_lookup(strong_obj_cache, key_rb) != Qnil);
246-
rb_hash_delete(strong_obj_cache, key_rb);
259+
static VALUE SecondaryMap_Get(VALUE key) {
260+
VALUE ret = rb_hash_lookup(secondary_map, key);
261+
if (ret == Qnil) {
262+
ret = rb_eval_string("Object.new");
263+
rb_hash_aset(secondary_map, key, ret);
264+
}
265+
return ret;
247266
}
248267

249-
static VALUE StrongObjectCache_Get(const void* key) {
250-
VALUE key_rb = ObjectCache_GetKey(key);
251-
return rb_hash_lookup(strong_obj_cache, key_rb);
252-
}
268+
#endif
253269

254-
static void StrongObjectCache_Add(const void* key, VALUE val,
255-
upb_arena* arena) {
256-
PBRUBY_ASSERT(StrongObjectCache_Get(key) == Qnil);
257-
VALUE key_rb = ObjectCache_GetKey(key);
258-
rb_hash_aset(strong_obj_cache, key_rb, val);
259-
upb_arena_addcleanup(arena, (void*)key, StrongObjectCache_Remove);
270+
static VALUE ObjectCache_GetKey(const void* key) {
271+
char buf[sizeof(key)];
272+
memcpy(&buf, &key, sizeof(key));
273+
intptr_t key_int = (intptr_t)key;
274+
PBRUBY_ASSERT((key_int & 3) == 0);
275+
VALUE ret = LL2NUM(key_int >> 2);
276+
#if USE_SECONDARY_MAP
277+
ret = SecondaryMap_Get(ret);
278+
#endif
279+
return ret;
260280
}
261281

262-
// Weak object cache. This speeds up the test suite significantly, so we
263-
// presume it speeds up real code also. However we can only use it in Ruby
264-
// >=2.7 due to:
265-
// https://bugs.ruby-lang.org/issues/16035
266-
267-
#if USE_WEAK_MAP
282+
// Public ObjectCache API.
268283

269284
VALUE weak_obj_cache = Qnil;
285+
ID item_get;
286+
ID item_set;
270287

271-
static void WeakObjectCache_Init() {
288+
static void ObjectCache_Init() {
272289
rb_gc_register_address(&weak_obj_cache);
273290
VALUE klass = rb_eval_string("ObjectSpace::WeakMap");
274291
weak_obj_cache = rb_class_new_instance(0, NULL, klass);
275-
}
276-
277-
static VALUE WeakObjectCache_Get(const void* key) {
278-
VALUE key_rb = ObjectCache_GetKey(key);
279-
VALUE ret = rb_funcall(weak_obj_cache, rb_intern("[]"), 1, key_rb);
280-
return ret;
281-
}
282-
283-
static void WeakObjectCache_Add(const void* key, VALUE val) {
284-
PBRUBY_ASSERT(WeakObjectCache_Get(key) == Qnil);
285-
VALUE key_rb = ObjectCache_GetKey(key);
286-
rb_funcall(weak_obj_cache, rb_intern("[]="), 2, key_rb, val);
287-
PBRUBY_ASSERT(WeakObjectCache_Get(key) == val);
288-
}
289-
290-
#endif
291-
292-
// Public ObjectCache API.
293-
294-
static void ObjectCache_Init() {
295-
StrongObjectCache_Init();
296-
#if USE_WEAK_MAP
297-
WeakObjectCache_Init();
292+
item_get = rb_intern("[]");
293+
item_set = rb_intern("[]=");
294+
#if USE_SECONDARY_MAP
295+
SecondaryMap_Init();
298296
#endif
299297
}
300298

301-
void ObjectCache_Add(const void* key, VALUE val, upb_arena *arena) {
302-
#if USE_WEAK_MAP
303-
(void)arena;
304-
WeakObjectCache_Add(key, val);
305-
#else
306-
StrongObjectCache_Add(key, val, arena);
307-
#endif
299+
void ObjectCache_Add(const void* key, VALUE val) {
300+
PBRUBY_ASSERT(ObjectCache_Get(key) == Qnil);
301+
VALUE key_rb = ObjectCache_GetKey(key);
302+
rb_funcall(weak_obj_cache, item_set, 2, key_rb, val);
303+
PBRUBY_ASSERT(ObjectCache_Get(key) == val);
308304
}
309305

310306
// Returns the cached object for this key, if any. Otherwise returns Qnil.
311307
VALUE ObjectCache_Get(const void* key) {
312-
#if USE_WEAK_MAP
313-
return WeakObjectCache_Get(key);
314-
#else
315-
return StrongObjectCache_Get(key);
316-
#endif
317-
}
318-
319-
void ObjectCache_Pin(const void* key, VALUE val, upb_arena *arena) {
320-
#if USE_WEAK_MAP
321-
PBRUBY_ASSERT(WeakObjectCache_Get(key) == val);
322-
// This will GC-root the object, but we'll still use the weak map for
323-
// actual lookup.
324-
StrongObjectCache_Add(key, val, arena);
325-
#else
326-
// Value is already pinned, nothing to do.
327-
#endif
308+
VALUE key_rb = ObjectCache_GetKey(key);
309+
return rb_funcall(weak_obj_cache, item_get, 1, key_rb);
328310
}
329311

330312
/*

ruby/ext/google/protobuf_c/protobuf.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,13 @@ const upb_fielddef* map_field_value(const upb_fielddef* field);
5555
VALUE Arena_new();
5656
upb_arena *Arena_get(VALUE arena);
5757

58+
// Pins this Ruby object to the lifetime of this arena, so that as long as the
59+
// arena is alive this object will not be collected.
60+
//
61+
// We use this to guarantee that the "frozen" bit on the object will be
62+
// remembered, even if the user drops their reference to this precise object.
63+
void Arena_Pin(VALUE arena, VALUE obj);
64+
5865
// -----------------------------------------------------------------------------
5966
// ObjectCache
6067
// -----------------------------------------------------------------------------
@@ -68,19 +75,11 @@ upb_arena *Arena_get(VALUE arena);
6875
// Adds an entry to the cache. The "arena" parameter must give the arena that
6976
// "key" was allocated from. In Ruby <2.7.0, it will be used to remove the key
7077
// from the cache when the arena is destroyed.
71-
void ObjectCache_Add(const void* key, VALUE val, upb_arena *arena);
78+
void ObjectCache_Add(const void* key, VALUE val);
7279

7380
// Returns the cached object for this key, if any. Otherwise returns Qnil.
7481
VALUE ObjectCache_Get(const void* key);
7582

76-
// Pins the previously added object so it is GC-rooted. This turns the
77-
// reference to "val" from weak to strong. We use this to guarantee that the
78-
// "frozen" bit on the object will be remembered, even if the user drops their
79-
// reference to this precise object.
80-
//
81-
// The "arena" parameter must give the arena that "key" was allocated from.
82-
void ObjectCache_Pin(const void* key, VALUE val, upb_arena *arena);
83-
8483
// -----------------------------------------------------------------------------
8584
// StringBuilder, for inspect
8685
// -----------------------------------------------------------------------------

ruby/ext/google/protobuf_c/repeated_field.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ VALUE RepeatedField_GetRubyWrapper(upb_array* array, TypeInfo type_info,
8888
if (val == Qnil) {
8989
val = RepeatedField_alloc(cRepeatedField);
9090
RepeatedField* self;
91-
ObjectCache_Add(array, val, Arena_get(arena));
91+
ObjectCache_Add(array, val);
9292
TypedData_Get_Struct(val, RepeatedField, &RepeatedField_type, self);
9393
self->array = array;
9494
self->arena = arena;
@@ -500,9 +500,10 @@ VALUE RepeatedField_eq(VALUE _self, VALUE _other) {
500500
*/
501501
static VALUE RepeatedField_freeze(VALUE _self) {
502502
RepeatedField* self = ruby_to_RepeatedField(_self);
503-
504-
ObjectCache_Pin(self->array, _self, Arena_get(self->arena));
505-
RB_OBJ_FREEZE(_self);
503+
if (!RB_OBJ_FROZEN(_self)) {
504+
Arena_Pin(self->arena, _self);
505+
RB_OBJ_FREEZE(_self);
506+
}
506507
return _self;
507508
}
508509

@@ -610,7 +611,7 @@ VALUE RepeatedField_init(int argc, VALUE* argv, VALUE _self) {
610611

611612
self->type_info = TypeInfo_FromClass(argc, argv, 0, &self->type_class, &ary);
612613
self->array = upb_array_new(arena, self->type_info.type);
613-
ObjectCache_Add(self->array, _self, arena);
614+
ObjectCache_Add(self->array, _self);
614615

615616
if (ary != Qnil) {
616617
if (!RB_TYPE_P(ary, T_ARRAY)) {

0 commit comments

Comments
 (0)