@@ -32,62 +32,31 @@ final class CallUtils {
3232
3333 private CallUtils () {} // uninstantiable
3434
35- /**
36- * Returns the {@link ClassDescriptor} for the given {@link StarlarkSemantics} and {@link Class}.
37- *
38- * <p>This method is a hotspot! It's called on every function call and field access. A single
39- * `bazel build` invocation can make tens or even hundreds of millions of calls to this method.
40- */
41- private static ClassDescriptor getClassDescriptor (StarlarkSemantics semantics , Class <?> clazz ) {
42- if (clazz == String .class ) {
43- clazz = StringModule .class ;
44- }
35+ /** A map for obtaining a {@link BuiltinManager} from a {@link StarlarkSemantics}. */
36+ // Historically, this code used to have a big map from (StarlarkSemantics, Class) pairs to
37+ // ClassDescriptors. This caused unnecessary GC churn and method call overhead for the dedicated
38+ // tuple objects, which became observable at scale. It was subsequently rewritten to be a
39+ // double-layer map from Semantics to Class to ClassDescriptor, which optimized for the common
40+ // case of few (typically just one) StarlarkSemantics instances. The inner map was then abstracted
41+ // into BuiltinManager.
42+ //
43+ // Avoid ConcurrentHashMap#computeIfAbsent because it is not reentrant: If a ClassDescriptor is
44+ // looked up before Starlark.UNIVERSE is initialized then the computation will re-enter the cache
45+ // and have a cycle; see b/161479826 for history.
46+ // TODO(bazel-team): Does the above cycle concern still exist?
47+ private static final ConcurrentHashMap <StarlarkSemantics , BuiltinManager > managerForSemantics =
48+ new ConcurrentHashMap <>();
4549
46- // We use two layers of caches, with the first layer being keyed by StarlarkSemantics and the
47- // second layer being keyed by Class. This optimizes for the common case of very few different
48- // StarlarkSemantics instances (typically, one) being in play. In contrast, if we used a single
49- // cache data structure then we'd need to use a dedicated tuple object for the keys of that data
50- // structure, and the GC churn and method call overhead become meaningful at scale.
51- //
52- // We implement each cache ourselves using CHM#get and CHM#putIfAbsent. We don't use
53- // CHM#computeIfAbsent since it is not reentrant: If #getClassDescriptor is called
54- // before Starlark.UNIVERSE is initialized then the computation will re-enter the cache and have
55- // a cycle; see b/161479826 for history.
56- // TODO(bazel-team): Maybe the above cycle concern doesn't exist now that CallUtils is private.
57- ConcurrentHashMap <Class <?>, ClassDescriptor > classDescriptorCache =
58- classDescriptorCachesBySemantics .get (semantics .getClassDescriptorCacheKey ());
59- if (classDescriptorCache == null ) {
60- classDescriptorCache =
61- new ConcurrentHashMap <>(
62- // In May 2023, typical Bazel usage results in ~150 entries in this cache. Therefore
63- // we presize the CHM accordingly to reduce the chance two entries use the same hash
64- // bucket (in May 2023 this strategy was completely effective!). We used to use the
65- // default capacity, and then the CHM would get dynamically resized to have 256
66- // buckets, many of which had at least 2 entries which is suboptimal for such a hot
67- // data structure.
68- // TODO(bazel-team): Better would be to precompute the entire lookup table on server
69- // startup (best would be to do this at compile time via an annotation processor),
70- // rather than rely on it getting built-up dynamically as Starlark code gets
71- // evaluated over the lifetime of the server. This way there are no concurrency
72- // concerns, so we can use a more efficient data structure that doesn't need to
73- // handle concurrent writes.
74- /* initialCapacity= */ 1000 );
75- ConcurrentHashMap <Class <?>, ClassDescriptor > prev =
76- classDescriptorCachesBySemantics .putIfAbsent (semantics , classDescriptorCache );
50+ static BuiltinManager getBuiltinManager (StarlarkSemantics semantics ) {
51+ BuiltinManager manager = managerForSemantics .get (semantics .getBuiltinManagerCacheKey ());
52+ if (manager == null ) {
53+ manager = new BuiltinManager (semantics );
54+ BuiltinManager prev = managerForSemantics .putIfAbsent (semantics , manager );
7755 if (prev != null ) {
78- classDescriptorCache = prev ; // first thread wins
56+ manager = prev ; // first thread wins
7957 }
8058 }
81-
82- ClassDescriptor classDescriptor = classDescriptorCache .get (clazz );
83- if (classDescriptor == null ) {
84- classDescriptor = buildClassDescriptor (semantics , clazz );
85- ClassDescriptor prev = classDescriptorCache .putIfAbsent (clazz , classDescriptor );
86- if (prev != null ) {
87- classDescriptor = prev ; // first thread wins
88- }
89- }
90- return classDescriptor ;
59+ return manager ;
9160 }
9261
9362 /**
@@ -137,10 +106,102 @@ private static class ClassDescriptor {
137106 @ Nullable TypeConstructor typeConstructor ;
138107 }
139108
140- /** Two-layer cache of {@link #buildClassDescriptor}, managed by {@link #getClassDescriptor}. */
141- private static final ConcurrentHashMap <
142- StarlarkSemantics , ConcurrentHashMap <Class <?>, ClassDescriptor >>
143- classDescriptorCachesBySemantics = new ConcurrentHashMap <>();
109+ /**
110+ * A manager for obtaining descriptors for native-defined Starlark objects and methods, under a
111+ * specific {@code StarlarkSemantics}.
112+ */
113+ static class BuiltinManager {
114+
115+ private final StarlarkSemantics semantics ;
116+
117+ // In May 2023, typical Bazel usage results in ~150 entries in this cache. Therefore
118+ // we presize the CHM accordingly to reduce the chance two entries use the same hash
119+ // bucket (in May 2023 this strategy was completely effective!). We used to use the
120+ // default capacity, and then the CHM would get dynamically resized to have 256
121+ // buckets, many of which had at least 2 entries which is suboptimal for such a hot
122+ // data structure.
123+ // TODO(bazel-team): Better would be to precompute the entire lookup table on server
124+ // startup (best would be to do this at compile time via an annotation processor),
125+ // rather than rely on it getting built-up dynamically as Starlark code gets
126+ // evaluated over the lifetime of the server. This way there are no concurrency
127+ // concerns, so we can use a more efficient data structure that doesn't need to
128+ // handle concurrent writes.
129+ private final ConcurrentHashMap <Class <?>, ClassDescriptor > classDescriptorCache =
130+ new ConcurrentHashMap <>(/* initialCapacity= */ 1000 );
131+
132+ private BuiltinManager (StarlarkSemantics semantics ) {
133+ this .semantics = semantics ;
134+ }
135+
136+ /**
137+ * Returns the {@link ClassDescriptor} for the given {@link StarlarkSemantics} and {@link
138+ * Class}.
139+ *
140+ * <p>This method is a hotspot! It's called on every function call and field access. A single
141+ * `bazel build` invocation can make tens or even hundreds of millions of calls to this method.
142+ */
143+ private ClassDescriptor getClassDescriptor (Class <?> clazz ) {
144+ if (clazz == String .class ) {
145+ clazz = StringModule .class ;
146+ }
147+
148+ ClassDescriptor classDescriptor = classDescriptorCache .get (clazz );
149+ if (classDescriptor == null ) {
150+ classDescriptor = buildClassDescriptor (semantics , clazz );
151+ ClassDescriptor prev = classDescriptorCache .putIfAbsent (clazz , classDescriptor );
152+ if (prev != null ) {
153+ classDescriptor = prev ; // first thread wins
154+ }
155+ }
156+ return classDescriptor ;
157+ }
158+
159+ /**
160+ * Returns the type constructor associated with the given Java class under a given {@code
161+ * StarlarkSemantics}, or null if there is none.
162+ *
163+ * <p>An example would be getting the type constructor for the {@code list} type from the class
164+ * {@code StarlarkList}.
165+ *
166+ * <p>The returned constructor has complete type information about the available Starlark
167+ * methods of the class.
168+ */
169+ @ Nullable
170+ TypeConstructor getTypeConstructor (Class <?> clazz ) {
171+ return getClassDescriptor (clazz ).typeConstructor ;
172+ }
173+
174+ /**
175+ * Returns the set of all StarlarkMethod-annotated Java methods (excluding the self-call method)
176+ * of the specified class.
177+ */
178+ ImmutableMap <String , MethodDescriptor > getAnnotatedMethods (Class <?> objClass ) {
179+ return getClassDescriptor (objClass ).methods ;
180+ }
181+
182+ /**
183+ * Returns a {@link MethodDescriptor} object representing a function which calls the selfCall
184+ * java method of the given object (the {@link StarlarkMethod} method with {@link
185+ * StarlarkMethod#selfCall()} set to true). Returns null if no such method exists.
186+ */
187+ @ Nullable
188+ MethodDescriptor getSelfCallMethodDescriptor (Class <?> objClass ) {
189+ return getClassDescriptor (objClass ).selfCall ;
190+ }
191+
192+ /**
193+ * Returns a {@code selfCall=true} method for the given class under the given Starlark
194+ * semantics, or null if no such method exists.
195+ */
196+ @ Nullable
197+ Method getSelfCallMethod (Class <?> objClass ) {
198+ MethodDescriptor descriptor = getClassDescriptor (objClass ).selfCall ;
199+ if (descriptor == null ) {
200+ return null ;
201+ }
202+ return descriptor .getMethod ();
203+ }
204+ }
144205
145206 private static ClassDescriptor buildClassDescriptor (StarlarkSemantics semantics , Class <?> clazz ) {
146207 MethodDescriptor selfCall = null ;
@@ -246,52 +307,4 @@ private static TypeConstructor getBaseTypeConstructor(Class<?> clazz) {
246307 String .format ("Error invoking %s#getBaseTypeConstructor" , clazz .getName ()), e );
247308 }
248309 }
249-
250- /**
251- * Returns the type constructor associated with the given Java class under a given {@code
252- * StarlarkSemantics}, or null if there is none.
253- *
254- * <p>An example would be getting the type constructor for the {@code list} type from the class
255- * {@code StarlarkList}.
256- *
257- * <p>The returned constructor has complete type information about the available Starlark methods
258- * of the class.
259- */
260- @ Nullable
261- static TypeConstructor getTypeConstructor (StarlarkSemantics semantics , Class <?> clazz ) {
262- return getClassDescriptor (semantics , clazz ).typeConstructor ;
263- }
264-
265- /**
266- * Returns the set of all StarlarkMethod-annotated Java methods (excluding the self-call method)
267- * of the specified class.
268- */
269- static ImmutableMap <String , MethodDescriptor > getAnnotatedMethods (
270- StarlarkSemantics semantics , Class <?> objClass ) {
271- return getClassDescriptor (semantics , objClass ).methods ;
272- }
273-
274- /**
275- * Returns a {@link MethodDescriptor} object representing a function which calls the selfCall java
276- * method of the given object (the {@link StarlarkMethod} method with {@link
277- * StarlarkMethod#selfCall()} set to true). Returns null if no such method exists.
278- */
279- @ Nullable
280- static MethodDescriptor getSelfCallMethodDescriptor (
281- StarlarkSemantics semantics , Class <?> objClass ) {
282- return getClassDescriptor (semantics , objClass ).selfCall ;
283- }
284-
285- /**
286- * Returns a {@code selfCall=true} method for the given class under the given Starlark semantics,
287- * or null if no such method exists.
288- */
289- @ Nullable
290- static Method getSelfCallMethod (StarlarkSemantics semantics , Class <?> objClass ) {
291- MethodDescriptor descriptor = getClassDescriptor (semantics , objClass ).selfCall ;
292- if (descriptor == null ) {
293- return null ;
294- }
295- return descriptor .getMethod ();
296- }
297310}
0 commit comments