Skip to content

Commit 83a5626

Browse files
mtdowling (Michael Dowling)
authored and committed
Simplify ShapeId caching
The previous implementation could cause a recursive update error when a shape ID was purged from the cache, because it attempted to remove an element from the ConcurrentHashMap inside of a computeIfAbsent call. This updated approach is simpler: it separates prelude shape IDs into a ConcurrentHashMap that simply stops caching once 500 entries are present (which more than covers the shapes currently in the prelude), and uses a synchronized LinkedHashMap as an LRU cache for all other shape IDs. The performance of this approach is essentially the same as what we had previously.
1 parent e04cca6 commit 83a5626

File tree

2 files changed

+56
-77
lines changed

2 files changed

+56
-77
lines changed

config/spotbugs/filter.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,10 @@
142142
<Class name="software.amazon.smithy.rulesengine.traits.ContextIndex"/>
143143
<Bug pattern="NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE"/>
144144
</Match>
145+
146+
<!-- We don't care about the return value of putIfAbsent in this cache -->
147+
<Match>
148+
<Class name="software.amazon.smithy.model.shapes.ShapeId$ShapeIdFactory"/>
149+
<Bug pattern="RV_RETURN_VALUE_OF_PUTIFABSENT_IGNORED"/>
150+
</Match>
145151
</FindBugsFilter>

smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java

Lines changed: 50 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,13 @@
1515

1616
package software.amazon.smithy.model.shapes;
1717

18+
import java.util.Collections;
1819
import java.util.LinkedHashMap;
20+
import java.util.Map;
1921
import java.util.Objects;
2022
import java.util.Optional;
21-
import java.util.Queue;
2223
import java.util.concurrent.ConcurrentHashMap;
23-
import java.util.concurrent.ConcurrentLinkedQueue;
2424
import java.util.concurrent.ConcurrentMap;
25-
import java.util.concurrent.atomic.AtomicInteger;
2625
import software.amazon.smithy.model.loader.ParserUtils;
2726
import software.amazon.smithy.model.loader.Prelude;
2827
import software.amazon.smithy.model.node.Node;
@@ -49,7 +48,7 @@
4948
public final class ShapeId implements ToShapeId, Comparable<ShapeId> {
5049

5150
/** LRA (least recently added) cache of parsed shape IDs. */
52-
private static final ShapeIdFactory FACTORY = new ShapeIdFactory(8192);
51+
private static final ShapeIdFactory FACTORY = new ShapeIdFactory();
5352

5453
private final String namespace;
5554
private final String name;
@@ -407,89 +406,63 @@ public int hashCode() {
407406
/**
408407
* A least-recently used flyweight factory that creates shape IDs.
409408
*
410-
* <p>Shape IDs are cached in a ConcurrentHashMap. Entries are stored in
411-
* one of two ConcurrentLinkedQueues: a prioritized queue stores all
412-
* shapes IDs in the 'smithy.api' namespace, while a deprioritized queue
413-
* stores all shapes IDs in other namespaces. When the *estimated*
414-
* number of cached items exceeds {@code maxSize}, entries are removed
415-
* from the cache in the order of least recently added (not least
416-
* recently used) until the number of estimated entries is less than
417-
* {@code maxSize}. A simple LRU cache implementation that uses a
418-
* ConcurrentLinkedQueue would be much more expensive since it requires
419-
* that cache hits remove and then re-add a key to the queue (in micro
420-
* benchmarks, removing and adding to a ConcurrentLinkedQueue on a
421-
* cache hit was shown to be a fairly significant performance penalty).
422-
*
423-
* <p>When evicting, entries are removed by first draining the
424-
* deprioritized queue, and then, if the deprioritized queue is empty,
425-
* draining the prioritized queue. Draining the prioritized queue should
426-
* only be necessary to account for misbehaving inputs.
427-
*
428-
* <p>While not an optimal cache, this cache is the way it is because it's
429-
* meant to be concurrent, non-blocking, lightweight, dependency-free, and
430-
* should improve the performance of normal workflows of using Smithy.
431-
*
432-
* <p>Other alternatives considered were:
433-
*
434-
* <ol>
435-
* <li>Pull in Caffeine. While appealing, Caffeine's cache didn't
436-
* perform noticeably better than this cache _and_ it is a dependency
437-
* that we wouldn't otherwise need to take, and we've drawn a pretty
438-
* hard line in Smithy to be dependency-free. We could potentially
439-
* "vendor" parts of Caffeine, but it's a large library that doesn't
440-
* lend itself well towards that use case.</li>
441-
* <li>Just use an unbounded ConcurrentHashMap. While simple, this
442-
* approach is not good for long running processes where you can't
443-
* control the input</li>
444-
* <li>Use an {@code AtomicInteger} to cap the maximum size of a
445-
* ConcurrentHashMap, and when the maximum size is hit, stop caching
446-
* entries. This approach would work for most normal use cases but
447-
* would not work well for long running processes that potentially
448-
* load multiple models.</li>
449-
* <li>Use a synchronized {@link LinkedHashMap}. This approach is
450-
* just a bit slower than the chosen approach.</li>
451-
* </ol>
409+
* <p>Prelude IDs are stored separately from non-prelude IDs because we can make a reasonable estimate about the
410+
* size of the prelude and stop caching IDs when that size is exceeded. Prelude shapes are stored in a
411+
* ConcurrentHashMap with a bounded size. Once the size exceeds 500, then items are no longer stored in the cache.
412+
* Non-prelude shapes are stored in a bounded, synchronized LRU cache based on {@link LinkedHashMap}.
452413
*/
453414
private static final class ShapeIdFactory {
454-
private final int maxSize;
455-
private final Queue<String> priorityQueue = new ConcurrentLinkedQueue<>();
456-
private final Queue<String> deprioritizedQueue = new ConcurrentLinkedQueue<>();
457-
private final ConcurrentMap<String, ShapeId> map;
458-
private final AtomicInteger size = new AtomicInteger();
459-
460-
ShapeIdFactory(final int maxSize) {
461-
this.maxSize = maxSize;
462-
map = new ConcurrentHashMap<>(maxSize);
415+
private static final int NON_PRELUDE_MAX_SIZE = 8192;
416+
private static final int PRELUDE_MAX_SIZE = 500;
417+
private static final String PRELUDE_PREFIX = Prelude.NAMESPACE + '#';
418+
419+
private final Map<String, ShapeId> nonPreludeCache;
420+
private final ConcurrentMap<String, ShapeId> preludeCache;
421+
422+
ShapeIdFactory() {
423+
preludeCache = new ConcurrentHashMap<>(PRELUDE_MAX_SIZE);
424+
425+
// A simple LRU cache based on LinkedHashMap, wrapped in a synchronized map.
426+
nonPreludeCache = Collections.synchronizedMap(new LinkedHashMap<String, ShapeId>(
427+
NON_PRELUDE_MAX_SIZE + 1, 1.0f, true) {
428+
@Override
429+
protected boolean removeEldestEntry(Map.Entry<String, ShapeId> eldest) {
430+
return this.size() > NON_PRELUDE_MAX_SIZE;
431+
}
432+
});
463433
}
464434

465435
ShapeId create(final String key) {
466-
return map.computeIfAbsent(key, id -> {
467-
ShapeId value = buildShapeId(key);
436+
if (key.startsWith(PRELUDE_PREFIX)) {
437+
return getPreludeId(key);
438+
} else {
439+
return getNonPreludeId(key);
440+
}
441+
}
468442

469-
if (value.getNamespace().equals(Prelude.NAMESPACE)) {
470-
priorityQueue.offer(key);
471-
} else {
472-
deprioritizedQueue.offer(key);
473-
}
443+
private ShapeId getPreludeId(String key) {
444+
// computeIfAbsent isn't used here since we need to limit the cache size and creating multiple IDs
445+
// simultaneously isn't an issue.
446+
ShapeId result = preludeCache.get(key);
447+
if (result != null) {
448+
return result;
449+
}
474450

475-
// Evict items when the cache gets too big.
476-
if (size.incrementAndGet() > maxSize) {
477-
// Remove an element from the deprioritized queue if it isn't empty,
478-
// and if it is, then try to remove an element from the priority queue.
479-
String item = deprioritizedQueue.poll();
480-
if (item == null) {
481-
item = priorityQueue.poll();
482-
}
483-
484-
size.decrementAndGet();
485-
map.remove(item);
486-
}
451+
// The ID wasn't found so build it, and add it to the cache if the cache isn't too big already.
452+
result = buildShapeId(key);
487453

488-
return value;
489-
});
454+
if (preludeCache.size() <= PRELUDE_MAX_SIZE) {
455+
preludeCache.putIfAbsent(key, result);
456+
}
457+
458+
return result;
459+
}
460+
461+
private ShapeId getNonPreludeId(String key) {
462+
return nonPreludeCache.computeIfAbsent(key, ShapeIdFactory::buildShapeId);
490463
}
491464

492-
private ShapeId buildShapeId(String absoluteShapeId) {
465+
private static ShapeId buildShapeId(String absoluteShapeId) {
493466
int namespacePosition = absoluteShapeId.indexOf('#');
494467
if (namespacePosition <= 0 || namespacePosition == absoluteShapeId.length() - 1) {
495468
throw new ShapeIdSyntaxException("Invalid shape ID: " + absoluteShapeId);

0 commit comments

Comments
 (0)