Skip to content

Commit 773d548

Browse files
authored
Per tier pool stats (pmem#70)
1 parent 35db85b commit 773d548

File tree

12 files changed

+411
-119
lines changed

12 files changed

+411
-119
lines changed

cachelib/allocator/Cache.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,17 +235,18 @@ void CacheBase::updateGlobalCacheStats(const std::string& statPrefix) const {
235235
statPrefix + "cache.size.configured",
236236
memStats.configuredRamCacheSize + memStats.nvmCacheSize);
237237

238+
//TODO: add specific per-tier counters
238239
const auto stats = getGlobalCacheStats();
239240
counters_.updateDelta(statPrefix + "cache.alloc_attempts",
240-
stats.allocAttempts);
241+
std::accumulate(stats.allocAttempts.begin(), stats.allocAttempts.end(),0));
241242
counters_.updateDelta(statPrefix + "cache.eviction_attempts",
242-
stats.evictionAttempts);
243+
std::accumulate(stats.evictionAttempts.begin(),stats.evictionAttempts.end(),0));
243244
counters_.updateDelta(statPrefix + "cache.alloc_failures",
244-
stats.allocFailures);
245+
std::accumulate(stats.allocFailures.begin(),stats.allocFailures.end(),0));
245246
counters_.updateDelta(statPrefix + "cache.invalid_allocs",
246247
stats.invalidAllocs);
247248
const std::string ramEvictionKey = statPrefix + "ram.evictions";
248-
counters_.updateDelta(ramEvictionKey, stats.numEvictions);
249+
counters_.updateDelta(ramEvictionKey, std::accumulate(stats.numEvictions.begin(),stats.numEvictions.end(),0));
249250
// get the new delta to see if uploading any eviction age stats or lifetime
250251
// stats makes sense.
251252
uint64_t ramEvictionDelta = counters_.getDelta(ramEvictionKey);

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 122 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -418,8 +418,7 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
418418
util::RollingLatencyTracker rollTracker{
419419
(*stats_.classAllocLatency)[tid][pid][cid]};
420420

421-
// TODO: per-tier
422-
(*stats_.allocAttempts)[pid][cid].inc();
421+
(*stats_.allocAttempts)[tid][pid][cid].inc();
423422

424423
void* memory = allocator_[tid]->allocate(pid, requiredSize);
425424

@@ -445,12 +444,12 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
445444
handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
446445
if (handle) {
447446
handle.markNascent();
448-
(*stats_.fragmentationSize)[pid][cid].add(
447+
(*stats_.fragmentationSize)[tid][pid][cid].add(
449448
util::getFragmentation(*this, *handle));
450449
}
451450

452451
} else { // failed to allocate memory.
453-
(*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier
452+
(*stats_.allocFailures)[tid][pid][cid].inc();
454453
// wake up rebalancer
455454
if (poolRebalancer_) {
456455
poolRebalancer_->wakeUp();
@@ -522,16 +521,14 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
522521
util::RollingLatencyTracker rollTracker{
523522
(*stats_.classAllocLatency)[tid][pid][cid]};
524523

525-
// TODO: per-tier? Right now stats_ are not used in any public periodic
526-
// worker
527-
(*stats_.allocAttempts)[pid][cid].inc();
524+
(*stats_.allocAttempts)[tid][pid][cid].inc();
528525

529526
void* memory = allocator_[tid]->allocate(pid, requiredSize);
530527
if (memory == nullptr) {
531528
memory = findEviction(tid, pid, cid);
532529
}
533530
if (memory == nullptr) {
534-
(*stats_.allocFailures)[pid][cid].inc();
531+
(*stats_.allocFailures)[tid][pid][cid].inc();
535532
return WriteHandle{};
536533
}
537534

@@ -543,7 +540,7 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
543540

544541
if (child) {
545542
child.markNascent();
546-
(*stats_.fragmentationSize)[pid][cid].add(
543+
(*stats_.fragmentationSize)[tid][pid][cid].add(
547544
util::getFragmentation(*this, *child));
548545
}
549546

@@ -858,7 +855,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
858855
stats_.perPoolEvictionAgeSecs_[allocInfo.poolId].trackValue(refreshTime);
859856
}
860857

861-
(*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
858+
(*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub(
862859
util::getFragmentation(*this, it));
863860

864861
// Chained items can only end up in this place if the user has allocated
@@ -941,7 +938,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
941938

942939
const auto childInfo =
943940
allocator_[tid]->getAllocInfo(static_cast<const void*>(head));
944-
(*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
941+
(*stats_.fragmentationSize)[tid][childInfo.poolId][childInfo.classId].sub(
945942
util::getFragmentation(*this, *head));
946943

947944
removeFromMMContainer(*head);
@@ -1585,20 +1582,20 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
15851582
Item* candidate = nullptr;
15861583
typename NvmCacheT::PutToken token;
15871584

1588-
mmContainer.withEvictionIterator([this, pid, cid, &candidate, &toRecycle,
1585+
mmContainer.withEvictionIterator([this, tid, pid, cid, &candidate, &toRecycle,
15891586
&searchTries, &mmContainer, &lastTier,
15901587
&token](auto&& itr) {
15911588
if (!itr) {
15921589
++searchTries;
1593-
(*stats_.evictionAttempts)[pid][cid].inc();
1590+
(*stats_.evictionAttempts)[tid][pid][cid].inc();
15941591
return;
15951592
}
15961593

15971594
while ((config_.evictionSearchTries == 0 ||
15981595
config_.evictionSearchTries > searchTries) &&
15991596
itr) {
16001597
++searchTries;
1601-
(*stats_.evictionAttempts)[pid][cid].inc();
1598+
(*stats_.evictionAttempts)[tid][pid][cid].inc();
16021599

16031600
auto* toRecycle_ = itr.get();
16041601
auto* candidate_ =
@@ -1698,6 +1695,7 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
16981695
XDCHECK(!candidate->isAccessible());
16991696
XDCHECK(candidate->getKey() == evictedToNext->getKey());
17001697

1698+
(*stats_.numWritebacks)[tid][pid][cid].inc();
17011699
wakeUpWaiters(*candidate, std::move(evictedToNext));
17021700
}
17031701

@@ -1707,9 +1705,9 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
17071705
// NULL. If `ref` == 0 then it means that we are the last holder of
17081706
// that item.
17091707
if (candidate->hasChainedItem()) {
1710-
(*stats_.chainedItemEvictions)[pid][cid].inc();
1708+
(*stats_.chainedItemEvictions)[tid][pid][cid].inc();
17111709
} else {
1712-
(*stats_.regularItemEvictions)[pid][cid].inc();
1710+
(*stats_.regularItemEvictions)[tid][pid][cid].inc();
17131711
}
17141712

17151713
if (auto eventTracker = getEventTracker()) {
@@ -2304,7 +2302,7 @@ bool CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
23042302
const auto tid = getTierId(item);
23052303
const auto allocInfo =
23062304
allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
2307-
(*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc();
2305+
(*stats_.cacheHits)[tid][allocInfo.poolId][allocInfo.classId].inc();
23082306

23092307
// track recently accessed items if needed
23102308
if (UNLIKELY(config_.trackRecentItemsForDump)) {
@@ -2773,6 +2771,8 @@ size_t CacheAllocator<CacheTrait>::getPoolSize(PoolId poolId) const {
27732771

27742772
template <typename CacheTrait>
27752773
PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
2774+
//this pool ref is just used to get class ids, which will be the
2775+
//same across tiers
27762776
const auto& pool = allocator_[currentTier()]->getPool(poolId);
27772777
const auto& allocSizes = pool.getAllocSizes();
27782778
auto mpStats = pool.getStats();
@@ -2791,24 +2791,42 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
27912791
// TODO export evictions, numItems etc from compact cache directly.
27922792
if (!isCompactCache) {
27932793
for (const ClassId cid : classIds) {
2794-
uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get();
2795-
XDCHECK(mmContainers_[currentTier()][poolId][cid],
2796-
folly::sformat("Pid {}, Cid {} not initialized.", poolId, cid));
2794+
uint64_t allocAttempts, evictionAttempts, allocFailures,
2795+
fragmentationSize, classHits, chainedItemEvictions,
2796+
regularItemEvictions, numWritebacks = 0;
2797+
MMContainerStat mmContainerStats;
2798+
for (TierId tid = 0; tid < getNumTiers(); tid++) {
2799+
allocAttempts += (*stats_.allocAttempts)[tid][poolId][cid].get();
2800+
evictionAttempts += (*stats_.evictionAttempts)[tid][poolId][cid].get();
2801+
allocFailures += (*stats_.allocFailures)[tid][poolId][cid].get();
2802+
fragmentationSize += (*stats_.fragmentationSize)[tid][poolId][cid].get();
2803+
classHits += (*stats_.cacheHits)[tid][poolId][cid].get();
2804+
chainedItemEvictions += (*stats_.chainedItemEvictions)[tid][poolId][cid].get();
2805+
regularItemEvictions += (*stats_.regularItemEvictions)[tid][poolId][cid].get();
2806+
numWritebacks += (*stats_.numWritebacks)[tid][poolId][cid].get();
2807+
mmContainerStats += getMMContainerStat(tid, poolId, cid);
2808+
XDCHECK(mmContainers_[tid][poolId][cid],
2809+
folly::sformat("Tid {}, Pid {}, Cid {} not initialized.", tid, poolId, cid));
2810+
}
27972811
cacheStats.insert(
27982812
{cid,
2799-
{allocSizes[cid], (*stats_.allocAttempts)[poolId][cid].get(),
2800-
(*stats_.evictionAttempts)[poolId][cid].get(),
2801-
(*stats_.allocFailures)[poolId][cid].get(),
2802-
(*stats_.fragmentationSize)[poolId][cid].get(), classHits,
2803-
(*stats_.chainedItemEvictions)[poolId][cid].get(),
2804-
(*stats_.regularItemEvictions)[poolId][cid].get(),
2805-
getMMContainerStat(currentTier(), poolId, cid)}});
2813+
{allocSizes[cid],
2814+
allocAttempts,
2815+
evictionAttempts,
2816+
allocFailures,
2817+
fragmentationSize,
2818+
classHits,
2819+
chainedItemEvictions,
2820+
regularItemEvictions,
2821+
numWritebacks,
2822+
mmContainerStats}});
28062823
totalHits += classHits;
28072824
}
28082825
}
28092826

28102827
PoolStats ret;
28112828
ret.isCompactCache = isCompactCache;
2829+
//pool name is also shared among tiers
28122830
ret.poolName = allocator_[currentTier()]->getPoolName(poolId);
28132831
ret.poolSize = pool.getPoolSize();
28142832
ret.poolUsableSize = pool.getPoolUsableSize();
@@ -2821,6 +2839,59 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
28212839
return ret;
28222840
}
28232841

2842+
template <typename CacheTrait>
2843+
PoolStats CacheAllocator<CacheTrait>::getPoolStats(TierId tid, PoolId poolId) const {
2844+
const auto& pool = allocator_[tid]->getPool(poolId);
2845+
const auto& allocSizes = pool.getAllocSizes();
2846+
auto mpStats = pool.getStats();
2847+
const auto& classIds = mpStats.classIds;
2848+
2849+
// check if this is a compact cache.
2850+
bool isCompactCache = false;
2851+
{
2852+
folly::SharedMutex::ReadHolder lock(compactCachePoolsLock_);
2853+
isCompactCache = isCompactCachePool_[poolId];
2854+
}
2855+
2856+
std::unordered_map<ClassId, CacheStat> cacheStats;
2857+
uint64_t totalHits = 0;
2858+
// cacheStats is only meaningful for pools that are not compact caches.
2859+
// TODO export evictions, numItems etc from compact cache directly.
2860+
if (!isCompactCache) {
2861+
for (const ClassId cid : classIds) {
2862+
uint64_t classHits = (*stats_.cacheHits)[tid][poolId][cid].get();
2863+
XDCHECK(mmContainers_[tid][poolId][cid],
2864+
folly::sformat("Tid {}, Pid {}, Cid {} not initialized.", tid, poolId, cid));
2865+
cacheStats.insert(
2866+
{cid,
2867+
{allocSizes[cid],
2868+
(*stats_.allocAttempts)[tid][poolId][cid].get(),
2869+
(*stats_.evictionAttempts)[tid][poolId][cid].get(),
2870+
(*stats_.allocFailures)[tid][poolId][cid].get(),
2871+
(*stats_.fragmentationSize)[tid][poolId][cid].get(),
2872+
classHits,
2873+
(*stats_.chainedItemEvictions)[tid][poolId][cid].get(),
2874+
(*stats_.regularItemEvictions)[tid][poolId][cid].get(),
2875+
(*stats_.numWritebacks)[tid][poolId][cid].get(),
2876+
getMMContainerStat(tid, poolId, cid)}});
2877+
totalHits += classHits;
2878+
}
2879+
}
2880+
2881+
PoolStats ret;
2882+
ret.isCompactCache = isCompactCache;
2883+
ret.poolName = allocator_[tid]->getPoolName(poolId);
2884+
ret.poolSize = pool.getPoolSize();
2885+
ret.poolUsableSize = pool.getPoolUsableSize();
2886+
ret.poolAdvisedSize = pool.getPoolAdvisedSize();
2887+
ret.cacheStats = std::move(cacheStats);
2888+
ret.mpStats = std::move(mpStats);
2889+
ret.numPoolGetHits = totalHits;
2890+
ret.evictionAgeSecs = stats_.perPoolEvictionAgeSecs_[poolId].estimate();
2891+
2892+
return ret;
2893+
}
2894+
28242895
template <typename CacheTrait>
28252896
ACStats CacheAllocator<CacheTrait>::getACStats(TierId tid,
28262897
PoolId poolId,
@@ -3072,7 +3143,7 @@ bool CacheAllocator<CacheTrait>::moveForSlabRelease(
30723143
const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory());
30733144
allocator_[tid]->free(&oldItem);
30743145

3075-
(*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
3146+
(*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub(
30763147
util::getFragmentation(*this, oldItem));
30773148
stats_.numMoveSuccesses.inc();
30783149
return true;
@@ -3351,12 +3422,13 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
33513422
nvmCache_->put(*evicted, std::move(token));
33523423
}
33533424

3425+
const auto tid = getTierId(*evicted);
33543426
const auto allocInfo =
3355-
allocator_[getTierId(*evicted)]->getAllocInfo(static_cast<const void*>(evicted));
3427+
allocator_[tid]->getAllocInfo(static_cast<const void*>(evicted));
33563428
if (evicted->hasChainedItem()) {
3357-
(*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId].inc();
3429+
(*stats_.chainedItemEvictions)[tid][allocInfo.poolId][allocInfo.classId].inc();
33583430
} else {
3359-
(*stats_.regularItemEvictions)[allocInfo.poolId][allocInfo.classId].inc();
3431+
(*stats_.regularItemEvictions)[tid][allocInfo.poolId][allocInfo.classId].inc();
33603432
}
33613433

33623434
stats_.numEvictionSuccesses.inc();
@@ -3579,8 +3651,13 @@ folly::IOBufQueue CacheAllocator<CacheTrait>::saveStateToIOBuf() {
35793651
for (PoolId pid : pools) {
35803652
for (unsigned int cid = 0; cid < (*stats_.fragmentationSize)[pid].size();
35813653
++cid) {
3654+
uint64_t fragmentationSize = 0;
3655+
for (TierId tid = 0; tid < getNumTiers(); tid++) {
3656+
fragmentationSize += (*stats_.fragmentationSize)[tid][pid][cid].get();
3657+
}
35823658
metadata_.fragmentationSize()[pid][static_cast<ClassId>(cid)] =
3583-
(*stats_.fragmentationSize)[pid][cid].get();
3659+
fragmentationSize;
3660+
35843661
}
35853662
if (isCompactCachePool_[pid]) {
35863663
metadata_.compactCachePools()->push_back(pid);
@@ -3826,8 +3903,19 @@ void CacheAllocator<CacheTrait>::initStats() {
38263903
// deserialize the fragmentation size of each thread.
38273904
for (const auto& pid : *metadata_.fragmentationSize()) {
38283905
for (const auto& cid : pid.second) {
3829-
(*stats_.fragmentationSize)[pid.first][cid.first].set(
3830-
static_cast<uint64_t>(cid.second));
3906+
//in multi-tier we serialized as the sum - no way
3907+
//to recover per-tier values, so split it evenly across tiers (tier 0 takes the remainder) for now
3908+
//TODO: proper multi-tier serialization
3909+
uint64_t total = static_cast<uint64_t>(cid.second);
3910+
uint64_t part = total / getNumTiers();
3911+
uint64_t sum = 0;
3912+
for (TierId tid = 1; tid < getNumTiers(); tid++) {
3913+
(*stats_.fragmentationSize)[tid][pid.first][cid.first].set(part);
3914+
sum += part;
3915+
}
3916+
uint64_t leftover = total - sum;
3917+
(*stats_.fragmentationSize)[0][pid.first][cid.first].set(leftover);
3918+
38313919
}
38323920
}
38333921

cachelib/allocator/CacheAllocator.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,6 +1233,8 @@ class CacheAllocator : public CacheBase {
12331233

12341234
// pool stats by pool id
12351235
PoolStats getPoolStats(PoolId pid) const override final;
1236+
// pool stats by tier id and pool id
1237+
PoolStats getPoolStats(TierId tid, PoolId pid) const;
12361238

12371239
// This can be expensive so it is not part of PoolStats
12381240
PoolEvictionAgeStats getPoolEvictionAgeStats(
@@ -2015,9 +2017,9 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
20152017
XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
20162018

20172019
if (candidate->hasChainedItem()) {
2018-
(*stats_.chainedItemEvictions)[pid][cid].inc();
2020+
(*stats_.chainedItemEvictions)[tid][pid][cid].inc();
20192021
} else {
2020-
(*stats_.regularItemEvictions)[pid][cid].inc();
2022+
(*stats_.regularItemEvictions)[tid][pid][cid].inc();
20212023
}
20222024

20232025
// it's safe to recycle the item here as there are no more

0 commit comments

Comments
 (0)