Skip to content

Commit 79855f8

Browse files
committed
add lifecycle metrics
motivation: startup/shutdown metrics are important for real-life services, for example spike in start metrics indicates a crash-loop changes: * add start and shutdown counters to ComponentLifecycle * add start and shutdown timers to report duration of startup and shutdown operations
1 parent 5577147 commit 79855f8

File tree

3 files changed

+97
-1
lines changed

3 files changed

+97
-1
lines changed

Sources/Lifecycle/Lifecycle.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ import Darwin
1818
import Glibc
1919
#endif
2020
import Backtrace
21+
import CoreMetrics
2122
import Dispatch
2223
import Logging
23-
import Metrics
2424

2525
// MARK: - LifecycleTask
2626

@@ -469,6 +469,8 @@ public class ComponentLifecycle: LifecycleTask {
469469
// MARK: - private
470470

471471
private func _start(on queue: DispatchQueue, tasks: [LifecycleTask], callback: @escaping (Error?) -> Void) {
472+
Counter(label: "\(self.label).lifecycle.start").increment()
473+
472474
self.stateLock.withLock {
473475
guard case .idle = self.state else {
474476
preconditionFailure("invalid state, \(self.state)")
@@ -517,7 +519,9 @@ public class ComponentLifecycle: LifecycleTask {
517519
return callback(index, nil)
518520
}
519521
self.log("starting tasks [\(tasks[index].label)]")
522+
let startTime = DispatchTime.now()
520523
start { error in
524+
Timer(label: "\(self.label).\(tasks[index].label).lifecycle.start").recordNanoseconds(DispatchTime.now().uptimeNanoseconds - startTime.uptimeNanoseconds)
521525
if let error = error {
522526
self.log(level: .error, "failed to start [\(tasks[index].label)]: \(error)")
523527
return callback(index, error)
@@ -531,6 +535,8 @@ public class ComponentLifecycle: LifecycleTask {
531535
}
532536

533537
private func _shutdown(on queue: DispatchQueue, tasks: [LifecycleTask], callback: @escaping () -> Void) {
538+
Counter(label: "\(self.label).lifecycle.shutdown").increment()
539+
534540
self.stateLock.withLock {
535541
log("shutting down")
536542
self.state = .shuttingDown(queue)
@@ -555,8 +561,11 @@ public class ComponentLifecycle: LifecycleTask {
555561
if index >= tasks.count {
556562
return callback(errors)
557563
}
564+
558565
self.log("stopping tasks [\(tasks[index].label)]")
566+
let startTime = DispatchTime.now()
559567
shutdown { error in
568+
Timer(label: "\(self.label).\(tasks[index].label).lifecycle.shutdown").recordNanoseconds(DispatchTime.now().uptimeNanoseconds - startTime.uptimeNanoseconds)
560569
var errors = errors
561570
if let error = error {
562571
if errors == nil {

Tests/LifecycleTests/ComponentLifecycleTests+XCTest.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ extension ComponentLifecycleTests {
6666
("testStatefulNIO", testStatefulNIO),
6767
("testStatefulNIOStartFailure", testStatefulNIOStartFailure),
6868
("testStatefulNIOShutdownFailure", testStatefulNIOShutdownFailure),
69+
("testMetrics", testMetrics),
6970
]
7071
}
7172
}

Tests/LifecycleTests/ComponentLifecycleTests.swift

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
@testable import Lifecycle
1616
import LifecycleNIOCompat
17+
import Metrics
1718
import NIO
1819
import XCTest
1920

@@ -1247,4 +1248,89 @@ final class ComponentLifecycleTests: XCTestCase {
12471248

12481249
XCTAssertFalse(item.shutdown, "expected item to be shutdown")
12491250
}
1251+
1252+
func testMetrics() {
1253+
let metrics = TestMetrics()
1254+
MetricsSystem.bootstrap(metrics)
1255+
1256+
let items = (0 ..< 3).map { _ in GoodItem(id: UUID().uuidString, startDelay: 0.1, shutdownDelay: 0.1) }
1257+
let lifecycle = ComponentLifecycle(label: "test")
1258+
lifecycle.register(items)
1259+
lifecycle.start { startError in
1260+
XCTAssertNil(startError, "not expecting error")
1261+
lifecycle.shutdown { shutdownErrors in
1262+
XCTAssertNil(shutdownErrors, "not expecting error")
1263+
}
1264+
}
1265+
lifecycle.wait()
1266+
XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.start"]?.value, 1, "expected start counter to be 1")
1267+
XCTAssertEqual(metrics.counters["\(lifecycle.label).lifecycle.shutdown"]?.value, 1, "expected shutdown counter to be 1")
1268+
items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected start timer to be non-zero") }
1269+
items.forEach { XCTAssertGreaterThan(metrics.timers["\(lifecycle.label).\($0.label).lifecycle.shutdown"]?.value ?? 0, 0, "expected shutdown timer to be non-zero") }
1270+
}
1271+
}
1272+
1273+
// this is not thread-safe but okay for our purposes
1274+
class TestMetrics: MetricsFactory, RecorderHandler {
1275+
var counters = [String: TestCounter]()
1276+
var timers = [String: TestTimer]()
1277+
1278+
public init() {}
1279+
1280+
public func makeCounter(label: String, dimensions: [(String, String)]) -> CounterHandler {
1281+
let counter = TestCounter(label: label)
1282+
self.counters[label] = counter
1283+
return counter
1284+
}
1285+
1286+
public func makeRecorder(label: String, dimensions: [(String, String)], aggregate: Bool) -> RecorderHandler {
1287+
return self
1288+
}
1289+
1290+
public func makeTimer(label: String, dimensions: [(String, String)]) -> TimerHandler {
1291+
let timer = TestTimer(label: label)
1292+
self.timers[label] = timer
1293+
return timer
1294+
}
1295+
1296+
public func destroyCounter(_: CounterHandler) {}
1297+
public func destroyRecorder(_: RecorderHandler) {}
1298+
public func destroyTimer(_: TimerHandler) {}
1299+
1300+
public func record(_: Int64) {}
1301+
public func record(_: Double) {}
1302+
1303+
// this is not thread-safe but okay for our purposes
1304+
class TestCounter: CounterHandler {
1305+
let label: String
1306+
var value: Int64
1307+
1308+
init(label: String) {
1309+
self.label = label
1310+
self.value = 0
1311+
}
1312+
1313+
public func increment(by: Int64) {
1314+
self.value += by
1315+
}
1316+
1317+
public func reset() {
1318+
self.value = 0
1319+
}
1320+
}
1321+
1322+
// this is not thread-safe but okay for our purposes
1323+
class TestTimer: TimerHandler {
1324+
let label: String
1325+
var value: Int64
1326+
1327+
init(label: String) {
1328+
self.label = label
1329+
self.value = 0
1330+
}
1331+
1332+
public func recordNanoseconds(_ value: Int64) {
1333+
self.value = value
1334+
}
1335+
}
12501336
}

0 commit comments

Comments
 (0)