diff --git a/Cargo.Bazel.lock b/Cargo.Bazel.lock index 1beb8a6fa..4fa559206 100644 --- a/Cargo.Bazel.lock +++ b/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "a7ce8a7a81d746bfe5a4f95dcdae17c12d8081fd37edb194eb891727a2d29dc3", + "checksum": "8f4fbea21c1f9b827cf3e1a66029671a90a8e31f2f139ace1c0c5b9755f3ef30", "crates": { "actix-codec 0.5.2": { "name": "actix-codec", @@ -35996,6 +35996,10 @@ "id": "ic-base-types 0.9.0", "target": "ic_base_types" }, + { + "id": "ic-canisters-http-types 0.9.0", + "target": "ic_canisters_http_types" + }, { "id": "ic-cdk 0.17.1", "target": "ic_cdk" @@ -36012,6 +36016,14 @@ "id": "ic-management-canister-types 0.9.0", "target": "ic_management_canister_types" }, + { + "id": "ic-metrics-encoder 1.1.1", + "target": "ic_metrics_encoder" + }, + { + "id": "ic-nervous-system-common 0.9.0", + "target": "ic_nervous_system_common" + }, { "id": "ic-nns-common 0.9.0", "target": "ic_nns_common" @@ -62879,6 +62891,7 @@ "ic-base-types 0.9.0", "ic-canister-client 0.9.0", "ic-canister-client-sender 0.9.0", + "ic-canisters-http-types 0.9.0", "ic-cdk 0.17.1", "ic-cdk-macros 0.17.1", "ic-cdk-timers 0.11.0", @@ -62889,7 +62902,9 @@ "ic-interfaces-registry 0.9.0", "ic-management-canister-types 0.9.0", "ic-metrics 0.9.0", + "ic-metrics-encoder 1.1.1", "ic-nervous-system-clients 0.0.1", + "ic-nervous-system-common 0.9.0", "ic-nervous-system-root 0.9.0", "ic-nns-common 0.9.0", "ic-nns-constants 0.9.0", diff --git a/Cargo.lock b/Cargo.lock index 0cc5296c2..45aeb8dcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6148,11 +6148,14 @@ dependencies = [ "candid", "futures", "ic-base-types", + "ic-canisters-http-types", "ic-cdk 0.17.1", "ic-cdk-macros 0.17.1", "ic-cdk-timers 0.11.0", "ic-interfaces-registry", "ic-management-canister-types", + "ic-metrics-encoder", + "ic-nervous-system-common", "ic-nns-common", "ic-nns-constants", "ic-protobuf", diff --git a/Cargo.toml b/Cargo.toml index 5395c19a4..d55462ff2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,8 @@ 
members = [ "rs/dre-canisters/node-provider-rewards", "rs/dre-canisters/node-provider-rewards-lib", "rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics", - "rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics-types", "rs/ic-observability/general-testnet-service-discovery", + "rs/dre-canisters/trustworthy-node-metrics/src/trustworthy-node-metrics-types", + "rs/ic-observability/general-testnet-service-discovery", ] resolver = "2" @@ -101,6 +102,7 @@ ic-base-types = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f ic-canister-client = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-canister-client-sender = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-canisters = { path = "rs/ic-canisters" } +ic-canisters-http-types = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-nervous-system-common-test-keys = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-cdk = { git = "https://github.com/dfinity/cdk-rs.git", rev = "929fd0b31e9ec69aad7cf6285df0394f25fe1815" } ic-config = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } @@ -111,6 +113,7 @@ ic-management-backend = { path = "rs/ic-management-backend" } ic-management-canister-types = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-management-types = { path = "rs/ic-management-types" } ic-metrics = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } +ic-nervous-system-common = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-nns-common = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" } ic-nns-constants = { git = 
"https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" }
 ic-nns-governance = { git = "https://github.com/dfinity/ic.git", rev = "c22d478ad97f82bd5d378862c9dd2f13afe2ece1" }
diff --git a/rs/dre-canisters/node-provider-rewards/Cargo.toml b/rs/dre-canisters/node-provider-rewards/Cargo.toml
index 9599d1ebd..d06f74fb3 100644
--- a/rs/dre-canisters/node-provider-rewards/Cargo.toml
+++ b/rs/dre-canisters/node-provider-rewards/Cargo.toml
@@ -12,6 +12,7 @@ crate-type = ["cdylib"]
 
 [dependencies]
 async-trait = { workspace = true }
+ic-canisters-http-types = { workspace = true }
 ic-cdk = { workspace = true }
 ic-cdk-timers = { workspace = true }
 ic-cdk-macros = { workspace = true }
@@ -26,6 +27,8 @@ anyhow = { workspace = true }
 indexmap = { workspace = true }
 ic-base-types = { workspace = true }
 ic-management-canister-types = { workspace = true }
+ic-metrics-encoder = { version = "1.1.1" }
+ic-nervous-system-common = { workspace = true }
 ic-nns-common = { workspace = true }
 ic-protobuf = { workspace = true }
 ic-registry-transport = { workspace = true }
diff --git a/rs/dre-canisters/node-provider-rewards/src/lib.rs b/rs/dre-canisters/node-provider-rewards/src/lib.rs
index 9f126a5c6..d04093cf9 100644
--- a/rs/dre-canisters/node-provider-rewards/src/lib.rs
+++ b/rs/dre-canisters/node-provider-rewards/src/lib.rs
@@ -1,8 +1,12 @@
+use std::cell::RefCell;
+
 use crate::canister_client::ICCanisterClient;
 use crate::metrics::MetricsManager;
 use crate::registry_store::CanisterRegistryStore;
 use crate::storage::{State, VM};
+use ic_canisters_http_types::{HttpRequest, HttpResponse, HttpResponseBuilder};
 use ic_cdk_macros::*;
+use ic_nervous_system_common::serve_metrics;
 
 mod canister_client;
 mod metrics;
@@ -19,11 +23,64 @@ pub type RegistryStoreInstance = CanisterRegistryStore;
 pub type MetricsManagerInstance = MetricsManager;
 pub const IC_CANISTER_CLIENT: ICCanisterClient = ICCanisterClient {};
 
+/// Timestamps, in whole seconds, of the most recent sync ("calculation") run,
+/// exposed as Prometheus gauges by `encode_metrics`.
+///
+/// Every field stays at `0.0` until the corresponding event is first recorded.
+#[derive(Default)]
+pub struct PrometheusMetrics {
+    /// When the most recent run started.
+    last_calculation_start: f64,
+    /// When a run most recently finished successfully.
+    last_calculation_success: f64,
+    /// When the most recent run finished, successfully or not.
+    last_calculation_end: f64,
+}
+
+impl PrometheusMetrics {
+    /// Creates a record with every timestamp set to `0.0`.
+    fn new() -> Self {
+        Default::default()
+    }
+
+    /// Current IC time truncated to whole seconds.
+    ///
+    /// `ic_cdk::api::time()` reports nanoseconds; the integer division drops the
+    /// sub-second part before the value is converted to `f64`.
+    fn now_seconds() -> f64 {
+        (ic_cdk::api::time() / 1_000_000_000) as f64
+    }
+
+    /// Records that a run has just started.
+    fn mark_last_calculation_start(&mut self) {
+        self.last_calculation_start = Self::now_seconds();
+    }
+
+    /// Records that a run has just finished successfully: the end and success
+    /// timestamps are both set to the same value.
+    fn mark_last_calculation_success(&mut self) {
+        self.last_calculation_end = Self::now_seconds();
+        self.last_calculation_success = self.last_calculation_end;
+    }
+
+    /// Records that a run has just finished, leaving the success timestamp
+    /// untouched; `sync_all` calls this when the registry sync fails.
+    fn mark_last_calculation_end(&mut self) {
+        self.last_calculation_end = Self::now_seconds();
+    }
+}
+
+thread_local! {
+    /// Metrics state shared by `sync_all` (writer) and `http_request` (reader).
+    pub(crate) static PROMETHEUS_METRICS: RefCell<PrometheusMetrics> = RefCell::new(PrometheusMetrics::new());
+}
+
 /// Sync the local registry and subnets metrics with remote
 ///
 /// - Sync local registry stored from the remote registry canister
 /// - Sync subnets metrics from the management canister of the different subnets
 async fn sync_all() {
+    PROMETHEUS_METRICS.with_borrow_mut(|m| m.mark_last_calculation_start());
     let registry_sync_result = RegistryStoreInstance::sync_registry_stored(&IC_CANISTER_CLIENT).await;
 
     match registry_sync_result {
@@ -31,9 +88,13 @@ async fn sync_all() {
             let subnets_list = registry::subnets_list();
 
             MetricsManagerInstance::update_subnets_metrics(&IC_CANISTER_CLIENT, subnets_list).await;
+            PROMETHEUS_METRICS.with_borrow_mut(|m| m.mark_last_calculation_success());
             ic_cdk::println!("Successfully synced subnets metrics and local registry");
         }
-        Err(e) => ic_cdk::println!("Failed to sync local registry: {:?}", e),
+        Err(e) => {
+            PROMETHEUS_METRICS.with_borrow_mut(|m| m.mark_last_calculation_end());
+            ic_cdk::println!("Failed to sync local registry: {:?}", e)
+        }
     }
 }
 
@@ -62,3 +123,63 @@ fn init() {
 fn post_upgrade() {
     setup_timers();
 }
+
+/// Encodes this canister's Prometheus metrics into `w`.
+///
+/// Writes two memory-usage gauges followed by the three calculation timestamps
+/// held in `metrics`, and returns early with the first error the encoder reports.
+pub fn encode_metrics(metrics: &PrometheusMetrics, w: &mut ic_metrics_encoder::MetricsEncoder<Vec<u8>>) -> std::io::Result<()> {
+    // General resource consumption.
+    w.encode_gauge(
+        "canister_stable_memory_size_bytes",
+        ic_nervous_system_common::stable_memory_size_bytes() as f64,
+        "Size of the stable memory allocated by this canister measured in bytes.",
+    )?;
+    w.encode_gauge(
+        "canister_total_memory_size_bytes",
+        ic_nervous_system_common::total_memory_size_bytes() as f64,
+        "Size of the total memory allocated by this canister measured in bytes.",
+    )?;
+
+    // Calculation signals.
+
+    // Calculation start timestamp seconds.
+    //
+    // * 0.0 -> first calculation not yet begun since canister started.
+    // * Any other positive number -> at least one calculation has started.
+    w.encode_gauge(
+        "last_calculation_start_timestamp_seconds",
+        metrics.last_calculation_start,
+        "Last time the calculation of metrics started. If this metric is present but zero, the first calculation during this canister's current execution has not yet begun or taken place.",
+    )?;
+    // Calculation finish timestamp seconds.
+    // * 0.0 -> first calculation not yet finished since canister started.
+    // * last_calculation_end_timestamp_seconds - last_calculation_start_timestamp_seconds >= 0 -> last calculation finished, next calculation not started yet
+    // * last_calculation_end_timestamp_seconds - last_calculation_start_timestamp_seconds < 0 -> calculation ongoing, not finished yet
+    w.encode_gauge(
+        "last_calculation_end_timestamp_seconds",
+        metrics.last_calculation_end,
+        "Last time the calculation of metrics ended (successfully or with failure). If this metric is present but zero, the first calculation during this canister's current execution has not started or finished yet, either successfully or with errors. Else, subtracting the last calculation start from this should yield a zero or positive value if the calculation ended (successfully or with errors), and a negative value if the calculation is still ongoing but has not finished.",
+    )?;
+    // Calculation success timestamp seconds.
+    // * 0.0 -> no calculation has yet succeeded since canister started.
+    // * last_calculation_end_timestamp_seconds == last_calculation_success_timestamp_seconds -> last calculation finished successfully
+    // * last_calculation_end_timestamp_seconds != last_calculation_success_timestamp_seconds -> last calculation failed
+    w.encode_gauge(
+        "last_calculation_success_timestamp_seconds",
+        metrics.last_calculation_success,
+        "Last time the calculation of metrics succeeded. If this metric is present but zero, no calculation has yet succeeded during this canister's current execution. Else, subtracting last_calculation_start_timestamp_seconds from this number gives a zero or positive time delta when the last calculation succeeded, or a negative value if either the last calculation failed or a calculation is currently being performed. By definition, this and last_calculation_end_timestamp_seconds will be identical when the last calculation succeeded.",
+    )?;
+
+    Ok(())
+}
+
+/// Answers HTTP queries: `/metrics` is delegated to `serve_metrics` with
+/// `encode_metrics` as the encoder callback; every other path gets a not-found response.
+#[query(hidden = true, decoding_quota = 10000)]
+fn http_request(request: HttpRequest) -> HttpResponse {
+    match request.path() {
+        "/metrics" => serve_metrics(|encoder| PROMETHEUS_METRICS.with_borrow(|m| encode_metrics(m, encoder))),
+        _ => HttpResponseBuilder::not_found().build(),
+    }
+}