Skip to content

Commit e32adbf

Browse files
authored
Support continuous live-check (#1066)
* support continuous live-check * changelog * test added for disabled stats * refactor stats * Tidy up * fmt * readme
1 parent 6ba8686 commit e32adbf

File tree

8 files changed

+788
-460
lines changed

8 files changed

+788
-460
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file.
1515
- Add Log support for emit and live-check ([#1042](https://github.com/open-telemetry/weaver/pull/1042) by @jerbly)
1616
- Add OTLP log emission for policy findings in live-check. Whenever a PolicyFinding is created, a log_record is emitted to your configured OTLP endpoint. ([#1045](https://github.com/open-telemetry/weaver/pull/1045) by @jerbly)
1717
- Deprecate `weaver registry search` command. This command is not compatible with V2 schema and will be removed in a future version. Users should search the generated documentation instead. ([#1057](https://github.com/open-telemetry/weaver/pull/1057) by @jerbly)
18+
- Add support for continuous live-check sessions with `--inactivity-timeout=0`, `--output=none` and `--no-stats` ([#1066](https://github.com/open-telemetry/weaver/pull/1066) by @jerbly)
1819

1920
# [0.19.0] - 2025-11-04
2021

crates/weaver_live_check/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,14 @@ Each policy finding is emitted as an OTLP log record with the following structur
274274
- `weaver.finding.signal_type`: Signal type (e.g., "span", "event", "metric")
275275
- `weaver.finding.signal_name`: Signal name (e.g., event name or metric name)
276276

277+
## Continuous Running Sessions
278+
279+
For long-running live-check sessions (for example, in a staging environment), there are a few settings to consider:
280+
281+
- `--inactivity-timeout=0`: If this is set to zero then weaver never times out.
282+
- `--output=none`: If this is set to none then no template engine is loaded and nothing is rendered out to the console or files.
283+
- `--no-stats`: If this is set then statistics are not accumulated over the running time of live-check. Accumulated statistics could otherwise hold a lot of information in memory during a long session.
284+
277285
## Usage examples
278286

279287
Default operation. Receive OTLP requests and output findings as they arrive. Useful for debugging an application to check for telemetry problems as you step through your code. (ctrl-c to exit, or wait for the timeout)

crates/weaver_live_check/src/lib.rs

Lines changed: 7 additions & 256 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
//! This crate provides the weaver_live_check library
44
5-
use std::{collections::HashMap, rc::Rc};
5+
use std::rc::Rc;
66

77
use live_checker::LiveChecker;
88
use miette::Diagnostic;
@@ -23,10 +23,7 @@ use weaver_forge::{
2323
v2::registry::ForgeResolvedRegistry,
2424
};
2525
use weaver_semconv::{
26-
attribute::AttributeType,
27-
deprecated::Deprecated,
28-
group::{GroupType, InstrumentSpec},
29-
stability::Stability,
26+
attribute::AttributeType, deprecated::Deprecated, group::InstrumentSpec, stability::Stability,
3027
};
3128

3229
/// Advisors for live checks
@@ -49,11 +46,16 @@ pub mod sample_metric;
4946
pub mod sample_resource;
5047
/// The intermediary format for spans
5148
pub mod sample_span;
49+
/// Statistics tracking for live check reports
50+
mod stats;
5251
/// An ingester that reads attribute names from a text file.
5352
pub mod text_file_ingester;
5453
/// An ingester that reads attribute names from standard input.
5554
pub mod text_stdin_ingester;
5655

56+
// Re-export statistics types from stats module
57+
pub use stats::{CumulativeStatistics, DisabledStatistics, LiveCheckStatistics};
58+
5759
/// Missing Attribute advice type
5860
pub const MISSING_ATTRIBUTE_ADVICE_TYPE: &str = "missing_attribute";
5961
/// Template Attribute advice type
@@ -461,257 +463,6 @@ pub struct LiveCheckReport {
461463
pub statistics: LiveCheckStatistics,
462464
}
463465

464-
/// The statistics for a live check report
465-
#[derive(Debug, Clone, PartialEq, Serialize)]
466-
pub struct LiveCheckStatistics {
467-
/// The total number of sample entities
468-
pub total_entities: usize,
469-
/// The total number of sample entities by type
470-
pub total_entities_by_type: HashMap<String, usize>,
471-
/// The total number of advisories
472-
pub total_advisories: usize,
473-
/// The number of each advice level
474-
pub advice_level_counts: HashMap<FindingLevel, usize>,
475-
/// The number of entities with each highest advice level
476-
pub highest_advice_level_counts: HashMap<FindingLevel, usize>,
477-
/// The number of entities with no advice
478-
pub no_advice_count: usize,
479-
/// The number of entities with each advice type
480-
pub advice_type_counts: HashMap<String, usize>,
481-
/// The number of entities with each advice message
482-
pub advice_message_counts: HashMap<String, usize>,
483-
/// The number of each attribute seen from the registry
484-
pub seen_registry_attributes: HashMap<String, usize>,
485-
/// The number of each non-registry attribute seen
486-
pub seen_non_registry_attributes: HashMap<String, usize>,
487-
/// The number of each metric seen from the registry
488-
pub seen_registry_metrics: HashMap<String, usize>,
489-
/// The number of each non-registry metric seen
490-
pub seen_non_registry_metrics: HashMap<String, usize>,
491-
/// The number of each event seen from the registry
492-
pub seen_registry_events: HashMap<String, usize>,
493-
/// The number of each non-registry event seen
494-
pub seen_non_registry_events: HashMap<String, usize>,
495-
/// Fraction of the registry covered by the attributes, metrics, and events
496-
pub registry_coverage: f32,
497-
}
498-
499-
impl LiveCheckStatistics {
500-
/// Create a new empty LiveCheckStatistics
501-
#[must_use]
502-
pub fn new(registry: &VersionedRegistry) -> Self {
503-
let mut seen_attributes = HashMap::new();
504-
let mut seen_metrics = HashMap::new();
505-
let mut seen_events = HashMap::new();
506-
match registry {
507-
VersionedRegistry::V1(reg) => {
508-
for group in &reg.groups {
509-
for attribute in &group.attributes {
510-
if attribute.deprecated.is_none() {
511-
let _ = seen_attributes.insert(attribute.name.clone(), 0);
512-
}
513-
}
514-
if group.r#type == GroupType::Metric && group.deprecated.is_none() {
515-
if let Some(metric_name) = &group.metric_name {
516-
let _ = seen_metrics.insert(metric_name.clone(), 0);
517-
}
518-
}
519-
if group.r#type == GroupType::Event && group.deprecated.is_none() {
520-
if let Some(event_name) = &group.name {
521-
let _ = seen_events.insert(event_name.clone(), 0);
522-
}
523-
}
524-
}
525-
}
526-
VersionedRegistry::V2(reg) => {
527-
for attribute in &reg.attributes {
528-
if attribute.common.deprecated.is_none() {
529-
let _ = seen_attributes.insert(attribute.key.clone(), 0);
530-
}
531-
}
532-
for metric in &reg.signals.metrics {
533-
if metric.common.deprecated.is_none() {
534-
let _ = seen_metrics.insert(metric.name.to_string(), 0);
535-
}
536-
}
537-
for event in &reg.signals.events {
538-
if event.common.deprecated.is_none() {
539-
let _ = seen_events.insert(event.name.to_string(), 0);
540-
}
541-
}
542-
}
543-
}
544-
LiveCheckStatistics {
545-
total_entities: 0,
546-
total_entities_by_type: HashMap::new(),
547-
total_advisories: 0,
548-
advice_level_counts: HashMap::new(),
549-
highest_advice_level_counts: HashMap::new(),
550-
no_advice_count: 0,
551-
advice_type_counts: HashMap::new(),
552-
advice_message_counts: HashMap::new(),
553-
seen_registry_attributes: seen_attributes,
554-
seen_non_registry_attributes: HashMap::new(),
555-
seen_registry_metrics: seen_metrics,
556-
seen_non_registry_metrics: HashMap::new(),
557-
seen_registry_events: seen_events,
558-
seen_non_registry_events: HashMap::new(),
559-
registry_coverage: 0.0,
560-
}
561-
}
562-
563-
/// Add a live check result to the stats
564-
pub fn maybe_add_live_check_result(&mut self, live_check_result: Option<&LiveCheckResult>) {
565-
if let Some(result) = live_check_result {
566-
for advice in &result.all_advice {
567-
// Count of total advisories
568-
self.add_advice(advice);
569-
}
570-
// Count of samples with the highest advice level
571-
if let Some(highest_advice_level) = &result.highest_advice_level {
572-
self.add_highest_advice_level(highest_advice_level);
573-
}
574-
575-
// Count of samples with no advice
576-
if result.all_advice.is_empty() {
577-
self.inc_no_advice_count();
578-
}
579-
} else {
580-
// Count of samples with no advice
581-
self.inc_no_advice_count();
582-
}
583-
}
584-
585-
/// Increment the total number of entities by type
586-
pub fn inc_entity_count(&mut self, entity_type: &str) {
587-
*self
588-
.total_entities_by_type
589-
.entry(entity_type.to_owned())
590-
.or_insert(0) += 1;
591-
self.total_entities += 1;
592-
}
593-
594-
/// Add an advice to the statistics
595-
fn add_advice(&mut self, advice: &PolicyFinding) {
596-
*self
597-
.advice_level_counts
598-
.entry(advice.level.clone())
599-
.or_insert(0) += 1;
600-
*self
601-
.advice_type_counts
602-
.entry(advice.id.clone())
603-
.or_insert(0) += 1;
604-
*self
605-
.advice_message_counts
606-
.entry(advice.message.clone())
607-
.or_insert(0) += 1;
608-
self.total_advisories += 1;
609-
}
610-
611-
/// Add a highest advice level to the statistics
612-
fn add_highest_advice_level(&mut self, advice: &FindingLevel) {
613-
*self
614-
.highest_advice_level_counts
615-
.entry(advice.clone())
616-
.or_insert(0) += 1;
617-
}
618-
619-
/// Increment the no advice count in the statistics
620-
fn inc_no_advice_count(&mut self) {
621-
self.no_advice_count += 1;
622-
}
623-
624-
/// Add attribute name to coverage
625-
pub fn add_attribute_name_to_coverage(&mut self, seen_attribute_name: String) {
626-
if let Some(count) = self.seen_registry_attributes.get_mut(&seen_attribute_name) {
627-
// This is a registry attribute
628-
*count += 1;
629-
} else {
630-
// This is a non-registry attribute
631-
*self
632-
.seen_non_registry_attributes
633-
.entry(seen_attribute_name)
634-
.or_insert(0) += 1;
635-
}
636-
}
637-
638-
/// Add metric name to coverage
639-
pub fn add_metric_name_to_coverage(&mut self, seen_metric_name: String) {
640-
if let Some(count) = self.seen_registry_metrics.get_mut(&seen_metric_name) {
641-
// This is a registry metric
642-
*count += 1;
643-
} else {
644-
// This is a non-registry metric
645-
*self
646-
.seen_non_registry_metrics
647-
.entry(seen_metric_name)
648-
.or_insert(0) += 1;
649-
}
650-
}
651-
652-
/// Add event name to coverage
653-
pub fn add_event_name_to_coverage(&mut self, seen_event_name: String) {
654-
if seen_event_name.is_empty() {
655-
// Empty event_names are not counted
656-
return;
657-
}
658-
if let Some(count) = self.seen_registry_events.get_mut(&seen_event_name) {
659-
// This is a registry event
660-
*count += 1;
661-
} else {
662-
// This is a non-registry event
663-
*self
664-
.seen_non_registry_events
665-
.entry(seen_event_name)
666-
.or_insert(0) += 1;
667-
}
668-
}
669-
670-
/// Are there any violations in the statistics?
671-
#[must_use]
672-
pub fn has_violations(&self) -> bool {
673-
self.highest_advice_level_counts
674-
.contains_key(&FindingLevel::Violation)
675-
}
676-
677-
/// Finalize the statistics
678-
pub fn finalize(&mut self) {
679-
// Calculate the registry coverage
680-
// (non-zero attributes + non-zero metrics + non-zero events) / (total attributes + total metrics + total events)
681-
let non_zero_attributes = self
682-
.seen_registry_attributes
683-
.values()
684-
.filter(|&&count| count > 0)
685-
.count();
686-
let total_registry_attributes = self.seen_registry_attributes.len();
687-
688-
let non_zero_metrics = self
689-
.seen_registry_metrics
690-
.values()
691-
.filter(|&&count| count > 0)
692-
.count();
693-
let total_registry_metrics = self.seen_registry_metrics.len();
694-
695-
let non_zero_events = self
696-
.seen_registry_events
697-
.values()
698-
.filter(|&&count| count > 0)
699-
.count();
700-
let total_registry_events = self.seen_registry_events.len();
701-
702-
let total_registry_items =
703-
total_registry_attributes + total_registry_metrics + total_registry_events;
704-
705-
if total_registry_items > 0 {
706-
self.registry_coverage = ((non_zero_attributes + non_zero_metrics + non_zero_events)
707-
as f32)
708-
/ (total_registry_items as f32);
709-
} else {
710-
self.registry_coverage = 0.0;
711-
}
712-
}
713-
}
714-
715466
/// Samples implement this trait to run live checks on themselves
716467
pub trait LiveCheckRunner {
717468
/// Run the live check

0 commit comments

Comments
 (0)