Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,7 @@ groups:
- ref: auction.name
requirement_level: recommended
- ref: error.type
requirement_level: required
requirement_level: required

imports:
- metric_ref: metric.example_counter
Copy link
Member

@lmolkova lmolkova Jun 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

another branch of the slack discussion:
we probably want wildcard imports and cover attribute namespaces rather than groups.

E.g.

imports:
  - attribute.db.*
  - attribute.server.*
  - attribute.error.*
  - attribute.http.*
  - attribute.url.*

syntax allows users to allowlist attributes in bulk without importing everything or explicitly allowing each attribute.
Attribute groups are meaningless - they are grouping mechanism and we move attributes between them a lot.

there are not stability guarantees for attribute groups.

A similar syntax can be useful for bulk-importing metrics, entities, events

imports:
- metric.db.*
- metric.http.*
- metric.system.*
- entity.host.*
- ...

Importing each attribute/span/event/metric/entity one-by-one will be very verbose and hard to use. Bulk import seems to be the main use-case.

Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,13 @@ groups:
requirement_level: recommended
examples:
"Unused"

- id: metric.example_counter
type: metric
metric_name: example.counter
stability: development
brief: A counter of the number of messages processed.
instrument: counter
unit: "1"
attributes:
- ref: error.type
6 changes: 5 additions & 1 deletion crates/weaver_resolver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,13 +438,14 @@ mod tests {
assert_eq!(semconv_spec.groups().len(), 2);
assert_eq!(&semconv_spec.groups()[0].id, "shared.attributes");
assert_eq!(&semconv_spec.groups()[1].id, "metric.auction.bid.count");
assert_eq!(semconv_spec.imports().expect("Imports not found").len(), 1);
}
"otel" => {
assert_eq!(
source.path,
"data/multi-registry/otel_registry/otel_registry.yaml"
);
assert_eq!(semconv_spec.groups().len(), 2);
assert_eq!(semconv_spec.groups().len(), 3);
assert_eq!(&semconv_spec.groups()[0].id, "otel.registry");
assert_eq!(&semconv_spec.groups()[1].id, "otel.unused");
}
Expand All @@ -467,6 +468,9 @@ mod tests {
// The group `otel.unused` should be garbage collected
let group = resolved_registry.group("otel.unused");
assert!(group.is_none());
// The group referenced in the `imports` should not be garbage collected
let group = resolved_registry.group("metric.example_counter");
assert!(group.is_some());
}

let metrics = resolved_registry.groups(GroupType::Metric);
Expand Down
27 changes: 22 additions & 5 deletions crates/weaver_resolver/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use weaver_resolved_schema::attribute::UnresolvedAttribute;
use weaver_resolved_schema::lineage::{AttributeLineage, GroupLineage};
use weaver_resolved_schema::registry::{Group, Registry};
use weaver_semconv::attribute::AttributeSpec;
use weaver_semconv::group::GroupSpecWithProvenance;
use weaver_semconv::group::{GroupImportWithProvenance, GroupSpecWithProvenance};
use weaver_semconv::manifest::RegistryManifest;
use weaver_semconv::provenance::Provenance;
use weaver_semconv::registry::SemConvRegistry;
Expand All @@ -30,6 +30,9 @@ pub struct UnresolvedRegistry {
/// The resolution process will progressively move the unresolved groups
/// into the registry field once they are resolved.
pub groups: Vec<UnresolvedGroup>,

/// List of unresolved imports that belong to the semantic convention
pub imports: Vec<GroupImportWithProvenance>,
}

/// A group containing unresolved attributes.
Expand All @@ -43,7 +46,7 @@ pub struct UnresolvedGroup {
/// The resolution process will progressively move the unresolved attributes,
/// and other signals, into the group field once they are resolved.
pub attributes: Vec<UnresolvedAttribute>,

/// The provenance of the group (URL or path).
pub provenance: Provenance,
}
Expand Down Expand Up @@ -137,7 +140,7 @@ pub fn resolve_semconv_registry(
check_root_attribute_id_duplicates(&ureg.registry, &attr_name_index, &mut errors);

if !include_unreferenced {
gc_unreferenced_objects(registry.manifest(), &mut ureg.registry, attr_catalog);
gc_unreferenced_objects(registry.manifest(), &mut ureg.registry, &ureg.imports, attr_catalog);
}

WResult::OkWithNFEs(ureg.registry, errors)
Expand All @@ -149,16 +152,26 @@ pub fn resolve_semconv_registry(
fn gc_unreferenced_objects(
manifest: Option<&RegistryManifest>,
registry: &mut Registry,
imports: &[GroupImportWithProvenance],
attr_catalog: &mut AttributeCatalog,
) {
// Build a set of imported IDs from the imports.
let imported_ids: HashSet<&str> = imports
.iter()
.map(|iwp| iwp.import.r#ref())
.collect();

if let Some(manifest) = manifest {
if manifest.dependencies.as_ref().map_or(0, |d| d.len()) > 0 {
// This registry has dependencies.
let current_reg_id = manifest.name.clone();

// Remove all groups that are not defined in the current registry.
registry.groups.retain(|group| {
if let Some(lineage) = &group.lineage {
if imported_ids.contains(group.id.as_str()) {
// This group is referenced in a dependency, so we keep it.
true
} else if let Some(lineage) = &group.lineage {
lineage.provenance().registry_id.as_ref() == current_reg_id
} else {
true
Expand Down Expand Up @@ -307,10 +320,14 @@ fn unresolved_registry_from_specs(
.unresolved_group_with_provenance_iter()
.map(group_from_spec)
.collect();

let imports = registry
.unresolved_imports_iter()
.collect::<Vec<_>>();

UnresolvedRegistry {
registry: Registry::new(registry_url),
groups,
imports,
}
}

Expand Down
47 changes: 47 additions & 0 deletions crates/weaver_semconv/src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,44 @@
pub entity_associations: Vec<String>,
}

/// Represents an import of a group defined in an imported registry.
/// Currently supports references to groups of type `metric`, `event`, and `entity`.
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
#[serde(deny_unknown_fields)]
#[serde(untagged)]
#[serde(rename_all = "snake_case")]
pub enum GroupImport {
/// Imports a metric group from the imported registry.
MetricRef {
/// The ID of the metric group being referenced in the imported registry.
metric_ref: String,
// Additional overridable fields may be added in the future.
Copy link
Member

@lmolkova lmolkova Jun 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

related to https://github.com/open-telemetry/weaver/pull/769/files#r2131292750 (bulk import)

importing without modification makes sense, but it would override the original signal definition without any means to distinguish imported from the original one.

e.g. in service A I report http.client.request.duration with url.template when it talks to internal services B and C.
Service B reports http.client.request.duration without url.template because it's not supported or possible.

How do I document these caveats if I have a singleton http.client.request.duration definition?

the proposal:

  • imports allow to import without modification - original definitions are fully frozen and immutable. Original id describes only that signal and not its flavor.
  • supported modifications are done through existing extends mechanism or something similar. I.e.
- id: my.service_a.metric.http.client.request.duration
  extends: metric.http.client.request.duration
  note: this a service a flavor of http.client.request.duration, it records url.template when talking to internal services
  attributes:
  - ref: url.template
     requirement_level:
       conditionally_required: SHOULD be recorded when calling service B or C

- id: my.service_b.metric.http.client.request.duration
  extends: metric.http.client.request.duration
  note: this a service b flavor of http.client.request.duration, it does not include url.template attribute since we can't record it there.

I.e. each modified version needs a unique id that can be used to render docs, generate code, build dashboards, run live checks.
We'd need to change what extends does for telemetry signals. Today it just inherits attributes and common group properties, but it can become stricter and require to preserve stability, name, type, etc.

From my point of view, having modifiable, but singleton definition is narrow and confusing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What matters to me is: 1) being able to import signals, and 2) being able to extend them. I like what you’re proposing, it seems clear to me. In this PR, I’ll focus only on the import part with wildcard support.

@jsuereth and @jerbly are we all in agreement with that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to me.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then we don't need signal-specific refs for import - they won't have signal-specific properties.

I.e. we can have a nicer syntax like

imports:
- metric.db.*
- metric.http.*
- metric.system.*
- entity.*
- ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm on board with this direction

Copy link
Member

@lmolkova lmolkova Jun 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe can should rename attribute id to name?

In any case, if we do

imports:
  - attribute.db.*
  - metric.db.*

it'll be confusing for authors because attribute. prefix doesn't exist anywhere

if we do

imports:
  - db.*
  - metric.db.*

it'll be confusing to authors and readers since it's not obvious that db.* only imports attributes.

I'm trying to create non-ambigous and consistent way for those who read or write conventions, with the

- imports:
  - attributes:
    - http.*
    - db.*
    - system.*
  - metrics:
    - http.*
    - db.*
    - system.*
  - entities:
    - host

they import by over-the-wire identifier they use in queries/dashboards. The id becomes internal semconv impl detail it should be

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lquerel - What's the extent of the wildcard capability? Can we have *.db.* for example? Or, for people not using namespace dot notation, db*?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jerbly everything supported by globset is effectively supported by this imports section.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lmolkova

- imports:
  - attributes:
    - http.*
    - db.*
    - system.*
  - metrics:
    - http.*
    - db.*
    - system.*
  - entities:
    - host

Unless I’m misinterpreting something in your previous messages, I see more or less the same level of confusion with this syntax. For attributes, the list under attributes contains IDs, while the list under metrics contains names.

Copy link
Member

@lmolkova lmolkova Jun 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in my suggestion you import by the formal/public name. You never see metric.db.... unless you look into semconv repo, it's internal impl detail. The fact we use different properties to represent id inside semconv is sad.

How do you want to solve 'import attribute' problem otherwise?

},
/// Imports an event group from the imported registry.
EventRef {
/// The ID of the event group being referenced in the imported registry.
event_ref: String,
// Additional overridable fields may be added in the future.
},
/// Imports an entity group from the imported registry.
EntityRef {
/// The ID of the entity group being referenced in the imported registry.
entity_ref: String,
// Additional overridable fields may be added in the future.
},
}

impl GroupImport {
/// Returns the reference of the group being imported.
pub fn r#ref(&self) -> &str {
match self {
GroupImport::MetricRef { metric_ref } => metric_ref,
GroupImport::EventRef { event_ref } => event_ref,
GroupImport::EntityRef { entity_ref } => entity_ref,
}
}
}

impl GroupSpec {
/// Validation logic for the group.
pub(crate) fn validate(&self, path_or_url: &str) -> WResult<(), Error> {
Expand Down Expand Up @@ -1860,3 +1898,12 @@
/// The provenance of the group spec (path or URL).
pub provenance: Provenance,
}

/// A group import with its provenance (path or URL).
#[derive(Debug, Clone, Deserialize)]
pub struct GroupImportWithProvenance {
/// The group import.
pub import: GroupImport,
/// The provenance of the group import (path or URL).
pub provenance: Provenance,
}
21 changes: 20 additions & 1 deletion crates/weaver_semconv/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! Semantic Convention Registry.

use crate::attribute::AttributeSpecWithProvenance;
use crate::group::GroupSpecWithProvenance;
use crate::group::{GroupImportWithProvenance, GroupSpecWithProvenance};
use crate::json_schema::JsonSchemaValidator;
use crate::manifest::RegistryManifest;
use crate::metric::MetricSpecWithProvenance;
Expand Down Expand Up @@ -246,6 +246,23 @@ impl SemConvRegistry {
})
}

/// Returns an iterator over all the unresolved imports defined in the semantic convention
/// registry. Each import is associated with its provenance (path or URL).
pub fn unresolved_imports_iter(&self) -> impl Iterator<Item = GroupImportWithProvenance> + '_ {
self.specs
.iter()
.flat_map(|SemConvSpecWithProvenance { spec, provenance }| {
spec.imports.iter().flat_map(|imports| {
imports
.iter()
.map(|group_import| GroupImportWithProvenance {
import: group_import.clone(),
provenance: provenance.clone(),
})
})
})
}

/// Returns a set of stats about the semantic convention registry.
pub fn stats(&self) -> Stats {
Stats {
Expand Down Expand Up @@ -347,6 +364,7 @@ mod tests {
annotations: None,
entity_associations: Vec::new(),
}],
imports: None,
},
),
(
Expand All @@ -373,6 +391,7 @@ mod tests {
annotations: None,
entity_associations: Vec::new(),
}],
imports: None,
},
),
];
Expand Down
17 changes: 16 additions & 1 deletion crates/weaver_semconv/src/semconv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

//! Semantic convention specification.

use crate::group::GroupSpec;
use crate::group::{GroupImport, GroupSpec};
use crate::json_schema::JsonSchemaValidator;
use crate::provenance::Provenance;
use crate::Error;
Expand All @@ -19,6 +19,10 @@ use weaver_common::result::WResult;
pub struct SemConvSpec {
/// A collection of semantic convention groups or [`GroupSpec`].
pub(crate) groups: Vec<GroupSpec>,

/// A list of imports referencing groups defined in a dependent registry.
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) imports: Option<Vec<GroupImport>>,
}

/// A wrapper for a [`SemConvSpec`] with its provenance.
Expand Down Expand Up @@ -172,6 +176,12 @@ impl SemConvSpec {
pub fn groups(&self) -> &[GroupSpec] {
&self.groups
}

/// Returns the list of imports in the semantic convention spec.
#[must_use]
pub fn imports(&self) -> Option<&[GroupImport]> {
self.imports.as_deref()
}
}

impl SemConvSpecWithProvenance {
Expand Down Expand Up @@ -278,11 +288,16 @@ mod tests {
stability: "stable"
brief: "description2"
type: "int"
imports:
- metric_ref: "metric_id"
- event_ref: "event_id"
- entity_ref: "entity_id"
"#;
let semconv_spec = SemConvSpec::from_string(spec)
.into_result_failing_non_fatal()
.unwrap();
assert_eq!(semconv_spec.groups.len(), 2);
assert_eq!(semconv_spec.imports.as_ref().unwrap().len(), 3);

// Invalid yaml
let spec = r#"
Expand Down
Loading