Skip to content

Commit 874c4c3

Browse files
tongoss and MrAlias authored
feat: Improve error handling in prometheus exporter (#7363)
fix #7066 --------- Co-authored-by: Tyler Yahn <[email protected]>
1 parent a817caa commit 874c4c3

File tree

5 files changed

+507
-81
lines changed

5 files changed

+507
-81
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
4242
- `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/metric` synchronously de-duplicates the passed attributes instead of delegating it to the returned `MeterOption`. (#7266)
4343
- `WithInstrumentationAttributes` in `go.opentelemetry.io/otel/log` synchronously de-duplicates the passed attributes instead of delegating it to the returned `LoggerOption`. (#7266)
4444
- `Distinct` in `go.opentelemetry.io/otel/attribute` is no longer guaranteed to uniquely identify an attribute set. Collisions between `Distinct` values for different Sets are possible with extremely high cardinality (billions of series per instrument), but are highly unlikely. (#7175)
45+
- Improve error handling for dropped data during translation by using `prometheus.NewInvalidMetric` in `go.opentelemetry.io/otel/exporters/prometheus`.
46+
**Breaking Change:** Previously, these cases were only logged and scrapes succeeded.
47+
Now, when translation would drop data (e.g., invalid label/value), the exporter emits a `NewInvalidMetric`, and Prometheus scrapes **fail with HTTP 500** by default.
48+
To preserve the prior behavior (scrapes succeed while errors are logged), configure your Prometheus HTTP handler with: `promhttp.HandlerOpts{ ErrorHandling: promhttp.ContinueOnError }`. (#7363)
4549
- The default `TranslationStrategy` in `go.opentelemetry.io/otel/exporters/prometheus` is changed from `otlptranslator.NoUTF8EscapingWithSuffixes` to `otlptranslator.UnderscoreEscapingWithSuffixes`. (#7421)
4650
- The `ErrorType` function in `go.opentelemetry.io/otel/semconv/v1.37.0` now handles custom error types.
4751
If an error implements an `ErrorType() string` method, the return value of that method will be used as the error type. (#7442)

exporters/prometheus/errors.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package prometheus // import "go.opentelemetry.io/otel/exporters/prometheus"
5+
6+
import "errors"
7+
8+
// Sentinel errors for consistent error checks in tests.
9+
var (
10+
// errInvalidMetricType is reported by Collect when c.metricType returns
// nil for a metric, i.e. the metric's type cannot be mapped.
errInvalidMetricType = errors.New("invalid metric type")
11+
// errInvalidMetric is reported by Collect when c.validateMetrics signals
// that the metric must be dropped.
errInvalidMetric = errors.New("invalid metric")
12+
// errEHScaleBelowMin is wrapped, together with the offending scale, when an
// exponential histogram data point has a scale below -4.
errEHScaleBelowMin = errors.New("exponential histogram scale below minimum supported")
13+
)

exporters/prometheus/exporter.go

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
241241

242242
attrKeys, attrVals, e := getAttrs(scopeMetrics.Scope.Attributes, c.labelNamer)
243243
if e != nil {
244-
otel.Handle(e)
244+
reportError(ch, nil, e)
245245
err = errors.Join(err, fmt.Errorf("failed to getAttrs for ScopeMetrics %d: %w", j, e))
246246
continue
247247
}
@@ -258,19 +258,19 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
258258
for k, m := range scopeMetrics.Metrics {
259259
typ := c.metricType(m)
260260
if typ == nil {
261+
reportError(ch, nil, errInvalidMetricType)
261262
continue
262263
}
263264
name, e := c.getName(m)
264265
if e != nil {
265-
// TODO(#7066): Handle this error better. It's not clear this can be
266-
// reached, bad metric names should / will be caught at creation time.
267-
otel.Handle(e)
266+
reportError(ch, nil, e)
268267
err = errors.Join(err, fmt.Errorf("failed to getAttrs for ScopeMetrics %d, Metrics %d: %w", j, k, e))
269268
continue
270269
}
271270

272271
drop, help := c.validateMetrics(name, m.Description, typ)
273272
if drop {
273+
reportError(ch, nil, errInvalidMetric)
274274
continue
275275
}
276276

@@ -373,7 +373,7 @@ func addExponentialHistogramMetric[N int64 | float64](
373373
for j, dp := range histogram.DataPoints {
374374
keys, values, e := getAttrs(dp.Attributes, labelNamer)
375375
if e != nil {
376-
otel.Handle(e)
376+
reportError(ch, nil, e)
377377
err = errors.Join(err, fmt.Errorf("failed to getAttrs for histogram.DataPoints %d: %w", j, e))
378378
continue
379379
}
@@ -386,11 +386,11 @@ func addExponentialHistogramMetric[N int64 | float64](
386386
scale := dp.Scale
387387
if scale < -4 {
388388
// Reject scales below -4 as they cannot be represented in Prometheus
389-
e := fmt.Errorf(
390-
"exponential histogram scale %d is below minimum supported scale -4, skipping data point",
391-
scale,
389+
reportError(
390+
ch,
391+
desc,
392+
fmt.Errorf("%w: %d (min -4)", errEHScaleBelowMin, scale),
392393
)
393-
otel.Handle(e)
394394
err = errors.Join(err, e)
395395
continue
396396
}
@@ -440,7 +440,7 @@ func addExponentialHistogramMetric[N int64 | float64](
440440
dp.StartTime,
441441
values...)
442442
if e != nil {
443-
otel.Handle(e)
443+
reportError(ch, desc, e)
444444
err = errors.Join(
445445
err,
446446
fmt.Errorf("failed to NewConstNativeHistogram for histogram.DataPoints %d: %w", j, e),
@@ -474,7 +474,7 @@ func addHistogramMetric[N int64 | float64](
474474
for j, dp := range histogram.DataPoints {
475475
keys, values, e := getAttrs(dp.Attributes, labelNamer)
476476
if e != nil {
477-
otel.Handle(e)
477+
reportError(ch, nil, e)
478478
err = errors.Join(err, fmt.Errorf("failed to getAttrs for histogram.DataPoints %d: %w", j, e))
479479
continue
480480
}
@@ -491,7 +491,7 @@ func addHistogramMetric[N int64 | float64](
491491
}
492492
m, e := prometheus.NewConstHistogram(desc, dp.Count, float64(dp.Sum), buckets, values...)
493493
if e != nil {
494-
otel.Handle(e)
494+
reportError(ch, desc, e)
495495
err = errors.Join(err, fmt.Errorf("failed to NewConstMetric for histogram.DataPoints %d: %w", j, e))
496496
continue
497497
}
@@ -527,7 +527,7 @@ func addSumMetric[N int64 | float64](
527527
for i, dp := range sum.DataPoints {
528528
keys, values, e := getAttrs(dp.Attributes, labelNamer)
529529
if e != nil {
530-
otel.Handle(e)
530+
reportError(ch, nil, e)
531531
err = errors.Join(err, fmt.Errorf("failed to getAttrs for sum.DataPoints %d: %w", i, e))
532532
continue
533533
}
@@ -537,7 +537,7 @@ func addSumMetric[N int64 | float64](
537537
desc := prometheus.NewDesc(name, m.Description, keys, nil)
538538
m, e := prometheus.NewConstMetric(desc, valueType, float64(dp.Value), values...)
539539
if e != nil {
540-
otel.Handle(e)
540+
reportError(ch, desc, e)
541541
err = errors.Join(err, fmt.Errorf("failed to NewConstMetric for sum.DataPoints %d: %w", i, e))
542542
continue
543543
}
@@ -572,7 +572,7 @@ func addGaugeMetric[N int64 | float64](
572572
for i, dp := range gauge.DataPoints {
573573
keys, values, e := getAttrs(dp.Attributes, labelNamer)
574574
if e != nil {
575-
otel.Handle(e)
575+
reportError(ch, nil, e)
576576
err = errors.Join(err, fmt.Errorf("failed to getAttrs for gauge.DataPoints %d: %w", i, e))
577577
continue
578578
}
@@ -582,7 +582,7 @@ func addGaugeMetric[N int64 | float64](
582582
desc := prometheus.NewDesc(name, m.Description, keys, nil)
583583
m, e := prometheus.NewConstMetric(desc, prometheus.GaugeValue, float64(dp.Value), values...)
584584
if e != nil {
585-
otel.Handle(e)
585+
reportError(ch, desc, e)
586586
err = errors.Join(err, fmt.Errorf("failed to NewConstMetric for gauge.DataPoints %d: %w", i, e))
587587
continue
588588
}
@@ -803,3 +803,10 @@ func attributesToLabels(attrs []attribute.KeyValue, labelNamer otlptranslator.La
803803
}
804804
return labels, nil
805805
}
806+
807+
// reportError sends an invalid metric carrying err on ch, replacing the
// previous otel.Handle(e) calls at the translation error sites.
//
// desc is the descriptor of the metric that failed to translate. When desc is
// nil, a descriptor is built from err with prometheus.NewInvalidDesc.
func reportError(ch chan<- prometheus.Metric, desc *prometheus.Desc, err error) {
808+
if desc == nil {
809+
// No descriptor supplied by the caller: derive one from the error itself.
desc = prometheus.NewInvalidDesc(err)
810+
}
811+
ch <- prometheus.NewInvalidMetric(desc, err)
812+
}

0 commit comments

Comments
 (0)