Skip to content

Commit 2912c64

Browse files
committed
Fix prometheus metric name and unit conversion
1 parent a156bf1 commit 2912c64

File tree

6 files changed

+505
-61
lines changed

6 files changed

+505
-61
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3636
([#3823](https://github.com/open-telemetry/opentelemetry-python/pull/3823))
3737
- Add span flags to OTLP spans and links
3838
([#3881](https://github.com/open-telemetry/opentelemetry-python/pull/3881))
39+
- Fix prometheus metric name and unit conversion
40+
([#3924](https://github.com/open-telemetry/opentelemetry-python/pull/3924))
41+
- This is a breaking change to Prometheus metric names so they comply with the
42+
[specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus).
43+
- common unit abbreviations are converted to Prometheus conventions (`s` -> `seconds`),
44+
following the [collector's implementation](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/c0b51136575aa7ba89326d18edb4549e7e1bbdb9/pkg/translator/prometheus/normalize_name.go#L108)
45+
- repeated `_` are replaced with a single `_`
46+
- UCUM annotations (enclosed in curly braces like `{requests}`) are stripped away
47+
- units with slash are converted e.g. `m/s` -> `meters_per_second`.
48+
- The exporter's API is not changed
3949

4050
## Version 1.24.0/0.45b0 (2024-03-28)
4151

exporter/opentelemetry-exporter-prometheus/src/opentelemetry/exporter/prometheus/__init__.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@
6767
from json import dumps
6868
from logging import getLogger
6969
from os import environ
70-
from re import IGNORECASE, UNICODE, compile
71-
from typing import Dict, Sequence, Tuple, Union
70+
from typing import Deque, Dict, Iterable, Sequence, Tuple, Union
7271

7372
from prometheus_client import start_http_server
7473
from prometheus_client.core import (
@@ -80,6 +79,11 @@
8079
)
8180
from prometheus_client.core import Metric as PrometheusMetric
8281

82+
from opentelemetry.exporter.prometheus._mapping import (
83+
map_unit,
84+
sanitize_attribute,
85+
sanitize_full_name,
86+
)
8387
from opentelemetry.sdk.environment_variables import (
8488
OTEL_EXPORTER_PROMETHEUS_HOST,
8589
OTEL_EXPORTER_PROMETHEUS_PORT,
@@ -101,6 +105,7 @@
101105
MetricsData,
102106
Sum,
103107
)
108+
from opentelemetry.util.types import Attributes
104109

105110
_logger = getLogger(__name__)
106111

@@ -164,18 +169,15 @@ class _CustomCollector:
164169

165170
def __init__(self, disable_target_info: bool = False):
166171
self._callback = None
167-
self._metrics_datas = deque()
168-
self._non_letters_digits_underscore_re = compile(
169-
r"[^\w]", UNICODE | IGNORECASE
170-
)
172+
self._metrics_datas: Deque[MetricsData] = deque()
171173
self._disable_target_info = disable_target_info
172174
self._target_info = None
173175

174176
def add_metrics_data(self, metrics_data: MetricsData) -> None:
175177
"""Add metrics to Prometheus data"""
176178
self._metrics_datas.append(metrics_data)
177179

178-
def collect(self) -> None:
180+
def collect(self) -> Iterable[PrometheusMetric]:
179181
"""Collect fetches the metrics from OpenTelemetry
180182
and delivers them as Prometheus Metrics.
181183
Collect is invoked every time a ``prometheus.Gatherer`` is run
@@ -189,7 +191,7 @@ def collect(self) -> None:
189191
if len(self._metrics_datas):
190192
if not self._disable_target_info:
191193
if self._target_info is None:
192-
attributes = {}
194+
attributes: Attributes = {}
193195
for res in self._metrics_datas[0].resource_metrics:
194196
attributes = {**attributes, **res.resource.attributes}
195197

@@ -228,17 +230,17 @@ def _translate_to_prometheus(
228230

229231
pre_metric_family_ids = []
230232

231-
metric_name = ""
232-
metric_name += self._sanitize(metric.name)
233+
metric_name = sanitize_full_name(metric.name)
233234

234235
metric_description = metric.description or ""
236+
metric_unit = map_unit(metric.unit)
235237

236238
for number_data_point in metric.data.data_points:
237239
label_keys = []
238240
label_values = []
239241

240242
for key, value in sorted(number_data_point.attributes.items()):
241-
label_keys.append(self._sanitize(key))
243+
label_keys.append(sanitize_attribute(key))
242244
label_values.append(self._check_value(value))
243245

244246
pre_metric_family_ids.append(
@@ -247,7 +249,7 @@ def _translate_to_prometheus(
247249
metric_name,
248250
metric_description,
249251
"%".join(label_keys),
250-
metric.unit,
252+
metric_unit,
251253
]
252254
)
253255
)
@@ -299,7 +301,7 @@ def _translate_to_prometheus(
299301
name=metric_name,
300302
documentation=metric_description,
301303
labels=label_keys,
302-
unit=metric.unit,
304+
unit=metric_unit,
303305
)
304306
)
305307
metric_family_id_metric_family[
@@ -323,7 +325,7 @@ def _translate_to_prometheus(
323325
name=metric_name,
324326
documentation=metric_description,
325327
labels=label_keys,
326-
unit=metric.unit,
328+
unit=metric_unit,
327329
)
328330
)
329331
metric_family_id_metric_family[
@@ -344,7 +346,7 @@ def _translate_to_prometheus(
344346
name=metric_name,
345347
documentation=metric_description,
346348
labels=label_keys,
347-
unit=metric.unit,
349+
unit=metric_unit,
348350
)
349351
)
350352
metric_family_id_metric_family[
@@ -361,12 +363,6 @@ def _translate_to_prometheus(
361363
"Unsupported metric data. %s", type(metric.data)
362364
)
363365

364-
def _sanitize(self, key: str) -> str:
365-
"""sanitize the given metric name or label according to Prometheus rule.
366-
Replace all characters other than [A-Za-z0-9_] with '_'.
367-
"""
368-
return self._non_letters_digits_underscore_re.sub("_", key)
369-
370366
# pylint: disable=no-self-use
371367
def _check_value(self, value: Union[int, float, str, Sequence]) -> str:
372368
"""Check the label value and return is appropriate representation"""
@@ -380,7 +376,7 @@ def _create_info_metric(
380376
"""Create an Info Metric Family with list of attributes"""
381377
# sanitize the attribute names according to Prometheus rule
382378
attributes = {
383-
self._sanitize(key): self._check_value(value)
379+
sanitize_attribute(key): self._check_value(value)
384380
for key, value in attributes.items()
385381
}
386382
info = InfoMetricFamily(name, description, labels=attributes)
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# Copyright The OpenTelemetry Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from re import UNICODE, compile
16+
17+
# Matches every run of characters that is not allowed in a Prometheus metric name
# (ASCII letters, digits and ":") so that the whole run can be replaced by one "_".
# "_" is itself outside the allowed class, so runs of underscores are already
# collapsed by the first alternative; `_{2,}` spells that intent out explicitly.
_SANITIZE_NAME_RE = compile(r"([^a-zA-Z0-9:]+)|_{2,}", UNICODE)
# Same as name, but doesn't allow ":"
_SANITIZE_ATTRIBUTE_KEY_RE = compile(r"([^a-zA-Z0-9]+)|_{2,}", UNICODE)

# UCUM annotations are ASCII chars 33-126, other than the curly braces themselves,
# enclosed in curly braces
# https://ucum.org/ucum#para-6
# The class is "!"-"z" plus "|" and "~", i.e. 33-126 without "{" and "}". Leaving
# the braces out makes each annotation match on its own; with them included the
# greedy match would run from the first "{" to the last "}" and swallow the unit
# text between two annotations (e.g. the "By/" in "{a}By/{b}s").
_UCUM_ANNOTATION_CURLY = compile(r"\{[!-z|~]*\}")
24+
25+
# Remaps common UCUM and SI units to prometheus conventions. Copied from
# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L19
# See specification:
# https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
#
# map_unit looks keys up with plain dict access, so matching is exact and
# case-sensitive ("By" is mapped, "by" is not). The table is used both for a whole
# unit and for the numerator of a ratio unit such as "m/s".
_UNIT_MAPPINGS = {
    # Time
    "d": "days",
    "h": "hours",
    "min": "minutes",
    "s": "seconds",
    "ms": "milliseconds",
    "us": "microseconds",
    "ns": "nanoseconds",
    # Bytes
    "By": "bytes",
    "KiBy": "kibibytes",
    "MiBy": "mebibytes",
    "GiBy": "gibibytes",
    # NOTE(review): spelled "tibibytes" although the IEC prefix is "tebi";
    # presumably kept to match the collector table this was copied from. Confirm
    # before changing, since the value becomes part of the emitted metric name.
    "TiBy": "tibibytes",
    "KBy": "kilobytes",
    "MBy": "megabytes",
    "GBy": "gigabytes",
    "TBy": "terabytes",
    # SI
    "m": "meters",
    "V": "volts",
    "A": "amperes",
    "J": "joules",
    "W": "watts",
    "g": "grams",
    # Misc
    "Cel": "celsius",
    "Hz": "hertz",
    # The dimensionless unit "1" maps to the empty string, i.e. no unit at all.
    # TODO: this conflicts with the spec but I think it is correct. Need to open a spec issue
    "1": "",
    "%": "percent",
}
62+
# Similar to _UNIT_MAPPINGS, but for "per" unit denominator.
# Example: s => per second (singular)
# Copied from https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/80317ce83ed87a2dff0c316bb939afbfaa823d5e/pkg/translator/prometheus/normalize_name.go#L58
#
# Values are singular because they follow "per_" in the generated unit. Note that
# "m" means "minute" in this table, whereas _UNIT_MAPPINGS maps "m" to "meters";
# map_unit only consults this table for the part of a unit after the first "/".
_PER_UNIT_MAPPINGS = {
    "s": "second",
    "m": "minute",
    "h": "hour",
    "d": "day",
    "w": "week",
    "mo": "month",
    "y": "year",
}
74+
75+
76+
def sanitize_full_name(name: str) -> str:
    """Return ``name`` rewritten as a valid Prometheus metric name.

    A leading digit is replaced by "_" and every run of characters that is not
    allowed in a metric name is collapsed into a single "_".

    https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
    """
    # Nothing special to do unless the very first character is a digit.
    if not name or not name[0].isdigit():
        return _sanitize_name(name)

    # The leading digit is replaced (not prefixed) with an underscore.
    return _sanitize_name("_" + name[1:])
86+
87+
88+
def _sanitize_name(name: str) -> str:
    """Collapse each run of characters not allowed in a metric name into one "_".

    A leading digit is left untouched here; sanitize_full_name deals with it.
    """
    sanitized = _SANITIZE_NAME_RE.sub("_", name)
    return sanitized
92+
93+
94+
def sanitize_attribute(key: str) -> str:
    """Return ``key`` rewritten as a valid Prometheus label name.

    A leading digit is replaced by "_" and every run of characters other than
    ASCII letters and digits (":" included) is collapsed into a single "_".

    https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-attributes
    """
    # The leading digit is replaced (not prefixed) with an underscore.
    starts_with_digit = bool(key) and key[0].isdigit()
    candidate = "_" + key[1:] if starts_with_digit else key
    return _SANITIZE_ATTRIBUTE_KEY_RE.sub("_", candidate)
103+
104+
105+
def map_unit(unit: str) -> str:
    """Map an OpenTelemetry unit to its Prometheus name and sanitize it.

    The unit is processed in this order:

    1. UCUM curly-brace annotations are removed (``{requests}/s`` -> ``/s``).
    2. A unit found in ``_UNIT_MAPPINGS`` is returned as mapped (``s`` -> ``seconds``).
    3. A ratio unit is split on the first ``/``; the numerator is mapped through
       ``_UNIT_MAPPINGS`` and the denominator through ``_PER_UNIT_MAPPINGS``, then
       joined with ``_per_`` (``m/s`` -> ``meters_per_second``).
    4. Anything else is sanitized as a metric-name fragment.

    Parts that are not in a table are sanitized and stripped of surrounding "_",
    because the result is appended to the metric name and would otherwise produce
    consecutive underscores.

    Args:
        unit: The OpenTelemetry unit string. An empty or missing unit yields "".

    Returns:
        The Prometheus unit, or "" when there is no usable unit.

    See:
    - https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#metric-metadata-1
    - https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.101.0/pkg/translator/prometheus/normalize_name.go#L108
    """
    # A missing unit means "no unit"; return early instead of failing in the regex.
    if not unit:
        return ""

    # remove curly brace UCUM annotations
    unit = _UCUM_ANNOTATION_CURLY.sub("", unit)

    if unit in _UNIT_MAPPINGS:
        return _UNIT_MAPPINGS[unit]

    # replace "/" with "per" units like m/s -> meters_per_second
    numerator, slash, denominator = unit.partition("/")
    if slash:
        # Look the raw text up first: sanitizing beforehand would turn a key such
        # as "%" into "_" and the mapping would never be found.
        bottom = _PER_UNIT_MAPPINGS.get(denominator)
        if bottom is None:
            bottom = _sanitize_name(denominator).strip("_")
        if bottom:
            top = _UNIT_MAPPINGS.get(numerator)
            if top is None:
                top = _sanitize_name(numerator).strip("_")
            # An empty numerator ("/s", or "1/s" since "1" maps to "") gives "per_<unit>".
            return f"{top}_per_{bottom}" if top else f"per_{bottom}"

    # since units end up as a metric name suffix, they must be sanitized; strip
    # surrounding "_" chars since they would lead to consecutive underscores in the
    # metric name
    return _sanitize_name(unit).strip("_")

0 commit comments

Comments
 (0)