Skip to content

Commit 00e67ba

Browse files
hodgesdsoblitorum
authored andcommitted
Add perf tracepoint collection flag (prometheus#1664)
* Add tracepoint collector option for perf collector Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
1 parent 58cc312 commit 00e67ba

File tree

3 files changed

+245
-17
lines changed

3 files changed

+245
-17
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,13 @@ configuration is zero indexed and can also take a stride value; e.g.
9999
`--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs
100100
1, 5, and 10.
101101

102+
The perf collector is also able to collect
103+
[tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html)
104+
counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be
105+
found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or
106+
from debugfs. An example usage of this would be
107+
`--collector.perf.tracepoint="sched:sched_process_exec"`.
108+
102109

103110
Name | Description | OS
104111
---------|-------------|----

collector/perf_linux.go

Lines changed: 170 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ import (
2020
"strings"
2121

2222
"github.com/go-kit/kit/log"
23+
"github.com/go-kit/kit/log/level"
2324
"github.com/hodgesds/perf-utils"
2425
"github.com/prometheus/client_golang/prometheus"
26+
"golang.org/x/sys/unix"
2527
kingpin "gopkg.in/alecthomas/kingpin.v2"
2628
)
2729

@@ -30,27 +32,29 @@ const (
3032
)
3133

3234
var (
33-
perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
35+
perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
36+
perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings()
3437
)
3538

3639
func init() {
3740
registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
3841
}
3942

40-
// perfCollector is a Collector that uses the perf subsystem to collect
41-
// metrics. It uses perf_event_open an ioctls for profiling. Due to the fact
42-
// that the perf subsystem is highly dependent on kernel configuration and
43-
// settings not all profiler values may be exposed on the target system at any
44-
// given time.
45-
type perfCollector struct {
46-
hwProfilerCPUMap map[*perf.HardwareProfiler]int
47-
swProfilerCPUMap map[*perf.SoftwareProfiler]int
48-
cacheProfilerCPUMap map[*perf.CacheProfiler]int
49-
perfHwProfilers map[int]*perf.HardwareProfiler
50-
perfSwProfilers map[int]*perf.SoftwareProfiler
51-
perfCacheProfilers map[int]*perf.CacheProfiler
52-
desc map[string]*prometheus.Desc
53-
logger log.Logger
43+
// perfTracepointFlagToTracepoints returns the set of configured tracepoints.
44+
func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) {
45+
tracepoints := make([]*perfTracepoint, len(tracepointsFlag))
46+
47+
for i, tracepoint := range tracepointsFlag {
48+
split := strings.Split(tracepoint, ":")
49+
if len(split) != 2 {
50+
return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint)
51+
}
52+
tracepoints[i] = &perfTracepoint{
53+
subsystem: split[0],
54+
event: split[1],
55+
}
56+
}
57+
return tracepoints, nil
5458
}
5559

5660
// perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor.
@@ -98,6 +102,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) {
98102
return cpus, nil
99103
}
100104

105+
// perfTracepoint identifies a single kernel tracepoint by the two parts of
// its "subsystem:event" name.
type perfTracepoint struct {
	subsystem string
	event     string
}

// label renders the tracepoint as "subsystem_event"; this form is used as the
// metric name component for the tracepoint.
func (tp *perfTracepoint) label() string {
	return fmt.Sprintf("%s_%s", tp.subsystem, tp.event)
}

// tracepoint renders the tracepoint as "subsystem:event", the same form in
// which it is written on the command line.
func (tp *perfTracepoint) tracepoint() string {
	return fmt.Sprintf("%s:%s", tp.subsystem, tp.event)
}
120+
121+
// perfCollector is a Collector that uses the perf subsystem to collect
122+
// metrics. It uses perf_event_open an ioctls for profiling. Due to the fact
123+
// that the perf subsystem is highly dependent on kernel configuration and
124+
// settings not all profiler values may be exposed on the target system at any
125+
// given time.
126+
type perfCollector struct {
127+
hwProfilerCPUMap map[*perf.HardwareProfiler]int
128+
swProfilerCPUMap map[*perf.SoftwareProfiler]int
129+
cacheProfilerCPUMap map[*perf.CacheProfiler]int
130+
perfHwProfilers map[int]*perf.HardwareProfiler
131+
perfSwProfilers map[int]*perf.SoftwareProfiler
132+
perfCacheProfilers map[int]*perf.CacheProfiler
133+
desc map[string]*prometheus.Desc
134+
logger log.Logger
135+
tracepointCollector *perfTracepointCollector
136+
}
137+
138+
type perfTracepointCollector struct {
139+
// desc is the mapping of subsystem to tracepoint *prometheus.Desc.
140+
descs map[string]map[string]*prometheus.Desc
141+
// collection order is the sorted configured collection order of the profiler.
142+
collectionOrder []string
143+
144+
logger log.Logger
145+
profilers map[int]perf.GroupProfiler
146+
}
147+
148+
// update is used collect all tracepoints across all tracepoint profilers.
149+
func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error {
150+
for cpu := range c.profilers {
151+
if err := c.updateCPU(cpu, ch); err != nil {
152+
return err
153+
}
154+
}
155+
return nil
156+
}
157+
158+
// updateCPU is used to update metrics per CPU profiler.
159+
func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error {
160+
cpuStr := fmt.Sprintf("%d", cpu)
161+
profiler := c.profilers[cpu]
162+
p, err := profiler.Profile()
163+
if err != nil {
164+
level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err)
165+
return err
166+
}
167+
168+
for i, value := range p.Values {
169+
// Get the Desc from the ordered group value.
170+
descKey := c.collectionOrder[i]
171+
descKeySlice := strings.Split(descKey, ":")
172+
ch <- prometheus.MustNewConstMetric(
173+
c.descs[descKeySlice[0]][descKeySlice[1]],
174+
prometheus.CounterValue,
175+
float64(value),
176+
cpuStr,
177+
)
178+
}
179+
return nil
180+
}
181+
182+
// newPerfTracepointCollector returns a configured perfTracepointCollector.
183+
func newPerfTracepointCollector(
184+
logger log.Logger,
185+
tracepointsFlag []string,
186+
cpus []int,
187+
) (*perfTracepointCollector, error) {
188+
tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag)
189+
if err != nil {
190+
return nil, err
191+
}
192+
193+
collectionOrder := make([]string, len(tracepoints))
194+
descs := map[string]map[string]*prometheus.Desc{}
195+
eventAttrs := make([]unix.PerfEventAttr, len(tracepoints))
196+
197+
for i, tracepoint := range tracepoints {
198+
eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event)
199+
if err != nil {
200+
return nil, err
201+
}
202+
eventAttrs[i] = *eventAttr
203+
collectionOrder[i] = tracepoint.tracepoint()
204+
if _, ok := descs[tracepoint.subsystem]; !ok {
205+
descs[tracepoint.subsystem] = map[string]*prometheus.Desc{}
206+
}
207+
descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc(
208+
prometheus.BuildFQName(
209+
namespace,
210+
perfSubsystem,
211+
tracepoint.label(),
212+
),
213+
"Perf tracepoint "+tracepoint.tracepoint(),
214+
[]string{"cpu"},
215+
nil,
216+
)
217+
}
218+
219+
profilers := make(map[int]perf.GroupProfiler, len(cpus))
220+
for _, cpu := range cpus {
221+
profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...)
222+
if err != nil {
223+
return nil, err
224+
}
225+
profilers[cpu] = profiler
226+
}
227+
228+
c := &perfTracepointCollector{
229+
descs: descs,
230+
collectionOrder: collectionOrder,
231+
profilers: profilers,
232+
logger: logger,
233+
}
234+
235+
for _, profiler := range c.profilers {
236+
if err := profiler.Start(); err != nil {
237+
return nil, err
238+
}
239+
}
240+
return c, nil
241+
}
242+
101243
// NewPerfCollector returns a new perf based collector, it creates a profiler
102244
// per CPU.
103245
func NewPerfCollector(logger log.Logger) (Collector, error) {
@@ -127,6 +269,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) {
127269
}
128270
}
129271

272+
// First configure any tracepoints.
273+
if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 {
274+
tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus)
275+
if err != nil {
276+
return nil, err
277+
}
278+
collector.tracepointCollector = tracepointCollector
279+
}
280+
281+
// Configure all profilers for the specified CPUs.
130282
for _, cpu := range cpus {
131283
// Use -1 to profile all processes on the CPU, see:
132284
// man perf_event_open
@@ -411,6 +563,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error {
411563
if err := c.updateCacheStats(ch); err != nil {
412564
return err
413565
}
566+
if c.tracepointCollector != nil {
567+
return c.tracepointCollector.update(ch)
568+
}
414569

415570
return nil
416571
}

collector/perf_linux_test.go

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) {
180180
if test.exCpus[i] != cpus[i] {
181181
t.Fatalf(
182182
"expected cpus %v, got %v",
183-
test.exCpus,
184-
cpus,
183+
test.exCpus[i],
184+
cpus[i],
185+
)
186+
}
187+
}
188+
})
189+
}
190+
}
191+
192+
func TestPerfTracepointFlagToTracepoints(t *testing.T) {
193+
tests := []struct {
194+
name string
195+
flag []string
196+
exTracepoints []*perfTracepoint
197+
errStr string
198+
}{
199+
{
200+
name: "valid single tracepoint",
201+
flag: []string{"sched:sched_kthread_stop"},
202+
exTracepoints: []*perfTracepoint{
203+
{
204+
subsystem: "sched",
205+
event: "sched_kthread_stop",
206+
},
207+
},
208+
},
209+
{
210+
name: "valid multiple tracepoints",
211+
flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"},
212+
exTracepoints: []*perfTracepoint{
213+
{
214+
subsystem: "sched",
215+
event: "sched_kthread_stop",
216+
},
217+
{
218+
subsystem: "sched",
219+
event: "sched_process_fork",
220+
},
221+
},
222+
},
223+
}
224+
225+
for _, test := range tests {
226+
t.Run(test.name, func(t *testing.T) {
227+
tracepoints, err := perfTracepointFlagToTracepoints(test.flag)
228+
if test.errStr != "" {
229+
if err != nil {
230+
t.Fatal("expected error to not be nil")
231+
}
232+
if test.errStr != err.Error() {
233+
t.Fatalf(
234+
"expected error %q, got %q",
235+
test.errStr,
236+
err.Error(),
237+
)
238+
}
239+
return
240+
}
241+
if err != nil {
242+
t.Fatal(err)
243+
}
244+
for i := range tracepoints {
245+
if test.exTracepoints[i].event != tracepoints[i].event &&
246+
test.exTracepoints[i].subsystem != tracepoints[i].subsystem {
247+
t.Fatalf(
248+
"expected tracepoint %v, got %v",
249+
test.exTracepoints[i],
250+
tracepoints[i],
185251
)
186252
}
187253
}

0 commit comments

Comments
 (0)