Skip to content

Commit 1d839f4

Browse files
committed
Collection interval random jittering
closes #460
1 parent f3b5537 commit 1d839f4

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed

agent.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package telegraf
22

33
import (
4-
"crypto/rand"
4+
cryptorand "crypto/rand"
55
"fmt"
66
"log"
77
"math/big"
8+
"math/rand"
89
"os"
910
"sync"
1011
"time"
@@ -92,6 +93,7 @@ func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
9293

9394
start := time.Now()
9495
counter := 0
96+
jitter := a.Config.Agent.CollectionJitter.Duration.Nanoseconds()
9597
for _, input := range a.Config.Inputs {
9698
if input.Config.Interval != 0 {
9799
continue
@@ -104,9 +106,19 @@ func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
104106

105107
acc := NewAccumulator(input.Config, pointChan)
106108
acc.SetDebug(a.Config.Agent.Debug)
107-
// acc.SetPrefix(input.Name + "_")
108109
acc.SetDefaultTags(a.Config.Tags)
109110

111+
if jitter != 0 {
112+
nanoSleep := rand.Int63n(jitter)
113+
d, err := time.ParseDuration(fmt.Sprintf("%dns", nanoSleep))
114+
if err != nil {
115+
log.Printf("Jittering collection interval failed for plugin %s",
116+
input.Name)
117+
} else {
118+
time.Sleep(d)
119+
}
120+
}
121+
110122
if err := input.Input.Gather(acc); err != nil {
111123
log.Printf("Error in input [%s]: %s", input.Name, err)
112124
}
@@ -143,7 +155,6 @@ func (a *Agent) gatherSeparate(
143155

144156
acc := NewAccumulator(input.Config, pointChan)
145157
acc.SetDebug(a.Config.Agent.Debug)
146-
// acc.SetPrefix(input.Name + "_")
147158
acc.SetDefaultTags(a.Config.Tags)
148159

149160
if err := input.Input.Gather(acc); err != nil {
@@ -315,7 +326,7 @@ func jitterInterval(ininterval, injitter time.Duration) time.Duration {
315326
outinterval := ininterval
316327
if injitter.Nanoseconds() != 0 {
317328
maxjitter := big.NewInt(injitter.Nanoseconds())
318-
if j, err := rand.Int(rand.Reader, maxjitter); err == nil {
329+
if j, err := cryptorand.Int(cryptorand.Reader, maxjitter); err == nil {
319330
jitter = j.Int64()
320331
}
321332
outinterval = time.Duration(jitter + ininterval.Nanoseconds())

internal/config/config.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,22 @@ type AgentConfig struct {
6161
// i.e., if Interval=10s then always collect on :00, :10, :20, etc.
6262
RoundInterval bool
6363

64+
// CollectionJitter is used to jitter the collection by a random amount.
65+
// Each plugin will sleep for a random time within jitter before collecting.
66+
// This can be used to avoid many plugins querying things like sysfs at the
67+
// same time, which can have a measurable effect on the system.
68+
CollectionJitter internal.Duration
69+
6470
// Interval at which to flush data
6571
FlushInterval internal.Duration
6672

6773
// FlushRetries is the number of times to retry each data flush
6874
FlushRetries int
6975

70-
// FlushJitter tells
76+
// FlushJitter jitters the flush interval by a random amount.
77+
// This is primarily to avoid large write spikes for users running a large
78+
// number of telegraf instances.
79+
// i.e., a jitter of 5s and an interval of 10s means flushes will happen every 10-15s.
7180
FlushJitter internal.Duration
7281

7382
// TODO(cam): Remove UTC and Precision parameters, they are no longer
@@ -271,6 +280,11 @@ var header = `# Telegraf configuration
271280
# Rounds collection interval to 'interval'
272281
# ie, if interval="10s" then always collect on :00, :10, :20, etc.
273282
round_interval = true
283+
# Collection jitter is used to jitter the collection by a random amount.
284+
# Each plugin will sleep for a random time within jitter before collecting.
285+
# This can be used to avoid many plugins querying things like sysfs at the
286+
# same time, which can have a measurable effect on the system.
287+
collection_jitter = "0s"
274288
275289
# Default data flushing interval for all outputs. You should not set this below
276290
# interval. Maximum flush_interval will be flush_interval + flush_jitter

0 commit comments

Comments
 (0)