Skip to content

Commit 1b3c2cd

Browse files
sebito91 and danielnelson
authored and committed
Add timezone support to logparser timestamps (#2882)
1 parent 3e94699 commit 1b3c2cd

File tree

6 files changed

+259
-16
lines changed

6 files changed

+259
-16
lines changed

plugins/inputs/logparser/README.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ regex patterns.
1515
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
1616
## /var/log/apache.log -> only tail the apache log file
1717
files = ["/var/log/apache/access.log"]
18-
## Read file from beginning.
18+
19+
## Read files that currently exist from the beginning. Files that are created
20+
## while telegraf is running (and that match the "files" globs) will always
21+
## be read from the beginning.
1922
from_beginning = false
2023

2124
## Parse logstash-style "grok" patterns:
@@ -28,13 +31,27 @@ regex patterns.
2831
## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs)
2932
## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
3033
patterns = ["%{COMBINED_LOG_FORMAT}"]
34+
3135
## Name of the outputted measurement name.
3236
measurement = "apache_access_log"
37+
3338
## Full path(s) to custom pattern files.
3439
custom_pattern_files = []
40+
3541
## Custom patterns can also be defined here. Put one pattern per line.
3642
custom_patterns = '''
3743
'''
44+
45+
## Timezone allows you to provide an override for timestamps that
46+
## don't already include an offset
47+
## e.g. 04/06/2016 12:41:45 data one two 5.43µs
48+
##
49+
## Default: "" which renders UTC
50+
## Options are as follows:
51+
## 1. Local -- interpret based on machine localtime
52+
## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
53+
## 3. UTC -- or blank/unspecified, will return timestamp in UTC
54+
timezone = "Canada/Eastern"
3855
```
3956

4057
### Grok Parser
@@ -125,6 +142,13 @@ Wed Apr 12 13:10:34 PST 2017 value=42
125142
'''
126143
```
127144

145+
For cases where the timestamp itself is without offset, the `timezone` config var is available
146+
to denote an offset. By default (with `timezone` either omitted, blank, or set to `"UTC"`), the times
147+
are processed as if in the UTC timezone. If specified as `timezone = "Local"`, the timestamp
148+
will be processed based on the current machine timezone configuration. Lastly, if using a
149+
timezone from the list of Unix [timezones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones), the logparser grok will attempt to offset
150+
the timestamp accordingly. See test cases for more detailed examples.
151+
128152
#### TOML Escaping
129153

130154
When saving patterns to the configuration file, keep in mind the different TOML

plugins/inputs/logparser/grok/grok.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ var (
5959
patternOnlyRe = regexp.MustCompile(`%{(\w+)}`)
6060
)
6161

62+
// Parser is the primary struct to handle and grok-patterns defined in the config toml
6263
type Parser struct {
6364
Patterns []string
6465
// namedPatterns is a list of internally-assigned names to the patterns
@@ -70,6 +71,16 @@ type Parser struct {
7071
CustomPatternFiles []string
7172
Measurement string
7273

74+
// Timezone is an optional component to help render log dates to
75+
// your chosen zone.
76+
// Default: "" which renders UTC
77+
// Options are as follows:
78+
// 1. Local -- interpret based on machine localtime
79+
// 2. "America/Chicago" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
80+
// 3. UTC -- or blank/unspecified, will return timestamp in UTC
81+
Timezone string
82+
loc *time.Location
83+
7384
// typeMap is a map of patterns -> capture name -> modifier,
7485
// ie, {
7586
// "%{TESTLOG}":
@@ -105,6 +116,7 @@ type Parser struct {
105116
tsModder *tsModder
106117
}
107118

119+
// Compile is a bound method to Parser which will process the options for our parser
108120
func (p *Parser) Compile() error {
109121
p.typeMap = make(map[string]map[string]string)
110122
p.tsMap = make(map[string]map[string]string)
@@ -142,9 +154,9 @@ func (p *Parser) Compile() error {
142154

143155
// Parse any custom pattern files supplied.
144156
for _, filename := range p.CustomPatternFiles {
145-
file, err := os.Open(filename)
146-
if err != nil {
147-
return err
157+
file, fileErr := os.Open(filename)
158+
if fileErr != nil {
159+
return fileErr
148160
}
149161

150162
scanner := bufio.NewScanner(bufio.NewReader(file))
@@ -155,9 +167,16 @@ func (p *Parser) Compile() error {
155167
p.Measurement = "logparser_grok"
156168
}
157169

170+
p.loc, err = time.LoadLocation(p.Timezone)
171+
if err != nil {
172+
log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone)
173+
p.loc, _ = time.LoadLocation("UTC")
174+
}
175+
158176
return p.compileCustomPatterns()
159177
}
160178

179+
// ParseLine is the primary function to process individual lines, returning the metrics
161180
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
162181
var err error
163182
// values are the parsed fields from the log line
@@ -251,7 +270,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
251270
var foundTs bool
252271
// first try timestamp layouts that we've already found
253272
for _, layout := range p.foundTsLayouts {
254-
ts, err := time.Parse(layout, v)
273+
ts, err := time.ParseInLocation(layout, v, p.loc)
255274
if err == nil {
256275
timestamp = ts
257276
foundTs = true
@@ -262,7 +281,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
262281
// layouts.
263282
if !foundTs {
264283
for _, layout := range timeLayouts {
265-
ts, err := time.Parse(layout, v)
284+
ts, err := time.ParseInLocation(layout, v, p.loc)
266285
if err == nil {
267286
timestamp = ts
268287
foundTs = true
@@ -280,7 +299,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
280299
case DROP:
281300
// goodbye!
282301
default:
283-
ts, err := time.Parse(t, v)
302+
ts, err := time.ParseInLocation(t, v, p.loc)
284303
if err == nil {
285304
timestamp = ts
286305
} else {

plugins/inputs/logparser/grok/grok_test.go

Lines changed: 183 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ func Benchmark_ParseLine_CommonLogFormat(b *testing.B) {
1616
p := &Parser{
1717
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
1818
}
19-
p.Compile()
19+
_ = p.Compile()
2020

2121
var m telegraf.Metric
2222
for n := 0; n < b.N; n++ {
@@ -29,7 +29,7 @@ func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) {
2929
p := &Parser{
3030
Patterns: []string{"%{COMBINED_LOG_FORMAT}"},
3131
}
32-
p.Compile()
32+
_ = p.Compile()
3333

3434
var m telegraf.Metric
3535
for n := 0; n < b.N; n++ {
@@ -48,7 +48,7 @@ func Benchmark_ParseLine_CustomPattern(b *testing.B) {
4848
TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
4949
`,
5050
}
51-
p.Compile()
51+
_ = p.Compile()
5252

5353
var m telegraf.Metric
5454
for n := 0; n < b.N; n++ {
@@ -707,3 +707,183 @@ func TestShortPatternRegression(t *testing.T) {
707707
},
708708
metric.Fields())
709709
}
710+
711+
func TestTimezoneEmptyCompileFileAndParse(t *testing.T) {
712+
p := &Parser{
713+
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
714+
CustomPatternFiles: []string{"./testdata/test-patterns"},
715+
Timezone: "",
716+
}
717+
assert.NoError(t, p.Compile())
718+
719+
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
720+
require.NotNil(t, metricA)
721+
assert.NoError(t, err)
722+
assert.Equal(t,
723+
map[string]interface{}{
724+
"clientip": "192.168.1.1",
725+
"myfloat": float64(1.25),
726+
"response_time": int64(5432),
727+
"myint": int64(101),
728+
},
729+
metricA.Fields())
730+
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
731+
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())
732+
733+
metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
734+
require.NotNil(t, metricB)
735+
assert.NoError(t, err)
736+
assert.Equal(t,
737+
map[string]interface{}{
738+
"myfloat": 1.25,
739+
"mystring": "mystring",
740+
"nomodifier": "nomodifier",
741+
},
742+
metricB.Fields())
743+
assert.Equal(t, map[string]string{}, metricB.Tags())
744+
assert.Equal(t, int64(1465044105000000000), metricB.UnixNano())
745+
}
746+
747+
func TestTimezoneMalformedCompileFileAndParse(t *testing.T) {
748+
p := &Parser{
749+
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
750+
CustomPatternFiles: []string{"./testdata/test-patterns"},
751+
Timezone: "Something/Weird",
752+
}
753+
assert.NoError(t, p.Compile())
754+
755+
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
756+
require.NotNil(t, metricA)
757+
assert.NoError(t, err)
758+
assert.Equal(t,
759+
map[string]interface{}{
760+
"clientip": "192.168.1.1",
761+
"myfloat": float64(1.25),
762+
"response_time": int64(5432),
763+
"myint": int64(101),
764+
},
765+
metricA.Fields())
766+
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
767+
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())
768+
769+
metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
770+
require.NotNil(t, metricB)
771+
assert.NoError(t, err)
772+
assert.Equal(t,
773+
map[string]interface{}{
774+
"myfloat": 1.25,
775+
"mystring": "mystring",
776+
"nomodifier": "nomodifier",
777+
},
778+
metricB.Fields())
779+
assert.Equal(t, map[string]string{}, metricB.Tags())
780+
assert.Equal(t, int64(1465044105000000000), metricB.UnixNano())
781+
}
782+
783+
func TestTimezoneEuropeCompileFileAndParse(t *testing.T) {
784+
p := &Parser{
785+
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
786+
CustomPatternFiles: []string{"./testdata/test-patterns"},
787+
Timezone: "Europe/Berlin",
788+
}
789+
assert.NoError(t, p.Compile())
790+
791+
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
792+
require.NotNil(t, metricA)
793+
assert.NoError(t, err)
794+
assert.Equal(t,
795+
map[string]interface{}{
796+
"clientip": "192.168.1.1",
797+
"myfloat": float64(1.25),
798+
"response_time": int64(5432),
799+
"myint": int64(101),
800+
},
801+
metricA.Fields())
802+
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
803+
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())
804+
805+
metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
806+
require.NotNil(t, metricB)
807+
assert.NoError(t, err)
808+
assert.Equal(t,
809+
map[string]interface{}{
810+
"myfloat": 1.25,
811+
"mystring": "mystring",
812+
"nomodifier": "nomodifier",
813+
},
814+
metricB.Fields())
815+
assert.Equal(t, map[string]string{}, metricB.Tags())
816+
assert.Equal(t, int64(1465036905000000000), metricB.UnixNano())
817+
}
818+
819+
func TestTimezoneAmericasCompileFileAndParse(t *testing.T) {
820+
p := &Parser{
821+
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
822+
CustomPatternFiles: []string{"./testdata/test-patterns"},
823+
Timezone: "Canada/Eastern",
824+
}
825+
assert.NoError(t, p.Compile())
826+
827+
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
828+
require.NotNil(t, metricA)
829+
assert.NoError(t, err)
830+
assert.Equal(t,
831+
map[string]interface{}{
832+
"clientip": "192.168.1.1",
833+
"myfloat": float64(1.25),
834+
"response_time": int64(5432),
835+
"myint": int64(101),
836+
},
837+
metricA.Fields())
838+
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
839+
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())
840+
841+
metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
842+
require.NotNil(t, metricB)
843+
assert.NoError(t, err)
844+
assert.Equal(t,
845+
map[string]interface{}{
846+
"myfloat": 1.25,
847+
"mystring": "mystring",
848+
"nomodifier": "nomodifier",
849+
},
850+
metricB.Fields())
851+
assert.Equal(t, map[string]string{}, metricB.Tags())
852+
assert.Equal(t, int64(1465058505000000000), metricB.UnixNano())
853+
}
854+
855+
func TestTimezoneLocalCompileFileAndParse(t *testing.T) {
856+
p := &Parser{
857+
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
858+
CustomPatternFiles: []string{"./testdata/test-patterns"},
859+
Timezone: "Local",
860+
}
861+
assert.NoError(t, p.Compile())
862+
863+
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
864+
require.NotNil(t, metricA)
865+
assert.NoError(t, err)
866+
assert.Equal(t,
867+
map[string]interface{}{
868+
"clientip": "192.168.1.1",
869+
"myfloat": float64(1.25),
870+
"response_time": int64(5432),
871+
"myint": int64(101),
872+
},
873+
metricA.Fields())
874+
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
875+
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())
876+
877+
metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
878+
require.NotNil(t, metricB)
879+
assert.NoError(t, err)
880+
assert.Equal(t,
881+
map[string]interface{}{
882+
"myfloat": 1.25,
883+
"mystring": "mystring",
884+
"nomodifier": "nomodifier",
885+
},
886+
metricB.Fields())
887+
assert.Equal(t, map[string]string{}, metricB.Tags())
888+
assert.Equal(t, time.Date(2016, time.June, 4, 12, 41, 45, 0, time.Local).UnixNano(), metricB.UnixNano())
889+
}

plugins/inputs/logparser/grok/influx_patterns.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package grok
22

3-
// THIS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
3+
// DEFAULT_PATTERNS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
44
const DEFAULT_PATTERNS = `
55
# Captures are a slightly modified version of logstash "grok" patterns, with
66
# the format %{<capture syntax>[:<semantic name>][:<modifier>]}

0 commit comments

Comments
 (0)