Skip to content

Commit 20795bf

Browse files
committed
pkg/rules: consider group name and file for deduplication
Currently only the group name is considered for group deduplication. However prometheus uses also the group file according to [1]. This fixes it. [1] https://github.com/prometheus/prometheus/blob/ce838ad6fcbd14b0cf9971a4d093ad672e1469fe/rules/manager.go#L1047-L1055 Signed-off-by: Sergiusz Urbaniak <sergiusz.urbaniak@gmail.com>
1 parent d6305f5 commit 20795bf

File tree

5 files changed

+153
-6
lines changed

5 files changed

+153
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel
2828
- [#2957](https://github.com/thanos-io/thanos/pull/2957) Rule: now sets all of the relevant fields properly; avoids a panic when `/api/v1/rules` is called and the time zone is _not_ UTC; `rules` field is an empty array now if no rules have been defined in a rule group.
2929
- [#2976](https://github.com/thanos-io/thanos/pull/2976) Query: Better rounding for incoming query timestamps.
3030
- [#2929](https://github.com/thanos-io/thanos/pull/2929) Mixin: Fix expression for 'unhealthy sidecar' alert and also increase the timeout for 10 minutes.
31+
- [#3024](https://github.com/thanos-io/thanos/pull/3024) Query: consider group name and file for deduplication
3132

3233
### Added
3334

docs/proposals/202003_thanos_rules_federation.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Thanos Querier fans-out to all know Rules endpoints configured via `--rule` comm
6161

6262
Generally the deduplication logic is less complex than with time series, specifically:
6363

64-
* Deduplication happens first at the rule group level. The identifier is the group name.
64+
* Deduplication happens first at the rule group level. The identifier is the group name and the group file.
6565
* Then, per group name deduplication happens on the rule level, where:
6666

6767
1. the rule type (recording rule vs. alerting rule)
@@ -171,6 +171,26 @@ Given the following stream of incoming alerting rules will also result in two in
171171
severity: critical
172172
```
173173

174+
Scenario 4:
175+
176+
As specified, the group name and file fields are used for deduplication.
177+
178+
Given the following stream of incoming rule groups:
179+
```text
180+
group: a/file1
181+
group: b/file1
182+
group: a/file2
183+
```
184+
185+
The output becomes:
186+
```text
187+
group: a/file1
188+
group: a/file2
189+
group: b/file1
190+
```
191+
192+
Deduplication of included alerting/recording rules inside groups is described in the previous scenarios.
193+
174194
## Alternatives
175195

176196
* Cortex contains a sharded Ruler. Assigning rules to shards is done via Consul, though a gossip implementation is under development. Shards do not communicate with other shards. Rules come from a store (e.g. a Postgres database).

pkg/rules/rules.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,11 @@ func dedupGroups(groups []*rulespb.RuleGroup) []*rulespb.RuleGroup {
130130
}
131131

132132
// Sort groups such that they appear next to each other.
133-
sort.Slice(groups, func(i, j int) bool { return groups[i].Name < groups[j].Name })
133+
sort.Slice(groups, func(i, j int) bool { return groups[i].Compare(groups[j]) < 0 })
134134

135135
i := 0
136136
for _, g := range groups[1:] {
137-
if g.Name == groups[i].Name {
137+
if g.Compare(groups[i]) == 0 {
138138
groups[i].Rules = append(groups[i].Rules, g.Rules...)
139139
} else {
140140
i++

pkg/rules/rules_test.go

Lines changed: 110 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package rules
55

66
import (
77
"context"
8+
"path"
89
"path/filepath"
910
"testing"
1011
"time"
@@ -53,7 +54,6 @@ func testRulesAgainstExamples(t *testing.T, dir string, server rulespb.RulesServ
5354
File: filepath.Join(dir, "alerts.yaml"),
5455
Rules: []*rulespb.Rule{
5556
someAlert, someAlert, someAlert, someAlert, someAlert, someAlert, someAlert,
56-
someRecording, someRecording, someRecording, someRecording, someRecording,
5757
},
5858
Interval: 60,
5959
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
@@ -63,7 +63,6 @@ func testRulesAgainstExamples(t *testing.T, dir string, server rulespb.RulesServ
6363
File: filepath.Join(dir, "alerts.yaml"),
6464
Rules: []*rulespb.Rule{
6565
someAlert, someAlert, someAlert, someAlert, someAlert, someAlert, someAlert,
66-
someRecording, someRecording, someRecording, someRecording, someRecording, someRecording, someRecording,
6766
},
6867
Interval: 60,
6968
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
@@ -87,6 +86,39 @@ func testRulesAgainstExamples(t *testing.T, dir string, server rulespb.RulesServ
8786
File: filepath.Join(dir, "alerts.yaml"),
8887
Rules: []*rulespb.Rule{
8988
someAlert, someAlert, someAlert, someAlert,
89+
},
90+
Interval: 60,
91+
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
92+
},
93+
{
94+
Name: "thanos-bucket-replicate.rules",
95+
File: filepath.Join(dir, "rules.yaml"),
96+
Rules: nil,
97+
Interval: 60,
98+
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
99+
},
100+
{
101+
Name: "thanos-query.rules",
102+
File: filepath.Join(dir, "rules.yaml"),
103+
Rules: []*rulespb.Rule{
104+
someRecording, someRecording, someRecording, someRecording, someRecording,
105+
},
106+
Interval: 60,
107+
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
108+
},
109+
{
110+
Name: "thanos-receive.rules",
111+
File: filepath.Join(dir, "rules.yaml"),
112+
Rules: []*rulespb.Rule{
113+
someRecording, someRecording, someRecording, someRecording, someRecording, someRecording, someRecording,
114+
},
115+
Interval: 60,
116+
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
117+
},
118+
{
119+
Name: "thanos-store.rules",
120+
File: filepath.Join(dir, "rules.yaml"),
121+
Rules: []*rulespb.Rule{
90122
someRecording, someRecording, someRecording, someRecording,
91123
},
92124
Interval: 60,
@@ -165,7 +197,9 @@ func testRulesAgainstExamples(t *testing.T, dir string, server rulespb.RulesServ
165197
got[i].EvaluationDurationSeconds = 0
166198
got[i].LastEvaluation = time.Time{}
167199

168-
testutil.Equals(t, expectedForType[i], got[i])
200+
t.Run(got[i].Name+" "+path.Base(got[i].File), func(t *testing.T) {
201+
testutil.Equals(t, expectedForType[i], got[i])
202+
})
169203
}
170204
testutil.Equals(t, expectedForType, got)
171205
})
@@ -742,6 +776,79 @@ func TestDedupGroups(t *testing.T) {
742776
},
743777
},
744778
},
779+
{
780+
name: "distinct file names",
781+
groups: []*rulespb.RuleGroup{
782+
{
783+
Name: "a",
784+
File: "foo.yaml",
785+
Rules: []*rulespb.Rule{
786+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}),
787+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a2"}),
788+
},
789+
},
790+
{
791+
Name: "a",
792+
Rules: []*rulespb.Rule{
793+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}),
794+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a2"}),
795+
},
796+
},
797+
{
798+
Name: "b",
799+
Rules: []*rulespb.Rule{
800+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "b1"}),
801+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "b2"}),
802+
},
803+
},
804+
{
805+
Name: "c",
806+
},
807+
{
808+
Name: "a",
809+
File: "bar.yaml",
810+
Rules: []*rulespb.Rule{
811+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}),
812+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a2"}),
813+
},
814+
},
815+
},
816+
want: []*rulespb.RuleGroup{
817+
{
818+
Name: "a",
819+
Rules: []*rulespb.Rule{
820+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}),
821+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a2"}),
822+
},
823+
},
824+
{
825+
Name: "b",
826+
Rules: []*rulespb.Rule{
827+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "b1"}),
828+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "b2"}),
829+
},
830+
},
831+
{
832+
Name: "c",
833+
},
834+
{
835+
Name: "a",
836+
File: "bar.yaml",
837+
Rules: []*rulespb.Rule{
838+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}),
839+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a2"}),
840+
},
841+
},
842+
{
843+
Name: "a",
844+
File: "foo.yaml",
845+
Rules: []*rulespb.Rule{
846+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}),
847+
rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a2"}),
848+
},
849+
},
850+
},
851+
},
745852
} {
746853
t.Run(tc.name, func(t *testing.T) {
747854
t.Run(tc.name, func(t *testing.T) {

pkg/rules/rulespb/custom.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,25 @@ func (r *RuleGroups) MarshalJSON() ([]byte, error) {
186186
return json.Marshal((*plain)(r))
187187
}
188188

189+
// Compare compares rule group x and y and returns:
190+
//
191+
// < 0 if x < y if rule group r1 is not equal and lexically before rule group r2
192+
// 0 if x == y if rule group r1 is logically equal to r2 (r1 and r2 are the "same" rule groups)
193+
// > 0 if x > y if rule group r1 is not equal and lexically after rule group r2
194+
func (r1 *RuleGroup) Compare(r2 *RuleGroup) int {
195+
return strings.Compare(r1.Key(), r2.Key())
196+
}
197+
198+
// Key returns the group key similar resembling Prometheus logic.
199+
// See https://github.com/prometheus/prometheus/blob/869f1bc587e667b79721852d5badd9f70a39fc3f/rules/manager.go#L1062-L1065
200+
func (r *RuleGroup) Key() string {
201+
if r == nil {
202+
return ""
203+
}
204+
205+
return r.File + ";" + r.Name
206+
}
207+
189208
func (m *Rule) UnmarshalJSON(entry []byte) error {
190209
decider := struct {
191210
Type string `json:"type"`

0 commit comments

Comments
 (0)