Skip to content

Commit f4b4467

Browse files
committed
improve validation of leader election values
- take into account partial setting of params and default values - fix md file format - fix linter issues Signed-off-by: Nader Ziada <[email protected]>
1 parent a227029 commit f4b4467

File tree

4 files changed

+117
-20
lines changed

4 files changed

+117
-20
lines changed

docs/operate.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ Optional variables
7272

7373
### Configuring Service Failover
7474

75-
# Configuring the KEDA HTTP Add-on Operator
75+
## Configuring the KEDA HTTP Add-on Operator
7676

7777
## Leader Election Timing
7878

@@ -92,3 +92,23 @@ env:
9292
- name: KEDA_HTTP_OPERATOR_LEADER_ELECTION_RETRY_PERIOD
9393
value: "5s"
9494
```
95+
96+
### Timing Parameter Constraints
97+
98+
**Important:** These parameters have strict ordering requirements to prevent leadership conflicts and unnecessary failover:
99+
100+
```
101+
LeaseDuration > RenewDeadline > RetryPeriod
102+
```
103+
104+
**Why this matters:**
105+
- **LeaseDuration > RenewDeadline**: Ensures the leader finishes renewal attempts before the lease expires, preventing multiple active leaders (split-brain scenarios)
106+
- **RenewDeadline > RetryPeriod**: Allows multiple retry attempts during the renewal window, preventing unnecessary leadership changes due to transient failures
107+
108+
**Configuration Guidelines:**
109+
110+
1. **Configure all three together**: When overriding any parameter, it's recommended to set all three values to avoid invalid combinations with defaults. Setting only one or two parameters can result in invalid configurations when mixed with default values.
111+
112+
2. **All values must be positive**: Each duration must be greater than 0.
113+
114+
3. **Validation failure**: If the operator detects an invalid configuration at startup, it will log an error and exit immediately before attempting leader election.

operator/main_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ import (
2020
"testing"
2121
"time"
2222

23-
"github.com/kedacore/http-add-on/pkg/util"
2423
"github.com/stretchr/testify/assert"
24+
25+
"github.com/kedacore/http-add-on/pkg/util"
2526
)
2627

2728
func TestLeaderElectionEnvVarsIntegration(t *testing.T) {
@@ -78,7 +79,6 @@ func TestLeaderElectionEnvVarsIntegration(t *testing.T) {
7879

7980
for _, tt := range tests {
8081
t.Run(tt.name, func(t *testing.T) {
81-
8282
for key, value := range tt.envVars {
8383
t.Setenv(key, value)
8484
}

pkg/util/env_resolver.go

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,32 +54,55 @@ func ResolveOsEnvDuration(envName string) (*time.Duration, error) {
5454
return nil, nil
5555
}
5656

57-
// ValidateLeaderElectionDurations ensures LeaseDuration > RenewDeadline > RetryPeriod
57+
// Controller-runtime default values for leader election
58+
// Reference: https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/manager/manager.go
59+
const (
60+
defaultLeaseDuration = 15 * time.Second
61+
defaultRenewDeadline = 10 * time.Second
62+
defaultRetryPeriod = 2 * time.Second
63+
)
64+
65+
// ValidateLeaderElectionConfig ensures LeaseDuration > RenewDeadline > RetryPeriod
5866
// to prevent multiple active leaders and unnecessary leadership churn.
67+
// This validation checks against the actual runtime values (user-provided or defaults)
68+
// to catch invalid partial configurations.
5969
func ValidateLeaderElectionConfig(leaseDuration, renewDeadline, retryPeriod *time.Duration) error {
60-
if leaseDuration == nil && renewDeadline == nil && retryPeriod == nil {
61-
return nil
70+
// Resolve actual values that will be used at runtime (user-provided or defaults)
71+
lease := defaultLeaseDuration
72+
if leaseDuration != nil {
73+
lease = *leaseDuration
6274
}
6375

64-
// If any are set, validate relationships
65-
if leaseDuration != nil && *leaseDuration <= 0 {
66-
return fmt.Errorf("lease duration must be greater than 0, got %v", *leaseDuration)
76+
renew := defaultRenewDeadline
77+
if renewDeadline != nil {
78+
renew = *renewDeadline
6779
}
68-
if renewDeadline != nil && *renewDeadline <= 0 {
69-
return fmt.Errorf("renew deadline must be greater than 0, got %v", *renewDeadline)
80+
81+
retry := defaultRetryPeriod
82+
if retryPeriod != nil {
83+
retry = *retryPeriod
7084
}
71-
if retryPeriod != nil && *retryPeriod <= 0 {
72-
return fmt.Errorf("retry period must be greater than 0, got %v", *retryPeriod)
85+
86+
// Validate all values are positive
87+
if lease <= 0 {
88+
return fmt.Errorf("lease duration must be greater than 0, got %v", lease)
89+
}
90+
if renew <= 0 {
91+
return fmt.Errorf("renew deadline must be greater than 0, got %v", renew)
92+
}
93+
if retry <= 0 {
94+
return fmt.Errorf("retry period must be greater than 0, got %v", retry)
7395
}
7496

75-
// Validate relationships when multiple values are set
76-
if leaseDuration != nil && renewDeadline != nil && *leaseDuration <= *renewDeadline {
77-
return fmt.Errorf("lease duration (%v) must be greater than renew deadline (%v)",
78-
*leaseDuration, *renewDeadline)
97+
// Validate relationships: LeaseDuration > RenewDeadline > RetryPeriod
98+
if lease <= renew {
99+
return fmt.Errorf("lease duration (%v) must be greater than renew deadline (%v)", lease, renew)
100+
}
101+
if renew <= retry {
102+
return fmt.Errorf("renew deadline (%v) must be greater than retry period (%v)", renew, retry)
79103
}
80-
if renewDeadline != nil && retryPeriod != nil && *renewDeadline <= *retryPeriod {
81-
return fmt.Errorf("renew deadline (%v) must be greater than retry period (%v)",
82-
*renewDeadline, *retryPeriod)
104+
if lease <= retry {
105+
return fmt.Errorf("lease duration (%v) must be greater than retry period (%v)", lease, retry)
83106
}
84107

85108
return nil

pkg/util/env_resolver_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,60 @@ func TestValidateLeaderElectionDurations(t *testing.T) {
170170
wantErr: true,
171171
errContains: "renew deadline",
172172
},
173+
{
174+
name: "lease duration <= retry period",
175+
leaseDuration: ptr(5 * time.Second),
176+
renewDeadline: ptr(3 * time.Second),
177+
retryPeriod: ptr(5 * time.Second),
178+
wantErr: true,
179+
errContains: "retry period",
180+
},
181+
// Partial configuration tests - validating against defaults
182+
{
183+
name: "partial config: only lease duration set (valid)",
184+
leaseDuration: ptr(20 * time.Second),
185+
renewDeadline: nil, // defaults to 10s
186+
retryPeriod: nil, // defaults to 2s
187+
wantErr: false,
188+
},
189+
{
190+
name: "partial config: only lease duration set (invalid - too low)",
191+
leaseDuration: ptr(5 * time.Second),
192+
renewDeadline: nil, // defaults to 10s
193+
retryPeriod: nil, // defaults to 2s
194+
wantErr: true,
195+
errContains: "lease duration (5s) must be greater than renew deadline (10s)",
196+
},
197+
{
198+
name: "partial config: only retry period set (invalid - too high)",
199+
leaseDuration: nil, // defaults to 15s
200+
renewDeadline: nil, // defaults to 10s
201+
retryPeriod: ptr(12 * time.Second),
202+
wantErr: true,
203+
errContains: "renew deadline (10s) must be greater than retry period (12s)",
204+
},
205+
{
206+
name: "partial config: only retry period set (valid)",
207+
leaseDuration: nil, // defaults to 15s
208+
renewDeadline: nil, // defaults to 10s
209+
retryPeriod: ptr(1 * time.Second),
210+
wantErr: false,
211+
},
212+
{
213+
name: "partial config: lease and renew set (valid)",
214+
leaseDuration: ptr(30 * time.Second),
215+
renewDeadline: ptr(20 * time.Second),
216+
retryPeriod: nil, // defaults to 2s
217+
wantErr: false,
218+
},
219+
{
220+
name: "partial config: lease and renew set (invalid - renew conflicts with default retry)",
221+
leaseDuration: ptr(30 * time.Second),
222+
renewDeadline: ptr(1 * time.Second), // less than default retry (2s)
223+
retryPeriod: nil, // defaults to 2s
224+
wantErr: true,
225+
errContains: "renew deadline (1s) must be greater than retry period (2s)",
226+
},
173227
}
174228

175229
for _, tt := range tests {

0 commit comments

Comments
 (0)