Skip to content

Commit 70e919d

Browse files
vdemeesterclaude
andcommitted
fix: Prevent excessive reconciliation when timeout disabled
- Stop immediate requeue loops when default-timeout-minutes is "0" - Remove redundant hasCondition checks (Knative already deduplicates) This adds an e2e tests that looks at pipeline logs to see how much reconciler loop there is. If you run it before the fix, it would count more than 1500 reconciler loop, whereas with the fix, only about 10. Signed-off-by: Vincent Demeester <vdemeest@redhat.com> Co-Authored-By: Claude <noreply@anthropic.com>
1 parent ad5317e commit 70e919d

File tree

5 files changed

+417
-25
lines changed

5 files changed

+417
-25
lines changed

pkg/reconciler/pipelinerun/pipelinerun.go

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"reflect"
2626
"regexp"
2727
"strings"
28-
"time"
2928

3029
"k8s.io/apimachinery/pkg/util/wait"
3130

@@ -275,22 +274,39 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, pr *v1.PipelineRun) pkgr
275274
}
276275

277276
if pr.Status.StartTime != nil {
278-
// Compute the time since the task started.
277+
// Compute the time since the pipeline started.
279278
elapsed := c.Clock.Since(pr.Status.StartTime.Time)
280279
// Snooze this resource until the appropriate timeout has elapsed.
281-
// but if the timeout has been disabled by setting timeout to 0, we
282-
// do not want to subtract from 0, because a negative wait time will
283-
// result in the requeue happening essentially immediately
284280
timeout := pr.PipelineTimeout(ctx)
285281
taskTimeout := pr.TasksTimeout()
286-
waitTime := timeout - elapsed
282+
283+
// If the main pipeline timeout is NoTimeoutDuration (0), it means no timeout is configured.
284+
// This can happen in two ways:
285+
// 1. User explicitly set pr.Spec.Timeouts.Pipeline to 0 (wants no timeout)
286+
// 2. User didn't set pr.Spec.Timeouts.Pipeline (nil) AND default-timeout-minutes config is "0"
287+
// In these cases, check if there are specific task or finally timeouts to enforce.
288+
// If not, don't requeue - the reconciler will be triggered by watch events.
287289
if timeout == config.NoTimeoutDuration {
288-
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
290+
// Check if we have a specific task timeout to wait for
291+
if pr.Status.FinallyStartTime == nil && taskTimeout != nil && taskTimeout.Duration != config.NoTimeoutDuration {
292+
waitTime := taskTimeout.Duration - elapsed
293+
return controller.NewRequeueAfter(waitTime)
294+
}
295+
// Check if we have a finally timeout to wait for
296+
if pr.Status.FinallyStartTime != nil && pr.FinallyTimeout() != nil && pr.FinallyTimeout().Duration != config.NoTimeoutDuration {
297+
waitTime := pr.FinallyTimeout().Duration - c.Clock.Since(pr.Status.FinallyStartTime.Time)
298+
return controller.NewRequeueAfter(waitTime)
299+
}
300+
// No timeout configured anywhere, don't requeue
301+
return nil
289302
}
303+
304+
// Pipeline timeout is set, calculate wait time based on the most restrictive timeout
305+
waitTime := timeout - elapsed
290306
if pr.Status.FinallyStartTime == nil && taskTimeout != nil {
291-
waitTime = pr.TasksTimeout().Duration - elapsed
307+
waitTime = taskTimeout.Duration - elapsed
292308
if taskTimeout.Duration == config.NoTimeoutDuration {
293-
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
309+
waitTime = timeout - elapsed
294310
}
295311
} else if pr.Status.FinallyStartTime != nil && pr.FinallyTimeout() != nil &&
296312
pr.FinallyTimeout().Duration != config.NoTimeoutDuration {

pkg/reconciler/pipelinerun/pipelinerun_test.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2599,13 +2599,26 @@ spec:
25992599
c := prt.TestAssets.Controller
26002600
clients := prt.TestAssets.Clients
26012601
reconcileError := c.Reconciler.Reconcile(prt.TestAssets.Ctx, fmt.Sprintf("%s/%s", "foo", tc.prs[0].Name))
2602-
if reconcileError == nil {
2603-
t.Errorf("expected error, but got nil")
2604-
}
2605-
if isRequeueError, requeueDuration := controller.IsRequeueKey(reconcileError); !isRequeueError {
2606-
t.Errorf("Expected requeue error, but got: %s", reconcileError.Error())
2607-
} else if requeueDuration < 0 {
2608-
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2602+
2603+
// When timeout is explicitly disabled (set to 0), we should NOT requeue
2604+
// This prevents excessive reconciliation (issue #8495)
2605+
// Check if the pipeline has timeout disabled
2606+
pipelineTimeout := tc.prs[0].PipelineTimeout(prt.TestAssets.Ctx)
2607+
if pipelineTimeout == config.NoTimeoutDuration {
2608+
// Timeout is disabled - should not requeue
2609+
if reconcileError != nil {
2610+
t.Errorf("expected no error when timeout is disabled, but got: %v", reconcileError)
2611+
}
2612+
} else {
2613+
// Timeout is enabled - should requeue
2614+
if reconcileError == nil {
2615+
t.Errorf("expected error when timeout is enabled, but got nil")
2616+
}
2617+
if isRequeueError, requeueDuration := controller.IsRequeueKey(reconcileError); !isRequeueError {
2618+
t.Errorf("Expected requeue error, but got: %v", reconcileError)
2619+
} else if requeueDuration < 0 {
2620+
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2621+
}
26092622
}
26102623
prt.Test.Logf("Getting reconciled run")
26112624
reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns("foo").Get(prt.TestAssets.Ctx, tc.prs[0].Name, metav1.GetOptions{})

pkg/reconciler/taskrun/taskrun.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,16 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1.TaskRun) pkgrecon
237237
elapsed := c.Clock.Since(tr.Status.StartTime.Time)
238238
// Snooze this resource until the timeout has elapsed.
239239
timeout := tr.GetTimeout(ctx)
240-
waitTime := timeout - elapsed
240+
// If timeout is NoTimeoutDuration (0), it means no timeout is configured.
241+
// This can happen in two ways:
242+
// 1. User explicitly set tr.Spec.Timeout.Duration to 0 (wants no timeout)
243+
// 2. User didn't set tr.Spec.Timeout (nil) AND default-timeout-minutes config is "0"
244+
// In both cases, we should not requeue based on timeout. The reconciler will
245+
// still be triggered appropriately by pod watch events when the TaskRun changes.
241246
if timeout == config.NoTimeoutDuration {
242-
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
247+
return nil
243248
}
249+
waitTime := timeout - elapsed
244250
return controller.NewRequeueAfter(waitTime)
245251
}
246252
return nil

pkg/reconciler/taskrun/taskrun_test.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2932,13 +2932,25 @@ status:
29322932
clients := testAssets.Clients
29332933

29342934
err = c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tc.taskRun))
2935-
if err == nil {
2936-
t.Errorf("expected error when reconciling completed TaskRun : %v", err)
2937-
}
2938-
if isRequeueError, requeueDuration := controller.IsRequeueKey(err); !isRequeueError {
2939-
t.Errorf("Expected requeue error, but got: %s", err.Error())
2940-
} else if requeueDuration < 0 {
2941-
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2935+
2936+
// When timeout is explicitly disabled (set to 0), we should NOT requeue
2937+
// This prevents excessive reconciliation (issue #8495)
2938+
taskTimeout := tc.taskRun.GetTimeout(testAssets.Ctx)
2939+
if taskTimeout == config.NoTimeoutDuration {
2940+
// Timeout is disabled - should not requeue
2941+
if err != nil {
2942+
t.Errorf("expected no error when timeout is disabled, but got: %v", err)
2943+
}
2944+
} else {
2945+
// Timeout is enabled - should requeue
2946+
if err == nil {
2947+
t.Errorf("expected error when timeout is enabled, but got nil")
2948+
}
2949+
if isRequeueError, requeueDuration := controller.IsRequeueKey(err); !isRequeueError {
2950+
t.Errorf("Expected requeue error, but got: %v", err)
2951+
} else if requeueDuration < 0 {
2952+
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2953+
}
29422954
}
29432955
_, err = clients.Pipeline.TektonV1().TaskRuns(tc.taskRun.Namespace).Get(testAssets.Ctx, tc.taskRun.Name, metav1.GetOptions{})
29442956
if err != nil {

0 commit comments

Comments
 (0)