Skip to content

Commit f023900

Browse files
vdemeesterclaude
authored andcommitted
fix: Prevent excessive reconciliation when timeout disabled
- Stop immediate requeue loops when default-timeout-minutes is "0" - Remove redundant hasCondition checks (Knative already deduplicates) This adds an e2e tests that looks at pipeline logs to see how much reconciler loop there is. If you run it before the fix, it would count more than 1500 reconciler loop, whereas with the fix, only about 10. Signed-off-by: Vincent Demeester <vdemeest@redhat.com> Co-Authored-By: Claude <noreply@anthropic.com>
1 parent fa7d1fe commit f023900

File tree

5 files changed

+412
-35
lines changed

5 files changed

+412
-35
lines changed

pkg/reconciler/pipelinerun/pipelinerun.go

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"reflect"
2626
"regexp"
2727
"strings"
28-
"time"
2928

3029
"github.com/hashicorp/go-multierror"
3130
"github.com/tektoncd/pipeline/pkg/apis/config"
@@ -272,31 +271,50 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, pr *v1.PipelineRun) pkgr
272271
}
273272

274273
if pr.Status.StartTime != nil {
275-
// Compute the time since the task started.
274+
// Compute the time since the pipeline started.
276275
elapsed := c.Clock.Since(pr.Status.StartTime.Time)
277276
// Snooze this resource until the appropriate timeout has elapsed.
278-
// but if the timeout has been disabled by setting timeout to 0, we
279-
// do not want to subtract from 0, because a negative wait time will
280-
// result in the requeue happening essentially immediately
281277
timeout := pr.PipelineTimeout(ctx)
282278
taskTimeout := pr.TasksTimeout()
283-
waitTime := timeout - elapsed
284-
if timeout == config.NoTimeoutDuration {
285-
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
286-
}
287-
if pr.Status.FinallyStartTime == nil && taskTimeout != nil {
288-
waitTime = pr.TasksTimeout().Duration - elapsed
289-
if taskTimeout.Duration == config.NoTimeoutDuration {
290-
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
279+
280+
// If the main pipeline timeout is NoTimeoutDuration (0), it means no timeout is configured.
281+
// This can happen in two ways:
282+
// 1. User explicitly set pr.Spec.Timeouts.Pipeline to 0 (wants no timeout)
283+
// 2. User didn't set pr.Spec.Timeouts.Pipeline (nil) AND default-timeout-minutes config is "0"
284+
// In these cases, check if there are specific task or finally timeouts to enforce.
285+
// If not, don't requeue - the reconciler will be triggered by watch events.
286+
// Check which phase we're in and handle timeout accordingly
287+
if pr.Status.FinallyStartTime != nil {
288+
// We're in finally phase - check for finally-specific timeout
289+
if pr.FinallyTimeout() != nil && pr.FinallyTimeout().Duration != config.NoTimeoutDuration {
290+
finallyWaitTime := pr.FinallyTimeout().Duration - c.Clock.Since(pr.Status.FinallyStartTime.Time)
291+
// If pipeline timeout is also set, use the most restrictive timeout
292+
if timeout != config.NoTimeoutDuration {
293+
waitTime := timeout - elapsed
294+
if finallyWaitTime < waitTime {
295+
waitTime = finallyWaitTime
296+
}
297+
return controller.NewRequeueAfter(waitTime)
298+
}
299+
return controller.NewRequeueAfter(finallyWaitTime)
291300
}
292-
} else if pr.Status.FinallyStartTime != nil && pr.FinallyTimeout() != nil &&
293-
pr.FinallyTimeout().Duration != config.NoTimeoutDuration {
294-
finallyWaitTime := pr.FinallyTimeout().Duration - c.Clock.Since(pr.Status.FinallyStartTime.Time)
295-
if finallyWaitTime < waitTime {
296-
waitTime = finallyWaitTime
301+
// No finally timeout, use pipeline timeout if set
302+
if timeout != config.NoTimeoutDuration {
303+
return controller.NewRequeueAfter(timeout - elapsed)
297304
}
305+
return nil
298306
}
299-
return controller.NewRequeueAfter(waitTime)
307+
308+
// We're in tasks phase - check for task-specific timeout
309+
if taskTimeout != nil && taskTimeout.Duration != config.NoTimeoutDuration {
310+
return controller.NewRequeueAfter(taskTimeout.Duration - elapsed)
311+
}
312+
// No task timeout, use pipeline timeout if set
313+
if timeout != config.NoTimeoutDuration {
314+
return controller.NewRequeueAfter(timeout - elapsed)
315+
}
316+
return nil
317+
300318
}
301319
return nil
302320
}

pkg/reconciler/pipelinerun/pipelinerun_test.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2644,13 +2644,26 @@ spec:
26442644
c := prt.TestAssets.Controller
26452645
clients := prt.TestAssets.Clients
26462646
reconcileError := c.Reconciler.Reconcile(prt.TestAssets.Ctx, fmt.Sprintf("%s/%s", "foo", tc.prs[0].Name))
2647-
if reconcileError == nil {
2648-
t.Errorf("expected error, but got nil")
2649-
}
2650-
if isRequeueError, requeueDuration := controller.IsRequeueKey(reconcileError); !isRequeueError {
2651-
t.Errorf("Expected requeue error, but got: %s", reconcileError.Error())
2652-
} else if requeueDuration < 0 {
2653-
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2647+
2648+
// When timeout is explicitly disabled (set to 0), we should NOT requeue
2649+
// This prevents excessive reconciliation (issue #8495)
2650+
// Check if the pipeline has timeout disabled
2651+
pipelineTimeout := tc.prs[0].PipelineTimeout(prt.TestAssets.Ctx)
2652+
if pipelineTimeout == config.NoTimeoutDuration {
2653+
// Timeout is disabled - should not requeue
2654+
if reconcileError != nil {
2655+
t.Errorf("expected no error when timeout is disabled, but got: %v", reconcileError)
2656+
}
2657+
} else {
2658+
// Timeout is enabled - should requeue
2659+
if reconcileError == nil {
2660+
t.Errorf("expected error when timeout is enabled, but got nil")
2661+
}
2662+
if isRequeueError, requeueDuration := controller.IsRequeueKey(reconcileError); !isRequeueError {
2663+
t.Errorf("Expected requeue error, but got: %v", reconcileError)
2664+
} else if requeueDuration < 0 {
2665+
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2666+
}
26542667
}
26552668
prt.Test.Logf("Getting reconciled run")
26562669
reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns("foo").Get(prt.TestAssets.Ctx, tc.prs[0].Name, metav1.GetOptions{})

pkg/reconciler/taskrun/taskrun.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,10 +231,16 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1.TaskRun) pkgrecon
231231
elapsed := c.Clock.Since(tr.Status.StartTime.Time)
232232
// Snooze this resource until the timeout has elapsed.
233233
timeout := tr.GetTimeout(ctx)
234-
waitTime := timeout - elapsed
234+
// If timeout is NoTimeoutDuration (0), it means no timeout is configured.
235+
// This can happen in two ways:
236+
// 1. User explicitly set tr.Spec.Timeout.Duration to 0 (wants no timeout)
237+
// 2. User didn't set tr.Spec.Timeout (nil) AND default-timeout-minutes config is "0"
238+
// In both cases, we should not requeue based on timeout. The reconciler will
239+
// still be triggered appropriately by pod watch events when the TaskRun changes.
235240
if timeout == config.NoTimeoutDuration {
236-
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
241+
return nil
237242
}
243+
waitTime := timeout - elapsed
238244
return controller.NewRequeueAfter(waitTime)
239245
}
240246
return nil

pkg/reconciler/taskrun/taskrun_test.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2929,13 +2929,25 @@ status:
29292929
clients := testAssets.Clients
29302930

29312931
err = c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tc.taskRun))
2932-
if err == nil {
2933-
t.Errorf("expected error when reconciling completed TaskRun : %v", err)
2934-
}
2935-
if isRequeueError, requeueDuration := controller.IsRequeueKey(err); !isRequeueError {
2936-
t.Errorf("Expected requeue error, but got: %s", err.Error())
2937-
} else if requeueDuration < 0 {
2938-
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2932+
2933+
// When timeout is explicitly disabled (set to 0), we should NOT requeue
2934+
// This prevents excessive reconciliation (issue #8495)
2935+
taskTimeout := tc.taskRun.GetTimeout(testAssets.Ctx)
2936+
if taskTimeout == config.NoTimeoutDuration {
2937+
// Timeout is disabled - should not requeue
2938+
if err != nil {
2939+
t.Errorf("expected no error when timeout is disabled, but got: %v", err)
2940+
}
2941+
} else {
2942+
// Timeout is enabled - should requeue
2943+
if err == nil {
2944+
t.Errorf("expected error when timeout is enabled, but got nil")
2945+
}
2946+
if isRequeueError, requeueDuration := controller.IsRequeueKey(err); !isRequeueError {
2947+
t.Errorf("Expected requeue error, but got: %v", err)
2948+
} else if requeueDuration < 0 {
2949+
t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
2950+
}
29392951
}
29402952
_, err = clients.Pipeline.TektonV1().TaskRuns(tc.taskRun.Namespace).Get(testAssets.Ctx, tc.taskRun.Name, metav1.GetOptions{})
29412953
if err != nil {

0 commit comments

Comments
 (0)