adding efficient polling to waitForStepsToFinish

pritidesai · pritidesai · commit b6b9bca632ca · 2025-07-21T17:20:13.000-07:00
The current waitForStepsToFinish implementation is a classic busy-wait.
It checks for file existence without any sleep, resulting in high
CPU usage. This commit adds a unit test that captures a CPU profile, which
shows that almost all time is spent in system calls, with a high total sample
count. This leads to excessive CPU usage by the sidecar even when it is just waiting.

The function now sleeps 100ms between checks, drastically reducing how
often it polls the file system. The sidecar now uses minimal CPU while waiting.

Signed-off-by: Priti Desai <pdesai@us.ibm.com>
diff --git a/internal/sidecarlogresults/sidecarlogresults.go b/internal/sidecarlogresults/sidecarlogresults.go
@@ -26,6 +26,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"time"
 
 	"github.com/tektoncd/pipeline/pkg/apis/config"
 	"github.com/tektoncd/pipeline/pkg/apis/pipeline"
@@ -103,6 +104,7 @@ func waitForStepsToFinish(runDir string) error {
 				return err
 			}
 		}
+		time.Sleep(100 * time.Millisecond)
 	}
 	return nil
 }
diff --git a/internal/sidecarlogresults/sidecarlogresults_test.go b/internal/sidecarlogresults/sidecarlogresults_test.go
@@ -23,9 +23,11 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"runtime/pprof"
 	"sort"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
@@ -608,6 +610,61 @@ func TestExtractStepAndResultFromSidecarResultName_Error(t *testing.T) {
 	}
 }
 
+// TestWaitForStepsToFinish_Profile captures a CPU profile of waitForStepsToFinish while step output files appear over time.
+// It is skipped unless the ENABLE_PROFILING_TESTS environment variable is set to "true".
+// The profile is written to a file named cpu.prof so it can be inspected later, e.g. with `go tool pprof`.
+// A temporary directory with 100 subdirectories (step0, step1, ...) stands in for the Tekton step run directory.
+// A goroutine writes an "out" file into each step directory in turn, pausing 10ms between writes.
+// Setup failures fail the test instead of returning silently, so a broken environment cannot produce a vacuous pass.
+// Comparing profiles taken before and after a change gives a reproducible way to measure the function's CPU cost.
+func TestWaitForStepsToFinish_Profile(t *testing.T) {
+	if os.Getenv("ENABLE_PROFILING_TESTS") != "true" {
+		t.Skip("Profiling tests are disabled")
+	}
+	// The relative path resolves against the test's working directory, not t.TempDir(), so the file outlives the test.
+	f, err := os.Create("cpu.prof")
+	if err != nil {
+		t.Fatalf("could not create CPU profile: %v", err)
+	}
+	defer func() {
+		// Runs after StopCPUProfile (defers are LIFO), so the profile is fully written before the close.
+		if err := f.Close(); err != nil {
+			t.Errorf("could not close CPU profile: %v", err)
+		}
+	}()
+	if err := pprof.StartCPUProfile(f); err != nil {
+		t.Fatalf("could not start CPU profile: %v", err)
+	}
+	defer pprof.StopCPUProfile()
+
+	// Setup: create a temp runDir with one directory per fake step.
+	runDir := t.TempDir()
+	stepCount := 100
+	for i := range stepCount {
+		dir := filepath.Join(runDir, fmt.Sprintf("step%d", i))
+		if err := os.MkdirAll(dir, 0755); err != nil {
+			t.Fatalf("could not create step dir %q: %v", dir, err)
+		}
+	}
+
+	// Simulate steps finishing one by one with a delay.
+	go func() {
+		for i := range stepCount {
+			file := filepath.Join(runDir, fmt.Sprintf("step%d", i), "out")
+			if err := os.WriteFile(file, []byte("done"), 0644); err != nil {
+				// Errorf, not Fatalf: FailNow must only be called from the goroutine running the test.
+				t.Errorf("could not write %q: %v", file, err)
+				return
+			}
+			time.Sleep(10 * time.Millisecond)
+		}
+	}()
+
+	if err := waitForStepsToFinish(runDir); err != nil {
+		t.Fatalf("waitForStepsToFinish failed: %v", err)
+	}
+}
+
 func TestLookForArtifacts(t *testing.T) {
 	base := basicArtifacts()
 	modified := base.DeepCopy()

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,7 @@ import (`
`26`	`26`	`"os"`
`27`	`27`	`"path/filepath"`
`28`	`28`	`"strings"`
	`29`	`+ "time"`
`29`	`30`
`30`	`31`	`"github.com/tektoncd/pipeline/pkg/apis/config"`
`31`	`32`	`"github.com/tektoncd/pipeline/pkg/apis/pipeline"`
`@@ -103,6 +104,7 @@ func waitForStepsToFinish(runDir string) error {`
`103`	`104`	`return err`
`104`	`105`	`}`
`105`	`106`	`}`
	`107`	`+ time.Sleep(100 * time.Millisecond)`
`106`	`108`	`}`
`107`	`109`	`return nil`
`108`	`110`	`}`