Skip to content

Commit e1bda58

Browse files
committed
adding efficient polling to waitForStepsToFinish
The current waitForStepsToFinish implementation is a classic busy-wait. It checks for file existence without any sleep, resulting in a high CPU usage. Adding a profile with a unit test to show that almost all time is spent in system calls with a high total sample count. This led to execssive CPU usage by the sidecar even when just waiting. The function now sleeps 100ms between checks, drastically reducing the frequency. The sidecar now uses minimal CPU while waiting. Signed-off-by: Priti Desai <pdesai@us.ibm.com>
1 parent d504890 commit e1bda58

File tree

9 files changed

+207
-5
lines changed

9 files changed

+207
-5
lines changed

config/config-defaults.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,11 @@ data:
149149
# limits:
150150
# memory: "256Mi"
151151
# cpu: "500m"
152+
153+
# default-sidecar-log-polling-interval specifies the polling interval for the Tekton sidecar log results container.
154+
# This controls how frequently the sidecar checks for step completion files written by steps in a TaskRun.
155+
# Lower values (e.g., "10ms") make the sidecar more responsive but may increase CPU usage; higher values (e.g., "1s")
156+
# reduce resource usage but may delay result collection.
157+
# This value is used by the sidecar-tekton-log-results container and can be tuned for performance or test scenarios.
158+
# Example values: "100ms", "500ms", "1s"
159+
default-sidecar-log-polling-interval: "100ms"

docs/additional-configs.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ The example below customizes the following:
243243
- the default maximum combinations of `Parameters` in a `Matrix` that can be used to fan out a `PipelineTask`. For
244244
more information, see [`Matrix`](matrix.md).
245245
- the default resolver type to `git`.
246+
- the default polling interval for the sidecar log results container via `default-sidecar-log-polling-interval`.
246247

247248
```yaml
248249
apiVersion: v1
@@ -260,8 +261,26 @@ data:
260261
emptyDir: {}
261262
default-max-matrix-combinations-count: "1024"
262263
default-resolver-type: "git"
264+
default-sidecar-log-polling-interval: "100ms"
263265
```
264266

267+
### `default-sidecar-log-polling-interval`
268+
269+
The `default-sidecar-log-polling-interval` key in the `config-defaults` ConfigMap specifies how frequently the Tekton
270+
sidecar log results container polls for step completion files written by steps in a TaskRun. Lower values (e.g., `10ms`)
271+
make the sidecar more responsive but may increase CPU usage; higher values (e.g., `1s`) reduce resource usage but may
272+
delay result collection. This value is used by the `sidecar-tekton-log-results` container and can be tuned for performance
273+
or test scenarios.
274+
275+
**Example values:**
276+
- `100ms` (default)
277+
- `500ms`
278+
- `1s`
279+
- `10ms` (for fast polling in tests)
280+
281+
**Note:** The `default-sidecar-log-polling-interval` setting is only applicable when results are created using the
282+
[sidecar approach](#enabling-larger-results-using-sidecar-logs).
283+
265284
**Note:** The `_example` key in the provided [config-defaults.yaml](./../config/config-defaults.yaml)
266285
file lists the keys you can customize along with their default values.
267286

internal/sidecarlogresults/sidecarlogresults.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"os"
2727
"path/filepath"
2828
"strings"
29+
"time"
2930

3031
"github.com/tektoncd/pipeline/pkg/apis/config"
3132
"github.com/tektoncd/pipeline/pkg/apis/pipeline"
@@ -74,7 +75,7 @@ func encode(w io.Writer, v any) error {
7475
return json.NewEncoder(w).Encode(v)
7576
}
7677

77-
func waitForStepsToFinish(runDir string) error {
78+
func waitForStepsToFinish(runDir string, sleepInterval time.Duration) error {
7879
steps := make(map[string]bool)
7980
files, err := os.ReadDir(runDir)
8081
if err != nil {
@@ -103,6 +104,9 @@ func waitForStepsToFinish(runDir string) error {
103104
return err
104105
}
105106
}
107+
if sleepInterval > 0 {
108+
time.Sleep(sleepInterval)
109+
}
106110
}
107111
return nil
108112
}
@@ -143,7 +147,15 @@ func readResults(resultsDir, resultFile, stepName string, resultType SidecarLogR
143147
// in their results path and prints them in a structured way to its
144148
// stdout so that the reconciler can parse those logs.
145149
func LookForResults(w io.Writer, runDir string, resultsDir string, resultNames []string, stepResultsDir string, stepResults map[string][]string) error {
146-
if err := waitForStepsToFinish(runDir); err != nil {
150+
intervalStr := os.Getenv("SIDECAR_LOG_POLLING_INTERVAL")
151+
if intervalStr == "" {
152+
intervalStr = "100ms"
153+
}
154+
interval, err := time.ParseDuration(intervalStr)
155+
if err != nil {
156+
interval = 100 * time.Millisecond
157+
}
158+
if err := waitForStepsToFinish(runDir, interval); err != nil {
147159
return fmt.Errorf("error while waiting for the steps to finish %w", err)
148160
}
149161
results := make(chan SidecarLogResult)
@@ -205,7 +217,15 @@ func LookForResults(w io.Writer, runDir string, resultsDir string, resultNames [
205217
// If the provenance file exists, the function extracts artifact information, formats it into a
206218
// JSON string, and encodes it for output alongside relevant metadata (step name, artifact type).
207219
func LookForArtifacts(w io.Writer, names []string, runDir string) error {
208-
if err := waitForStepsToFinish(runDir); err != nil {
220+
intervalStr := os.Getenv("SIDECAR_LOG_POLLING_INTERVAL")
221+
if intervalStr == "" {
222+
intervalStr = "100ms"
223+
}
224+
interval, err := time.ParseDuration(intervalStr)
225+
if err != nil {
226+
interval = 100 * time.Millisecond
227+
}
228+
if err := waitForStepsToFinish(runDir, interval); err != nil {
209229
return err
210230
}
211231

internal/sidecarlogresults/sidecarlogresults_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ import (
2323
"fmt"
2424
"os"
2525
"path/filepath"
26+
"runtime/pprof"
2627
"sort"
2728
"strings"
2829
"testing"
30+
"time"
2931

3032
"github.com/google/go-cmp/cmp"
3133
v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
@@ -608,6 +610,66 @@ func TestExtractStepAndResultFromSidecarResultName_Error(t *testing.T) {
608610
}
609611
}
610612

613+
// TestWaitForStepsToFinish_Profile ensures that waitForStepsToFinish correctly waits for all step output files to appear before returning
614+
// The test creates a file called cpu.prof and starts Go's CPU profiler
615+
// A temporary directory is created to simulate the Tekton step run directory.
616+
// The test creates a large number of subdirectories e.g. step0, step1, ..., each representing a step in a TaskRun
617+
// A goroutine is started that, one by one, writes an out file in each step directory, with a small delay between each
618+
// The test calls the function and waits for it to complete and the profile is saved for later analysis
619+
// This is helpful to compare the impact of code changes, provides a reproducible way to profile and optimize the function waitForStepsToFinish
620+
func TestWaitForStepsToFinish_Profile(t *testing.T) {
621+
f, err := os.Create("cpu.prof")
622+
if err != nil {
623+
t.Fatalf("could not create CPU profile: %v", err)
624+
}
625+
defer func(f *os.File) {
626+
err := f.Close()
627+
if err != nil {
628+
return
629+
}
630+
}(f)
631+
err = pprof.StartCPUProfile(f)
632+
if err != nil {
633+
return
634+
}
635+
defer pprof.StopCPUProfile()
636+
637+
// Setup: create a temp runDir with many fake step files
638+
runDir := t.TempDir()
639+
stepCount := 100
640+
for i := range stepCount {
641+
dir := filepath.Join(runDir, fmt.Sprintf("step%d", i))
642+
err := os.MkdirAll(dir, 0755)
643+
if err != nil {
644+
return
645+
}
646+
}
647+
648+
// Simulate steps finishing one by one with a delay
649+
go func() {
650+
for i := range stepCount {
651+
file := filepath.Join(runDir, fmt.Sprintf("step%d", i), "out")
652+
err := os.WriteFile(file, []byte("done"), 0644)
653+
if err != nil {
654+
return
655+
}
656+
time.Sleep(10 * time.Millisecond)
657+
}
658+
}()
659+
660+
intervalStr := os.Getenv("SIDECAR_LOG_POLLING_INTERVAL")
661+
if intervalStr == "" {
662+
intervalStr = "100ms"
663+
}
664+
interval, err := time.ParseDuration(intervalStr)
665+
if err != nil {
666+
interval = 100 * time.Millisecond
667+
}
668+
if err := waitForStepsToFinish(runDir, interval); err != nil {
669+
t.Fatalf("waitForStepsToFinish failed: %v", err)
670+
}
671+
}
672+
611673
func TestLookForArtifacts(t *testing.T) {
612674
base := basicArtifacts()
613675
modified := base.DeepCopy()

pkg/apis/config/default.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ const (
5454
// Default maximum resolution timeout used by the resolution controller before timing out when exceeded
5555
DefaultMaximumResolutionTimeout = 1 * time.Minute
5656

57+
DefaultSidecarLogPollingInterval = 100 * time.Millisecond
58+
5759
defaultTimeoutMinutesKey = "default-timeout-minutes"
5860
defaultServiceAccountKey = "default-service-account"
5961
defaultManagedByLabelValueKey = "default-managed-by-label-value"
@@ -67,6 +69,7 @@ const (
6769
defaultContainerResourceRequirementsKey = "default-container-resource-requirements"
6870
defaultImagePullBackOffTimeout = "default-imagepullbackoff-timeout"
6971
defaultMaximumResolutionTimeout = "default-maximum-resolution-timeout"
72+
defaultSidecarLogPollingIntervalKey = "default-sidecar-log-polling-interval"
7073
)
7174

7275
// DefaultConfig holds all the default configurations for the config.
@@ -88,6 +91,10 @@ type Defaults struct {
8891
DefaultContainerResourceRequirements map[string]corev1.ResourceRequirements
8992
DefaultImagePullBackOffTimeout time.Duration
9093
DefaultMaximumResolutionTimeout time.Duration
94+
// DefaultSidecarLogPollingInterval specifies how frequently (as a time.Duration) the Tekton sidecar log results container polls for step completion files.
95+
// This value is loaded from the 'sidecar-log-polling-interval' key in the config-defaults ConfigMap.
96+
// It is used to control the responsiveness and resource usage of the sidecar in both production and test environments.
97+
DefaultSidecarLogPollingInterval time.Duration
9198
}
9299

93100
// GetDefaultsConfigName returns the name of the configmap containing all
@@ -120,6 +127,7 @@ func (cfg *Defaults) Equals(other *Defaults) bool {
120127
other.DefaultResolverType == cfg.DefaultResolverType &&
121128
other.DefaultImagePullBackOffTimeout == cfg.DefaultImagePullBackOffTimeout &&
122129
other.DefaultMaximumResolutionTimeout == cfg.DefaultMaximumResolutionTimeout &&
130+
other.DefaultSidecarLogPollingInterval == cfg.DefaultSidecarLogPollingInterval &&
123131
reflect.DeepEqual(other.DefaultForbiddenEnv, cfg.DefaultForbiddenEnv)
124132
}
125133

@@ -134,6 +142,7 @@ func NewDefaultsFromMap(cfgMap map[string]string) (*Defaults, error) {
134142
DefaultResolverType: DefaultResolverTypeValue,
135143
DefaultImagePullBackOffTimeout: DefaultImagePullBackOffTimeout,
136144
DefaultMaximumResolutionTimeout: DefaultMaximumResolutionTimeout,
145+
DefaultSidecarLogPollingInterval: DefaultSidecarLogPollingInterval,
137146
}
138147

139148
if defaultTimeoutMin, ok := cfgMap[defaultTimeoutMinutesKey]; ok {
@@ -220,6 +229,14 @@ func NewDefaultsFromMap(cfgMap map[string]string) (*Defaults, error) {
220229
tc.DefaultMaximumResolutionTimeout = timeout
221230
}
222231

232+
if defaultSidecarPollingInterval, ok := cfgMap[defaultSidecarLogPollingIntervalKey]; ok {
233+
interval, err := time.ParseDuration(defaultSidecarPollingInterval)
234+
if err != nil {
235+
return nil, fmt.Errorf("failed parsing default config %q", defaultSidecarPollingInterval)
236+
}
237+
tc.DefaultSidecarLogPollingInterval = interval
238+
}
239+
223240
return &tc, nil
224241
}
225242

pkg/apis/config/default_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
4646
DefaultResolverType: "git",
4747
DefaultImagePullBackOffTimeout: time.Duration(5) * time.Second,
4848
DefaultMaximumResolutionTimeout: 1 * time.Minute,
49+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
4950
},
5051
fileName: config.GetDefaultsConfigName(),
5152
},
@@ -67,6 +68,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
6768
DefaultMaxMatrixCombinationsCount: 256,
6869
DefaultImagePullBackOffTimeout: 0,
6970
DefaultMaximumResolutionTimeout: 1 * time.Minute,
71+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
7072
},
7173
fileName: "config-defaults-with-pod-template",
7274
},
@@ -91,6 +93,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
9193
DefaultMaxMatrixCombinationsCount: 256,
9294
DefaultImagePullBackOffTimeout: 0,
9395
DefaultMaximumResolutionTimeout: 1 * time.Minute,
96+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
9497
},
9598
},
9699
{
@@ -104,6 +107,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
104107
DefaultMaxMatrixCombinationsCount: 256,
105108
DefaultImagePullBackOffTimeout: 0,
106109
DefaultMaximumResolutionTimeout: 1 * time.Minute,
110+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
107111
},
108112
},
109113
{
@@ -120,6 +124,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
120124
DefaultManagedByLabelValue: config.DefaultManagedByLabelValue,
121125
DefaultImagePullBackOffTimeout: 0,
122126
DefaultMaximumResolutionTimeout: 1 * time.Minute,
127+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
123128
},
124129
},
125130
{
@@ -133,6 +138,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
133138
DefaultForbiddenEnv: []string{"TEKTON_POWER_MODE", "TEST_ENV", "TEST_TEKTON"},
134139
DefaultImagePullBackOffTimeout: time.Duration(15) * time.Second,
135140
DefaultMaximumResolutionTimeout: 1 * time.Minute,
141+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
136142
},
137143
},
138144
{
@@ -146,6 +152,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
146152
DefaultContainerResourceRequirements: map[string]corev1.ResourceRequirements{},
147153
DefaultImagePullBackOffTimeout: 0,
148154
DefaultMaximumResolutionTimeout: 1 * time.Minute,
155+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
149156
},
150157
},
151158
{
@@ -162,6 +169,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
162169
DefaultMaxMatrixCombinationsCount: 256,
163170
DefaultImagePullBackOffTimeout: 0,
164171
DefaultMaximumResolutionTimeout: 1 * time.Minute,
172+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
165173
DefaultContainerResourceRequirements: map[string]corev1.ResourceRequirements{
166174
config.ResourceRequirementDefaultContainerKey: {
167175
Requests: corev1.ResourceList{
@@ -219,6 +227,7 @@ func TestNewDefaultsFromEmptyConfigMap(t *testing.T) {
219227
DefaultMaxMatrixCombinationsCount: 256,
220228
DefaultImagePullBackOffTimeout: 0,
221229
DefaultMaximumResolutionTimeout: 1 * time.Minute,
230+
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
222231
}
223232
verifyConfigFileWithExpectedConfig(t, DefaultsConfigEmptyName, expectedConfig)
224233
}
@@ -417,6 +426,51 @@ func TestEquals(t *testing.T) {
417426
}
418427
}
419428

429+
func TestSidecarLogPollingIntervalParsing(t *testing.T) {
430+
cases := []struct {
431+
name string
432+
data map[string]string
433+
expected time.Duration
434+
wantErr bool
435+
}{
436+
{
437+
name: "valid interval",
438+
data: map[string]string{"default-sidecar-log-polling-interval": "42ms"},
439+
expected: 42 * time.Millisecond,
440+
wantErr: false,
441+
},
442+
{
443+
name: "invalid interval",
444+
data: map[string]string{"default-sidecar-log-polling-interval": "notaduration"},
445+
expected: 0,
446+
wantErr: true,
447+
},
448+
{
449+
name: "not set (default)",
450+
data: map[string]string{},
451+
expected: 100 * time.Millisecond,
452+
wantErr: false,
453+
},
454+
}
455+
for _, tc := range cases {
456+
t.Run(tc.name, func(t *testing.T) {
457+
cfg, err := config.NewDefaultsFromMap(tc.data)
458+
if tc.wantErr {
459+
if err == nil {
460+
t.Errorf("expected error, got nil")
461+
}
462+
return
463+
}
464+
if err != nil {
465+
t.Fatalf("unexpected error: %v", err)
466+
}
467+
if cfg.DefaultSidecarLogPollingInterval != tc.expected {
468+
t.Errorf("got %v, want %v", cfg.DefaultSidecarLogPollingInterval, tc.expected)
469+
}
470+
})
471+
}
472+
}
473+
420474
func verifyConfigFileWithExpectedConfig(t *testing.T, fileName string, expectedConfig *config.Defaults) {
421475
t.Helper()
422476
cm := test.ConfigMapFromTestFile(t, fileName)

0 commit comments

Comments
 (0)