Skip to content

[release-v1.0.x] efficient polling in waitForStepsToFinish #8910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions config/config-defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,11 @@ data:
# limits:
# memory: "256Mi"
# cpu: "500m"

# default-sidecar-log-polling-interval specifies the polling interval for the Tekton sidecar log results container.
# This controls how frequently the sidecar checks for step completion files written by steps in a TaskRun.
# Lower values (e.g., "10ms") make the sidecar more responsive but may increase CPU usage; higher values (e.g., "1s")
# reduce resource usage but may delay result collection.
# This value is used by the sidecar-tekton-log-results container and can be tuned for performance or test scenarios.
# Example values: "100ms", "500ms", "1s"
default-sidecar-log-polling-interval: "100ms"
19 changes: 19 additions & 0 deletions docs/additional-configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ The example below customizes the following:
- the default maximum combinations of `Parameters` in a `Matrix` that can be used to fan out a `PipelineTask`. For
more information, see [`Matrix`](matrix.md).
- the default resolver type to `git`.
- the default polling interval for the sidecar log results container via `default-sidecar-log-polling-interval`.

```yaml
apiVersion: v1
Expand All @@ -260,8 +261,26 @@ data:
emptyDir: {}
default-max-matrix-combinations-count: "1024"
default-resolver-type: "git"
default-sidecar-log-polling-interval: "100ms"
```

### `default-sidecar-log-polling-interval`

The `default-sidecar-log-polling-interval` key in the `config-defaults` ConfigMap specifies how frequently the Tekton
sidecar log results container polls for step completion files written by steps in a TaskRun. Lower values (e.g., `10ms`)
make the sidecar more responsive but may increase CPU usage; higher values (e.g., `1s`) reduce resource usage but may
delay result collection. This value is used by the `sidecar-tekton-log-results` container and can be tuned for performance
or test scenarios.

**Example values:**
- `100ms` (default)
- `500ms`
- `1s`
- `10ms` (for fast polling in tests)

**Note:** The `default-sidecar-log-polling-interval` setting is only applicable when results are created using the
[sidecar approach](#enabling-larger-results-using-sidecar-logs).

**Note:** The `_example` key in the provided [config-defaults.yaml](./../config/config-defaults.yaml)
file lists the keys you can customize along with their default values.

Expand Down
26 changes: 23 additions & 3 deletions internal/sidecarlogresults/sidecarlogresults.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"os"
"path/filepath"
"strings"
"time"

"github.com/tektoncd/pipeline/pkg/apis/config"
"github.com/tektoncd/pipeline/pkg/apis/pipeline"
Expand Down Expand Up @@ -74,7 +75,7 @@
return json.NewEncoder(w).Encode(v)
}

func waitForStepsToFinish(runDir string) error {
func waitForStepsToFinish(runDir string, sleepInterval time.Duration) error {
steps := make(map[string]bool)
files, err := os.ReadDir(runDir)
if err != nil {
Expand Down Expand Up @@ -103,6 +104,9 @@
return err
}
}
if sleepInterval > 0 {
time.Sleep(sleepInterval)
}
}
return nil
}
Expand Down Expand Up @@ -143,7 +147,15 @@
// in their results path and prints them in a structured way to its
// stdout so that the reconciler can parse those logs.
func LookForResults(w io.Writer, runDir string, resultsDir string, resultNames []string, stepResultsDir string, stepResults map[string][]string) error {
if err := waitForStepsToFinish(runDir); err != nil {
intervalStr := os.Getenv("SIDECAR_LOG_POLLING_INTERVAL")
if intervalStr == "" {
intervalStr = "100ms"

Check failure on line 152 in internal/sidecarlogresults/sidecarlogresults.go

View workflow job for this annotation

GitHub Actions / lint

string `100ms` has 3 occurrences, make it a constant (goconst)
}
interval, err := time.ParseDuration(intervalStr)
if err != nil {
interval = 100 * time.Millisecond
}
if err := waitForStepsToFinish(runDir, interval); err != nil {
return fmt.Errorf("error while waiting for the steps to finish %w", err)
}
results := make(chan SidecarLogResult)
Expand Down Expand Up @@ -205,7 +217,15 @@
// If the provenance file exists, the function extracts artifact information, formats it into a
// JSON string, and encodes it for output alongside relevant metadata (step name, artifact type).
func LookForArtifacts(w io.Writer, names []string, runDir string) error {
if err := waitForStepsToFinish(runDir); err != nil {
intervalStr := os.Getenv("SIDECAR_LOG_POLLING_INTERVAL")
if intervalStr == "" {
intervalStr = "100ms"
}
interval, err := time.ParseDuration(intervalStr)
if err != nil {
interval = 100 * time.Millisecond
}
if err := waitForStepsToFinish(runDir, interval); err != nil {
return err
}

Expand Down
62 changes: 62 additions & 0 deletions internal/sidecarlogresults/sidecarlogresults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import (
"fmt"
"os"
"path/filepath"
"runtime/pprof"
"sort"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1"
Expand Down Expand Up @@ -609,6 +611,66 @@ func TestExtractStepAndResultFromSidecarResultName_Error(t *testing.T) {
}
}

// TestWaitForStepsToFinish_Profile ensures that waitForStepsToFinish correctly waits for all step output files to appear before returning
// The test creates a file called cpu.prof and starts Go's CPU profiler
// A temporary directory is created to simulate the Tekton step run directory.
// The test creates a large number of subdirectories e.g. step0, step1, ..., each representing a step in a TaskRun
// A goroutine is started that, one by one, writes an out file in each step directory, with a small delay between each
// The test calls the function and waits for it to complete and the profile is saved for later analysis
// This is helpful to compare the impact of code changes, provides a reproducible way to profile and optimize the function waitForStepsToFinish
func TestWaitForStepsToFinish_Profile(t *testing.T) {
f, err := os.Create("cpu.prof")
if err != nil {
t.Fatalf("could not create CPU profile: %v", err)
}
defer func(f *os.File) {
err := f.Close()
if err != nil {
return
}
}(f)
err = pprof.StartCPUProfile(f)
if err != nil {
return
}
defer pprof.StopCPUProfile()

// Setup: create a temp runDir with many fake step files
runDir := t.TempDir()
stepCount := 100
for i := range stepCount {
dir := filepath.Join(runDir, fmt.Sprintf("step%d", i))
err := os.MkdirAll(dir, 0755)
if err != nil {
return
}
}

// Simulate steps finishing one by one with a delay
go func() {
for i := range stepCount {
file := filepath.Join(runDir, fmt.Sprintf("step%d", i), "out")
err := os.WriteFile(file, []byte("done"), 0644)
if err != nil {
return
}
time.Sleep(10 * time.Millisecond)
}
}()

intervalStr := os.Getenv("SIDECAR_LOG_POLLING_INTERVAL")
if intervalStr == "" {
intervalStr = "100ms"
}
interval, err := time.ParseDuration(intervalStr)
if err != nil {
interval = 100 * time.Millisecond
}
if err := waitForStepsToFinish(runDir, interval); err != nil {
t.Fatalf("waitForStepsToFinish failed: %v", err)
}
}

func TestLookForArtifacts(t *testing.T) {
base := basicArtifacts()
modified := base.DeepCopy()
Expand Down
17 changes: 17 additions & 0 deletions pkg/apis/config/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ const (
// Default maximum resolution timeout used by the resolution controller before timing out when exceeded
DefaultMaximumResolutionTimeout = 1 * time.Minute

DefaultSidecarLogPollingInterval = 100 * time.Millisecond

defaultTimeoutMinutesKey = "default-timeout-minutes"
defaultServiceAccountKey = "default-service-account"
defaultManagedByLabelValueKey = "default-managed-by-label-value"
Expand All @@ -67,6 +69,7 @@ const (
defaultContainerResourceRequirementsKey = "default-container-resource-requirements"
defaultImagePullBackOffTimeout = "default-imagepullbackoff-timeout"
defaultMaximumResolutionTimeout = "default-maximum-resolution-timeout"
defaultSidecarLogPollingIntervalKey = "default-sidecar-log-polling-interval"
)

// DefaultConfig holds all the default configurations for the config.
Expand All @@ -88,6 +91,10 @@ type Defaults struct {
DefaultContainerResourceRequirements map[string]corev1.ResourceRequirements
DefaultImagePullBackOffTimeout time.Duration
DefaultMaximumResolutionTimeout time.Duration
// DefaultSidecarLogPollingInterval specifies how frequently (as a time.Duration) the Tekton sidecar log results container polls for step completion files.
// This value is loaded from the 'default-sidecar-log-polling-interval' key in the config-defaults ConfigMap.
// It is used to control the responsiveness and resource usage of the sidecar in both production and test environments.
DefaultSidecarLogPollingInterval time.Duration
}

// GetDefaultsConfigName returns the name of the configmap containing all
Expand Down Expand Up @@ -120,6 +127,7 @@ func (cfg *Defaults) Equals(other *Defaults) bool {
other.DefaultResolverType == cfg.DefaultResolverType &&
other.DefaultImagePullBackOffTimeout == cfg.DefaultImagePullBackOffTimeout &&
other.DefaultMaximumResolutionTimeout == cfg.DefaultMaximumResolutionTimeout &&
other.DefaultSidecarLogPollingInterval == cfg.DefaultSidecarLogPollingInterval &&
reflect.DeepEqual(other.DefaultForbiddenEnv, cfg.DefaultForbiddenEnv)
}

Expand All @@ -134,6 +142,7 @@ func NewDefaultsFromMap(cfgMap map[string]string) (*Defaults, error) {
DefaultResolverType: DefaultResolverTypeValue,
DefaultImagePullBackOffTimeout: DefaultImagePullBackOffTimeout,
DefaultMaximumResolutionTimeout: DefaultMaximumResolutionTimeout,
DefaultSidecarLogPollingInterval: DefaultSidecarLogPollingInterval,
}

if defaultTimeoutMin, ok := cfgMap[defaultTimeoutMinutesKey]; ok {
Expand Down Expand Up @@ -220,6 +229,14 @@ func NewDefaultsFromMap(cfgMap map[string]string) (*Defaults, error) {
tc.DefaultMaximumResolutionTimeout = timeout
}

if defaultSidecarPollingInterval, ok := cfgMap[defaultSidecarLogPollingIntervalKey]; ok {
interval, err := time.ParseDuration(defaultSidecarPollingInterval)
if err != nil {
return nil, fmt.Errorf("failed parsing default config %q", defaultSidecarPollingInterval)
}
tc.DefaultSidecarLogPollingInterval = interval
}

return &tc, nil
}

Expand Down
54 changes: 54 additions & 0 deletions pkg/apis/config/default_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultResolverType: "git",
DefaultImagePullBackOffTimeout: time.Duration(5) * time.Second,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
fileName: config.GetDefaultsConfigName(),
},
Expand All @@ -67,6 +68,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultMaxMatrixCombinationsCount: 256,
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
fileName: "config-defaults-with-pod-template",
},
Expand All @@ -91,6 +93,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultMaxMatrixCombinationsCount: 256,
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
},
{
Expand All @@ -104,6 +107,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultMaxMatrixCombinationsCount: 256,
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
},
{
Expand All @@ -120,6 +124,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultManagedByLabelValue: config.DefaultManagedByLabelValue,
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
},
{
Expand All @@ -133,6 +138,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultForbiddenEnv: []string{"TEKTON_POWER_MODE", "TEST_ENV", "TEST_TEKTON"},
DefaultImagePullBackOffTimeout: time.Duration(15) * time.Second,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
},
{
Expand All @@ -146,6 +152,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultContainerResourceRequirements: map[string]corev1.ResourceRequirements{},
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
},
},
{
Expand All @@ -162,6 +169,7 @@ func TestNewDefaultsFromConfigMap(t *testing.T) {
DefaultMaxMatrixCombinationsCount: 256,
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
DefaultContainerResourceRequirements: map[string]corev1.ResourceRequirements{
config.ResourceRequirementDefaultContainerKey: {
Requests: corev1.ResourceList{
Expand Down Expand Up @@ -219,6 +227,7 @@ func TestNewDefaultsFromEmptyConfigMap(t *testing.T) {
DefaultMaxMatrixCombinationsCount: 256,
DefaultImagePullBackOffTimeout: 0,
DefaultMaximumResolutionTimeout: 1 * time.Minute,
DefaultSidecarLogPollingInterval: 100 * time.Millisecond,
}
verifyConfigFileWithExpectedConfig(t, DefaultsConfigEmptyName, expectedConfig)
}
Expand Down Expand Up @@ -417,6 +426,51 @@ func TestEquals(t *testing.T) {
}
}

// TestSidecarLogPollingIntervalParsing checks how config.NewDefaultsFromMap
// handles the "default-sidecar-log-polling-interval" key: a valid duration is
// stored in DefaultSidecarLogPollingInterval, an unparsable value produces an
// error, and a missing key leaves the field at 100ms.
func TestSidecarLogPollingIntervalParsing(t *testing.T) {
	const intervalKey = "default-sidecar-log-polling-interval"

	type scenario struct {
		name     string
		data     map[string]string
		expected time.Duration
		wantErr  bool
	}

	scenarios := []scenario{{
		name:     "valid interval",
		data:     map[string]string{intervalKey: "42ms"},
		expected: 42 * time.Millisecond,
	}, {
		name:    "invalid interval",
		data:    map[string]string{intervalKey: "notaduration"},
		wantErr: true,
	}, {
		name:     "not set (default)",
		data:     map[string]string{},
		expected: 100 * time.Millisecond,
	}}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			defaults, parseErr := config.NewDefaultsFromMap(sc.data)

			switch {
			case sc.wantErr:
				// Only the presence of an error matters for failure cases; the
				// returned config is not inspected.
				if parseErr == nil {
					t.Errorf("expected error, got nil")
				}
			case parseErr != nil:
				t.Fatalf("unexpected error: %v", parseErr)
			case defaults.DefaultSidecarLogPollingInterval != sc.expected:
				t.Errorf("got %v, want %v", defaults.DefaultSidecarLogPollingInterval, sc.expected)
			}
		})
	}
}

func verifyConfigFileWithExpectedConfig(t *testing.T, fileName string, expectedConfig *config.Defaults) {
t.Helper()
cm := test.ConfigMapFromTestFile(t, fileName)
Expand Down
Loading
Loading