Skip to content

Commit b8e725b

Browse files
mknyszek authored and gopherbot committed
sweet: don't start diagnostics until all cockroach instances are ready
There's currently a flake with TestSweetEndToEnd, specifically with testing pgo, wherein we'll start grabbing diagnostics from cockroach instances too soon.

Change-Id: I7eda15ee8d3401c591ad568b8c8bb125ace72e80
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/614535
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent c934f1d commit b8e725b

File tree

1 file changed

+15
-6
lines changed
  • sweet/benchmarks/cockroachdb

1 file changed

+15
-6
lines changed

sweet/benchmarks/cockroachdb/main.go

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ import (
2020
"runtime"
2121
"strconv"
2222
"strings"
23+
"sync"
2324
"syscall"
2425
"time"
2526

@@ -163,15 +164,19 @@ func launchCockroachCluster(cfg *config) ([]*cockroachdbInstance, error) {
163164
}
164165

165166
// waitForCluster pings nodes in the cluster until one responds, or
166-
// we time out. We only care to wait for one node to respond as the
167-
// workload will work as long as it can connect to one node initially.
168-
// The --ramp flag will take care of startup noise.
167+
// we time out. We wait for all nodes to respond because even if the
168+
// workload can start before all nodes are ready, that's not true for
169+
// collecting diagnostic data.
169170
func waitForCluster(instances []*cockroachdbInstance, cfg *config) error {
170171
ctx, cancel := context.WithCancel(context.Background())
171172
defer cancel()
173+
174+
var wg sync.WaitGroup
172175
for _, inst := range instances {
173176
inst := inst
174-
go func(ctx context.Context) {
177+
wg.Add(1)
178+
go func(context.Context) {
179+
defer wg.Done()
175180
for {
176181
select {
177182
case <-ctx.Done():
@@ -181,16 +186,20 @@ func waitForCluster(instances []*cockroachdbInstance, cfg *config) error {
181186
// 5 seconds first and between pings. 5 seconds was chosen through
182187
// trial and error as a time that nodes are *usually* ready by.
183188
if err := inst.ping(cfg); err == nil {
184-
cancel()
185189
return
186190
}
187191
}
188192
}
189193
}(ctx)
190194
}
195+
done := make(chan struct{})
196+
go func() {
197+
wg.Wait()
198+
done <- struct{}{}
199+
}()
191200

192201
select {
193-
case <-ctx.Done():
202+
case <-done:
194203
case <-time.After(time.Minute):
195204
return errors.New("benchmark timed out waiting for cluster to be ready")
196205
}

0 commit comments

Comments
 (0)