Skip to content

Commit 7ac79b8

Browse files
authored
policy-test: Explicitly wait for init containers (#8272)
The policy integration tests sometimes fail in CI due to timeouts waiting for a curl pod's exit code. It's not really clear why this is happening.

This change separates waiting for the init containers from waiting for the exit code so we can log that situation explicitly. Furthermore, logging has been enhanced to include information about init containers.

`curl` invocations now use a timeout and retries to help limit flakiness.

Also, `curl-lock` deletions now wait for the resource to be fully removed.

Signed-off-by: Oliver Gould <ver@buoyant.io>
1 parent 458cd0f commit 7ac79b8

File tree

2 files changed

+48
-5
lines changed

2 files changed

+48
-5
lines changed

policy-test/src/curl.rs

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,15 @@ impl Runner {
4949
/// Deletes the lock configmap, allowing curl pods to execute.
5050
pub async fn delete_lock(&self) {
5151
tracing::trace!(ns = %self.namespace, "Deleting curl-lock");
52-
kube::Api::<k8s::api::core::v1::ConfigMap>::namespaced(
52+
let api = kube::Api::<k8s::api::core::v1::ConfigMap>::namespaced(
5353
self.client.clone(),
5454
&self.namespace,
55+
);
56+
kube::runtime::wait::delete::delete_and_finalize(
57+
api,
58+
"curl-lock",
59+
&kube::api::DeleteParams::foreground(),
5560
)
56-
.delete("curl-lock", &kube::api::DeleteParams::foreground())
5761
.await
5862
.expect("curl-lock must be deleted");
5963
tracing::debug!(ns = %self.namespace, "Deleted curl-lock");
@@ -159,10 +163,12 @@ impl Runner {
159163
// after the configmap is deleted, even with a long timeout.
160164
// Instead, we use a relatively short timeout and retry the
161165
// wait to get a better chance.
162-
command: Some(vec!["sh".to_string(), "-c".to_string()]),
166+
command: Some(vec!["sh".to_string(), "-c".to_string()]),
163167
args: Some(vec![format!(
164168
"for i in $(seq 12) ; do \
169+
echo waiting 10s for curl-lock to be deleted ; \
165170
if kubectl wait --timeout=10s --for=delete --namespace={ns} cm/curl-lock ; then \
171+
echo curl-lock deleted ; \
166172
exit 0 ; \
167173
fi ; \
168174
done ; \
@@ -174,7 +180,7 @@ impl Runner {
174180
name: "curl".to_string(),
175181
image: Some("docker.io/curlimages/curl:latest".to_string()),
176182
args: Some(
177-
vec!["curl", "-sSfv", target_url]
183+
vec!["curl", "-sSfv", "--max-time", "10", "--retry", "12", target_url]
178184
.into_iter()
179185
.map(Into::into)
180186
.collect(),
@@ -201,6 +207,8 @@ impl Running {
201207

202208
/// Waits for the curl container to complete and returns its exit code.
203209
pub async fn exit_code(self) -> i32 {
210+
self.inits_complete().await;
211+
204212
fn get_exit_code(pod: &k8s::Pod) -> Option<i32> {
205213
let c = pod
206214
.status
@@ -220,7 +228,7 @@ impl Running {
220228
&self.name,
221229
|obj: Option<&k8s::Pod>| -> bool { obj.and_then(get_exit_code).is_some() },
222230
);
223-
match time::timeout(time::Duration::from_secs(120), finished).await {
231+
match time::timeout(time::Duration::from_secs(30), finished).await {
224232
Ok(Ok(())) => {}
225233
Ok(Err(error)) => panic!("Failed to wait for exit code: {}: {}", self.name, error),
226234
Err(_timeout) => {
@@ -241,4 +249,36 @@ impl Running {
241249

242250
code
243251
}
252+
253+
async fn inits_complete(&self) {
254+
let api = kube::Api::namespaced(self.client.clone(), &self.namespace);
255+
let init_complete = kube::runtime::wait::await_condition(
256+
api,
257+
&self.name,
258+
|pod: Option<&k8s::Pod>| -> bool {
259+
if let Some(pod) = pod {
260+
if let Some(status) = pod.status.as_ref() {
261+
return status.init_container_statuses.iter().flatten().all(|init| {
262+
init.state
263+
.as_ref()
264+
.map(|s| s.terminated.is_some())
265+
.unwrap_or(false)
266+
});
267+
}
268+
}
269+
false
270+
},
271+
);
272+
273+
match time::timeout(time::Duration::from_secs(120), init_complete).await {
274+
Ok(Ok(())) => {}
275+
Ok(Err(error)) => panic!("Failed to watch pod status: {}: {}", self.name, error),
276+
Err(_timeout) => {
277+
panic!(
278+
"Timeout waiting for init containers to complete: {}",
279+
self.name
280+
);
281+
}
282+
};
283+
}
244284
}

policy-test/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ where
149149
if let Some(status) = p.status {
150150
let _span = tracing::info_span!("pod", ns = %ns.name(), name = %pod).entered();
151151
tracing::trace!(reason = ?status.reason, message = ?status.message);
152+
for c in status.init_container_statuses.into_iter().flatten() {
153+
tracing::trace!(init_container = %c.name, ready = %c.ready, state = ?c.state);
154+
}
152155
for c in status.container_statuses.into_iter().flatten() {
153156
tracing::trace!(container = %c.name, ready = %c.ready, state = ?c.state);
154157
}

0 commit comments

Comments
 (0)