Skip to content

Commit 55d0cee

Browse files
JAORMXclaude
andauthored
Fix VirtualMCPServer controller to check PodReady condition (#2901)
The VirtualMCPServer controller was marking the CR as Ready based on pod.Status.Phase == PodRunning, but this doesn't guarantee the pod is actually ready to serve traffic. A pod can be in Running phase while still waiting for its readiness probe to pass. This caused flaky E2E tests where: 1. Pod starts → Phase becomes Running 2. Controller sees Running → marks CR as Ready 3. Test sees CR Ready → tries to connect 4. Pod hasn't passed readiness probe → EOF error Now the controller checks the PodReady condition in pod.Status.Conditions, which is the authoritative signal that a pod can receive traffic. This aligns with Kubernetes semantics and ensures the VirtualMCPServer is only marked Ready when the underlying HTTP server is actually responding. Changes: - Check PodReady condition instead of PodRunning phase - Rename 'running' counter to 'ready' for clarity - Add test case for "running but not ready" pods - Update existing test to set PodReady condition 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude <noreply@anthropic.com>
1 parent 32b63c5 commit 55d0cee

File tree

2 files changed

+69
-19
lines changed

2 files changed

+69
-19
lines changed

cmd/thv-operator/controllers/virtualmcpserver_controller.go

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,14 +1019,17 @@ func (*VirtualMCPServerReconciler) determineStatusFromBackends(
10191019
}
10201020
}
10211021

1022-
// determineStatusFromPods determines the appropriate status based on pod states
1022+
// determineStatusFromPods determines the appropriate status based on pod states.
1023+
// The 'ready' parameter counts pods that have passed their readiness probes (PodReady condition is True),
1024+
// not just pods in Running phase. This ensures the VirtualMCPServer is only marked Ready when
1025+
// the underlying pods are actually ready to serve traffic.
10231026
func (r *VirtualMCPServerReconciler) determineStatusFromPods(
10241027
ctx context.Context,
10251028
vmcp *mcpv1alpha1.VirtualMCPServer,
1026-
running, pending, failed int,
1029+
ready, pending, failed int,
10271030
) statusDecision {
1028-
// Handle non-running states first (early returns reduce nesting)
1029-
if running == 0 {
1031+
// Handle non-ready states first (early returns reduce nesting)
1032+
if ready == 0 {
10301033
if failed > 0 {
10311034
return statusDecision{
10321035
phase: mcpv1alpha1.VirtualMCPServerPhaseFailed,
@@ -1050,9 +1053,9 @@ func (r *VirtualMCPServerReconciler) determineStatusFromPods(
10501053
}
10511054
}
10521055

1053-
// Pods are running - check backend health if backends exist
1056+
// Pods are ready (passed readiness probes) - check backend health if backends exist
10541057
if len(vmcp.Status.DiscoveredBackends) == 0 {
1055-
// No backends discovered yet - pods running is sufficient for Ready
1058+
// No backends discovered yet - pods ready is sufficient for Ready
10561059
return statusDecision{
10571060
phase: mcpv1alpha1.VirtualMCPServerPhaseReady,
10581061
message: "Virtual MCP server is running",
@@ -1081,25 +1084,37 @@ func (r *VirtualMCPServerReconciler) updateVirtualMCPServerStatus(
10811084
return err
10821085
}
10831086

1084-
// Count pod states
1085-
var running, pending, failed int
1087+
// Count pod states based on actual readiness, not just phase.
1088+
// A pod in Running phase may not be ready to serve traffic if it hasn't
1089+
// passed its readiness probe yet. We must check the PodReady condition.
1090+
var ready, pending, failed int
10861091
for _, pod := range podList.Items {
1087-
switch pod.Status.Phase {
1088-
case corev1.PodRunning:
1089-
running++
1090-
case corev1.PodPending:
1091-
pending++
1092-
case corev1.PodFailed:
1092+
// Check for terminal failure states first
1093+
if pod.Status.Phase == corev1.PodFailed {
10931094
failed++
1094-
case corev1.PodSucceeded:
1095-
running++
1096-
case corev1.PodUnknown:
1095+
continue
1096+
}
1097+
1098+
// Check if pod is actually ready to serve traffic (passed readiness probes)
1099+
// This is the authoritative signal that the pod can handle requests
1100+
isPodReady := false
1101+
for _, condition := range pod.Status.Conditions {
1102+
if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue {
1103+
isPodReady = true
1104+
break
1105+
}
1106+
}
1107+
1108+
if isPodReady {
1109+
ready++
1110+
} else {
1111+
// Pod exists but isn't ready yet (still starting, or readiness probe failing)
10971112
pending++
10981113
}
10991114
}
11001115

11011116
// Determine status in one place (no branching/repetition)
1102-
decision := r.determineStatusFromPods(ctx, vmcp, running, pending, failed)
1117+
decision := r.determineStatusFromPods(ctx, vmcp, ready, pending, failed)
11031118

11041119
// Apply all status updates at once
11051120
statusManager.SetPhase(decision.phase)

cmd/thv-operator/controllers/virtualmcpserver_controller_test.go

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ func TestVirtualMCPServerUpdateStatus(t *testing.T) {
576576
expectedPhase mcpv1alpha1.VirtualMCPServerPhase
577577
}{
578578
{
579-
name: "running pods",
579+
name: "ready pods",
580580
vmcp: &mcpv1alpha1.VirtualMCPServer{
581581
ObjectMeta: metav1.ObjectMeta{
582582
Name: testVmcpName,
@@ -592,11 +592,46 @@ func TestVirtualMCPServerUpdateStatus(t *testing.T) {
592592
},
593593
Status: corev1.PodStatus{
594594
Phase: corev1.PodRunning,
595+
Conditions: []corev1.PodCondition{
596+
{
597+
Type: corev1.PodReady,
598+
Status: corev1.ConditionTrue,
599+
},
600+
},
595601
},
596602
},
597603
},
598604
expectedPhase: mcpv1alpha1.VirtualMCPServerPhaseReady,
599605
},
606+
{
607+
name: "running but not ready pods",
608+
vmcp: &mcpv1alpha1.VirtualMCPServer{
609+
ObjectMeta: metav1.ObjectMeta{
610+
Name: testVmcpName,
611+
Namespace: "default",
612+
},
613+
},
614+
pods: []corev1.Pod{
615+
{
616+
ObjectMeta: metav1.ObjectMeta{
617+
Name: testVmcpName + "-pod-1",
618+
Namespace: "default",
619+
Labels: labelsForVirtualMCPServer(testVmcpName),
620+
},
621+
Status: corev1.PodStatus{
622+
Phase: corev1.PodRunning,
623+
// No PodReady condition or PodReady=False means pod isn't ready yet
624+
Conditions: []corev1.PodCondition{
625+
{
626+
Type: corev1.PodReady,
627+
Status: corev1.ConditionFalse,
628+
},
629+
},
630+
},
631+
},
632+
},
633+
expectedPhase: mcpv1alpha1.VirtualMCPServerPhasePending,
634+
},
600635
{
601636
name: "pending pods",
602637
vmcp: &mcpv1alpha1.VirtualMCPServer{

0 commit comments

Comments
 (0)