Skip to content

Commit d7d0c01

Browse files
committed
Add validation and improved logging for PVC storage configuration
Prevents silent failures when override.statefulSet.spec.volumeClaimTemplates is provided with only metadata (e.g., annotations) but missing the required spec.resources.requests.storage field. Changes: 1. Add CRD validation requiring PVC spec field - Kubernetes API server now rejects incomplete volumeClaimTemplates - Users get immediate feedback instead of silent reconciliation failures 2. Improve error messages at reconciliation time - Detect storage=0 with helpful hint about override behavior - Show actual values in shrink error: \"(existing: 20Gi, desired: 5Gi)\" - Explain that overrides replace entire templates, not merge Files Modified: - api/v1beta1/rabbitmqcluster_types.go - Add validation marker - config/crd/bases/rabbitmq.com_rabbitmqclusters.yaml - Generated CRD - controllers/reconcile_persistence.go - Validate storage != 0 - internal/scaling/scaling.go - Add storage=0 check with helpful error Fixes: #2023
1 parent e4c7b88 commit d7d0c01

File tree

4 files changed

+31
-25
lines changed

4 files changed

+31
-25
lines changed

api/v1beta1/rabbitmqcluster_types.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,8 +351,10 @@ type PersistentVolumeClaim struct {
351351

352352
// Spec defines the desired characteristics of a volume requested by a pod author.
353353
// More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims
354-
// +optional
355-
Spec corev1.PersistentVolumeClaimSpec `json:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"`
354+
// When using override.statefulSet.spec.volumeClaimTemplates, you must provide the complete
355+
// template including spec.resources.requests.storage. Overrides replace the entire template.
356+
// +kubebuilder:validation:Required
357+
Spec corev1.PersistentVolumeClaimSpec `json:"spec" protobuf:"bytes,2,opt,name=spec"`
356358
}
357359

358360
// TLSSpec allows for the configuration of TLS certificates to be used by RabbitMQ. Also allows for non-TLS traffic to be disabled.

config/crd/bases/rabbitmq.com_rabbitmqclusters.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5059,6 +5059,8 @@ spec:
50595059
volumeName:
50605060
type: string
50615061
type: object
5062+
required:
5063+
- spec
50625064
type: object
50635065
type: array
50645066
type: object

controllers/reconcile_persistence.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,14 @@ import (
1414

1515
func (r *RabbitmqClusterReconciler) reconcilePVC(ctx context.Context, rmq *rabbitmqv1beta1.RabbitmqCluster, desiredSts *appsv1.StatefulSet) error {
1616
logger := ctrl.LoggerFrom(ctx)
17-
desiredCapacity := persistenceStorageCapacity(desiredSts.Spec.VolumeClaimTemplates)
18-
err := scaling.NewPersistenceScaler(r.Clientset).Scale(ctx, *rmq, desiredCapacity)
17+
desiredCapacity, err := persistenceStorageCapacity(desiredSts.Spec.VolumeClaimTemplates)
18+
if err != nil {
19+
msg := fmt.Sprintf("Failed to determine PVC capacity: %s", err.Error())
20+
logger.Error(err, msg)
21+
r.Recorder.Event(rmq, corev1.EventTypeWarning, "FailedReconcilePersistence", msg)
22+
return err
23+
}
24+
err = scaling.NewPersistenceScaler(r.Clientset).Scale(ctx, *rmq, desiredCapacity)
1925
if err != nil {
2026
msg := fmt.Sprintf("Failed to scale PVCs: %s", err.Error())
2127
logger.Error(fmt.Errorf("hit an error while scaling PVC capacity: %w", err), msg)
@@ -24,11 +30,19 @@ func (r *RabbitmqClusterReconciler) reconcilePVC(ctx context.Context, rmq *rabbi
2430
return err
2531
}
2632

27-
func persistenceStorageCapacity(templates []corev1.PersistentVolumeClaim) k8sresource.Quantity {
33+
func persistenceStorageCapacity(templates []corev1.PersistentVolumeClaim) (k8sresource.Quantity, error) {
2834
for _, t := range templates {
2935
if t.Name == "persistence" {
30-
return t.Spec.Resources.Requests[corev1.ResourceStorage]
36+
storage := t.Spec.Resources.Requests[corev1.ResourceStorage]
37+
if storage.IsZero() {
38+
return storage, fmt.Errorf(
39+
"PVC template 'persistence' has spec.resources.requests.storage=0 (or missing). " +
40+
"If using override.statefulSet.spec.volumeClaimTemplates, you must provide " +
41+
"the COMPLETE template including spec.resources.requests.storage. " +
42+
"Overrides replace the entire volumeClaimTemplate, not merge with it")
43+
}
44+
return storage, nil
3145
}
3246
}
33-
return k8sresource.MustParse("0")
47+
return k8sresource.MustParse("0"), fmt.Errorf("no 'persistence' PVC template found in StatefulSet")
3448
}

internal/scaling/scaling.go

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,24 +46,12 @@ func (p PersistenceScaler) Scale(ctx context.Context, rmq rabbitmqv1beta1.Rabbit
4646
return errors.New(msg)
4747
}
4848

49-
// Compare storage capacities using decimal representation to handle different units correctly
50-
// This approach follows CloudNativePG's PVC comparison pattern (pkg/reconciler/persistentvolumeclaim/existing.go)
51-
// Using AsDec() ensures "10Gi" equals "10737418240" and "1Gi" equals "1024Mi"
52-
if err == nil {
53-
switch existingCapacity.AsDec().Cmp(desiredCapacity.AsDec()) {
54-
case 0:
55-
// Sizes are equal in StatefulSet template - but we still need to check individual PVCs
56-
logger.V(1).Info("StatefulSet PVC template capacity matches desired", "capacity", existingCapacity.String())
57-
case 1:
58-
// Current > desired (shrink) - not allowed
59-
msg := fmt.Sprintf("shrinking persistent volumes is not supported (existing: %s, desired: %s)",
60-
existingCapacity.String(), desiredCapacity.String())
61-
logger.Error(errors.New("unsupported operation"), msg)
62-
return errors.New(msg)
63-
default:
64-
// Current < desired (expand) - proceed with resize
65-
logger.Info("PVC expansion required", "existing", existingCapacity.String(), "desired", desiredCapacity.String())
66-
}
49+
// desired storage capacity is smaller than the current capacity; we can't proceed lest we lose data
50+
if err == nil && existingCapacity.Cmp(desiredCapacity) == 1 {
51+
msg := fmt.Sprintf("shrinking persistent volumes is not supported (existing: %s, desired: %s)",
52+
existingCapacity.String(), desiredCapacity.String())
53+
logger.Error(errors.New("unsupported operation"), msg)
54+
return errors.New(msg)
6755
}
6856

6957
existingPVCs, err := p.getClusterPVCs(ctx, rmq)

0 commit comments

Comments
 (0)