Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
714f132
feat: added two new fields to cr and generated boilerplate
hmbill694 Dec 11, 2025
6af6e45
feat: add checks to see if we have a missing artifact in provisioner
hmbill694 Dec 11, 2025
48b244c
refactor: rename fields to be more descriptive
hmbill694 Dec 11, 2025
3cab66b
feat: add new values to config to handle retrying totally failed syncs
hmbill694 Dec 11, 2025
4b5d33d
feat: wired up
hmbill694 Dec 12, 2025
8d84152
refactor: moved to time based failure
hmbill694 Dec 12, 2025
0e36d00
refactor: let library check on sync
hmbill694 Dec 12, 2025
a40b2c4
refactor: remove other unneeded fields
hmbill694 Dec 12, 2025
2477554
refactor: formatting
hmbill694 Dec 12, 2025
19ac022
chore: generate manifests again
hmbill694 Dec 12, 2025
2fbcbde
fix: messy names and duplicate env var targeting
hmbill694 Dec 12, 2025
b11ce83
feat: added more condition reasons
hmbill694 Dec 12, 2025
e884a61
feat: added new phase for visibility into retry loop
hmbill694 Dec 12, 2025
25c789a
feat: handling new phase in controller and service code
hmbill694 Dec 12, 2025
d0dd062
chore: regenerate chart to include new phase
hmbill694 Dec 12, 2025
f43bbb0
fix: wait 10 seconds before checking sync status again
hmbill694 Dec 12, 2025
2ef59b3
chore: regenerate resources after adding LastFailureTime to spec
hmbill694 Dec 12, 2025
c9f8e6b
fix: resolve finalizer conflict and now respecting backoff behavior
hmbill694 Dec 12, 2025
597ac9e
chore: regenerate the chart
hmbill694 Dec 12, 2025
9bb15bd
fix: pr feedback
hmbill694 Dec 16, 2025
cb5996f
fix: resolve nit
hmbill694 Dec 16, 2025
db2350c
fix: consistent names
hmbill694 Dec 16, 2025
dca4302
fix: better boolean name
hmbill694 Dec 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions api/v1alpha1/vmdiskimage_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,24 @@ const (

// Condition Reasons
//
// Machine-readable reason strings for VMDiskImage status conditions
// (presumably the Reason of the metav1.Condition entries kept in
// VMDiskImageStatus.Conditions — confirm against the callers).
//
// NOTE(review): this listing is a rendered diff, so the six pre-change
// declarations come first and are repeated inside the nine post-change
// declarations that follow them. The change adds
// ReasonMissingSourceArtifact, ReasonSyncAttemptDurationExceeded and
// ReasonUnknownSyncFailure.
const (
// Pre-change declarations.
ReasonResourceCreationFailed string = "ResourceCreationFailed"
// NOTE(review): the identifier is misspelled ("Resouce") while its value is
// spelled correctly. It is exported, so a rename must be coordinated with
// every caller rather than done in place here.
ReasonResouceUpdateFailed string = "ResourceUpdateFailed"
ReasonQueued string = "Queued"
ReasonSyncing string = "Syncing"
ReasonRetryLimitExceeded string = "RetryLimitExceeded"
ReasonSynced string = "Synced"
// Post-change declarations.
ReasonResourceCreationFailed string = "ResourceCreationFailed"
ReasonResouceUpdateFailed string = "ResourceUpdateFailed"
ReasonQueued string = "Queued"
ReasonSyncing string = "Syncing"
ReasonRetryLimitExceeded string = "RetryLimitExceeded"
// The next three reasons are new in this change. Presumably they separate
// a missing source artifact, a single attempt running too long, and any
// other sync failure — TODO confirm where each one is set.
ReasonMissingSourceArtifact string = "MissingSourceArtifact"
ReasonSyncAttemptDurationExceeded string = "SyncAttemptDurationExceeded"
ReasonUnknownSyncFailure string = "UnknownSyncFailure"
ReasonSynced string = "Synced"
)

// CRD phases
//
// Values stored in VMDiskImageStatus.Phase. The post-change set matches the
// kubebuilder enum on that field (Queued;Syncing;Ready;Failed;RetryableFailure).
//
// NOTE(review): this listing is a rendered diff, so the four pre-change
// declarations come first and are repeated inside the five post-change
// declarations that follow them. The change adds PhaseRetryableFailure.
const (
// Pre-change declarations.
PhaseQueued string = "Queued"
PhaseSyncing string = "Syncing"
PhaseReady string = "Ready"
PhaseFailed string = "Failed"
// Post-change declarations.
PhaseQueued string = "Queued"
// Reconcile hands resources in this phase to TransitonFromSyncing.
PhaseSyncing string = "Syncing"
// Reconcile returns an empty result for Ready and Failed, so neither phase
// is requeued from that switch.
PhaseReady string = "Ready"
// New in this change: Reconcile hands resources in this phase to AttemptRetry.
PhaseRetryableFailure string = "RetryableFailure"
PhaseFailed string = "Failed"
)

// VMDiskImage Labels
Expand Down Expand Up @@ -94,7 +98,7 @@ type VMDiskImageStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file

// +kubebuilder:validation:Enum=Queued;Syncing;Ready;Failed
// +kubebuilder:validation:Enum=Queued;Syncing;Ready;Failed;RetryableFailure
Phase string `json:"phase"`

// A human-readable message providing more details about the current phase.
Expand All @@ -103,7 +107,8 @@ type VMDiskImageStatus struct {
// Conditions of the VMDiskImage resource.
Conditions []metav1.Condition `json:"conditions,omitempty"`

FailureCount int `json:"failureCount,omitempty"`
FailureCount int `json:"failureCount,omitempty"`
LastFailureTime *metav1.Time `json:"lastFailureTime,omitempty"`
}

// +kubebuilder:object:root=true
Expand Down
4 changes: 4 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ spec:
type: array
failureCount:
type: integer
lastFailureTime:
format: date-time
type: string
message:
description: A human-readable message providing more details about the current phase.
type: string
Expand All @@ -146,6 +149,7 @@ spec:
- Syncing
- Ready
- Failed
- RetryableFailure
type: string
required:
- phase
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/crd.pelotech.ot_vmdiskimages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ spec:
type: array
failureCount:
type: integer
lastFailureTime:
format: date-time
type: string
message:
description: A human-readable message providing more details about
the current phase.
Expand All @@ -150,6 +153,7 @@ spec:
- Syncing
- Ready
- Failed
- RetryableFailure
type: string
required:
- phase
Expand Down
4 changes: 4 additions & 0 deletions dist/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ spec:
type: array
failureCount:
type: integer
lastFailureTime:
format: date-time
type: string
message:
description: A human-readable message providing more details about
the current phase.
Expand All @@ -158,6 +161,7 @@ spec:
- Syncing
- Ready
- Failed
- RetryableFailure
type: string
required:
- phase
Expand Down
42 changes: 24 additions & 18 deletions internal/vm-disk-image/config/vmdi-controller-config-reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,44 @@ import (
)

// Fallback values passed to the corecfg Get*EnvOrDefault lookups in
// LoadVMDIControllerConfigFromEnv.
//
// NOTE(review): this listing is a rendered diff, so the four pre-change
// constants come first, followed by the five post-change constants.
// defaultConcurrency and defaultMaxSyncDuration appear twice with different
// values (10 -> 5 and 1h -> 12h); defaultRetryLimit and
// defaultBackoffDuration only exist before the change.
const (
// Pre-change defaults.
defaultConcurrency = 10 // TODO: We will need to tune this default
defaultRetryLimit = 2
defaultBackoffDuration = 10 * time.Second
defaultMaxSyncDuration = 1 * time.Hour
// Post-change defaults.
defaultConcurrency = 5 // TODO: We will need to tune this default
// Fallback for MAX_SYNC_RETRY_BACKOFF_DURATION.
defaultMaxBackoffDelay = 1 * time.Hour
// Fallback for MAX_SYNC_DURATION.
defaultMaxSyncDuration = 12 * time.Hour
// Fallback for MAX_SYNC_ATTEMPT_RETRIES.
defaultMaxSyncAttemptRetries = 3
// Fallback for MAX_SYNC_ATTEMPT_DURATION.
defaultMaxSyncAttemptDuration = 1 * time.Hour
)

// VMDiskImageControllerConfig holds the tunables for the VMDiskImage
// controller, as returned by LoadVMDIControllerConfigFromEnv.
//
// NOTE(review): this listing is a rendered diff, so the four pre-change
// fields come first, followed by the five post-change fields. RetryLimit and
// RetryBackoffDuration only exist before the change.
type VMDiskImageControllerConfig struct {
// Pre-change fields.
Concurrency int
RetryLimit int
RetryBackoffDuration time.Duration
MaxSyncDuration time.Duration
// Post-change fields.
// The max amount of VMDIs that may be syncing at one time.
Concurrency int
// The longest the controller will ever wait to retry.
MaxBackoffDelay time.Duration
// How long a sync is tried before it is failed forever.
MaxSyncDuration time.Duration
// How long a VMDI may sit in syncing status.
MaxSyncAttemptDuration time.Duration
// How many times to retry on a given attempt.
MaxSyncAttemptRetries int
}

// LoadVMDIControllerConfigFromEnv builds a VMDiskImageControllerConfig from
// environment variables. Locally these come from your "env"; in production
// the values come from a configmap.
//
// Every lookup goes through a corecfg Get*EnvOrDefault helper and passes one
// of this package's default* constants (presumably used when the variable is
// unset — the helper is not visible here, TODO confirm).
//
// NOTE(review): this listing is a rendered diff, so removed and added
// statements sit next to each other. The change renames CONCURRENCY to
// MAX_VMDI_SYNC_CONCURRENCY, drops RETRY_LIMIT and RETRY_BACKOFF_DURATION,
// and adds MAX_SYNC_RETRY_BACKOFF_DURATION, MAX_SYNC_ATTEMPT_DURATION and
// MAX_SYNC_ATTEMPT_RETRIES.
func LoadVMDIControllerConfigFromEnv() VMDiskImageControllerConfig {
// The max amount of VMDIs we can have syncing at one time.
concurrency := corecfg.GetIntEnvOrDefault("CONCURRENCY", defaultConcurrency)
concurrency := corecfg.GetIntEnvOrDefault("MAX_VMDI_SYNC_CONCURRENCY", defaultConcurrency)

// How many times we will retry a failed sync.
retryLimit := corecfg.GetIntEnvOrDefault("RETRY_LIMIT", defaultRetryLimit)
// The longest we will ever wait to retry.
maxBackoffDelay := corecfg.GetDurationEnvOrDefault("MAX_SYNC_RETRY_BACKOFF_DURATION", defaultMaxBackoffDelay)

// How long we want to wait before trying to resync a failed VMDI.
retryBackoffDuration := corecfg.GetDurationEnvOrDefault("RETRY_BACKOFF_DURATION", defaultBackoffDuration)
// How long we will try to run a sync before we fail it forever.
maxSyncDuration := corecfg.GetDurationEnvOrDefault("MAX_SYNC_DURATION", defaultMaxSyncDuration)

// How long we will let a VMDI sit in syncing status.
// (Pre-change this described MAX_SYNC_DURATION; post-change it describes
// MAX_SYNC_ATTEMPT_DURATION.)
maxSyncDuration := corecfg.GetDurationEnvOrDefault("MAX_SYNC_DURATION", defaultMaxSyncDuration)
maxAttemptDuration := corecfg.GetDurationEnvOrDefault("MAX_SYNC_ATTEMPT_DURATION", defaultMaxSyncAttemptDuration)

// How many times we will retry on a given attempt.
maxSyncAttemptRetries := corecfg.GetIntEnvOrDefault("MAX_SYNC_ATTEMPT_RETRIES", defaultMaxSyncAttemptRetries)

// The first four fields below are the pre-change literal; the last five
// are the post-change literal.
return VMDiskImageControllerConfig{
Concurrency: concurrency,
RetryLimit: retryLimit,
RetryBackoffDuration: retryBackoffDuration,
MaxSyncDuration: maxSyncDuration,
Concurrency: concurrency,
MaxBackoffDelay: maxBackoffDelay,
MaxSyncAttemptDuration: maxAttemptDuration,
MaxSyncAttemptRetries: maxSyncAttemptRetries,
MaxSyncDuration: maxSyncDuration,
}
}
32 changes: 15 additions & 17 deletions internal/vm-disk-image/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,9 @@ func (r *VMDiskImageReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return r.VMDiskImageOrchestrator.DeleteResource(ctx, &VMDiskImage)
}

resourceHasFinalizer := !crutils.ContainsFinalizer(&VMDiskImage, crdv1.VMDiskImageFinalizer)
if resourceHasFinalizer {
err := r.AddControllerFinalizer(ctx, &VMDiskImage)
if err != nil {
return r.HandleResourceUpdateError(ctx, &VMDiskImage, err, "Failed to add finalizer to our resource")
}

resourceMissingFinalizer := !crutils.ContainsFinalizer(&VMDiskImage, crdv1.VMDiskImageFinalizer)
if resourceMissingFinalizer {
return r.AddControllerFinalizer(ctx, &VMDiskImage)
}

currentPhase := VMDiskImage.Status.Phase
Expand All @@ -93,6 +89,8 @@ func (r *VMDiskImageReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return r.AttemptSyncingOfResource(ctx, &VMDiskImage)
case crdv1.PhaseSyncing:
return r.TransitonFromSyncing(ctx, &VMDiskImage)
case crdv1.PhaseRetryableFailure:
return r.AttemptRetry(ctx, &VMDiskImage)
case crdv1.PhaseReady, crdv1.PhaseFailed:
return ctrl.Result{}, nil
default:
Expand All @@ -112,18 +110,18 @@ func (r *VMDiskImageReconciler) SetupWithManager(mgr ctrl.Manager) error {

resourceGenerator := &vmdi.Generator{}
vmdiProvisioner := vmdi.K8sVMDIProvisioner{
Client: client,
ResourceGenerator: resourceGenerator,
MaxSyncDuration: config.MaxSyncDuration,
RetryLimit: config.RetryLimit,
Client: client,
ResourceGenerator: resourceGenerator,
MaxSyncAttemptDuration: config.MaxSyncAttemptDuration,
MaxSyncAttemptRetries: config.MaxSyncAttemptRetries,
}
orchestrator := vmdi.Orchestrator{
Client: client,
Recorder: mgr.GetEventRecorderFor(crdv1.VMDiskImageControllerName),
Provisioner: vmdiProvisioner,
RetryLimit: config.RetryLimit,
RetryBackoff: config.RetryBackoffDuration,
SyncLimit: config.Concurrency,
Client: client,
Recorder: mgr.GetEventRecorderFor(crdv1.VMDiskImageControllerName),
Provisioner: vmdiProvisioner,
MaxRetryBackoff: config.MaxBackoffDelay,
MaxSyncTime: config.MaxSyncDuration,
ConcurrentSyncLimit: config.Concurrency,
}
reconciler := &VMDiskImageReconciler{
Scheme: mgr.GetScheme(),
Expand Down
Loading