diff --git a/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/customizations.yaml b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/customizations.yaml
new file mode 100644
index 000000000000..81420cce9fb8
--- /dev/null
+++ b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/customizations.yaml
@@ -0,0 +1,318 @@
+apiVersion: config.karmada.io/v1alpha1
+kind: ResourceInterpreterCustomization
+metadata:
+  name: declarative-configuration-job
+spec:
+  target:
+    apiVersion: batch.volcano.sh/v1alpha1
+    kind: Job
+  customizations:
+    healthInterpretation:
+      luaScript: >
+        function InterpretHealth(observedObj)
+          if observedObj.status == nil or observedObj.status.state == nil then
+            return false
+          end
+          local phase = observedObj.status.state.phase
+          if phase == nil or phase == '' then
+            return false
+          end
+          -- NOTE(review): transitional and terminal phases such as 'Aborted' and
+          -- 'Terminated' are reported healthy here -- confirm this is intended.
+          if phase == 'Running' or phase == 'Completed' or phase == 'Pending' or
+             phase == 'Aborting' or phase == 'Aborted' or phase == 'Restarting' or
+             phase == 'Completing' or phase == 'Terminating' or phase == 'Terminated' then
+            return true
+          end
+          return false
+        end
+    componentResource:
+      luaScript: |
+        local kube = require("kube")
+
+        -- get walks obj down the given key path, returning nil on any miss.
+        local function get(obj, path)
+          local cur = obj
+          for i = 1, #path do
+            if cur == nil then return nil end
+            cur = cur[path[i]]
+          end
+          return cur
+        end
+
+        -- to_num coerces v to a number, falling back to default for nil/'' or junk.
+        local function to_num(v, default)
+          if v == nil or v == '' then
+            return default
+          end
+          local n = tonumber(v)
+          if n ~= nil then return n end
+          return default
+        end
+
+        -- GetComponents reports one schedulable component per Volcano task,
+        -- sized by the task's minAvailable (default 1).
+        function GetComponents(observedObj)
+          local components = {}
+
+          local tasks = get(observedObj, {"spec","tasks"})
+          if tasks == nil then
+            return components
+          end
+
+          for i, task in ipairs(tasks) do
+            local replicas = to_num(task.minAvailable, 1)
+            local requires = kube.accuratePodRequirements(task.template)
+            -- Determine a stable component name with sensible defaults
+            local taskName = task.name
+            if taskName == nil or taskName == '' then
+              -- Use zero-based index for uniqueness: task-0, task-1, ...
+              taskName = "task-" .. (i - 1)
+            end
+            table.insert(components, {
+              name = taskName,
+              replicas = replicas,
+              replicaRequirements = requires
+            })
+          end
+
+          return components
+        end
+    statusAggregation:
+      luaScript: >
+        -- durationVal converts a duration value ("1h2m3.5s", a bare numeric
+        -- string, or a number) into seconds; unparseable input yields 0.
+        local function durationVal(d)
+          if type(d) == "number" then
+            return d
+          end
+          if type(d) ~= "string" then
+            return 0
+          end
+          local totalSeconds = 0
+          for num, unit in string.gmatch(d, "([%d%.]+)([hms])") do
+            num = tonumber(num)
+            if unit == "h" then
+              totalSeconds = totalSeconds + num * 3600
+            elseif unit == "m" then
+              totalSeconds = totalSeconds + num * 60
+            elseif unit == "s" then
+              totalSeconds = totalSeconds + num
+            end
+          end
+          if totalSeconds > 0 then
+            return totalSeconds
+          end
+          return tonumber(d) or 0
+        end
+
+        -- omitEmpty recursively drops zero/empty fields ("", 0, "0s", empty
+        -- tables) so the aggregated status stays compact; returns nil when
+        -- nothing remains.
+        local function omitEmpty(t)
+          if t == nil then return nil end
+          local out = {}
+          for k, v in pairs(t) do
+            if type(v) == "table" then
+              local inner = omitEmpty(v)
+              if inner ~= nil and next(inner) ~= nil then
+                out[k] = inner
+              end
+            elseif v ~= nil and not (v == 0 or v == "" or v == "0s") then
+              out[k] = v
+            end
+          end
+          if next(out) ~= nil then
+            return out
+          else
+            return nil
+          end
+        end
+
+        -- AggregateStatus merges the member-cluster Job statuses into one.
+        function AggregateStatus(desiredObj, statusItems)
+          if statusItems == nil then return desiredObj end
+          if desiredObj.status == nil then desiredObj.status = {} end
+
+          -- Single-cluster fast path: adopt the member status verbatim.
+          if #statusItems == 1 then
+            desiredObj.status = statusItems[1].status
+            return desiredObj
+          end
+
+          local failedClusters = {}
+          local latestTransition = {}
+          local successfulClusters = 0
+          local hasFailed = false
+          local failedConditions = {}
+          local failedPhases = {
+            Failed = true,
+            Aborted = true,
+            Aborting = true,
+            Terminated = true,
+            Terminating = true,
+          }
+          local status = {
+            state = {},
+            minAvailable = 0,
+            taskStatusCount = {},
+            pending = 0,
+            running = 0,
+            succeeded = 0,
+            failed = 0,
+            terminating = 0,
+            unknown = 0,
+            version = 0,
+            retryCount = 0,
+            controlledResources = {},
+            conditions = {},
+            runningDuration = "0s",
+          }
+
+          for i = 1, #statusItems do
+            local s = statusItems[i].status
+            if s ~= nil then
+              -- Numeric counters are summed across clusters; version keeps the max.
+              status.minAvailable = status.minAvailable + (s.minAvailable or 0)
+              status.pending = status.pending + (s.pending or 0)
+              status.running = status.running + (s.running or 0)
+              status.succeeded = status.succeeded + (s.succeeded or 0)
+              status.failed = status.failed + (s.failed or 0)
+              status.terminating = status.terminating + (s.terminating or 0)
+              status.unknown = status.unknown + (s.unknown or 0)
+              status.version = math.max(status.version, s.version or 0)
+              status.retryCount = status.retryCount + (s.retryCount or 0)
+              -- Keep the longest running duration observed in any cluster.
+              if durationVal(s.runningDuration) > durationVal(status.runningDuration) then
+                status.runningDuration = s.runningDuration
+              end
+
+              if s.taskStatusCount ~= nil then
+                for taskName, taskStatus in pairs(s.taskStatusCount) do
+                  if status.taskStatusCount[taskName] == nil then
+                    status.taskStatusCount[taskName] = { phase = {} }
+                  end
+                  if taskStatus.phase ~= nil then
+                    for phaseName, count in pairs(taskStatus.phase) do
+                      status.taskStatusCount[taskName].phase[phaseName] = (status.taskStatusCount[taskName].phase[phaseName] or 0) + count
+                    end
+                  end
+                end
+              end
+
+              if s.controlledResources then
+                for k, v in pairs(s.controlledResources) do
+                  status.controlledResources[k] = v
+                end
+              end
+
+              if s.conditions then
+                local clusterHasFailed = false
+                for _, c in ipairs(s.conditions) do
+                  if failedPhases[c.status] then
+                    clusterHasFailed = true
+                    hasFailed = true
+                  end
+                  if not clusterHasFailed then
+                    local exist = latestTransition[c.status]
+                    if exist == nil or c.lastTransitionTime > exist.lastTransitionTime then
+                      latestTransition[c.status] = c
+                    end
+                  end
+                end
+                -- Surface the first failing cluster's conditions verbatim.
+                if clusterHasFailed and #failedConditions == 0 then
+                  failedConditions = s.conditions
+                end
+              end
+
+              if s.state ~= nil then
+                local st = s.state
+                if st.phase == "Completed" then
+                  successfulClusters = successfulClusters + 1
+                elseif st.phase == "Failed" or st.phase == "Aborted" then
+                  table.insert(failedClusters, statusItems[i].clusterName)
+                end
+              end
+            end
+          end
+
+          if #failedClusters > 0 then
+            status.state.phase = "Failed"
+            status.state.reason = "VolcanoJobFailed"
+            status.state.message = "Job failed in clusters: " .. table.concat(failedClusters, ",")
+            status.state.lastTransitionTime = os.date("!%Y-%m-%dT%H:%M:%SZ")
+          end
+
+          if successfulClusters == #statusItems and successfulClusters > 0 then
+            status.state.phase = "Completed"
+            status.state.reason = "Completed"
+            status.state.message = "Job completed successfully"
+            status.state.lastTransitionTime = os.date("!%Y-%m-%dT%H:%M:%SZ")
+          end
+
+          if hasFailed then
+            status.conditions = failedConditions
+          else
+            for _, v in pairs(latestTransition) do
+              table.insert(status.conditions, v)
+            end
+            -- pairs() order is undefined; sort so repeated aggregation runs
+            -- produce a stable condition order and avoid status churn.
+            table.sort(status.conditions, function(a, b)
+              return (a.lastTransitionTime or "") < (b.lastTransitionTime or "")
+            end)
+          end
+
+          desiredObj.status = omitEmpty(status) or {}
+          return desiredObj
+        end
+    statusReflection:
+      luaScript: >
+        -- ReflectStatus copies the fields of interest from the observed
+        -- member-cluster Job status into the status Karmada records.
+        function ReflectStatus(observedObj)
+          local status = {}
+
+          if observedObj == nil or observedObj.status == nil then
+            return status
+          end
+
+          local s = observedObj.status
+          status.minAvailable = s.minAvailable
+          status.pending = s.pending
+          status.running = s.running
+          status.succeeded = s.succeeded
+          status.failed = s.failed
+          status.terminating = s.terminating
+          status.unknown = s.unknown
+          status.version = s.version
+          status.retryCount = s.retryCount
+          status.runningDuration = s.runningDuration
+
+          status.taskStatusCount = {}
+          if s.taskStatusCount ~= nil then
+            for k, v in pairs(s.taskStatusCount) do
+              status.taskStatusCount[k] = v
+            end
+          end
+
+          status.controlledResources = {}
+          if s.controlledResources ~= nil then
+            for k, v in pairs(s.controlledResources) do
+              status.controlledResources[k] = v
+            end
+          end
+
+          if s.state ~= nil then
+            status.state = s.state
+          end
+
+          status.conditions = {}
+          if type(s.conditions) == "table" then
+            for _, cond in ipairs(s.conditions) do
+              table.insert(status.conditions, cond)
+            end
+          end
+
+          return status
+        end
diff --git a/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/customizations_tests.yaml b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/customizations_tests.yaml
new file mode 100644
index 000000000000..41739db5177a
--- /dev/null
+++ b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/customizations_tests.yaml
@@ -0,0 +1,8 @@
+tests:
+  - desiredInputPath: testdata/desired-job.yaml
+    statusInputPath: testdata/status-file.yaml
+    operation: AggregateStatus
+  - observedInputPath: testdata/observed-job.yaml
+    operation: InterpretHealth
+  - observedInputPath: testdata/observed-job.yaml
+    operation: InterpretStatus
diff --git a/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/desired-job.yaml b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/desired-job.yaml
new file mode 100644
index 000000000000..59181c95a546
--- /dev/null
+++ b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/desired-job.yaml
@@ -0,0 +1,59 @@
+apiVersion: batch.volcano.sh/v1alpha1
+kind: Job
+metadata:
+  name: dk-job
+spec:
+  maxRetry: 3
+  minAvailable: 3
+  plugins:
+    env: []
+    ssh: []
+    svc:
+    - --disable-network-policy=true
+  queue: default
+  schedulerName: volcano
+  tasks:
+  - minAvailable: 1
+    name: job-nginx1
+    replicas: 1
+    template:
+      metadata:
+        name: nginx1
+      spec:
+        containers:
+        - args:
+          - sleep 10
+          command:
+          - bash
+          - -c
+          image: nginx:latest
+          imagePullPolicy: IfNotPresent
+          name: nginx
+          resources:
+            requests:
+              cpu: 100m
+        nodeSelector:
+          kubernetes.io/os: linux
+        restartPolicy: OnFailure
+  - minAvailable: 2
+    name: job-nginx2
+    replicas: 3
+    template:
+      metadata:
+        name: nginx2
+      spec:
+        containers:
+        - args:
+          - sleep 30
+          command:
+          - bash
+          - -c
+          image: nginx:latest
+          imagePullPolicy: IfNotPresent
+          name: nginx
+          resources:
+            requests:
+              cpu: 100m
+        nodeSelector:
+          kubernetes.io/os: linux
+        restartPolicy: OnFailure
diff --git a/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/observed-job.yaml b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/observed-job.yaml
new file mode 100644
index 000000000000..ca958aeff855
--- /dev/null
+++ b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/observed-job.yaml
@@ -0,0 +1,87 @@
+apiVersion: batch.volcano.sh/v1alpha1
+kind: Job
+metadata:
+  name: dk-job
+spec:
+  maxRetry: 3
+  minAvailable: 3
+  plugins:
+    env: []
+    ssh: []
+    svc:
+    - --disable-network-policy=true
+  queue: default
+  schedulerName: volcano
+  tasks:
+  - minAvailable: 1
+    name: job-nginx1
+    replicas: 1
+    template:
+      metadata:
+        name: nginx1
+      spec:
+        containers:
+        - args:
+          - sleep 10
+          command:
+          - bash
+          - -c
+          image: nginx:latest
+          imagePullPolicy: IfNotPresent
+          name: nginx
+          resources:
+            requests:
+              cpu: 100m
+        nodeSelector:
+          kubernetes.io/os: linux
+        restartPolicy: OnFailure
+  - minAvailable: 2
+    name: job-nginx2
+    replicas: 3
+    template:
+      metadata:
+        name: nginx2
+      spec:
+        containers:
+        - args:
+          - sleep 30
+          command:
+          - bash
+          - -c
+          image: nginx:latest
+          imagePullPolicy: IfNotPresent
+          name: nginx
+          resources:
+            requests:
+              cpu: 100m
+        nodeSelector:
+          kubernetes.io/os: linux
+        restartPolicy: OnFailure
+status:
+  conditions:
+  - lastTransitionTime: "2025-09-29T10:57:03Z"
+    status: Pending
+  - lastTransitionTime: "2025-09-29T10:57:07Z"
+    status: Running
+  - lastTransitionTime: "2025-09-29T10:57:38Z"
+    status: Completed
+  failed: 0
+  minAvailable: 3
+  pending: 0
+  retryCount: 0
+  running: 0
+  runningDuration: 35.688396584s
+  state:
+    lastTransitionTime: "2025-09-29T10:57:38Z"
+    phase: Completed
+  succeeded: 4
+  taskStatusCount:
+    job-nginx1:
+      phase:
+        Succeeded: 1
+    job-nginx2:
+      phase:
+        Succeeded: 3
+  terminating: 0
+  unknown: 0
+  version: 1
diff --git a/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/status-file.yaml b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/status-file.yaml
new file mode 100644
index 000000000000..e7d11c3932c8
--- /dev/null
+++ b/pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/batch.volcano.sh/v1alpha1/Job/testdata/status-file.yaml
@@ -0,0 +1,31 @@
+applied: true
+clusterName: member1
+health: Healthy
+status:
+  conditions:
+  - lastTransitionTime: "2025-09-29T10:57:03Z"
+    status: Pending
+  - lastTransitionTime: "2025-09-29T10:57:07Z"
+    status: Running
+  - lastTransitionTime: "2025-09-29T10:57:38Z"
+    status: Completed
+  failed: 0
+  minAvailable: 3
+  pending: 0
+  retryCount: 0
+  running: 0
+  runningDuration: 35.688396584s
+  state:
+    lastTransitionTime: "2025-09-29T10:57:38Z"
+    phase: Completed
+  succeeded: 4
+  taskStatusCount:
+    job-nginx1:
+      phase:
+        Succeeded: 1
+    job-nginx2:
+      phase:
+        Succeeded: 3
+  terminating: 0
+  unknown: 0
+  version: 1