Skip to content

Commit e367a8f

Browse files
committed
Add Diagon installation during cluster creation and modify workload.py
Add wait_for_deployment_ready(); added unit test; update goldens.yaml; update goldens.yaml; update goldens.yaml; fixed parser/cluster.py; update goldens.yaml; fixed linter; fixed linter; pyink; test unit test.
1 parent 3ca9359 commit e367a8f

File tree

6 files changed

+440
-7
lines changed

6 files changed

+440
-7
lines changed

src/xpk/commands/cluster.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
from ..utils.templates import get_templates_absolute_path
8282
import shutil
8383
import os
84+
from . import managed_ml_diagnostics
8485

8586
CLUSTER_PREHEAT_JINJA_FILE = 'cluster_preheat.yaml.j2'
8687

@@ -407,6 +408,13 @@ def cluster_create(args) -> None:
407408
# pylint: disable=line-too-long
408409
f' https://console.cloud.google.com/kubernetes/clusters/details/{get_cluster_location(args.project, args.cluster, args.zone)}/{args.cluster}/details?project={args.project}'
409410
)
411+
412+
if args.managed_ml_diagnostics:
413+
return_code = managed_ml_diagnostics.install_mldiagnostics_prerequisites()
414+
if return_code != 0:
415+
xpk_print('Installation of MLDiagnostics failed.')
416+
xpk_exit(return_code)
417+
410418
xpk_exit(0)
411419

412420

src/xpk/commands/cluster_test.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import pytest
2222

2323
from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command
24+
from xpk.commands.managed_ml_diagnostics import install_mldiagnostics_prerequisites
2425
from xpk.core.system_characteristics import SystemCharacteristics, UserFacingNameToSystemCharacteristics
2526
from xpk.core.testing.commands_tester import CommandsTester
2627
from xpk.utils.feature_flags import FeatureFlags
@@ -56,6 +57,9 @@ def mocks(mocker) -> _Mocks:
5657
run_command_with_updates_path=(
5758
'xpk.commands.cluster.run_command_with_updates'
5859
),
60+
run_command_for_value_path=(
61+
'xpk.commands.cluster.run_command_for_value'
62+
),
5963
),
6064
)
6165

@@ -87,6 +91,7 @@ def construct_args(**kwargs: Any) -> Namespace:
8791
memory_limit='100Gi',
8892
cpu_limit=100,
8993
cluster_cpu_machine_type='',
94+
managed_mldiagnostics=False,
9095
)
9196
args_dict.update(kwargs)
9297
return Namespace(**args_dict)
@@ -247,3 +252,148 @@ def test_run_gke_cluster_create_command_with_gke_version_has_no_autoupgrade_flag
247252
mocks.commands_tester.assert_command_run(
248253
'clusters create', ' --no-enable-autoupgrade'
249254
)
255+
256+
257+
def test_install_mldiagnostics_prerequisites_commands_executed(
    mocks: _Mocks,
    mocker,
):
  """Checks the commands issued by install_mldiagnostics_prerequisites().

  Arrange: every command prefix the installation is expected to issue is
  registered with a successful `(0, '')` result.
  Act: the installation routine is invoked.
  Assert: each expected kubectl / gcloud command was run the expected number
  of times.

  Args:
    mocks: the shared `_Mocks` fixture exposing `commands_tester`.
    mocker: the pytest-mock fixture (requested but not used directly here).
  """
  # Arrange: make every command the installer is expected to run succeed.
  successful_commands = (
      ('kubectl', 'rollout', 'status', 'deployment/kueue-controller-manager'),
      ('kubectl', 'rollout', 'status', 'deployment/cert-manager-webhook'),
      (
          'kubectl',
          'apply',
          '-f',
          'https://github.com/cert-manager/cert-manager/releases/',
      ),
      ('gcloud', 'artifacts', 'generic', 'download'),
      ('kubectl', 'create', 'namespace', 'gke-mldiagnostics'),
      ('kubectl', 'apply', '-f', '-n', 'gke-mldiagnostics'),
      (
          'kubectl',
          'label',
          'namespace',
          'default',
          'managed-mldiagnostics-gke=true',
      ),
  )
  for command_parts in successful_commands:
    mocks.commands_tester.set_result_for_command((0, ''), *command_parts)

  # Act: actually run the code under test. Without this call none of the
  # assertions below can observe any command.
  return_code = install_mldiagnostics_prerequisites()

  # cluster_create treats any non-zero return code as an installation failure.
  assert return_code == 0

  # Assert: prerequisites (kueue readiness, cert-manager install + readiness).
  mocks.commands_tester.assert_command_run(
      'kubectl',
      'rollout',
      'status',
      'deployment/kueue-controller-manager',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'apply',
      '-f',
      'https://github.com/cert-manager/cert-manager/',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl', 'rollout', 'status', 'deployment/cert-manager-webhook', times=1
  )

  # Assert: injection webhook download, namespace creation and apply.
  mocks.commands_tester.assert_command_run(
      'gcloud',
      'artifacts',
      'generic',
      'download',
      '--package=mldiagnostics-injection-webhook',
      '--version=v0.5.0',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl', 'create', 'namespace', 'gke-mldiagnostics', times=1
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'apply',
      '-f',
      'mldiagnostics-injection-webhook-v0.5.0.yaml',
      '-n',
      'gke-mldiagnostics',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'label',
      'namespace',
      'default',
      'managed-mldiagnostics-gke=true',
      times=1,
  )

  # Assert: connection operator download and apply.
  mocks.commands_tester.assert_command_run(
      'gcloud',
      'artifacts',
      'generic',
      'download',
      '--package=mldiagnostics-connection-operator',
      '--version=v0.5.0',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'apply',
      '-f',
      'mldiagnostics-connection-operator-v0.5.0.yaml',
      '-n',
      'gke-mldiagnostics',
      times=1,
  )

  # Assert: totals across both packages (webhook + operator).
  mocks.commands_tester.assert_command_run(
      'gcloud', 'artifacts', 'generic', 'download', times=2
  )

  mocks.commands_tester.assert_command_run(
      'kubectl', 'apply', '-f', '-n', 'gke-mldiagnostics', times=2
  )

0 commit comments

Comments
 (0)