Skip to content

Commit 977a8ec

Browse files
committed
Add Diagon installation during cluster creation and modify the workload.py
Add wait_for_deployment_ready(); Added unit test; update goldens.yaml; update goldens.yaml; update goldens.yaml; Fixed parser/cluster.py; update goldens.yaml; fixed linter; fixed linter; pyink; Test unit test
1 parent 3ca9359 commit 977a8ec

File tree

5 files changed

+446
-0
lines changed

5 files changed

+446
-0
lines changed

src/xpk/commands/cluster.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
from ..utils.templates import get_templates_absolute_path
8282
import shutil
8383
import os
84+
from . import managed_ml_diagnostics
8485

8586
CLUSTER_PREHEAT_JINJA_FILE = 'cluster_preheat.yaml.j2'
8687

@@ -407,6 +408,13 @@ def cluster_create(args) -> None:
407408
# pylint: disable=line-too-long
408409
f' https://console.cloud.google.com/kubernetes/clusters/details/{get_cluster_location(args.project, args.cluster, args.zone)}/{args.cluster}/details?project={args.project}'
409410
)
411+
412+
if args.managed_ml_diagnostics:
413+
return_code = managed_ml_diagnostics.install_mldiagnostics_prerequisites()
414+
if return_code != 0:
415+
xpk_print('Installation of MLDiagnostics failed.')
416+
xpk_exit(return_code)
417+
410418
xpk_exit(0)
411419

412420

src/xpk/commands/cluster_test.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import pytest
2222

2323
from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command
24+
from xpk.commands.managed_ml_diagnostics import install_mldiagnostics_prerequisites
2425
from xpk.core.system_characteristics import SystemCharacteristics, UserFacingNameToSystemCharacteristics
2526
from xpk.core.testing.commands_tester import CommandsTester
2627
from xpk.utils.feature_flags import FeatureFlags
@@ -56,6 +57,15 @@ def mocks(mocker) -> _Mocks:
5657
run_command_with_updates_path=(
5758
'xpk.commands.cluster.run_command_with_updates'
5859
),
60+
managed_ml_diagnostics_run_command_with_updates_path=(
61+
'xpk.commands.managed_ml_diagnostics.run_command_with_updates'
62+
),
63+
run_command_for_value_path=(
64+
'xpk.commands.cluster.run_command_for_value'
65+
),
66+
managed_ml_diagnostics_run_command_for_value_path=(
67+
'xpk.commands.managed_ml_diagnostics.run_command_for_value'
68+
),
5969
),
6070
)
6171

@@ -87,6 +97,7 @@ def construct_args(**kwargs: Any) -> Namespace:
8797
memory_limit='100Gi',
8898
cpu_limit=100,
8999
cluster_cpu_machine_type='',
100+
managed_mldiagnostics=False,
90101
)
91102
args_dict.update(kwargs)
92103
return Namespace(**args_dict)
@@ -247,3 +258,148 @@ def test_run_gke_cluster_create_command_with_gke_version_has_no_autoupgrade_flag
247258
mocks.commands_tester.assert_command_run(
248259
'clusters create', ' --no-enable-autoupgrade'
249260
)
261+
262+
263+
def test_install_mldiagnostics_prerequisites_commands_executed(
    mocks: _Mocks,
    mocker,
):
  """Verifies the commands issued by install_mldiagnostics_prerequisites().

  Every command the installer is expected to run is first stubbed to succeed,
  then the installer is invoked, and finally each expected command is asserted
  to have been run the expected number of times.
  """
  # Commands stubbed to return (0, ''). Registration order is kept as-is in
  # case the commands tester resolves overlapping patterns by order.
  successful_commands = (
      ('kubectl', 'rollout', 'status', 'deployment/kueue-controller-manager'),
      ('kubectl', 'rollout', 'status', 'deployment/cert-manager-webhook'),
      (
          'kubectl',
          'apply',
          '-f',
          'https://github.com/cert-manager/cert-manager/releases/',
      ),
      ('gcloud', 'artifacts', 'generic', 'download'),
      ('kubectl', 'create', 'namespace', 'gke-mldiagnostics'),
      ('kubectl', 'apply', '-f', '-n', 'gke-mldiagnostics'),
      (
          'kubectl',
          'label',
          'namespace',
          'default',
          'managed-mldiagnostics-gke=true',
      ),
  )
  for command_parts in successful_commands:
    mocks.commands_tester.set_result_for_command((0, ''), *command_parts)

  # Run the code under test; without this call none of the assertions below
  # would be observing the installer's behavior.
  return_code = install_mldiagnostics_prerequisites()

  # The caller in cluster_create treats any non-zero value as a failure.
  assert return_code == 0

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'rollout',
      'status',
      'deployment/kueue-controller-manager',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'apply',
      '-f',
      'https://github.com/cert-manager/cert-manager/',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl', 'rollout', 'status', 'deployment/cert-manager-webhook', times=1
  )

  mocks.commands_tester.assert_command_run(
      'gcloud',
      'artifacts',
      'generic',
      'download',
      '--package=mldiagnostics-injection-webhook',
      '--version=v0.5.0',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl', 'create', 'namespace', 'gke-mldiagnostics', times=1
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'apply',
      '-f',
      'mldiagnostics-injection-webhook-v0.5.0.yaml',
      '-n',
      'gke-mldiagnostics',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'label',
      'namespace',
      'default',
      'managed-mldiagnostics-gke=true',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'gcloud',
      'artifacts',
      'generic',
      'download',
      '--package=mldiagnostics-connection-operator',
      '--version=v0.5.0',
      times=1,
  )

  mocks.commands_tester.assert_command_run(
      'kubectl',
      'apply',
      '-f',
      'mldiagnostics-connection-operator-v0.5.0.yaml',
      '-n',
      'gke-mldiagnostics',
      times=1,
  )

  # Aggregate counts: one download and one apply per MLDiagnostics package.
  mocks.commands_tester.assert_command_run(
      'gcloud', 'artifacts', 'generic', 'download', times=2
  )

  mocks.commands_tester.assert_command_run(
      'kubectl', 'apply', '-f', '-n', 'gke-mldiagnostics', times=2
  )

0 commit comments

Comments
 (0)