2 files changed: +2 -3 lines

File tree: training/a4high/llama-3.1-405b/maxtext-pretraining-gke

File 1 of 2:

@@ -113,7 +113,7 @@ default settings, run the following command from your client:
 cd $RECIPE_ROOT
 helm install -f values.yaml \
     --set-file maxtext_config=$REPO_ROOT/src/frameworks/a4high/maxtext-configs/llama-3.1-405b-256gpus-a4h-fp8.yaml \
-    --set workload.image=us-central1-docker.pkg.dev/deeplearning-images/reproducibility/jax-maxtext-gpu:jax0.5.1-cuda_dl25.02-rev1-maxtext-20150313 \
+    --set workload.image=us-central1-docker.pkg.dev/deeplearning-images/reproducibility/jax-maxtext-gpu:jax0.5.1-cuda_dl25.02-rev1-maxtext-20150317 \
     --set workload.run_name=$USER-llama-3-1-405b-maxtext-fp8 \
     --set workload.gpus=256 \
     --set queue=$KUEUE_NAME \
@@ -132,7 +132,7 @@ helm install -f values.yaml \
 cd $RECIPE_ROOT
 helm install -f values.yaml \
     --set-file maxtext_config=$REPO_ROOT/src/frameworks/a4high/maxtext-configs/llama-3.1-405b-256gpus-a4h-fp8.yaml \
-    --set workload.image=us-central1-docker.pkg.dev/deeplearning-images/reproducibility/jax-maxtext-gpu:jax0.5.1-cuda_dl25.02-rev1-maxtext-20150313 \
+    --set workload.image=us-central1-docker.pkg.dev/deeplearning-images/reproducibility/jax-maxtext-gpu:jax0.5.1-cuda_dl25.02-rev1-maxtext-20150317 \
     --set workload.run_name=$USER-llama-3-1-405b-maxtext-fp8 \
     --set workload.gpus=256 \
     --set queue=$KUEUE_NAME \
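
The only change to this file is the image tag, which moves from maxtext-20150313 to maxtext-20150317 in both helm install variants; the rest of each command (release name, chart path) is truncated by the hunk. Below is a minimal sketch of how one might confirm that a deployed release picked up the new tag, assuming a placeholder release name MY_RELEASE and a hypothetical job-name label on the workload pods; neither is shown in this diff.

# Sketch only: check which image tag a deployed release is actually using.
# MY_RELEASE is a placeholder; the real release name is not shown in this hunk.
MY_RELEASE=my-llama-405b-release

# Image tag recorded in the computed release values.
helm get values "$MY_RELEASE" --all | grep maxtext-

# Image the pods actually pulled (the label selector is an assumption about the chart).
kubectl get pods -l job-name="$MY_RELEASE" \
  -o jsonpath='{range .items[*]}{.spec.containers[*].image}{"\n"}{end}'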
File 2 of 2:

@@ -41,7 +41,6 @@ xlaFlags: >-
   --xla_gpu_enable_latency_hiding_scheduler=true
   --xla_gpu_enable_triton_gemm=false
   --xla_gpu_enable_command_buffer=FUSION,CUSTOM_CALL
-  --xla_gpu_enable_highest_priority_async_stream=true
   --xla_gpu_all_reduce_combine_threshold_bytes=17179869184
   --xla_gpu_all_gather_combine_threshold_bytes=17179869184
   --xla_gpu_reduce_scatter_combine_threshold_bytes=17179869184
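
This hunk only removes --xla_gpu_enable_highest_priority_async_stream=true; the surrounding flags are unchanged. Because xlaFlags is a YAML folded scalar (>-), the listed flags fold into a single space-separated string. How this particular chart forwards that string to the container is not visible in the diff; a common pattern is to expose it as the XLA_FLAGS environment variable, sketched below under that assumption and using only the flags visible in this hunk (flags outside the hunk, if any, are omitted).

# Sketch only, assuming the chart exposes the folded xlaFlags string as the
# XLA_FLAGS environment variable (the actual templating is not shown here).
# Post-change flag set from this hunk, with the async-stream flag removed.
export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=true \
 --xla_gpu_enable_triton_gemm=false \
 --xla_gpu_enable_command_buffer=FUSION,CUSTOM_CALL \
 --xla_gpu_all_reduce_combine_threshold_bytes=17179869184 \
 --xla_gpu_all_gather_combine_threshold_bytes=17179869184 \
 --xla_gpu_reduce_scatter_combine_threshold_bytes=17179869184"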