From 0006c973aa7d6ace293e2a5fedaf1f8f804f960c Mon Sep 17 00:00:00 2001
From: Ryan Compton
Date: Thu, 25 Sep 2025 18:07:18 +0000
Subject: [PATCH 1/2] Added config for Mistral-Small-24B-Base-2501

---
Review notes (ignored by `git am`, not part of the commit message):
- block_size is 32768 (was 131072) to follow max_position_embeddings in the
  linked config.json.
- head_size=128 and rope_base=100000000 were added to mirror head_dim and
  rope_theta in the linked config.json; please confirm against that file.
- Hunk and diffstat counts were adjusted by hand for the two extra lines. The
  post-image blob id on the index line is carried over from the original
  submission and is stale; regenerate with `git format-patch` before merging.

 litgpt/config.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/litgpt/config.py b/litgpt/config.py
index 97549a114d..086b8a8d10 100644
--- a/litgpt/config.py
+++ b/litgpt/config.py
@@ -2112,6 +2112,28 @@ def norm_class(self) -> Type:
         intermediate_size=28672,
     )
 )
+configs.append(
+    # https://huggingface.co/mistralai/Mistral-Small-24B-Base-2501/blob/main/config.json
+    dict(
+        name="Mistral-Small-24B-Base-2501",
+        hf_config=dict(org="mistralai", name="Mistral-Small-24B-Base-2501"),
+        padded_vocab_size=131072,
+        block_size=32768,  # max_position_embeddings upstream
+        n_layer=40,
+        n_head=32,
+        n_embd=5120,
+        head_size=128,  # head_dim upstream; n_embd / n_head would be 160
+        n_query_groups=8,
+        rotary_percentage=1.0,
+        rope_base=100000000,  # rope_theta upstream
+        parallel_residual=False,
+        bias=False,
+        norm_class_name="RMSNorm",
+        norm_eps=1e-05,
+        mlp_class_name="LLaMAMLP",
+        intermediate_size=32768,
+    )
+)
 
 
 ############

From f2734e6622868c2e59cbd030fe5abd6c31cab082 Mon Sep 17 00:00:00 2001
From: Ryan Compton
Date: Thu, 25 Sep 2025 18:20:01 +0000
Subject: [PATCH 2/2] Added instruct as well

---
Review notes (ignored by `git am`, not part of the commit message):
- Same value corrections as the Base config in patch 1/2: block_size=32768,
  head_size=128, rope_base=100000000; please confirm against the linked
  config.json.
- The hunk now starts at line 2134 because patch 1/2 grew by two lines. Both
  blob ids on the index line are carried over from the original submission and
  are stale; regenerate with `git format-patch` before merging.

 litgpt/config.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/litgpt/config.py b/litgpt/config.py
index 086b8a8d10..bd3b7776ec 100644
--- a/litgpt/config.py
+++ b/litgpt/config.py
@@ -2134,6 +2134,28 @@ def norm_class(self) -> Type:
         intermediate_size=32768,
     )
 )
+configs.append(
+    # https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501/blob/main/config.json
+    dict(
+        name="Mistral-Small-24B-Instruct-2501",
+        hf_config=dict(org="mistralai", name="Mistral-Small-24B-Instruct-2501"),
+        padded_vocab_size=131072,
+        block_size=32768,  # max_position_embeddings upstream
+        n_layer=40,
+        n_head=32,
+        n_embd=5120,
+        head_size=128,  # head_dim upstream; n_embd / n_head would be 160
+        n_query_groups=8,
+        rotary_percentage=1.0,
+        rope_base=100000000,  # rope_theta upstream
+        parallel_residual=False,
+        bias=False,
+        norm_class_name="RMSNorm",
+        norm_eps=1e-05,
+        mlp_class_name="LLaMAMLP",
+        intermediate_size=32768,
+    )
+)
 
 
 ############