From 0c3c896ea80643608b4e961eec7e1efcd844e109 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
 <65301509+KiruyaMomochi@users.noreply.github.com>
Date: Tue, 11 Nov 2025 03:32:00 +0800
Subject: [PATCH 1/5] chat : Kimi-K2-Thinking tool calling support

---
 common/chat.cpp                               | 132 +++++++++++++
 common/chat.h                                 |   1 +
 .../moonshotai-Kimi-K2-Thinking.jinja         |  97 ++++++++++
 tests/test-chat-parser.cpp                    | 182 ++++++++++++++++++
 tests/test-chat.cpp                           | 136 +++++++++++++
 5 files changed, 548 insertions(+)
 create mode 100644 models/templates/moonshotai-Kimi-K2-Thinking.jinja
diff --git a/common/chat.cpp b/common/chat.cpp
index 938872e82ee1d..562c875397412 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
         case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
         case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
         case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
+        case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -1726,6 +1727,68 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
     return data;
 }
 
+static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // Pass thinking context for Kimi K2 template
+    json additional_context = {
+        {"thinking", inputs.enable_thinking},
+    };
+
+    auto prompt = apply(tmpl, inputs,
+                       /* messages_override= */ inputs.messages,
+                       /* tools_override= */ std::nullopt,
+                       additional_context);
+    data.prompt = prompt;
+    data.format = COMMON_CHAT_FORMAT_KIMI_K2;
+    if (string_ends_with(data.prompt, "<think>")) {
+        if (!inputs.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+                tool_rules.push_back(builder.add_rule(name + "-call",
+                    "( \"<|tool_call_begin|>\" )? \"" + name + "<|tool_call_argument_begin|>"
+                    "\" " + builder.add_schema(name + "-args", parameters) + " "
+                    "\"<|tool_call_end|>\""));
+            });
+            builder.add_rule("root",
+                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
+                "( \"<|tool_calls_section_begin|>\" ) "
+                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+                "\"<|tool_calls_section_end|>\""
+                " space");
+            data.grammar_triggers.push_back({
+                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
+                // If thinking_forced_open, then we capture the </think> tag in the grammar,
+                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
+                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
+                    "(<|tool_calls_section_begin|>)[\\s\\S]*"
+            });
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<|tool_calls_section_begin|>",
+                "<|tool_call_begin|>",
+                "<|tool_call_argument_begin|>",
+                "<|tool_call_end|>",
+                "<|tool_calls_section_end|>",
+            };
+        });
+    }
+    return data;
+}
+
 static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
     builder.try_parse_reasoning("<think>", "</think>");
     if (!builder.syntax().parse_tool_calls) {
@@ -1807,6 +1870,66 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
     }
 }
 
+static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
+    static const common_regex function_regex("(?:<|tool_call_begin|>)?([^\\n<]+)(?:<|tool_call_argument_begin|>)");
+
+    static const common_regex close_regex("(?:[\\s]*)?<|tool_call_end|>");
+    static const common_regex tool_calls_begin("(?:<|tool_calls_section_begin|>)");
+    static const common_regex tool_calls_end("<|tool_calls_section_end|>");
+
+    if (!builder.syntax().parse_tool_calls) {
+        LOG_DBG("%s: not parse_tool_calls\n", __func__);
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    LOG_DBG("%s: parse_tool_calls\n", __func__);
+
+    parse_json_tool_calls(
+        builder,
+        /* block_open= */ tool_calls_begin,
+        /* function_regex_start_only= */ std::nullopt,
+        function_regex,
+        close_regex,
+        tool_calls_end);
+}
+
+static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
+    // Kimi K2 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content and/or a tool-calls section
+    // Probe for "</think>" first (restoring the position afterwards), then try the standard reasoning parsing method
+    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
+
+    auto start_pos = builder.pos();
+    auto found_end_think = builder.try_find_literal("</think>");
+    builder.move_to(start_pos);
+
+    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
+        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
+        common_chat_parse_kimi_k2_content(builder);
+    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
+        // If reasoning was parsed successfully, the remaining content is regular content
+        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
+        // </think><|tool_calls_section_begin|><|tool_call_begin|>NAME<|tool_call_argument_begin|>JSON<|tool_call_end|><|tool_calls_section_end|>
+        common_chat_parse_kimi_k2_content(builder);
+    } else {
+        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
+          common_chat_parse_kimi_k2_content(builder);
+          return;
+        }
+        // If no reasoning tags found, check if we should treat everything as reasoning
+        if (builder.syntax().thinking_forced_open) {
+            // If thinking is forced open but no tags found, treat everything as reasoning
+            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
+            builder.add_reasoning_content(builder.consume_rest());
+        } else {
+            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
+            // <|tool_call_begin|>NAME<|tool_call_argument_begin|>JSON<|tool_call_end|>
+            common_chat_parse_kimi_k2_content(builder);
+        }
+    }
+}
+
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
 
@@ -2912,6 +3035,12 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_deepseek_v3_1(tmpl, params);
     }
 
+    // Kimi K2: detect based on specific patterns in the template
+    if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
+        params.json_schema.is_null()) {
+        return common_chat_params_init_kimi_k2(tmpl, params);
+    }
+
     // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
     if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos && params.json_schema.is_null()) {
         return common_chat_params_init_deepseek_r1(tmpl, params);
@@ -3139,6 +3268,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
             common_chat_parse_lfm2(builder);
             break;
+        case COMMON_CHAT_FORMAT_KIMI_K2:
+            common_chat_parse_kimi_k2(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }
diff --git a/common/chat.h b/common/chat.h
index 50efb0d4e516f..74385f2058c1d 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -117,6 +117,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_NEMOTRON_V2,
     COMMON_CHAT_FORMAT_APERTUS,
     COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
+    COMMON_CHAT_FORMAT_KIMI_K2,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
diff --git a/models/templates/moonshotai-Kimi-K2-Thinking.jinja b/models/templates/moonshotai-Kimi-K2-Thinking.jinja
new file mode 100644
index 0000000000000..26f99b7d19f74
--- /dev/null
+++ b/models/templates/moonshotai-Kimi-K2-Thinking.jinja
@@ -0,0 +1,97 @@
+{%- macro render_content(msg) -%}
+    {%- set c = msg.get('content') -%}
+    {%- if c is string -%}
+      {{ c }}
+    {%- elif c is not none -%}
+      {% for content in c -%}
+        {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+          <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+        {% else -%}
+          {{ content['text'] }}
+        {%- endif -%}
+      {%- endfor -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+{% macro set_roles(message) -%}
+  {%- set role_name =  message.get('name') or  message['role'] -%}
+  {%- if message['role'] == 'user' -%}
+    <|im_user|>{{role_name}}<|im_middle|>
+  {%- elif message['role'] == 'assistant' -%}
+    <|im_assistant|>{{role_name}}<|im_middle|>
+  {%- else -%}
+    <|im_system|>{{role_name}}<|im_middle|>
+  {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro render_toolcalls(message) -%}
+  <|tool_calls_section_begin|>
+  {%- for tool_call in message['tool_calls'] -%}
+    {%- set formatted_id = tool_call['id'] -%}
+    <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
+  {%- endfor -%}
+  <|tool_calls_section_end|>
+{%- endmacro -%}
+
+
+{# Find last non-tool-call assisitant message #}
+{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
+{%- for idx in range(messages|length-1, -1, -1) -%}
+    {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
+        {%- set ns.last_non_tool_call_assistant_msg = idx -%}
+        {%- break -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}
+{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
+{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
+
+{%- if tools -%}
+  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson() }}<|im_end|>
+{%- endif -%}
+
+{%- if messages|length == 0 or messages[0]['role'] != 'system' -%}
+  <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
+{%- endif -%}
+  
+{%- for message in hist_msgs -%}
+  {{set_roles(message)}}
+  {%- if message['role'] == 'assistant' -%}
+    <think></think>{{render_content(message)}}
+    {%- if message.get('tool_calls') -%}
+      {{render_toolcalls(message)}}
+    {%- endif -%}
+  {%- elif message['role'] == 'tool' -%}
+    {%- set tool_call_id = message.tool_call_id -%}
+    ## Return of {{ tool_call_id }}
+{{render_content(message)}}
+  {%- elif message['content'] is not none -%}
+    {{render_content(message)}}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+
+{%- for message in suffix_msgs -%}
+  {{set_roles(message)}}
+  {%- if message['role'] == 'assistant' -%}
+    {%- set rc = message.get('reasoning_content', '') -%}
+    <think>{{rc}}</think>{{render_content(message)}}
+    {%- if message.get('tool_calls') -%}
+     {{render_toolcalls(message)}}
+    {%- endif -%}
+  {%- elif message['role'] == 'tool' -%}
+    {%- set tool_call_id = message.tool_call_id -%}
+    ## Return of {{ tool_call_id }}
+{{render_content(message)}}
+  {%- elif message['content'] is not none -%}
+    {{render_content(message)}}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+
+
+{%- if add_generation_prompt -%}
+  <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
index 4766518fe6955..3963b825b9d08 100644
--- a/tests/test-chat-parser.cpp
+++ b/tests/test-chat-parser.cpp
@@ -164,6 +164,36 @@ static void test_reasoning() {
     assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
     assert_equals(variant, std::string(""), msg.reasoning_content);
   }
+  // Test Kimi K2 parsing - reasoning content followed by "</think>" and then regular content
+  {
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ true,
+        /* .parse_tool_calls = */ true,
+    };
+    const std::string variant("kimi_k2_reasoning_format_deepseek");
+    common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, syntax);
+    assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
+    assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
+    assert_equals(variant, std::string("ok"), builder.consume_rest());
+  }
+  // Test Kimi K2 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
+  {
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ true,
+        /* .parse_tool_calls = */ true,
+    };
+    const std::string variant("kimi_k2_reasoning_format_none");
+    const std::string input = "REASONING</think>ok";
+    auto msg = common_chat_parse(input, false, syntax);
+    assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
+    assert_equals(variant, std::string(""), msg.reasoning_content);
+  }
 }
 
 static void test_regex() {
@@ -404,6 +434,158 @@ static void test_deepseek_v3_1_tool_calls() {
     }
 }
 
+static void test_kimi_k2_tool_calls() {
+    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
+    // variant: happy path for when it works as the model card says it should
+    const std::string variant("simple");
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ false,
+        /* .parse_tool_calls = */ true,
+    };
+    const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+    auto msg = common_chat_parse(input, false, syntax);
+    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
+    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
+    // JSON arguments are dumped without spaces
+    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
+    assert_equals(variant, std::string(""), msg.content);
+    assert_equals(variant, std::string(""), msg.reasoning_content);
+
+    // variant: simple + thinking open
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("simple_thinking");
+        const std::string in = "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
+    }
+    // variant: simple + multiple tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ false,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("simple_multiple_tool_calls");
+        const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
+        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+    }
+
+
+    // variant: thinking forced open + tool call in reasoning content (text before </think> stays verbatim in reasoning_content)
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING"), m.reasoning_content);
+    }
+
+    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
+    //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
+    //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
+    //          add the reasoning content as regular content and parse the tool calls.
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, std::string("REASONING"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+    }
+
+    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
+        assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
+    }
+
+    // variant: thinking forced open + reasoning + regular content + no tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
+        const std::string in = "REASONING</think>CONTENT";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
+    }
+    // variant: thinking not forced open + missing reasoning + no tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ false,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
+        const std::string in = "CONTENT";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+    }
+}
+
 static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
   common_chat_msg_parser builder(input, parse_as_partial, {});
   auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4a8ba849b3f8c..6a50102a97103 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -2067,6 +2067,142 @@ static void test_template_output_parsers() {
                     /* .parse_tool_calls = */ true,
                 }));
     }
+    {
+        auto tmpls = read_templates("models/templates/moonshotai-Kimi-K2-Thinking.jinja");
+        std::vector<std::string>   end_tokens{ "<|im_end|>" };
+
+        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
+            auto params = common_chat_templates_apply(tmpls.get(), inputs);
+            assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, params.format);
+            assert_equals(true, params.thinking_forced_open);
+        }
+
+        test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        assert_msg_equals(
+            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
+            common_chat_parse(
+                "I'm\nthinking</think>Hello, world!\nWhat's up?",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ true,
+                }));
+        // variant: thinking forced open, reasoning_format none
+        assert_msg_equals(
+            simple_assist_msg("REASONING</think>ok", ""),
+            common_chat_parse(
+                "REASONING</think>ok",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ true,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: happy path for when it works as the model card says it should
+        assert_msg_equals(
+            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
+            common_chat_parse(
+                "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ false,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: simple + thinking open
+        assert_msg_equals(
+            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+            common_chat_parse(
+                "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ true,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: simple + multiple tool calls
+        common_chat_msg message_assist_multiple_calls;
+        message_assist_multiple_calls.role = "assistant";
+        message_assist_multiple_calls.content = "CONTENT";
+        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
+        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
+        assert_msg_equals(
+            message_assist_multiple_calls,
+            common_chat_parse(
+                "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ false,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: thinking forced open + tool call in reasoning content
+        assert_msg_equals(
+            simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+            common_chat_parse(
+                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ true,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
+        //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
+        //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
+        //          add the reasoning content as regular content and parse the tool calls.
+        assert_msg_equals(
+            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
+            common_chat_parse(
+                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ true,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: thinking forced open + tool call in reasoning content + no closing think + partial
+        assert_msg_equals(
+            simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""),
+            common_chat_parse(
+                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                /* is_partial= */ true,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ true,
+                    /* .parse_tool_calls = */ true,
+                }));
+        // variant: thinking not forced open + missing reasoning + no tool calls
+        assert_msg_equals(
+            simple_assist_msg("CONTENT", ""),
+            common_chat_parse(
+                "CONTENT",
+                /* is_partial= */ false,
+                {
+                    COMMON_CHAT_FORMAT_KIMI_K2,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                    /* .reasoning_in_content = */ false,
+                    /* .thinking_forced_open = */ false,
+                    /* .parse_tool_calls = */ true,
+                }));
+    }
     {
         auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
         std::vector<std::string> end_tokens{ "<|assistant_end|>" };

From 94d85cc404aadae9fbe7e43959a2858ed809c1c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
 <65301509+KiruyaMomochi@users.noreply.github.com>
Date: Tue, 11 Nov 2025 03:46:26 +0800
Subject: [PATCH 2/5] fix : escape vertical bar in regex

---
 common/chat.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 562c875397412..1ef9b1e920e84 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1773,7 +1773,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
                 // If thinking_forced_open, then we capture the </think> tag in the grammar,
                 // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
                 std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<|tool_calls_section_begin|>)[\\s\\S]*"
+                    "(<\\|tool_calls_section_begin\\|>)[\\s\\S]*"
             });
             data.preserved_tokens = {
                 "<think>",
@@ -1871,11 +1871,11 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 }
 
 static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
-    static const common_regex function_regex("(?:<|tool_call_begin|>)?([^\\n<]+)(?:<|tool_call_argument_begin|>)");
+    static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([^\\n<]+)(?:<\\|tool_call_argument_begin\\|>)");
 
-    static const common_regex close_regex("(?:[\\s]*)?<|tool_call_end|>");
-    static const common_regex tool_calls_begin("(?:<|tool_calls_section_begin|>)");
-    static const common_regex tool_calls_end("<|tool_calls_section_end|>");
+    static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>");
+    static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
+    static const common_regex tool_calls_end("<\\|tool_calls_section_end\\|>");
 
     if (!builder.syntax().parse_tool_calls) {
         LOG_DBG("%s: not parse_tool_calls\n", __func__);

From 7c8a6941cc94c97307f24fe002aab88184df285e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
 <65301509+KiruyaMomochi@users.noreply.github.com>
Date: Tue, 11 Nov 2025 21:32:36 +0800
Subject: [PATCH 3/5] fix: function call with id

---
 common/chat.cpp | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 1ef9b1e920e84..125a5c471921a 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -699,7 +699,8 @@ static void parse_json_tool_calls(
     const common_regex & close_regex,
     const std::optional<common_regex> & block_close,
     bool allow_raw_python = false,
-    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr) {
+    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name = nullptr,
+    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_id = nullptr) {
 
     auto parse_tool_calls = [&]() {
         size_t from = std::string::npos;
@@ -714,12 +715,18 @@ static void parse_json_tool_calls(
 
             if (res) {
                 std::string name;
+                std::string id;
                 if (get_function_name) {
                     name = get_function_name(*res);
                 } else {
                     GGML_ASSERT(res->groups.size() == 2);
                     name = builder.str(res->groups[1]);
                 }
+                if (get_function_id) {
+                    id = get_function_id(*res);
+                } else {
+                    id = "";
+                }
                 first = false;
                 if (name.empty()) {
                     // get_function_name signalled us that we should skip this match and treat it as content.
@@ -731,7 +738,7 @@ static void parse_json_tool_calls(
                 auto maybe_raw_python = name == "python" && allow_raw_python;
                 if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
                     if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
-                        if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
+                        if (!builder.add_tool_call(name, id, arguments->value) || arguments->is_partial) {
                             throw common_chat_msg_partial_exception("incomplete tool call");
                         }
                         builder.consume_regex(close_regex);
@@ -740,7 +747,7 @@ static void parse_json_tool_calls(
                 }
                 if (maybe_raw_python) {
                     auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-                    if (!builder.add_tool_call(name, "", arguments)) {
+                    if (!builder.add_tool_call(name, id, arguments)) {
                         throw common_chat_msg_partial_exception("incomplete tool call");
                     }
                     return;
@@ -1871,7 +1878,8 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 }
 
 static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
-    static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([^\\n<]+)(?:<\\|tool_call_argument_begin\\|>)");
+    // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
+    static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
 
     static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>");
     static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
@@ -1891,7 +1899,26 @@ static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder)
         /* function_regex_start_only= */ std::nullopt,
         function_regex,
         close_regex,
-        tool_calls_end);
+        tool_calls_end,
+        /* allow_raw_python */ false,
+        /* get_function_name= */ [&](const auto & res) -> std::string {
+            auto function_id = builder.str(res.groups[1]);
+
+            auto dot_pos = function_id.find(".");
+            if (dot_pos == std::string::npos) {
+                return "";
+            }
+
+            auto colon_pos = function_id.find(':', dot_pos + 1);
+            if (colon_pos == std::string::npos)
+                return function_id.substr(dot_pos + 1);
+            else
+                return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1));
+        },
+        /* get_function_id= */ [&](const auto & res) -> std::string {
+            return builder.str(res.groups[1]);
+        }
+    );
 }
 
 static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {

From 56153aa6484af209903e11d484a722b8831263e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
 <65301509+KiruyaMomochi@users.noreply.github.com>
Date: Wed, 12 Nov 2025 02:00:18 +0800
Subject: [PATCH 4/5] fix: kimi-k2 tool calling grammar

---
 common/chat.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/common/chat.cpp b/common/chat.cpp
index 125a5c471921a..256d10a68a45b 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1758,14 +1758,16 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
     if (inputs.tools.is_array() && !inputs.tools.empty()) {
         data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
             std::vector<std::string> tool_rules;
             foreach_function(inputs.tools, [&](const json & tool) {
+                const auto number = builder.add_rule("number", "[0-9]+");
                 const auto & function = tool.at("function");
                 std::string name = function.at("name");
                 auto parameters = function.at("parameters");
                 builder.resolve_refs(parameters);
                 tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<|tool_call_begin|>\" )? \"" + name + "<|tool_call_argument_begin|>"
+                    "\"<|tool_call_begin|>functions." + name + ":\" " + number + " \"<|tool_call_argument_begin|>"
                     "\" " + builder.add_schema(name + "-args", parameters) + " "
                     "\"<|tool_call_end|>\""));
             });
@@ -1773,8 +1775,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
                 std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
                 "( \"<|tool_calls_section_begin|>\" ) "
                 "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<|tool_calls_section_end|>\""
-                " space");
+                "\"<|tool_calls_section_end|>\"");
             data.grammar_triggers.push_back({
                 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
                 // If thinking_forced_open, then we capture the </think> tag in the grammar,
@@ -1878,8 +1879,7 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 }
 
 static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
-    // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
-    static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
+    static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
 
     static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>");
     static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
@@ -1916,7 +1916,13 @@ static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder)
                 return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1));
         },
         /* get_function_id= */ [&](const auto & res) -> std::string {
-            return builder.str(res.groups[1]);
+            auto function_id = builder.str(res.groups[1]);
+
+            auto dot_pos = function_id.find(".");
+            if (dot_pos == std::string::npos) {
+                return "";
+            }
+            return function_id;
         }
     );
 }

From accad29f1bb7e8036a375130a0d2944d46b554fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
 <65301509+KiruyaMomochi@users.noreply.github.com>
Date: Fri, 14 Nov 2025 04:02:39 +0800
Subject: [PATCH 5/5] fix: kimi-k2 tool calling testing with correct tool
 calling format

---
 tests/test-chat-parser.cpp | 14 +++++++-------
 tests/test-chat.cpp        | 32 ++++++++++++++++----------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
index 3963b825b9d08..dd1ecccedb78f 100644
--- a/tests/test-chat-parser.cpp
+++ b/tests/test-chat-parser.cpp
@@ -445,7 +445,7 @@ static void test_kimi_k2_tool_calls() {
         /* .thinking_forced_open = */ false,
         /* .parse_tool_calls = */ true,
     };
-    const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+    const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
     auto msg = common_chat_parse(input, false, syntax);
     assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
     assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
@@ -464,7 +464,7 @@ static void test_kimi_k2_tool_calls() {
             /* .parse_tool_calls = */ true,
         };
         const std::string variant("simple_thinking");
-        const std::string in = "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        const std::string in = "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
         auto m = common_chat_parse(in, false, syntax);
         assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
         assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
@@ -482,7 +482,7 @@ static void test_kimi_k2_tool_calls() {
             /* .parse_tool_calls = */ true,
         };
         const std::string variant("simple_multiple_tool_calls");
-        const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>";
+        const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>functions.get_weather:1<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>";
         auto m = common_chat_parse(in, false, syntax);
         assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
         assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
@@ -504,7 +504,7 @@ static void test_kimi_k2_tool_calls() {
             /* .parse_tool_calls = */ true,
         };
         const std::string variant("thinking_forced_open_tool_call_in_reasoning");
-        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
         auto m = common_chat_parse(in, false, syntax);
         assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
         assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
@@ -526,7 +526,7 @@ static void test_kimi_k2_tool_calls() {
             /* .parse_tool_calls = */ true,
         };
         const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
-        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
         auto m = common_chat_parse(in, false, syntax);
         assert_equals(variant, std::string("REASONING"), m.content);
         assert_equals(variant, std::string(""), m.reasoning_content);
@@ -545,9 +545,9 @@ static void test_kimi_k2_tool_calls() {
             /* .parse_tool_calls = */ true,
         };
         const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
-        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
         auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
-        assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content);
+        assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content);
         assert_equals(variant, std::string(""), m.content);
         assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
     }
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 6a50102a97103..b436113f103c5 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -2074,11 +2074,11 @@ static void test_template_output_parsers() {
         for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
             auto params = common_chat_templates_apply(tmpls.get(), inputs);
             assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, params.format);
-            assert_equals(true, params.thinking_forced_open);
+            assert_equals(false, params.thinking_forced_open);
         }
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_templates(tmpls.get(), end_tokens, message_assist, tools, "<think></think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "<think></think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         assert_msg_equals(
             simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
             common_chat_parse(
@@ -2105,9 +2105,9 @@ static void test_template_output_parsers() {
                 }));
         // variant: happy path for when it works as the model card says it should
         assert_msg_equals(
-            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
+            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"),
             common_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                "<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
                 /* is_partial= */ false,
                 {
                     COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2118,9 +2118,9 @@ static void test_template_output_parsers() {
                 }));
         // variant: simple + thinking open
         assert_msg_equals(
-            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"),
             common_chat_parse(
-                "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
                 /* is_partial= */ false,
                 {
                     COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2133,12 +2133,12 @@ static void test_template_output_parsers() {
         common_chat_msg message_assist_multiple_calls;
         message_assist_multiple_calls.role = "assistant";
         message_assist_multiple_calls.content = "CONTENT";
-        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
+        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", "functions.get_time:0"});
+        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", "functions.get_weather:1"});
         assert_msg_equals(
             message_assist_multiple_calls,
             common_chat_parse(
-                "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>",
+                "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>functions.get_weather:1<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>",
                 /* is_partial= */ false,
                 {
                     COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2149,9 +2149,9 @@ static void test_template_output_parsers() {
                 }));
         // variant: thinking forced open + tool call in reasoning content
         assert_msg_equals(
-            simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+            simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time2:0<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:1"),
             common_chat_parse(
-                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time2:0<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:1<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
                 /* is_partial= */ false,
                 {
                     COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2165,9 +2165,9 @@ static void test_template_output_parsers() {
         //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
         //          add the reasoning content as regular content and parse the tool calls.
         assert_msg_equals(
-            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
+            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"),
             common_chat_parse(
-                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
                 /* is_partial= */ false,
                 {
                     COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2178,9 +2178,9 @@ static void test_template_output_parsers() {
                 }));
         // variant: thinking forced open + tool call in reasoning content + no closing think + partial
         assert_msg_equals(
-            simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""),
+            simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""),
             common_chat_parse(
-                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+                "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
                 /* is_partial= */ true,
                 {
                     COMMON_CHAT_FORMAT_KIMI_K2,