From 0c3c896ea80643608b4e961eec7e1efcd844e109 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
<65301509+KiruyaMomochi@users.noreply.github.com>
Date: Tue, 11 Nov 2025 03:32:00 +0800
Subject: [PATCH 1/5] chat : Kimi-K2-Thinking tool calling support
---
common/chat.cpp | 132 +++++++++++++
common/chat.h | 1 +
.../moonshotai-Kimi-K2-Thinking.jinja | 97 ++++++++++
tests/test-chat-parser.cpp | 182 ++++++++++++++++++
tests/test-chat.cpp | 136 +++++++++++++
5 files changed, 548 insertions(+)
create mode 100644 models/templates/moonshotai-Kimi-K2-Thinking.jinja
diff --git a/common/chat.cpp b/common/chat.cpp
index 938872e82ee1d..562c875397412 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -643,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
+ case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
default:
throw std::runtime_error("Unknown chat format");
}
@@ -1726,6 +1727,68 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
return data;
}
+// Initializes chat params for the Kimi K2 format.
+//
+// Renders the prompt with the `thinking` flag exposed to the template, handles a prompt that
+// ends with an opened "<think>" tag, and - when tools are provided - builds the tool-call
+// grammar, its trigger pattern and the list of preserved tokens.
+//
+// tmpl:   chat template to render.
+// inputs: request parameters (messages, tools, tool_choice, enable_thinking, ...).
+// Returns the populated common_chat_params (format is always COMMON_CHAT_FORMAT_KIMI_K2).
+static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // Pass thinking context for Kimi K2 template
+    json additional_context = {
+        {"thinking", inputs.enable_thinking},
+    };
+
+    auto prompt = apply(tmpl, inputs,
+                        /* messages_override= */ inputs.messages,
+                        /* tools_override= */ std::nullopt,
+                        additional_context);
+    data.prompt = prompt;
+    data.format = COMMON_CHAT_FORMAT_KIMI_K2;
+    // If the rendered prompt leaves a "<think>" tag open, either close it immediately
+    // (thinking disabled) or record that the model output starts inside the reasoning block.
+    if (string_ends_with(data.prompt, "<think>")) {
+        if (!inputs.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+    if (inputs.tools.is_array() && !inputs.tools.empty()) {
+        // Lazy grammar unless a tool call is required or a JSON schema was supplied.
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+                // One rule per tool: optional call-begin token, the tool name, the argument-begin
+                // token, schema-constrained arguments, then the call-end token.
+                tool_rules.push_back(builder.add_rule(name + "-call",
+                    "( \"<|tool_call_begin|>\" )? \"" + name + "<|tool_call_argument_begin|>"
+                    "\" " + builder.add_schema(name + "-args", parameters) + " "
+                    "\"<|tool_call_end|>\""));
+            });
+            builder.add_rule("root",
+                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
+                "( \"<|tool_calls_section_begin|>\" ) "
+                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+                "\"<|tool_calls_section_end|>\""
+                " space");
+            data.grammar_triggers.push_back({
+                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
+                // If thinking_forced_open, then we capture the </think> tag in the grammar,
+                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
+                // NOTE(review): the `|` characters in the section-begin token below are not escaped, so the
+                // regex treats them as alternation; PATCH 2/5 of this series escapes them.
+                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
+                    "(<|tool_calls_section_begin|>)[\\s\\S]*"
+            });
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<|tool_calls_section_begin|>",
+                "<|tool_call_begin|>",
+                "<|tool_call_argument_begin|>",
+                "<|tool_call_end|>",
+                "<|tool_calls_section_end|>",
+            };
+        });
+    }
+    return data;
+}
+
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
builder.try_parse_reasoning("", "");
if (!builder.syntax().parse_tool_calls) {
@@ -1807,6 +1870,66 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
}
}
+// Parses the non-reasoning part of a Kimi K2 response.
+// When tool-call parsing is disabled, the remaining input is added verbatim as content;
+// otherwise the input is handed to parse_json_tool_calls() together with the Kimi K2
+// section/call delimiters defined below.
+static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
+ // Capture group 1 is the text between the (optional) call-begin token and the argument-begin token.
+ // NOTE(review): the `|` characters in the patterns below are not escaped, so the regex engine
+ // treats them as alternation instead of literal bars; PATCH 2/5 of this series escapes them.
+ static const common_regex function_regex("(?:<|tool_call_begin|>)?([^\\n<]+)(?:<|tool_call_argument_begin|>)");
+
+ static const common_regex close_regex("(?:[\\s]*)?<|tool_call_end|>");
+ static const common_regex tool_calls_begin("(?:<|tool_calls_section_begin|>)");
+ static const common_regex tool_calls_end("<|tool_calls_section_end|>");
+
+ // Tool-call parsing disabled: everything that is left is plain content.
+ if (!builder.syntax().parse_tool_calls) {
+ LOG_DBG("%s: not parse_tool_calls\n", __func__);
+ builder.add_content(builder.consume_rest());
+ return;
+ }
+
+ LOG_DBG("%s: parse_tool_calls\n", __func__);
+
+ parse_json_tool_calls(
+ builder,
+ /* block_open= */ tool_calls_begin,
+ /* function_regex_start_only= */ std::nullopt,
+ function_regex,
+ close_regex,
+ tool_calls_end);
+}
+
+// Parses a complete Kimi K2 response: optional reasoning between "<think>" and "</think>",
+// followed by regular content and/or a tool-call section.
+static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
+    // Kimi K2 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
+    // First try to parse using the standard reasoning parsing method
+    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
+
+    // Look ahead for the closing tag without consuming any input.
+    auto start_pos = builder.pos();
+    auto found_end_think = builder.try_find_literal("</think>");
+    builder.move_to(start_pos);
+
+    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
+        // Final (non-partial) output that never closed the reasoning block: treat it as
+        // content so that any tool calls it contains are still parsed.
+        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
+        common_chat_parse_kimi_k2_content(builder);
+    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
+        // If reasoning was parsed successfully, the remaining content is regular content
+        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
+        // <|tool_calls_section_begin|><|tool_call_begin|>NAME<|tool_call_argument_begin|>JSON<|tool_call_end|><|tool_calls_section_end|>
+        common_chat_parse_kimi_k2_content(builder);
+    } else {
+        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+            LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
+            common_chat_parse_kimi_k2_content(builder);
+            return;
+        }
+        // If no reasoning tags found, check if we should treat everything as reasoning
+        if (builder.syntax().thinking_forced_open) {
+            // If thinking is forced open but no tags found, treat everything as reasoning
+            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
+            builder.add_reasoning_content(builder.consume_rest());
+        } else {
+            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
+            // <|tool_call_begin|>NAME<|tool_call_argument_begin|>JSON<|tool_call_end|>
+            common_chat_parse_kimi_k2_content(builder);
+        }
+    }
+}
+
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
@@ -2912,6 +3035,12 @@ static common_chat_params common_chat_templates_apply_jinja(
return common_chat_params_init_deepseek_v3_1(tmpl, params);
}
+ // Kimi K2: detect based on specific patterns in the template
+ if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
+ params.json_schema.is_null()) {
+ return common_chat_params_init_kimi_k2(tmpl, params);
+ }
+
// DeepSeek R1: use handler in all cases except json schema (thinking / tools).
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
return common_chat_params_init_deepseek_r1(tmpl, params);
@@ -3139,6 +3268,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
common_chat_parse_lfm2(builder);
break;
+ case COMMON_CHAT_FORMAT_KIMI_K2:
+ common_chat_parse_kimi_k2(builder);
+ break;
default:
throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
}
diff --git a/common/chat.h b/common/chat.h
index 50efb0d4e516f..74385f2058c1d 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -117,6 +117,7 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_NEMOTRON_V2,
COMMON_CHAT_FORMAT_APERTUS,
COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
+ COMMON_CHAT_FORMAT_KIMI_K2,
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
diff --git a/models/templates/moonshotai-Kimi-K2-Thinking.jinja b/models/templates/moonshotai-Kimi-K2-Thinking.jinja
new file mode 100644
index 0000000000000..26f99b7d19f74
--- /dev/null
+++ b/models/templates/moonshotai-Kimi-K2-Thinking.jinja
@@ -0,0 +1,97 @@
+{# render_content(msg): emits msg content. A string is emitted as-is; otherwise each item of the list is emitted as an image placeholder (type 'image', or an 'image' / 'image_url' key) or as its 'text'. Nothing is emitted when content is none. #}
+{%- macro render_content(msg) -%}
+ {%- set c = msg.get('content') -%}
+ {%- if c is string -%}
+ {{ c }}
+ {%- elif c is not none -%}
+ {% for content in c -%}
+ {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+ <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+ {% else -%}
+ {{ content['text'] }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- endif -%}
+{%- endmacro -%}
+
+{% macro set_roles(message) -%}
+ {# Emits the message header: the role marker (user / assistant / system for anything else), then the message 'name' if set, else its role, then the middle marker. #}
+ {%- set role_name = message.get('name') or message['role'] -%}
+ {%- if message['role'] == 'user' -%}
+ <|im_user|>{{role_name}}<|im_middle|>
+ {%- elif message['role'] == 'assistant' -%}
+ <|im_assistant|>{{role_name}}<|im_middle|>
+ {%- else -%}
+ <|im_system|>{{role_name}}<|im_middle|>
+ {%- endif -%}
+{%- endmacro -%}
+
+
+{# render_toolcalls(message): wraps all of the message's tool calls in the section begin/end markers. Each call emits its 'id', then its arguments (verbatim when already a string, otherwise serialized with tojson). #}
+{%- macro render_toolcalls(message) -%}
+ <|tool_calls_section_begin|>
+ {%- for tool_call in message['tool_calls'] -%}
+ {%- set formatted_id = tool_call['id'] -%}
+ <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
+ {%- endfor -%}
+ <|tool_calls_section_end|>
+{%- endmacro -%}
+
+
+{# Find the last assistant message that carries no tool calls #}
+{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
+{%- for idx in range(messages|length-1, -1, -1) -%}
+  {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
+    {%- set ns.last_non_tool_call_assistant_msg = idx -%}
+    {%- break -%}
+  {%- endif -%}
+{%- endfor -%}
+
+{# Split all messages into history and suffix; reasoning_content is preserved only for suffix messages. #}
+{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
+{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
+
+{%- if tools -%}
+  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson() }}<|im_end|>
+{%- endif -%}
+
+{%- if messages|length == 0 or messages[0]['role'] != 'system' -%}
+  <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
+{%- endif -%}
+
+{# History: assistant turns are rendered with an empty think block (their reasoning is dropped). #}
+{%- for message in hist_msgs -%}
+  {{set_roles(message)}}
+  {%- if message['role'] == 'assistant' -%}
+    <think></think>{{render_content(message)}}
+    {%- if message.get('tool_calls') -%}
+      {{render_toolcalls(message)}}
+    {%- endif -%}
+  {%- elif message['role'] == 'tool' -%}
+    {%- set tool_call_id = message.tool_call_id -%}
+    ## Return of {{ tool_call_id }}
+{{render_content(message)}}
+  {%- elif message['content'] is not none -%}
+    {{render_content(message)}}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+
+{# Suffix: assistant turns keep their reasoning_content inside the think block. #}
+{%- for message in suffix_msgs -%}
+  {{set_roles(message)}}
+  {%- if message['role'] == 'assistant' -%}
+    {%- set rc = message.get('reasoning_content', '') -%}
+    <think>{{rc}}</think>{{render_content(message)}}
+    {%- if message.get('tool_calls') -%}
+      {{render_toolcalls(message)}}
+    {%- endif -%}
+  {%- elif message['role'] == 'tool' -%}
+    {%- set tool_call_id = message.tool_call_id -%}
+    ## Return of {{ tool_call_id }}
+{{render_content(message)}}
+  {%- elif message['content'] is not none -%}
+    {{render_content(message)}}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+
+
+{%- if add_generation_prompt -%}
+  <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
index 4766518fe6955..3963b825b9d08 100644
--- a/tests/test-chat-parser.cpp
+++ b/tests/test-chat-parser.cpp
@@ -164,6 +164,36 @@ static void test_reasoning() {
assert_equals(variant, std::string("REASONINGok"), msg.content);
assert_equals(variant, std::string(""), msg.reasoning_content);
}
+ // Test Kimi K2 parsing - reasoning content followed by "" and then regular content
+ {
+ common_chat_syntax syntax = {
+ /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ };
+ const std::string variant("kimi_k2_reasoning_format_deepseek");
+ common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, syntax);
+ assert_equals(variant, true, builder.try_parse_reasoning("", ""));
+ assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
+ assert_equals(variant, std::string("ok"), builder.consume_rest());
+ }
+ // Test Kimi K2 parsing - reasoning_format none - reasoning content followed by "" and then regular content
+ {
+ common_chat_syntax syntax = {
+ /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ };
+ const std::string variant("kimi_k2_reasoning_format_none");
+ const std::string input = "REASONINGok";
+ auto msg = common_chat_parse(input, false, syntax);
+ assert_equals(variant, std::string("REASONINGok"), msg.content);
+ assert_equals(variant, std::string(""), msg.reasoning_content);
+ }
}
static void test_regex() {
@@ -404,6 +434,158 @@ static void test_deepseek_v3_1_tool_calls() {
}
}
+// Exercises common_chat_parse() with COMMON_CHAT_FORMAT_KIMI_K2 on complete and partial outputs,
+// with and without a forced-open reasoning block, and with single / multiple tool calls.
+// NOTE(review): the visible hunks for this file add no call to this function - confirm that it
+// is invoked from main().
+static void test_kimi_k2_tool_calls() {
+    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
+    // variant: happy path for when it works as the model card says it should
+    const std::string variant("simple");
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ false,
+        /* .parse_tool_calls = */ true,
+    };
+    const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+    auto msg = common_chat_parse(input, false, syntax);
+    assert_equals(variant, 1, msg.tool_calls.size());
+    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
+    // JSON arguments are dumped without spaces
+    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
+    assert_equals(variant, std::string(""), msg.content);
+    assert_equals(variant, std::string(""), msg.reasoning_content);
+
+    // variant: simple + thinking open
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("simple_thinking");
+        const std::string in = "REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
+    }
+    // variant: simple + multiple tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ false,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("simple_multiple_tool_calls");
+        const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, 2, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
+        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+    }
+
+
+    // variant: thinking forced open + tool call in reasoning content
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING</think><|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+        assert_equals(variant, std::string(""), m.content);
+        // The tool call emitted inside the reasoning block stays verbatim (Kimi K2 tokens) in reasoning_content.
+        assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING"), m.reasoning_content);
+    }
+
+    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
+    // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
+    // to make tool calls in reasoning content according to the model card, but it does sometimes, so
+    // add the reasoning content as regular content and parse the tool calls.
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, std::string("REASONING"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+        assert_equals(variant, 1, m.tool_calls.size());
+        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
+        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
+    }
+
+    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
+        const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+        auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
+        assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content);
+        assert_equals(variant, std::string(""), m.content);
+        assert_equals(variant, 0, m.tool_calls.size());
+    }
+
+    // variant: thinking forced open + reasoning + regular content + no tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ true,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
+        const std::string in = "REASONING</think>CONTENT";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, 0, m.tool_calls.size());
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
+    }
+    // variant: thinking not forced open + missing reasoning + no tool calls
+    {
+        common_chat_syntax syntax = {
+            /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
+            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+            /* .reasoning_in_content = */ false,
+            /* .thinking_forced_open = */ false,
+            /* .parse_tool_calls = */ true,
+        };
+        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
+        const std::string in = "CONTENT";
+        auto m = common_chat_parse(in, false, syntax);
+        assert_equals(variant, 0, m.tool_calls.size());
+        assert_equals(variant, std::string("CONTENT"), m.content);
+        assert_equals(variant, std::string(""), m.reasoning_content);
+    }
+}
+
static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
common_chat_msg_parser builder(input, parse_as_partial, {});
auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 4a8ba849b3f8c..6a50102a97103 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -2067,6 +2067,142 @@ static void test_template_output_parsers() {
/* .parse_tool_calls = */ true,
}));
}
+ {
+ auto tmpls = read_templates("models/templates/moonshotai-Kimi-K2-Thinking.jinja");
+ std::vector end_tokens{ "<|im_end|>" };
+
+ for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
+ auto params = common_chat_templates_apply(tmpls.get(), inputs);
+ assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, params.format);
+ assert_equals(true, params.thinking_forced_open);
+ }
+
+ test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ assert_msg_equals(
+ simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
+ common_chat_parse(
+ "I'm\nthinkingHello, world!\nWhat's up?",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ }));
+ // variant: thinking forced open, reasoning_format none
+ assert_msg_equals(
+ simple_assist_msg("REASONINGok", ""),
+ common_chat_parse(
+ "REASONINGok",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: happy path for when it works as the model card says it should
+ assert_msg_equals(
+ simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
+ common_chat_parse(
+ "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ false,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: simple + thinking open
+ assert_msg_equals(
+ simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+ common_chat_parse(
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: simple + multiple tool calls
+ common_chat_msg message_assist_multiple_calls;
+ message_assist_multiple_calls.role = "assistant";
+ message_assist_multiple_calls.content = "CONTENT";
+ message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
+ message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
+ assert_msg_equals(
+ message_assist_multiple_calls,
+ common_chat_parse(
+ "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ false,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: thinking forced open + tool call in reasoning content
+ assert_msg_equals(
+ simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+ common_chat_parse(
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
+ // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
+ // to make tool calls in reasoning content according to the model card, but it does sometimes, so
+ // add the reasoning content as regular content and parse the tool calls.
+ assert_msg_equals(
+ simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
+ common_chat_parse(
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: thinking forced open + tool call in reasoning content + no closing think + partial
+ assert_msg_equals(
+ simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""),
+ common_chat_parse(
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ /* is_partial= */ true,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .parse_tool_calls = */ true,
+ }));
+ // variant: thinking not forced open + missing reasoning + no tool calls
+ assert_msg_equals(
+ simple_assist_msg("CONTENT", ""),
+ common_chat_parse(
+ "CONTENT",
+ /* is_partial= */ false,
+ {
+ COMMON_CHAT_FORMAT_KIMI_K2,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+ /* .reasoning_in_content = */ false,
+ /* .thinking_forced_open = */ false,
+ /* .parse_tool_calls = */ true,
+ }));
+ }
{
auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
std::vector end_tokens{ "<|assistant_end|>" };
From 94d85cc404aadae9fbe7e43959a2858ed809c1c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
<65301509+KiruyaMomochi@users.noreply.github.com>
Date: Tue, 11 Nov 2025 03:46:26 +0800
Subject: [PATCH 2/5] fix : escape vertical bar in regex
---
common/chat.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 562c875397412..1ef9b1e920e84 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1773,7 +1773,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
// If thinking_forced_open, then we capture the tag in the grammar,
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") +
- "(<|tool_calls_section_begin|>)[\\s\\S]*"
+ "(<\\|tool_calls_section_begin\\|>)[\\s\\S]*"
});
data.preserved_tokens = {
"",
@@ -1871,11 +1871,11 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
}
static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
- static const common_regex function_regex("(?:<|tool_call_begin|>)?([^\\n<]+)(?:<|tool_call_argument_begin|>)");
+ static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([^\\n<]+)(?:<\\|tool_call_argument_begin\\|>)");
- static const common_regex close_regex("(?:[\\s]*)?<|tool_call_end|>");
- static const common_regex tool_calls_begin("(?:<|tool_calls_section_begin|>)");
- static const common_regex tool_calls_end("<|tool_calls_section_end|>");
+ static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>");
+ static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
+ static const common_regex tool_calls_end("<\\|tool_calls_section_end\\|>");
if (!builder.syntax().parse_tool_calls) {
LOG_DBG("%s: not parse_tool_calls\n", __func__);
From 7c8a6941cc94c97307f24fe002aab88184df285e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
<65301509+KiruyaMomochi@users.noreply.github.com>
Date: Tue, 11 Nov 2025 21:32:36 +0800
Subject: [PATCH 3/5] fix: function call with id
---
common/chat.cpp | 37 ++++++++++++++++++++++++++++++++-----
1 file changed, 32 insertions(+), 5 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 1ef9b1e920e84..125a5c471921a 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -699,7 +699,8 @@ static void parse_json_tool_calls(
const common_regex & close_regex,
const std::optional & block_close,
bool allow_raw_python = false,
- const std::function & get_function_name = nullptr) {
+ const std::function & get_function_name = nullptr,
+ const std::function & get_function_id = nullptr) {
auto parse_tool_calls = [&]() {
size_t from = std::string::npos;
@@ -714,12 +715,18 @@ static void parse_json_tool_calls(
if (res) {
std::string name;
+ std::string id;
if (get_function_name) {
name = get_function_name(*res);
} else {
GGML_ASSERT(res->groups.size() == 2);
name = builder.str(res->groups[1]);
}
+ if (get_function_id) {
+ id = get_function_id(*res);
+ } else {
+ id = "";
+ }
first = false;
if (name.empty()) {
// get_function_name signalled us that we should skip this match and treat it as content.
@@ -731,7 +738,7 @@ static void parse_json_tool_calls(
auto maybe_raw_python = name == "python" && allow_raw_python;
if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
- if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
+ if (!builder.add_tool_call(name, id, arguments->value) || arguments->is_partial) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
builder.consume_regex(close_regex);
@@ -740,7 +747,7 @@ static void parse_json_tool_calls(
}
if (maybe_raw_python) {
auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
- if (!builder.add_tool_call(name, "", arguments)) {
+ if (!builder.add_tool_call(name, id, arguments)) {
throw common_chat_msg_partial_exception("incomplete tool call");
}
return;
@@ -1871,7 +1878,8 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
}
static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
- static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([^\\n<]+)(?:<\\|tool_call_argument_begin\\|>)");
+ // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
+ static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>");
static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
@@ -1891,7 +1899,26 @@ static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder)
/* function_regex_start_only= */ std::nullopt,
function_regex,
close_regex,
- tool_calls_end);
+ tool_calls_end,
+ /* allow_raw_python */ false,
+ /* get_function_name= */ [&](const auto & res) -> std::string {
+ auto function_id = builder.str(res.groups[1]);
+
+ auto dot_pos = function_id.find(".");
+ if (dot_pos == std::string::npos) {
+ return "";
+ }
+
+ auto colon_pos = function_id.find(':', dot_pos + 1);
+ if (colon_pos == std::string::npos)
+ return function_id.substr(dot_pos + 1);
+ else
+ return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1));
+ },
+ /* get_function_id= */ [&](const auto & res) -> std::string {
+ return builder.str(res.groups[1]);
+ }
+ );
}
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
From 56153aa6484af209903e11d484a722b8831263e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
<65301509+KiruyaMomochi@users.noreply.github.com>
Date: Wed, 12 Nov 2025 02:00:18 +0800
Subject: [PATCH 4/5] fix: kimi-k2 tool calling grammar
---
common/chat.cpp | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/common/chat.cpp b/common/chat.cpp
index 125a5c471921a..256d10a68a45b 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1758,14 +1758,16 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
if (inputs.tools.is_array() && !inputs.tools.empty()) {
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
std::vector tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
+ const auto number = builder.add_rule("number", "[0-9]+");
const auto & function = tool.at("function");
std::string name = function.at("name");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
tool_rules.push_back(builder.add_rule(name + "-call",
- "( \"<|tool_call_begin|>\" )? \"" + name + "<|tool_call_argument_begin|>"
+ "\"<|tool_call_begin|>functions." + name + ":\" " + number + " \"<|tool_call_argument_begin|>"
"\" " + builder.add_schema(name + "-args", parameters) + " "
"\"<|tool_call_end|>\""));
});
@@ -1773,8 +1775,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
std::string(data.thinking_forced_open ? "( \"\" space )? " : "") +
"( \"<|tool_calls_section_begin|>\" ) "
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
- "\"<|tool_calls_section_end|>\""
- " space");
+ "\"<|tool_calls_section_end|>\"");
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
// If thinking_forced_open, then we capture the tag in the grammar,
@@ -1878,8 +1879,7 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
}
static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder) {
- // https://github.com/MoonshotAI/Kimi-K2/blob/main/docs/tool_call_guidance.md
- static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)?([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
+ static const common_regex function_regex("(?:<\\|tool_call_begin\\|>)([\\w\\.]+:\\d+)\\s*(?:<\\|tool_call_argument_begin\\|>)");
static const common_regex close_regex("(?:[\\s]*)?<\\|tool_call_end\\|>");
static const common_regex tool_calls_begin("(?:<\\|tool_calls_section_begin\\|>)");
@@ -1916,7 +1916,13 @@ static void common_chat_parse_kimi_k2_content(common_chat_msg_parser & builder)
return function_id.substr(dot_pos + 1, colon_pos - (dot_pos + 1));
},
/* get_function_id= */ [&](const auto & res) -> std::string {
- return builder.str(res.groups[1]);
+ auto function_id = builder.str(res.groups[1]);
+
+ auto dot_pos = function_id.find(".");
+ if (dot_pos == std::string::npos) {
+ return "";
+ }
+ return function_id;
}
);
}
From accad29f1bb7e8036a375130a0d2944d46b554fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BE=E5=9C=B0=20=E5=B8=8C=E7=95=99=E8=80=B6?=
<65301509+KiruyaMomochi@users.noreply.github.com>
Date: Fri, 14 Nov 2025 04:02:39 +0800
Subject: [PATCH 5/5] fix: kimi-k2 tool calling testing with correct tool
calling format
---
tests/test-chat-parser.cpp | 14 +++++++-------
tests/test-chat.cpp | 32 ++++++++++++++++----------------
2 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
index 3963b825b9d08..dd1ecccedb78f 100644
--- a/tests/test-chat-parser.cpp
+++ b/tests/test-chat-parser.cpp
@@ -445,7 +445,7 @@ static void test_kimi_k2_tool_calls() {
/* .thinking_forced_open = */ false,
/* .parse_tool_calls = */ true,
};
- const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+ const std::string input = "<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
auto msg = common_chat_parse(input, false, syntax);
assert_equals(variant, 1, msg.tool_calls.size());
assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
@@ -464,7 +464,7 @@ static void test_kimi_k2_tool_calls() {
/* .parse_tool_calls = */ true,
};
const std::string variant("simple_thinking");
- const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+ const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
auto m = common_chat_parse(in, false, syntax);
assert_equals(variant, 1, m.tool_calls.size());
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
@@ -482,7 +482,7 @@ static void test_kimi_k2_tool_calls() {
/* .parse_tool_calls = */ true,
};
const std::string variant("simple_multiple_tool_calls");
- const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>";
+ const std::string in = "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>functions.get_weather:1<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>";
auto m = common_chat_parse(in, false, syntax);
assert_equals(variant, 2, m.tool_calls.size());
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
@@ -504,7 +504,7 @@ static void test_kimi_k2_tool_calls() {
/* .parse_tool_calls = */ true,
};
const std::string variant("thinking_forced_open_tool_call_in_reasoning");
- const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+ const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
auto m = common_chat_parse(in, false, syntax);
assert_equals(variant, 1, m.tool_calls.size());
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
@@ -526,7 +526,7 @@ static void test_kimi_k2_tool_calls() {
/* .parse_tool_calls = */ true,
};
const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
- const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+ const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
auto m = common_chat_parse(in, false, syntax);
assert_equals(variant, std::string("REASONING"), m.content);
assert_equals(variant, std::string(""), m.reasoning_content);
@@ -545,9 +545,9 @@ static void test_kimi_k2_tool_calls() {
/* .parse_tool_calls = */ true,
};
const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
- const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
+ const std::string in = "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>";
auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
- assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content);
+ assert_equals(variant, std::string("REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>"), m.reasoning_content);
assert_equals(variant, std::string(""), m.content);
assert_equals(variant, 0, m.tool_calls.size());
}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 6a50102a97103..b436113f103c5 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -2074,11 +2074,11 @@ static void test_template_output_parsers() {
for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
auto params = common_chat_templates_apply(tmpls.get(), inputs);
assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, params.format);
- assert_equals(true, params.thinking_forced_open);
+ assert_equals(false, params.thinking_forced_open);
}
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
assert_msg_equals(
simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
common_chat_parse(
@@ -2105,9 +2105,9 @@ static void test_template_output_parsers() {
}));
// variant: happy path for when it works as the model card says it should
assert_msg_equals(
- simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
+ simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"),
common_chat_parse(
- "<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
/* is_partial= */ false,
{
COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2118,9 +2118,9 @@ static void test_template_output_parsers() {
}));
// variant: simple + thinking open
assert_msg_equals(
- simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+ simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"),
common_chat_parse(
- "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
/* is_partial= */ false,
{
COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2133,12 +2133,12 @@ static void test_template_output_parsers() {
common_chat_msg message_assist_multiple_calls;
message_assist_multiple_calls.role = "assistant";
message_assist_multiple_calls.content = "CONTENT";
- message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
- message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
+ message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", "functions.get_time:0"});
+ message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", "functions.get_weather:1"});
assert_msg_equals(
message_assist_multiple_calls,
common_chat_parse(
- "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>get_weather<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>",
+ "CONTENT<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_call_begin|>functions.get_weather:1<|tool_call_argument_begin|>{\"city\": \"Paris\"}<|tool_call_end|><|tool_calls_section_end|>",
/* is_partial= */ false,
{
COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2149,9 +2149,9 @@ static void test_template_output_parsers() {
}));
// variant: thinking forced open + tool call in reasoning content
assert_msg_equals(
- simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
+ simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time2:0<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:1"),
common_chat_parse(
- "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time2<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time2:0<|tool_call_argument_begin|>{\"city\": \"Tokyo2\"}<|tool_call_end|><|tool_calls_section_end|>REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:1<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
/* is_partial= */ false,
{
COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2165,9 +2165,9 @@ static void test_template_output_parsers() {
// to make tool calls in reasoning content according to the model card, but it does sometimes, so
// add the reasoning content as regular content and parse the tool calls.
assert_msg_equals(
- simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
+ simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}", "functions.get_time:0"),
common_chat_parse(
- "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
/* is_partial= */ false,
{
COMMON_CHAT_FORMAT_KIMI_K2,
@@ -2178,9 +2178,9 @@ static void test_template_output_parsers() {
}));
// variant: thinking forced open + tool call in reasoning content + no closing think + partial
assert_msg_equals(
- simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""),
+ simple_assist_msg("", "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>", "", ""),
common_chat_parse(
- "REASONING<|tool_calls_section_begin|><|tool_call_begin|>get_time<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
+ "REASONING<|tool_calls_section_begin|><|tool_call_begin|>functions.get_time:0<|tool_call_argument_begin|>{\"city\": \"Tokyo\"}<|tool_call_end|><|tool_calls_section_end|>",
/* is_partial= */ true,
{
COMMON_CHAT_FORMAT_KIMI_K2,