Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2627,6 +2627,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.n_out_freq = value;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
// Register `--output-format {gguf,dat}`: lets the user pick the format of the
// written imatrix file. The option is tagged with LLAMA_EXAMPLE_IMATRIX.
add_opt(common_arg(
    {"--output-format"}, "{gguf,dat}",
    string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"),
    [](common_params & params, const std::string & value) {
        // Map the textual value onto the matching enum constant.
        if (value == "gguf") {
            params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF;
            return;
        }
        if (value == "dat") {
            params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT;
            return;
        }
        // Anything else is rejected; params.imat_out_type is left untouched.
        throw std::invalid_argument("invalid output format");
    }
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
add_opt(common_arg(
{"--save-frequency"}, "N",
string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),
Expand Down
7 changes: 7 additions & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ enum common_reasoning_format {
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
};

// Selects the on-disk format used when writing an imatrix file.
enum common_imatrix_format_type {
    COMMON_IMATRIX_FORMAT_AUTO = 0, // not set explicitly; the writer picks the format (e.g. from the output file name)
    COMMON_IMATRIX_FORMAT_GGUF = 1, // GGUF imatrix
    COMMON_IMATRIX_FORMAT_DAT  = 2, // legacy
};

struct common_params {
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 4096; // context size
Expand Down Expand Up @@ -431,6 +437,7 @@ struct common_params {
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
int32_t i_chunk = 0; // start processing from this chunk
common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix

bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity
Expand Down
3 changes: 2 additions & 1 deletion tools/imatrix/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ More information is available in <https://github.com/ggml-org/llama.cpp/pull/486

```
./llama-imatrix \
-m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \
-m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \
[--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \
[--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \
[--show-statistics] [...]
Expand All @@ -20,6 +20,7 @@ The parameters in square brackets are optional and have the following meaning:
* `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`.
* `-o | --output-file` specifies the name of the file where the computed data will be stored. If omitted, `imatrix.gguf` is used.
* `-ofreq | --output-frequency` specifies how often the result computed so far is saved to disk. Default is 10 (i.e., every 10 chunks).
* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf" unless the output filename ends with `.dat`.
* `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never)
* `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default.
* `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets.
Expand Down
10 changes: 6 additions & 4 deletions tools/imatrix/imatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
static void print_usage(int, char ** argv) {
LOG("\nexample usage:\n");
LOG("\n %s \\\n"
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n"
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
" [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
" [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
" [--show-statistics] [...]\n" , argv[0]);
Expand Down Expand Up @@ -492,13 +492,15 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {

void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
auto fname = m_params.out_file;
auto imat_type = m_params.imat_out_type;

// TODO: use the new format in more cases
if (!string_ends_with(fname, ".gguf")) {
LOG_WRN("\n%s: saving to legacy imatrix format because output suffix is not .gguf\n", __func__);
if ((imat_type == COMMON_IMATRIX_FORMAT_AUTO && string_ends_with(fname, ".dat")) ||
Copy link
Collaborator Author

@compilade compilade Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be better to instead simply use GGUF regardless of the file name by default.

I don't know why I'm hesitating.

Generating new imatrix.dat has limited uses (however, reading has many uses). The main user who would benefit doesn't really use mainline llama.cpp for this anymore (see ikawrakow/ik_llama.cpp#15 (reply in thread)).

This simplification could also remove the need for the common_imatrix_format_type enum, which could be a bool instead.

EDIT: I've changed this in 1ef3cc1, the format is no longer decided with the output filename.

(imat_type == COMMON_IMATRIX_FORMAT_DAT)) {
LOG_WRN("\n%s: saving to legacy imatrix format\n", __func__);
this->save_imatrix_legacy(n_chunk);
return;
}
// else, default to GGUF imatrix

if (n_chunk > 0) {
fname += ".at_";
Expand Down
Loading