Skip to content

imatrix: calculate activation-based statistics for new format (GGUF) imatrices #14891

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 40 commits into
base: master
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
09bc7c2
Use activations to calculate the stats
EAddario Jul 26, 2025
2097f03
Refactor variable names
EAddario Jul 31, 2025
78ddb47
Fix blow-up when GGUF does not have in_sum
EAddario Aug 2, 2025
9744a4a
Determine calculation mode
EAddario Aug 2, 2025
cce514a
Compute entropy for activations
EAddario Aug 2, 2025
b7fb362
Compute cosine similarity based on activations
EAddario Aug 2, 2025
9b841eb
Compute l2 norm
EAddario Aug 2, 2025
ee2509f
Adjust threshold
EAddario Aug 2, 2025
fc8f925
Update table display
EAddario Aug 2, 2025
4c01f51
Remove inactive
EAddario Aug 2, 2025
a32a2ec
Reformat report layout
EAddario Aug 2, 2025
4d1325e
Refactor variables
EAddario Aug 3, 2025
5324558
Update table layout
EAddario Aug 3, 2025
fce05aa
Refactor lambda into compute_tensor_averages() function
EAddario Aug 3, 2025
be60469
Refactor function names
EAddario Aug 3, 2025
a6155a8
Add compute_layer_statistics() function
EAddario Aug 3, 2025
2117c4e
Update aggregated statistic report layout
EAddario Aug 3, 2025
90cb1be
Minor cosmetic changes
EAddario Aug 3, 2025
f1c2a4c
Fix printing l2 norm when calc_mode = 1
EAddario Aug 3, 2025
c39c4e2
Refactor variable name
EAddario Aug 4, 2025
adbff66
Merge branch 'master' into imatrix
EAddario Aug 4, 2025
5e40cf4
Do not resize if in_sum is null
EAddario Aug 4, 2025
b373934
Compute aggregated (per layer) l2 norm
EAddario Aug 5, 2025
906548a
Update aggregated sum of squared activations per layer
EAddario Aug 5, 2025
aea9b31
Make ZD Score two-tailed
EAddario Aug 5, 2025
49996a1
Refactor variable names
EAddario Aug 5, 2025
4c3fea8
Update report layout
EAddario Aug 5, 2025
88854c9
Refactor legacy mode
EAddario Aug 5, 2025
030ed3c
Merge branch 'master' into imatrix
EAddario Aug 5, 2025
c7959ed
Merge branch 'master' into imatrix
EAddario Aug 7, 2025
3e9d53c
Refactor variable names
EAddario Aug 7, 2025
e0d6471
Reverse conditional logic to match convention
EAddario Aug 7, 2025
dadd90e
Rename report heading
EAddario Aug 7, 2025
5bb2def
Add --activation-statistics parameter
EAddario Aug 7, 2025
c5ecdaa
Add Euclidean–Cosine Score (ECS)
EAddario Aug 7, 2025
59af503
Update README.md
EAddario Aug 9, 2025
9467963
Merge branch 'master' into imatrix
EAddario Aug 9, 2025
6fe51e1
Fix typo in ECS formula
EAddario Aug 9, 2025
dcac206
Add --activation-statistics logic to avoid doubling the imatrix size …
EAddario Aug 9, 2025
89051cd
Update README.md
EAddario Aug 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 54 additions & 10 deletions tools/imatrix/imatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";

// Per-tensor accumulators gathered while collecting the importance matrix.
// Entries are summed across chunks and divided by the matching `counts`
// element when averages are needed (see compute_statistics).
struct Stats {
std::vector<float> activations; // running sum of raw input activations x[j]; may be empty for legacy imatrix files that lack the in_sum tensor
std::vector<float> values; // running sum of squared activations x[j]*x[j] (the classic imatrix data, stored as in_sum2)
std::vector<int64_t> counts; // number of rows accumulated, one entry per matrix/expert
};

// TODO: rename 'sqract' variables to something more generic, e.g. 'values'
struct tensor_statistics {
std::string tensor;
Stats stats;
Expand Down Expand Up @@ -139,14 +141,28 @@ static void compute_statistics(std::vector<tensor_statistics> & tstats, const st
const int row_size = e.values.size() / n_mat;

std::vector<float> activations;
activations.reserve(e.values.size());

for (int i = 0; i < n_mat; ++i) {
for (int j = 0; j < row_size; ++j) {
activations.push_back(e.values[i*row_size + j] / e.counts[i]);
if (e.activations.empty()) {
activations.reserve(e.values.size());

for (int i = 0; i < n_mat; ++i) {
for (int j = 0; j < row_size; ++j) {
activations.push_back(e.values[i*row_size + j] / e.counts[i]);
}
}
} else {
activations.reserve(e.activations.size());

for (int i = 0; i < n_mat; ++i) {
for (int j = 0; j < row_size; ++j) {
activations.push_back(e.activations[i*row_size + j] / e.counts[i]);
}
}
}



//ToDo: rename act_ variables to be more generic like 'values'
const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
const float act_max = *std::max_element(activations.begin(), activations.end());
const float act_min = *std::min_element(activations.begin(), activations.end());
Expand Down Expand Up @@ -282,6 +298,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
e.counts.resize(n_as, e.counts[0]);
}
if (e.values.empty()) {
e.activations.resize(src1->ne[0]*n_as, 0);
e.values.resize(src1->ne[0]*n_as, 0);
e.counts.resize(n_as, 0);
}
Expand Down Expand Up @@ -313,6 +330,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
e.counts[ex]++;

for (int64_t j = 0; j < src1->ne[0]; ++j) {
e.activations[e_start + j] += x[j];
e.values[e_start + j] += x[j] * x[j];
if (!std::isfinite((float)e.values[e_start + j])) {
LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
Expand All @@ -338,6 +356,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
const int64_t n_mat = src1->ne[2] * src1->ne[3];

if (e.values.empty()) {
e.activations.resize(src1->ne[0] * n_mat, 0);
e.values.resize(src1->ne[0] * n_mat, 0);
e.counts.resize(n_mat, 0);
}
Expand All @@ -359,6 +378,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
e.counts[mat_id]++;
for (int64_t j = 0; j < src1->ne[0]; ++j) {
e.activations[mat_start + j] += x[j];
e.values[mat_start + j] += x[j] * x[j];
if (!std::isfinite((float)e.values[j])) {
LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
Expand Down Expand Up @@ -532,6 +552,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
}

to_store.push_back(kv.first);
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
}
Expand Down Expand Up @@ -584,6 +605,16 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {

gguf_add_tensor(ctx_gguf, in_sum2);
gguf_add_tensor(ctx_gguf, counts);

if (!stat.activations.empty()) {
const int32_t nact = (int32_t) stat.activations.size();
struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
ggml_format_name(in_sum, "%s.in_sum", name.c_str()); // ToDo: consider a better name. 'in_act' maybe?
for (int32_t j = 0; j < nval; ++j) {
((float *) in_sum->data)[j] = (float) stat.activations[j];
}
gguf_add_tensor(ctx_gguf, in_sum);
}
}
}

Expand Down Expand Up @@ -722,14 +753,15 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
}
}

const std::string in_sum_suffix{ ".in_sum" };
const std::string in_sum2_suffix{ ".in_sum2" };
const std::string counts_suffix{ ".counts" };

// Could re-use m_stats instead, but this allows
// checking for completeness of *each* loaded imatrix file
// and also makes it easier to re-use a similar implementation in quantize.cpp
// Using an ordered map to get a deterministic iteration order.
std::map<std::string, std::pair<struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;

for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
std::string name = cur->name;
Expand All @@ -738,19 +770,24 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {

if (string_remove_suffix(name, in_sum2_suffix)) {
// in_sum2
sums_counts_for[std::move(name)].first = cur;
std::get<0>(sums_counts_for[std::move(name)]) = cur;
} else if (string_remove_suffix(name, counts_suffix)) {
// counts
sums_counts_for[std::move(name)].second = cur;
} else {
std::get<1>(sums_counts_for[std::move(name)]) = cur;
} else if (string_remove_suffix(name, in_sum_suffix)) {
// in_sum
std::get<2>(sums_counts_for[std::move(name)]) = cur;
}
else {
// ignore other tensors
}
}

for (const auto & sc : sums_counts_for) {
const std::string & name = sc.first;
const struct ggml_tensor * in_sum2 = sc.second.first;
const struct ggml_tensor * counts = sc.second.second;
const struct ggml_tensor * in_sum2 = std::get<0>(sc.second);
const struct ggml_tensor * counts = std::get<1>(sc.second);
const struct ggml_tensor * in_sum = std::get<2>(sc.second);

if (!in_sum2 || !counts) {
LOG_ERR("%s: mismatched sums and counts for %s\n", __func__, name.c_str());
Expand All @@ -764,6 +801,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
int64_t nval = ggml_nelements(in_sum2);
if (e.values.empty()) {
e.values.resize(nval, 0.0f);
e.activations.resize(nval, 0.0f);
} else if ((size_t) nval != e.values.size()) {
LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size());
gguf_free(ctx_gguf);
Expand Down Expand Up @@ -791,6 +829,12 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
for (int64_t j = 0; j < ncounts; j++) {
e.counts[j] += std::lround(((const float *) counts->data)[j]);
}
// ToDo: fix blow up when GGUF does not have in_sum
if (in_sum->data != nullptr) {
for (int64_t j = 0; j < nval; j++) {
e.activations[j] += ((const float *) in_sum->data)[j];
}
}
}

// TODO: extract into its own method; this is also used by the legacy format
Expand Down
Loading