From 09bc7c24e7502bf30802e80c71bf96b3c384a20e Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 26 Jul 2025 17:06:41 +0100
Subject: [PATCH 01/36] Use activations to calculate the stats

---
 tools/imatrix/imatrix.cpp | 64 +++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 9aad3711bae54..715a589037dc7 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -38,10 +38,12 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
 static const char * const LLM_KV_IMATRIX_CHUNK_SIZE  = "imatrix.chunk_size";
 
 struct Stats {
+    std::vector<float>   activations;
     std::vector<float>   values;
     std::vector<int64_t> counts;
 };
 
+//ToDo: rename sqract variables to be more generic like 'values'
 struct tensor_statistics {
     std::string tensor;
     Stats stats;
@@ -139,14 +141,28 @@ static void compute_statistics(std::vector<tensor_statistics> & tstats, const st
     const int row_size = e.values.size() / n_mat;
 
     std::vector<float> activations;
-    activations.reserve(e.values.size());
 
-    for (int i = 0; i < n_mat; ++i) {
-        for (int j = 0; j < row_size; ++j) {
-            activations.push_back(e.values[i*row_size + j] / e.counts[i]);
+    if (e.activations.empty()) {
+        activations.reserve(e.values.size());
+
+        for (int i = 0; i < n_mat; ++i) {
+            for (int j = 0; j < row_size; ++j) {
+                activations.push_back(e.values[i*row_size + j] / e.counts[i]);
+            }
+        }
+    } else {
+        activations.reserve(e.activations.size());
+
+        for (int i = 0; i < n_mat; ++i) {
+            for (int j = 0; j < row_size; ++j) {
+                activations.push_back(e.activations[i*row_size + j] / e.counts[i]);
+            }
         }
     }
 
+
+    //ToDo: rename act_ variables to be more generic like 'values'
     const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
     const float act_max   = *std::max_element(activations.begin(), activations.end());
     const float act_min   = *std::min_element(activations.begin(), activations.end());
@@ -282,6 +298,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             e.counts.resize(n_as, e.counts[0]);
         }
         if (e.values.empty()) {
+            e.activations.resize(src1->ne[0]*n_as, 0);
             e.values.resize(src1->ne[0]*n_as, 0);
             e.counts.resize(n_as, 0);
         }
@@ -313,6 +330,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 e.counts[ex]++;
 
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
+                    e.activations[e_start + j] += x[j];
                     e.values[e_start + j] += x[j] * x[j];
                     if (!std::isfinite((float)e.values[e_start + j])) {
                         LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
@@ -338,6 +356,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         const int64_t n_mat = src1->ne[2] * src1->ne[3];
 
         if (e.values.empty()) {
+            e.activations.resize(src1->ne[0] * n_mat, 0);
             e.values.resize(src1->ne[0] * n_mat, 0);
             e.counts.resize(n_mat, 0);
         }
@@ -359,6 +378,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                     const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
                     e.counts[mat_id]++;
                     for (int64_t j = 0; j < src1->ne[0]; ++j) {
+                        e.activations[mat_start + j] += x[j];
                         e.values[mat_start + j] += x[j] * x[j];
                         if (!std::isfinite((float)e.values[j])) {
                             LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
@@ -532,6 +552,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         }
 
         to_store.push_back(kv.first);
+        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
     }
@@ -584,6 +605,16 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
 
             gguf_add_tensor(ctx_gguf, in_sum2);
             gguf_add_tensor(ctx_gguf, counts);
+
+            if (!stat.activations.empty()) {
+                const int32_t nact = (int32_t) stat.activations.size();
+                struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
+                ggml_format_name(in_sum, "%s.in_sum", name.c_str()); // ToDo: consider a better name. 'in_act' maybe?
+                for (int32_t j = 0; j < nval; ++j) {
+                    ((float *) in_sum->data)[j] = (float) stat.activations[j];
+                }
+                gguf_add_tensor(ctx_gguf, in_sum);
+            }
         }
     }
 
@@ -722,6 +753,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         }
     }
 
+    const std::string in_sum_suffix{ ".in_sum" };
     const std::string in_sum2_suffix{ ".in_sum2" };
     const std::string counts_suffix{ ".counts" };
 
@@ -729,7 +761,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
     // checking for completeness of *each* loaded imatrix file
     // and also makes it easier to re-use a similar implementation in quantize.cpp
     // Using an ordered map to get a deterministic iteration order.
-    std::map<std::string, std::pair<struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
+    std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
 
     for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
         std::string name = cur->name;
@@ -738,19 +770,24 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
 
         if (string_remove_suffix(name, in_sum2_suffix)) {
             // in_sum2
-            sums_counts_for[std::move(name)].first = cur;
+            std::get<0>(sums_counts_for[std::move(name)]) = cur;
         } else if (string_remove_suffix(name, counts_suffix)) {
             // counts
-            sums_counts_for[std::move(name)].second = cur;
-        } else {
+            std::get<1>(sums_counts_for[std::move(name)]) = cur;
+        } else if (string_remove_suffix(name, in_sum_suffix)) {
+            // in_sum
+            std::get<2>(sums_counts_for[std::move(name)]) = cur;
+        }
+        else {
             // ignore other tensors
         }
     }
 
     for (const auto & sc : sums_counts_for) {
         const std::string &        name    = sc.first;
-        const struct ggml_tensor * in_sum2 = sc.second.first;
-        const struct ggml_tensor * counts  = sc.second.second;
+        const struct ggml_tensor * in_sum2 = std::get<0>(sc.second);
+        const struct ggml_tensor * counts  = std::get<1>(sc.second);
+        const struct ggml_tensor * in_sum  = std::get<2>(sc.second);
 
         if (!in_sum2 || !counts) {
             LOG_ERR("%s: mismatched sums and counts for %s\n", __func__, name.c_str());
@@ -764,6 +801,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         int64_t nval = ggml_nelements(in_sum2);
         if (e.values.empty()) {
             e.values.resize(nval, 0.0f);
+            e.activations.resize(nval, 0.0f);
         } else if ((size_t) nval != e.values.size()) {
             LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size());
             gguf_free(ctx_gguf);
@@ -791,6 +829,12 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         for (int64_t j = 0; j < ncounts; j++) {
             e.counts[j] += std::lround(((const float *) counts->data)[j]);
         }
+        // ToDo: fix blow up when GGUF does not have in_sum
+        if (in_sum->data != nullptr) {
+            for (int64_t j = 0; j < nval; j++) {
+                e.activations[j] += ((const float *) in_sum->data)[j];
+            }
+        }
     }
 
     // TODO: extract into its own method; this is also used by the legacy format
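For context between these two patches: with only `values[j]` accumulating Σx², a consumer can recover the mean squared activation of element j, while the new `activations[j]` accumulator (Σx) additionally makes the signed mean and the variance recoverable. A minimal sketch, assuming `sum_x`/`sum_x2` are one element's entries of the two accumulators and `count` is the matching entry of `counts` (the helper is hypothetical, not part of the series):

    #include <cmath>

    struct element_stats { float mean; float var; };

    // recover E[x] and Var[x] for one element from its running sums (hypothetical helper)
    static element_stats recover_stats(float sum_x, float sum_x2, float count) {
        const float mean = sum_x  / count;                    // signed mean activation
        const float e_x2 = sum_x2 / count;                    // mean squared activation
        return { mean, std::fmax(0.0f, e_x2 - mean * mean) }; // Var[x] = E[x^2] - E[x]^2
    }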
From 2097f038b07f43bdbb40b568f310474467414fac Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Thu, 31 Jul 2025 20:46:40 +0100
Subject: [PATCH 02/36] Refactor variable names

---
 tools/imatrix/imatrix.cpp | 188 +++++++++++++++++++-------------------
 1 file changed, 92 insertions(+), 96 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 715a589037dc7..b92b1486f7b13 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -38,8 +38,8 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
 static const char * const LLM_KV_IMATRIX_CHUNK_SIZE  = "imatrix.chunk_size";
 
 struct Stats {
-    std::vector<float>   activations;
-    std::vector<float>   values;
+    std::vector<float>   in_sum;
+    std::vector<float>   in_sum2;
     std::vector<int64_t> counts;
 };
 
@@ -47,15 +47,15 @@ struct Stats {
 struct tensor_statistics {
     std::string tensor;
     Stats stats;
-    float total_sqract = 0.0f;
-    float mean_sqract  = 0.0f;
-    float max_sqract   = 0.0f;
-    float min_sqract   = 0.0f;
-    int elements       = 0;
+    float sum_values  = 0.0f;
+    float mean_values = 0.0f;
+    float max_values  = 0.0f;
+    float min_values  = 0.0f;
+    int elements      = 0;
     float stddev  = 0.0f;
     float active  = 0.0f;
     float entropy = 0.0f;
-    float zd      = 0.0f;
+    float zd_score = 0.0f;
     float cossim  = 0.0f;
 };
 
@@ -128,8 +128,8 @@ static void process_tensor_name(const std::string & input, std::string & layer,
 }
 
 static void compute_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
-    if (e.values.size() % e.counts.size() != 0) {
-        LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size());
+    if (e.in_sum2.size() % e.counts.size() != 0) {
+        LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.in_sum2.size());
         return;
     }
     if (e.counts.empty()) {
@@ -138,73 +138,69 @@ static void compute_statistics(std::vector<tensor_statistics> & tstats, const st
     }
 
     const int n_mat = e.counts.size();
-    const int row_size = e.values.size() / n_mat;
+    const int row_size = e.in_sum2.size() / n_mat;
 
     std::vector<float> activations;
 
-    if (e.activations.empty()) {
-        activations.reserve(e.values.size());
+    if (e.in_sum.empty()) {
+        activations.reserve(e.in_sum2.size());
 
         for (int i = 0; i < n_mat; ++i) {
             for (int j = 0; j < row_size; ++j) {
-                activations.push_back(e.values[i*row_size + j] / e.counts[i]);
+                activations.push_back(e.in_sum2[i*row_size + j] / e.counts[i]);
             }
         }
     } else {
-        activations.reserve(e.activations.size());
+        activations.reserve(e.in_sum.size());
 
         for (int i = 0; i < n_mat; ++i) {
             for (int j = 0; j < row_size; ++j) {
-                activations.push_back(e.activations[i*row_size + j] / e.counts[i]);
+                activations.push_back(e.in_sum[i*row_size + j] / e.counts[i]);
             }
         }
     }
 
-
-
-    //ToDo: rename act_ variables to be more generic like 'values'
-    const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
-    const float act_max = *std::max_element(activations.begin(), activations.end());
-    const float act_min = *std::min_element(activations.begin(), activations.end());
-    const float act_mean = act_total / activations.size();
-    const float act_sqr_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
-    const float act_var = (act_sqr_total / activations.size()) - (act_mean * act_mean);
-    const float act_dev = std::sqrt(std::max(0.0f, act_var));
-    float threshold = 1e-5f;
-    const int inactive_count = std::count_if(activations.begin(), activations.end(),
-                                             [threshold](const float v) { return fabsf(v) <= threshold; });
-    const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();
+    const float sum = std::accumulate(activations.begin(), activations.end(), 0.0f);
+    const float max = *std::max_element(activations.begin(), activations.end());
+    const float min = *std::min_element(activations.begin(), activations.end());
+    const float mean = sum / activations.size();
+    const float sqr_sum = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
+    const float variance = (sqr_sum / activations.size()) - (mean * mean);
+    const float std_deviation = std::sqrt(std::max(0.0f, variance));
+    const float threshold = 1e-5f;
+    const int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabsf(v) <= threshold; });
+    const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();
 
     float entropy = 0;
-    if (act_total > 0) {
+    if (sum > 0) {
         for (const auto act : activations) {
-            if (const float p = act / act_total; p > 0) {
+            if (const float p = act / sum; p > 0) {
                 entropy -= p * std::log2(p);
             }
         }
     }
 
     int z_score = 0;
-    if (act_dev > 0.0f) {
+    if (std_deviation > 0.0f) {
         for (const auto act : activations) {
-            if (const float p = (act - act_mean) / act_dev; p > 1) {
+            if (const float p = (act - mean) / std_deviation; p > 1) {
                 z_score++;
             }
         }
     }
 
     auto & ts = tstats.emplace_back();
-    ts.tensor       = name;
-    ts.stats        = e;
-    ts.total_sqract = act_total;
-    ts.mean_sqract  = act_mean;
-    ts.max_sqract   = act_max;
-    ts.min_sqract   = act_min;
-    ts.elements     = static_cast<int>(activations.size());
-    ts.stddev       = act_dev;
-    ts.active       = active_ratio;
-    ts.entropy      = entropy;
-    ts.zd           = static_cast<float>(z_score) / ts.elements;
+    ts.tensor      = name;
+    ts.stats       = e;
+    ts.sum_values  = sum;
+    ts.mean_values = mean;
+    ts.max_values  = max;
+    ts.min_values  = min;
+    ts.elements    = static_cast<int>(activations.size());
+    ts.stddev      = std_deviation;
+    ts.active      = active_ratio;
+    ts.entropy     = entropy;
+    ts.zd_score    = static_cast<float>(z_score) / ts.elements;
 }
 
 static void compute_cossim(std::vector<tensor_statistics> & tstats) {
@@ -217,14 +213,14 @@ static void compute_cossim(std::vector<tensor_statistics> & tstats) {
             auto prev = std::find_if(tstats.begin(), tstats.end(),
                                      [tname](const tensor_statistics & t) { return t.tensor == tname; });
             if (prev != tstats.end()) {
-                const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
-                                                    prev->stats.values.begin(), 0.0f);
-                const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
-                                                                    ts.stats.values.begin(), 0.0f));
-                const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(),
-                                                                    prev->stats.values.begin(), 0.0f));
-                const float cs = dp / (curr_mag * prev_mag);
-                ts.cossim = cs;
+                const float dot_product = std::inner_product(ts.stats.in_sum2.begin(), ts.stats.in_sum2.end(),
+                                                             prev->stats.in_sum2.begin(), 0.0f);
+                const float magnitude = std::sqrt(std::inner_product(ts.stats.in_sum2.begin(), ts.stats.in_sum2.end(),
+                                                                     ts.stats.in_sum2.begin(), 0.0f));
+                const float prev_magnitude = std::sqrt(std::inner_product(prev->stats.in_sum2.begin(), prev->stats.in_sum2.end(),
+                                                                          prev->stats.in_sum2.begin(), 0.0f));
+                const float cos_sim = dot_product / (magnitude * prev_magnitude);
+                ts.cossim = cos_sim;
             }
         } else {
             ts.cossim = 0;
@@ -297,13 +293,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             // broadcast, when loading an old imatrix
             e.counts.resize(n_as, e.counts[0]);
         }
-        if (e.values.empty()) {
-            e.activations.resize(src1->ne[0]*n_as, 0);
-            e.values.resize(src1->ne[0]*n_as, 0);
+        if (e.in_sum2.empty()) {
+            e.in_sum.resize(src1->ne[0]*n_as, 0);
+            e.in_sum2.resize(src1->ne[0]*n_as, 0);
             e.counts.resize(n_as, 0);
         }
-        else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
-            LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)(src1->ne[0]*n_as));
+        else if (e.in_sum2.size() != (size_t)src1->ne[0]*n_as) {
+            LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.in_sum2.size(), (int)(src1->ne[0]*n_as));
             exit(1); //GGML_ABORT("fatal error");
         }
         else if (e.counts.size() != (size_t)n_as) {
@@ -330,10 +326,10 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 e.counts[ex]++;
 
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
-                    e.activations[e_start + j] += x[j];
-                    e.values[e_start + j] += x[j] * x[j];
-                    if (!std::isfinite((float)e.values[e_start + j])) {
-                        LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
+                    e.in_sum[e_start + j] += x[j];
+                    e.in_sum2[e_start + j] += x[j] * x[j];
+                    if (!std::isfinite((float)e.in_sum2[e_start + j])) {
+                        LOG_ERR("%f detected in %s\n", (float)e.in_sum2[e_start + j], wname.c_str());
                         exit(1);
                     }
                 }
@@ -355,13 +351,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         auto & e = m_stats[wname];
         const int64_t n_mat = src1->ne[2] * src1->ne[3];
 
-        if (e.values.empty()) {
-            e.activations.resize(src1->ne[0] * n_mat, 0);
-            e.values.resize(src1->ne[0] * n_mat, 0);
+        if (e.in_sum2.empty()) {
+            e.in_sum.resize(src1->ne[0] * n_mat, 0);
+            e.in_sum2.resize(src1->ne[0] * n_mat, 0);
             e.counts.resize(n_mat, 0);
         }
-        else if (e.values.size() != (size_t)(src1->ne[0] * n_mat)) {
-            LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)(src1->ne[0] * n_mat));
+        else if (e.in_sum2.size() != (size_t)(src1->ne[0] * n_mat)) {
+            LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.in_sum2.size(), (int)(src1->ne[0] * n_mat));
             exit(1); //GGML_ABORT("fatal error");
         }
         else if (e.counts.size() != (size_t)n_mat) {
@@ -378,10 +374,10 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                     const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
                     e.counts[mat_id]++;
                     for (int64_t j = 0; j < src1->ne[0]; ++j) {
-                        e.activations[mat_start + j] += x[j];
-                        e.values[mat_start + j] += x[j] * x[j];
-                        if (!std::isfinite((float)e.values[j])) {
-                            LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
+                        e.in_sum[mat_start + j] += x[j];
+                        e.in_sum2[mat_start + j] += x[j] * x[j];
+                        if (!std::isfinite((float)e.in_sum2[j])) {
+                            LOG_ERR("%f detected in %s\n", (float)e.in_sum2[j], wname.c_str());
                             exit(1);
                         }
                     }
@@ -470,14 +466,14 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {
         // ceiling division to avoid accidental zeros
         const int32_t ncall = (*std::max_element(stat.counts.begin(), stat.counts.end()) + (chunk_size - 1)) / chunk_size;
         out.write((const char *) &ncall, sizeof(ncall));
-        const int32_t nval = stat.values.size();
+        const int32_t nval = stat.in_sum2.size();
         const int32_t nmat = stat.counts.size();
         out.write((const char *) &nval, sizeof(nval));
         if (nval > 0 && nmat > 0) {
             std::vector<float> tmp(nval);
             for (int32_t i = 0; i < nval; i++) {
                 float count = static_cast<float>(stat.counts[i / (nval / nmat)]);
-                float value = stat.values[i];
+                float value = stat.in_sum2[i];
                 if (count == 0.0f) {
                     // store 1 for partial data
                     value = 1.0f;
@@ -552,8 +548,8 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         }
 
         to_store.push_back(kv.first);
-        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
-        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
+        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.in_sum.size(), GGML_MEM_ALIGN);
+        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.in_sum2.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
     }
 
@@ -588,7 +584,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
 
     for (const auto & name : to_store) {
         const auto & stat = m_stats.at(name);
-        const int32_t nval = (int32_t) stat.values.size();
+        const int32_t nval = (int32_t) stat.in_sum2.size();
         const int32_t nmat = (int32_t) stat.counts.size();
         if (nval > 0 && nmat > 0) {
             struct ggml_tensor * in_sum2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nval / nmat, nmat);
@@ -597,7 +593,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
             ggml_format_name(counts, "%s.counts", name.c_str());
 
             for (int32_t j = 0; j < nval; ++j) {
-                ((float *) in_sum2->data)[j] = (float) stat.values[j];
+                ((float *) in_sum2->data)[j] = (float) stat.in_sum2[j];
             }
             for (int32_t j = 0; j < nmat; ++j) {
                 ((float *) counts->data)[j] = (float) stat.counts[j];
@@ -606,12 +602,12 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
             gguf_add_tensor(ctx_gguf, in_sum2);
             gguf_add_tensor(ctx_gguf, counts);
 
-            if (!stat.activations.empty()) {
-                const int32_t nact = (int32_t) stat.activations.size();
+            if (!stat.in_sum.empty()) {
+                const int32_t nact = (int32_t) stat.in_sum.size();
                 struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
-                ggml_format_name(in_sum, "%s.in_sum", name.c_str()); // ToDo: consider a better name. 'in_act' maybe?
+                ggml_format_name(in_sum, "%s.in_sum", name.c_str());
                 for (int32_t j = 0; j < nval; ++j) {
-                    ((float *) in_sum->data)[j] = (float) stat.activations[j];
+                    ((float *) in_sum->data)[j] = (float) stat.in_sum[j];
                 }
                 gguf_add_tensor(ctx_gguf, in_sum);
             }
@@ -664,8 +660,8 @@ bool IMatrixCollector::load_imatrix_legacy(const char * fname) {
             return false;
         }
 
-        if (e.values.empty()) {
-            e.values.resize(nval, 0.0f);
+        if (e.in_sum2.empty()) {
+            e.in_sum2.resize(nval, 0.0f);
             e.counts.resize(1, 0);
         }
 
@@ -679,7 +675,7 @@ bool IMatrixCollector::load_imatrix_legacy(const char * fname) {
         // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
         for (int i = 0; i < nval; i++) {
-            e.values[i] += tmp[i] * chunk_size;
+            e.in_sum2[i] += tmp[i] * chunk_size;
         }
         // The legacy format doesn't distinguish the counts for different experts
         for (size_t j = 0; j < e.counts.size(); ++j) {
@@ -799,11 +795,11 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         auto & e = m_stats[name];
 
         int64_t nval = ggml_nelements(in_sum2);
-        if (e.values.empty()) {
-            e.values.resize(nval, 0.0f);
-            e.activations.resize(nval, 0.0f);
-        } else if ((size_t) nval != e.values.size()) {
-            LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size());
+        if (e.in_sum2.empty()) {
+            e.in_sum2.resize(nval, 0.0f);
+            e.in_sum.resize(nval, 0.0f);
+        } else if ((size_t) nval != e.in_sum2.size()) {
+            LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.in_sum2.size());
             gguf_free(ctx_gguf);
             ggml_free(ctx);
             return false;
@@ -824,7 +820,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
 
         // Recreate the state as expected by save_imatrix()
         for (int64_t j = 0; j < nval; j++) {
-            e.values[j] += ((const float *) in_sum2->data)[j];
+            e.in_sum2[j] += ((const float *) in_sum2->data)[j];
         }
         for (int64_t j = 0; j < ncounts; j++) {
             e.counts[j] += std::lround(((const float *) counts->data)[j]);
@@ -832,7 +828,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         // ToDo: fix blow up when GGUF does not have in_sum
         if (in_sum->data != nullptr) {
             for (int64_t j = 0; j < nval; j++) {
-                e.activations[j] += ((const float *) in_sum->data)[j];
+                e.in_sum[j] += ((const float *) in_sum->data)[j];
             }
         }
     }
@@ -1134,7 +1130,7 @@ static bool show_statistics(const common_params & params) {
             ;
             process_tensor_name(a.tensor, layer, name_a);
             process_tensor_name(b.tensor, layer, name_b);
-            return name_a < name_b || (name_a == name_b && a.total_sqract > b.total_sqract);
+            return name_a < name_b || (name_a == name_b && a.sum_values > b.sum_values);
         }
     };
     std::sort(ts.begin(), ts.end(), tensor_comparer());
@@ -1166,12 +1162,12 @@ static bool show_statistics(const common_params & params) {
         }
 
         LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
-                layer.c_str(), name.c_str(), tstat.total_sqract, tstat.min_sqract, tstat.max_sqract, tstat.mean_sqract,
-                tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy,
-                100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd, tstat.cossim);
+                layer.c_str(), name.c_str(), tstat.sum_values, tstat.min_values, tstat.max_values, tstat.mean_values,
+                tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy,
+                100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd_score, tstat.cossim);
 
-        const float weighted_bias   = tstat.elements * tstat.total_sqract;
-        const float weighted_zd     = tstat.elements * tstat.zd;
+        const float weighted_bias   = tstat.elements * tstat.sum_values;
+        const float weighted_zd     = tstat.elements * tstat.zd_score;
         const float weighted_cossim = tstat.elements * tstat.cossim;
 
         if (ws.find(blk) != ws.end()) {

From 78ddb475de96dbb20ba5f5bba597f98a30beb807 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:31:21 +0100
Subject: [PATCH 03/36] Fix blow up when GGUF does not have in_sum

---
 tools/imatrix/imatrix.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index b92b1486f7b13..51d5a602a8ab9 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -825,8 +825,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         for (int64_t j = 0; j < ncounts; j++) {
             e.counts[j] += std::lround(((const float *) counts->data)[j]);
         }
-        // ToDo: fix blow up when GGUF does not have in_sum
-        if (in_sum->data != nullptr) {
+        if (in_sum != nullptr) {
             for (int64_t j = 0; j < nval; j++) {
                 e.in_sum[j] += ((const float *) in_sum->data)[j];
             }

From 9744a4a1c6e953457aa093ad3f07b66e9fdeed1d Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:36:12 +0100
Subject: [PATCH 04/36] Determine calculation mode

---
 tools/imatrix/imatrix.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 51d5a602a8ab9..398a5e85dbf23 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -127,18 +127,19 @@ static void process_tensor_name(const std::string & input, std::string & layer,
     }
 }
 
-static void compute_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
+static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
     if (e.in_sum2.size() % e.counts.size() != 0) {
         LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.in_sum2.size());
-        return;
+        return -1;
     }
     if (e.counts.empty()) {
         LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
-        return;
+        return -1;
     }
 
     const int n_mat = e.counts.size();
     const int row_size = e.in_sum2.size() / n_mat;
+    const int calc_mode = e.in_sum.empty() ? 2 : 1;
 
     std::vector<float> activations;
 
@@ -1104,13 +1105,15 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
 
 static bool show_statistics(const common_params & params) {
     std::vector<tensor_statistics> ts;
+    int tensor_calc_mode = 0;
+
     if (params.in_files.empty() || params.in_files.size() > 1) {
         LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
         return false;
     }
     if (g_collector.load_imatrix(params.in_files[0].c_str())) {
         for (const auto & [name, stats] : g_collector.get_mstats()) {
-            compute_statistics(ts, name, stats);
+            tensor_calc_mode = compute_tensor_statistics(ts, name, stats);
         }
     } else {
         LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
         return false;

From cce514a392023048a67a5e0a1ef0197deb9e652b Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:40:40 +0100
Subject: [PATCH 05/36] Compute entropy for activations

---
 tools/imatrix/imatrix.cpp | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 398a5e85dbf23..367412b6290f5 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -173,10 +173,28 @@ static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, co
     const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();
 
     float entropy = 0;
-    if (sum > 0) {
-        for (const auto act : activations) {
-            if (const float p = act / sum; p > 0) {
-                entropy -= p * std::log2(p);
+
+    if (calc_mode == 1) {
+        float div = 0.0;
+        std::vector<float> weights(activations.size());
+        for (size_t i = 0; i < activations.size(); ++i) {
+            const float w = activations[i] * activations[i];
+            weights[i] = w;
+            div += w;
+        }
+
+        if (div > 0.0) {
+            for (float w : weights) {
+                const float p = w / div;
+                if (p > 0.0) entropy -= p * std::log2(p);
+            }
+        }
+    } else {
+        if (sum > 0) {
+            for (const auto act : activations) {
+                if (const float p = act / sum; p > 0) {
+                    entropy -= p * std::log2(p);
+                }
             }
         }
     }
@@ -202,6 +220,8 @@ static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, co
     ts.active    = active_ratio;
     ts.entropy   = entropy;
     ts.zd_score  = static_cast<float>(z_score) / ts.elements;
+
+    return calc_mode;
 }
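Restating the entropy introduced in patch 05, with \(\bar{x}_i\) the mean activation of element \(i\): in calc_mode 1 the distribution is built from squared means,

    \[ w_i = \bar{x}_i^{\,2}, \qquad p_i = \frac{w_i}{\sum_j w_j}, \qquad H = -\sum_i p_i \log_2 p_i \]

while in calc_mode 2 (no raw sums available) the probabilities come directly from the non-negative mean squared activations, \(p_i = a_i / \sum_j a_j\). Either way \(H\) peaks at \(\log_2 N\) when all \(N\) elements contribute equally, which is what the "E (norm)" column later divides by.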
From b7fb362d8ebaa1c8db684f00c02e90a121f2517b Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:43:49 +0100
Subject: [PATCH 06/36] Compute cosine similarity based on activations

---
 tools/imatrix/imatrix.cpp | 57 ++++++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 12 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 367412b6290f5..a86442747e5b7 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -224,27 +224,60 @@ static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, co
     return calc_mode;
 }
 
-static void compute_cossim(std::vector<tensor_statistics> & tstats) {
+static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
     static const std::regex pattern(R"(blk\.(\d+)\.)");
+
+    auto build_avg = [](const Stats & s) -> std::vector<float> {
+        if (s.counts.empty()) return {};
+        const size_t n_mat = s.counts.size();
+        const size_t len   = !s.in_sum.empty() ? s.in_sum.size()
+                                               : s.in_sum2.size();
+        if (len == 0 || len % n_mat != 0) return {};
+        const size_t row = len / n_mat;
+        std::vector<float> v;
+        v.reserve(len);
+        if (!s.in_sum.empty()) {
+            for (size_t m = 0; m < n_mat; ++m) {
+                const float c = (float)s.counts[m];
+                if (c <= 0) return {};
+                const size_t off = m*row;
+                for (size_t j = 0; j < row; ++j) v.push_back(s.in_sum[off+j]/c);
+            }
+        } else {
+            for (size_t m = 0; m < n_mat; ++m) {
+                const float c = (float)s.counts[m];
+                if (c <= 0) return {};
+                const size_t off = m*row;
+                for (size_t j = 0; j < row; ++j) v.push_back(s.in_sum2[off+j]/c);
+            }
+        }
+        return v;
+    };
+
     // compute the cosine similarity between the same tensors in consecutive layers
     for (auto & ts : tstats) {
+        ts.cossim = 0;
+
         if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
             const int blk = std::stoi(match[1]);
+            if (blk <= 0) continue;
             std::string tname(ts.tensor);
             tname.replace(match.position(1), match.length(1), std::to_string(blk-1));
             auto prev = std::find_if(tstats.begin(), tstats.end(),
                                      [tname](const tensor_statistics & t) { return t.tensor == tname; });
-            if (prev != tstats.end()) {
-                const float dot_product = std::inner_product(ts.stats.in_sum2.begin(), ts.stats.in_sum2.end(),
-                                                             prev->stats.in_sum2.begin(), 0.0f);
-                const float magnitude = std::sqrt(std::inner_product(ts.stats.in_sum2.begin(), ts.stats.in_sum2.end(),
-                                                                     ts.stats.in_sum2.begin(), 0.0f));
-                const float prev_magnitude = std::sqrt(std::inner_product(prev->stats.in_sum2.begin(), prev->stats.in_sum2.end(),
-                                                                          prev->stats.in_sum2.begin(), 0.0f));
-                const float cos_sim = dot_product / (magnitude * prev_magnitude);
-                ts.cossim = cos_sim;
+            if (prev == tstats.end()) continue;
+            const auto curr_avg = build_avg(ts.stats);
+            const auto prev_avg = build_avg(prev->stats);
+            if (curr_avg.size() == prev_avg.size() && !curr_avg.empty()) {
+                float dot_prod = 0.0f, vec1 = 0.0f, vec2 = 0.0f;
+                for (size_t i = 0; i < curr_avg.size(); ++i) {
+                    dot_prod += curr_avg[i]*prev_avg[i];
+                    vec1     += curr_avg[i]*curr_avg[i];
+                    vec2     += prev_avg[i]*prev_avg[i];
+                }
+                if (vec1 > 0 && vec2 > 0) ts.cossim = dot_prod / (std::sqrt(vec1)*std::sqrt(vec2));
             }
-        } else {
-            ts.cossim = 0;
         }
     }
 }
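The cosine similarity above compares a tensor's mean-activation vector against the same tensor one layer earlier. A self-contained sketch of the same computation (the free function is illustrative, not part of the patch; it assumes two equally sized, non-empty vectors):

    #include <cmath>
    #include <numeric>
    #include <vector>

    // dot product over the product of magnitudes, as in the loop above
    static float cosine_similarity(const std::vector<float> & a, const std::vector<float> & b) {
        const float dot = std::inner_product(a.begin(), a.end(), b.begin(), 0.0f);
        const float na  = std::sqrt(std::inner_product(a.begin(), a.end(), a.begin(), 0.0f));
        const float nb  = std::sqrt(std::inner_product(b.begin(), b.end(), b.begin(), 0.0f));
        return (na > 0.0f && nb > 0.0f) ? dot / (na * nb) : 0.0f;
    }

Because the vectors are now built from signed means (in_sum) rather than squared sums, the result can be negative, which Σ(Act²)-based inputs could never produce.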
From 9b841eb696c6ecb00203daa93c2dc3cd831aa9fc Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:45:09 +0100
Subject: [PATCH 07/36] Compute l2 norm

---
 tools/imatrix/imatrix.cpp | 42 +++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index a86442747e5b7..a4633f6e7a62a 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -1,8 +1,9 @@
+#include "../../src/llama-impl.h"
 #include "arg.h"
 #include "common.h"
-#include "log.h"
-#include "llama.h"
 #include "gguf.h"
+#include "llama.h"
+#include "log.h"
 
 #include
 #include
@@ -10,14 +11,14 @@
 #include
 #include
 #include
-#include
-#include
-#include
 #include
-#include
 #include
-#include
+#include
+#include
+#include
+#include
+#include
+#include
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -43,7 +44,6 @@ static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";
     std::vector<int64_t> counts;
 };
 
-//ToDo: rename sqract variables to be more generic like 'values'
 struct tensor_statistics {
     std::string tensor;
     Stats stats;
@@ -57,6 +57,7 @@ struct tensor_statistics {
     float entropy  = 0.0f;
     float zd_score = 0.0f;
     float cossim   = 0.0f;
+    float l2_norm  = 0.0f;
 };
 
 class IMatrixCollector {
@@ -253,6 +254,7 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
         }
         return v;
     };
+
     // compute the cosine similarity between the same tensors in consecutive layers
     for (auto & ts : tstats) {
         ts.cossim = 0;
@@ -278,6 +280,30 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
             }
         }
     }
+
+    // compute the L2 norm between the same tensors in consecutive layers
+    for (auto & ts : tstats) {
+        ts.l2_norm = 0.0f;
+        if (ts.stats.in_sum.empty()) continue;
+
+        if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
+            const int blk = std::stoi(match[1]);
+            if (blk <= 0) continue;
+            std::string tname(ts.tensor);
+            tname.replace(match.position(1), match.length(1), std::to_string(blk - 1));
+            auto prev = std::find_if(tstats.begin(), tstats.end(),
+                                     [tname](const tensor_statistics & t) { return t.tensor == tname; });
+            if (prev == tstats.end()) continue;
+            const auto cur_avg  = build_avg(ts.stats);
+            const auto prev_avg = build_avg(prev->stats);
+            if (cur_avg.empty() || prev_avg.empty() || cur_avg.size() != prev_avg.size()) continue;
+
+            float dist = 0.0;
+            for (size_t i = 0; i < cur_avg.size(); ++i) {
+                const float act = cur_avg[i] - prev_avg[i];
+                dist += act * act;
+            }
+            ts.l2_norm = std::sqrt(dist);
+        }
+    }
 }
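The l2_norm added here is the Euclidean distance between the mean-activation vectors of the same tensor in consecutive layers:

    \[ d_\ell = \bigl\lVert \bar{x}^{(\ell)} - \bar{x}^{(\ell-1)} \bigr\rVert_2 = \sqrt{\sum_i \bigl(\bar{x}^{(\ell)}_i - \bar{x}^{(\ell-1)}_i\bigr)^2} \]

Unlike the cosine similarity, which is scale-invariant, this distance also reflects magnitude changes; a small value suggests the layer barely shifts the statistics of its inputs.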
From ee2509f563786a5c3e77f59f199ed87df00a0235 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:45:56 +0100
Subject: [PATCH 08/36] Adjust threshold

---
 tools/imatrix/imatrix.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index a4633f6e7a62a..fd90fe208951b 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -169,7 +169,7 @@ static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, co
     const float sqr_sum = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
     const float variance = (sqr_sum / activations.size()) - (mean * mean);
     const float std_deviation = std::sqrt(std::max(0.0f, variance));
-    const float threshold = 1e-5f;
+    const float threshold = 1e-5f * std_deviation;
     const int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabsf(v) <= threshold; });
     const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();
 
@@ -1199,7 +1199,7 @@ static bool show_statistics(const common_params & params) {
         return false;
     }
     if (!ts.empty()) {
-        compute_cossim(ts);
+        compute_layer_statistics(ts);
    } else {
        LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
        return false;

From fc8f92596fb26dc6c0e7cd3e7c7598d2135ab548 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sat, 2 Aug 2025 16:46:27 +0100
Subject: [PATCH 09/36] Update table display

---
 tools/imatrix/imatrix.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index fd90fe208951b..ca26195d2c54e 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -1225,9 +1225,22 @@ static bool show_statistics(const common_params & params) {
     std::map<int, weighted_stats> ws;
 
     LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
-    LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", " Layer", " Tensor", " Σ(Act²)",
-            " Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD",
-            " CosSim");
+    LOG_INF(
+        "\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+        " Layer",
+        " Tensor",
+        tensor_calc_mode == 1 ? " L₂ Norm" : " Σ(Act²)",
+        " Min",
+        " Max",
+        " μ",
+        " σ",
+        " % Active",
+        "N",
+        " Entropy",
+        "E (norm)",
+        "ZD",
+        " CosSim"
+    );
     LOG_INF(
         "=============================================================================================================="
         "===========================================================\n");

From 4c01f51ae15de75ab13b288947ffd78727c888bd Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 00:51:12 +0100
Subject: [PATCH 10/36] Remove inactive

---
 tools/imatrix/imatrix.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index ca26195d2c54e..b9e538e931366 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -53,7 +53,6 @@ struct tensor_statistics {
     float min_values = 0.0f;
     int elements     = 0;
     float stddev   = 0.0f;
-    float active   = 0.0f;
     float entropy  = 0.0f;
     float zd_score = 0.0f;
     float cossim   = 0.0f;
@@ -169,11 +168,7 @@ static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, co
     const float sqr_sum = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
     const float variance = (sqr_sum / activations.size()) - (mean * mean);
     const float std_deviation = std::sqrt(std::max(0.0f, variance));
-    const float threshold = 1e-5f * std_deviation;
-    const int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabsf(v) <= threshold; });
-    const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();
-
-    float entropy = 0;
+    float entropy = 0;
 
     if (calc_mode == 1) {
         float div = 0.0;
@@ -218,7 +213,6 @@ static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, co
     ts.min_values = min;
     ts.elements   = static_cast<int>(activations.size());
     ts.stddev     = std_deviation;
-    ts.active     = active_ratio;
     ts.entropy    = entropy;
     ts.zd_score   = static_cast<float>(z_score) / ts.elements;

From a32a2ecbed4b80b3ed1bbb22cbb96fa7bec6e973 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 00:51:33 +0100
Subject: [PATCH 11/36] Reformat report layout

---
 tools/imatrix/imatrix.cpp | 42 +++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index b9e538e931366..ba13b10f7ec6b 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -1219,25 +1219,23 @@ static bool show_statistics(const common_params & params) {
     std::map<int, weighted_stats> ws;
 
     LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
-    LOG_INF(
-        "\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
-        " Layer",
-        " Tensor",
-        tensor_calc_mode == 1 ? " L₂ Norm" : " Σ(Act²)",
-        " Min",
-        " Max",
-        " μ",
-        " σ",
-        " % Active",
+    LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%12s\t%s\t%5s\t%10s\n",
+        "Layer",
+        "Tensor",
+        tensor_calc_mode == 1 ? "L₂ Norm" : "Σ(Act²)",
+        "Min",
+        "Max",
+        "μ",
+        "σ",
         "N",
-        " Entropy",
+        "Entropy",
         "E (norm)",
         "ZD",
-        " CosSim"
+        "CosSim"
     );
     LOG_INF(
         "=============================================================================================================="
-        "===========================================================\n");
+        "=============================================================\n");
     for (const auto & tstat : ts) {
         std::string layer, name;
         process_tensor_name(tstat.tensor, layer, name);
@@ -1249,10 +1247,20 @@ static bool show_statistics(const common_params & params) {
             blk = -1;  // not a block layer
         }
 
-        LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
-                layer.c_str(), name.c_str(), tstat.sum_values, tstat.min_values, tstat.max_values, tstat.mean_values,
-                tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy,
-                100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd_score, tstat.cossim);
+        LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%12.4f\t%7.2f%%\t%6.2f%%\t%10.4f\n",
+                layer.c_str(),
+                name.c_str(),
+                tstat.sum_values,
+                tstat.min_values,
+                tstat.max_values,
+                tstat.mean_values,
+                tstat.stddev,
+                tstat.elements,
+                tstat.entropy,
+                100.0f * (tstat.entropy / std::log2(tstat.elements)),
+                100.0f * tstat.zd_score,
+                tstat.cossim
+            );
 
         const float weighted_bias   = tstat.elements * tstat.sum_values;
         const float weighted_zd     = tstat.elements * tstat.zd_score;
         const float weighted_cossim = tstat.elements * tstat.cossim;

         if (ws.find(blk) != ws.end()) {

From 4d1325e1ebe1eabf8fcf900ffc62b67ee3ba9e02 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 10:28:23 +0100
Subject: [PATCH 12/36] Refactor variables

---
 tools/imatrix/imatrix.cpp | 40 +++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index ba13b10f7ec6b..63b232f5cee33 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -1211,10 +1211,10 @@ static bool show_statistics(const common_params & params) {
     std::sort(ts.begin(), ts.end(), tensor_comparer());
 
     struct weighted_stats {
-        float weighted_bias   = 0.0f;
-        float weighted_zd     = 0.0f;
-        float weighted_cossim = 0.0f;
-        int total_elements    = 0;
+        float w_sum    = 0.0f;
+        float w_zd     = 0.0f;
+        float w_cossim = 0.0f;
+        int n          = 0;
     };
     std::map<int, weighted_stats> ws;
 
@@ -1262,22 +1262,22 @@ static bool show_statistics(const common_params & params) {
                 tstat.cossim
             );
 
-        const float weighted_bias   = tstat.elements * tstat.sum_values;
-        const float weighted_zd     = tstat.elements * tstat.zd_score;
-        const float weighted_cossim = tstat.elements * tstat.cossim;
+        const float w_sum    = tstat.elements * tstat.sum_values;
+        const float w_zd     = tstat.elements * tstat.zd_score;
+        const float w_cossim = tstat.elements * tstat.cossim;
 
         if (ws.find(blk) != ws.end()) {
-            ws[blk].weighted_bias   += weighted_bias;
-            ws[blk].weighted_zd     += weighted_zd;
-            ws[blk].weighted_cossim += weighted_cossim;
-            ws[blk].total_elements  += tstat.elements;
+            ws[blk].w_sum    += w_sum;
+            ws[blk].w_zd     += w_zd;
+            ws[blk].w_cossim += w_cossim;
+            ws[blk].n        += tstat.elements;
         } else {
             weighted_stats temp_ws;
-            temp_ws.weighted_bias   = weighted_bias;
-            temp_ws.weighted_zd     = weighted_zd;
-            temp_ws.weighted_cossim = weighted_cossim;
-            temp_ws.total_elements  = tstat.elements;
-            ws[blk] = temp_ws;
+            temp_ws.w_sum    = w_sum;
+            temp_ws.w_zd     = w_zd;
+            temp_ws.w_cossim = w_cossim;
+            temp_ws.n        = tstat.elements;
+            ws[blk]          = temp_ws;
         }
     }
 
@@ -1289,14 +1289,14 @@ static bool show_statistics(const common_params & params) {
         const auto & layer = first;
         const auto & stats = second;
 
-        if (stats.total_elements == 0) {
+        if (stats.n == 0) {
             continue;
         }
 
         if (layer >= 0) {
-            const float bias   = stats.weighted_bias / stats.total_elements;
-            const float zd     = stats.weighted_zd / stats.total_elements;
-            const float cossim = stats.weighted_cossim / stats.total_elements;
+            const float w_sum    = stats.w_sum / stats.n;
+            const float w_zd     = stats.w_zd / stats.n;
+            const float w_cossim = stats.w_cossim / stats.n;
 
             LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim);
         }

From 5324558132ff076774921d14d8979b2f6a92ebdf Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 10:28:47 +0100
Subject: [PATCH 13/36] Update table layout

---
 tools/imatrix/imatrix.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 63b232f5cee33..4a6d33837d42b 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -1231,8 +1231,7 @@ static bool show_statistics(const common_params & params) {
         "Entropy",
         "E (norm)",
         "ZD",
-        "CosSim"
-    );
+        "CosSim");
     LOG_INF(
         "=============================================================================================================="
         "=============================================================\n");
@@ -1259,8 +1258,7 @@ static bool show_statistics(const common_params & params) {
                 tstat.entropy,
                 100.0f * (tstat.entropy / std::log2(tstat.elements)),
                 100.0f * tstat.zd_score,
-                tstat.cossim
-            );
+                tstat.cossim);
 
         const float w_sum    = tstat.elements * tstat.sum_values;
         const float w_zd     = tstat.elements * tstat.zd_score;
@@ -1283,8 +1281,12 @@ static bool show_statistics(const common_params & params) {
 
     const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; });
     LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers);
-    LOG_INF("\n%s\t%s\t%s\t%s\n", " Layer", " μΣ(Act²)", "   μZD", "μCosSim");
-    LOG_INF("================================================\n");
+    LOG_INF("\n%6s\t%16s\t%7s\t%11s\n",
+        "Layer",
+        tensor_calc_mode == 1 ? "μL₂ Norm" : "μΣ(Act²)",
+        "μZD",
+        "μCosSim");
+    LOG_INF("============================================\n");
     for (const auto & [first, second] : ws) {
         const auto & layer = first;
         const auto & stats = second;
@@ -1298,7 +1300,11 @@ static bool show_statistics(const common_params & params) {
             const float w_zd     = stats.w_zd / stats.n;
             const float w_cossim = stats.w_cossim / stats.n;
 
-            LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim);
+            LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n",
+                layer,
+                w_sum,
+                100.0f * w_zd,
+                w_cossim);
         }
     }
     LOG_INF("\n");

From fce05aac9ea8d31e56de87d4108f94ebe339f5b2 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 13:03:21 +0100
Subject: [PATCH 14/36] Refactor lambda into compute_tensor_averages() function

---
 tools/imatrix/imatrix.cpp | 68 +++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 4a6d33837d42b..88807d3721bb1 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -127,6 +127,39 @@ static void process_tensor_name(const std::string & input, std::string & layer,
     }
 }
 
+static std::vector<float> compute_tensor_averages(const Stats & tstats) {
+    if (tstats.counts.empty()) return {};
+    const size_t n_mat = tstats.counts.size();
+    const size_t len = !tstats.in_sum.empty() ? tstats.in_sum.size() : tstats.in_sum2.size();
+
+    if (len == 0 || len % n_mat != 0) return {};
+    const size_t row = len / n_mat;
+    std::vector<float> vec;
+    vec.reserve(len);
+
+    if (!tstats.in_sum.empty()) {
+        for (size_t m = 0; m < n_mat; ++m) {
+            const float c = (float)tstats.counts[m];
+            if (c <= 0) return {};
+            const size_t off = m * row;
+            for (size_t j = 0; j < row; ++j) {
+                vec.push_back(tstats.in_sum[off + j] / c);
+            }
+        }
+    } else {
+        for (size_t m = 0; m < n_mat; ++m) {
+            const float c = (float)tstats.counts[m];
+            if (c <= 0) return {};
+            const size_t off = m * row;
+            for (size_t j = 0; j < row; ++j) {
+                vec.push_back(tstats.in_sum2[off + j] / c);
+            }
+        }
+    }
+
+    return vec;
+}
+
 static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
     if (e.in_sum2.size() % e.counts.size() != 0) {
         LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.in_sum2.size());
@@ -252,33 +285,6 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
 static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
     static const std::regex pattern(R"(blk\.(\d+)\.)");
 
-    auto build_avg = [](const Stats & s) -> std::vector<float> {
-        if (s.counts.empty()) return {};
-        const size_t n_mat = s.counts.size();
-        const size_t len   = !s.in_sum.empty() ? s.in_sum.size()
-                                               : s.in_sum2.size();
-        if (len == 0 || len % n_mat != 0) return {};
-        const size_t row = len / n_mat;
-        std::vector<float> v;
-        v.reserve(len);
-        if (!s.in_sum.empty()) {
-            for (size_t m = 0; m < n_mat; ++m) {
-                const float c = (float)s.counts[m];
-                if (c <= 0) return {};
-                const size_t off = m*row;
-                for (size_t j = 0; j < row; ++j) v.push_back(s.in_sum[off+j]/c);
-            }
-        } else {
-            for (size_t m = 0; m < n_mat; ++m) {
-                const float c = (float)s.counts[m];
-                if (c <= 0) return {};
-                const size_t off = m*row;
-                for (size_t j = 0; j < row; ++j) v.push_back(s.in_sum2[off+j]/c);
-            }
-        }
-        return v;
-    };
-
     // compute the cosine similarity between the same tensors in consecutive layers
     for (auto & ts : tstats) {
         ts.cossim = 0;
@@ -261,8 +267,8 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
             auto prev = std::find_if(tstats.begin(), tstats.end(),
                                      [tname](const tensor_statistics & t) { return t.tensor == tname; });
             if (prev == tstats.end()) continue;
-            const auto curr_avg = build_avg(ts.stats);
-            const auto prev_avg = build_avg(prev->stats);
+            const auto curr_avg = compute_tensor_averages(ts.stats);
+            const auto prev_avg = compute_tensor_averages(prev->stats);
             if (curr_avg.size() == prev_avg.size() && !curr_avg.empty()) {
                 float dot_prod = 0.0f, vec1 = 0.0f, vec2 = 0.0f;
                 for (size_t i = 0; i < curr_avg.size(); ++i) {
@@ -288,8 +294,8 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
             auto prev = std::find_if(tstats.begin(), tstats.end(),
                                      [tname](const tensor_statistics & t) { return t.tensor == tname; });
             if (prev == tstats.end()) continue;
-            const auto cur_avg = build_avg(ts.stats);
-            const auto prev_avg = build_avg(prev->stats);
+            const auto cur_avg  = compute_tensor_averages(ts.stats);
+            const auto prev_avg = compute_tensor_averages(prev->stats);
             if (cur_avg.empty() || prev_avg.empty() || cur_avg.size() != prev_avg.size()) continue;
 
             float dist = 0.0;

From be60469f25a2268275b514a6ecf3f8b2d2c74aae Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 15:10:17 +0100
Subject: [PATCH 15/36] Refactor function names

---
 tools/imatrix/imatrix.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index 88807d3721bb1..d5cea686aa682 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -160,7 +160,7 @@ static std::vector<float> compute_tensor_averages(const Stats & tstats) {
     return vec;
 }
 
-static int compute_tensor_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
+static int compute_vector_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
     if (e.in_sum2.size() % e.counts.size() != 0) {
         LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.in_sum2.size());
         return -1;
@@ -252,7 +252,7 @@ static int compute_vector_statistics(std::vector<tensor_statistics> & tstats, co
 
-static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
+static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
     static const std::regex pattern(R"(blk\.(\d+)\.)");
 
@@ -1192,14 +1192,14 @@ static bool show_statistics(const common_params & params) {
     }
     if (g_collector.load_imatrix(params.in_files[0].c_str())) {
         for (const auto & [name, stats] : g_collector.get_mstats()) {
-            tensor_calc_mode = compute_tensor_statistics(ts, name, stats);
+            tensor_calc_mode = compute_vector_statistics(ts, name, stats);
         }
     } else {
         LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
         return false;
     }
     if (!ts.empty()) {
-        compute_layer_statistics(ts);
+        compute_tensor_statistics(ts);
     } else {
         LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
         return false;

From a6155a81254506130cd19e88eb8e19c49f2e8f41 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Sun, 3 Aug 2025 16:35:03 +0100
Subject: [PATCH 16/36] Add compute_layer_statistics() function

---
 tools/imatrix/imatrix.cpp | 64 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)

diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
index d5cea686aa682..d4a6ebddd0efa 100644
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -272,11 +272,11 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
             if (curr_avg.size() == prev_avg.size() && !curr_avg.empty()) {
                 float dot_prod = 0.0f, vec1 = 0.0f, vec2 = 0.0f;
                 for (size_t i = 0; i < curr_avg.size(); ++i) {
-                    dot_prod += curr_avg[i]*prev_avg[i];
-                    vec1     += curr_avg[i]*curr_avg[i];
-                    vec2     += prev_avg[i]*prev_avg[i];
+                    dot_prod += curr_avg[i] * prev_avg[i];
+                    vec1     += curr_avg[i] * curr_avg[i];
+                    vec2     += prev_avg[i] * prev_avg[i];
                 }
-                if (vec1 > 0 && vec2 > 0) ts.cossim = dot_prod / (std::sqrt(vec1)*std::sqrt(vec2));
+                if (vec1 > 0 && vec2 > 0) ts.cossim = dot_prod / (std::sqrt(vec1) * std::sqrt(vec2));
             }
         }
     }
@@ -308,6 +308,62 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
     }
 }
 
+static void compute_layer_statistics(const std::vector<tensor_statistics> & tstats,
+                                     std::map<int, float> & layer_cossim,
+                                     const std::unordered_map<std::string, Stats> & stats_map) {
+    struct layer_aggregation {
+        std::vector<float> curr_avg;
+        std::vector<float> prev_avg;
+    };
+
+    static const std::regex pattern(R"(blk\.(\d+)\.)");
+
+    // index tensor stats by name for quick lookup
+    std::unordered_map<std::string, const tensor_statistics *> tidx;
+    tidx.reserve(tstats.size());
+    for (const auto & ts : tstats) tidx[ts.tensor] = &ts;
+
+    // concatenate per-layer
+    std::map<int, layer_aggregation> taggr;  // ordered by layer
+    for (const auto & ts : tstats) {
+        std::smatch match;
+        if (!std::regex_search(ts.tensor, match, pattern)) continue;
+        const int blk = std::stoi(match[1]);
+        if (blk <= 0) continue;
+
+        std::string prev_lyr(ts.tensor);
+        prev_lyr.replace(match.position(1), match.length(1), std::to_string(blk-1));
+
+        if (auto it_prev = tidx.find(prev_lyr); it_prev == tidx.end()) continue;
+
+        // use stored Stats to rebuild averages
+        const auto curr_avg = compute_tensor_averages(stats_map.at(ts.tensor));
+        const auto prev_avg = compute_tensor_averages(stats_map.at(prev_lyr));
+        if (curr_avg.empty() || prev_avg.empty() || curr_avg.size() != prev_avg.size()) continue;
+
+        auto & [curr, prev] = taggr[blk];
+        curr.insert(curr.end(), curr_avg.begin(), curr_avg.end());
+        prev.insert(prev.end(), prev_avg.begin(), prev_avg.end());
+    }
+
+    // compute cosine per layer
+    for (auto & kv : taggr) {
+        const auto & curr = kv.second.curr_avg;
+        const auto & prev = kv.second.prev_avg;
+        if (curr.size() != prev.size() || curr.empty()) continue;
+        float dot_prod = 0.0, lyr1 = 0.0, lyr2 = 0.0;
+        for (size_t i = 0; i < curr.size(); ++i) {
+            const double a = curr[i], b = prev[i];
+            dot_prod += a*b;
+            lyr1     += a*a;
+            lyr2     += b*b;
+        }
+        float cossim = 0.0f;
+        if (lyr1 > 0.0 && lyr2 > 0.0) cossim = dot_prod / (std::sqrt(lyr1) * std::sqrt(lyr2));
+        layer_cossim[kv.first] = cossim;
+    }
+}
+
 bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
     GGML_UNUSED(user_data);
Sep 17 00:00:00 2001 From: Ed Addario Date: Sun, 3 Aug 2025 16:38:02 +0100 Subject: [PATCH 17/36] Update aggregated statistic report layout --- tools/imatrix/imatrix.cpp | 46 ++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index d4a6ebddd0efa..9a20758340c9e 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -1324,50 +1324,42 @@ static bool show_statistics(const common_params & params) { const float w_sum = tstat.elements * tstat.sum_values; const float w_zd = tstat.elements * tstat.zd_score; - const float w_cossim = tstat.elements * tstat.cossim; if (ws.find(blk) != ws.end()) { ws[blk].w_sum += w_sum; ws[blk].w_zd += w_zd; - ws[blk].w_cossim += w_cossim; ws[blk].n += tstat.elements; } else { weighted_stats temp_ws; temp_ws.w_sum = w_sum; temp_ws.w_zd = w_zd; - temp_ws.w_cossim = w_cossim; temp_ws.n = tstat.elements; ws[blk] = temp_ws; } } - const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; }); - LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers); + std::map layer_cossim; + compute_layer_statistics(ts, layer_cossim, g_collector.get_mstats()); + + const auto layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; }); + LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); LOG_INF("\n%6s\t%16s\t%7s\t%11s\n", "Layer", - tensor_calc_mode == 1 ? "μL₂ Norm" : "μΣ(Act²)", - "μZD", - "μCosSim"); + tensor_calc_mode == 1 ? "L₂ Norm" : "Σ(Act²)", + "ZD", + "CosSim"); LOG_INF("============================================\n"); - for (const auto & [first, second] : ws) { - const auto & layer = first; - const auto & stats = second; - - if (stats.n == 0) { - continue; - } - - if (layer >= 0) { - const float w_sum = stats.w_sum / stats.n; - const float w_zd = stats.w_zd / stats.n; - const float w_cossim = stats.w_cossim / stats.n; - - LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", - layer, - w_sum, - 100.0f * w_zd, - w_cossim); - } + for (const auto & [layer, stats] : ws) { + if (layer < 0 || stats.n == 0) continue; + const float w_sum = stats.w_sum / stats.n; + const float w_zd = stats.w_zd / stats.n; + const auto lcs = layer_cossim.find(layer); + const float cossim = (lcs != layer_cossim.end()) ? 
lcs->second : 0.0f; + LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", + layer, + w_sum, + 100.0f * w_zd, + cossim); } LOG_INF("\n"); From 90cb1be99d0aaa1fb37f94e0e57dd1b7c642306c Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sun, 3 Aug 2025 16:57:27 +0100 Subject: [PATCH 18/36] Minor cosmetic changes --- tools/imatrix/imatrix.cpp | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 9a20758340c9e..ff189e0379798 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -311,52 +311,43 @@ static void compute_tensor_statistics(std::vector & tstats) { static void compute_layer_statistics(const std::vector & tstats, std::map & layer_cossim, const std::unordered_map & stats_map) { - struct layer_aggregation { + struct layer_aggregation { std::vector curr_avg; std::vector prev_avg; }; - static const std::regex pattern(R"(blk\.(\d+)\.)"); - - // index tensor stats by name for quick lookup std::unordered_map tidx; tidx.reserve(tstats.size()); for (const auto & ts : tstats) tidx[ts.tensor] = &ts; + std::map taggr; - // concatenate per-layer - std::map taggr; // ordered by layer for (const auto & ts : tstats) { std::smatch match; if (!std::regex_search(ts.tensor, match, pattern)) continue; const int blk = std::stoi(match[1]); if (blk <= 0) continue; - std::string prev_lyr(ts.tensor); prev_lyr.replace(match.position(1), match.length(1), std::to_string(blk-1)); - if (auto it_prev = tidx.find(prev_lyr); it_prev == tidx.end()) continue; - - // use stored Stats to rebuild averages const auto curr_avg = compute_tensor_averages(stats_map.at(ts.tensor)); const auto prev_avg = compute_tensor_averages(stats_map.at(prev_lyr)); if (curr_avg.empty() || prev_avg.empty() || curr_avg.size() != prev_avg.size()) continue; - auto & [curr, prev] = taggr[blk]; curr.insert(curr.end(), curr_avg.begin(), curr_avg.end()); prev.insert(prev.end(), prev_avg.begin(), prev_avg.end()); } - // compute cosine per layer + // compute the cosine similarity between consecutive layers for (auto & kv : taggr) { const auto & curr = kv.second.curr_avg; const auto & prev = kv.second.prev_avg; if (curr.size() != prev.size() || curr.empty()) continue; float dot_prod = 0.0, lyr1 = 0.0, lyr2 = 0.0; for (size_t i = 0; i < curr.size(); ++i) { - const double a = curr[i], b = prev[i]; - dot_prod += a*b; - lyr1 += a*a; - lyr2 += b*b; + float crr = curr[i], prv = prev[i]; + dot_prod += crr * prv; + lyr1 += crr * crr; + lyr2 += prv * prv; } float cossim = 0.0f; if (lyr1 > 0.0 && lyr2 > 0.0) cossim = dot_prod / (std::sqrt(lyr1) * std::sqrt(lyr2)); From f1c2a4ca3f9f23386892adc78f8c7a296c3e92ee Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sun, 3 Aug 2025 17:14:46 +0100 Subject: [PATCH 19/36] Fix printing l2 norm when calc_mode = 1 --- tools/imatrix/imatrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index ff189e0379798..6a07fdd354514 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -1302,7 +1302,7 @@ static bool show_statistics(const common_params & params) { LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%12.4f\t%7.2f%%\t%6.2f%%\t%10.4f\n", layer.c_str(), name.c_str(), - tstat.sum_values, + tensor_calc_mode == 1 ? 
tstat.l2_norm : tstat.sum_values, tstat.min_values, tstat.max_values, tstat.mean_values, From c39c4e2a331c3386f019885865698b826dcbce00 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Mon, 4 Aug 2025 22:15:50 +0100 Subject: [PATCH 20/36] Refactor variable name --- tools/imatrix/imatrix.cpp | 70 +++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 6a07fdd354514..a28701944d015 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -40,7 +40,7 @@ static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size"; struct Stats { std::vector in_sum; - std::vector in_sum2; + std::vector values; std::vector counts; }; @@ -130,7 +130,7 @@ static void process_tensor_name(const std::string & input, std::string & layer, static std::vector compute_tensor_averages(const Stats & tstats) { if (tstats.counts.empty()) return {}; const size_t n_mat = tstats.counts.size(); - const size_t len = !tstats.in_sum.empty() ? tstats.in_sum.size() : tstats.in_sum2.size(); + const size_t len = !tstats.in_sum.empty() ? tstats.in_sum.size() : tstats.values.size(); if (len == 0 || len % n_mat != 0) return {}; const size_t row = len / n_mat; @@ -152,7 +152,7 @@ static std::vector compute_tensor_averages(const Stats & tstats) { if (c <= 0) return {}; const size_t off = m * row; for (size_t j = 0; j < row; ++j) { - vec.push_back(tstats.in_sum2[off + j] / c); + vec.push_back(tstats.values[off + j] / c); } } } @@ -161,8 +161,8 @@ static std::vector compute_tensor_averages(const Stats & tstats) { } static int compute_vector_statistics(std::vector & tstats, const std::string & name, const Stats & e) { - if (e.in_sum2.size() % e.counts.size() != 0) { - LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.in_sum2.size()); + if (e.values.size() % e.counts.size() != 0) { + LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size()); return -1;; } if (e.counts.empty()) { @@ -171,17 +171,17 @@ static int compute_vector_statistics(std::vector & tstats, co } const int n_mat = e.counts.size(); - const int row_size = e.in_sum2.size() / n_mat; + const int row_size = e.values.size() / n_mat; const int calc_mode = e.in_sum.empty() ? 
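// 2 = legacy data, only Σ(act²) sums are available; 1 = raw activation sums were collected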
2 : 1; std::vector activations; if (e.in_sum.empty()) { - activations.reserve(e.in_sum2.size()); + activations.reserve(e.values.size()); for (int i = 0; i < n_mat; ++i) { for (int j = 0; j < row_size; ++j) { - activations.push_back(e.in_sum2[i*row_size + j] / e.counts[i]); + activations.push_back(e.values[i*row_size + j] / e.counts[i]); } } } else { @@ -420,13 +420,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * // broadcast, when loading an old imatrix e.counts.resize(n_as, e.counts[0]); } - if (e.in_sum2.empty()) { + if (e.values.empty()) { e.in_sum.resize(src1->ne[0]*n_as, 0); - e.in_sum2.resize(src1->ne[0]*n_as, 0); + e.values.resize(src1->ne[0]*n_as, 0); e.counts.resize(n_as, 0); } - else if (e.in_sum2.size() != (size_t)src1->ne[0]*n_as) { - LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.in_sum2.size(), (int)(src1->ne[0]*n_as)); + else if (e.values.size() != (size_t)src1->ne[0]*n_as) { + LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)(src1->ne[0]*n_as)); exit(1); //GGML_ABORT("fatal error"); } else if (e.counts.size() != (size_t)n_as) { @@ -454,9 +454,9 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * for (int64_t j = 0; j < src1->ne[0]; ++j) { e.in_sum[e_start + j] += x[j]; - e.in_sum2[e_start + j] += x[j] * x[j]; - if (!std::isfinite((float)e.in_sum2[e_start + j])) { - LOG_ERR("%f detected in %s\n", (float)e.in_sum2[e_start + j], wname.c_str()); + e.values[e_start + j] += x[j] * x[j]; + if (!std::isfinite((float)e.values[e_start + j])) { + LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str()); exit(1); } } @@ -478,13 +478,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * auto & e = m_stats[wname]; const int64_t n_mat = src1->ne[2] * src1->ne[3]; - if (e.in_sum2.empty()) { + if (e.values.empty()) { e.in_sum.resize(src1->ne[0] * n_mat, 0); - e.in_sum2.resize(src1->ne[0] * n_mat, 0); + e.values.resize(src1->ne[0] * n_mat, 0); e.counts.resize(n_mat, 0); } - else if (e.in_sum2.size() != (size_t)(src1->ne[0] * n_mat)) { - LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.in_sum2.size(), (int)(src1->ne[0] * n_mat)); + else if (e.values.size() != (size_t)(src1->ne[0] * n_mat)) { + LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)(src1->ne[0] * n_mat)); exit(1); //GGML_ABORT("fatal error"); } else if (e.counts.size() != (size_t)n_mat) { @@ -502,9 +502,9 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * e.counts[mat_id]++; for (int64_t j = 0; j < src1->ne[0]; ++j) { e.in_sum[mat_start + j] += x[j]; - e.in_sum2[mat_start + j] += x[j] * x[j]; - if (!std::isfinite((float)e.in_sum2[j])) { - LOG_ERR("%f detected in %s\n", (float)e.in_sum2[j], wname.c_str()); + e.values[mat_start + j] += x[j] * x[j]; + if (!std::isfinite((float)e.values[j])) { + LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str()); exit(1); } } @@ -593,14 +593,14 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const { // ceiling division to avoid accidental zeros const int32_t ncall = (*std::max_element(stat.counts.begin(), stat.counts.end()) + (chunk_size - 1)) / chunk_size; out.write((const char *) &ncall, sizeof(ncall)); - const int32_t nval = stat.in_sum2.size(); + const int32_t nval = stat.values.size(); const int32_t nmat = stat.counts.size(); 
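// legacy .dat entries behave like per-token means of the squared activations:
// each sum is normalized by its sub-matrix count before writing, and 1.0f is
// stored as a neutral placeholder where no data was collected yet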
out.write((const char *) &nval, sizeof(nval)); if (nval > 0 && nmat > 0) { std::vector tmp(nval); for (int32_t i = 0; i < nval; i++) { float count = static_cast(stat.counts[i / (nval / nmat)]); - float value = stat.in_sum2[i]; + float value = stat.values[i]; if (count == 0.0f) { // store 1 for partial data value = 1.0f; @@ -676,7 +676,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { to_store.push_back(kv.first); data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.in_sum.size(), GGML_MEM_ALIGN); - data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.in_sum2.size(), GGML_MEM_ALIGN); + data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN); data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN); } @@ -711,7 +711,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { for (const auto & name : to_store) { const auto & stat = m_stats.at(name); - const int32_t nval = (int32_t) stat.in_sum2.size(); + const int32_t nval = (int32_t) stat.values.size(); const int32_t nmat = (int32_t) stat.counts.size(); if (nval > 0 && nmat > 0) { struct ggml_tensor * in_sum2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nval / nmat, nmat); @@ -720,7 +720,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { ggml_format_name(counts, "%s.counts", name.c_str()); for (int32_t j = 0; j < nval; ++j) { - ((float *) in_sum2->data)[j] = (float) stat.in_sum2[j]; + ((float *) in_sum2->data)[j] = (float) stat.values[j]; } for (int32_t j = 0; j < nmat; ++j) { ((float *) counts->data)[j] = (float) stat.counts[j]; @@ -787,8 +787,8 @@ bool IMatrixCollector::load_imatrix_legacy(const char * fname) { return false; } - if (e.in_sum2.empty()) { - e.in_sum2.resize(nval, 0.0f); + if (e.values.empty()) { + e.values.resize(nval, 0.0f); e.counts.resize(1, 0); } @@ -802,7 +802,7 @@ bool IMatrixCollector::load_imatrix_legacy(const char * fname) { // Recreate the state as expected by save_imatrix(), and correct for weighted sum. 
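// the file holds per-token means, so multiplying by chunk_size restores an
// approximation of the original sums; this is exact only when every chunk
// contributed the same number of tokens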
for (int i = 0; i < nval; i++) { - e.in_sum2[i] += tmp[i] * chunk_size; + e.values[i] += tmp[i] * chunk_size; } // The legacy format doesn't distinguish the counts for different experts for (size_t j = 0; j < e.counts.size(); ++j) { @@ -922,11 +922,11 @@ bool IMatrixCollector::load_imatrix(const char * file_name) { auto & e = m_stats[name]; int64_t nval = ggml_nelements(in_sum2); - if (e.in_sum2.empty()) { - e.in_sum2.resize(nval, 0.0f); + if (e.values.empty()) { + e.values.resize(nval, 0.0f); e.in_sum.resize(nval, 0.0f); - } else if ((size_t) nval != e.in_sum2.size()) { - LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.in_sum2.size()); + } else if ((size_t) nval != e.values.size()) { + LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size()); gguf_free(ctx_gguf); ggml_free(ctx); return false; @@ -947,7 +947,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) { // Recreate the state as expected by save_imatrix() for (int64_t j = 0; j < nval; j++) { - e.in_sum2[j] += ((const float *) in_sum2->data)[j]; + e.values[j] += ((const float *) in_sum2->data)[j]; } for (int64_t j = 0; j < ncounts; j++) { e.counts[j] += std::lround(((const float *) counts->data)[j]); From 5e40cf4f1ca6af6187e5b049990b4208296c9ba6 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 00:18:53 +0100 Subject: [PATCH 21/36] Do not resize if in_sum is null --- tools/imatrix/imatrix.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 651cb658d7e7f..b772f2184d38c 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -938,7 +938,9 @@ bool IMatrixCollector::load_imatrix(const char * file_name) { int64_t nval = ggml_nelements(in_sum2); if (e.values.empty()) { e.values.resize(nval, 0.0f); - e.in_sum.resize(nval, 0.0f); + if (in_sum != nullptr) { + e.in_sum.resize(nval, 0.0f); + } } else if ((size_t) nval != e.values.size()) { LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size()); gguf_free(ctx_gguf); From b37393423d426d962f1aecf70c6fdc26c0bcb52b Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 08:54:57 +0100 Subject: [PATCH 22/36] Compute aggregated (per layer) l2 norm --- tools/imatrix/imatrix.cpp | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index b772f2184d38c..f22b67a309dc3 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -255,7 +255,7 @@ static int compute_vector_statistics(std::vector & tstats, co static void compute_tensor_statistics(std::vector & tstats) { static const std::regex pattern(R"(blk\.(\d+)\.)"); - // compute the cosine similarity between the same tensors in consecutive layers + // compute the Cosine Similarity between the same tensors in consecutive layers for (auto & ts : tstats) { ts.cossim = 0; @@ -281,7 +281,7 @@ static void compute_tensor_statistics(std::vector & tstats) { } } - // compute the L2 norm between the same tensors in consecutive layers + // compute the L2 Norm (Euclidian Distance) between the same tensors in consecutive layers for (auto & ts : tstats) { ts.l2_norm = 0.0f; if (ts.stats.in_sum.empty()) continue; @@ -310,6 +310,7 @@ static void compute_tensor_statistics(std::vector & tstats) { static void compute_layer_statistics(const std::vector & tstats, std::map 
& layer_cossim, + std::map & layer_l2_norm, const std::unordered_map & stats_map) { struct layer_aggregation { std::vector curr_avg; @@ -337,22 +338,33 @@ static void compute_layer_statistics(const std::vector & tsta prev.insert(prev.end(), prev_avg.begin(), prev_avg.end()); } - // compute the cosine similarity between consecutive layers + // compute the aggregated Cosine Similarity between consecutive layers for (auto & kv : taggr) { const auto & curr = kv.second.curr_avg; const auto & prev = kv.second.prev_avg; if (curr.size() != prev.size() || curr.empty()) continue; float dot_prod = 0.0, lyr1 = 0.0, lyr2 = 0.0; for (size_t i = 0; i < curr.size(); ++i) { - float crr = curr[i], prv = prev[i]; - dot_prod += crr * prv; - lyr1 += crr * crr; - lyr2 += prv * prv; + dot_prod += curr[i] * prev[i]; + lyr1 += curr[i] * curr[i]; + lyr2 += prev[i] * prev[i]; } float cossim = 0.0f; if (lyr1 > 0.0 && lyr2 > 0.0) cossim = dot_prod / (std::sqrt(lyr1) * std::sqrt(lyr2)); layer_cossim[kv.first] = cossim; } + + // compute the aggregated L2 Norm (Euclidian Distance) between consecutive layers + for (auto & kv : taggr) { + const auto & curr = kv.second.curr_avg; + const auto & prev = kv.second.prev_avg; + if (curr.size() != prev.size() || curr.empty()) continue; + float dist = 0.0f; + for (size_t i = 0; i < curr.size(); ++i) { + dist += (curr[i] - prev[i]) * (curr[i] - prev[i]); + } + layer_l2_norm[kv.first] = std::sqrt(dist); + } } bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) { @@ -1346,7 +1358,8 @@ static bool show_statistics(const common_params & params) { } std::map layer_cossim; - compute_layer_statistics(ts, layer_cossim, g_collector.get_mstats()); + std::map layer_l2_norm; + compute_layer_statistics(ts, layer_cossim, layer_l2_norm, g_collector.get_mstats()); const auto layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; }); LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); @@ -1362,9 +1375,11 @@ static bool show_statistics(const common_params & params) { const float w_zd = stats.w_zd / stats.n; const auto lcs = layer_cossim.find(layer); const float cossim = (lcs != layer_cossim.end()) ? lcs->second : 0.0f; + const auto ll2n = layer_l2_norm.find(layer); + const float l2_norm = (ll2n != layer_l2_norm.end()) ? ll2n->second : 0.0f; LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", layer, - w_sum, + tensor_calc_mode == 1 ? 
l2_norm: w_sum, 100.0f * w_zd, cossim); } From 906548a00a25b927f8ed8ac5b4b58b4f69db9e37 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 12:06:19 +0100 Subject: [PATCH 23/36] Update aggregated sum of squared activations per layer --- tools/imatrix/imatrix.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index f22b67a309dc3..6d40984a01806 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -1294,7 +1294,6 @@ static bool show_statistics(const common_params & params) { struct weighted_stats { float w_sum = 0.0f; float w_zd = 0.0f; - float w_cossim = 0.0f; int n = 0; }; std::map ws; @@ -1341,16 +1340,15 @@ static bool show_statistics(const common_params & params) { 100.0f * tstat.zd_score, tstat.cossim); - const float w_sum = tstat.elements * tstat.sum_values; const float w_zd = tstat.elements * tstat.zd_score; if (ws.find(blk) != ws.end()) { - ws[blk].w_sum += w_sum; + ws[blk].w_sum += tstat.sum_values; ws[blk].w_zd += w_zd; ws[blk].n += tstat.elements; } else { weighted_stats temp_ws; - temp_ws.w_sum = w_sum; + temp_ws.w_sum = tstat.sum_values; temp_ws.w_zd = w_zd; temp_ws.n = tstat.elements; ws[blk] = temp_ws; @@ -1371,7 +1369,7 @@ static bool show_statistics(const common_params & params) { LOG_INF("============================================\n"); for (const auto & [layer, stats] : ws) { if (layer < 0 || stats.n == 0) continue; - const float w_sum = stats.w_sum / stats.n; + const float w_sum = stats.w_sum; const float w_zd = stats.w_zd / stats.n; const auto lcs = layer_cossim.find(layer); const float cossim = (lcs != layer_cossim.end()) ? lcs->second : 0.0f; From aea9b31db53a53204d8b817347b1972854e1d186 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 12:57:13 +0100 Subject: [PATCH 24/36] Make ZD Score two-tailed --- tools/imatrix/imatrix.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 6d40984a01806..d30b66c6a61f6 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -231,9 +231,7 @@ static int compute_vector_statistics(std::vector & tstats, co int z_score = 0; if (std_deviation > 0.0f) { for (const auto act : activations) { - if (const float p = (act - mean) / std_deviation; p > 1) { - z_score++; - } + if (const float z = (act - mean) / std_deviation; std::fabs(z) > 1.0f) z_score++; } } From 49996a19dafb0b13d9e427c7a30f50f59dc34e80 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 13:32:46 +0100 Subject: [PATCH 25/36] Refactor variable names --- tools/imatrix/imatrix.cpp | 94 +++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index d30b66c6a61f6..46db4401a75ab 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -39,7 +39,7 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count"; static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size"; struct Stats { - std::vector in_sum; + std::vector activations; std::vector values; std::vector counts; }; @@ -130,20 +130,20 @@ static void process_tensor_name(const std::string & input, std::string & layer, static std::vector compute_tensor_averages(const Stats & tstats) { if (tstats.counts.empty()) return {}; const size_t n_mat = tstats.counts.size(); - const size_t len = !tstats.in_sum.empty() ? 
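// prefer the raw activation sums when present, otherwise fall back to the squared sums (legacy files)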
tstats.in_sum.size() : tstats.values.size(); + const size_t len = !tstats.activations.empty() ? tstats.activations.size() : tstats.values.size(); if (len == 0 || len % n_mat != 0) return {}; const size_t row = len / n_mat; std::vector vec; vec.reserve(len); - if (!tstats.in_sum.empty()) { + if (!tstats.activations.empty()) { for (size_t m = 0; m < n_mat; ++m) { const float c = (float)tstats.counts[m]; if (c <= 0) return {}; const size_t off = m * row; for (size_t j = 0; j < row; ++j) { - vec.push_back(tstats.in_sum[off + j] / c); + vec.push_back(tstats.activations[off + j] / c); } } } else { @@ -172,11 +172,11 @@ static int compute_vector_statistics(std::vector & tstats, co const int n_mat = e.counts.size(); const int row_size = e.values.size() / n_mat; - const int calc_mode = e.in_sum.empty() ? 2 : 1; + const int calc_mode = e.activations.empty() ? 2 : 1; std::vector activations; - if (e.in_sum.empty()) { + if (e.activations.empty()) { activations.reserve(e.values.size()); for (int i = 0; i < n_mat; ++i) { @@ -185,11 +185,11 @@ static int compute_vector_statistics(std::vector & tstats, co } } } else { - activations.reserve(e.in_sum.size()); + activations.reserve(e.activations.size()); for (int i = 0; i < n_mat; ++i) { for (int j = 0; j < row_size; ++j) { - activations.push_back(e.in_sum[i*row_size + j] / e.counts[i]); + activations.push_back(e.activations[i*row_size + j] / e.counts[i]); } } } @@ -282,7 +282,7 @@ static void compute_tensor_statistics(std::vector & tstats) { // compute the L2 Norm (Euclidian Distance) between the same tensors in consecutive layers for (auto & ts : tstats) { ts.l2_norm = 0.0f; - if (ts.stats.in_sum.empty()) continue; + if (ts.stats.activations.empty()) continue; if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) { const int blk = std::stoi(match[1]); @@ -430,7 +430,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * e.counts.resize(n_as, e.counts[0]); } if (e.values.empty()) { - e.in_sum.resize(src1->ne[0]*n_as, 0); + e.activations.resize(src1->ne[0]*n_as, 0); e.values.resize(src1->ne[0]*n_as, 0); e.counts.resize(n_as, 0); } @@ -462,7 +462,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * e.counts[ex]++; for (int64_t j = 0; j < src1->ne[0]; ++j) { - e.in_sum[e_start + j] += x[j]; + e.activations[e_start + j] += x[j]; e.values[e_start + j] += x[j] * x[j]; if (!std::isfinite((float)e.values[e_start + j])) { LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str()); @@ -502,7 +502,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * } } if (e.values.empty()) { - e.in_sum.resize(src1->ne[0] * n_mat, 0); + e.activations.resize(src1->ne[0] * n_mat, 0); e.values.resize(src1->ne[0] * n_mat, 0); e.counts.resize(1, 0); } @@ -521,7 +521,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * for (int64_t row = 0; row < src1->ne[1]; ++row) { const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->nb[3]); for (int64_t j = 0; j < src1->ne[0]; ++j) { - e.in_sum[mat_start + j] += x[j]; + e.activations[mat_start + j] += x[j]; e.values[mat_start + j] += x[j] * x[j]; if (!std::isfinite((float)e.values[j])) { LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str()); @@ -699,7 +699,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { } to_store.push_back(kv.first); - data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.in_sum.size(), 
GGML_MEM_ALIGN); + data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN); data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN); data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN); } @@ -753,12 +753,12 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { gguf_add_tensor(ctx_gguf, in_sum2); gguf_add_tensor(ctx_gguf, counts); - if (!stat.in_sum.empty()) { - const int32_t nact = (int32_t) stat.in_sum.size(); + if (!stat.activations.empty()) { + const int32_t nact = (int32_t) stat.activations.size(); struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat); ggml_format_name(in_sum, "%s.in_sum", name.c_str()); for (int32_t j = 0; j < nval; ++j) { - ((float *) in_sum->data)[j] = (float) stat.in_sum[j]; + ((float *) in_sum->data)[j] = (float) stat.activations[j]; } gguf_add_tensor(ctx_gguf, in_sum); } @@ -949,7 +949,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) { if (e.values.empty()) { e.values.resize(nval, 0.0f); if (in_sum != nullptr) { - e.in_sum.resize(nval, 0.0f); + e.activations.resize(nval, 0.0f); } } else if ((size_t) nval != e.values.size()) { LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size()); @@ -980,7 +980,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) { } if (in_sum != nullptr) { for (int64_t j = 0; j < nval; j++) { - e.in_sum[j] += ((const float *) in_sum->data)[j]; + e.activations[j] += ((const float *) in_sum->data)[j]; } } } @@ -1289,12 +1289,12 @@ static bool show_statistics(const common_params & params) { }; std::sort(ts.begin(), ts.end(), tensor_comparer()); - struct weighted_stats { - float w_sum = 0.0f; - float w_zd = 0.0f; + struct layer_stats { + float lyr_sum = 0.0f; + float lyr_zd = 0.0f; int n = 0; }; - std::map ws; + std::map ls; LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%12s\t%s\t%5s\t%10s\n", @@ -1338,26 +1338,26 @@ static bool show_statistics(const common_params & params) { 100.0f * tstat.zd_score, tstat.cossim); - const float w_zd = tstat.elements * tstat.zd_score; + const float zd = tstat.elements * tstat.zd_score; - if (ws.find(blk) != ws.end()) { - ws[blk].w_sum += tstat.sum_values; - ws[blk].w_zd += w_zd; - ws[blk].n += tstat.elements; + if (ls.find(blk) != ls.end()) { + ls[blk].lyr_sum += tstat.sum_values; + ls[blk].lyr_zd += zd; + ls[blk].n += tstat.elements; } else { - weighted_stats temp_ws; - temp_ws.w_sum = tstat.sum_values; - temp_ws.w_zd = w_zd; - temp_ws.n = tstat.elements; - ws[blk] = temp_ws; + layer_stats temp_ls; + temp_ls.lyr_sum = tstat.sum_values; + temp_ls.lyr_zd = zd; + temp_ls.n = tstat.elements; + ls[blk] = temp_ls; } } - std::map layer_cossim; - std::map layer_l2_norm; - compute_layer_statistics(ts, layer_cossim, layer_l2_norm, g_collector.get_mstats()); + std::map lyr_cossim; + std::map lyr_l2_norm; + compute_layer_statistics(ts, lyr_cossim, lyr_l2_norm, g_collector.get_mstats()); - const auto layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; }); + const auto layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; }); LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); LOG_INF("\n%6s\t%16s\t%7s\t%11s\n", "Layer", @@ -1365,19 
+1365,19 @@ static bool show_statistics(const common_params & params) { "ZD", "CosSim"); LOG_INF("============================================\n"); - for (const auto & [layer, stats] : ws) { + for (const auto & [layer, stats] : ls) { if (layer < 0 || stats.n == 0) continue; - const float w_sum = stats.w_sum; - const float w_zd = stats.w_zd / stats.n; - const auto lcs = layer_cossim.find(layer); - const float cossim = (lcs != layer_cossim.end()) ? lcs->second : 0.0f; - const auto ll2n = layer_l2_norm.find(layer); - const float l2_norm = (ll2n != layer_l2_norm.end()) ? ll2n->second : 0.0f; + const float lyr_sum = stats.lyr_sum; + const float lyr_zd = stats.lyr_zd / stats.n; + const auto lcs = lyr_cossim.find(layer); + const float lyr_cs = (lcs != lyr_cossim.end()) ? lcs->second : 0.0f; + const auto ll2n = lyr_l2_norm.find(layer); + const float l2_norm = (ll2n != lyr_l2_norm.end()) ? ll2n->second : 0.0f; LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", layer, - tensor_calc_mode == 1 ? l2_norm: w_sum, - 100.0f * w_zd, - cossim); + tensor_calc_mode == 1 ? l2_norm: lyr_sum, + 100.0f * lyr_zd, + lyr_cs); } LOG_INF("\n"); From 4c3fea89d6975f418d6d6249e49ed8dbd208fe2b Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 13:32:59 +0100 Subject: [PATCH 26/36] Update report layout --- tools/imatrix/imatrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 46db4401a75ab..634739082a05b 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -1359,7 +1359,7 @@ static bool show_statistics(const common_params & params) { const auto layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; }); LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); - LOG_INF("\n%6s\t%16s\t%7s\t%11s\n", + LOG_INF("\n%6s\t%13s\t%5s\t%10s\n", "Layer", tensor_calc_mode == 1 ? "L₂ Norm" : "Σ(Act²)", "ZD", From 88854c9179b6e62ffde2d57123e74e47dc00a55a Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 5 Aug 2025 14:16:45 +0100 Subject: [PATCH 27/36] Refactor legacy mode --- tools/imatrix/imatrix.cpp | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 634739082a05b..2f16f3489c9e8 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -160,7 +160,7 @@ static std::vector compute_tensor_averages(const Stats & tstats) { return vec; } -static int compute_vector_statistics(std::vector & tstats, const std::string & name, const Stats & e) { +static bool compute_vector_statistics(std::vector & tstats, const std::string & name, const Stats & e) { if (e.values.size() % e.counts.size() != 0) { LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size()); return -1;; @@ -172,7 +172,6 @@ static int compute_vector_statistics(std::vector & tstats, co const int n_mat = e.counts.size(); const int row_size = e.values.size() / n_mat; - const int calc_mode = e.activations.empty() ? 
2 : 1; std::vector activations; @@ -203,7 +202,15 @@ static int compute_vector_statistics(std::vector & tstats, co const float std_deviation = std::sqrt(std::max(0.0f, variance)); float entropy = 0; - if (calc_mode == 1) { + if (e.activations.empty()) { + if (sum > 0) { + for (const auto act : activations) { + if (const float p = act / sum; p > 0) { + entropy -= p * std::log2(p); + } + } + } + } else { float div = 0.0; std::vector weights(activations.size()); for (size_t i = 0; i < activations.size(); ++i) { @@ -218,14 +225,6 @@ static int compute_vector_statistics(std::vector & tstats, co if (p > 0.0) entropy -= p * std::log2(p); } } - } else { - if (sum > 0) { - for (const auto act : activations) { - if (const float p = act / sum; p > 0) { - entropy -= p * std::log2(p); - } - } - } } int z_score = 0; @@ -247,7 +246,7 @@ static int compute_vector_statistics(std::vector & tstats, co ts.entropy = entropy; ts.zd_score = static_cast(z_score) / ts.elements; - return calc_mode; + return e.activations.empty(); } static void compute_tensor_statistics(std::vector & tstats) { @@ -1257,7 +1256,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c static bool show_statistics(const common_params & params) { std::vector ts; - int tensor_calc_mode = 0; + bool legacy_mode = false; if (params.in_files.empty() || params.in_files.size() > 1) { LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n"); @@ -1265,7 +1264,7 @@ static bool show_statistics(const common_params & params) { } if (g_collector.load_imatrix(params.in_files[0].c_str())) { for (const auto & [name, stats] :g_collector.get_mstats()) { - tensor_calc_mode =compute_vector_statistics(ts, name, stats); + legacy_mode = compute_vector_statistics(ts, name, stats); } } else { LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str()); @@ -1300,7 +1299,7 @@ static bool show_statistics(const common_params & params) { LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%12s\t%s\t%5s\t%10s\n", "Layer", "Tensor", - tensor_calc_mode == 1 ? "L₂ Norm" : "Σ(Act²)", + legacy_mode ? "Σ(Act²)" : "L₂ Norm", "Min", "Max", "μ", @@ -1327,7 +1326,7 @@ static bool show_statistics(const common_params & params) { LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%12.4f\t%7.2f%%\t%6.2f%%\t%10.4f\n", layer.c_str(), name.c_str(), - tensor_calc_mode == 1 ? tstat.l2_norm : tstat.sum_values, + legacy_mode == 1 ? tstat.sum_values : tstat.l2_norm, tstat.min_values, tstat.max_values, tstat.mean_values, @@ -1361,7 +1360,7 @@ static bool show_statistics(const common_params & params) { LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); LOG_INF("\n%6s\t%13s\t%5s\t%10s\n", "Layer", - tensor_calc_mode == 1 ? "L₂ Norm" : "Σ(Act²)", + legacy_mode ? "Σ(Act²)" : "L₂ Norm", "ZD", "CosSim"); LOG_INF("============================================\n"); @@ -1375,7 +1374,7 @@ static bool show_statistics(const common_params & params) { const float l2_norm = (ll2n != lyr_l2_norm.end()) ? ll2n->second : 0.0f; LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", layer, - tensor_calc_mode == 1 ? l2_norm: lyr_sum, + legacy_mode ? 
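// legacy files report the aggregated Σ(act²); otherwise the inter-layer L₂ norm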
lyr_sum : l2_norm, 100.0f * lyr_zd, lyr_cs); } From 3e9d53c61e69d72c848aacfb7b7830855908bb54 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Thu, 7 Aug 2025 12:03:24 +0100 Subject: [PATCH 28/36] Refactor variable names --- tools/imatrix/imatrix.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index a229c927b5a39..7554534adf2eb 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -47,16 +47,16 @@ struct Stats { struct tensor_statistics { std::string tensor; Stats stats; - float sum_values = 0.0f; - float mean_values = 0.0f; - float max_values = 0.0f; - float min_values = 0.0f; - int elements = 0; - float stddev = 0.0f; - float entropy = 0.0f; - float zd_score = 0.0f; - float cossim = 0.0f; - float l2_norm = 0.0f; + float sum_values = 0.0f; + float mean_values = 0.0f; + float max_values = 0.0f; + float min_values = 0.0f; + int elements = 0; + float std_deviation = 0.0f; + float entropy = 0.0f; + float zd_score = 0.0f; + float cossim = 0.0f; + float l2_norm = 0.0f; }; class IMatrixCollector { @@ -227,10 +227,10 @@ static bool compute_vector_statistics(std::vector & tstats, c } } - int z_score = 0; + int zd_score = 0; if (std_deviation > 0.0f) { for (const auto act : activations) { - if (const float z = (act - mean) / std_deviation; std::fabs(z) > 1.0f) z_score++; + if (const float z = (act - mean) / std_deviation; std::fabs(z) > 1.0f) zd_score++; } } @@ -242,9 +242,9 @@ static bool compute_vector_statistics(std::vector & tstats, c ts.max_values = max; ts.min_values = min; ts.elements = static_cast(activations.size()); - ts.stddev = std_deviation; + ts.std_deviation = std_deviation; ts.entropy = entropy; - ts.zd_score = static_cast(z_score) / ts.elements; + ts.zd_score = static_cast(zd_score) / ts.elements; return e.activations.empty(); } @@ -1334,7 +1334,7 @@ static bool show_statistics(const common_params & params) { tstat.min_values, tstat.max_values, tstat.mean_values, - tstat.stddev, + tstat.std_deviation, tstat.elements, tstat.entropy, 100.0f * (tstat.entropy / std::log2(tstat.elements)), From e0d64713402b7c236806cdfc3ce4cb7d0ad0cd34 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Thu, 7 Aug 2025 12:04:52 +0100 Subject: [PATCH 29/36] Reverse conditional logic to match convention --- tools/imatrix/imatrix.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 7554534adf2eb..d8ff591349fb5 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -137,13 +137,13 @@ static std::vector compute_tensor_averages(const Stats & tstats) { std::vector vec; vec.reserve(len); - if (!tstats.activations.empty()) { + if (tstats.activations.empty()) { for (size_t m = 0; m < n_mat; ++m) { const float c = (float)tstats.counts[m]; if (c <= 0) return {}; const size_t off = m * row; for (size_t j = 0; j < row; ++j) { - vec.push_back(tstats.activations[off + j] / c); + vec.push_back(tstats.values[off + j] / c); } } } else { @@ -152,7 +152,7 @@ static std::vector compute_tensor_averages(const Stats & tstats) { if (c <= 0) return {}; const size_t off = m * row; for (size_t j = 0; j < row; ++j) { - vec.push_back(tstats.values[off + j] / c); + vec.push_back(tstats.activations[off + j] / c); } } } From dadd90ef73410025e8ccc1eb9517e6ddb3796134 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Thu, 7 Aug 2025 14:07:48 +0100 Subject: [PATCH 30/36] Rename report heading --- tools/imatrix/imatrix.cpp 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index d8ff591349fb5..a758a940960b3 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -1299,7 +1299,7 @@ static bool show_statistics(const common_params & params) { }; std::map ls; - LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); + LOG_INF("\nComputing tensor statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%12s\t%s\t%5s\t%10s\n", "Layer", "Tensor", @@ -1361,7 +1361,7 @@ static bool show_statistics(const common_params & params) { compute_layer_statistics(ts, lyr_cossim, lyr_l2_norm, g_collector.get_mstats()); const auto layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; }); - LOG_INF("\nComputing aggregated statistics per layer (%ld layers)\n", layers); + LOG_INF("\nComputing layer statistics (%ld layers)\n", layers); LOG_INF("\n%6s\t%13s\t%5s\t%10s\n", "Layer", legacy_mode ? "Σ(Act²)" : "L₂ Norm", From 5bb2def02dcf923743029f72b0c16b17e3609e28 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Thu, 7 Aug 2025 17:41:21 +0100 Subject: [PATCH 31/36] Add --activation-statistics parameter --- common/arg.cpp | 7 +++++++ common/common.h | 9 +++++---- tools/imatrix/imatrix.cpp | 3 ++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 0f01bb31454a4..2cd0cc011981f 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2707,6 +2707,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.show_statistics = true; } ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--activation-statistics"}, + string_format("generate data to compute activation-based statistics (default: %s)", params.show_statistics ? "true" : "false"), + [](common_params & params) { + params.activation_statistics = true; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); add_opt(common_arg( {"--parse-special"}, string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? 
"true" : "false"), diff --git a/common/common.h b/common/common.h index 5eab199af559e..d5dfdd49e0ce5 100644 --- a/common/common.h +++ b/common/common.h @@ -443,10 +443,11 @@ struct common_params { int32_t i_chunk = 0; // start processing from this chunk int8_t imat_dat = 0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat) - bool process_output = false; // collect data for the output tensor - bool compute_ppl = true; // whether to compute perplexity - bool show_statistics = false; // show imatrix statistics per tensor - bool parse_special = false; // whether to parse special tokens during imatrix tokenization + bool process_output = false; // collect data for the output tensor + bool compute_ppl = true; // whether to compute perplexity + bool show_statistics = false; // show imatrix statistics per tensor + bool activation_statistics = false; // generate data to calculate activation based statistics + bool parse_special = false; // whether to parse special tokens during imatrix tokenization // cvector-generator params int n_pca_batch = 100; diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index a758a940960b3..902d6e7354aa0 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -30,7 +30,7 @@ static void print_usage(int, char ** argv) { " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n" " [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n" " [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n" - " [--show-statistics] [...]\n" , argv[0]); + " [--activation-statistics] [--show-statistics] [...]\n" , argv[0]); LOG("\n"); } @@ -428,6 +428,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * // broadcast, when loading an old imatrix e.counts.resize(n_as, e.counts[0]); } + // ToDo: find an efficient way to implement --activation-statistics to avoid doubling the imatrix size by default if (e.values.empty()) { e.activations.resize(src1->ne[0]*n_as, 0); e.values.resize(src1->ne[0]*n_as, 0); From c5ecdaa1a1600499f3be8be1f2cb7cc7062959cf Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Thu, 7 Aug 2025 19:04:49 +0100 Subject: [PATCH 32/36] =?UTF-8?q?Add=20Euclidean=E2=80=93Cosine=20Score=20?= =?UTF-8?q?(ECS)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/imatrix/imatrix.cpp | 45 +++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index 902d6e7354aa0..bedbf586e4995 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #if defined(_MSC_VER) @@ -1301,7 +1302,7 @@ static bool show_statistics(const common_params & params) { std::map ls; LOG_INF("\nComputing tensor statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); - LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%12s\t%s\t%5s\t%10s\n", + LOG_INF("\n%6s\t%18s\t%13s\t%8s\t%8s\t%7s\t%15s\t%13s\t%11s\t%8s\t%5s\t%10s\n", "Layer", "Tensor", legacy_mode ? "Σ(Act²)" : "L₂ Norm", @@ -1310,8 +1311,8 @@ static bool show_statistics(const common_params & params) { "μ", "σ", "N", - "Entropy", - "E (norm)", + "H Norm", + legacy_mode ? 
"H" : "ECS", "ZD", "CosSim"); LOG_INF( @@ -1328,17 +1329,17 @@ static bool show_statistics(const common_params & params) { blk = -1; // not a block layer } - LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%12.4f\t%7.2f%%\t%6.2f%%\t%10.4f\n", + LOG_INF("%5s\t%-20s\t%11.2f\t%10.4f\t%10.4f\t%8.2f\t%8.2f\t%7d\t%10.2f%%\t%10.4f\t%6.2f%%\t%10.4f\n", layer.c_str(), name.c_str(), - legacy_mode == 1 ? tstat.sum_values : tstat.l2_norm, + legacy_mode ? tstat.sum_values : tstat.l2_norm, tstat.min_values, tstat.max_values, tstat.mean_values, tstat.std_deviation, tstat.elements, - tstat.entropy, 100.0f * (tstat.entropy / std::log2(tstat.elements)), + legacy_mode ? tstat.entropy : 100.0f * std::exp(-0.01f * tstat.l2_norm) * std::pow(fabs(tstat.cossim), 10.0f), 100.0f * tstat.zd_score, tstat.cossim); @@ -1363,25 +1364,37 @@ static bool show_statistics(const common_params & params) { const auto layers = std::count_if(ls.begin(), ls.end(), [](const auto & kv) { return kv.first >= 0; }); LOG_INF("\nComputing layer statistics (%ld layers)\n", layers); - LOG_INF("\n%6s\t%13s\t%5s\t%10s\n", + LOG_INF("\n%6s\t%13s\t%6s\t%11s\t%6s\n", "Layer", legacy_mode ? "Σ(Act²)" : "L₂ Norm", "ZD", - "CosSim"); - LOG_INF("============================================\n"); + "CosSim", + legacy_mode ? "" : "ECS"); + if (legacy_mode) { + LOG_INF("============================================\n"); + } else { + LOG_INF("=========================================================\n"); + } for (const auto & [layer, stats] : ls) { if (layer < 0 || stats.n == 0) continue; - const float lyr_sum = stats.lyr_sum; - const float lyr_zd = stats.lyr_zd / stats.n; const auto lcs = lyr_cossim.find(layer); - const float lyr_cs = (lcs != lyr_cossim.end()) ? lcs->second : 0.0f; + const float lyr_cs = lcs != lyr_cossim.end() ? lcs->second : 0.0f; const auto ll2n = lyr_l2_norm.find(layer); - const float l2_norm = (ll2n != lyr_l2_norm.end()) ? ll2n->second : 0.0f; - LOG_INF("%5d\t%11.2f\t%6.2f%%\t%10.4f\n", + const float lyr_l2n = ll2n != lyr_l2_norm.end() ? ll2n->second : 0.0f; + if (legacy_mode) { + LOG_INF("%5d\t%11.2f\t%6.2f%%\t%11.4f\n", layer, - legacy_mode ? lyr_sum : l2_norm, - 100.0f * lyr_zd, + stats.lyr_sum, + 100.0f * stats.lyr_zd / stats.n, lyr_cs); + } else { + LOG_INF("%5d\t%11.2f\t%6.2f%%\t%11.4f\t%8.4f\n", + layer, + lyr_l2n, + 100.0f * stats.lyr_zd / stats.n, + lyr_cs, + 100.0f * std::exp(-0.01f * lyr_l2n) * std::pow(fabs(lyr_cs), 10.0f)); + } } LOG_INF("\n"); From 59af5034f7fba3c31dc9cc861bcaf35024abe9dd Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sat, 9 Aug 2025 01:26:23 +0100 Subject: [PATCH 33/36] Update README.md --- tools/imatrix/README.md | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/tools/imatrix/README.md b/tools/imatrix/README.md index 4505cb4ce8c7d..bf13a2860509f 100644 --- a/tools/imatrix/README.md +++ b/tools/imatrix/README.md @@ -20,19 +20,19 @@ The parameters in square brackets are optional and have the following meaning: * `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`. * `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used. 
* `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks) -* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf". +* `--output-format` specifies the output format of the generated imatrix file. Either `gguf`, or `dat` (the legacy format). Defaults to `gguf`. * `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never) * `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default. * `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets. * `--parse-special` enables parsing of special tokens (e.g., `<|im_start|>` in some models). Useful for models with custom tokenizers. * `--chunk | --from-chunk` to skip the first `n` chunks of tokens from the input data. Useful for resuming or skipping initial low-quality data. -* `--chunks` maximum number of chunks to process. Default is -1 for all available chunks. +* `--chunks` maximum number of chunks to process. Default is `-1` for all available chunks. * `--no-ppl` disables the calculation of perplexity for the processed chunks. Useful if you want to speed up the processing and do not care about perplexity. * `--show-statistics` displays imatrix file's statistics. For faster computation, make sure to use GPU offloading via the `-ngl | --n-gpu-layers` argument. -Recent versions of `llama-imatrix` store data in GGUF format by default. For the legacy format, use an extension other than `.gguf` when saving the output file. More information is available in . +Versions **b5942** and newer of `llama-imatrix` store data in GGUF format by default. For the legacy format, use `--output-format dat` when saving the output file. More information is available in . ## Examples @@ -74,25 +74,27 @@ Recent versions of `llama-imatrix` store data in GGUF format by default. For the ./llama-imatrix --in-file imatrix.gguf --show-statistics ``` -`--show-statistics` will display the following statistics: +## Statistics + +From version , `--show-statistics` operates in two modes: for GGUF (preferred) imatrices, it reports direct and accurate activation statistics, and for legacy (binary) files, it reports the less precise average squared activations. #### Per tensor -* Σ(Act²): sum of all squared activations (the importance scores) -* Min & Max: minimum and maximum squared activations values -* μ & σ: Squared activations' mean and standard deviation -* % Active: proportion of elements whose average squared activation exceeds a small threshold (1e-5). Helpful to determine how alive/dormant the tensor is during inference -* N: number of squared activations -* Entropy: entropy of the squared activation distribution, in bits (standard Shannon entropy measurement) $S = -\sum_{i=1}^N p_i \log_2 p_i$ -* E (norm): Normalized entropy. $E(norm)=\frac{-\sum_{i=1}^N p_i \log_2 p_i}{log_2 N}$. These two metrics can be used to determine how well a prompt "exercises" the model's capabilities -* ZD Score: z-score distribution as described in _3.1 Layer Importance Scores_ of [Layer-Wise Quantization](https://arxiv.org/abs/2406.17415) -* CosSim: cosine similarity with respect to the previous layer's tensor. 
Useful to determine how similar the squared activations of the current layer are to the previous layer's squared activations. +* **Σ(Act²)** *(legacy mode)* / **L₂ Norm** *(preferred)*: If in legacy mode, the raw sum of squares of activations (sum of `Act²`). In preferred mode, the Euclidean Distance (L₂ Norm) between this tensor’s average activations and those of the previous layer. +* **Min / Max / μ / σ**: Tensor elements Min, Max, Mean, and Standard Deviation. +* **N**: Number of tensor elements considered. +* **H Norm**: Shannon Entropy normalized over log₂(N). Defined as $H Norm=\frac{-\sum_{i=1}^N p_i \log_2 p_i}{log_2 N}$. Used to determine how well a prompt "exercises" the model's capabilities. +* **H** *(legacy mode)* / **ECS** *(preferred)*: If legacy, Shannon Entropy defined as $H = -\sum_{i=1}^N p_i \log_2 p_i$. If preferred, *Euclidean-Cosine Score* defined as $ECS = K \cdot e^{-\alpha a} \cdot |b|^{\gamma}$ where `a = L₂ Norm`, `b = Cosine Similarity`, `α = -0.01`, `γ = 10` between this tensor’s elements and those of the previous layer. Higher score means more similarity and lower change. +* **ZD**: % of elements whose Z-score is > 1.0 in magnitude (an indicator of outliers), as described in _3.1 Layer Importance Scores_ of [Layer-Wise Quantization](https://arxiv.org/abs/2406.17415) +* **CosSim**: Cosine Similarity between this tensor’s elements and those of the previous layer. #### Per layer -Weighted averages of Σ(Act²), ZD Score and CosSim are also calculated. +Aggregated metrics per block/layer: -#### Important note on the computed Statistics +* **Σ(Act²)** *(legacy mode)* / **L₂ Norm** *(preferred)*: If in legacy mode, the sum of squared activations (sum of Act²) for the layer's concatenated tensors. In preferred mode, the Euclidean Distance (L₂ Norm) between this layer's average concatenated tensor activations the previous layer. +* **ZD**: % of this layer's concatenated tensors' elements with |Z| > 1. +* **CosSim**: Cosine Similarity between this layer's concatenated tensors' elements compared and the previous layer’s. +* **ECS** *(preferred only)*: Euclidean-Cosine Score applied to the layer. -When using these statistics, please note that they are computed on the squared activations, **not on the actual (raw) activations**. -Whilst the results are still useful, they're less realiable than using the raw values, and in the case of the cosine similarity, could be misleading if the tensor contains opposite vectors. +More information is available in https://github.com/ggml-org/llama.cpp/pull/14891 From 6fe51e12f1801ce11a7b0f2a53a2daf0caddfcfa Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sat, 9 Aug 2025 09:12:23 +0100 Subject: [PATCH 34/36] Fix typo in ECS formula --- tools/imatrix/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/imatrix/README.md b/tools/imatrix/README.md index bf13a2860509f..adbec2ed0097f 100644 --- a/tools/imatrix/README.md +++ b/tools/imatrix/README.md @@ -84,7 +84,7 @@ From version , `--show-statistics` operates in two modes: for GGUF (prefe * **Min / Max / μ / σ**: Tensor elements Min, Max, Mean, and Standard Deviation. * **N**: Number of tensor elements considered. * **H Norm**: Shannon Entropy normalized over log₂(N). Defined as $H Norm=\frac{-\sum_{i=1}^N p_i \log_2 p_i}{log_2 N}$. Used to determine how well a prompt "exercises" the model's capabilities. -* **H** *(legacy mode)* / **ECS** *(preferred)*: If legacy, Shannon Entropy defined as $H = -\sum_{i=1}^N p_i \log_2 p_i$. 
If preferred, *Euclidean-Cosine Score* defined as $ECS = K \cdot e^{-\alpha a} \cdot |b|^{\gamma}$ where `a = L₂ Norm`, `b = Cosine Similarity`, `α = -0.01`, `γ = 10` between this tensor’s elements and those of the previous layer. Higher score means more similarity and lower change. +* **H** *(legacy mode)* / **ECS** *(preferred)*: If legacy, Shannon Entropy defined as $H = -\sum_{i=1}^N p_i \log_2 p_i$. If preferred, *Euclidean-Cosine Score* defined as $ECS = K \cdot e^{-\alpha a} \cdot |b|^{\gamma}$ where `a = L₂ Norm`, `b = Cosine Similarity`, `α = 0.01`, `γ = 10` between this tensor’s elements and those of the previous layer. Higher score means more similarity and lower change. * **ZD**: % of elements whose Z-score is > 1.0 in magnitude (an indicator of outliers), as described in _3.1 Layer Importance Scores_ of [Layer-Wise Quantization](https://arxiv.org/abs/2406.17415) * **CosSim**: Cosine Similarity between this tensor’s elements and those of the previous layer. From dcac206f8e3931116956080b37c1e2aa809dff45 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sat, 9 Aug 2025 14:49:25 +0100 Subject: [PATCH 35/36] Add --activation-statistics logic to avoid doubling the imatrix size by default --- tools/imatrix/imatrix.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index bedbf586e4995..4d3a13cb2a03a 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -64,6 +64,7 @@ class IMatrixCollector { public: IMatrixCollector() = default; void set_params(common_params params) { m_params = std::move(params); } + bool activation_statistics() const { return m_params.activation_statistics; } bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data); void save_imatrix_legacy(int32_t ncall = -1) const; void save_imatrix(int32_t n_chunk = -1) const; @@ -429,9 +430,8 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * // broadcast, when loading an old imatrix e.counts.resize(n_as, e.counts[0]); } - // ToDo: find an efficient way to implement --activation-statistics to avoid doubling the imatrix size by default if (e.values.empty()) { - e.activations.resize(src1->ne[0]*n_as, 0); + if (activation_statistics()) e.activations.resize(src1->ne[0]*n_as, 0); e.values.resize(src1->ne[0]*n_as, 0); e.counts.resize(n_as, 0); } @@ -463,7 +463,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * e.counts[ex]++; for (int64_t j = 0; j < src1->ne[0]; ++j) { - e.activations[e_start + j] += x[j]; + if (activation_statistics()) e.activations[e_start + j] += x[j]; e.values[e_start + j] += x[j] * x[j]; if (!std::isfinite((float)e.values[e_start + j])) { LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str()); @@ -503,7 +503,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * } } if (e.values.empty()) { - e.activations.resize(src1->ne[0] * n_mat, 0); + if (activation_statistics()) e.activations.resize(src1->ne[0] * n_mat, 0); e.values.resize(src1->ne[0] * n_mat, 0); e.counts.resize(1, 0); } @@ -522,7 +522,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * for (int64_t row = 0; row < src1->ne[1]; ++row) { const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->nb[3]); for (int64_t j = 0; j < src1->ne[0]; ++j) { - e.activations[mat_start + j] += x[j]; + if (activation_statistics()) e.activations[mat_start + j] += x[j]; 
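// Σ(act²) is always accumulated since quantization depends on it; the raw-sum
// accumulation above is opt-in via --activation-statistics, which avoids
// doubling the imatrix size by default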
e.values[mat_start + j] += x[j] * x[j]; if (!std::isfinite((float)e.values[j])) { LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str()); @@ -704,7 +704,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { } to_store.push_back(kv.first); - data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN); + if (activation_statistics()) data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN); data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN); data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN); } @@ -758,7 +758,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const { gguf_add_tensor(ctx_gguf, in_sum2); gguf_add_tensor(ctx_gguf, counts); - if (!stat.activations.empty()) { + if (!stat.activations.empty() && activation_statistics()) { const int32_t nact = (int32_t) stat.activations.size(); struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat); ggml_format_name(in_sum, "%s.in_sum", name.c_str()); From 89051cda35532ddf9a43c8e7d9c4655b160181e4 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sat, 9 Aug 2025 14:49:44 +0100 Subject: [PATCH 36/36] Update README.md --- tools/imatrix/README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/imatrix/README.md b/tools/imatrix/README.md index adbec2ed0097f..46ee8d9968c89 100644 --- a/tools/imatrix/README.md +++ b/tools/imatrix/README.md @@ -10,7 +10,7 @@ More information is available in , `--show-statistics` operates in two modes: for GGUF (preferred) imatrices, it reports direct and accurate activation statistics, and for legacy (binary) files, it reports the less precise average squared activations. +Beginning with version , `--show-statistics` has two modes. If `--activation-statistics` was used at imatrix creation time and `--output-format` was set to `gguf`, it reports precise statistics. Otherwise, it reports less accurate, albeit still useful, metrics based on average squared activations. #### Per tensor
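For the ECS metric referenced above, here is a minimal self-contained sketch matching the expression used in `show_statistics()`; the helper name `ecs_score` is illustrative and not part of the patch:

```cpp
#include <cmath>

// Euclidean-Cosine Score: ECS = K * e^(-α·a) * |b|^γ, with a = L₂ Norm,
// b = Cosine Similarity, K = 100, α = 0.01 and γ = 10. A small inter-layer
// distance together with |CosSim| close to 1 yields scores near 100.
static float ecs_score(float l2_norm, float cossim) {
    return 100.0f * std::exp(-0.01f * l2_norm) * std::pow(std::fabs(cossim), 10.0f);
}
```

For example, `ecs_score(0.0f, 1.0f)` evaluates to 100 (two layers with identical average activations), while larger distances or weaker cosine similarity decay the score toward 0.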