Skip to content

Commit d6ee6da

Browse files
committed
Update comments in repack.cpp
1 parent d45c9f0 commit d6ee6da

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

ggml/src/ggml-cpu/arch/x86/repack.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3506,7 +3506,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
35063506
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
35073507
for (int sb = 0; sb < QK_K / 128; sb++) {
35083508

3509-
// Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
3509+
// Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
35103510
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
35113511
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
35123512
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4230,16 +4230,16 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
42304230
}
42314231
// For super block
42324232
for (int64_t b = 0; b < nb; b++) {
4233-
// Delta values - Load the sixteen scale values from two block_q4_kx8 structures
4233+
// Delta values - Load the sixteen scale values from two block_q2_kx8 structures
42344234
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
42354235

4236-
// dmin values - Load the sixteen dmin values from two block_q4_kx8 structures
4236+
// dmin values - Load the sixteen dmin values from two block_q2_kx8 structures
42374237
const __m512 col_dmin_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].dmin, b_ptr_1[b].dmin);
42384238

42394239
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
42404240
for (int sb = 0; sb < QK_K / 128; sb++) {
42414241

4242-
// Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4242+
// Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
42434243
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
42444244
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
42454245
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4980,7 +4980,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
49804980
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
49814981
for (int sb = 0; sb < QK_K / 128; sb++) {
49824982

4983-
// Load the eight block_q4_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4983+
// Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
49844984
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
49854985
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
49864986
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
@@ -5654,7 +5654,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
56545654
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
56555655
for (int sb = 0; sb < QK_K / 128; sb++) {
56565656

5657-
// Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
5657+
// Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
56585658
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
56595659
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
56605660
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));

ggml/src/ggml-cpu/repack.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,7 +1095,7 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
10951095

10961096
const int end = QK_K * 2 / blck_size_interleave;
10971097

1098-
// Interleave Q4_K quants by taking 8 bytes at a time
1098+
// Interleave Q2_K quants by taking 8 bytes at a time
10991099
for (int i = 0; i < end; ++i) {
11001100
int src_id = i % 8;
11011101
int src_offset = (i / 8) * blck_size_interleave;
@@ -1106,7 +1106,7 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
11061106
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
11071107
}
11081108

1109-
// The below logic is designed so as to unapck and rearrange scales and mins values in Q2_K
1109+
// The below logic is designed so as to unpack and rearrange scales and mins values in Q2_K
11101110
// Currently the Q2_K structure has 16 scales and 16 mins packed in 16 bytes (4 bits for each value)
11111111
// The output Q2_Kx8 structure has 128 bytes for storing scales and mins
11121112
// Every 16 bytes are packed such that they contain the scales and mins for the corresponding sub blocks from the Q2_K structure

0 commit comments

Comments
 (0)