Fix stale Q4_K references and a typo in Q2_K repack.cpp comments

Manogna-Sree · Manogna-Sree · commit d6ee6da5e12b · 2025-07-30T22:43:19.000-07:00
diff --git a/ggml/src/ggml-cpu/arch/x86/repack.cpp b/ggml/src/ggml-cpu/arch/x86/repack.cpp
@@ -3506,7 +3506,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
                 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
                 for (int sb = 0; sb < QK_K / 128; sb++) {
 
-                    // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
+                    // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
                     const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
                     const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
                     const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4230,16 +4230,16 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
             }
             // For super block
             for (int64_t b = 0; b < nb; b++) {
-                // Delta values - Load the sixteen scale values from two block_q4_kx8 structures
+                // Delta values - Load the sixteen scale values from two block_q2_Kx8 structures
                 const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
 
-                // dmin values - Load the sixteen dmin values from two block_q4_kx8 structures
+                // dmin values - Load the sixteen dmin values from two block_q2_Kx8 structures
                 const __m512 col_dmin_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].dmin, b_ptr_1[b].dmin);
 
                 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
                 for (int sb = 0; sb < QK_K / 128; sb++) {
 
-                    // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
+                    // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
                     const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
                     const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
                     const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4980,7 +4980,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
                 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
                 for (int sb = 0; sb < QK_K / 128; sb++) {
 
-                    // Load the eight block_q4_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
+                    // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
                     const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
                     const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
                     const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
@@ -5654,7 +5654,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
                 // Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
                 for (int sb = 0; sb < QK_K / 128; sb++) {
 
-                    // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
+                    // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
                     const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
                     const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
                     const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp
@@ -1095,7 +1095,7 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
 
     const int end = QK_K * 2 / blck_size_interleave;
 
-    // Interleave Q4_K quants by taking 8 bytes at a time
+    // Interleave Q2_K quants by taking 8 bytes at a time
     for (int i = 0; i < end; ++i) {
         int src_id = i % 8;
         int src_offset = (i / 8) * blck_size_interleave;
@@ -1106,7 +1106,7 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
         memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
     }
 
-    // The below logic is designed so as to unapck and rearrange scales and mins values in Q2_K
+    // The below logic is designed so as to unpack and rearrange scales and mins values in Q2_K
     // Currently the Q2_K structure has 16 scales and 16 mins packed in 16 bytes ( 4 bits for each value)
     // The output Q2_Kx8 structure has 128 bytes for storing scales and mins
     // Every 16 byte is packed such that it contains scales and mins for corresponding sub blocks from Q2_K structure