@@ -3506,7 +3506,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
3506
3506
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
3507
3507
for (int sb = 0; sb < QK_K / 128; sb++) {
3508
3508
3509
- // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
3509
+ // Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
3510
3510
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
3511
3511
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
3512
3512
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4230,16 +4230,16 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
4230
4230
}
4231
4231
// For super block
4232
4232
for (int64_t b = 0; b < nb; b++) {
4233
- // Delta values - Load the sixteen scale values from two block_q4_kx8 structures
4233
+ // Delta values - Load the sixteen scale values from two block_q2_kx8 structures
4234
4234
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
4235
4235
4236
- // dmin values - Load the sixteen dmin values from two block_q4_kx8 structures
4236
+ // dmin values - Load the sixteen dmin values from two block_q2_kx8 structures
4237
4237
const __m512 col_dmin_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].dmin, b_ptr_1[b].dmin);
4238
4238
4239
4239
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
4240
4240
for (int sb = 0; sb < QK_K / 128; sb++) {
4241
4241
4242
- // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4242
+ // Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4243
4243
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + sb * 256));
4244
4244
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 32 + sb * 256));
4245
4245
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i * )(b_ptr_0[b].qs + 64 + sb * 256));
@@ -4980,7 +4980,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
4980
4980
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
4981
4981
for (int sb = 0; sb < QK_K / 128; sb++) {
4982
4982
4983
- // Load the eight block_q4_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4983
+ // Load the eight block_q2_K for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
4984
4984
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
4985
4985
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
4986
4986
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
@@ -5654,7 +5654,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
5654
5654
// Loop to iterate over the sixteen sub blocks of a super block - eight sub blocks are processed per iteration
5655
5655
for (int sb = 0; sb < QK_K / 128; sb++) {
5656
5656
5657
- // Load the eight block_q4_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
5657
+ // Load the eight block_q2_k for eight sub blocks quantized values interleaved with each other in chunks of eight bytes - B0,B1 ....B6,B7
5658
5658
const __m256i rhs_raw_mat_0123_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + sb * 256));
5659
5659
const __m256i rhs_raw_mat_4567_0 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 32 + sb * 256));
5660
5660
const __m256i rhs_raw_mat_0123_1 = _mm256_loadu_si256((const __m256i *)(b_ptr[b].qs + 64 + sb * 256));
0 commit comments