1
1
#include " cpy.hpp"
2
2
3
3
#include < float.h>
4
- #include < string>
5
4
6
5
#include " dequantize.hpp"
7
6
#include " ggml-sycl/common.hpp"
8
7
#include " ggml-sycl/presets.hpp"
9
8
#include " ggml.h"
10
9
11
- static __dpct_inline__ int best_index_int8 (int n, const int8_t * val, float x) {
12
- if (x <= val[0 ]) {
13
- return 0 ;
14
- }
15
- if (x >= val[n - 1 ]) {
16
- return n - 1 ;
17
- }
18
- int ml = 0 , mu = n - 1 ;
19
- while (mu - ml > 1 ) {
20
- int mav = (ml + mu) / 2 ;
21
- if (x < val[mav]) {
22
- mu = mav;
23
- } else {
24
- ml = mav;
25
- }
26
- }
27
- return x - val[mu - 1 ] < val[mu] - x ? mu - 1 : mu;
28
- }
29
10
30
11
static void cpy_1_f32_f32 (const char * cxi, char * cdsti) {
31
12
const float * xi = (const float *) cxi;
@@ -97,28 +78,6 @@ static void cpy_f32_f16(const char * cx, char * cdst, const int ne, const int ne
97
78
cpy_1 (cx + x_offset, cdst + dst_offset);
98
79
}
99
80
100
- static void cpy_blck_f32_q8_0 (const char * cxi, char * cdsti) {
101
- const float * xi = (const float *) cxi;
102
- block_q8_0 * dsti = (block_q8_0 *) cdsti;
103
-
104
- float amax = 0 .0f ; // absolute max
105
-
106
- for (int j = 0 ; j < QK8_0; j++) {
107
- const float v = xi[j];
108
- amax = sycl::fmax (amax, sycl::fabs ((float ) v));
109
- }
110
-
111
- const float d = amax / ((1 << 7 ) - 1 );
112
- const float id = d ? 1 .0f / d : 0 .0f ;
113
-
114
- dsti->d = d;
115
-
116
- for (int j = 0 ; j < QK8_0; ++j) {
117
- const float x0 = xi[j] * id;
118
-
119
- dsti->qs [j] = sycl::round ((float ) x0);
120
- }
121
- }
122
81
123
82
/* quantized type same copy */
124
83
template <typename T>
@@ -140,178 +99,7 @@ static void cpy_blck_q8_0_f32(const char * cxi, char * cdsti) {
140
99
}
141
100
}
142
101
143
- static void cpy_blck_f32_q4_0 (const char * cxi, char * cdsti) {
144
- const float * xi = (const float *) cxi;
145
- block_q4_0 * dsti = (block_q4_0 *) cdsti;
146
-
147
- float amax = 0 .0f ;
148
- float vmax = 0 .0f ;
149
-
150
- for (int j = 0 ; j < QK4_0; ++j) {
151
- const float v = xi[j];
152
- if (amax < sycl::fabs ((float ) v)) {
153
- amax = sycl::fabs ((float ) v);
154
- vmax = v;
155
- }
156
- }
157
-
158
- const float d = vmax / -8 ;
159
- const float id = d ? 1 .0f / d : 0 .0f ;
160
-
161
- dsti->d = d;
162
-
163
- for (int j = 0 ; j < QK4_0 / 2 ; ++j) {
164
- const float x0 = xi[0 + j] * id;
165
- const float x1 = xi[QK4_0 / 2 + j] * id;
166
-
167
- const uint8_t xi0 = dpct::min (15 , (int8_t ) (x0 + 8 .5f ));
168
- const uint8_t xi1 = dpct::min (15 , (int8_t ) (x1 + 8 .5f ));
169
-
170
- dsti->qs [j] = xi0;
171
- dsti->qs [j] |= xi1 << 4 ;
172
- }
173
- }
174
-
175
- static void cpy_blck_f32_q4_1 (const char * cxi, char * cdsti) {
176
- const float * xi = (const float *) cxi;
177
- block_q4_1 * dsti = (block_q4_1 *) cdsti;
178
-
179
- float vmin = FLT_MAX;
180
- float vmax = -FLT_MAX;
181
-
182
- for (int j = 0 ; j < QK4_1; ++j) {
183
- const float v = xi[j];
184
-
185
- if (v < vmin) {
186
- vmin = v;
187
- }
188
- if (v > vmax) {
189
- vmax = v;
190
- }
191
- }
192
-
193
- const float d = (vmax - vmin) / ((1 << 4 ) - 1 );
194
- const float id = d ? 1 .0f / d : 0 .0f ;
195
-
196
- dsti->dm .x () = d;
197
- dsti->dm .y () = vmin;
198
-
199
- for (int j = 0 ; j < QK4_1 / 2 ; ++j) {
200
- const float x0 = (xi[0 + j] - vmin) * id;
201
- const float x1 = (xi[QK4_1 / 2 + j] - vmin) * id;
202
-
203
- const uint8_t xi0 = dpct::min (15 , (int8_t ) (x0 + 0 .5f ));
204
- const uint8_t xi1 = dpct::min (15 , (int8_t ) (x1 + 0 .5f ));
205
102
206
- dsti->qs [j] = xi0;
207
- dsti->qs [j] |= xi1 << 4 ;
208
- }
209
- }
210
-
211
- static void cpy_blck_f32_q5_0 (const char * cxi, char * cdsti) {
212
- const float * xi = (const float *) cxi;
213
- block_q5_0 * dsti = (block_q5_0 *) cdsti;
214
-
215
- float amax = 0 .0f ;
216
- float vmax = 0 .0f ;
217
-
218
- for (int j = 0 ; j < QK5_0; ++j) {
219
- const float v = xi[j];
220
- if (amax < sycl::fabs ((float ) v)) {
221
- amax = sycl::fabs ((float ) v);
222
- vmax = v;
223
- }
224
- }
225
-
226
- const float d = vmax / -16 ;
227
- const float id = d ? 1 .0f / d : 0 .0f ;
228
-
229
- dsti->d = d;
230
-
231
- uint32_t qh = 0 ;
232
- for (int j = 0 ; j < QK5_0 / 2 ; ++j) {
233
- const float x0 = xi[0 + j] * id;
234
- const float x1 = xi[QK5_0 / 2 + j] * id;
235
-
236
- const uint8_t xi0 = dpct::min (31 , (int8_t ) (x0 + 16 .5f ));
237
- const uint8_t xi1 = dpct::min (31 , (int8_t ) (x1 + 16 .5f ));
238
-
239
- dsti->qs [j] = (xi0 & 0xf ) | ((xi1 & 0xf ) << 4 );
240
- qh |= ((xi0 & 0x10u ) >> 4 ) << (j + 0 );
241
- qh |= ((xi1 & 0x10u ) >> 4 ) << (j + QK5_0 / 2 );
242
- }
243
- memcpy (dsti->qh , &qh, sizeof (qh));
244
- }
245
-
246
- static void cpy_blck_f32_q5_1 (const char * cxi, char * cdsti) {
247
- const float * xi = (const float *) cxi;
248
- block_q5_1 * dsti = (block_q5_1 *) cdsti;
249
-
250
- float min = xi[0 ];
251
- float max = xi[0 ];
252
-
253
- for (int j = 1 ; j < QK5_1; ++j) {
254
- const float v = xi[j];
255
- min = v < min ? v : min;
256
- max = v > max ? v : max;
257
- }
258
-
259
- const float d = (max - min) / 31 ;
260
- const float id = d ? 1 .0f / d : 0 .0f ;
261
-
262
- dsti->dm .x () = d;
263
- dsti->dm .y () = min;
264
-
265
- uint32_t qh = 0 ;
266
- for (int j = 0 ; j < QK5_1 / 2 ; ++j) {
267
- const float x0 = (xi[0 + j] - min) * id;
268
- const float x1 = (xi[QK5_1 / 2 + j] - min) * id;
269
-
270
- const uint8_t xi0 = (uint8_t ) (x0 + 0 .5f );
271
- const uint8_t xi1 = (uint8_t ) (x1 + 0 .5f );
272
-
273
- dsti->qs [j] = (xi0 & 0xf ) | ((xi1 & 0xf ) << 4 );
274
- qh |= ((xi0 & 0x10u ) >> 4 ) << (j + 0 );
275
- qh |= ((xi1 & 0x10u ) >> 4 ) << (j + QK5_1 / 2 );
276
- }
277
- memcpy (dsti->qh , &qh, sizeof (qh));
278
- }
279
-
280
- static void cpy_blck_f32_iq4_nl (const char * cxi, char * cdsti) {
281
- const float * xi = (const float *) cxi;
282
- block_iq4_nl * dsti = (block_iq4_nl *) cdsti;
283
-
284
- float amax = 0 .0f ;
285
- float vmax = 0 .0f ;
286
-
287
- for (int j = 0 ; j < QK4_NL; ++j) {
288
- const float v = xi[j];
289
- if (amax < sycl::fabs ((float ) v)) {
290
- amax = sycl::fabs ((float ) v);
291
- vmax = v;
292
- }
293
- }
294
-
295
- float d = vmax / kvalues_iq4nl[0 ];
296
- const float id = d ? 1 .0f / d : 0 .0f ;
297
-
298
- float sumqx = 0 , sumq2 = 0 ;
299
- for (int j = 0 ; j < QK4_NL / 2 ; ++j) {
300
- const float x0 = xi[0 + j] * id;
301
- const float x1 = xi[QK4_NL / 2 + j] * id;
302
- const uint8_t xi0 = best_index_int8 (16 , kvalues_iq4nl, x0);
303
- const uint8_t xi1 = best_index_int8 (16 , kvalues_iq4nl, x1);
304
- dsti->qs [j] = xi0 | (xi1 << 4 );
305
- const float v0 = kvalues_iq4nl[xi0];
306
- const float v1 = kvalues_iq4nl[xi1];
307
- const float w0 = xi[0 + j] * xi[0 + j];
308
- const float w1 = xi[QK4_NL / 2 + j] * xi[QK4_NL / 2 + j];
309
- sumqx += w0 * v0 * xi[j] + w1 * v1 * xi[QK4_NL / 2 + j];
310
- sumq2 += w0 * v0 * v0 + w1 * v1 * v1;
311
- }
312
-
313
- dsti->d = sumq2 > 0 ? sumqx / sumq2 : d;
314
- }
315
103
316
104
template <dequantize_kernel_t dequant, int qk> static void cpy_blck_q_f32 (const char * cxi, char * cdsti) {
317
105
float * cdstf = (float *) (cdsti);
0 commit comments