|
59 | 59 |
|
60 | 60 | // The mean could be calculated by simply dividing the sum by |
61 | 61 | // the number of elements, but that method is numerically unstable |
62 | | -#define RUN_MEAN1(type, array, results, r, ss)\ |
| 62 | +#define RUN_MEAN1(type, array, rarray, ss)\ |
63 | 63 | ({\ |
64 | | - mp_float_t M, m;\ |
65 | | - M = m = (mp_float_t)(*(type *)(array));\ |
66 | | - for(size_t i=1; i < (ss).shape[0]; i++) {\ |
67 | | - (array) += (ss).strides[0];\ |
| 64 | + mp_float_t M = 0.0;\ |
| 65 | + for(size_t i=0; i < (ss).shape[0]; i++) {\ |
68 | 66 | mp_float_t value = (mp_float_t)(*(type *)(array));\ |
69 | | - m = M + (value - M) / (mp_float_t)(i+1);\ |
70 | | - M = m;\ |
| 67 | + M = M + (value - M) / (mp_float_t)(i+1);\ |
| 68 | + (array) += (ss).strides[0];\ |
71 | 69 | }\ |
72 | | - (array) += (ss).strides[0];\ |
73 | | - *(r)++ = M;\ |
| 70 | + *(rarray)++ = M;\ |
74 | 71 | }) |
75 | 72 |
|
76 | 73 | // Instead of the straightforward implementation of the definition, |
77 | 74 | // we take the numerically stable Welford algorithm here |
78 | 75 | // https://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ |
79 | | -#define RUN_STD1(type, array, results, r, ss, div)\ |
| 76 | +#define RUN_STD1(type, array, rarray, ss, div)\ |
80 | 77 | ({\ |
81 | | - mp_float_t M = 0.0, m = 0.0, S = 0.0, s = 0.0;\ |
| 78 | + mp_float_t M = 0.0, m = 0.0, S = 0.0;\ |
82 | 79 | for(size_t i=0; i < (ss).shape[0]; i++) {\ |
83 | 80 | mp_float_t value = (mp_float_t)(*(type *)(array));\ |
84 | 81 | m = M + (value - M) / (mp_float_t)(i+1);\ |
85 | | - s = S + (value - M) * (value - m);\ |
| 82 | + S = S + (value - M) * (value - m);\ |
| 83 | + M = m;\ |
| 84 | + (array) += (ss).strides[0];\ |
| 85 | + }\ |
| 86 | + *(rarray)++ = MICROPY_FLOAT_C_FUN(sqrt)(S / (div));\ |
| 87 | +}) |
| 88 | + |
| 89 | +#define RUN_MEAN_STD1(type, array, rarray, ss, div, isStd)\ |
| 90 | +({\ |
| 91 | + mp_float_t M = 0.0, m = 0.0, S = 0.0;\ |
| 92 | + for(size_t i=0; i < (ss).shape[0]; i++) {\ |
| 93 | + mp_float_t value = (mp_float_t)(*(type *)(array));\ |
| 94 | + m = M + (value - M) / (mp_float_t)(i+1);\ |
| 95 | + if(isStd) {\ |
| 96 | + S += (value - M) * (value - m);\ |
| 97 | + }\ |
86 | 98 | M = m;\ |
87 | | - S = s;\ |
88 | 99 | (array) += (ss).strides[0];\ |
89 | 100 | }\ |
90 | | - *(r)++ = MICROPY_FLOAT_C_FUN(sqrt)(s / (div));\ |
| 101 | + *(rarray)++ = isStd ? MICROPY_FLOAT_C_FUN(sqrt)(S / (div)) : M;\ |
91 | 102 | }) |
92 | 103 |
|
93 | 104 | #define RUN_DIFF1(ndarray, type, array, results, rarray, index, stencil, N)\ |
|
181 | 192 | RUN_SUM1(type, (array), (results), (rarray), (ss));\ |
182 | 193 | } while(0) |
183 | 194 |
|
184 | | -#define RUN_MEAN(type, array, results, r, ss) do {\ |
185 | | - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 195 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
| 196 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
186 | 197 | } while(0) |
187 | 198 |
|
188 | | -#define RUN_STD(type, array, results, r, ss, div) do {\ |
189 | | - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 199 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
| 200 | + RUN_STD1(type, (array), (results), (rarray), (ss), (div));\ |
| 201 | +} while(0) |
| 202 | + |
| 203 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
| 204 | + RUN_MEAN_STD1(type, (array), (results), (rarray), (ss), (div), (isStd));\ |
190 | 205 | } while(0) |
191 | 206 |
|
192 | 207 | #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\ |
|
218 | 233 | } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
219 | 234 | } while(0) |
220 | 235 |
|
221 | | -#define RUN_MEAN(type, array, results, r, ss) do {\ |
| 236 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
222 | 237 | size_t l = 0;\ |
223 | 238 | do {\ |
224 | | - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 239 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
225 | 240 | (array) -= (ss).strides[0] * (ss).shape[0];\ |
226 | 241 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
227 | 242 | l++;\ |
228 | 243 | } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
229 | 244 | } while(0) |
230 | 245 |
|
231 | | -#define RUN_STD(type, array, results, r, ss, div) do {\ |
| 246 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
232 | 247 | size_t l = 0;\ |
233 | 248 | do {\ |
234 | | - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 249 | + RUN_STD1(type, (array), (rarray), (ss), (div));\ |
235 | 250 | (array) -= (ss).strides[0] * (ss).shape[0];\ |
236 | 251 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
237 | 252 | l++;\ |
238 | 253 | } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
239 | 254 | } while(0) |
240 | 255 |
|
| 256 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
| 257 | + size_t l = 0;\ |
| 258 | + do {\ |
| 259 | + RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\ |
| 260 | + (array) -= (ss).strides[0] * (ss).shape[0];\ |
| 261 | + (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
| 262 | + l++;\ |
| 263 | + } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
| 264 | +} while(0) |
| 265 | + |
| 266 | + |
241 | 267 | #define RUN_ARGMIN(ndarray, type, array, results, rarray, shape, strides, index, op) do {\ |
242 | 268 | size_t l = 0;\ |
243 | 269 | do {\ |
|
298 | 324 | } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\ |
299 | 325 | } while(0) |
300 | 326 |
|
301 | | -#define RUN_MEAN(type, array, results, r, ss) do {\ |
| 327 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
| 328 | + size_t k = 0;\ |
| 329 | + do {\ |
| 330 | + size_t l = 0;\ |
| 331 | + do {\ |
| 332 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
| 333 | + (array) -= (ss).strides[0] * (ss).shape[0];\ |
| 334 | + (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
| 335 | + l++;\ |
| 336 | + } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
| 337 | + (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS-1];\ |
| 338 | + (array) += (ss).strides[ULAB_MAX_DIMS - 2];\ |
| 339 | + k++;\ |
| 340 | + } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\ |
| 341 | +} while(0) |
| 342 | + |
| 343 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
302 | 344 | size_t k = 0;\ |
303 | 345 | do {\ |
304 | 346 | size_t l = 0;\ |
305 | 347 | do {\ |
306 | | - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 348 | + RUN_STD1(type, (array), (rarray), (ss), (div));\ |
307 | 349 | (array) -= (ss).strides[0] * (ss).shape[0];\ |
308 | 350 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
309 | 351 | l++;\ |
|
314 | 356 | } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\ |
315 | 357 | } while(0) |
316 | 358 |
|
317 | | -#define RUN_STD(type, array, results, r, ss, div) do {\ |
| 359 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
318 | 360 | size_t k = 0;\ |
319 | 361 | do {\ |
320 | 362 | size_t l = 0;\ |
321 | 363 | do {\ |
322 | | - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 364 | + RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\ |
323 | 365 | (array) -= (ss).strides[0] * (ss).shape[0];\ |
324 | 366 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
325 | 367 | l++;\ |
|
424 | 466 | } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\ |
425 | 467 | } while(0) |
426 | 468 |
|
427 | | -#define RUN_MEAN(type, array, results, r, ss) do {\ |
| 469 | +#define RUN_MEAN(type, array, rarray, ss) do {\ |
| 470 | + size_t j = 0;\ |
| 471 | + do {\ |
| 472 | + size_t k = 0;\ |
| 473 | + do {\ |
| 474 | + size_t l = 0;\ |
| 475 | + do {\ |
| 476 | + RUN_MEAN1(type, (array), (rarray), (ss));\ |
| 477 | + (array) -= (ss).strides[0] * (ss).shape[0];\ |
| 478 | + (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
| 479 | + l++;\ |
| 480 | + } while(l < (ss).shape[ULAB_MAX_DIMS - 1]);\ |
| 481 | + (array) -= (ss).strides[ULAB_MAX_DIMS - 1] * (ss).shape[ULAB_MAX_DIMS-1];\ |
| 482 | + (array) += (ss).strides[ULAB_MAX_DIMS - 2];\ |
| 483 | + k++;\ |
| 484 | + } while(k < (ss).shape[ULAB_MAX_DIMS - 2]);\ |
| 485 | + (array) -= (ss).strides[ULAB_MAX_DIMS - 2] * (ss).shape[ULAB_MAX_DIMS-2];\ |
| 486 | + (array) += (ss).strides[ULAB_MAX_DIMS - 3];\ |
| 487 | + j++;\ |
| 488 | + } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\ |
| 489 | +} while(0) |
| 490 | + |
| 491 | +#define RUN_STD(type, array, rarray, ss, div) do {\ |
428 | 492 | size_t j = 0;\ |
429 | 493 | do {\ |
430 | 494 | size_t k = 0;\ |
431 | 495 | do {\ |
432 | 496 | size_t l = 0;\ |
433 | 497 | do {\ |
434 | | - RUN_MEAN1(type, (array), (results), (r), (ss));\ |
| 498 | + RUN_STD1(type, (array), (rarray), (ss), (div));\ |
435 | 499 | (array) -= (ss).strides[0] * (ss).shape[0];\ |
436 | 500 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
437 | 501 | l++;\ |
|
446 | 510 | } while(j < (ss).shape[ULAB_MAX_DIMS - 3]);\ |
447 | 511 | } while(0) |
448 | 512 |
|
449 | | -#define RUN_STD(type, array, results, r, ss, div) do {\ |
| 513 | +#define RUN_MEAN_STD(type, array, rarray, ss, div, isStd) do {\ |
450 | 514 | size_t j = 0;\ |
451 | 515 | do {\ |
452 | 516 | size_t k = 0;\ |
453 | 517 | do {\ |
454 | 518 | size_t l = 0;\ |
455 | 519 | do {\ |
456 | | - RUN_STD1(type, (array), (results), (r), (ss), (div));\ |
| 520 | + RUN_MEAN_STD1(type, (array), (rarray), (ss), (div), (isStd));\ |
457 | 521 | (array) -= (ss).strides[0] * (ss).shape[0];\ |
458 | 522 | (array) += (ss).strides[ULAB_MAX_DIMS - 1];\ |
459 | 523 | l++;\ |
|
0 commit comments