@@ -425,6 +425,120 @@ namespace lsp
425425
426426 #undef EFF_HSLA_LIGHT_CORE
427427
428+ #define EFF_HSLA_ALPHA_CORE /* Turns two vectors of input values into two register quads {h, s, l, V}, V = (0 > v) ? 1 + v : 1 - v */ \
429+ /* in: v0 = v[0] (first vector of 4 input values) */ \
430+ /* in: v1 = v[1] (second vector of 4 input values) */ \
431+ /* in: v8 = h (replicated to all lanes by the caller's ld4r) */ \
432+ /* in: v9 = s */ \
433+ /* in: v10 = l */ \
434+ /* v11 = a (loaded by the caller, not referenced by this macro) */ \
435+ /* in: v14 = 0 */ \
436+ /* in: v15 = 1 */ \
437+ __ASM_EMIT (" fsub v2.4s, v15.4s, v0.4s" ) /* v2 = 1 - v */ \
438+ __ASM_EMIT (" fsub v6.4s, v15.4s, v1.4s" ) /* v6 = 1 - v, second vector */ \
439+ __ASM_EMIT (" fcmgt v4.4s, v14.4s, v0.4s" ) /* v4 = 0 > v */ \
440+ __ASM_EMIT (" fcmgt v5.4s, v14.4s, v1.4s" ) /* v5 = 0 > v, second vector */ \
441+ __ASM_EMIT (" fadd v3.4s, v0.4s, v15.4s" ) /* v3 = v + 1 */ \
442+ __ASM_EMIT (" fadd v7.4s, v1.4s, v15.4s" ) /* v7 = v + 1, second vector */ \
443+ __ASM_EMIT (" bif v3.16b, v2.16b, v4.16b" ) /* v3 = V = (v+1)&[0>v] | (1-v)&[0<=v] */ \
444+ __ASM_EMIT (" bif v7.16b, v6.16b, v5.16b" ) /* v7 = V for the second vector */ \
445+ __ASM_EMIT (" mov v0.16b, v8.16b" ) /* v0 = h; the input in v0 is no longer needed */ \
446+ __ASM_EMIT (" mov v1.16b, v9.16b" ) /* v1 = s */ \
447+ __ASM_EMIT (" mov v2.16b, v10.16b" ) /* v2 = l, so {v0, v1, v2, v3} = {h, s, l, V} */ \
448+ __ASM_EMIT (" mov v4.16b, v8.16b" ) /* v4 = h */ \
449+ __ASM_EMIT (" mov v5.16b, v9.16b" ) /* v5 = s */ \
450+ __ASM_EMIT (" mov v6.16b, v10.16b" ) /* v6 = l, so {v4, v5, v6, v7} = {h, s, l, V} for the second vector */
451+
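452+ /* Scalar reference for eff_hsla_alpha below; applied to every i in [0, count), with dst advancing by 4 floats per input value:
453+ value = v[i];
454+ value = (0.0f > value) ? 1.0f + value : 1.0f - value;
455+
456+ dst[0] = eff->h;
457+ dst[1] = eff->s;
458+ dst[2] = eff->l;
459+ dst[3] = value; // Fill alpha channel
460+ */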
461+
462+ IF_ARCH_AARCH64
463+ (
464+ static const float EFF_HSLA_ALPHA_XC[] __lsp_aligned16 = /* constants fetched by eff_hsla_alpha with a single ldp q14, q15 */
465+ {
466+ LSP_DSP_VEC4 (0 .0f ), /* goes to v14: the zero the input is compared against (presumably 4 copies - confirm LSP_DSP_VEC4) */
467+ LSP_DSP_VEC4 (1 .0f ) /* goes to v15: the one used for 1 + v and 1 - v */
468+ };
469+ )
470+
471+ void eff_hsla_alpha (float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count)
472+ { /* For each of the count values v[i], stores the four floats {h, s, l, V} to dst, where V = (0 > v[i]) ? 1 + v[i] : 1 - v[i] and h, s, l are the first three floats read from eff */
473+ ARCH_AARCH64_ASM
474+ (
475+ __ASM_EMIT (" ld4r {v8.4s, v9.4s, v10.4s, v11.4s}, [%[eff]]" ) /* v8 = h, v9 = s, v10 = l, v11 = a (each replicated to all lanes; v11 is loaded but never stored) */
476+ __ASM_EMIT (" ldp q14, q15, [%[XC]]" ) /* v14 = 0.0, v15 = 1.0 */
477+
478+ // -----------------------------------------------------------------
479+ // 8x blocks: 8 input values in, 8 {h, s, l, V} quadruples (0x80 bytes) out per iteration
480+ __ASM_EMIT (" subs %[count], %[count], #8" )
481+ __ASM_EMIT (" b.lo 2f" ) /* fewer than 8 values: go to the tail */
482+ __ASM_EMIT (" 1:" )
483+ __ASM_EMIT (" ldp q0, q1, [%[src]]" ) /* v0 = v[0], v1 = v[1] */
484+ EFF_HSLA_ALPHA_CORE
485+ __ASM_EMIT (" subs %[count], %[count], #8" ) /* flags are consumed by b.hs below; the add instructions in between do not set flags */
486+ __ASM_EMIT (" st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]" ) /* interleaved store: h s l V for values 0..3 */
487+ __ASM_EMIT (" add %[dst], %[dst], 0x40" )
488+ __ASM_EMIT (" add %[src], %[src], 0x20" )
489+ __ASM_EMIT (" st4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[dst]]" ) /* interleaved store: h s l V for values 4..7 */
490+ __ASM_EMIT (" add %[dst], %[dst], 0x40" )
491+ __ASM_EMIT (" b.hs 1b" )
492+
493+ // -----------------------------------------------------------------
494+ // 1x-7x tail block: partial loads chosen by bits 4/2/1 of the remaining count, one CORE pass, matching partial stores
495+ __ASM_EMIT (" 2:" )
496+ __ASM_EMIT (" adds %[count], %[count], #8" ) /* undo the last subtraction: count = number of remaining values (0..7) */
497+ __ASM_EMIT (" b.ls 14f" ) /* nothing remains: done */
498+ __ASM_EMIT (" tst %[count], #4" )
499+ __ASM_EMIT (" b.eq 4f" )
500+ __ASM_EMIT (" ldr q0, [%[src]]" ) /* v0 = next 4 values */
501+ __ASM_EMIT (" add %[src], %[src], 0x10" )
502+ __ASM_EMIT (" 4:" )
503+ __ASM_EMIT (" tst %[count], #2" )
504+ __ASM_EMIT (" b.eq 6f" )
505+ __ASM_EMIT (" ld1 {v1.2s}, [%[src]]" ) /* lanes 0..1 of v1 = next 2 values */
506+ __ASM_EMIT (" add %[src], %[src], 0x08" )
507+ __ASM_EMIT (" 6:" )
508+ __ASM_EMIT (" tst %[count], #1" )
509+ __ASM_EMIT (" b.eq 8f" )
510+ __ASM_EMIT (" ld1 {v1.s}[2], [%[src]]" ) /* lane 2 of v1 = last value */
511+ __ASM_EMIT (" 8:" )
512+ EFF_HSLA_ALPHA_CORE
513+ __ASM_EMIT (" tst %[count], #4" )
514+ __ASM_EMIT (" b.eq 10f" )
515+ __ASM_EMIT (" st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]" ) /* 4 quadruples computed from v0 */
516+ __ASM_EMIT (" add %[dst], %[dst], 0x40" )
517+ __ASM_EMIT (" 10:" )
518+ __ASM_EMIT (" tst %[count], #2" )
519+ __ASM_EMIT (" b.eq 12f" )
520+ __ASM_EMIT (" st4 {v4.2s, v5.2s, v6.2s, v7.2s}, [%[dst]]" ) /* 2 quadruples from lanes 0..1 (values loaded into v1) */
521+ __ASM_EMIT (" add %[dst], %[dst], 0x20" )
522+ __ASM_EMIT (" 12:" )
523+ __ASM_EMIT (" tst %[count], #1" )
524+ __ASM_EMIT (" b.eq 14f" )
525+ __ASM_EMIT (" st4 {v4.s, v5.s, v6.s, v7.s}[2], [%[dst]]" ) /* 1 quadruple from lane 2 */
526+ // End
527+ __ASM_EMIT (" 14:" )
528+
529+ : [dst] " +r" (dst), [src] " +r" (v), [count] " +r" (count) /* all three are modified by the asm; NOTE(review): the leading space inside the constraint and clobber strings looks like an extraction artifact - verify against the upstream file */
530+ : [eff] " r" (eff),
531+ [XC] " r" (&EFF_HSLA_ALPHA_XC[0 ])
532+ : " cc" , " memory" ,
533+ " v0" , " v1" , " v2" , " v3" ,
534+ " v4" , " v5" , " v6" , " v7" ,
535+ " v8" , " v9" , " v10" , " v11" ,
536+ " v12" , " v13" , " v14" , " v15" /* v12 and v13 are listed although the code in this function and in EFF_HSLA_ALPHA_CORE never references them */
537+ );
538+ }
539+
540+ #undef EFF_HSLA_ALPHA_CORE
541+
428542 } /* namespace asimd */
429543} /* namespace lsp */
430544
0 commit comments