From 94ed04d39f2838b9fd9e20e160e290cff3c3d908 Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Mon, 10 Mar 2025 18:11:20 +0200 Subject: [PATCH 1/3] Audio: Volume: Remove unnecessary clean of HiFi input align Only the output align register needs to be cleared. The priming of the input stream with e.g. AE_LA128_PP() fully initializes the input align register. Signed-off-by: Seppo Ingalsuo --- src/audio/volume/volume_hifi3.c | 12 ++++++------ src/audio/volume/volume_hifi4.c | 12 ++++++------ src/audio/volume/volume_hifi4_with_peakvol.c | 12 ++++++------ src/audio/volume/volume_hifi5.c | 12 ++++++------ src/audio/volume/volume_hifi5_with_peakvol.c | 12 ++++++------ 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/audio/volume/volume_hifi3.c b/src/audio/volume/volume_hifi3.c index 5f3d7f2275f3..66704b5f69c5 100644 --- a/src/audio/volume/volume_hifi3.c +++ b/src/audio/volume/volume_hifi3.c @@ -70,7 +70,7 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 *buf_end; int i, n, m; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f32x2 *in = (ae_f32x2 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -151,7 +151,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f32x2 in_sample = AE_ZERO32(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f32x2 *in = (ae_f32x2 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -209,7 +209,7 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_f32x2); @@ -293,7 +293,7 @@ static void 
vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f32x2 in_sample = AE_ZERO32(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); const int channels_count = audio_stream_get_channels(sink); int samples = channels_count * frames; @@ -351,7 +351,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -440,7 +440,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f16x4 in_sample = AE_ZERO16(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); diff --git a/src/audio/volume/volume_hifi4.c b/src/audio/volume/volume_hifi4.c index 6acecb2fb9b5..915255c40738 100644 --- a/src/audio/volume/volume_hifi4.c +++ b/src/audio/volume/volume_hifi4.c @@ -70,7 +70,7 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 *buf_end; int i, n, m; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f32x2 *in = (ae_f32x2 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -151,7 +151,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f32x2 in_sample = AE_ZERO32(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f32x2 *in = (ae_f32x2 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -209,7 +209,7 @@ static void 
vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_f32x2); @@ -293,7 +293,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f32x2 in_sample = AE_ZERO32(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); const int channels_count = audio_stream_get_channels(sink); int samples = channels_count * frames; @@ -350,7 +350,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -461,7 +461,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f16x4 in_sample = AE_ZERO16(); int i, n, m, left; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); diff --git a/src/audio/volume/volume_hifi4_with_peakvol.c b/src/audio/volume/volume_hifi4_with_peakvol.c index 75e95d2e7dca..b35fc0b76620 100644 --- a/src/audio/volume/volume_hifi4_with_peakvol.c +++ b/src/audio/volume/volume_hifi4_with_peakvol.c @@ -61,7 +61,7 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 volume = AE_ZERO32(); int i, n, m; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f32x2 *in = (ae_f32x2 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -155,7 +155,7 
@@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, ae_f32x2 in_sample = AE_ZERO32(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f32x2 *in = (ae_f32x2 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -228,7 +228,7 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_f32x2); @@ -325,7 +325,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f32x2 in_sample = AE_ZERO32(); int i, n, m; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_f32x2); @@ -397,7 +397,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -531,7 +531,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_f16x4 in_sample = AE_ZERO16(); int i, n, m, left; - ae_valign inu = AE_ZALIGN64(); + ae_valign inu; ae_valign outu = AE_ZALIGN64(); ae_f16x4 *in = (ae_f16x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); diff --git a/src/audio/volume/volume_hifi5.c b/src/audio/volume/volume_hifi5.c index 33ffc691bce5..f0a20a3388b5 100644 --- a/src/audio/volume/volume_hifi5.c +++ b/src/audio/volume/volume_hifi5.c @@ -69,7 +69,7 @@ static void vol_s24_to_s24_s32(struct processing_module 
*mod, struct input_strea ae_int32x4 *buf_end; int i, n, m; ae_int32x4 *vol; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) @@ -157,7 +157,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_int32x2 in_sample, in_sample1; int i, n, m; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) @@ -217,7 +217,7 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_int32x4 *buf; ae_int32x4 *buf_end; ae_int32x4 *vol; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_int32x4); @@ -315,7 +315,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_int32x2 in_sample, in_sample1; int i, n, m; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); const int channels_count = audio_stream_get_channels(sink); int samples = channels_count * frames; @@ -372,7 +372,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu ae_int32x4 *buf; ae_int32x4 *buf_end; ae_int32x4 *vol; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int16x8 *in = (ae_int16x8 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) @@ -467,7 +467,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_int16x4 in_sample, in_sample1; int i, n, m; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int16x8 *in = (ae_int16x8 
*)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) diff --git a/src/audio/volume/volume_hifi5_with_peakvol.c b/src/audio/volume/volume_hifi5_with_peakvol.c index d05838586136..8d46e64f3b2e 100644 --- a/src/audio/volume/volume_hifi5_with_peakvol.c +++ b/src/audio/volume/volume_hifi5_with_peakvol.c @@ -60,7 +60,7 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea ae_int32x2 volume, volume1; int i, n, m; ae_int32x4 *vol; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) @@ -165,7 +165,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_int32x2 in_sample, in_sample1; int i, n, m; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int32x4 *in = (ae_int32x4 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) @@ -243,7 +243,7 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_int32x4 *buf; ae_int32x4 *buf_end; ae_int32x4 *vol; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_int32x4); @@ -358,7 +358,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_int32x2 in_sample, in_sample1; int i, n, m; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_int32x4); @@ -434,7 +434,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu ae_int32x4 *buf; ae_int32x4 *buf_end; ae_int32x4 *vol; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int16x8 *in = 
(ae_int16x8 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) @@ -551,7 +551,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, struct audio_stream *sink = bsink->data; ae_int16x4 in_sample, in_sample1; int i, n, m; - ae_valignx2 inu = AE_ZALIGN128(); + ae_valignx2 inu; ae_valignx2 outu = AE_ZALIGN128(); ae_int16x8 *in = (ae_int16x8 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) From f90291f661e25d00e91c62a919b7c56efadc9fb6 Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Thu, 13 Mar 2025 20:51:31 +0200 Subject: [PATCH 2/3] Audio: Volume: Remove unnecessary clear of HiFi registers These registers are set later in the code in the load instructions. This change saves a bit of code size and a tiny amount of processor cycles. Signed-off-by: Seppo Ingalsuo --- src/audio/volume/volume_hifi3.c | 30 ++++++++++---------- src/audio/volume/volume_hifi3_with_peakvol.c | 26 ++++++++--------- src/audio/volume/volume_hifi4.c | 30 ++++++++++---------- src/audio/volume/volume_hifi4_with_peakvol.c | 30 ++++++++++---------- 4 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/audio/volume/volume_hifi3.c b/src/audio/volume/volume_hifi3.c index 66704b5f69c5..762e5ee52894 100644 --- a/src/audio/volume/volume_hifi3.c +++ b/src/audio/volume/volume_hifi3.c @@ -63,9 +63,9 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; ae_f32x2 *buf; ae_f32x2 *buf_end; int i, n, m; @@ -149,7 +149,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, { struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = 
AE_ZERO32(); + ae_f32x2 in_sample; int i, n, m; ae_valign inu; ae_valign outu = AE_ZALIGN64(); @@ -200,9 +200,9 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; int i, n, m; ae_f64 mult0; ae_f64 mult1; @@ -291,7 +291,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, { struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int i, n, m; ae_valign inu; ae_valign outu = AE_ZALIGN64(); @@ -341,12 +341,12 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 volume0 = AE_ZERO32(); - ae_f32x2 volume1 = AE_ZERO32(); - ae_f32x2 out_sample0 = AE_ZERO32(); - ae_f32x2 out_sample1 = AE_ZERO32(); - ae_f16x4 in_sample = AE_ZERO16(); - ae_f16x4 out_sample = AE_ZERO16(); + ae_f32x2 volume0; + ae_f32x2 volume1; + ae_f32x2 out_sample0; + ae_f32x2 out_sample1; + ae_f16x4 in_sample; + ae_f16x4 out_sample; int i, n, m; ae_f32x2 *buf; ae_f32x2 *buf_end; @@ -438,7 +438,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, { struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f16x4 in_sample = AE_ZERO16(); + ae_f16x4 in_sample; int i, n, m; ae_valign inu; ae_valign outu = AE_ZALIGN64(); diff --git a/src/audio/volume/volume_hifi3_with_peakvol.c b/src/audio/volume/volume_hifi3_with_peakvol.c index f645c5a903a1..3362b459c79c 100644 --- a/src/audio/volume/volume_hifi3_with_peakvol.c +++ 
b/src/audio/volume/volume_hifi3_with_peakvol.c @@ -43,9 +43,9 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; int channel, n, i, m; ae_f32 *in0 = (ae_f32 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -118,7 +118,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int channel, n, i, m; ae_f32 *in0 = (ae_f32 *)audio_stream_wrap(source, (char *)audio_stream_get_rptr(source) + bsource->consumed); @@ -178,9 +178,9 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; int i, n, channel, m; ae_f64 mult0; const int channels_count = audio_stream_get_channels(sink); @@ -254,7 +254,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int i, n, channel, m; const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_f32) * channels_count; @@ -314,10 +314,10 @@ static void vol_s16_to_s16(struct 
processing_module *mod, struct input_stream_bu struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 volume = AE_ZERO32(); - ae_f32x2 out_sample0 = AE_ZERO32(); - ae_f16x4 in_sample = AE_ZERO16(); - ae_f16x4 out_sample = AE_ZERO16(); + ae_f32x2 volume; + ae_f32x2 out_sample0; + ae_f16x4 in_sample; + ae_f16x4 out_sample; int i, n, channel, m; ae_f16 *in; ae_f16 *out; @@ -396,7 +396,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f16x4 in_sample = AE_ZERO16(); + ae_f16x4 in_sample; int i, n, channel, m; ae_f16 *in; ae_f16 *out; diff --git a/src/audio/volume/volume_hifi4.c b/src/audio/volume/volume_hifi4.c index 915255c40738..92301a287d39 100644 --- a/src/audio/volume/volume_hifi4.c +++ b/src/audio/volume/volume_hifi4.c @@ -63,9 +63,9 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; ae_f32x2 *buf; ae_f32x2 *buf_end; int i, n, m; @@ -149,7 +149,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, { struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int i, n, m; ae_valign inu; ae_valign outu = AE_ZALIGN64(); @@ -200,9 +200,9 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 
in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; int i, n, m; ae_f64 mult0; ae_f64 mult1; @@ -291,7 +291,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, { struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int i, n, m; ae_valign inu; ae_valign outu = AE_ZALIGN64(); @@ -340,12 +340,12 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 volume0 = AE_ZERO32(); - ae_f32x2 volume1 = AE_ZERO32(); - ae_f32x2 out_sample0 = AE_ZERO32(); - ae_f32x2 out_sample1 = AE_ZERO32(); - ae_f16x4 in_sample = AE_ZERO16(); - ae_f16x4 out_sample = AE_ZERO16(); + ae_f32x2 volume0; + ae_f32x2 volume1; + ae_f32x2 out_sample0; + ae_f32x2 out_sample1; + ae_f16x4 in_sample; + ae_f16x4 out_sample; int i, n, m, left; ae_f32x2 *buf; ae_f32x2 *buf_end; @@ -459,7 +459,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module *mod, { struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f16x4 in_sample = AE_ZERO16(); + ae_f16x4 in_sample; int i, n, m, left; ae_valign inu; ae_valign outu = AE_ZALIGN64(); diff --git a/src/audio/volume/volume_hifi4_with_peakvol.c b/src/audio/volume/volume_hifi4_with_peakvol.c index b35fc0b76620..211c7e2283f6 100644 --- a/src/audio/volume/volume_hifi4_with_peakvol.c +++ b/src/audio/volume/volume_hifi4_with_peakvol.c @@ -56,9 +56,9 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = 
AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; int i, n, m; ae_f32x2 *vol; ae_valign inu; @@ -152,7 +152,7 @@ static void vol_passthrough_s24_to_s24_s32(struct processing_module *mod, struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int i, n, m; ae_valign inu; @@ -219,9 +219,9 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); - ae_f32x2 out_sample = AE_ZERO32(); - ae_f32x2 volume = AE_ZERO32(); + ae_f32x2 in_sample; + ae_f32x2 out_sample; + ae_f32x2 volume; int i, n, m; ae_f64 mult0; ae_f64 mult1; @@ -323,7 +323,7 @@ static void vol_passthrough_s32_to_s24_s32(struct processing_module *mod, struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 in_sample = AE_ZERO32(); + ae_f32x2 in_sample; int i, n, m; ae_valign inu; ae_valign outu = AE_ZALIGN64(); @@ -387,12 +387,12 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f32x2 volume0 = AE_ZERO32(); - ae_f32x2 volume1 = AE_ZERO32(); - ae_f32x2 out_sample0 = AE_ZERO32(); - ae_f32x2 out_sample1 = AE_ZERO32(); - ae_f16x4 in_sample = AE_ZERO16(); - ae_f16x4 out_sample = AE_ZERO16(); + ae_f32x2 volume0; + ae_f32x2 volume1; + ae_f32x2 out_sample0; + ae_f32x2 out_sample1; + ae_f16x4 in_sample; + ae_f16x4 out_sample; int i, n, m, left; ae_f32x2 *buf; ae_f32x2 *buf_end; @@ -529,7 +529,7 @@ static void vol_passthrough_s16_to_s16(struct processing_module 
*mod, struct vol_data *cd = module_get_private_data(mod); struct audio_stream *source = bsource->data; struct audio_stream *sink = bsink->data; - ae_f16x4 in_sample = AE_ZERO16(); + ae_f16x4 in_sample; int i, n, m, left; ae_valign inu; ae_valign outu = AE_ZALIGN64(); From cda34a90e6beedd3bbe617fd34eeb620db19e1f7 Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Tue, 11 Mar 2025 11:59:49 +0200 Subject: [PATCH 3/3] Audio: Volume: Add IPC4 native Q1.31 mode and optimize This patch adds Q1.31 gain to existing options Q8.16, Q8.23. The Q1.31 x Q1.31 or Q1.31 x Q1.15 multiplications are directly supported by HiFi instructions without the need for additional shifts. Also for HiFi5 the most parallel multiplications are available for a suitable Q1.31 product. In HiFi5 version s24 and s32 peakvolume and gain the single multiply instruction AE_MULF32S_HH() or AE_MULF32S_LL() is changed to quad multiply AE_MULF2P32X4RS(). In HiFi4 and HiFi3 version s24 and s32 the single multiply is changed to dual multiply AE_MULFP32X2RS(). The MCPS saving in HiFi3/4/5 builds for gain and peakvolume with 16/24/32 bits stereo 48 kHz formats varies between 0.5% and 6.1%. At smallest the saving is 0.02 MCPS and at largest 0.2 MCPS. The savings are quite small because the volume gain computation has already been optimized and most of the load happens in the SOF modules framework instead of gain multiplications. 
Signed-off-by: Seppo Ingalsuo --- src/audio/volume/volume.c | 4 +- src/audio/volume/volume.h | 50 ++++++--- src/audio/volume/volume_hifi3.c | 60 ++++++----- src/audio/volume/volume_hifi3_with_peakvol.c | 53 +++++----- src/audio/volume/volume_hifi4.c | 69 ++++++------ src/audio/volume/volume_hifi4_with_peakvol.c | 81 ++++++++------- src/audio/volume/volume_hifi5.c | 81 +++++++-------- src/audio/volume/volume_hifi5_with_peakvol.c | 104 ++++++++++--------- src/audio/volume/volume_ipc4.c | 8 ++ 9 files changed, 281 insertions(+), 229 deletions(-) diff --git a/src/audio/volume/volume.c b/src/audio/volume/volume.c index c748b899b37a..573f98674269 100644 --- a/src/audio/volume/volume.c +++ b/src/audio/volume/volume.c @@ -222,7 +222,7 @@ static inline int32_t volume_linear_ramp(struct vol_data *cd, int32_t ramp_time, if (!cd->initial_ramp) return cd->tvolume[channel]; - return cd->rvolume[channel] + ramp_time * cd->ramp_coef[channel]; + return sat_int32((int64_t)ramp_time * cd->ramp_coef[channel] + cd->rvolume[channel]); } #endif @@ -469,7 +469,7 @@ static void set_linear_ramp_coef(struct vol_data *cd, int chan, bool constant_ra * be some accumulated error in ramp time the longer * the ramp and the smaller the transition is. */ - coef = (2 * coef / cd->initial_ramp + 1) >> 1; + coef = ((int64_t)coef * 2 / cd->initial_ramp + 1) >> 1; /* Scale coefficient by 1/8, round */ coef = ((coef >> 2) + 1) >> 1; diff --git a/src/audio/volume/volume.h b/src/audio/volume/volume.h index 19d23798b962..ae3cd32e63f4 100644 --- a/src/audio/volume/volume.h +++ b/src/audio/volume/volume.h @@ -43,17 +43,24 @@ struct sof_ipc_ctrl_value_chan; //** \brief Volume gain Qx.y */ #define COMP_VOLUME_Q8_16 1 -//** \brief Volume gain Qx.y integer x number of bits including sign bit. */ -#define VOL_QXY_X 8 - -//** \brief Volume gain Qx.y fractional y number of bits. 
*/ -#define VOL_QXY_Y 16 - #else //** \brief Volume gain Qx.y */ -#define COMP_VOLUME_Q1_23 1 +#define COMP_VOLUME_Q1_31 1 + +#endif +#if COMP_VOLUME_Q1_31 +/** \brief Volume gain Qx.y integer x number of bits including sign bit. + * With Q1.31 format the gain range is -186.64 to 0 dB. + */ +#define VOL_QXY_X 1 + +//** \brief Volume gain Qx.y fractional y number of bits. */ +#define VOL_QXY_Y 31 + +#define VOLUME_Q17_47_SHIFT 0 +#elif COMP_VOLUME_Q1_23 /** \brief Volume gain Qx.y integer x number of bits including sign bit. * With Q8.23 format the gain range is -138.47 to +42.14 dB. */ @@ -62,6 +69,19 @@ struct sof_ipc_ctrl_value_chan; //** \brief Volume gain Qx.y fractional y number of bits. */ #define VOL_QXY_Y 23 +#define VOLUME_Q17_47_SHIFT 8 + +#elif COMP_VOLUME_Q8_16 +//** \brief Volume gain Qx.y integer x number of bits including sign bit. */ +#define VOL_QXY_X 8 + +//** \brief Volume gain Qx.y fractional y number of bits. */ +#define VOL_QXY_Y 16 + +#define VOLUME_Q17_47_SHIFT 15 + +#else +#error "Need CONFIG_COMP_VOLUME_Qx_y" #endif /** @@ -92,16 +112,22 @@ struct sof_ipc_ctrl_value_chan; */ #define PEAK_16S_32C_ADJUST 16 -/** - * \brief Volume maximum value. - * TODO: This should be 1 << (VOL_QX_BITS + VOL_QY_BITS - 1) - 1 but - * the current volume code cannot handle the full Q1.16 range correctly. - */ +#if COMP_VOLUME_Q1_31 +/** \brief Volume maximum value. */ +#define VOL_MAX INT32_MAX + +/** \brief Volume 0dB value. */ +#define VOL_ZERO_DB INT32_MAX + +#else +/** \brief Volume maximum value. */ #define VOL_MAX ((1 << (VOL_QXY_X + VOL_QXY_Y - 1)) - 1) /** \brief Volume 0dB value. */ #define VOL_ZERO_DB BIT(VOL_QXY_Y) +#endif + /** \brief Volume minimum value. 
*/ #define VOL_MIN 0 diff --git a/src/audio/volume/volume_hifi3.c b/src/audio/volume/volume_hifi3.c index 762e5ee52894..d6a04b072ca6 100644 --- a/src/audio/volume/volume_hifi3.c +++ b/src/audio/volume/volume_hifi3.c @@ -114,15 +114,16 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea /* Multiply the input sample */ #if COMP_VOLUME_Q8_16 out_sample = AE_MULFP32X2RS(AE_SLAI32S(volume, 7), AE_SLAI32(in_sample, 8)); + out_sample = AE_SLAI32S(out_sample, 8); #elif COMP_VOLUME_Q1_23 out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + out_sample = AE_SLAI32S(out_sample, 8); +#elif COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); #endif - /* Shift for S24_LE */ - out_sample = AE_SLAI32S(out_sample, 8); - out_sample = AE_SRAI32(out_sample, 8); + /* Shift and round for S24_LE */ + out_sample = AE_SRAI32R(out_sample, 8); /* Store the output sample */ AE_SA32X2_IP(out_sample, outu, out); @@ -204,8 +205,6 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 out_sample; ae_f32x2 volume; int i, n, m; - ae_f64 mult0; - ae_f64 mult1; ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; @@ -249,23 +248,19 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea /* Load the input sample */ AE_LA32X2_IP(in_sample, inu, in); - -#if COMP_VOLUME_Q8_16 - /* Q8.16 x Q1.31 << 1 -> Q9.48 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 1); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ -#elif COMP_VOLUME_Q1_23 - /* Q1.23 x Q1.31 << 1 -> Q2.55 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 8); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ 
+#if COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, in_sample); #else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + /* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + ae_f64 mult0 = AE_MULF32R_HH(volume, in_sample); + ae_f64 mult1 = AE_MULF32R_LL(volume, in_sample); + + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, VOLUME_Q17_47_SHIFT); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ #endif AE_SA32X2_IP(out_sample, outu, out); } @@ -389,18 +384,20 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* load second two volume gain */ AE_L32X2_XC(volume1, vol, inc); + /* Load the input sample */ + AE_LA16X4_IP(in_sample, inu, in); + +#if COMP_VOLUME_Q1_31 + out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); + out_sample1 = AE_MULFP32X16X2RS_L(volume1, in_sample); +#else #if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ + /* Shift Q8.16 to Q9.23 + * No need to shift Q1.23, it is OK as such + */ volume0 = AE_SLAI32S(volume0, 7); volume1 = AE_SLAI32S(volume1, 7); -#elif COMP_VOLUME_Q1_23 - /* No need to shift, Q1.23 is OK as such */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" #endif - /* Load the input sample */ - AE_LA16X4_IP(in_sample, inu, in); - /* Multiply the input sample */ out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); out_sample1 = AE_MULFP32X16X2RS_L(volume1, in_sample); @@ -408,6 +405,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Q9.23 to Q1.31 */ out_sample0 = AE_SLAI32S(out_sample0, 8); out_sample1 = AE_SLAI32S(out_sample1, 8); +#endif /* store the output */ out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample1); diff --git a/src/audio/volume/volume_hifi3_with_peakvol.c b/src/audio/volume/volume_hifi3_with_peakvol.c index 3362b459c79c..be4565aba5c1 100644 --- 
a/src/audio/volume/volume_hifi3_with_peakvol.c +++ b/src/audio/volume/volume_hifi3_with_peakvol.c @@ -78,18 +78,21 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea AE_L32_XP(in_sample, in, inc); /* calc peak vol */ peak_vol = AE_MAXABS32S(in_sample, peak_vol); -#if COMP_VOLUME_Q8_16 + /* Multiply the input sample */ +#if COMP_VOLUME_Q8_16 out_sample = AE_MULFP32X2RS(AE_SLAI32S(volume, 7), AE_SLAI32(in_sample, 8)); + out_sample = AE_SLAI32S(out_sample, 8); #elif COMP_VOLUME_Q1_23 - out_sample = AE_MULFP32X2RS(volume, AE_SLAI32S(in_sample, 8)); -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" -#endif - /* Shift for S24_LE */ + out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); out_sample = AE_SLAI32S(out_sample, 8); - out_sample = AE_SRAI32(out_sample, 8); +#elif COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); +#endif + + /* Shift and round for S24_LE */ + out_sample = AE_SRAI32R(out_sample, 8); /* Store the output sample */ AE_S32_L_XP(out_sample, out, inc); } @@ -182,7 +185,6 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 out_sample; ae_f32x2 volume; int i, n, channel, m; - ae_f64 mult0; const int channels_count = audio_stream_get_channels(sink); const int inc = sizeof(ae_f32) * channels_count; int samples = channels_count * frames; @@ -214,19 +216,20 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea AE_L32_XP(in_sample, in, inc); /* calc peak vol */ peak_vol = AE_MAXABS32S(in_sample, peak_vol); -#if COMP_VOLUME_Q8_16 - /* Q8.16 x Q1.31 << 1 -> Q9.48 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - out_sample = AE_ROUND32F48SASYM(mult0); /* Q9.47 -> Q1.31 */ -#elif COMP_VOLUME_Q1_23 - /* Q1.23 x Q1.31 << 1 -> Q2.55 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - out_sample = AE_ROUND32F48SSYM(mult0); /* Q2.47 -> Q1.31 */ + 
+#if COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, in_sample); #else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + /* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + ae_f64 mult0 = AE_MULF32R_HH(volume, in_sample); + + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + out_sample = AE_ROUND32F48SSYM(mult0); /* Q2.47 -> Q1.31 */ #endif + AE_S32_L_XP(out_sample, out, inc); } peak_vol = AE_SLAA32S(peak_vol, attenuation); @@ -345,12 +348,10 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Load volume */ volume = (ae_f32x2)cd->volume[channel]; #if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ + /* Shift Q8.16 to Q9.23 + * No need to shift Q1.23, it is OK as such + */ volume = AE_SLAI32S(volume, 7); -#elif COMP_VOLUME_Q1_23 - /* No need to shift, Q1.23 is OK as such */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" #endif for (i = 0; i < n; i += channels_count) { /* Load the input sample */ @@ -361,8 +362,12 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Multiply the input sample */ out_sample0 = AE_MULFP32X16X2RS_H(volume, in_sample); +#if COMP_VOLUME_Q1_31 + /* No shift needed, the product is Q1.31 */ +#else /* Q9.23 to Q1.31 */ out_sample0 = AE_SLAI32S(out_sample0, 8); +#endif /* store the output */ out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample0); diff --git a/src/audio/volume/volume_hifi4.c b/src/audio/volume/volume_hifi4.c index 92301a287d39..6c27d9f0d038 100644 --- a/src/audio/volume/volume_hifi4.c +++ b/src/audio/volume/volume_hifi4.c @@ -114,15 +114,16 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea /* Multiply the input sample */ #if COMP_VOLUME_Q8_16 out_sample = AE_MULFP32X2RS(AE_SLAI32S(volume, 7), AE_SLAI32(in_sample, 8)); + out_sample = AE_SLAI32S(out_sample, 8); #elif COMP_VOLUME_Q1_23
out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + out_sample = AE_SLAI32S(out_sample, 8); +#elif COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); #endif - /* Shift for S24_LE */ - out_sample = AE_SLAI32S(out_sample, 8); - out_sample = AE_SRAI32(out_sample, 8); + /* Shift and round for S24_LE */ + out_sample = AE_SRAI32R(out_sample, 8); /* Store the output sample */ AE_SA32X2_IP(out_sample, outu, out); @@ -204,8 +205,6 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 out_sample; ae_f32x2 volume; int i, n, m; - ae_f64 mult0; - ae_f64 mult1; ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; @@ -250,23 +249,21 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea /* Load the input sample */ AE_LA32X2_IP(in_sample, inu, in); -#if COMP_VOLUME_Q8_16 - /* Q8.16 x Q1.31 << 1 -> Q9.48 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 1); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ -#elif COMP_VOLUME_Q1_23 - /* Q1.23 x Q1.31 << 1 -> Q2.55 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 8); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ +#if COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, in_sample); #else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + /* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + ae_f64 mult0 = AE_MULF32R_HH(volume, in_sample); + ae_f64 mult1 = AE_MULF32R_LL(volume, in_sample); + + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, 
VOLUME_Q17_47_SHIFT); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ #endif + AE_SA32X2_IP(out_sample, outu, out); } AE_SA64POS_FP(outu, out); @@ -391,18 +388,20 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* load second two volume gain */ AE_L32X2_XC(volume1, vol, inc); + /* Load the input sample */ + AE_LA16X4_IP(in_sample, inu, in); + +#if COMP_VOLUME_Q1_31 + out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); + out_sample1 = AE_MULFP32X16X2RS_L(volume1, in_sample); +#else #if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ + /* Shift Q8.16 to Q9.23 + * No need to shift Q1.23, it is OK as such + */ volume0 = AE_SLAI32S(volume0, 7); volume1 = AE_SLAI32S(volume1, 7); -#elif COMP_VOLUME_Q1_23 - /* No need to shift, Q1.23 is OK as such */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" #endif - /* Load the input sample */ - AE_LA16X4_IP(in_sample, inu, in); - /* Multiply the input sample */ out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); out_sample1 = AE_MULFP32X16X2RS_L(volume1, in_sample); @@ -410,6 +409,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Q9.23 to Q1.31 */ out_sample0 = AE_SLAI32S(out_sample0, 8); out_sample1 = AE_SLAI32S(out_sample1, 8); +#endif /* store the output */ out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample1); @@ -421,16 +421,21 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu for (i = 0; i < left; i++) { /* load volume gain */ AE_L32_XC(volume0, (ae_f32 *)vol, sizeof(ae_f32)); + /* Load the input sample */ + AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16)); +#if COMP_VOLUME_Q1_31 + /* Multiply the input sample */ + out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); +#else #if COMP_VOLUME_Q8_16 /* Q8.16 to Q9.23 */ volume0 = AE_SLAI32S(volume0, 7); #endif - /* Load the input sample */ - AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16)); /* Multiply the input sample */ out_sample0 = 
AE_MULFP32X16X2RS_H(volume0, in_sample); /* Q9.23 to Q1.31 */ out_sample0 = AE_SLAI32S(out_sample0, 8); +#endif /* store the output */ out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample0); AE_S16_0_IP(out_sample, (ae_f16 *)out, sizeof(ae_f16)); diff --git a/src/audio/volume/volume_hifi4_with_peakvol.c b/src/audio/volume/volume_hifi4_with_peakvol.c index 211c7e2283f6..a45e85b7b0ed 100644 --- a/src/audio/volume/volume_hifi4_with_peakvol.c +++ b/src/audio/volume/volume_hifi4_with_peakvol.c @@ -109,18 +109,19 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea AE_L32X2_XC1(temp, peakvol, 0); temp = AE_MAXABS32S(in_sample, temp); AE_S32X2_XC1(temp, peakvol, inc); - /* Multiply the input sample */ + #if COMP_VOLUME_Q8_16 out_sample = AE_MULFP32X2RS(AE_SLAI32S(volume, 7), AE_SLAI32(in_sample, 8)); + out_sample = AE_SLAI32S(out_sample, 8); #elif COMP_VOLUME_Q1_23 out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + out_sample = AE_SLAI32S(out_sample, 8); +#elif COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, AE_SLAI32(in_sample, 8)); #endif - /* Shift for S24_LE */ - out_sample = AE_SLAI32S(out_sample, 8); - out_sample = AE_SRAI32(out_sample, 8); + /* Shift and round for S24_LE */ + out_sample = AE_SRAI32R(out_sample, 8); /* Store the output sample */ AE_SA32X2_IP(out_sample, outu, out); @@ -223,8 +224,6 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_f32x2 out_sample; ae_f32x2 volume; int i, n, m; - ae_f64 mult0; - ae_f64 mult1; ae_f32x2 *buf; ae_f32x2 *buf_end; ae_f32x2 *vol; @@ -278,23 +277,22 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea AE_L32X2_XC1(temp, peakvol, 0); temp = AE_MAXABS32S(in_sample, temp); AE_S32X2_XC1(temp, peakvol, inc); -#if COMP_VOLUME_Q8_16 - /* Q8.16 x Q1.31 << 1 -> Q9.48 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = 
AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 1); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ -#elif COMP_VOLUME_Q1_23 - /* Q1.23 x Q1.31 << 1 -> Q2.55 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 8); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ + +#if COMP_VOLUME_Q1_31 + out_sample = AE_MULFP32X2RS(volume, in_sample); #else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + /* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + ae_f64 mult0 = AE_MULF32R_HH(volume, in_sample); + ae_f64 mult1 = AE_MULF32R_LL(volume, in_sample); + + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, VOLUME_Q17_47_SHIFT); + out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ #endif + AE_SA32X2_IP(out_sample, outu, out); } AE_SA64POS_FP(outu, out); @@ -443,15 +441,6 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* load second two volume gain */ AE_L32X2_XC(volume1, vol, inc); -#if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ - volume0 = AE_SLAI32S(volume0, 7); - volume1 = AE_SLAI32S(volume1, 7); -#elif COMP_VOLUME_Q1_23 - /* No need to shift, Q1.23 is OK as such */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" -#endif /* Load the input sample */ AE_LA16X4_IP(in_sample, inu, in); /* calculate the peak volume*/ @@ -462,6 +451,18 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu AE_L32X2_XC1(temp, peakvol, 0); temp = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample), temp); AE_S32X2_XC1(temp, peakvol, inc); + +#if COMP_VOLUME_Q1_31 + out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); + out_sample1 = AE_MULFP32X16X2RS_L(volume1, in_sample); +#else +#if 
COMP_VOLUME_Q8_16 + /* Shift Q8.16 to Q9.23 + * No need to shift Q1.23, it is OK as such + */ + volume0 = AE_SLAI32S(volume0, 7); + volume1 = AE_SLAI32S(volume1, 7); +#endif /* Multiply the input sample */ out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); out_sample1 = AE_MULFP32X16X2RS_L(volume1, in_sample); @@ -469,6 +470,7 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Q9.23 to Q1.31 */ out_sample0 = AE_SLAI32S(out_sample0, 8); out_sample1 = AE_SLAI32S(out_sample1, 8); +#endif /* store the output */ out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample1); @@ -480,20 +482,27 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu for (i = 0; i < left; i++) { /* load first volume gain */ AE_L32_XC(volume0, (ae_f32 *)vol, sizeof(ae_f32)); -#if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ - volume0 = AE_SLAI32S(volume0, 7); -#endif + /* Load the input sample */ AE_L16_IP(in_sample, (ae_f16 *)in, sizeof(ae_f16)); /* calculate the peak volume*/ AE_L32_XC1(temp, (ae_f32 *)peakvol, 0); temp = AE_MAXABS32S(AE_SEXT32X2D16_32(in_sample), temp); AE_S32_L_XC1(temp, (ae_f32 *)peakvol, sizeof(ae_f32)); + +#if COMP_VOLUME_Q1_31 + /* Multiply the input sample */ + out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); +#else +#if COMP_VOLUME_Q8_16 + /* Q8.16 to Q9.23 */ + volume0 = AE_SLAI32S(volume0, 7); +#endif /* Multiply the input sample */ out_sample0 = AE_MULFP32X16X2RS_H(volume0, in_sample); /* Q9.23 to Q1.31 */ out_sample0 = AE_SLAI32S(out_sample0, 8); +#endif /* store the output */ out_sample = AE_ROUND16X4F32SSYM(out_sample0, out_sample0); AE_S16_0_IP(out_sample, (ae_f16 *)out, sizeof(ae_f16)); diff --git a/src/audio/volume/volume_hifi5.c b/src/audio/volume/volume_hifi5.c index f0a20a3388b5..794330cddd9b 100644 --- a/src/audio/volume/volume_hifi5.c +++ b/src/audio/volume/volume_hifi5.c @@ -117,17 +117,20 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea 
AE_MULF2P32X4RS(out_sample, out_sample1, AE_SLAI32S(volume, 7), AE_SLAI32S(volume1, 7), AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); + out_sample = AE_SLAI32S(out_sample, 8); + out_sample1 = AE_SLAI32S(out_sample1, 8); #elif COMP_VOLUME_Q1_23 AE_MULF2P32X4RS(out_sample, out_sample1, volume, volume1, AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + out_sample = AE_SLAI32S(out_sample, 8); + out_sample1 = AE_SLAI32S(out_sample1, 8); +#elif COMP_VOLUME_Q1_31 + AE_MULF2P32X4RS(out_sample, out_sample1, volume, volume1, + AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); #endif /* Shift for S24_LE */ - out_sample = AE_SLAI32S(out_sample, 8); out_sample = AE_SRAI32(out_sample, 8); - out_sample1 = AE_SLAI32S(out_sample1, 8); out_sample1 = AE_SRAI32(out_sample1, 8); /* Store the output sample */ @@ -212,8 +215,6 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_int32x2 out_sample, out_sample1; ae_int32x2 volume, volume1; int i, n, m; - ae_int64 mult0; - ae_int64 mult1; ae_int32x4 *buf; ae_int32x4 *buf_end; ae_int32x4 *vol; @@ -259,35 +260,26 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea /* Load the input sample */ AE_LA32X2X2_IP(in_sample, in_sample1, inu, in); - -#if COMP_VOLUME_Q8_16 - /* Q8.16 x Q1.31 << 1 -> Q9.48 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 1); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ - - mult0 = AE_MULF32S_HH(volume1, in_sample1); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = AE_MULF32S_LL(volume1, in_sample1); - mult1 = AE_SRAI64(mult1, 1); - out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ -#elif COMP_VOLUME_Q1_23 - /* Q1.23 x Q1.31 << 1 -> Q2.55 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = 
AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 8); +#if COMP_VOLUME_Q1_31 + AE_MULF2P32X4RS(out_sample, out_sample1, + volume, volume1, + in_sample, in_sample1); +#else + /* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + ae_int64 mult0, mult1; + + AE_MULF32X2R_HH_LL(mult0, mult1, volume, in_sample); + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, VOLUME_Q17_47_SHIFT); out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ - mult0 = AE_MULF32S_HH(volume1, in_sample1); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = AE_MULF32S_LL(volume1, in_sample1); - mult1 = AE_SRAI64(mult1, 8); - out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + AE_MULF32X2R_HH_LL(mult0, mult1, volume1, in_sample1); + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, VOLUME_Q17_47_SHIFT); + out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ #endif AE_SA32X2X2_IP(out_sample, out_sample1, outu, out); } @@ -411,20 +403,24 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu AE_L32X2X2_XC(volume2, volume3, vol, inc); + /* Load the input sample */ + AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); + +#if COMP_VOLUME_Q1_31 + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume, volume1, in_sample); + out_sample = AE_ROUND16X4F32SSYM(out_temp, out_temp1); + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume2, volume3, in_sample1); + out_sample1 = AE_ROUND16X4F32SSYM(out_temp, out_temp1); +#else #if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ + /* Shift Q8.16 to Q9.23 + * No need to shift Q1.23, it is OK as such + */ volume = AE_SLAI32S(volume, 7); volume1 = AE_SLAI32S(volume1, 7); - volume2 = AE_SLAI32S(volume2, 7); volume3 = AE_SLAI32S(volume3, 7); -#elif 
COMP_VOLUME_Q1_23 - /* No need to shift, Q1.23 is OK as such */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" #endif - /* Load the input sample */ - AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); AE_MULF2P32X16X4RS(out_temp, out_temp1, volume, volume1, in_sample); /* Q9.23 to Q1.31 */ @@ -436,9 +432,10 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Q9.23 to Q1.31 */ out_temp = AE_SLAI32S(out_temp, 8); out_temp1 = AE_SLAI32S(out_temp1, 8); - /* store the output */ out_sample1 = AE_ROUND16X4F32SSYM(out_temp, out_temp1); +#endif + /* store the output */ AE_SA16X4X2_IP(out_sample, out_sample1, outu, out); } AE_SA128POS_FP(outu, out); diff --git a/src/audio/volume/volume_hifi5_with_peakvol.c b/src/audio/volume/volume_hifi5_with_peakvol.c index 8d46e64f3b2e..16d2f7cb8bf9 100644 --- a/src/audio/volume/volume_hifi5_with_peakvol.c +++ b/src/audio/volume/volume_hifi5_with_peakvol.c @@ -21,6 +21,13 @@ LOG_MODULE_DECLARE(volume, CONFIG_SOF_LOG_LEVEL); #include "volume.h" +/* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * + * With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + #if SOF_USE_HIFI(5, VOLUME) #if CONFIG_COMP_PEAK_VOL @@ -111,23 +118,26 @@ static void vol_s24_to_s24_s32(struct processing_module *mod, struct input_strea temp = AE_MAXABS32S(in_sample, temp); temp1 = AE_MAXABS32S(in_sample1, temp1); AE_S32X2X2_XC1(temp, temp1, peakvol, inc); + /* Multiply the input sample */ #if COMP_VOLUME_Q8_16 - AE_MULF2P32X4RS(out_sample, out_sample1, AE_SLAI32S(volume, 7), - AE_SLAI32S(volume1, 7), + AE_MULF2P32X4RS(out_sample, out_sample1, + AE_SLAI32S(volume, 7), AE_SLAI32S(volume1, 7), AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); + out_sample = AE_SLAI32S(out_sample, 8); + out_sample1 = AE_SLAI32S(out_sample1, 8); #elif COMP_VOLUME_Q1_23 
AE_MULF2P32X4RS(out_sample, out_sample1, volume, volume1, - AE_SLAI32(in_sample, 8), - AE_SLAI32(in_sample1, 8)); -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); + out_sample = AE_SLAI32S(out_sample, 8); + out_sample1 = AE_SLAI32S(out_sample1, 8); +#elif COMP_VOLUME_Q1_31 + AE_MULF2P32X4RS(out_sample, out_sample1, volume, volume1, + AE_SLAI32(in_sample, 8), AE_SLAI32(in_sample1, 8)); #endif /* Shift for S24_LE */ - out_sample = AE_SLAI32S(out_sample, 8); out_sample = AE_SRAI32(out_sample, 8); - out_sample1 = AE_SLAI32S(out_sample1, 8); out_sample1 = AE_SRAI32(out_sample1, 8); /* Store the output sample */ @@ -238,8 +248,6 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea ae_int32x2 out_sample, out_sample1; ae_int32x2 volume, volume1; int i, n, m; - ae_int64 mult0; - ae_int64 mult1; ae_int32x4 *buf; ae_int32x4 *buf_end; ae_int32x4 *vol; @@ -296,34 +304,26 @@ static void vol_s32_to_s24_s32(struct processing_module *mod, struct input_strea temp = AE_MAXABS32S(in_sample, temp); temp1 = AE_MAXABS32S(in_sample1, temp1); AE_S32X2X2_XC1(temp, temp1, peakvol, inc); -#if COMP_VOLUME_Q8_16 - /* Q8.16 x Q1.31 << 1 -> Q9.48 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 1); - out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ - - mult0 = AE_MULF32S_HH(volume1, in_sample1); - mult0 = AE_SRAI64(mult0, 1); /* Q9.47 */ - mult1 = AE_MULF32S_LL(volume1, in_sample1); - mult1 = AE_SRAI64(mult1, 1); - out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q9.47 -> Q1.31 */ -#elif COMP_VOLUME_Q1_23 - /* Q1.23 x Q1.31 << 1 -> Q2.55 */ - mult0 = AE_MULF32S_HH(volume, in_sample); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = AE_MULF32S_LL(volume, in_sample); - mult1 = AE_SRAI64(mult1, 8); +#if COMP_VOLUME_Q1_31 + AE_MULF2P32X4RS(out_sample, out_sample1, + volume, volume1, 
+ in_sample, in_sample1); +#else + /* With Q1.31 x Q1.31 -> Q17.47 HiFi multiplications the result is + * Q8.16 x Q1.31 << 1 >> 16 -> Q9.32, shift left by 15 for Q17.47 + * Q1.23 x Q1.31 << 1 >> 16 -> Q2.39, shift left by 8 for Q17.47 + */ + ae_f64 mult0, mult1; + + AE_MULF32X2R_HH_LL(mult0, mult1, volume, in_sample); + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, VOLUME_Q17_47_SHIFT); out_sample = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ - mult0 = AE_MULF32S_HH(volume1, in_sample1); - mult0 = AE_SRAI64(mult0, 8); /* Q2.47 */ - mult1 = AE_MULF32S_LL(volume1, in_sample1); - mult1 = AE_SRAI64(mult1, 8); - out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" + AE_MULF32X2R_HH_LL(mult0, mult1, volume1, in_sample1); + mult0 = AE_SLAI64(mult0, VOLUME_Q17_47_SHIFT); + mult1 = AE_SLAI64(mult1, VOLUME_Q17_47_SHIFT); + out_sample1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */ #endif AE_SA32X2X2_IP(out_sample, out_sample1, outu, out); } @@ -478,17 +478,6 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu AE_L32X2X2_XC(volume, volume1, vol, inc); AE_L32X2X2_XC(volume2, volume3, vol, inc); -#if COMP_VOLUME_Q8_16 - /* Q8.16 to Q9.23 */ - volume = AE_SLAI32S(volume, 7); - volume1 = AE_SLAI32S(volume1, 7); - volume2 = AE_SLAI32S(volume2, 7); - volume3 = AE_SLAI32S(volume3, 7); -#elif COMP_VOLUME_Q1_23 - /* No need to shift, Q1.23 is OK as such */ -#else -#error "Need CONFIG_COMP_VOLUME_Qx_y" -#endif /* Load the input sample */ AE_LA16X4X2_IP(in_sample, in_sample1, inu, in); /* calculate the peak volume*/ @@ -502,7 +491,22 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu temp1 = AE_MAXABS32S(AE_SEXT32X2D16_10(in_sample1), temp1); AE_S32X2X2_XC1(temp, temp1, peakvol, inc); - /* Multiply the input sample */ +#if COMP_VOLUME_Q1_31 + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume, volume1, in_sample); 
+ out_sample = AE_ROUND16X4F32SSYM(out_temp, out_temp1); + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume2, volume3, in_sample1); + out_sample1 = AE_ROUND16X4F32SSYM(out_temp, out_temp1); +#else +#if COMP_VOLUME_Q8_16 + /* Shift Q8.16 to Q9.23 + * No need to shift Q1.23, it is OK as such + */ + volume = AE_SLAI32S(volume, 7); + volume1 = AE_SLAI32S(volume1, 7); + volume2 = AE_SLAI32S(volume2, 7); + volume3 = AE_SLAI32S(volume3, 7); +#endif + AE_MULF2P32X16X4RS(out_temp, out_temp1, volume, volume1, in_sample); /* Q9.23 to Q1.31 */ out_temp = AE_SLAI32S(out_temp, 8); @@ -513,9 +517,9 @@ static void vol_s16_to_s16(struct processing_module *mod, struct input_stream_bu /* Q9.23 to Q1.31 */ out_temp = AE_SLAI32S(out_temp, 8); out_temp1 = AE_SLAI32S(out_temp1, 8); - /* store the output */ out_sample1 = AE_ROUND16X4F32SSYM(out_temp, out_temp1); - +#endif + /* store the output */ AE_SA16X4X2_IP(out_sample, out_sample1, outu, out); } AE_SA128POS_FP(outu, out); diff --git a/src/audio/volume/volume_ipc4.c b/src/audio/volume/volume_ipc4.c index 5db67be04cb5..3abc62bfab50 100644 --- a/src/audio/volume/volume_ipc4.c +++ b/src/audio/volume/volume_ipc4.c @@ -65,7 +65,11 @@ static int set_volume_ipc4(struct vol_data *cd, uint32_t const channel, */ static uint32_t convert_volume_ipc4_to_ipc3(struct comp_dev *dev, uint32_t volume) { +#if COMP_VOLUME_Q1_31 + return volume; +#else return sat_int32(Q_SHIFT_RND((int64_t)volume, 31, VOL_QXY_Y)); +#endif } static uint32_t convert_volume_ipc3_to_ipc4(uint32_t volume) @@ -73,7 +77,11 @@ static uint32_t convert_volume_ipc3_to_ipc4(uint32_t volume) /* In IPC4 volume is converted into Q1.23 format to be processed by firmware. * Now convert it back to Q1.31 */ +#if COMP_VOLUME_Q1_31 + return volume; +#else return sat_int32(Q_SHIFT_LEFT((int64_t)volume, VOL_QXY_Y, 31)); +#endif } static void init_ramp(struct vol_data *cd, uint32_t curve_duration, uint32_t target_volume)