diff --git a/src/audio/eq_fir/eq_fir_hifi2ep.c b/src/audio/eq_fir/eq_fir_hifi2ep.c
index 2a687623ea51..71b40edcee32 100644
--- a/src/audio/eq_fir/eq_fir_hifi2ep.c
+++ b/src/audio/eq_fir/eq_fir_hifi2ep.c
@@ -61,8 +61,7 @@ void eq_fir_2x_s32(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
 		for (i = 0; i < (frames >> 1); i++) {
 			x1 = x0 + nch;
 			y1 = y0 + nch;
-			fir_32x16_2x_hifiep(f, *x0, *x1, y0, y1,
-					    lshift, rshift);
+			fir_32x16_2x(f, *x0, *x1, y0, y1, lshift, rshift);
 			x0 += inc;
 			y0 += inc;
 		}
@@ -107,8 +106,7 @@ void eq_fir_2x_s24(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
 		for (i = 0; i < (frames >> 1); i++) {
 			x1 = x0 + nch;
 			y1 = y0 + nch;
-			fir_32x16_2x_hifiep(f, *x0 << 8, *x1 << 8, &z0, &z1,
-					    lshift, rshift);
+			fir_32x16_2x(f, *x0 << 8, *x1 << 8, &z0, &z1, lshift, rshift);
 			*y0 = sat_int24(Q_SHIFT_RND(z0, 31, 23));
 			*y1 = sat_int24(Q_SHIFT_RND(z1, 31, 23));
 			x0 += inc;
@@ -155,8 +153,7 @@ void eq_fir_2x_s16(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
 		for (i = 0; i < (frames >> 1); i++) {
 			x1 = x0 + nch;
 			y1 = y0 + nch;
-			fir_32x16_2x_hifiep(f, *x0 << 16, *x1 << 16, &z0, &z1,
-					    lshift, rshift);
+			fir_32x16_2x(f, *x0 << 16, *x1 << 16, &z0, &z1, lshift, rshift);
 			*y0 = sat_int16(Q_SHIFT_RND(z0, 31, 15));
 			*y1 = sat_int16(Q_SHIFT_RND(z1, 31, 15));
 			x0 += inc;
diff --git a/src/audio/eq_fir/eq_fir_hifi3.c b/src/audio/eq_fir/eq_fir_hifi3.c
index 638b296a74c4..0d25851ea447 100644
--- a/src/audio/eq_fir/eq_fir_hifi3.c
+++ b/src/audio/eq_fir/eq_fir_hifi3.c
@@ -70,7 +70,7 @@ void eq_fir_2x_s32(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
 				/* Load two input samples via input pointer x */
 				AE_L32_XP(d0, x, inc_nch_s);
 				AE_L32_XP(d1, x, inc_nch_s);
-				fir_32x16_2x_hifi3(f, d0, d1, y0, y1, shift);
+				fir_32x16_2x(f, d0, d1, y0, y1, shift);
 				AE_L32_XC(d0, y0, inc_2nch_s);
 				AE_L32_XC(d1, y1, inc_2nch_s);
 			}
@@ -131,7 +131,7 @@ void eq_fir_2x_s24(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
 				d0 = AE_SLAA32(d0, 8);
 				d1 = AE_SLAA32(d1, 8);
 
-				fir_32x16_2x_hifi3(f, d0, d1,  &z0, &z1, shift);
+				fir_32x16_2x(f, d0, d1,  &z0, &z1, shift);
 
 				/* Shift and round to Q1.23 format */
 				d0 = AE_SRAI32R(z0, 8);
@@ -205,7 +205,7 @@ void eq_fir_2x_s16(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
 				x0 = AE_CVT32X2F16_32(d0);
 				x1 = AE_CVT32X2F16_32(d1);
 
-				fir_32x16_2x_hifi3(f, x0, x1,  &z0, &z1, shift);
+				fir_32x16_2x(f, x0, x1,  &z0, &z1, shift);
 
 				/* Round to Q1.15 format */
 				d0 = AE_ROUND16X4F32SSYM(z0, z0);
diff --git a/src/audio/tdfb/tdfb_hifi3.c b/src/audio/tdfb/tdfb_hifi3.c
index 77225ceefbeb..f5484d337395 100644
--- a/src/audio/tdfb/tdfb_hifi3.c
+++ b/src/audio/tdfb/tdfb_hifi3.c
@@ -76,8 +76,7 @@ void tdfb_fir_s16(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource
 
 				/* Compute FIR and mix as Q5.27*/
 				fir_core_setup_circular(f);
-				fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
-						   shift);
+				fir_32x16_2x(f, cd->in[is], cd->in[is2], &y0, &y1, shift);
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1) {
 						cd->out[k] += (int32_t)y0 >> 4;
@@ -167,8 +166,7 @@ void tdfb_fir_s24(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource
 
 				/* Compute FIR and mix as Q5.27*/
 				fir_core_setup_circular(f);
-				fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
-						   shift);
+				fir_32x16_2x(f, cd->in[is], cd->in[is2], &y0, &y1, shift);
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1) {
 						cd->out[k] += (int32_t)y0 >> 4;
@@ -257,8 +255,7 @@ void tdfb_fir_s32(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource
 
 				/* Compute FIR and mix as Q5.27*/
 				fir_core_setup_circular(f);
-				fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
-						   shift);
+				fir_32x16_2x(f, cd->in[is], cd->in[is2], &y0, &y1, shift);
 				for (k = 0; k < out_nch; k++) {
 					if (om & 1) {
 						cd->out[k] += (int32_t)y0 >> 4;
diff --git a/src/audio/tdfb/tdfb_hifiep.c b/src/audio/tdfb/tdfb_hifiep.c
index 1a8cdc4e397a..d6fea124b542 100644
--- a/src/audio/tdfb/tdfb_hifiep.c
+++ b/src/audio/tdfb/tdfb_hifiep.c
@@ -42,7 +42,7 @@ static inline void tdfb_core(struct tdfb_comp_data *cd, int in_nch, int out_nch)
 		fir_hifiep_setup_circular(f);
 		fir_get_lrshifts(f, &lshift, &rshift);
 		/* Process two samples */
-		fir_32x16_2x_hifiep(f, cd->in[is], cd->in[is2], &y0, &y1, lshift, rshift);
+		fir_32x16_2x(f, cd->in[is], cd->in[is2], &y0, &y1, lshift, rshift);
 		/* Mix as Q5.27 */
 		for (k = 0; k < out_nch; k++) {
 			if (om & 1) {
diff --git a/src/include/sof/math/fir_hifi2ep.h b/src/include/sof/math/fir_hifi2ep.h
index 85561c770c23..f3625104b3dc 100644
--- a/src/include/sof/math/fir_hifi2ep.h
+++ b/src/include/sof/math/fir_hifi2ep.h
@@ -30,7 +30,6 @@ struct fir_state_32x16 {
 	ae_p16x2s *coef; /* Pointer to FIR coefficients */
 	int taps; /* Number of FIR taps */
 	int length; /* Number of FIR taps plus input length (even) */
-	int in_shift; /* Amount of right shifts at input */
 	int out_shift; /* Amount of right shifts at output */
 };
 
@@ -53,10 +52,10 @@ static inline void fir_hifiep_setup_circular(struct fir_state_32x16 *fir)
 void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
 		      int *rshift);
 
-void fir_32x16_hifiep(struct fir_state_32x16 *fir, int32_t x, int32_t *y, int lshift, int rshift);
+void fir_32x16(struct fir_state_32x16 *fir, int32_t x, int32_t *y, int lshift, int rshift);
 
-void fir_32x16_2x_hifiep(struct fir_state_32x16 *fir, int32_t x0, int32_t x1,
-			 int32_t *y0, int32_t *y1, int lshift, int rshift);
+void fir_32x16_2x(struct fir_state_32x16 *fir, int32_t x0, int32_t x1,
+		  int32_t *y0, int32_t *y1, int lshift, int rshift);
 
 #endif
 #endif /* __SOF_MATH_FIR_HIFI2EP_H__ */
diff --git a/src/include/sof/math/fir_hifi3.h b/src/include/sof/math/fir_hifi3.h
index 1a9e626247d7..abcf46df5f6f 100644
--- a/src/include/sof/math/fir_hifi3.h
+++ b/src/include/sof/math/fir_hifi3.h
@@ -28,7 +28,6 @@ struct fir_state_32x16 {
 	ae_f16x4 *coef; /* Pointer to FIR coefficients */
 	int taps; /* Number of FIR taps */
 	int length; /* Number of FIR taps plus input length (even) */
-	int in_shift; /* Amount of right shifts at input */
 	int out_shift; /* Amount of right shifts at output */
 };
 
@@ -55,14 +54,12 @@ static inline void fir_comp_setup_circular(const struct audio_stream *buffer)
 	AE_SETCEND0(audio_stream_get_end_addr(buffer));
 }
 
-void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
-		      int *rshift);
+void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift, int *rshift);
 
-void fir_32x16_hifi3(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y,
-		     int shift);
+void fir_32x16(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y, int shift);
 
-void fir_32x16_2x_hifi3(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
-			ae_int32 *y0, ae_int32 *y1, int shift);
+void fir_32x16_2x(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
+		  ae_int32 *y0, ae_int32 *y1, int shift);
 
 #endif
 #endif /* __SOF_MATH_FIR_HIFI3_H__ */
diff --git a/src/math/CMakeLists.txt b/src/math/CMakeLists.txt
index 9b506eb0b85e..ef0c409b69a6 100644
--- a/src/math/CMakeLists.txt
+++ b/src/math/CMakeLists.txt
@@ -29,7 +29,7 @@ add_local_sources_ifdef(CONFIG_POWER_FIXED sof power.c)
 
 add_local_sources_ifdef(CONFIG_BINARY_LOGARITHM_FIXED sof base2log.c)
 
-add_local_sources_ifdef(CONFIG_MATH_FIR sof fir_generic.c fir_hifi2ep.c fir_hifi3.c)
+add_local_sources_ifdef(CONFIG_MATH_FIR sof fir_generic.c fir_hifi2ep.c fir_hifi3.c fir_hifi5.c)
 
 if(CONFIG_MATH_FFT)
 	add_subdirectory(fft)
diff --git a/src/math/fir_hifi2ep.c b/src/math/fir_hifi2ep.c
index 2172e3e98d53..1dd03d7c1485 100644
--- a/src/math/fir_hifi2ep.c
+++ b/src/math/fir_hifi2ep.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: BSD-3-Clause
 //
-// Copyright(c) 2017 Intel Corporation. All rights reserved.
+// Copyright(c) 2017-2025 Intel Corporation.
 //
 // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
 
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(fir_get_lrshifts);
  * 8x 48 bit registers in register file P
  */
 
-void fir_32x16_hifiep(struct fir_state_32x16 *fir, int32_t x, int32_t *y, int lshift, int rshift)
+void fir_32x16(struct fir_state_32x16 *fir, int32_t x, int32_t *y, int lshift, int rshift)
 {
 	/* This function uses
 	 * 1x 56 bit registers Q,
@@ -163,8 +163,8 @@ EXPORT_SYMBOL(fir_32x16_hifiep);
  * 8x 48 bit registers in register file P
  */
 
-void fir_32x16_2x_hifiep(struct fir_state_32x16 *fir, int32_t x0, int32_t x1,
-			 int32_t *y0, int32_t *y1, int lshift, int rshift)
+void fir_32x16_2x(struct fir_state_32x16 *fir, int32_t x0, int32_t x1,
+		  int32_t *y0, int32_t *y1, int lshift, int rshift)
 {
 	/* This function uses
 	 * 2x 56 bit registers Q,
diff --git a/src/math/fir_hifi3.c b/src/math/fir_hifi3.c
index a485bf235f79..dd3e0edf8bde 100644
--- a/src/math/fir_hifi3.c
+++ b/src/math/fir_hifi3.c
@@ -1,13 +1,13 @@
 // SPDX-License-Identifier: BSD-3-Clause
 //
-// Copyright(c) 2017 Intel Corporation. All rights reserved.
+// Copyright(c) 2017-2025 Intel Corporation.
 //
 // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
 
 #include <sof/math/fir_config.h>
 #include <sof/common.h>
 
-#if SOF_USE_MIN_HIFI(3, FILTER)
+#if SOF_USE_HIFI(3, FILTER) || SOF_USE_HIFI(4, FILTER)
 
 #include <sof/audio/buffer.h>
 #include <sof/math/fir_hifi3.h>
@@ -90,8 +90,7 @@ EXPORT_SYMBOL(fir_get_lrshifts);
  * 8x 48 bit registers in register file P
  */
 
-void fir_32x16_hifi3(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y,
-		     int shift)
+void fir_32x16(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y, int shift)
 {
 	/* This function uses
 	 * 1x 56 bit registers Q,
@@ -162,15 +161,15 @@ void fir_32x16_hifi3(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y,
 	a = AE_SLAA64S(a, shift);
 	AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y, 0);
 }
-EXPORT_SYMBOL(fir_32x16_hifi3);
+EXPORT_SYMBOL(fir_32x16);
 
 /* HiFi EP has the follow number of reqisters that should not be exceeded
  * 4x 56 bit registers in register file Q
  * 8x 48 bit registers in register file P
  */
 
-void fir_32x16_2x_hifi3(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
-			ae_int32 *y0, ae_int32 *y1, int shift)
+void fir_32x16_2x(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
+		  ae_int32 *y0, ae_int32 *y1, int shift)
 {
 	/* This function uses
 	 * 2x 56 bit registers Q,
@@ -252,6 +251,6 @@ void fir_32x16_2x_hifi3(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
 	AE_S32_L_I(AE_ROUND32F48SSYM(b), (ae_int32 *)y1, 0);
 	AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y0, 0);
 }
-EXPORT_SYMBOL(fir_32x16_2x_hifi3);
+EXPORT_SYMBOL(fir_32x16_2x);
 
 #endif
diff --git a/src/math/fir_hifi5.c b/src/math/fir_hifi5.c
new file mode 100644
index 000000000000..d4b699ceabb2
--- /dev/null
+++ b/src/math/fir_hifi5.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2017-2025 Intel Corporation.
+//
+// Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
+
+#include <sof/math/fir_config.h>
+#include <sof/common.h>
+
+#if SOF_USE_MIN_HIFI(5, FILTER)
+
+#include <sof/audio/buffer.h>
+#include <sof/math/fir_hifi3.h>
+#include <user/fir.h>
+#include <xtensa/config/defs.h>
+#include <xtensa/tie/xt_hifi5.h>
+#include <rtos/symbol.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * EQ FIR algorithm code
+ */
+
+void fir_reset(struct fir_state_32x16 *fir)
+{
+	fir->taps = 0;
+	fir->length = 0;
+	fir->out_shift = 0;
+	fir->coef = NULL;
+	/* fir->delay is deliberately not set to NULL here: the start of the
+	 * dynamically allocated delay memory may still be needed after reset.
+	 */
+}
+EXPORT_SYMBOL(fir_reset);
+
+int fir_delay_size(struct sof_fir_coef_data *config)
+{
+	/* Reject tap counts outside the range 4..SOF_FIR_MAX_LENGTH */
+	if (config->length > SOF_FIR_MAX_LENGTH || config->length < 4)
+		return -EINVAL;
+
+	/* The optimization requires the tap count to be a multiple of four */
+	if (config->length & 0x3)
+		return -EINVAL;
+
+	/* The dual sample version needs one more delay entry. Two are added
+	 * to keep 64 bit alignment. The delay line size is returned in bytes.
+	 */
+	return (config->length + 2) * sizeof(int32_t);
+}
+EXPORT_SYMBOL(fir_delay_size);
+
+int fir_init_coef(struct fir_state_32x16 *fir,
+		  struct sof_fir_coef_data *config)
+{
+	/* The length is taps plus two since the filter computes two
+	 * samples per call. Taps plus one would be the minimum, but the
+	 * length must stay even: the delay line holds 32 bit samples and
+	 * is read with 64 bit loads.
+	 */
+	fir->taps = (int)config->length;
+	fir->length = fir->taps + 2;
+	fir->out_shift = (int)config->out_shift;
+	fir->coef = (ae_f16x4 *)&config->coef[0]; /* Referenced in place, not copied */
+	return 0;
+}
+EXPORT_SYMBOL(fir_init_coef);
+
+void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data)
+{
+	fir->delay = (ae_int32 *)*data; /* Delay line starts at caller's buffer */
+	fir->delay_end = fir->delay + fir->length; /* One past the last entry */
+	fir->rwp = (ae_int32 *)(fir->delay + fir->length - 1); /* Last entry */
+	*data += fir->length; /* Advance caller's pointer to next delay line start */
+}
+EXPORT_SYMBOL(fir_init_delay);
+
+void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
+		      int *rshift)
+{
+	*lshift = (fir->out_shift < 0) ? -fir->out_shift : 0; /* Negated if negative */
+	*rshift = (fir->out_shift > 0) ? fir->out_shift : 0; /* As is if positive */
+}
+EXPORT_SYMBOL(fir_get_lrshifts);
+
+void fir_32x16(struct fir_state_32x16 *fir, ae_int32 x, ae_int32 *y, int shift)
+{
+	/* This function uses
+	 * 1x 64 bit accumulator (a), 1x alignment register (u),
+	 * 4x 64 bit data registers (data2, coefs, d0, d1),
+	 * 3x integers
+	 * 2x address pointers,
+	 */
+	ae_f64 a;
+	ae_valign u;
+	ae_f32x2 data2;
+	ae_f16x4 coefs;
+	ae_f32x2 d0;
+	ae_f32x2 d1;
+	int i;
+	ae_int32 *dp = fir->rwp;
+	ae_int16x4 *coefp = (ae_int16x4 *)fir->coef;
+	const int taps_div_4 = fir->taps >> 2;
+	const int inc = sizeof(int32_t);
+
+	/* Bypass the sample if tap count is less than four, e.g. zero after reset. */
+	if (!taps_div_4) {
+		*y = x;
+		return;
+	}
+
+	/* Write sample to delay, dp set above is the location of this sample */
+	AE_S32_L_XC(x, fir->rwp, -sizeof(int32_t));
+
+	/* Prime the coefficients stream */
+	u = AE_LA64_PP(coefp);
+
+	/* Note: Each data sample is loaded here with its own instruction.
+	 * The two samples per call version fir_32x16_2x() loads a pair
+	 * with a single AE_L32X2_XC().
+	 */
+	a = AE_ZEROQ56();
+	for (i = 0; i < taps_div_4; i++) {
+		/* Load four coefficients. Coef_3 contains tap h[n],
+		 * coef_2 contains h[n+1], coef_1 contains h[n+2], and
+		 * coef_0 contains h[n+3];
+		 */
+		AE_LA16X4_IP(coefs, u, coefp);
+
+		/* Load two data samples and pack d0 to data2_h and
+		 * d1 to data2_l.
+		 */
+		AE_L32_XC(d0, dp, inc);
+		AE_L32_XC(d1, dp, inc);
+		data2 = AE_SEL32_LL(d0, d1);
+
+		/* Accumulate
+		 * a += data2_h * coefs_3 + data2_l * coefs_2.
+		 * NOTE(review): the intrinsic name suggests a 32x16 bit
+		 * multiply of Q1.31 data with Q1.15 coefficients, confirm.
+		 */
+		AE_MULAAFD32X16_H3_L2(a, data2, coefs);
+
+		/* Repeat the same for the next two taps and two samples.
+		 * a += data2_h * coefs_1 + data2_l * coefs_0.
+		 */
+		AE_L32_XC(d0, dp, inc);
+		AE_L32_XC(d1, dp, inc);
+		data2 = AE_SEL32_LL(d0, d1);
+		AE_MULAAFD32X16_H1_L0(a, data2, coefs);
+	}
+
+	/* Do the scaling shift, round to 32 bits, and store the output sample. */
+	a = AE_SLAA64S(a, shift);
+	AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y, 0);
+}
+EXPORT_SYMBOL(fir_32x16);
+
+void fir_32x16_2x(struct fir_state_32x16 *fir, ae_int32 x0, ae_int32 x1,
+		  ae_int32 *y0, ae_int32 *y1, int shift)
+{
+	/* This function uses
+	 * 7x 64 bit AE registers (u, a, b, d0, d1, d2, coefs)
+	 * 3x integers
+	 * 2x address pointers,
+	 */
+	ae_valign u;
+	ae_f64 a = AE_ZERO64();
+	ae_f64 b = AE_ZERO64();
+	ae_f32x2 d0;
+	ae_f32x2 d1;
+	ae_f32x2 d2;
+	ae_f16x4 coefs;
+	ae_f32x2 *dp;
+	ae_f16x4 *coefp = fir->coef;
+	const int taps_div_4 = fir->taps >> 2;
+	const int inc = 2 * sizeof(int32_t);
+	int i;
+
+	/* Bypass the samples if tap count is less than four, e.g. zero after reset. */
+	if (!taps_div_4) {
+		*y0 = x0;
+		*y1 = x1;
+		return;
+	}
+
+	/* Write samples to delay, x0 first and then the newer x1 */
+	AE_S32_L_XC(x0, fir->rwp, -sizeof(int32_t));
+	dp = (ae_f32x2 *)fir->rwp; /* Location where x1 is stored next */
+	AE_S32_L_XC(x1, fir->rwp, -sizeof(int32_t));
+
+	/* Prime the coefficients stream */
+	u = AE_LA64_PP(coefp);
+
+	/* Load the two newest samples and then proceed
+	 * to older input samples in the delay line.
+	 */
+	AE_L32X2_XC(d0, dp, inc);
+	for (i = 0; i < taps_div_4; i++) {
+		/* Load four coefficients. Coef_3 contains tap h[n],
+		 * coef_2 contains h[n+1], coef_1 contains h[n+2], and
+		 * coef_0 contains h[n+3];
+		 */
+		AE_LA16X4_IP(coefs, u, coefp);
+
+		/* Load four more data samples, on the first iteration
+		 * d0.H is x[n] the newest sample
+		 * d0.L is x[n-1]
+		 * d1.H is x[n-2]
+		 * d1.L is x[n-3]
+		 * d2.H is x[n-4]
+		 */
+		AE_L32X2_XC(d1, dp, inc);
+		AE_L32X2_XC(d2, dp, inc);
+
+		/* Calculate four FIR taps for newest (x1 -> b) and previous input (x0 -> a)
+		 * b = b  + d0.H * c.3  + d0.L * c.2  + d1.H * c.1  + d1.L * c.0
+		 * a = a  + d0.L * c.3  + d1.H * c.2  + d1.L * c.1  + d2.H * c.0
+		 */
+		AE_MULA2Q32X16_FIR_H(b, a, d0, d1, d2, coefs);
+
+		/* Prepare for next four taps, d2 carries over to next iteration as d0 */
+		d0 = d2;
+	}
+
+	/* Shift left by one extra bit, Q1.31 x Q1.15 -> Q2.46 product to
+	 * Q2.47 format for rounding, and store the output samples.
+	 */
+	b = AE_SLAA64S(b, shift + 1);
+	a = AE_SLAA64S(a, shift + 1);
+	d0 = AE_ROUND32X2F48SASYM(b, a);
+	AE_S32_H_I(d0, (ae_int32 *)y1, 0); /* Rounded b, output for x1 */
+	AE_S32_L_I(d0, (ae_int32 *)y0, 0); /* Rounded a, output for x0 */
+}
+EXPORT_SYMBOL(fir_32x16_2x);
+
+#endif
diff --git a/src/math/fir_llext/CMakeLists.txt b/src/math/fir_llext/CMakeLists.txt
index 9b8d2531bea3..39bb2d8ce89b 100644
--- a/src/math/fir_llext/CMakeLists.txt
+++ b/src/math/fir_llext/CMakeLists.txt
@@ -6,5 +6,6 @@ sof_llext_build("fir"
 		../fir_generic.c
 		../fir_hifi2ep.c
 		../fir_hifi3.c
+		../fir_hifi5.c
 	LIB openmodules
 )
diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt
index 54face8b974d..b387fc011de1 100644
--- a/zephyr/CMakeLists.txt
+++ b/zephyr/CMakeLists.txt
@@ -691,6 +691,7 @@ elseif(CONFIG_MATH_FIR)
 		${SOF_MATH_PATH}/fir_generic.c
 		${SOF_MATH_PATH}/fir_hifi2ep.c
 		${SOF_MATH_PATH}/fir_hifi3.c
+		${SOF_MATH_PATH}/fir_hifi5.c
 	)
 endif()