thesofproject · kv2019i · Nov 18, 2024 · Oct 31, 2024 · Oct 30, 2024 · lyakh
@@ -280,52 +280,6 @@ int32_t sofm_exp_int32(int32_t x)
 	return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
 }
 
-/* Fractional multiplication with shift and round
- * Note that the parameters px and py must be cast to (int64_t) if other type.
- */
-static inline int exp_hifi_q_multsr_32x32(int a, int b, int c, int d, int e)
-{
-	ae_int64 res;
-	int xt_o;
-	int shift;
-
-	res = AE_MUL32_LL(a, b);
-	shift = XT_SUB(XT_ADD(c, d), XT_ADD(e, 1));
-	res = AE_SRAA64(res, shift);
-	res = AE_ADD64(res, 1);
-	res = AE_SRAI64(res, 1);
-	xt_o = AE_MOVINT32_FROMINT64(res);
-
-	return xt_o;
-}
-
-/* A macro for Q-shifts */
-static inline int exp_hifi_q_shift_rnd(int a, int b, int c)
-{
-	ae_int32 res;
-	int shift;
-
-	shift = XT_SUB(b, XT_ADD(c, 1));
-	res = AE_SRAA32(a, shift);
-	res = AE_ADD32(res, 1);
-	res = AE_SRAI32(res, 1);
-
-	return res;
-}
-
-/* Alternative version since compiler does not allow (x >> -1) */
-static inline int exp_hifi_q_shift_left(int a, int b, int c)
-{
-	ae_int32 xt_o;
-	int shift;
-
-	shift = XT_SUB(c, b);
-	xt_o = AE_SLAA32(a, shift);
-
-	return xt_o;
-}
-
-#define q_mult(a, b, qa, qb, qy) ((int32_t)exp_hifi_q_multsr_32x32((int64_t)(a), b, qa, qb, qy))
 /* Fixed point exponent function for approximate range -11.5 .. 7.6
  * that corresponds to decibels range -100 .. +66 dB.
  *
@@ -341,33 +295,59 @@ static inline int exp_hifi_q_shift_left(int a, int b, int c)
 
 int32_t sofm_exp_fixed(int32_t x)
 {
-	int32_t xs;
-	int32_t y;
-	int32_t y0;
+	ae_f64 p;
+	ae_int32 y0;
+	ae_int32 y;
+	ae_int32 xs;
+	int32_t n;
+	int shift;
 	int i;
-	int n = 0;
 
 	if (x < SOFM_EXP_FIXED_INPUT_MIN)
 		return 0;
 
 	if (x > SOFM_EXP_FIXED_INPUT_MAX)
 		return INT32_MAX;
 
-	/* x is Q5.27 */
-	xs = x;
-	while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) {
-		xs >>= 1;
-		n++;
-	}
+	/* This returns number of right shifts needed to scale value x to |x| < 2.
+	 * The behavior differs slightly for positive and negative values but it
+	 * is not problem for sofm_exp_int32() function. E.g.
+	 *
+	 * x =  268435455 (1.9999999925), shift = 0
+	 * x =  268435456 (2.0000000000), shift = 1
+	 * x =  268435457 (2.0000000075), shift = 1
+	 *
+	 * x = -268435457 (-2.0000000075), shift = 1
+	 * x = -268435456 (-2.0000000000), shift = 0
+	 * x = -268435455 (-1.9999999925), shift = 0
+	 *
+	 * If the shift is zero, just return result from sofm_exp_int32() with
+	 * input Q format and output Q format adjusts.
+	 */
+	shift = (int)AE_MAX32(0, 3 - AE_NSAZ32_L(x));
+	if (!shift)
+		return AE_SRAI32R(sofm_exp_int32(AE_MOVAD32_L(AE_SLAI32S(x, 1))), 3);
+
+	/* Shifting x one less right to save an additional Q27 to Q28 conversion
+	 * shift for sofm_exp_int32()
+	 */
+	n = 1 << shift;
+	xs = AE_SRAA32RS(x, shift - 1);
 
 	/* sofm_exp_int32() input is Q4.28, while x1 is Q5.27
 	 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20
 	 */
-	y0 = exp_hifi_q_shift_rnd(sofm_exp_int32(exp_hifi_q_shift_left(xs, 27, 28)),
-				  23, 20);
+	y0 = AE_SRAI32R(sofm_exp_int32(xs), 3);
 	y = SOFM_EXP_ONE_Q20;
-	for (i = 0; i < (1 << n); i++)
-		y = (int32_t)exp_hifi_q_multsr_32x32((int64_t)y, y0, 20, 20, 20);
+
+	/* AE multiply returns Q41 from Q20 * Q20. To get Q20 it need to be
+	 * shifted right by 21. Since the used round instruction is aligned
+	 * to the high 32 bits it is shifted instead left by 32 - 21 = 11:
+	 */
+	for (i = 0; i < n; i++) {
+		p = AE_SLAI64S(AE_MULF32S_LL(y, y0), 11);
+		y = AE_ROUND32F64SASYM(p);
+	}
 
 	return y;
 }

@@ -5,6 +5,7 @@
  * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
  */
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdarg.h>
@@ -25,7 +26,17 @@
 #define ULP_SCALE 0.0000999999999749
 #define NUMTESTSAMPLES 256
 
-static void gen_exp_testvector(double a, double b, double *r, int32_t *b_i);
+#define NUMTESTSAMPLES_MIDDLE_TEST2 100
+#define NUMTESTSAMPLES_SMALL_TEST2 100
+#define NUMTESTSAMPLES_LARGE_TEST2 100
+#define ABS_DELTA_TOLERANCE_TEST1 1.2e-2
+#define REL_DELTA_TOLERANCE_TEST1 2.8e-4
+#define ABS_DELTA_TOLERANCE_TEST2 1.7e-6
+#define REL_DELTA_TOLERANCE_TEST2 3.7e-2
+#define ABS_DELTA_TOLERANCE_TEST3 1.2e-1
+#define REL_DELTA_TOLERANCE_TEST3 7.7e-5
+#define SOFM_EXP_FIXED_ARG_MIN -11.5
+#define SOFM_EXP_FIXED_ARG_MAX 7.6245
 
 /* testvector in Q4.28, -5 to +5 */
 /*
@@ -52,7 +63,7 @@ static void gen_exp_testvector(double a, double b, double *r, int32_t *b_i)
 		*b_i = INT32_MAX;
 }
 
-static void test_math_arithmetic_exponential_fixed(void **state)
+static void test_function_sofm_exp_int32(void **state)
 {
 	(void)state;
 
@@ -81,10 +92,107 @@ static void test_math_arithmetic_exponential_fixed(void **state)
 	}
 }
 
+static void gen_exp_testvector_linspace_q27(double a, double b, int step_count,
+					    int point, int32_t *value)
+{
+	double fstep = (b - a) / (step_count - 1);
+	double fvalue = a + fstep * point;
+
+	*value = (int32_t)round(fvalue * 134217728.0);
+}
+
+static double ref_exp(double value)
+{
+	if (value < SOFM_EXP_FIXED_ARG_MIN)
+		return 0.0;
+	else if (value > SOFM_EXP_FIXED_ARG_MAX)
+		return 2048.0;
+	else
+		return exp(value);
+}
+
+static void test_exp_with_input_value(int32_t ivalue, int32_t *iexp_value,
+				      double *abs_delta_max, double *rel_delta_max,
+				      double abs_delta_tolerance, double rel_delta_tolerance)
+{
+	double fvalue, fexp_value, ref_exp_value;
+	double rel_delta, abs_delta;
+
+	*iexp_value = sofm_exp_fixed(ivalue);
+	fvalue = (double)ivalue / (1 << 27); /* Q5.27 */
+	fexp_value = (double)*iexp_value / (1 << 20); /* Q12.20 */
+	/* printf("val = %10.6f, exp = %12.6f\n", fvalue, fexp_value); */
+
+	ref_exp_value = ref_exp(fvalue);
+	abs_delta = fabs(ref_exp_value - fexp_value);
+	rel_delta = abs_delta / ref_exp_value;
+	if (abs_delta > *abs_delta_max)
+		*abs_delta_max = abs_delta;
+
+	if (rel_delta > *rel_delta_max)
+		*rel_delta_max = rel_delta;
+
+	if (abs_delta > abs_delta_tolerance) {
+		printf("%s: Absolute error %g exceeds limit %g, input %g output %g.\n",
+		       __func__, abs_delta, abs_delta_tolerance, fvalue, fexp_value);
+		assert_true(false);
+	}
+
+	if (rel_delta > rel_delta_tolerance) {
+		printf("%s: Relative error %g exceeds limit %g, input %g output %g.\n",
+		       __func__, rel_delta, rel_delta_tolerance, fvalue, fexp_value);
+		assert_true(false);
+	}
+}
+
+static void test_function_sofm_exp_fixed(void **state)
+{
+	(void)state;
+
+	double rel_delta_max, abs_delta_max;
+	int32_t ivalue, iexp_value;
+	int i;
+
+	rel_delta_max = 0;
+	abs_delta_max = 0;
+	for (i = 0; i < NUMTESTSAMPLES_MIDDLE_TEST2; i++) {
+		gen_exp_testvector_linspace_q27(-5, 5, NUMTESTSAMPLES_MIDDLE_TEST2, i, &ivalue);
+		test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max,
+					  ABS_DELTA_TOLERANCE_TEST1, REL_DELTA_TOLERANCE_TEST1);
+
+	}
+
+	printf("%s: Absolute max error was %.6e (middle).\n", __func__, abs_delta_max);
+	printf("%s: Relative max error was %.6e (middle).\n", __func__, rel_delta_max);
+
+	rel_delta_max = 0;
+	abs_delta_max = 0;
+	for (i = 0; i < NUMTESTSAMPLES_SMALL_TEST2; i++) {
+		gen_exp_testvector_linspace_q27(-16, -5, NUMTESTSAMPLES_SMALL_TEST2, i, &ivalue);
+		test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max,
+					  ABS_DELTA_TOLERANCE_TEST2, REL_DELTA_TOLERANCE_TEST2);
+	}
+
+	printf("%s: Absolute max error was %.6e (small).\n", __func__, abs_delta_max);
+	printf("%s: Relative max error was %.6e (small).\n", __func__, rel_delta_max);
+
+	rel_delta_max = 0;
+	abs_delta_max = 0;
+	for (i = 0; i < NUMTESTSAMPLES_LARGE_TEST2; i++) {
+		gen_exp_testvector_linspace_q27(5, 15.9999, NUMTESTSAMPLES_LARGE_TEST2, i, &ivalue);
+		test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max,
+					  ABS_DELTA_TOLERANCE_TEST3, REL_DELTA_TOLERANCE_TEST3);
+	}
+
+	printf("%s: Absolute max error was %.6e (large).\n", __func__, abs_delta_max);
+	printf("%s: Relative max error was %.6e (large).\n", __func__, rel_delta_max);
+}
+
 int main(void)
 {
 	const struct CMUnitTest tests[] = {
-		cmocka_unit_test(test_math_arithmetic_exponential_fixed)
+		cmocka_unit_test(test_function_sofm_exp_int32),
+		cmocka_unit_test(test_function_sofm_exp_fixed),
 	};
 
 	cmocka_set_message_output(CM_OUTPUT_TAP);