From b4db29e2d726c315ceeb59543023bf965191c58a Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:33:02 -0800 Subject: [PATCH 01/25] add main logic --- inc/zoo/swar/associative_iteration.h | 73 ++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 00bc9e7c..55767d74 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -510,6 +510,79 @@ constexpr auto halvePrecision(SWAR even, SWAR odd) { return evenHalf | oddHalf; } + +template struct MultiplicationResult { + SWAR result; + BooleanSWAR overflowed; +}; + +// static_assert([] { +// using D = SWAR<8, u32>; +// using S = SWAR<4, u32>; +// using H = SWAR<2, u32>; +// constexpr auto UpperHalfOfLanes = S::oddLaneMask().value(); +// static_assert(UpperHalfOfLanes == 0xF0F0'F0F0); +// return true; +// }()); + +template +constexpr MultiplicationResult +fullMultiplication(SWAR multiplicand, SWAR multiplier) { + using S = SWAR; using D = SWAR; + + auto [l_even, l_odd] = doublePrecision(multiplicand); + auto [r_even, r_odd] = doublePrecision(multiplier); + auto res_even = multiplication_OverflowUnsafe(l_even, r_even); + auto res_odd = multiplication_OverflowUnsafe(l_odd, r_odd); + + // Into the double precision world + constexpr auto HalfLane = S::NBits; + constexpr auto UpperHalfOfLanes = SWAR::oddLaneMask().value(); + auto res = halvePrecision(res_even, res_odd); + + auto over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane}; + auto over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; + auto overflow_values = halvePrecision(over_even, over_odd); + + // back to normal precision world + auto did_overflow = ~(zoo::swar::equals(overflow_values, S{0})); + + return {res, did_overflow}; +} + +using S = SWAR<4, u32>; + +static_assert(S::oddLaneMask().value() == 0xF0F0'F0F0); +static_assert(S::evenLaneMask().value() == 0x0F0F'0F0F); + +static_assert(fullMultiplication(S{0x0009'0000}, S{0x0009'0000}) + .result.value() == 0x0001'0000); +static_assert(fullMultiplication(S{0x0003'0000}, S{0x0007'0000}) + .result.value() == 0x0005'0000); + +static_assert(fullMultiplication(S{0x0002'0000}, S{0x0008'0000}) + .overflowed.value() == 0x0008'0000); + +static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000}) + .overflowed.value() == 0x0008'0000); + +static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000}) + .overflowed.value() == 0x0000'0000); + +// static_assert([] { +// // fullMultiplication(S{0x0008'0012}, S{0x0007'0032}).result.value() +// ==0x0008'0034 auto r = fullMultiplication(S{0x0003'0012}, +// S{0x0003'0032}); if (r.result.value() != 0x0009'0034) { return false; } +// if (r.overflow.value() != 0x0000'0000) { return false; } +// return true; +// }()); + +static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) + .result.value() == 0x0008'0034); + +static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) + .result.value() == 0x0008'0034); + } #endif From 1261006ee1885ca108227bda0fdfc211b90ef623 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:33:44 -0800 Subject: [PATCH 02/25] add even lane mask --- inc/zoo/swar/SWAR.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 60ba9540..7978005c 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -70,6 +70,8 @@ constexpr __uint128_t lsbIndex(__uint128_t v) noexcept { } #endif + + /// Core abstraction around SIMD Within A Register (SWAR). Specifies 'lanes' /// of NBits width against a type T, and provides an abstraction for performing /// SIMD operations against that primitive type T treated as a SIMD register. @@ -108,6 +110,17 @@ struct SWAR { return result; } + constexpr static auto evenLaneMask() { + using S = SWAR; + static_assert(0 == S::Lanes % 2, "Only even number of elements supported"); + using D = SWAR; + return S{(D::LeastSignificantBit << S::NBits) - D::LeastSignificantBit}; + } + + constexpr static auto oddLaneMask() { + return SWAR{static_cast(~evenLaneMask().value())}; + } + template constexpr static auto from(const Range &values) noexcept { using std::begin; using std::end; @@ -595,4 +608,7 @@ static_assert( 0x0706050403020100ull ); +static_assert(SWAR<4, u16>::evenLaneMask().value() == 0b0000'1111'0000'1111); +static_assert(SWAR<4, u16>::oddLaneMask().value() == 0b1111'0000'1111'0000); + }} From f976ae40c43250348ead0e10c4db5bb74393d659 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:34:42 -0800 Subject: [PATCH 03/25] even/odd lane mask --- inc/zoo/swar/associative_iteration.h | 12 ++---------- test/swar/BasicOperations.cpp | 2 +- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 55767d74..9972900e 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -475,14 +475,6 @@ struct SWAR_Pair{ SWAR even, odd; }; -template -constexpr SWAR doublingMask() { - using S = SWAR; - static_assert(0 == S::Lanes % 2, "Only even number of elements supported"); - using D = SWAR; - return S{(D::LeastSignificantBit << NB) - D::LeastSignificantBit}; -} - template constexpr auto doublePrecision(SWAR input) { using S = SWAR; @@ -491,7 +483,7 @@ constexpr auto doublePrecision(SWAR input) { "Precision can only be doubled for SWARs of even element count" ); using RV = SWAR; - constexpr auto DM = doublingMask(); + constexpr auto DM = SWAR::evenLaneMask(); return SWAR_Pair{ RV{(input & DM).value()}, RV{(input.value() >> NB) & DM.value()} @@ -503,7 +495,7 @@ constexpr auto halvePrecision(SWAR even, SWAR odd) { using S = SWAR; static_assert(0 == NB % 2, "Only even lane-bitcounts supported"); using RV = SWAR; - constexpr auto HalvingMask = doublingMask(); + constexpr auto HalvingMask = SWAR::evenLaneMask(); auto evenHalf = RV{even.value()} & HalvingMask, oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2}; diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 602384ae..dec368a4 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -211,7 +211,7 @@ static_assert(BooleanSWAR{Literals<4, u16>, namespace Multiplication { static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value()); -static_assert(0x0F0F0F0F == doublingMask<4, uint32_t>().value()); +static_assert(0x0F0F0F0F == SWAR<4, uint32_t>::evenLaneMask().value()); constexpr auto PrecisionFixtureTest = 0x89ABCDEF; constexpr auto Doubled = From f0720bdff3c212574586d6531740f12c67336121 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:35:26 -0800 Subject: [PATCH 04/25] clean up a little --- inc/zoo/swar/associative_iteration.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 9972900e..c6368dbe 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -543,7 +543,6 @@ fullMultiplication(SWAR multiplicand, SWAR multiplier) { } using S = SWAR<4, u32>; - static_assert(S::oddLaneMask().value() == 0xF0F0'F0F0); static_assert(S::evenLaneMask().value() == 0x0F0F'0F0F); @@ -561,14 +560,6 @@ static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000}) static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000}) .overflowed.value() == 0x0000'0000); -// static_assert([] { -// // fullMultiplication(S{0x0008'0012}, S{0x0007'0032}).result.value() -// ==0x0008'0034 auto r = fullMultiplication(S{0x0003'0012}, -// S{0x0003'0032}); if (r.result.value() != 0x0009'0034) { return false; } -// if (r.overflow.value() != 0x0000'0000) { return false; } -// return true; -// }()); - static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) .result.value() == 0x0008'0034); From f928811ccad08bf2c331897e6f4491b93e6be876 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:36:30 -0800 Subject: [PATCH 05/25] clean some more --- inc/zoo/swar/associative_iteration.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index c6368dbe..de81ecfe 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -508,15 +508,6 @@ template struct MultiplicationResult { BooleanSWAR overflowed; }; -// static_assert([] { -// using D = SWAR<8, u32>; -// using S = SWAR<4, u32>; -// using H = SWAR<2, u32>; -// constexpr auto UpperHalfOfLanes = S::oddLaneMask().value(); -// static_assert(UpperHalfOfLanes == 0xF0F0'F0F0); -// return true; -// }()); - template constexpr MultiplicationResult fullMultiplication(SWAR multiplicand, SWAR multiplier) { @@ -543,6 +534,7 @@ fullMultiplication(SWAR multiplicand, SWAR multiplier) { } using S = SWAR<4, u32>; + static_assert(S::oddLaneMask().value() == 0xF0F0'F0F0); static_assert(S::evenLaneMask().value() == 0x0F0F'0F0F); From 51f29874f12a9db5e95cd7a7e910088bf7a4e93a Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 Jan 2025 18:59:28 -0800 Subject: [PATCH 06/25] just return overflow --- inc/zoo/swar/associative_iteration.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index de81ecfe..0b8f8d00 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -505,7 +505,7 @@ constexpr auto halvePrecision(SWAR even, SWAR odd) { template struct MultiplicationResult { SWAR result; - BooleanSWAR overflowed; + SWAR overflow; }; template @@ -527,10 +527,7 @@ fullMultiplication(SWAR multiplicand, SWAR multiplier) { auto over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; auto overflow_values = halvePrecision(over_even, over_odd); - // back to normal precision world - auto did_overflow = ~(zoo::swar::equals(overflow_values, S{0})); - - return {res, did_overflow}; + return {res, overflow_values}; } using S = SWAR<4, u32>; @@ -543,14 +540,14 @@ static_assert(fullMultiplication(S{0x0009'0000}, S{0x0009'0000}) static_assert(fullMultiplication(S{0x0003'0000}, S{0x0007'0000}) .result.value() == 0x0005'0000); -static_assert(fullMultiplication(S{0x0002'0000}, S{0x0008'0000}) - .overflowed.value() == 0x0008'0000); - -static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000}) - .overflowed.value() == 0x0008'0000); - -static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000}) - .overflowed.value() == 0x0000'0000); +// static_assert(fullMultiplication(S{0x0002'0000}, S{0x0008'0000}) +// .overflowed.value() == 0x0008'0000); +// +// static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000}) +// .overflowed.value() == 0x0008'0000); +// +// static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000}) +// .overflowed.value() == 0x0000'0000); static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) .result.value() == 0x0008'0034); From 35fffa771030bf25e645eae48b917f0f14a4ab5c Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:29:29 -0800 Subject: [PATCH 07/25] wow seems to be working --- inc/zoo/swar/associative_iteration.h | 139 ++++++++++++++------------- 1 file changed, 74 insertions(+), 65 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 0b8f8d00..ffba50fb 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -1,7 +1,10 @@ #ifndef ZOO_SWAR_ASSOCIATIVE_ITERATION_H #define ZOO_SWAR_ASSOCIATIVE_ITERATION_H +#include "SWAR.h" #include "zoo/swar/SWAR.h" +#include +#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING @@ -426,38 +429,6 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount_deprecated( return product; } -// TODO(Jamie): Add tests from other PR. -template -constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( - SWAR x, - SWAR exponent -) { - using S = SWAR; - - auto operation = [](auto left, auto right, auto counts) { - const auto mask = makeLaneMaskFromMSB(counts); - const auto product = - multiplication_OverflowUnsafe_SpecificBitCount(left, right); - return (product & mask) | (left & ~mask); - }; - - // halver should work same as multiplication... i think... - auto halver = [](auto counts) { - auto msbCleared = counts & ~S{S::MostSignificantBit}; - return S{static_cast(msbCleared.value() << 1)}; - }; - - exponent = S{static_cast(exponent.value() << (NB - ActualBits))}; - return associativeOperatorIterated_regressive( - x, - S{meta::BitmaskMaker().value}, // neutral is lane wise.. - exponent, - S{S::MostSignificantBit}, - operation, - ActualBits, - halver - ); -} template constexpr auto multiplication_OverflowUnsafe( @@ -509,51 +480,89 @@ template struct MultiplicationResult { }; template -constexpr MultiplicationResult -fullMultiplication(SWAR multiplicand, SWAR multiplier) { +constexpr auto +doublingMultiplication(SWAR multiplicand, SWAR multiplier) { using S = SWAR; using D = SWAR; - auto [l_even, l_odd] = doublePrecision(multiplicand); auto [r_even, r_odd] = doublePrecision(multiplier); - auto res_even = multiplication_OverflowUnsafe(l_even, r_even); - auto res_odd = multiplication_OverflowUnsafe(l_odd, r_odd); - - // Into the double precision world - constexpr auto HalfLane = S::NBits; - constexpr auto UpperHalfOfLanes = SWAR::oddLaneMask().value(); - auto res = halvePrecision(res_even, res_odd); + auto + res_even = multiplication_OverflowUnsafe(l_even, r_even), + res_odd = multiplication_OverflowUnsafe(l_odd, r_odd); + return SWAR_Pair{res_even, res_odd}; +} - auto over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane}; - auto over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; - auto overflow_values = halvePrecision(over_even, over_odd); +template +constexpr MultiplicationResult +wideningMultiplication(SWAR multiplicand, SWAR multiplier) { + using S = SWAR; using D = SWAR; + constexpr auto + HalfLane = S::NBits, + UpperHalfOfLanes = SWAR::oddLaneMask().value(); + auto [res_even, res_odd] = doublingMultiplication(multiplicand, multiplier); + auto result = halvePrecision(res_even, res_odd); + auto + over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane}, + over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; + auto upper_lanes_overflow = halvePrecision(over_even, over_odd); + return {result, upper_lanes_overflow}; +} - return {res, overflow_values}; +template +constexpr +auto saturatedMultiplication(SWAR multiplicand, SWAR multiplier) { + using S = SWAR; + constexpr auto One = S{S::LeastSignificantBit}; + auto [res, overflow] = wideningMultiplication(multiplicand, multiplier); + auto did_overflow = zoo::swar::greaterEqual(overflow, One); + auto laneMask = did_overflow.MSBtoLaneMask(); + auto res_saturated = res | laneMask; + return S{res_saturated}; } -using S = SWAR<4, u32>; -static_assert(S::oddLaneMask().value() == 0xF0F0'F0F0); -static_assert(S::evenLaneMask().value() == 0x0F0F'0F0F); +// TODO(Jamie): Add tests from other PR. +template +constexpr auto saturatingExponentiation( + SWAR x, + SWAR exponent +) { + using S = SWAR; -static_assert(fullMultiplication(S{0x0009'0000}, S{0x0009'0000}) - .result.value() == 0x0001'0000); -static_assert(fullMultiplication(S{0x0003'0000}, S{0x0007'0000}) - .result.value() == 0x0005'0000); + auto operation = [](auto left, auto right, auto counts) { + auto mask = makeLaneMaskFromMSB(counts); + auto product = saturatedMultiplication(left, right); + return (product & mask) | (left & ~mask); + }; -// static_assert(fullMultiplication(S{0x0002'0000}, S{0x0008'0000}) -// .overflowed.value() == 0x0008'0000); -// -// static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000}) -// .overflowed.value() == 0x0008'0000); -// -// static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000}) -// .overflowed.value() == 0x0000'0000); + // halver should work same as multiplication... i think... + auto halver = [](auto counts) { + auto msbCleared = counts & ~S{S::MostSignificantBit}; + return S{static_cast(msbCleared.value() << 1)}; + }; -static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) - .result.value() == 0x0008'0034); + constexpr auto NumBitsPerLane = S::NBits; + return associativeOperatorIterated_regressive( + x, + S{S::LeastSignificantBit}, + exponent, + S{S::MostSignificantBit}, + operation, + NumBitsPerLane, + halver + ); +} -static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) - .result.value() == 0x0008'0034); +using S4 = SWAR<4, u32>; +using S8 = SWAR<8, u32>; +static_assert(S4::oddLaneMask().value() == 0xF0F0'F0F0); +static_assert(S4::evenLaneMask().value() == 0x0F0F'0F0F); +static_assert(wideningMultiplication(S4{0x0009'0000}, S4{0x0009'0000}).result.value() == 0x0001'0000); +static_assert(wideningMultiplication(S4{0x0003'0000}, S4{0x0007'0000}).result.value() == 0x0005'0000); +static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); +static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); +static_assert(saturatedMultiplication(S8{0x09'40'03'01}, S8{0x37'03'C0'01}).value() == 0xFF'C0'FF'01); +static_assert(saturatedMultiplication(S4{0x0009'0001}, S4{0x0009'0001}).value() == 0x000F'0001); +static_assert(saturatingExponentiation(S4{0x9000'0432}, S4{0x1000'0221}).value() == 0x9111'1F92); } From b87b40827a636c032f4a75111f694ac3693a0938 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:37:19 -0800 Subject: [PATCH 08/25] tidy a little --- inc/zoo/swar/associative_iteration.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index ffba50fb..854bb4f8 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -514,9 +514,9 @@ auto saturatedMultiplication(SWAR multiplicand, SWAR multiplier) { constexpr auto One = S{S::LeastSignificantBit}; auto [res, overflow] = wideningMultiplication(multiplicand, multiplier); auto did_overflow = zoo::swar::greaterEqual(overflow, One); - auto laneMask = did_overflow.MSBtoLaneMask(); - auto res_saturated = res | laneMask; - return S{res_saturated}; + auto lane_mask = did_overflow.MSBtoLaneMask(); + auto saturated = res | lane_mask; + return S{saturated}; } @@ -527,6 +527,10 @@ constexpr auto saturatingExponentiation( SWAR exponent ) { using S = SWAR; + constexpr auto NumBitsPerLane = S::NBits; + constexpr auto + MSB = S{S::MostSignificantBit}, + LSB = S{S::LeastSignificantBit}; auto operation = [](auto left, auto right, auto counts) { auto mask = makeLaneMaskFromMSB(counts); @@ -534,18 +538,16 @@ constexpr auto saturatingExponentiation( return (product & mask) | (left & ~mask); }; - // halver should work same as multiplication... i think... auto halver = [](auto counts) { auto msbCleared = counts & ~S{S::MostSignificantBit}; return S{static_cast(msbCleared.value() << 1)}; }; - constexpr auto NumBitsPerLane = S::NBits; return associativeOperatorIterated_regressive( x, - S{S::LeastSignificantBit}, + LSB, exponent, - S{S::MostSignificantBit}, + MSB, operation, NumBitsPerLane, halver From 07be9f90fd41d6879437b91a5aafd1ff9d7e030f Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:42:51 -0800 Subject: [PATCH 09/25] rm spurious tests --- inc/zoo/swar/SWAR.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 7978005c..c8065ca2 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -608,7 +608,4 @@ static_assert( 0x0706050403020100ull ); -static_assert(SWAR<4, u16>::evenLaneMask().value() == 0b0000'1111'0000'1111); -static_assert(SWAR<4, u16>::oddLaneMask().value() == 0b1111'0000'1111'0000); - }} From 93e4bac08e2187454fc8bed3f26b4b6701f588c7 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 14 Jan 2025 12:45:47 -0800 Subject: [PATCH 10/25] rename --- inc/zoo/swar/associative_iteration.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 854bb4f8..2044107c 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -512,10 +512,10 @@ constexpr auto saturatedMultiplication(SWAR multiplicand, SWAR multiplier) { using S = SWAR; constexpr auto One = S{S::LeastSignificantBit}; - auto [res, overflow] = wideningMultiplication(multiplicand, multiplier); + auto [result, overflow] = wideningMultiplication(multiplicand, multiplier); auto did_overflow = zoo::swar::greaterEqual(overflow, One); auto lane_mask = did_overflow.MSBtoLaneMask(); - auto saturated = res | lane_mask; + auto saturated = result | lane_mask; return S{saturated}; } From 05468a209890fb6ddfba025080c28a95ff7cf055 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:44:20 -0800 Subject: [PATCH 11/25] start test refactor --- test/swar/BasicOperations.cpp | 84 +++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index dec368a4..90163eeb 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -255,6 +255,90 @@ HE(3, u8, 0xFF, 0x7); HE(2, u8, 0xAA, 0x2); #undef HE +template +constexpr auto testSaturatingMultiplication(T left, T right, T expected) { + using S = SWAR; + return saturatingExponentiation(S{left}, S{right}).value() == expected; +} + + +// template< +// typename Arg, +// std::size_t N, +// // Reject via SFINAE plain arrays with non-matching number of elements +// typename = std::enable_if_t +// > +// constexpr +// SWAR(Literals_t, const Arg (&values)[N]): +// m_v{from(values)} +// {} + +template +struct Foo { + constexpr Foo(T l, T r, T e): + left{l}, right{r}, expected{e} + {} + T left{}, right{}, expected{}; +}; + +template +constexpr auto multiple_test(const Arg (&values)[N]) { + using std::begin; using std::end; + auto start = begin(values); + auto fin = end(values); + while (start != fin) { + auto v = *start++; + auto left = v.left; + auto right = v.right; + auto expected = v.expected; + if (saturatingExponentiation(SWAR{left}, SWAR{right}).value() != expected) { + return false; + } + } + return true; +} + +constexpr auto satExpoTests = [] () { + return multiple_test<8, u32>({ + Foo{ + 0x09'40'03'01, + 0x37'03'C0'01, + 0xFF'FF'FF'01}, + + Foo{ + 0x09'40'03'01, + 0x37'03'C0'01, + 0xFF'FF'FF'01}, + + Foo{ + 0x09'40'03'01, + 0x37'03'C0'01, + 0xFF'FF'FF'01}, + }); +}; + +static_assert(satExpoTests()); + + +static_assert(testSaturatingMultiplication<8, u32>( + 0x09'40'03'01, + 0x37'03'C0'01, + 0xFF'FF'FF'01 +)); + +using S8 = SWAR<8, u32>; +using S4 = SWAR<4, u32>; +static_assert(S4::oddLaneMask().value() == 0xF0F0'F0F0); +static_assert(S4::evenLaneMask().value() == 0x0F0F'0F0F); +static_assert(wideningMultiplication(S4{0x0009'0000}, S4{0x0009'0000}).result.value() == 0x0001'0000); +static_assert(wideningMultiplication(S4{0x0003'0000}, S4{0x0007'0000}).result.value() == 0x0005'0000); +static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); +static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); +static_assert(saturatedMultiplication(S8{0x09'40'03'01}, S8{0x37'03'C0'01}).value() == 0xFF'C0'FF'01); +static_assert(saturatedMultiplication(S4{0x0009'0001}, S4{0x0009'0001}).value() == 0x000F'0001); +static_assert(saturatingExponentiation(S4{0x9000'0432}, S4{0x1000'0221}).value() == 0x9111'1F92); + + TEST_CASE("Old multiply version", "[deprecated][swar]") { SWAR<8, u32> Micand{0x5030201}; SWAR<8, u32> Mplier{0xA050301}; From ac45f1be3bf3abbc78cb4205fede56302db012a6 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:28:01 -0800 Subject: [PATCH 12/25] tidy tests --- test/swar/BasicOperations.cpp | 101 +++++++++------------------------- 1 file changed, 27 insertions(+), 74 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 90163eeb..bff40785 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -260,83 +260,36 @@ constexpr auto testSaturatingMultiplication(T left, T right, T expected) { using S = SWAR; return saturatingExponentiation(S{left}, S{right}).value() == expected; } - - -// template< -// typename Arg, -// std::size_t N, -// // Reject via SFINAE plain arrays with non-matching number of elements -// typename = std::enable_if_t -// > -// constexpr -// SWAR(Literals_t, const Arg (&values)[N]): -// m_v{from(values)} -// {} - -template -struct Foo { - constexpr Foo(T l, T r, T e): - left{l}, right{r}, expected{e} - {} - T left{}, right{}, expected{}; -}; - -template -constexpr auto multiple_test(const Arg (&values)[N]) { - using std::begin; using std::end; - auto start = begin(values); - auto fin = end(values); - while (start != fin) { - auto v = *start++; - auto left = v.left; - auto right = v.right; - auto expected = v.expected; - if (saturatingExponentiation(SWAR{left}, SWAR{right}).value() != expected) { - return false; - } - } - return true; -} - -constexpr auto satExpoTests = [] () { - return multiple_test<8, u32>({ - Foo{ - 0x09'40'03'01, - 0x37'03'C0'01, - 0xFF'FF'FF'01}, - - Foo{ - 0x09'40'03'01, - 0x37'03'C0'01, - 0xFF'FF'FF'01}, - - Foo{ +static_assert( + testSaturatingMultiplication<8, u32>( 0x09'40'03'01, 0x37'03'C0'01, - 0xFF'FF'FF'01}, - }); -}; - -static_assert(satExpoTests()); - - -static_assert(testSaturatingMultiplication<8, u32>( - 0x09'40'03'01, - 0x37'03'C0'01, - 0xFF'FF'FF'01 + 0xFF'FF'FF'01 +)); +static_assert( + testSaturatingMultiplication<8, u32>( + 0x02'02'02'02, + 0x02'02'02'02, + 0x04'04'04'04 +)); +static_assert( + testSaturatingMultiplication<8, u32>( + 0xFF'FF'FF'FF, + 0x04'03'02'01, + 0xFF'FF'FF'FF +)); +static_assert( + testSaturatingMultiplication<8, u32>( + 0x02'FF'FF'FF, + 0x03'03'02'01, + 0x08'FF'FF'FF +)); +static_assert( + testSaturatingMultiplication<4, u32>( + 0x1243'0003, + 0x0002'0002, + 0x1119'1119 )); - -using S8 = SWAR<8, u32>; -using S4 = SWAR<4, u32>; -static_assert(S4::oddLaneMask().value() == 0xF0F0'F0F0); -static_assert(S4::evenLaneMask().value() == 0x0F0F'0F0F); -static_assert(wideningMultiplication(S4{0x0009'0000}, S4{0x0009'0000}).result.value() == 0x0001'0000); -static_assert(wideningMultiplication(S4{0x0003'0000}, S4{0x0007'0000}).result.value() == 0x0005'0000); -static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); -static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); -static_assert(saturatedMultiplication(S8{0x09'40'03'01}, S8{0x37'03'C0'01}).value() == 0xFF'C0'FF'01); -static_assert(saturatedMultiplication(S4{0x0009'0001}, S4{0x0009'0001}).value() == 0x000F'0001); -static_assert(saturatingExponentiation(S4{0x9000'0432}, S4{0x1000'0221}).value() == 0x9111'1F92); TEST_CASE("Old multiply version", "[deprecated][swar]") { From 2302504d7e398840142d3f7ec03fa3e79a90b502 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:28:16 -0800 Subject: [PATCH 13/25] rm tests --- inc/zoo/swar/associative_iteration.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 2044107c..9ec36853 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -554,18 +554,6 @@ constexpr auto saturatingExponentiation( ); } -using S4 = SWAR<4, u32>; -using S8 = SWAR<8, u32>; -static_assert(S4::oddLaneMask().value() == 0xF0F0'F0F0); -static_assert(S4::evenLaneMask().value() == 0x0F0F'0F0F); -static_assert(wideningMultiplication(S4{0x0009'0000}, S4{0x0009'0000}).result.value() == 0x0001'0000); -static_assert(wideningMultiplication(S4{0x0003'0000}, S4{0x0007'0000}).result.value() == 0x0005'0000); -static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); -static_assert(wideningMultiplication(S4{0x0008'0012}, S4{0x0007'0032}).result.value() == 0x0008'0034); -static_assert(saturatedMultiplication(S8{0x09'40'03'01}, S8{0x37'03'C0'01}).value() == 0xFF'C0'FF'01); -static_assert(saturatedMultiplication(S4{0x0009'0001}, S4{0x0009'0001}).value() == 0x000F'0001); -static_assert(saturatingExponentiation(S4{0x9000'0432}, S4{0x1000'0221}).value() == 0x9111'1F92); - } #endif From 2214ac85083407b2ebe99df7f682b521efe8909f Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:32:37 -0800 Subject: [PATCH 14/25] add consume msb --- inc/zoo/swar/SWAR.h | 6 ++++++ inc/zoo/swar/associative_iteration.h | 6 ++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index c8065ca2..14c6e84f 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -258,6 +258,12 @@ constexpr auto horizontalEquality(SWAR left, SWAR right) { return left.m_v == right.m_v; } +template +constexpr static auto consumeMSB(SWAR s) noexcept { + using S = SWAR; + auto msbCleared = s & ~S{S::MostSignificantBit}; + return S{static_cast(msbCleared.value() << 1)}; +} #if ZOO_USE_LEASTNBITSMASK diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 9ec36853..b1254cc8 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -396,8 +396,7 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount( }; auto halver = [](auto counts) { - auto msbCleared = counts & ~S{S::MostSignificantBit}; - return S{msbCleared.value() << 1}; + return swar::consumeMSB(counts); }; auto shifted = S{multiplier.value() << (NB - ActualBits)}; @@ -539,8 +538,7 @@ constexpr auto saturatingExponentiation( }; auto halver = [](auto counts) { - auto msbCleared = counts & ~S{S::MostSignificantBit}; - return S{static_cast(msbCleared.value() << 1)}; + return swar::consumeMSB(counts); }; return associativeOperatorIterated_regressive( From 61a7506ed4d2cdedbfcd3c05eded29a5571ab415 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:35:18 -0800 Subject: [PATCH 15/25] rename lower/upper --- inc/zoo/swar/associative_iteration.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index b1254cc8..046c1300 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -474,8 +474,8 @@ constexpr auto halvePrecision(SWAR even, SWAR odd) { template struct MultiplicationResult { - SWAR result; - SWAR overflow; + SWAR lower; + SWAR upper; }; template @@ -497,11 +497,11 @@ wideningMultiplication(SWAR multiplicand, SWAR multiplier) { constexpr auto HalfLane = S::NBits, UpperHalfOfLanes = SWAR::oddLaneMask().value(); - auto [res_even, res_odd] = doublingMultiplication(multiplicand, multiplier); - auto result = halvePrecision(res_even, res_odd); + auto [lower, upper] = doublingMultiplication(multiplicand, multiplier); + auto result = halvePrecision(lower, upper); auto - over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane}, - over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; + over_even = D{(lower.value() & UpperHalfOfLanes) >> HalfLane}, + over_odd = D{(upper.value() & UpperHalfOfLanes) >> HalfLane}; auto upper_lanes_overflow = halvePrecision(over_even, over_odd); return {result, upper_lanes_overflow}; } From 7b41db0951460f91d6c0ce8f4ad9ca21698dba4b Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:45:27 -0800 Subject: [PATCH 16/25] make doubling multi nicer --- inc/zoo/swar/associative_iteration.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 046c1300..1ea3d5e5 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -479,15 +479,16 @@ template struct MultiplicationResult { }; template -constexpr auto -doublingMultiplication(SWAR multiplicand, SWAR multiplier) { - using S = SWAR; using D = SWAR; - auto [l_even, l_odd] = doublePrecision(multiplicand); - auto [r_even, r_odd] = doublePrecision(multiplier); +constexpr +auto +doublePrecisionMultiplication(SWAR multiplicand, SWAR multiplier) { + auto + icand = doublePrecision(multiplicand), + plier = doublePrecision(multiplier); auto - res_even = multiplication_OverflowUnsafe(l_even, r_even), - res_odd = multiplication_OverflowUnsafe(l_odd, r_odd); - return SWAR_Pair{res_even, res_odd}; + lower = multiplication_OverflowUnsafe(icand.even, plier.even), + upper = multiplication_OverflowUnsafe(icand.odd, plier.odd); + return SWAR_Pair{lower, upper}; } template @@ -497,7 +498,7 @@ wideningMultiplication(SWAR multiplicand, SWAR multiplier) { constexpr auto HalfLane = S::NBits, UpperHalfOfLanes = SWAR::oddLaneMask().value(); - auto [lower, upper] = doublingMultiplication(multiplicand, multiplier); + auto [lower, upper] = doublePrecisionMultiplication(multiplicand, multiplier); auto result = halvePrecision(lower, upper); auto over_even = D{(lower.value() & UpperHalfOfLanes) >> HalfLane}, From 5cf88dfa78afda85dfa17d6857bee5cab47c2025 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 17:14:36 -0800 Subject: [PATCH 17/25] consolidate exponentation and make naming consistent --- inc/zoo/swar/SWAR.h | 8 ----- inc/zoo/swar/associative_iteration.h | 49 +++++++++++++++++++++++----- test/swar/BasicOperations.cpp | 2 +- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 14c6e84f..4de6bdea 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -258,14 +258,6 @@ constexpr auto horizontalEquality(SWAR left, SWAR right) { return left.m_v == right.m_v; } -template -constexpr static auto consumeMSB(SWAR s) noexcept { - using S = SWAR; - auto msbCleared = s & ~S{S::MostSignificantBit}; - return S{static_cast(msbCleared.value() << 1)}; -} - - #if ZOO_USE_LEASTNBITSMASK template constexpr auto isolate(T pattern) { diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 1ea3d5e5..c61640ba 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -18,6 +18,8 @@ inline std::ostream &binary(std::ostream &out, uint64_t input, int count) { return out; } + + template std::ostream &operator<<(std::ostream &out, zoo::swar::SWAR s) { using S = zoo::swar::SWAR; @@ -44,6 +46,14 @@ std::ostream &operator<<(std::ostream &out, zoo::swar::SWAR s) { namespace zoo::swar { +template +constexpr static auto consumeMSB(SWAR s) noexcept { + using S = SWAR; + auto msbCleared = s & ~S{S::MostSignificantBit}; + return S{static_cast(msbCleared.value() << 1)}; +} + + template constexpr auto parallelSuffix(S input) { auto @@ -509,7 +519,7 @@ wideningMultiplication(SWAR multiplicand, SWAR multiplier) { template constexpr -auto saturatedMultiplication(SWAR multiplicand, SWAR multiplier) { +auto saturatingMultiplication(SWAR multiplicand, SWAR multiplier) { using S = SWAR; constexpr auto One = S{S::LeastSignificantBit}; auto [result, overflow] = wideningMultiplication(multiplicand, multiplier); @@ -519,12 +529,11 @@ auto saturatedMultiplication(SWAR multiplicand, SWAR multiplier) { return S{saturated}; } - -// TODO(Jamie): Add tests from other PR. -template -constexpr auto saturatingExponentiation( +template +constexpr auto exponentiation ( SWAR x, - SWAR exponent + SWAR exponent, + MultiplicationFn&& multiplicationFn ) { using S = SWAR; constexpr auto NumBitsPerLane = S::NBits; @@ -532,9 +541,9 @@ constexpr auto saturatingExponentiation( MSB = S{S::MostSignificantBit}, LSB = S{S::LeastSignificantBit}; - auto operation = [](auto left, auto right, auto counts) { + auto operation = [&multiplicationFn](auto left, auto right, auto counts) { auto mask = makeLaneMaskFromMSB(counts); - auto product = saturatedMultiplication(left, right); + auto product = multiplicationFn(left, right); return (product & mask) | (left & ~mask); }; @@ -553,6 +562,30 @@ constexpr auto saturatingExponentiation( ); } +template +constexpr auto saturatingExponentation( + SWAR x, + SWAR exponent +) { + return exponentiation( + x, + exponent, + saturatingMultiplication + ); +} + +template +constexpr auto exponentiation_OverflowUnsafe( + SWAR x, + SWAR exponent +) { + return exponentiation( + x, + exponent, + multiplication_OverflowUnsafe + ); +} + } #endif diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index bff40785..1b8fbdd0 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -258,7 +258,7 @@ HE(2, u8, 0xAA, 0x2); template constexpr auto testSaturatingMultiplication(T left, T right, T expected) { using S = SWAR; - return saturatingExponentiation(S{left}, S{right}).value() == expected; + return exponentation_Saturating(S{left}, S{right}).value() == expected; } static_assert( testSaturatingMultiplication<8, u32>( From 3a65ed2bfd4d83d546700026d38d45a9ce3d743a Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Thu, 16 Jan 2025 17:39:03 -0800 Subject: [PATCH 18/25] works --- inc/zoo/swar/associative_iteration.h | 16 ++++++++-------- test/swar/BasicOperations.cpp | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index c61640ba..cd113726 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -508,13 +508,14 @@ wideningMultiplication(SWAR multiplicand, SWAR multiplier) { constexpr auto HalfLane = S::NBits, UpperHalfOfLanes = SWAR::oddLaneMask().value(); - auto [lower, upper] = doublePrecisionMultiplication(multiplicand, multiplier); - auto result = halvePrecision(lower, upper); + auto [even, odd] = doublePrecisionMultiplication(multiplicand, multiplier); auto - over_even = D{(lower.value() & UpperHalfOfLanes) >> HalfLane}, - over_odd = D{(upper.value() & UpperHalfOfLanes) >> HalfLane}; - auto upper_lanes_overflow = halvePrecision(over_even, over_odd); - return {result, upper_lanes_overflow}; + upper_even = even.shiftIntraLaneRight(HalfLane, D{UpperHalfOfLanes}), + upper_odd = odd.shiftIntraLaneRight(HalfLane, D{UpperHalfOfLanes}); + auto + lower = halvePrecision(even, odd), // throws away the upper bits + upper = halvePrecision(upper_even, upper_odd); // preserve the upper bits + return {lower, upper}; } template @@ -525,8 +526,7 @@ auto saturatingMultiplication(SWAR multiplicand, SWAR multiplier) auto [result, overflow] = wideningMultiplication(multiplicand, multiplier); auto did_overflow = zoo::swar::greaterEqual(overflow, One); auto lane_mask = did_overflow.MSBtoLaneMask(); - auto saturated = result | lane_mask; - return S{saturated}; + return S{result | lane_mask}; } template diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1b8fbdd0..42b8983f 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -258,7 +258,7 @@ HE(2, u8, 0xAA, 0x2); template constexpr auto testSaturatingMultiplication(T left, T right, T expected) { using S = SWAR; - return exponentation_Saturating(S{left}, S{right}).value() == expected; + return saturatingExponentation(S{left}, S{right}).value() == expected; } static_assert( testSaturatingMultiplication<8, u32>( From 2b613ee337b1bf49efc98a83c8b18511eacb1dd4 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:08:20 -0800 Subject: [PATCH 19/25] mv tests --- inc/zoo/swar/associative_iteration.h | 50 ++++++++++++++++++++++------ test/swar/BasicOperations.cpp | 25 ++++++++++++++ 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index cd113726..772c6410 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -5,6 +5,7 @@ #include "zoo/swar/SWAR.h" #include #include +#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING @@ -450,6 +451,7 @@ constexpr auto multiplication_OverflowUnsafe( ); } + template struct SWAR_Pair{ SWAR even, odd; @@ -479,6 +481,7 @@ constexpr auto halvePrecision(SWAR even, SWAR odd) { auto evenHalf = RV{even.value()} & HalvingMask, oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2}; + return evenHalf | oddHalf; } @@ -502,19 +505,46 @@ doublePrecisionMultiplication(SWAR multiplicand, SWAR multiplier) } template -constexpr MultiplicationResult -wideningMultiplication(SWAR multiplicand, SWAR multiplier) { - using S = SWAR; using D = SWAR; +constexpr auto deinterleaveLanesOfPair = [](auto even, auto odd) { + using S = SWAR; + using H = SWAR; constexpr auto - HalfLane = S::NBits, - UpperHalfOfLanes = SWAR::oddLaneMask().value(); - auto [even, odd] = doublePrecisionMultiplication(multiplicand, multiplier); - auto - upper_even = even.shiftIntraLaneRight(HalfLane, D{UpperHalfOfLanes}), - upper_odd = odd.shiftIntraLaneRight(HalfLane, D{UpperHalfOfLanes}); - auto + HalfLane = H::NBits, + UpperHalfOfLanes = H::oddLaneMask().value(); + auto + upper_even = even.shiftIntraLaneRight(HalfLane, S{UpperHalfOfLanes}), + upper_odd = odd.shiftIntraLaneRight(HalfLane, S{UpperHalfOfLanes}); + auto lower = halvePrecision(even, odd), // throws away the upper bits upper = halvePrecision(upper_even, upper_odd); // preserve the upper bits + return std::make_pair(lower, upper); +}; + +namespace test_deinterleaving { + +template +auto test = [](auto a, auto b, auto expected_lower, auto expected_upper) { + auto [lower, upper] = deinterleaveLanesOfPair(a, b); + auto lower_ok = lower.value() == expected_lower.value(); + auto upper_ok = upper.value() == expected_upper.value(); + return lower_ok && upper_ok; +}; + +using S = SWAR<8, uint32_t>; +static_assert(test<8, uint32_t>( + S{0xFDFCFBFA}, // input a + S{0xF4F3F2F1}, // input b + S{0x4D3C2B1A}, // expected lower + S{0xFFFFFFFF} // expected upper +)); + +} // namespace test_deinterleaving + +template +constexpr MultiplicationResult +wideningMultiplication(SWAR multiplicand, SWAR multiplier) { + auto [even, odd] = doublePrecisionMultiplication(multiplicand, multiplier); + auto [lower, upper] = deinterleaveLanesOfPair(even, odd); return {lower, upper}; } diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 42b8983f..bc29cf1c 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -291,6 +291,31 @@ static_assert( 0x1119'1119 )); +namespace test_deinterleaving { + +template +auto test = [](auto a, auto b, auto expected_lower, auto expected_upper) { + auto [lower, upper] = deinterleaveLanesOfPair(a, b); + auto lower_ok = lower.value() == expected_lower.value(); + auto upper_ok = upper.value() == expected_upper.value(); + return lower_ok && upper_ok; +}; + + +// notice the vertical groups becomes horizontal pairs +using S = SWAR<8, uint32_t>; +static_assert(test<8, uint32_t>( + S{0xFDFCFBFA}, // input a + S{0xF4F3F2F1}, // input b +/* C A + 3 1 +*/ S{0x4D3C2B1A}, // expected lower +/* 3C 1A */ + S{0xFFFFFFFF} // expected upper +)); + +} // namespace test_deinterleaving + TEST_CASE("Old multiply version", "[deprecated][swar]") { SWAR<8, u32> Micand{0x5030201}; From fa0667b67b421cdcdaa072af5fbd5fcc73979420 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:10:50 -0800 Subject: [PATCH 20/25] oops --- inc/zoo/swar/associative_iteration.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 772c6410..b642df8d 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -520,26 +520,6 @@ constexpr auto deinterleaveLanesOfPair = [](auto even, auto odd) { return std::make_pair(lower, upper); }; -namespace test_deinterleaving { - -template -auto test = [](auto a, auto b, auto expected_lower, auto expected_upper) { - auto [lower, upper] = deinterleaveLanesOfPair(a, b); - auto lower_ok = lower.value() == expected_lower.value(); - auto upper_ok = upper.value() == expected_upper.value(); - return lower_ok && upper_ok; -}; - -using S = SWAR<8, uint32_t>; -static_assert(test<8, uint32_t>( - S{0xFDFCFBFA}, // input a - S{0xF4F3F2F1}, // input b - S{0x4D3C2B1A}, // expected lower - S{0xFFFFFFFF} // expected upper -)); - -} // namespace test_deinterleaving - template constexpr MultiplicationResult wideningMultiplication(SWAR multiplicand, SWAR multiplier) { From dea354a2b7cada23462724737addc327e3f7619f Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:26:39 -0800 Subject: [PATCH 21/25] tidy --- inc/zoo/swar/associative_iteration.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index b642df8d..a8850eae 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -1,11 +1,7 @@ #ifndef ZOO_SWAR_ASSOCIATIVE_ITERATION_H #define ZOO_SWAR_ASSOCIATIVE_ITERATION_H -#include "SWAR.h" #include "zoo/swar/SWAR.h" -#include -#include -#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING From 81237a63bfb9eca7e447f48735689e7024bf6744 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:27:47 -0800 Subject: [PATCH 22/25] tidy --- inc/zoo/swar/associative_iteration.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index a8850eae..71b4b696 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -15,8 +15,6 @@ inline std::ostream &binary(std::ostream &out, uint64_t input, int count) { return out; } - - template std::ostream &operator<<(std::ostream &out, zoo::swar::SWAR s) { using S = zoo::swar::SWAR; From b792e30510c9526f2f011cd127392bd99b769410 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:28:20 -0800 Subject: [PATCH 23/25] tidy --- inc/zoo/swar/associative_iteration.h | 1 - 1 file changed, 1 deletion(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 71b4b696..f3b27ab8 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -48,7 +48,6 @@ constexpr static auto consumeMSB(SWAR s) noexcept { return S{static_cast(msbCleared.value() << 1)}; } - template constexpr auto parallelSuffix(S input) { auto From 5d41262fd1c6dca7acf5856579d23c2ffa555e82 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:31:49 -0800 Subject: [PATCH 24/25] make pair for generatlity --- inc/zoo/swar/associative_iteration.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index f3b27ab8..0a56438b 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -444,7 +444,6 @@ constexpr auto multiplication_OverflowUnsafe( ); } - template struct SWAR_Pair{ SWAR even, odd; @@ -494,7 +493,7 @@ doublePrecisionMultiplication(SWAR multiplicand, SWAR multiplier) auto lower = multiplication_OverflowUnsafe(icand.even, plier.even), upper = multiplication_OverflowUnsafe(icand.odd, plier.odd); - return SWAR_Pair{lower, upper}; + return std::make_pair(lower, upper); } template @@ -514,11 +513,11 @@ constexpr auto deinterleaveLanesOfPair = [](auto even, auto odd) { }; template -constexpr MultiplicationResult +constexpr auto wideningMultiplication(SWAR multiplicand, SWAR multiplier) { auto [even, odd] = doublePrecisionMultiplication(multiplicand, multiplier); auto [lower, upper] = deinterleaveLanesOfPair(even, odd); - return {lower, upper}; + return std::make_pair(lower, upper); } template From cfb10726d5deb6f1b4b48a4ae5790a6a3ef6235f Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:37:33 -0800 Subject: [PATCH 25/25] tidy --- inc/zoo/swar/associative_iteration.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 0a56438b..d68f14e6 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -45,7 +45,7 @@ template constexpr static auto consumeMSB(SWAR s) noexcept { using S = SWAR; auto msbCleared = s & ~S{S::MostSignificantBit}; - return S{static_cast(msbCleared.value() << 1)}; + return S{msbCleared.value() << 1}; } template