From 295d60273cf479d380cf643696831a7e74a108bf Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 17 Apr 2024 00:45:54 +0100 Subject: [PATCH 01/43] new literals --- inc/zoo/swar/SWAR.h | 46 ++++++++++++++++++++++++++++++++--- test/swar/BasicOperations.cpp | 32 +++++++++++++++++++----- 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index fec26f22..78ed65a8 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -12,6 +12,15 @@ namespace zoo { namespace swar { +template struct SWAR; + +template struct Literals_t { + constexpr static void (SWAR::*value)() = nullptr; +}; + +template +constexpr Literals_t Literals{}; + using u64 = uint64_t; using u32 = uint32_t; using u16 = uint16_t; @@ -70,6 +79,18 @@ struct SWAR { // "don't care" bits when non-power-of-two bit lane sizes are supported LowerBits = MostSignificantBit - LeastSignificantBit; + template > + constexpr + SWAR(Literals_t, const Arg (&values)[N]) : m_v{0} { + auto result = T{0}; + for (const auto arg : values) { + result = (result << NBits) | arg; + } + m_v = result; + } + + constexpr static T MaxUnsignedLaneValue = ~(((~T{0}) << (NBits - 1)) << 1); + SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} constexpr explicit operator T() const noexcept { return m_v; } @@ -161,6 +182,9 @@ struct SWAR { T m_v; }; +template +SWAR(Literals_t, const Arg (&values)[SWAR::Lanes]) -> SWAR; + /// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation. template constexpr auto horizontalEquality(SWAR left, SWAR right) { @@ -231,6 +255,19 @@ template struct BooleanSWAR: SWAR { using Base = SWAR; + constexpr auto toMsbBools(const bool (&values)[Base::Lanes]) { + constexpr auto msbOfFirstLane = T{1} << (NBits - 1); + auto result = T{0}; + for (auto arg : values) { + auto bit = arg ? msbOfFirstLane : 0; + result = (result << NBits) | bit; + } + return BooleanSWAR{result}; + } + + template > + constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base(toMsbBools(values)) {} + // Booleanness is stored in the MSBs static constexpr auto MaskMSB = broadcast(Base(T(1) << (NBits -1))); @@ -240,7 +277,7 @@ struct BooleanSWAR: SWAR { static constexpr auto MaskNonLSB = ~MaskLSB; static constexpr auto MaskNonMSB = ~MaskMSB; constexpr explicit BooleanSWAR(T v): Base(v) {} - + constexpr BooleanSWAR clear(int bit) const noexcept { constexpr auto Bit = T(1) << (NBits - 1); return this->m_v ^ (Bit << (NBits * bit)); } @@ -256,7 +293,7 @@ struct BooleanSWAR: SWAR { constexpr auto operator ~() const noexcept { return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this); } - + constexpr auto operator not() const noexcept { return BooleanSWAR(MaskMSB ^ *this); } @@ -305,6 +342,9 @@ struct BooleanSWAR: SWAR { convertToBooleanSWAR(SWAR arg) noexcept; }; +template +BooleanSWAR(Literals_t, const bool (&values)[BooleanSWAR::Lanes]) -> BooleanSWAR; + template constexpr BooleanSWAR convertToBooleanSWAR(SWAR arg) noexcept { @@ -381,7 +421,7 @@ greaterEqual(SWAR left, SWAR right) noexcept { using S = swar::SWAR; const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y const auto z = (x|h) - (y&~h); - // bitwise ternary median! + // bitwise ternary median! const auto t = h & ~median(x, ~y, z); return ~BooleanSWAR{static_cast(t)}; // ~(x= y } diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1628e222..389b5f43 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -33,6 +33,26 @@ using S32_32 = SWAR<32, uint32_t>; using S64_64 = SWAR<64, uint64_t>; +static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 65535); +static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 65535); +static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255); +static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15); +static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); + +static_assert(SWAR{Literals<8, u32>, {0, 0, 0, 0}}.value() == 0); +static_assert(SWAR{Literals<8, u32>, {0, 0, 0, 1}}.value() == 1); +static_assert(SWAR{Literals<8, u32>, {8, 3, 2, 1}}.value() == 0x08'03'02'01); +static_assert(SWAR{Literals<8, u32>, {42, 42, 42, 42}}.value() == 0x2A'2A'2A'2A); +static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 0}}.value() == 0); +static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 1}}.value() == 1); +static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 0}}.value() == 0); +static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 1}}.value() == 0x8765'4321); +static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 7}}.value() == 0x8765'4327); + +static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, false}}.value() == 0); +static_assert(BooleanSWAR{Literals<4, u16>, {true, true, true, true}}.value() == 0b1000'1000'1000'1000); +static_assert(BooleanSWAR{Literals<8, u32>, {true, true, true, true}}.value() == 0b10000000'10000000'10000000'10000000); + namespace Multiplication { static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value()); @@ -357,7 +377,7 @@ TEST_CASE( const auto left = S2_16{0}.blitElement(1, i); const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); const auto test = S2_16{0}.blitElement(1, 2); - CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); } } SECTION("single") { @@ -365,7 +385,7 @@ TEST_CASE( const auto large = S4_32{0}.blitElement(1, i+1); const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); const auto test = S4_32{0}.blitElement(1, 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } SECTION("allLanes") { @@ -373,7 +393,7 @@ TEST_CASE( const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); const auto test = S4_32(S4_32::LeastSignificantBit * 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } } @@ -425,7 +445,7 @@ TEST_CASE( "BooleanSWAR MSBtoLaneMask", "[swar]" ) { - // BooleanSWAR as a mask: + // BooleanSWAR as a mask: auto bswar =BooleanSWAR<4, u32>(0x0808'0000); auto mask = S4_32(0x0F0F'0000); CHECK(bswar.MSBtoLaneMask().value() == mask.value()); @@ -452,6 +472,6 @@ TEST_CASE( CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); - CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); } From dbf8115a8a5b8807c83820207a863949f3475b11 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:03:26 +0100 Subject: [PATCH 02/43] PR feedback --- inc/zoo/swar/SWAR.h | 4 ++-- test/swar/BasicOperations.cpp | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 78ed65a8..38e619db 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -77,7 +77,8 @@ struct SWAR { ~(~T(0) << NBits), // Use LowerBits in favor of ~MostSignificantBit to not pollute // "don't care" bits when non-power-of-two bit lane sizes are supported - LowerBits = MostSignificantBit - LeastSignificantBit; + LowerBits = MostSignificantBit - LeastSignificantBit, + MaxUnsignedLaneValue = LeastSignificantLaneMask; template > constexpr @@ -89,7 +90,6 @@ struct SWAR { m_v = result; } - constexpr static T MaxUnsignedLaneValue = ~(((~T{0}) << (NBits - 1)) << 1); SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 389b5f43..4c9b9a65 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -39,19 +40,27 @@ static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255); static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15); static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); -static_assert(SWAR{Literals<8, u32>, {0, 0, 0, 0}}.value() == 0); -static_assert(SWAR{Literals<8, u32>, {0, 0, 0, 1}}.value() == 1); -static_assert(SWAR{Literals<8, u32>, {8, 3, 2, 1}}.value() == 0x08'03'02'01); -static_assert(SWAR{Literals<8, u32>, {42, 42, 42, 42}}.value() == 0x2A'2A'2A'2A); -static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 0}}.value() == 0); -static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 1}}.value() == 1); -static_assert(SWAR{Literals<4, u32>, {0, 0, 0, 0, 0, 0, 0, 0}}.value() == 0); +static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001); +static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002); + +static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004); + +static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001); +static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); + +static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01); +static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); + +static_assert(SWAR{Literals<4, u32>, {1, 2, 3, 4, 5, 6, 7, 8}}.value() == 0x1234'5678); static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 1}}.value() == 0x8765'4321); -static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 7}}.value() == 0x8765'4327); static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, false}}.value() == 0); static_assert(BooleanSWAR{Literals<4, u16>, {true, true, true, true}}.value() == 0b1000'1000'1000'1000); -static_assert(BooleanSWAR{Literals<8, u32>, {true, true, true, true}}.value() == 0b10000000'10000000'10000000'10000000); +static_assert(BooleanSWAR{Literals<4, u16>, {true, false, false, false}}.value() == 0b1000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {false, true, false, false}}.value() == 0b0000'1000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {false, false, true, false}}.value() == 0b0000'0000'1000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, true}}.value() == 0b0000'0000'0000'1000); namespace Multiplication { From 23278073fb8348c758383d527058b73a95c87b2d Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 15:27:14 +0100 Subject: [PATCH 03/43] tidy tests --- test/swar/BasicOperations.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 4c9b9a65..6f6e4e0c 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -52,13 +52,17 @@ static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01); static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); -static_assert(SWAR{Literals<4, u32>, {1, 2, 3, 4, 5, 6, 7, 8}}.value() == 0x1234'5678); -static_assert(SWAR{Literals<4, u32>, {8, 7, 6, 5, 4, 3, 2, 1}}.value() == 0x8765'4321); +static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201); +static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102); -static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, false}}.value() == 0); -static_assert(BooleanSWAR{Literals<4, u16>, {true, true, true, true}}.value() == 0b1000'1000'1000'1000); -static_assert(BooleanSWAR{Literals<4, u16>, {true, false, false, false}}.value() == 0b1000'0000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {false, true, false, false}}.value() == 0b0000'1000'0000'0000); +static_assert(SWAR{Literals<4, u8>, {2, 1}}.value() == 0x21); +static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); + +#define F false +#define T true +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == 0); +static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); static_assert(BooleanSWAR{Literals<4, u16>, {false, false, true, false}}.value() == 0b0000'0000'1000'0000); static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, true}}.value() == 0b0000'0000'0000'1000); From 7c10d29ceb3dcdba755671d458bc92a87b26037c Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:14:30 +0100 Subject: [PATCH 04/43] tidy reused code for booleanswaer --- inc/zoo/swar/SWAR.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 38e619db..a4769cf1 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -12,7 +12,8 @@ namespace zoo { namespace swar { -template struct SWAR; +template +struct SWAR; template struct Literals_t { constexpr static void (SWAR::*value)() = nullptr; @@ -80,14 +81,21 @@ struct SWAR { LowerBits = MostSignificantBit - LeastSignificantBit, MaxUnsignedLaneValue = LeastSignificantLaneMask; + template + constexpr auto loadBaseTypeIntoLanes(const U (&values)[Lanes], + const ManipulationFn&& manipulation) { + auto result = T{0}; + for (auto value : values) { + auto laneValue = manipulation(value); + result = (result << NBits) | laneValue; + } + return result; + } + template > constexpr SWAR(Literals_t, const Arg (&values)[N]) : m_v{0} { - auto result = T{0}; - for (const auto arg : values) { - result = (result << NBits) | arg; - } - m_v = result; + m_v = loadBaseTypeIntoLanes(values, [](auto x) { return x; }); } @@ -255,19 +263,12 @@ template struct BooleanSWAR: SWAR { using Base = SWAR; - constexpr auto toMsbBools(const bool (&values)[Base::Lanes]) { + template > + constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base{0} { constexpr auto msbOfFirstLane = T{1} << (NBits - 1); - auto result = T{0}; - for (auto arg : values) { - auto bit = arg ? msbOfFirstLane : 0; - result = (result << NBits) | bit; - } - return BooleanSWAR{result}; + this->m_v = Base::loadBaseTypeIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); } - template > - constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base(toMsbBools(values)) {} - // Booleanness is stored in the MSBs static constexpr auto MaskMSB = broadcast(Base(T(1) << (NBits -1))); From 0b8b21445a854efb4aa0db27974457a0ee732a95 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:03:52 +0100 Subject: [PATCH 05/43] update name --- inc/zoo/swar/SWAR.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index a4769cf1..bbcdd26e 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -82,8 +82,8 @@ struct SWAR { MaxUnsignedLaneValue = LeastSignificantLaneMask; template - constexpr auto loadBaseTypeIntoLanes(const U (&values)[Lanes], - const ManipulationFn&& manipulation) { + constexpr auto loadIntoLanes(const U (&values)[Lanes], + const ManipulationFn&& manipulation) { auto result = T{0}; for (auto value : values) { auto laneValue = manipulation(value); @@ -95,7 +95,7 @@ struct SWAR { template > constexpr SWAR(Literals_t, const Arg (&values)[N]) : m_v{0} { - m_v = loadBaseTypeIntoLanes(values, [](auto x) { return x; }); + m_v = loadIntoLanes(values, [](auto x) { return x; }); } @@ -266,7 +266,7 @@ struct BooleanSWAR: SWAR { template > constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base{0} { constexpr auto msbOfFirstLane = T{1} << (NBits - 1); - this->m_v = Base::loadBaseTypeIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); + this->m_v = Base::loadIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); } // Booleanness is stored in the MSBs From e3a802d193f5107e47a2af8cb4e5e909384fde91 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:21:12 +0100 Subject: [PATCH 06/43] style --- inc/zoo/swar/SWAR.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index bbcdd26e..0d594013 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -82,8 +82,7 @@ struct SWAR { MaxUnsignedLaneValue = LeastSignificantLaneMask; template - constexpr auto loadIntoLanes(const U (&values)[Lanes], - const ManipulationFn&& manipulation) { + constexpr auto loadIntoLanes(const U (&values)[Lanes], const ManipulationFn&& manipulation) { auto result = T{0}; for (auto value : values) { auto laneValue = manipulation(value); @@ -94,10 +93,8 @@ struct SWAR { template > constexpr - SWAR(Literals_t, const Arg (&values)[N]) : m_v{0} { - m_v = loadIntoLanes(values, [](auto x) { return x; }); - } - + SWAR(Literals_t, const Arg (&values)[N]) + : m_v{loadIntoLanes(values, [](auto x) { return x; })} {} SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} @@ -263,11 +260,9 @@ template struct BooleanSWAR: SWAR { using Base = SWAR; - template > - constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base{0} { - constexpr auto msbOfFirstLane = T{1} << (NBits - 1); - this->m_v = Base::loadIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); - } + template + constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) + : Base(Literals, values) { this->m_v << (NBits - 1); } // Booleanness is stored in the MSBs static constexpr auto MaskMSB = From 7ed6e8870f3ef4d3c47dba61c32b99692ee067e9 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:21:28 +0100 Subject: [PATCH 07/43] test style --- test/swar/BasicOperations.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 6f6e4e0c..cf77b49e 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -60,11 +60,14 @@ static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); #define F false #define T true -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == 0); +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == 0b0000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000); + static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {false, false, true, false}}.value() == 0b0000'0000'1000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {false, false, false, true}}.value() == 0b0000'0000'0000'1000); namespace Multiplication { From 03edd02e7460f33372362974e6372493890d8b52 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:21:57 +0100 Subject: [PATCH 08/43] fix --- inc/zoo/swar/SWAR.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 0d594013..12def29d 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -261,8 +261,10 @@ struct BooleanSWAR: SWAR { using Base = SWAR; template - constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) - : Base(Literals, values) { this->m_v << (NBits - 1); } + constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base{0} { + constexpr auto msbOfFirstLane = T{1} << (NBits - 1); + this->m_v = Base::loadIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); + } // Booleanness is stored in the MSBs static constexpr auto MaskMSB = From 0648bb1b57cfa9266957c91de7860f59c700dbec Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:30:36 +0100 Subject: [PATCH 09/43] undef util --- test/swar/BasicOperations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index cf77b49e..d5858fda 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -65,9 +65,9 @@ static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000 static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); static_assert(BooleanSWAR{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000); static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000); - static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); +#undef F +#undef T namespace Multiplication { From 0d0a8330fad44fc48c34131e44e60f8db5f8928e Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:33:45 +0100 Subject: [PATCH 10/43] simplify again --- inc/zoo/swar/SWAR.h | 15 ++++++--------- test/swar/BasicOperations.cpp | 2 -- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 12def29d..7f09d23f 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -81,12 +81,11 @@ struct SWAR { LowerBits = MostSignificantBit - LeastSignificantBit, MaxUnsignedLaneValue = LeastSignificantLaneMask; - template - constexpr auto loadIntoLanes(const U (&values)[Lanes], const ManipulationFn&& manipulation) { + template + constexpr auto loadIntoLanes(const U (&values)[Lanes]) const noexcept { auto result = T{0}; for (auto value : values) { - auto laneValue = manipulation(value); - result = (result << NBits) | laneValue; + result = (result << NBits) | value; } return result; } @@ -94,7 +93,7 @@ struct SWAR { template > constexpr SWAR(Literals_t, const Arg (&values)[N]) - : m_v{loadIntoLanes(values, [](auto x) { return x; })} {} + : m_v{loadIntoLanes(values)} {} SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} @@ -261,10 +260,8 @@ struct BooleanSWAR: SWAR { using Base = SWAR; template - constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) : Base{0} { - constexpr auto msbOfFirstLane = T{1} << (NBits - 1); - this->m_v = Base::loadIntoLanes(values, [](auto x) { return x ? msbOfFirstLane : 0; }); - } + constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) + : Base(Literals, values) { this->m_v <<= (NBits - 1); } // Booleanness is stored in the MSBs static constexpr auto MaskMSB = diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index d5858fda..5a903dee 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -5,10 +5,8 @@ #include #include #include -#include #include - using namespace zoo; using namespace zoo::swar; From 33183cdd61fd8d2051689b647692f91a94c153ca Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:34:48 +0100 Subject: [PATCH 11/43] fmt --- inc/zoo/swar/SWAR.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 7f09d23f..c72296ef 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -92,8 +92,7 @@ struct SWAR { template > constexpr - SWAR(Literals_t, const Arg (&values)[N]) - : m_v{loadIntoLanes(values)} {} + SWAR(Literals_t, const Arg (&values)[N]) : m_v{loadIntoLanes(values)} {} SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} From a7d744d05c33f6fee0d0c2369f18ef61b698dcd1 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Fri, 19 Apr 2024 18:01:35 +0100 Subject: [PATCH 12/43] Add to Array add to_array oops make 23 for now not sure we want to commit to this? remove named function --- inc/zoo/swar/SWAR.h | 10 ++++++++++ test/CMakeLists.txt | 6 +++--- test/swar/BasicOperations.cpp | 12 +++++++----- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index c72296ef..831d9a41 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -4,6 +4,7 @@ #include "zoo/meta/log.h" +#include #include #ifdef _MSC_VER @@ -94,6 +95,15 @@ struct SWAR { constexpr SWAR(Literals_t, const Arg (&values)[N]) : m_v{loadIntoLanes(values)} {} + constexpr std::array to_array() const noexcept { + std::array result; + for (int i = 0; i < Lanes; ++i) { + auto otherEnd = Lanes - i - 1; + result[otherEnd] = at(i); + } + return result; + } + SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} constexpr explicit operator T() const noexcept { return m_v; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 411d9ce1..ee9fba51 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ if(MSVC) # Avoids multiple problems # Due to multiple bugs, forced upgrade to C++ 20 - set(CMAKE_CXX_STANDARD 20) + set(CMAKE_CXX_STANDARD 23) # Set the policy to use the new behavior if(POLICY CMP0067) @@ -81,7 +81,7 @@ if(MSVC) endif() else() # Non-MSVC specific configuration (original content) - set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_FLAGS_UBSAN "-fsanitize=undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g") set(CMAKE_CXX_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer") @@ -122,7 +122,7 @@ else() set( ZOO_TEST_SOURCES ${CATCH2_MAIN_SOURCE} ${TYPE_ERASURE_SOURCES} ${ALGORITHM_SOURCES} - ${SWAR_SOURCES} + ${SWAR_SOURCES} ${MISCELLANEA_SOURCES} ) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 5a903dee..8dfc222e 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -2,11 +2,6 @@ #include "catch2/catch.hpp" -#include -#include -#include -#include - using namespace zoo; using namespace zoo::swar; @@ -56,6 +51,13 @@ static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102); static_assert(SWAR{Literals<4, u8>, {2, 1}}.value() == 0x21); static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); + +static_assert(SWAR{Literals<4, u8>, {1, 2}}.to_array() == std::array{1, 2}); +static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); + + #define F false #define T true static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == 0b0000'0000'0000'0000); From 06af01c6486af3f42e280c8edd99af9da210f092 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 01:41:46 +0100 Subject: [PATCH 13/43] rename --- inc/zoo/swar/SWAR.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 831d9a41..ab5a42ba 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -83,17 +83,17 @@ struct SWAR { MaxUnsignedLaneValue = LeastSignificantLaneMask; template - constexpr auto loadIntoLanes(const U (&values)[Lanes]) const noexcept { + constexpr static auto from_array(const U (&values)[Lanes]) noexcept { auto result = T{0}; for (auto value : values) { result = (result << NBits) | value; } - return result; + return SWAR{result}; } template > constexpr - SWAR(Literals_t, const Arg (&values)[N]) : m_v{loadIntoLanes(values)} {} + SWAR(Literals_t, const Arg (&values)[N]) : m_v{from_array(values)} {} constexpr std::array to_array() const noexcept { std::array result; From ffc1120fd0b91873e6b5e5b5338685296beef81e Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 16:30:23 +0100 Subject: [PATCH 14/43] nailed it --- inc/zoo/swar/SWAR.h | 14 +++++++++----- test/CMakeLists.txt | 4 ++-- test/swar/BasicOperations.cpp | 17 ++++++++++++++++- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index ab5a42ba..56a76a14 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -73,10 +73,14 @@ struct SWAR { AllOnes = ~std::make_unsigned_t{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug? LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, MostSignificantBit = LeastSignificantBit << (NBits - 1), - LeastSignificantLaneMask = - sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits - ~T(0) : - ~(~T(0) << NBits), + LeastSignificantLaneMask = []() { + constexpr auto NBitsLessThanByteOfT = NBits < sizeof(T) * 8; + if constexpr (NBitsLessThanByteOfT) { + return (T(1) << NBits) - 1; + } else { + return ~T(0); + } + }(), // Use LowerBits in favor of ~MostSignificantBit to not pollute // "don't care" bits when non-power-of-two bit lane sizes are supported LowerBits = MostSignificantBit - LeastSignificantBit, @@ -96,7 +100,7 @@ struct SWAR { SWAR(Literals_t, const Arg (&values)[N]) : m_v{from_array(values)} {} constexpr std::array to_array() const noexcept { - std::array result; + std::array result = {}; for (int i = 0; i < Lanes; ++i) { auto otherEnd = Lanes - i - 1; result[otherEnd] = at(i); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ee9fba51..57209c37 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ if(MSVC) # Avoids multiple problems # Due to multiple bugs, forced upgrade to C++ 20 - set(CMAKE_CXX_STANDARD 23) + set(CMAKE_CXX_STANDARD 20) # Set the policy to use the new behavior if(POLICY CMP0067) @@ -81,7 +81,7 @@ if(MSVC) endif() else() # Non-MSVC specific configuration (original content) - set(CMAKE_CXX_STANDARD 23) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS_UBSAN "-fsanitize=undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g") set(CMAKE_CXX_FLAGS_ASAN "-fsanitize=address -fno-omit-frame-pointer") diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 8dfc222e..ed202792 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -54,7 +54,22 @@ static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); -static_assert(SWAR{Literals<4, u8>, {1, 2}}.to_array() == std::array{1, 2}); +template +constexpr auto compareContents(A a, B b) { + for (auto i = 0; i < N; ++i) { + if (a[i] != b[i]) { + return false; + } + } + return true; +} + +using S4U8 = SWAR<4, u8>; +constexpr auto A = S4U8{Literals<4, u8>, {4, 4}}; +constexpr auto B = std::array{4, 4}; + +static_assert(compareContents>(A.to_array(), B)); + static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); From 5b837e613c4346b0e1045d09252ecf93fc7fb928 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 16:31:58 +0100 Subject: [PATCH 15/43] cleanup --- test/swar/BasicOperations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index ed202792..1f8140e2 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -68,7 +68,7 @@ using S4U8 = SWAR<4, u8>; constexpr auto A = S4U8{Literals<4, u8>, {4, 4}}; constexpr auto B = std::array{4, 4}; -static_assert(compareContents>(A.to_array(), B)); +static_assert(compareContents(A.to_array(), B)); static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); From c97318f74e1c24f0aa496d1706ae92658b0ddca8 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 17:08:48 +0100 Subject: [PATCH 16/43] array tests? --- inc/zoo/swar/SWAR.h | 3 +-- test/swar/BasicOperations.cpp | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 56a76a14..2de1c9c2 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -74,8 +74,7 @@ struct SWAR { LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, MostSignificantBit = LeastSignificantBit << (NBits - 1), LeastSignificantLaneMask = []() { - constexpr auto NBitsLessThanByteOfT = NBits < sizeof(T) * 8; - if constexpr (NBitsLessThanByteOfT) { + if constexpr (NBits < sizeof(T) * 8) { return (T(1) << NBits) - 1; } else { return ~T(0); diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1f8140e2..09e87ea6 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -55,7 +55,10 @@ static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); template -constexpr auto compareContents(A a, B b) { +constexpr auto compareContainers(A a, B b) { + if (a.size() != b.size()) { + return false; + } for (auto i = 0; i < N; ++i) { if (a[i] != b[i]) { return false; @@ -64,13 +67,24 @@ constexpr auto compareContents(A a, B b) { return true; } -using S4U8 = SWAR<4, u8>; -constexpr auto A = S4U8{Literals<4, u8>, {4, 4}}; -constexpr auto B = std::array{4, 4}; - -static_assert(compareContents(A.to_array(), B)); - -static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); +#define ARRAY_TEST \ + constexpr auto A = S{Literals, {ArrayLiteral}}; \ + constexpr auto B = std::array{ArrayLiteral}; \ + return compareContainers(A.to_array(), B); + +static_assert([]() { + using S = S8_32; + constexpr auto NBits = 8; + #define ArrayLiteral 4, 3, 2, 1 + ARRAY_TEST +}()); + +static_assert([]() { + using S = S4_16; + constexpr auto NBits = 4; + #define ArrayLiteral 4, 3, 2, 1 + ARRAY_TEST +}()); #define F false From 3dd0296f17e264bbb9e86db138ab28553d0fce71 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 17:12:54 +0100 Subject: [PATCH 17/43] indentaion --- inc/zoo/swar/SWAR.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 2de1c9c2..2df04c5e 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -75,9 +75,9 @@ struct SWAR { MostSignificantBit = LeastSignificantBit << (NBits - 1), LeastSignificantLaneMask = []() { if constexpr (NBits < sizeof(T) * 8) { - return (T(1) << NBits) - 1; + return (T(1) << NBits) - 1; } else { - return ~T(0); + return ~T(0); } }(), // Use LowerBits in favor of ~MostSignificantBit to not pollute From da9ccb991117c9b6072d36baf5992ab9915306ae Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 17:21:00 +0100 Subject: [PATCH 18/43] update indent --- inc/zoo/swar/SWAR.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 2df04c5e..c3e820a1 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -74,11 +74,11 @@ struct SWAR { LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, MostSignificantBit = LeastSignificantBit << (NBits - 1), LeastSignificantLaneMask = []() { - if constexpr (NBits < sizeof(T) * 8) { - return (T(1) << NBits) - 1; - } else { - return ~T(0); - } + if constexpr (NBits < sizeof(T) * 8) { + return (T(1) << NBits) - 1; + } else { + return ~T(0); + } }(), // Use LowerBits in favor of ~MostSignificantBit to not pollute // "don't care" bits when non-power-of-two bit lane sizes are supported From da64b7d0d9070125706835731e4ff58729008e65 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 20 Apr 2024 17:30:50 +0100 Subject: [PATCH 19/43] from array --- inc/zoo/swar/SWAR.h | 19 ++++++++++++++----- test/swar/BasicOperations.cpp | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index c3e820a1..cc1213f3 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -85,15 +85,24 @@ struct SWAR { LowerBits = MostSignificantBit - LeastSignificantBit, MaxUnsignedLaneValue = LeastSignificantLaneMask; - template - constexpr static auto from_array(const U (&values)[Lanes]) noexcept { + template + constexpr static auto from_range(InputIt first, InputIt last) noexcept { auto result = T{0}; - for (auto value : values) { - result = (result << NBits) | value; + for (; first != last; ++first) { + result = (result << NBits) | *first; } - return SWAR{result}; + return result; } + template + constexpr static auto from_array(const U (&values)[Lanes]) noexcept { + return SWAR{from_range(std::begin(values), std::end(values))}; + } + + using ArrayType = std::array; + + constexpr SWAR(const ArrayType& array) : m_v{from_range(array.begin(), array.end())} {} + template > constexpr SWAR(Literals_t, const Arg (&values)[N]) : m_v{from_array(values)} {} diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 09e87ea6..7f858054 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -54,6 +54,8 @@ static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); +static_assert(SWAR<8, u32>::from_array({1, 2, 3, 4}).value() == 0x0102'0304); + template constexpr auto compareContainers(A a, B b) { if (a.size() != b.size()) { From 11ccd9ea3b4697fb0547805e1d86009c0d3568b4 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 12 May 2024 23:13:04 -0700 Subject: [PATCH 20/43] more updates --- inc/zoo/swar/SWAR.h | 36 ++- test/swar/BasicOperations.cpp | 533 ++++++++++++++++------------------ 2 files changed, 292 insertions(+), 277 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index cc1213f3..a9ec9e93 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -96,12 +96,19 @@ struct SWAR { template constexpr static auto from_array(const U (&values)[Lanes]) noexcept { - return SWAR{from_range(std::begin(values), std::end(values))}; + using std::begin; using std::end; + return SWAR{from_range(begin(values), end(values))}; + } + + template + constexpr static auto from_array(const std::array &values) noexcept { + using std::begin; using std::end; + return SWAR{from_range(begin(values), end(values))}; } using ArrayType = std::array; - constexpr SWAR(const ArrayType& array) : m_v{from_range(array.begin(), array.end())} {} + constexpr SWAR(const ArrayType &array) : m_v{from_range(array.begin(), array.end())} {} template > constexpr @@ -120,6 +127,28 @@ struct SWAR { constexpr explicit SWAR(T v): m_v(v) {} constexpr explicit operator T() const noexcept { return m_v; } +// constexpr auto operator==(T (&values)[Lanes]) const noexcept { +// return compareToContainer(values); +// } +// +// constexpr auto operator==(std::array values) const noexcept { +// return compareToContainer(values); +// } + + template + constexpr bool compareToContainer(B b) const noexcept { + auto a = to_array(); + if (a.size() != b.size()) { + return false; + } + for (auto i = 0; i < Lanes; ++i) { + if (a[i] != b[i]) { + return false; + } + } + return true; + } + constexpr T value() const noexcept { return m_v; } #define SWAR_UNARY_OPERATORS_X_LIST \ @@ -210,6 +239,9 @@ struct SWAR { template SWAR(Literals_t, const Arg (&values)[SWAR::Lanes]) -> SWAR; +template +SWAR(Literals_t, const std::array::Lanes>&) -> SWAR; + /// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation. template constexpr auto horizontalEquality(SWAR left, SWAR right) { diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 7f858054..f2349953 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -36,8 +36,10 @@ static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001); static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002); -static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); -static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004); +static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == + 0x0004'0003'0002'0001); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == + 0x0001'0002'0003'0004); static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001); static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); @@ -54,49 +56,63 @@ static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); -static_assert(SWAR<8, u32>::from_array({1, 2, 3, 4}).value() == 0x0102'0304); - -template -constexpr auto compareContainers(A a, B b) { - if (a.size() != b.size()) { - return false; - } - for (auto i = 0; i < N; ++i) { - if (a[i] != b[i]) { - return false; - } + // static_assert([]() -> bool { + // constexpr auto array = std::array{1, 2, 3, 4}; + // auto s = SWAR{Literals<16, u64>, array}; + // return s.at(0) == 4; + // }()); + +template +constexpr auto operator==(const SWAR &sw, + const std::array::Lanes> &arr) { + const auto swArr = sw.to_array(); + if (swArr.size() != arr.size()) { + return false; + } + for (auto i = 0; i < SWAR::Lanes; ++i) { + if (swArr.at(i) != arr[i]) { + return false; } - return true; + } + return true; } -#define ARRAY_TEST \ - constexpr auto A = S{Literals, {ArrayLiteral}}; \ - constexpr auto B = std::array{ArrayLiteral}; \ - return compareContainers(A.to_array(), B); +static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}} == + std::array{1, 2, 3, 4}); + +#define ARRAY_TEST \ + constexpr auto A = S{Literals, {ArrayLiteral}}; \ + constexpr auto B = std::array{ArrayLiteral}; \ + return A == B; static_assert([]() { - using S = S8_32; - constexpr auto NBits = 8; - #define ArrayLiteral 4, 3, 2, 1 - ARRAY_TEST + using S = S8_32; + constexpr auto NBits = 8; +#define ArrayLiteral 4, 3, 2, 1 + ARRAY_TEST }()); static_assert([]() { - using S = S4_16; - constexpr auto NBits = 4; - #define ArrayLiteral 4, 3, 2, 1 - ARRAY_TEST + using S = S4_16; + constexpr auto NBits = 4; +#define ArrayLiteral 4, 3, 2, 1 + ARRAY_TEST }()); - #define F false #define T true -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == 0b0000'0000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000); -static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == + 0b0000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == + 0b1000'0000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == + 0b0000'1000'0000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, T, F}}.value() == + 0b0000'0000'1000'0000); +static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, T}}.value() == + 0b0000'0000'0000'1000); +static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == + 0b1000'0000'0000'0000); #undef F #undef T @@ -111,7 +127,8 @@ constexpr auto Doubled = static_assert(0x090B0D0F == Doubled.even.value()); static_assert(0x080A0C0E == Doubled.odd.value()); -static_assert(PrecisionFixtureTest == halvePrecision(Doubled.even, Doubled.odd).value()); +static_assert(PrecisionFixtureTest == + halvePrecision(Doubled.even, Doubled.odd).value()); constexpr SWAR<8, u32> Micand{0x5030201}; constexpr SWAR<8, u32> Mplier{0xA050301}; @@ -123,21 +140,18 @@ constexpr SWAR<8, u32> Mplier{0xA050301}; // 1*1 = 1 constexpr auto Expected = 0x320F0601; -static_assert( - Expected == multiplication_OverflowUnsafe(Micand, Mplier).value() -); +static_assert(Expected == + multiplication_OverflowUnsafe(Micand, Mplier).value()); static_assert( 0x320F0601 != // intentionally use a too-small bit count - multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value() -); + multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value()); -} +} // namespace Multiplication -#define HE(nbits, t, v0, v1) \ - static_assert(horizontalEquality(\ - SWAR(v0),\ - SWAR(meta::BitmaskMaker::value)\ - )); +#define HE(nbits, t, v0, v1) \ + static_assert(horizontalEquality( \ + SWAR(v0), \ + SWAR(meta::BitmaskMaker::value))); HE(8, u64, 0x0808'0808'0808'0808, 0x8); HE(4, u64, 0x1111'1111'1111'1111, 0x1); HE(3, u64, 0xFFFF'FFFF'FFFF'FFFF, 0x7); @@ -148,66 +162,62 @@ HE(2, u8, 0xAA, 0x2); #undef HE TEST_CASE("Old version", "[deprecated][swar]") { - SWAR<8, u32> Micand{0x5030201}; - SWAR<8, u32> Mplier{0xA050301}; - auto Expected = 0x320F0601; - auto result = - multiplication_OverflowUnsafe_SpecificBitCount_deprecated<4>( - Micand, Mplier - ); - CHECK(Expected == result.value()); + SWAR<8, u32> Micand{0x5030201}; + SWAR<8, u32> Mplier{0xA050301}; + auto Expected = 0x320F0601; + auto result = multiplication_OverflowUnsafe_SpecificBitCount_deprecated<4>( + Micand, Mplier); + CHECK(Expected == result.value()); } TEST_CASE("Parity", "[swar]") { - // For each nibble, E indicates (E)ven and O (O)dd parities - // EEOEEOOO - auto Examples = 0xFF13A7E4; - SWAR<4, u32> casesBy4{Examples}; - SWAR<8, u32> casesBy8{Examples}; - auto by4 = parity(casesBy4); - auto by8 = parity(casesBy8); - CHECK(by4.value() == 0x00800888); - CHECK(by8.value() == 0x00808000); + // For each nibble, E indicates (E)ven and O (O)dd parities + // EEOEEOOO + auto Examples = 0xFF13A7E4; + SWAR<4, u32> casesBy4{Examples}; + SWAR<8, u32> casesBy8{Examples}; + auto by4 = parity(casesBy4); + auto by8 = parity(casesBy8); + CHECK(by4.value() == 0x00800888); + CHECK(by8.value() == 0x00808000); } -TEST_CASE( - "Isolate", - "[swar]" -) { - for (auto i = 0; i < 63; ++i) { - CHECK(i == isolate<8>(i)); - CHECK(i == isolate<8>(0xFF00+i)); - CHECK(i == isolate<8>(0xFFFF00+i)); - } - for (auto i = 0; i < 31; ++i) { - CHECK(i == isolate<7>(i)); - CHECK(i == isolate<7>(0xFF00+i)); - CHECK(i == isolate<7>(0xFFFF00+i)); - } - for (auto i = 0; i < 31; ++i) { - CHECK(i == isolate<11>(i)); - CHECK(i == isolate<11>(0xF800+i)); - CHECK(i == isolate<11>(0xFFF800+i)); - } +TEST_CASE("Isolate", "[swar]") { + for (auto i = 0; i < 63; ++i) { + CHECK(i == isolate<8>(i)); + CHECK(i == isolate<8>(0xFF00 + i)); + CHECK(i == isolate<8>(0xFFFF00 + i)); + } + for (auto i = 0; i < 31; ++i) { + CHECK(i == isolate<7>(i)); + CHECK(i == isolate<7>(0xFF00 + i)); + CHECK(i == isolate<7>(0xFFFF00 + i)); + } + for (auto i = 0; i < 31; ++i) { + CHECK(i == isolate<11>(i)); + CHECK(i == isolate<11>(0xF800 + i)); + CHECK(i == isolate<11>(0xFFF800 + i)); + } } TEST_CASE("Compress/Expand", "[swar]") { - unsigned - Mask = 0b0001'0011'0111'0111'0110'1110'1100'1010, - ToMove = 0b0101'0101'0101'0101'0101'0101'0101'0101, - // Selection: 1 01 101 101 10 010 01 0 0 - result = 0b0001'0'1'1'0'1'1'0'1'10'0'10'0'1'0'0; - auto q = compress(S32_32{ToMove}, S32_32{Mask}); - CHECK(result == q.value()); - SECTION("Regression 1") { - u64 - input = 0b1010'1001'0110'0001'1001'0000'0010'1010'0100'0111'1110'1001'1111'0001'1110'1011, - mask = 0b0110'0000'0001'0101'0101'1111'0101'1100'0110'1111'0100'0111'0001'1000'0101'0010, - expected =0b0001'0000'0000'0001'0001'0000'0000'0010'0010'0111'0001'0001'0001'0000'0010'0001; - using S = S4_64; - auto v = compress(S{input}, S{mask}); - CHECK(expected == v.value()); - } + unsigned Mask = 0b0001'0011'0111'0111'0110'1110'1100'1010, + ToMove = 0b0101'0101'0101'0101'0101'0101'0101'0101, + // Selection: 1 01 101 101 10 010 01 0 0 + result = 0b0001'0'1'1'0'1'1'0'1'10'0'10'0'1'0'0; + auto q = compress(S32_32{ToMove}, S32_32{Mask}); + CHECK(result == q.value()); + SECTION("Regression 1") { + u64 input = + 0b1010'1001'0110'0001'1001'0000'0010'1010'0100'0111'1110'1001'1111'0001'1110'1011, + mask = + 0b0110'0000'0001'0101'0101'1111'0101'1100'0110'1111'0100'0111'0001'1000'0101'0010, + expected = + 0b0001'0000'0000'0001'0001'0000'0000'0010'0010'0111'0001'0001'0001'0000'0010'0001; + using S = S4_64; + auto v = compress(S{input}, S{mask}); + CHECK(expected == v.value()); + } } static_assert(1 == popcount<5>(0x100ull)); @@ -220,13 +230,13 @@ static_assert(0x210 == popcount<1>(0x320)); static_assert(0x4321 == popcount<2>(0xF754)); static_assert(0x50004 == popcount<4>(0x3E001122)); -static_assert(1 == msbIndex(1ull<<1)); -static_assert(3 == msbIndex(1ull<<3)); -static_assert(5 == msbIndex(1ull<<5)); -static_assert(8 == msbIndex(1ull<<8)); -static_assert(17 == msbIndex(1ull<<17)); -static_assert(30 == msbIndex(1ull<<30)); -static_assert(31 == msbIndex(1ull<<31)); +static_assert(1 == msbIndex(1ull << 1)); +static_assert(3 == msbIndex(1ull << 3)); +static_assert(5 == msbIndex(1ull << 5)); +static_assert(8 == msbIndex(1ull << 8)); +static_assert(17 == msbIndex(1ull << 17)); +static_assert(30 == msbIndex(1ull << 30)); +static_assert(31 == msbIndex(1ull << 31)); namespace { using namespace zoo::meta; @@ -237,7 +247,7 @@ static_assert(0x0808'0808'0808'0808ull == BitmaskMaker::value); static_assert(0x0101'0101'0101'0101ull == BitmaskMaker::value); static_assert(0x0E0E'0E0E'0E0E'0E0Eull == BitmaskMaker::value); static_assert(0x0303'0303'0303'0303ull == BitmaskMaker::value); -} +} // namespace static_assert(0x00 == clearLSB(0x80)); static_assert(0x80 == clearLSB(0xC0)); @@ -325,124 +335,81 @@ static_assert(0x0808'0808 == u32(broadcast<8>(SWAR<8, u32>(0x0000'0008)))); static_assert(0x0B0B'0B0B == u32(broadcast<8>(SWAR<8, u32>(0x0000'000B)))); static_assert(0x0E0E'0E0E == u32(broadcast<8>(SWAR<8, u32>(0x0000'000E)))); static_assert(0x6B6B'6B6B == u32(broadcast<8>(SWAR<8, u32>(0x0000'006B)))); -static_assert(0x0808'0808'0808'0808ull == u64(broadcast<8>(SWAR<8, u64>(0x0000'0000'0000'0008ull)))); - -static_assert(1 == lsbIndex(1<<1)); -static_assert(3 == lsbIndex(1<<3)); -static_assert(5 == lsbIndex(1<<5)); -static_assert(8 == lsbIndex(1<<8)); -static_assert(17 == lsbIndex(1<<17)); -static_assert(30 == lsbIndex(1<<30)); +static_assert(0x0808'0808'0808'0808ull == + u64(broadcast<8>(SWAR<8, u64>(0x0000'0000'0000'0008ull)))); +static_assert(1 == lsbIndex(1 << 1)); +static_assert(3 == lsbIndex(1 << 3)); +static_assert(5 == lsbIndex(1 << 5)); +static_assert(8 == lsbIndex(1 << 8)); +static_assert(17 == lsbIndex(1 << 17)); +static_assert(30 == lsbIndex(1 << 30)); /*These tests were not catching errors known to have been present -static_assert(0x80880008 == greaterEqual<3>(SWAR<4, uint32_t>(0x3245'1027)).value()); -static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x0123'4567)).value()); -static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x7654'3210)).value()); -static_assert(0x00000008 == greaterEqual<7>(SWAR<4, uint32_t>(0x0123'4567)).value()); -static_assert(0x80000000 == greaterEqual<7>(SWAR<4, uint32_t>(0x7654'3210)).value()); +static_assert(0x80880008 == greaterEqual<3>(SWAR<4, +uint32_t>(0x3245'1027)).value()); static_assert(0x88888888 == +greaterEqual<0>(SWAR<4, uint32_t>(0x0123'4567)).value()); +static_assert(0x88888888 == greaterEqual<0>(SWAR<4, +uint32_t>(0x7654'3210)).value()); static_assert(0x00000008 == +greaterEqual<7>(SWAR<4, uint32_t>(0x0123'4567)).value()); +static_assert(0x80000000 == greaterEqual<7>(SWAR<4, +uint32_t>(0x7654'3210)).value()); */ +#define GE_MSB_TEST(left, right, result) \ + static_assert(result == greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), \ + SWAR<4, u32>(right)) \ + .value()); -#define GE_MSB_TEST(left, right, result) static_assert(result == greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); - -GE_MSB_TEST( - 0x1000'0010, - 0x0111'1101, - 0x8000'0080) -GE_MSB_TEST( - 0x4333'3343, - 0x4444'4444, - 0x8000'0080) -GE_MSB_TEST( - 0x0550'0110, - 0x0110'0550, - 0x8888'8008) -GE_MSB_TEST( - 0x4771'1414, - 0x4641'1774, - 0x8888'8008) -GE_MSB_TEST( - 0x0123'4567, - 0x0000'0000, - 0x8888'8888) -GE_MSB_TEST( - 0x0123'4567, - 0x7777'7777, - 0x0000'0008) -GE_MSB_TEST( - 0x0000'0000, - 0x0123'4567, - 0x8000'0000) -GE_MSB_TEST( - 0x7777'7777, - 0x0123'4567, - 0x8888'8888) +GE_MSB_TEST(0x1000'0010, 0x0111'1101, 0x8000'0080) +GE_MSB_TEST(0x4333'3343, 0x4444'4444, 0x8000'0080) +GE_MSB_TEST(0x0550'0110, 0x0110'0550, 0x8888'8008) +GE_MSB_TEST(0x4771'1414, 0x4641'1774, 0x8888'8008) +GE_MSB_TEST(0x0123'4567, 0x0000'0000, 0x8888'8888) +GE_MSB_TEST(0x0123'4567, 0x7777'7777, 0x0000'0008) +GE_MSB_TEST(0x0000'0000, 0x0123'4567, 0x8000'0000) +GE_MSB_TEST(0x7777'7777, 0x0123'4567, 0x8888'8888) // Replicate the msb off tests with the greaterEqual that allows msb on -#define GE_MSB_ON_TEST(left, right, result) static_assert(result == greaterEqual<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); - -GE_MSB_ON_TEST( - 0x1000'0010, - 0x0111'1101, - 0x8000'0080) -GE_MSB_ON_TEST( - 0x4333'3343, - 0x4444'4444, - 0x8000'0080) -GE_MSB_ON_TEST( - 0x0550'0110, - 0x0110'0550, - 0x8888'8008) -GE_MSB_ON_TEST( - 0x4771'1414, - 0x4641'1774, - 0x8888'8008) -GE_MSB_ON_TEST( - 0x0123'4567, - 0x0000'0000, - 0x8888'8888) -GE_MSB_ON_TEST( - 0x0123'4567, - 0x7777'7777, - 0x0000'0008) -GE_MSB_ON_TEST( - 0x0000'0000, - 0x0123'4567, - 0x8000'0000) -GE_MSB_ON_TEST( - 0x7777'7777, - 0x0123'4567, - 0x8888'8888) - -TEST_CASE( - "greaterEqualMSBOn", - "[swar][unsigned-swar]" -) { - SECTION("single") { - for (uint32_t i = 1; i < 4; i++) { - const auto left = S2_16{0}.blitElement(1, i); - const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); - const auto test = S2_16{0}.blitElement(1, 2); - CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); - } +#define GE_MSB_ON_TEST(left, right, result) \ + static_assert( \ + result == \ + greaterEqual<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); + +GE_MSB_ON_TEST(0x1000'0010, 0x0111'1101, 0x8000'0080) +GE_MSB_ON_TEST(0x4333'3343, 0x4444'4444, 0x8000'0080) +GE_MSB_ON_TEST(0x0550'0110, 0x0110'0550, 0x8888'8008) +GE_MSB_ON_TEST(0x4771'1414, 0x4641'1774, 0x8888'8008) +GE_MSB_ON_TEST(0x0123'4567, 0x0000'0000, 0x8888'8888) +GE_MSB_ON_TEST(0x0123'4567, 0x7777'7777, 0x0000'0008) +GE_MSB_ON_TEST(0x0000'0000, 0x0123'4567, 0x8000'0000) +GE_MSB_ON_TEST(0x7777'7777, 0x0123'4567, 0x8888'8888) + +TEST_CASE("greaterEqualMSBOn", "[swar][unsigned-swar]") { + SECTION("single") { + for (uint32_t i = 1; i < 4; i++) { + const auto left = S2_16{0}.blitElement(1, i); + const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i - 1); + const auto test = S2_16{0}.blitElement(1, 2); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); } - SECTION("single") { - for (uint32_t i = 1; i < 15; i++) { - const auto large = S4_32{0}.blitElement(1, i+1); - const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); - const auto test = S4_32{0}.blitElement(1, 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); - } + } + SECTION("single") { + for (uint32_t i = 1; i < 15; i++) { + const auto large = S4_32{0}.blitElement(1, i + 1); + const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i - 1); + const auto test = S4_32{0}.blitElement(1, 8); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } - SECTION("allLanes") { - for (uint32_t i = 1; i < 15; i++) { - const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); - const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); - const auto test = S4_32(S4_32::LeastSignificantBit * 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); - } + } + SECTION("allLanes") { + for (uint32_t i = 1; i < 15; i++) { + const auto small = S4_32(S4_32::LeastSignificantBit * (i - 1)); + const auto large = S4_32(S4_32::LeastSignificantBit * (i + 1)); + const auto test = S4_32(S4_32::LeastSignificantBit * 8); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } + } } static_assert(0x123 == SWAR<4, uint32_t>(0x173).blitElement(1, 2).value()); @@ -450,75 +417,91 @@ static_assert(0 == isolateLSB(u32(0))); constexpr auto aBooleansWithTrue = booleans(SWAR<4, u32>{0x1}); static_assert(aBooleansWithTrue); -//static_assert(~aBooleansWithTrue); +// static_assert(~aBooleansWithTrue); static_assert(false == !bool(aBooleansWithTrue)); -TEST_CASE( - "fullAddition", - "[swar][signed-swar][unsigned-swar]" -) { - SECTION("fullAddition overflow") { - const auto sum = fullAddition(SWAR<4, u32>(0x0000'1000), SWAR<4, u32>(0x0000'7000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.result.value()); - } - SECTION("no carry or overflow for safe values") { - const auto sum = fullAddition(SWAR<4, u32>(0x0000'8000), SWAR<4, u32>(0x0000'7000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'F000).value() == sum.result.value()); - } - SECTION("fullAddition signed overflow") { - const auto sum = fullAddition(SWAR<4, u32>(0x0000'5000), SWAR<4, u32>(0x0000'5000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'A000).value() == sum.result.value()); - } - SECTION("0x0111 (7) + 0x0111 (7) is 0x1110 (0x1110->0x1101->0x0010) (0xe unsigned, 0x2 signed) (signed and unsigned check)") { - const auto sum = fullAddition(SWAR<4, u32>(0x0000'7000), SWAR<4, u32>(0x0000'7000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'e000).value() == sum.result.value()); - } - SECTION("both carry and overflow") { - const auto sum = fullAddition(SWAR<4, u32>(0x0000'a000), SWAR<4, u32>(0x0000'a000)); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - } +TEST_CASE("fullAddition", "[swar][signed-swar][unsigned-swar]") { + SECTION("fullAddition overflow") { + const auto sum = + fullAddition(SWAR<4, u32>(0x0000'1000), SWAR<4, u32>(0x0000'7000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.result.value()); + } + SECTION("no carry or overflow for safe values") { + const auto sum = + fullAddition(SWAR<4, u32>(0x0000'8000), SWAR<4, u32>(0x0000'7000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'F000).value() == sum.result.value()); + } + SECTION("fullAddition signed overflow") { + const auto sum = + fullAddition(SWAR<4, u32>(0x0000'5000), SWAR<4, u32>(0x0000'5000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'A000).value() == sum.result.value()); + } + SECTION("0x0111 (7) + 0x0111 (7) is 0x1110 (0x1110->0x1101->0x0010) (0xe " + "unsigned, 0x2 signed) (signed and unsigned check)") { + const auto sum = + fullAddition(SWAR<4, u32>(0x0000'7000), SWAR<4, u32>(0x0000'7000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'e000).value() == sum.result.value()); + } + SECTION("both carry and overflow") { + const auto sum = + fullAddition(SWAR<4, u32>(0x0000'a000), SWAR<4, u32>(0x0000'a000)); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + } } -TEST_CASE( - "BooleanSWAR MSBtoLaneMask", - "[swar]" -) { - // BooleanSWAR as a mask: - auto bswar =BooleanSWAR<4, u32>(0x0808'0000); - auto mask = S4_32(0x0F0F'0000); - CHECK(bswar.MSBtoLaneMask().value() == mask.value()); +TEST_CASE("BooleanSWAR MSBtoLaneMask", "[swar]") { + // BooleanSWAR as a mask: + auto bswar = BooleanSWAR<4, u32>(0x0808'0000); + auto mask = S4_32(0x0F0F'0000); + CHECK(bswar.MSBtoLaneMask().value() == mask.value()); } -constexpr auto fullAddSumTest = fullAddition(S4_32(0x0111'1101), S4_32(0x1000'0010)); -static_assert( S4_32(0x1111'1111).value() == fullAddSumTest.result.value()); -static_assert( S4_32(0x0000'0000).value() == fullAddSumTest.carry.value()); -static_assert( S4_32(0x0000'0000).value() == fullAddSumTest.overflow.value()); +constexpr auto fullAddSumTest = + fullAddition(S4_32(0x0111'1101), S4_32(0x1000'0010)); +static_assert(S4_32(0x1111'1111).value() == fullAddSumTest.result.value()); +static_assert(S4_32(0x0000'0000).value() == fullAddSumTest.carry.value()); +static_assert(S4_32(0x0000'0000).value() == fullAddSumTest.overflow.value()); // Verify that saturation works (saturates and doesn't saturate as appropriate) -static_assert( S4_16(0x0000).value() == saturatingUnsignedAddition(S4_16(0x0000), S4_16(0x0000)).value()); -static_assert( S4_16(0x0200).value() == saturatingUnsignedAddition(S4_16(0x0100), S4_16(0x0100)).value()); -static_assert( S4_16(0x0400).value() == saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0100)).value()); -static_assert( S4_16(0x0A00).value() == saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0700)).value()); -static_assert( S4_16(0x0F00).value() == saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0700)).value()); -static_assert( S4_16(0x0F00).value() == saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0800)).value()); - -TEST_CASE( - "saturatingUnsignedAddition", - "[swar][saturation]" -) { - CHECK(SWAR<4, u16>(0x0200).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0100)).value()); - CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); - CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); - CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); +static_assert(S4_16(0x0000).value() == + saturatingUnsignedAddition(S4_16(0x0000), S4_16(0x0000)).value()); +static_assert(S4_16(0x0200).value() == + saturatingUnsignedAddition(S4_16(0x0100), S4_16(0x0100)).value()); +static_assert(S4_16(0x0400).value() == + saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0100)).value()); +static_assert(S4_16(0x0A00).value() == + saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0700)).value()); +static_assert(S4_16(0x0F00).value() == + saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0700)).value()); +static_assert(S4_16(0x0F00).value() == + saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0800)).value()); + +TEST_CASE("saturatingUnsignedAddition", "[swar][saturation]") { + CHECK(SWAR<4, u16>(0x0200).value() == + saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0100)) + .value()); + CHECK(SWAR<4, u16>(0x0400).value() == + saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)) + .value()); + CHECK(SWAR<4, u16>(0x0B00).value() == + saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)) + .value()); + CHECK(SWAR<4, u16>(0x0F00).value() == + saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)) + .value()); + CHECK(SWAR<4, u16>(0x0F00).value() == + saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)) + .value()); + CHECK(S4_32(0x0F0C'F000).value() == + saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)) + .value()); } From aab033a1a5db1113d00a107103056a5377571b7a Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 15 May 2024 15:29:42 -0700 Subject: [PATCH 21/43] wip --- inc/zoo/swar/SWAR.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index a9ec9e93..e559c663 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -106,9 +106,7 @@ struct SWAR { return SWAR{from_range(begin(values), end(values))}; } - using ArrayType = std::array; - - constexpr SWAR(const ArrayType &array) : m_v{from_range(array.begin(), array.end())} {} + constexpr SWAR(const std::array &array) : m_v{from_range(array.begin(), array.end())} {} template > constexpr From ba6a5baba2c81b0a931107dc51ab4956fd138b03 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 15 May 2024 15:41:44 -0700 Subject: [PATCH 22/43] rm unused --- inc/zoo/swar/SWAR.h | 22 ---------------------- test/swar/BasicOperations.cpp | 1 + 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index e559c663..783de663 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -125,28 +125,6 @@ struct SWAR { constexpr explicit SWAR(T v): m_v(v) {} constexpr explicit operator T() const noexcept { return m_v; } -// constexpr auto operator==(T (&values)[Lanes]) const noexcept { -// return compareToContainer(values); -// } -// -// constexpr auto operator==(std::array values) const noexcept { -// return compareToContainer(values); -// } - - template - constexpr bool compareToContainer(B b) const noexcept { - auto a = to_array(); - if (a.size() != b.size()) { - return false; - } - for (auto i = 0; i < Lanes; ++i) { - if (a[i] != b[i]) { - return false; - } - } - return true; - } - constexpr T value() const noexcept { return m_v; } #define SWAR_UNARY_OPERATORS_X_LIST \ diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index f2349953..991e70f3 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -46,6 +46,7 @@ static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01); static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); +static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201); static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102); From f3d9f42387ee265e2d52ccad544320d022af83c6 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 15 May 2024 15:44:05 -0700 Subject: [PATCH 23/43] respect 80 chars --- inc/zoo/swar/SWAR.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 783de663..0eb1f60d 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -367,7 +367,10 @@ struct BooleanSWAR: SWAR { }; template -BooleanSWAR(Literals_t, const bool (&values)[BooleanSWAR::Lanes]) -> BooleanSWAR; +BooleanSWAR( + Literals_t, + const bool (&values)[BooleanSWAR::Lanes]) + -> BooleanSWAR; template constexpr BooleanSWAR From 783f1891a16d4692a9bea9e2ac8f606b8f6f23d1 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 15 May 2024 15:59:06 -0700 Subject: [PATCH 24/43] undo clang formatting --- test/swar/BasicOperations.cpp | 458 ++++++++++++++++++---------------- 1 file changed, 249 insertions(+), 209 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 991e70f3..8bd43994 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -2,6 +2,12 @@ #include "catch2/catch.hpp" +#include +#include +#include +#include + + using namespace zoo; using namespace zoo::swar; @@ -128,8 +134,7 @@ constexpr auto Doubled = static_assert(0x090B0D0F == Doubled.even.value()); static_assert(0x080A0C0E == Doubled.odd.value()); -static_assert(PrecisionFixtureTest == - halvePrecision(Doubled.even, Doubled.odd).value()); +static_assert(PrecisionFixtureTest == halvePrecision(Doubled.even, Doubled.odd).value()); constexpr SWAR<8, u32> Micand{0x5030201}; constexpr SWAR<8, u32> Mplier{0xA050301}; @@ -141,18 +146,21 @@ constexpr SWAR<8, u32> Mplier{0xA050301}; // 1*1 = 1 constexpr auto Expected = 0x320F0601; -static_assert(Expected == - multiplication_OverflowUnsafe(Micand, Mplier).value()); +static_assert( + Expected == multiplication_OverflowUnsafe(Micand, Mplier).value() +); static_assert( 0x320F0601 != // intentionally use a too-small bit count - multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value()); + multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value() +); -} // namespace Multiplication +} -#define HE(nbits, t, v0, v1) \ - static_assert(horizontalEquality( \ - SWAR(v0), \ - SWAR(meta::BitmaskMaker::value))); +#define HE(nbits, t, v0, v1) \ + static_assert(horizontalEquality(\ + SWAR(v0),\ + SWAR(meta::BitmaskMaker::value)\ + )); HE(8, u64, 0x0808'0808'0808'0808, 0x8); HE(4, u64, 0x1111'1111'1111'1111, 0x1); HE(3, u64, 0xFFFF'FFFF'FFFF'FFFF, 0x7); @@ -163,62 +171,66 @@ HE(2, u8, 0xAA, 0x2); #undef HE TEST_CASE("Old version", "[deprecated][swar]") { - SWAR<8, u32> Micand{0x5030201}; - SWAR<8, u32> Mplier{0xA050301}; - auto Expected = 0x320F0601; - auto result = multiplication_OverflowUnsafe_SpecificBitCount_deprecated<4>( - Micand, Mplier); - CHECK(Expected == result.value()); + SWAR<8, u32> Micand{0x5030201}; + SWAR<8, u32> Mplier{0xA050301}; + auto Expected = 0x320F0601; + auto result = + multiplication_OverflowUnsafe_SpecificBitCount_deprecated<4>( + Micand, Mplier + ); + CHECK(Expected == result.value()); } TEST_CASE("Parity", "[swar]") { - // For each nibble, E indicates (E)ven and O (O)dd parities - // EEOEEOOO - auto Examples = 0xFF13A7E4; - SWAR<4, u32> casesBy4{Examples}; - SWAR<8, u32> casesBy8{Examples}; - auto by4 = parity(casesBy4); - auto by8 = parity(casesBy8); - CHECK(by4.value() == 0x00800888); - CHECK(by8.value() == 0x00808000); + // For each nibble, E indicates (E)ven and O (O)dd parities + // EEOEEOOO + auto Examples = 0xFF13A7E4; + SWAR<4, u32> casesBy4{Examples}; + SWAR<8, u32> casesBy8{Examples}; + auto by4 = parity(casesBy4); + auto by8 = parity(casesBy8); + CHECK(by4.value() == 0x00800888); + CHECK(by8.value() == 0x00808000); } -TEST_CASE("Isolate", "[swar]") { - for (auto i = 0; i < 63; ++i) { - CHECK(i == isolate<8>(i)); - CHECK(i == isolate<8>(0xFF00 + i)); - CHECK(i == isolate<8>(0xFFFF00 + i)); - } - for (auto i = 0; i < 31; ++i) { - CHECK(i == isolate<7>(i)); - CHECK(i == isolate<7>(0xFF00 + i)); - CHECK(i == isolate<7>(0xFFFF00 + i)); - } - for (auto i = 0; i < 31; ++i) { - CHECK(i == isolate<11>(i)); - CHECK(i == isolate<11>(0xF800 + i)); - CHECK(i == isolate<11>(0xFFF800 + i)); - } +TEST_CASE( + "Isolate", + "[swar]" +) { + for (auto i = 0; i < 63; ++i) { + CHECK(i == isolate<8>(i)); + CHECK(i == isolate<8>(0xFF00+i)); + CHECK(i == isolate<8>(0xFFFF00+i)); + } + for (auto i = 0; i < 31; ++i) { + CHECK(i == isolate<7>(i)); + CHECK(i == isolate<7>(0xFF00+i)); + CHECK(i == isolate<7>(0xFFFF00+i)); + } + for (auto i = 0; i < 31; ++i) { + CHECK(i == isolate<11>(i)); + CHECK(i == isolate<11>(0xF800+i)); + CHECK(i == isolate<11>(0xFFF800+i)); + } } TEST_CASE("Compress/Expand", "[swar]") { - unsigned Mask = 0b0001'0011'0111'0111'0110'1110'1100'1010, - ToMove = 0b0101'0101'0101'0101'0101'0101'0101'0101, - // Selection: 1 01 101 101 10 010 01 0 0 - result = 0b0001'0'1'1'0'1'1'0'1'10'0'10'0'1'0'0; - auto q = compress(S32_32{ToMove}, S32_32{Mask}); - CHECK(result == q.value()); - SECTION("Regression 1") { - u64 input = - 0b1010'1001'0110'0001'1001'0000'0010'1010'0100'0111'1110'1001'1111'0001'1110'1011, - mask = - 0b0110'0000'0001'0101'0101'1111'0101'1100'0110'1111'0100'0111'0001'1000'0101'0010, - expected = - 0b0001'0000'0000'0001'0001'0000'0000'0010'0010'0111'0001'0001'0001'0000'0010'0001; - using S = S4_64; - auto v = compress(S{input}, S{mask}); - CHECK(expected == v.value()); - } + unsigned + Mask = 0b0001'0011'0111'0111'0110'1110'1100'1010, + ToMove = 0b0101'0101'0101'0101'0101'0101'0101'0101, + // Selection: 1 01 101 101 10 010 01 0 0 + result = 0b0001'0'1'1'0'1'1'0'1'10'0'10'0'1'0'0; + auto q = compress(S32_32{ToMove}, S32_32{Mask}); + CHECK(result == q.value()); + SECTION("Regression 1") { + u64 + input = 0b1010'1001'0110'0001'1001'0000'0010'1010'0100'0111'1110'1001'1111'0001'1110'1011, + mask = 0b0110'0000'0001'0101'0101'1111'0101'1100'0110'1111'0100'0111'0001'1000'0101'0010, + expected =0b0001'0000'0000'0001'0001'0000'0000'0010'0010'0111'0001'0001'0001'0000'0010'0001; + using S = S4_64; + auto v = compress(S{input}, S{mask}); + CHECK(expected == v.value()); + } } static_assert(1 == popcount<5>(0x100ull)); @@ -231,13 +243,13 @@ static_assert(0x210 == popcount<1>(0x320)); static_assert(0x4321 == popcount<2>(0xF754)); static_assert(0x50004 == popcount<4>(0x3E001122)); -static_assert(1 == msbIndex(1ull << 1)); -static_assert(3 == msbIndex(1ull << 3)); -static_assert(5 == msbIndex(1ull << 5)); -static_assert(8 == msbIndex(1ull << 8)); -static_assert(17 == msbIndex(1ull << 17)); -static_assert(30 == msbIndex(1ull << 30)); -static_assert(31 == msbIndex(1ull << 31)); +static_assert(1 == msbIndex(1ull<<1)); +static_assert(3 == msbIndex(1ull<<3)); +static_assert(5 == msbIndex(1ull<<5)); +static_assert(8 == msbIndex(1ull<<8)); +static_assert(17 == msbIndex(1ull<<17)); +static_assert(30 == msbIndex(1ull<<30)); +static_assert(31 == msbIndex(1ull<<31)); namespace { using namespace zoo::meta; @@ -248,7 +260,7 @@ static_assert(0x0808'0808'0808'0808ull == BitmaskMaker::value); static_assert(0x0101'0101'0101'0101ull == BitmaskMaker::value); static_assert(0x0E0E'0E0E'0E0E'0E0Eull == BitmaskMaker::value); static_assert(0x0303'0303'0303'0303ull == BitmaskMaker::value); -} // namespace +} static_assert(0x00 == clearLSB(0x80)); static_assert(0x80 == clearLSB(0xC0)); @@ -336,81 +348,124 @@ static_assert(0x0808'0808 == u32(broadcast<8>(SWAR<8, u32>(0x0000'0008)))); static_assert(0x0B0B'0B0B == u32(broadcast<8>(SWAR<8, u32>(0x0000'000B)))); static_assert(0x0E0E'0E0E == u32(broadcast<8>(SWAR<8, u32>(0x0000'000E)))); static_assert(0x6B6B'6B6B == u32(broadcast<8>(SWAR<8, u32>(0x0000'006B)))); -static_assert(0x0808'0808'0808'0808ull == - u64(broadcast<8>(SWAR<8, u64>(0x0000'0000'0000'0008ull)))); +static_assert(0x0808'0808'0808'0808ull == u64(broadcast<8>(SWAR<8, u64>(0x0000'0000'0000'0008ull)))); + +static_assert(1 == lsbIndex(1<<1)); +static_assert(3 == lsbIndex(1<<3)); +static_assert(5 == lsbIndex(1<<5)); +static_assert(8 == lsbIndex(1<<8)); +static_assert(17 == lsbIndex(1<<17)); +static_assert(30 == lsbIndex(1<<30)); -static_assert(1 == lsbIndex(1 << 1)); -static_assert(3 == lsbIndex(1 << 3)); -static_assert(5 == lsbIndex(1 << 5)); -static_assert(8 == lsbIndex(1 << 8)); -static_assert(17 == lsbIndex(1 << 17)); -static_assert(30 == lsbIndex(1 << 30)); /*These tests were not catching errors known to have been present -static_assert(0x80880008 == greaterEqual<3>(SWAR<4, -uint32_t>(0x3245'1027)).value()); static_assert(0x88888888 == -greaterEqual<0>(SWAR<4, uint32_t>(0x0123'4567)).value()); -static_assert(0x88888888 == greaterEqual<0>(SWAR<4, -uint32_t>(0x7654'3210)).value()); static_assert(0x00000008 == -greaterEqual<7>(SWAR<4, uint32_t>(0x0123'4567)).value()); -static_assert(0x80000000 == greaterEqual<7>(SWAR<4, -uint32_t>(0x7654'3210)).value()); +static_assert(0x80880008 == greaterEqual<3>(SWAR<4, uint32_t>(0x3245'1027)).value()); +static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x0123'4567)).value()); +static_assert(0x88888888 == greaterEqual<0>(SWAR<4, uint32_t>(0x7654'3210)).value()); +static_assert(0x00000008 == greaterEqual<7>(SWAR<4, uint32_t>(0x0123'4567)).value()); +static_assert(0x80000000 == greaterEqual<7>(SWAR<4, uint32_t>(0x7654'3210)).value()); */ -#define GE_MSB_TEST(left, right, result) \ - static_assert(result == greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), \ - SWAR<4, u32>(right)) \ - .value()); -GE_MSB_TEST(0x1000'0010, 0x0111'1101, 0x8000'0080) -GE_MSB_TEST(0x4333'3343, 0x4444'4444, 0x8000'0080) -GE_MSB_TEST(0x0550'0110, 0x0110'0550, 0x8888'8008) -GE_MSB_TEST(0x4771'1414, 0x4641'1774, 0x8888'8008) -GE_MSB_TEST(0x0123'4567, 0x0000'0000, 0x8888'8888) -GE_MSB_TEST(0x0123'4567, 0x7777'7777, 0x0000'0008) -GE_MSB_TEST(0x0000'0000, 0x0123'4567, 0x8000'0000) -GE_MSB_TEST(0x7777'7777, 0x0123'4567, 0x8888'8888) +#define GE_MSB_TEST(left, right, result) static_assert(result == greaterEqual_MSB_off<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); + +GE_MSB_TEST( + 0x1000'0010, + 0x0111'1101, + 0x8000'0080) +GE_MSB_TEST( + 0x4333'3343, + 0x4444'4444, + 0x8000'0080) +GE_MSB_TEST( + 0x0550'0110, + 0x0110'0550, + 0x8888'8008) +GE_MSB_TEST( + 0x4771'1414, + 0x4641'1774, + 0x8888'8008) +GE_MSB_TEST( + 0x0123'4567, + 0x0000'0000, + 0x8888'8888) +GE_MSB_TEST( + 0x0123'4567, + 0x7777'7777, + 0x0000'0008) +GE_MSB_TEST( + 0x0000'0000, + 0x0123'4567, + 0x8000'0000) +GE_MSB_TEST( + 0x7777'7777, + 0x0123'4567, + 0x8888'8888) // Replicate the msb off tests with the greaterEqual that allows msb on -#define GE_MSB_ON_TEST(left, right, result) \ - static_assert( \ - result == \ - greaterEqual<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); - -GE_MSB_ON_TEST(0x1000'0010, 0x0111'1101, 0x8000'0080) -GE_MSB_ON_TEST(0x4333'3343, 0x4444'4444, 0x8000'0080) -GE_MSB_ON_TEST(0x0550'0110, 0x0110'0550, 0x8888'8008) -GE_MSB_ON_TEST(0x4771'1414, 0x4641'1774, 0x8888'8008) -GE_MSB_ON_TEST(0x0123'4567, 0x0000'0000, 0x8888'8888) -GE_MSB_ON_TEST(0x0123'4567, 0x7777'7777, 0x0000'0008) -GE_MSB_ON_TEST(0x0000'0000, 0x0123'4567, 0x8000'0000) -GE_MSB_ON_TEST(0x7777'7777, 0x0123'4567, 0x8888'8888) - -TEST_CASE("greaterEqualMSBOn", "[swar][unsigned-swar]") { - SECTION("single") { - for (uint32_t i = 1; i < 4; i++) { - const auto left = S2_16{0}.blitElement(1, i); - const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i - 1); - const auto test = S2_16{0}.blitElement(1, 2); - CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); +#define GE_MSB_ON_TEST(left, right, result) static_assert(result == greaterEqual<4, u32>(SWAR<4, u32>(left), SWAR<4, u32>(right)).value()); + +GE_MSB_ON_TEST( + 0x1000'0010, + 0x0111'1101, + 0x8000'0080) +GE_MSB_ON_TEST( + 0x4333'3343, + 0x4444'4444, + 0x8000'0080) +GE_MSB_ON_TEST( + 0x0550'0110, + 0x0110'0550, + 0x8888'8008) +GE_MSB_ON_TEST( + 0x4771'1414, + 0x4641'1774, + 0x8888'8008) +GE_MSB_ON_TEST( + 0x0123'4567, + 0x0000'0000, + 0x8888'8888) +GE_MSB_ON_TEST( + 0x0123'4567, + 0x7777'7777, + 0x0000'0008) +GE_MSB_ON_TEST( + 0x0000'0000, + 0x0123'4567, + 0x8000'0000) +GE_MSB_ON_TEST( + 0x7777'7777, + 0x0123'4567, + 0x8888'8888) + +TEST_CASE( + "greaterEqualMSBOn", + "[swar][unsigned-swar]" +) { + SECTION("single") { + for (uint32_t i = 1; i < 4; i++) { + const auto left = S2_16{0}.blitElement(1, i); + const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); + const auto test = S2_16{0}.blitElement(1, 2); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); + } } - } - SECTION("single") { - for (uint32_t i = 1; i < 15; i++) { - const auto large = S4_32{0}.blitElement(1, i + 1); - const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i - 1); - const auto test = S4_32{0}.blitElement(1, 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + SECTION("single") { + for (uint32_t i = 1; i < 15; i++) { + const auto large = S4_32{0}.blitElement(1, i+1); + const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); + const auto test = S4_32{0}.blitElement(1, 8); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + } } - } - SECTION("allLanes") { - for (uint32_t i = 1; i < 15; i++) { - const auto small = S4_32(S4_32::LeastSignificantBit * (i - 1)); - const auto large = S4_32(S4_32::LeastSignificantBit * (i + 1)); - const auto test = S4_32(S4_32::LeastSignificantBit * 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + SECTION("allLanes") { + for (uint32_t i = 1; i < 15; i++) { + const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); + const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); + const auto test = S4_32(S4_32::LeastSignificantBit * 8); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + } } - } } static_assert(0x123 == SWAR<4, uint32_t>(0x173).blitElement(1, 2).value()); @@ -418,91 +473,76 @@ static_assert(0 == isolateLSB(u32(0))); constexpr auto aBooleansWithTrue = booleans(SWAR<4, u32>{0x1}); static_assert(aBooleansWithTrue); -// static_assert(~aBooleansWithTrue); +//static_assert(~aBooleansWithTrue); static_assert(false == !bool(aBooleansWithTrue)); -TEST_CASE("fullAddition", "[swar][signed-swar][unsigned-swar]") { - SECTION("fullAddition overflow") { - const auto sum = - fullAddition(SWAR<4, u32>(0x0000'1000), SWAR<4, u32>(0x0000'7000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.result.value()); - } - SECTION("no carry or overflow for safe values") { - const auto sum = - fullAddition(SWAR<4, u32>(0x0000'8000), SWAR<4, u32>(0x0000'7000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'F000).value() == sum.result.value()); - } - SECTION("fullAddition signed overflow") { - const auto sum = - fullAddition(SWAR<4, u32>(0x0000'5000), SWAR<4, u32>(0x0000'5000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'A000).value() == sum.result.value()); - } - SECTION("0x0111 (7) + 0x0111 (7) is 0x1110 (0x1110->0x1101->0x0010) (0xe " - "unsigned, 0x2 signed) (signed and unsigned check)") { - const auto sum = - fullAddition(SWAR<4, u32>(0x0000'7000), SWAR<4, u32>(0x0000'7000)); - CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - CHECK(SWAR<4, u32>(0x0000'e000).value() == sum.result.value()); - } - SECTION("both carry and overflow") { - const auto sum = - fullAddition(SWAR<4, u32>(0x0000'a000), SWAR<4, u32>(0x0000'a000)); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.carry.value()); - CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); - } +TEST_CASE( + "fullAddition", + "[swar][signed-swar][unsigned-swar]" +) { + SECTION("fullAddition overflow") { + const auto sum = fullAddition(SWAR<4, u32>(0x0000'1000), SWAR<4, u32>(0x0000'7000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.result.value()); + } + SECTION("no carry or overflow for safe values") { + const auto sum = fullAddition(SWAR<4, u32>(0x0000'8000), SWAR<4, u32>(0x0000'7000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'F000).value() == sum.result.value()); + } + SECTION("fullAddition signed overflow") { + const auto sum = fullAddition(SWAR<4, u32>(0x0000'5000), SWAR<4, u32>(0x0000'5000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'A000).value() == sum.result.value()); + } + SECTION("0x0111 (7) + 0x0111 (7) is 0x1110 (0x1110->0x1101->0x0010) (0xe unsigned, 0x2 signed) (signed and unsigned check)") { + const auto sum = fullAddition(SWAR<4, u32>(0x0000'7000), SWAR<4, u32>(0x0000'7000)); + CHECK(SWAR<4, u32>(0x0000'0000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + CHECK(SWAR<4, u32>(0x0000'e000).value() == sum.result.value()); + } + SECTION("both carry and overflow") { + const auto sum = fullAddition(SWAR<4, u32>(0x0000'a000), SWAR<4, u32>(0x0000'a000)); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.carry.value()); + CHECK(SWAR<4, u32>(0x0000'8000).value() == sum.overflow.value()); + } } -TEST_CASE("BooleanSWAR MSBtoLaneMask", "[swar]") { - // BooleanSWAR as a mask: - auto bswar = BooleanSWAR<4, u32>(0x0808'0000); - auto mask = S4_32(0x0F0F'0000); - CHECK(bswar.MSBtoLaneMask().value() == mask.value()); +TEST_CASE( + "BooleanSWAR MSBtoLaneMask", + "[swar]" +) { + // BooleanSWAR as a mask: + auto bswar =BooleanSWAR<4, u32>(0x0808'0000); + auto mask = S4_32(0x0F0F'0000); + CHECK(bswar.MSBtoLaneMask().value() == mask.value()); } -constexpr auto fullAddSumTest = - fullAddition(S4_32(0x0111'1101), S4_32(0x1000'0010)); -static_assert(S4_32(0x1111'1111).value() == fullAddSumTest.result.value()); -static_assert(S4_32(0x0000'0000).value() == fullAddSumTest.carry.value()); -static_assert(S4_32(0x0000'0000).value() == fullAddSumTest.overflow.value()); +constexpr auto fullAddSumTest = fullAddition(S4_32(0x0111'1101), S4_32(0x1000'0010)); +static_assert( S4_32(0x1111'1111).value() == fullAddSumTest.result.value()); +static_assert( S4_32(0x0000'0000).value() == fullAddSumTest.carry.value()); +static_assert( S4_32(0x0000'0000).value() == fullAddSumTest.overflow.value()); // Verify that saturation works (saturates and doesn't saturate as appropriate) -static_assert(S4_16(0x0000).value() == - saturatingUnsignedAddition(S4_16(0x0000), S4_16(0x0000)).value()); -static_assert(S4_16(0x0200).value() == - saturatingUnsignedAddition(S4_16(0x0100), S4_16(0x0100)).value()); -static_assert(S4_16(0x0400).value() == - saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0100)).value()); -static_assert(S4_16(0x0A00).value() == - saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0700)).value()); -static_assert(S4_16(0x0F00).value() == - saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0700)).value()); -static_assert(S4_16(0x0F00).value() == - saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0800)).value()); - -TEST_CASE("saturatingUnsignedAddition", "[swar][saturation]") { - CHECK(SWAR<4, u16>(0x0200).value() == - saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0100)) - .value()); - CHECK(SWAR<4, u16>(0x0400).value() == - saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)) - .value()); - CHECK(SWAR<4, u16>(0x0B00).value() == - saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)) - .value()); - CHECK(SWAR<4, u16>(0x0F00).value() == - saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)) - .value()); - CHECK(SWAR<4, u16>(0x0F00).value() == - saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)) - .value()); - CHECK(S4_32(0x0F0C'F000).value() == - saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)) - .value()); +static_assert( S4_16(0x0000).value() == saturatingUnsignedAddition(S4_16(0x0000), S4_16(0x0000)).value()); +static_assert( S4_16(0x0200).value() == saturatingUnsignedAddition(S4_16(0x0100), S4_16(0x0100)).value()); +static_assert( S4_16(0x0400).value() == saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0100)).value()); +static_assert( S4_16(0x0A00).value() == saturatingUnsignedAddition(S4_16(0x0300), S4_16(0x0700)).value()); +static_assert( S4_16(0x0F00).value() == saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0700)).value()); +static_assert( S4_16(0x0F00).value() == saturatingUnsignedAddition(S4_16(0x0800), S4_16(0x0800)).value()); + +TEST_CASE( + "saturatingUnsignedAddition", + "[swar][saturation]" +) { + CHECK(SWAR<4, u16>(0x0200).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0100)).value()); + CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); + CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); } + From f9e28b1bcad73d6b59a533e9ab4d96a3548bba47 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 15 May 2024 16:00:50 -0700 Subject: [PATCH 25/43] format boolean swar --- test/swar/BasicOperations.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 8bd43994..a5c7288d 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -108,18 +108,13 @@ static_assert([]() { #define F false #define T true -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, F}}.value() == - 0b0000'0000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == - 0b1000'0000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, T, F, F}}.value() == - 0b0000'1000'0000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, T, F}}.value() == - 0b0000'0000'1000'0000); -static_assert(BooleanSWAR{Literals<4, u16>, {F, F, F, T}}.value() == - 0b0000'0000'0000'1000); -static_assert(BooleanSWAR{Literals<4, u16>, {T, F, F, F}}.value() == - 0b1000'0000'0000'0000); +using BS = BooleanSWAR<4, u16>; +static_assert(BS{Literals<4, u16>, {F, F, F, F}}.value() == 0b0000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); +static_assert(BS{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000); +static_assert(BS{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000); +static_assert(BS{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); #undef F #undef T From d63652108e4f58d134b7364590c077c87fabb52f Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 25 May 2024 23:39:04 -0700 Subject: [PATCH 26/43] improvementws --- inc/zoo/swar/SWAR.h | 1 + test/swar/BasicOperations.cpp | 74 ++++++++++++----------------------- 2 files changed, 27 insertions(+), 48 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 0eb1f60d..3aa1708e 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -63,6 +63,7 @@ constexpr std::make_unsigned_t lsbIndex(T v) noexcept { template struct SWAR { using type = std::make_unsigned_t; + constexpr static auto Literal = Literals; constexpr static inline type NBits = NBits_, BitWidth = sizeof(T) * 8, diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index a5c7288d..6728a11a 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -6,8 +6,6 @@ #include #include #include - - using namespace zoo; using namespace zoo::swar; @@ -42,10 +40,8 @@ static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001); static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002); -static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == - 0x0004'0003'0002'0001); -static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == - 0x0001'0002'0003'0004); +static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004); static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001); static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); @@ -60,51 +56,33 @@ static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102); static_assert(SWAR{Literals<4, u8>, {2, 1}}.value() == 0x21); static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); +// Little-endian static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); - // static_assert([]() -> bool { - // constexpr auto array = std::array{1, 2, 3, 4}; - // auto s = SWAR{Literals<16, u64>, array}; - // return s.at(0) == 4; - // }()); - -template -constexpr auto operator==(const SWAR &sw, - const std::array::Lanes> &arr) { - const auto swArr = sw.to_array(); - if (swArr.size() != arr.size()) { - return false; - } - for (auto i = 0; i < SWAR::Lanes; ++i) { - if (swArr.at(i) != arr[i]) { - return false; - } - } - return true; -} - -static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}} == - std::array{1, 2, 3, 4}); - -#define ARRAY_TEST \ - constexpr auto A = S{Literals, {ArrayLiteral}}; \ - constexpr auto B = std::array{ArrayLiteral}; \ - return A == B; - -static_assert([]() { - using S = S8_32; - constexpr auto NBits = 8; -#define ArrayLiteral 4, 3, 2, 1 - ARRAY_TEST -}()); - -static_assert([]() { - using S = S4_16; - constexpr auto NBits = 4; -#define ArrayLiteral 4, 3, 2, 1 - ARRAY_TEST -}()); +// Macro required because initializer lists are not constexpr +#define ARRAY_TEST(SwarType, ...) \ + static_assert([]() { \ + using S = SwarType; \ + constexpr auto arry = std::array{__VA_ARGS__}; \ + constexpr auto test_array = S{S::Literal, {__VA_ARGS__}}.to_array(); \ + static_assert(arry.size() == S::Lanes); \ + for (auto i = 0; i < S::Lanes; ++i) { \ + if (arry[i] != test_array.at(i)) { \ + return false; \ + } \ + } \ + return true; \ + }()); \ + +ARRAY_TEST(S16_64, 1, 2, 3, 4); +ARRAY_TEST(S16_64, 4, 3, 2, 1); + +ARRAY_TEST(S8_32, 255, 255, 255, 255); +ARRAY_TEST(S8_64, 255, 255, 255, 255, 255, 255, 255, 255); + +ARRAY_TEST(S16_32, 65534, 65534); +ARRAY_TEST(S16_64, 65534, 65534, 65534, 65534); #define F false #define T true From d880691c8889dd330b2e7dc0c33cd18f261e42a2 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sat, 25 May 2024 23:42:07 -0700 Subject: [PATCH 27/43] indentation --- inc/zoo/swar/SWAR.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 3aa1708e..7691a728 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -17,7 +17,7 @@ template struct SWAR; template struct Literals_t { - constexpr static void (SWAR::*value)() = nullptr; + constexpr static void (SWAR::*value)() = nullptr; }; template From bbad5833a4fd1444e51df8997da17f3025674c4e Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 00:24:25 -0700 Subject: [PATCH 28/43] make sure we understand equality --- inc/zoo/swar/math.h | 48 +++++++++++++++++++++++++++++++++++ test/swar/BasicOperations.cpp | 20 +++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 inc/zoo/swar/math.h diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h new file mode 100644 index 00000000..c12b850e --- /dev/null +++ b/inc/zoo/swar/math.h @@ -0,0 +1,48 @@ +#pragma once +#include "SWAR.h" + +template +constexpr static +std::enable_if_t, bool> +is_power_of_two(IntegerType x) noexcept { + return x > 0 && (x & (x - 1)) == 0; +} + +template +constexpr static +std::enable_if_t, bool> +is_power_of_two() noexcept { + return is_power_of_two(X); +} +static_assert(is_power_of_two()); + + +template +constexpr static +std::enable_if_t< + std::is_integral_v && + is_power_of_two(), size_t> +modulo_power_of_two(IntegerType x) noexcept { + return x & (N - 1); +} + +static_assert(modulo_power_of_two<4>(0) == 0); +static_assert(modulo_power_of_two<8>(9) == 1); +static_assert(modulo_power_of_two<4096>(4097) == 1); + +// SWAR power of two + +template +constexpr static auto is_power_of_two_sw(S x) noexcept { + constexpr auto NBits = S::NBits; + using T = typename S::Type; + // return x > 0 && (x & (x - 1)) == 0; + auto gt_zero = zoo::swar::greaterEqual_MSB_off(x, S{0}); + + constexpr auto Ones = S::LeastSignificantBit; + auto x_minus_1 = x - Ones; + auto x_and_x_minus_1 = x & x_minus_1; + using BS = zoo::swar::BooleanSWAR; + return B +} + diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 6728a11a..23c3ab2e 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -1,6 +1,8 @@ +#include "zoo/swar/SWAR.h" #include "zoo/swar/associative_iteration.h" #include "catch2/catch.hpp" +#include "math.h" #include #include @@ -93,6 +95,24 @@ static_assert(BS{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'000 static_assert(BS{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000); static_assert(BS{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000); static_assert(BS{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); + + +namespace equality { +using S = SWAR<8, u32>; +using BS = BooleanSWAR<8, u32>; +static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, + S{S::Literal, {1, 2, 3, 4}}).value() + == BS{BS::Literal, {T, T, T, T}}.value()); + +static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, + S{S::Literal, {5, 6, 7, 8}}).value() + == BS{BS::Literal, {F, F, F, F}}.value()); + +static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, + S{S::Literal, {5, 2, 7, 4}}).value() + == BS{BS::Literal, {F, T, F, T}}.value()); +} + #undef F #undef T From f6a04f101250494378f9a376189d5c8d3a42a6e3 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 00:53:55 -0700 Subject: [PATCH 29/43] works --- inc/zoo/swar/math.h | 26 ++++++++++++++++---------- test/swar/BasicOperations.cpp | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index c12b850e..8d105475 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -32,17 +32,23 @@ static_assert(modulo_power_of_two<4096>(4097) == 1); // SWAR power of two +namespace zoo::swar { template -constexpr static auto is_power_of_two_sw(S x) noexcept { - constexpr auto NBits = S::NBits; - using T = typename S::Type; - // return x > 0 && (x & (x - 1)) == 0; - auto gt_zero = zoo::swar::greaterEqual_MSB_off(x, S{0}); - +constexpr static auto subtract_one_unsafe(S x) noexcept { constexpr auto Ones = S::LeastSignificantBit; - auto x_minus_1 = x - Ones; - auto x_and_x_minus_1 = x & x_minus_1; - using BS = zoo::swar::BooleanSWAR; - return B + auto x_minus_1 = S{x.value() - Ones}; + return x_minus_1; +} + +// todo subtract K unsafe using BitmaskMaker + +template +constexpr static auto is_power_of_two(S x) noexcept { + constexpr auto NBits = S::NBits; + using T = typename S::type; + auto x_minus_1 = subtract_one_unsafe(x); + return equals(S{x_minus_1.value() & x.value()}, S{0}); } +} // namespace zoo::swar + diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 23c3ab2e..fb265f50 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -3,6 +3,7 @@ #include "catch2/catch.hpp" #include "math.h" +#include "zoo/swar/math.h" #include #include @@ -113,6 +114,23 @@ static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, == BS{BS::Literal, {F, T, F, T}}.value()); } +namespace math { +using S = SWAR<8, u32>; +using BS = BooleanSWAR<8, u32>; + +static_assert(subtract_one_unsafe(S{S::Literal, {1, 3, 4, 8}}).value() + == S{S::Literal, {0, 2, 3, 7}}.value()); + +static_assert(is_power_of_two(S{S::Literal, {1, 3, 4, 8}}).value() + == BS{BS::Literal, {T, F, T, T}}.value()); + +static_assert(is_power_of_two(S{S::Literal, {3, 7, 11, 101}}).value() + == BS{BS::Literal, {F, F, F, F}}.value()); + +static_assert(is_power_of_two(S{S::Literal, {2, 64, 128, 7}}).value() + == BS{BS::Literal, {T, T, T, 0}}.value()); +} + #undef F #undef T From e39d298d24e0f45e403e77779178ebbf4ba6cb95 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 01:18:52 -0700 Subject: [PATCH 30/43] implement modulo --- inc/zoo/swar/math.h | 37 +++++++++++++++++++++++++++++++++-- test/swar/BasicOperations.cpp | 8 ++++---- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index 8d105475..7801681f 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -1,5 +1,9 @@ #pragma once #include "SWAR.h" +#include +#include + +namespace zoo::math { template constexpr static @@ -30,6 +34,8 @@ static_assert(modulo_power_of_two<4>(0) == 0); static_assert(modulo_power_of_two<8>(9) == 1); static_assert(modulo_power_of_two<4096>(4097) == 1); +} + // SWAR power of two namespace zoo::swar { @@ -39,16 +45,43 @@ constexpr static auto subtract_one_unsafe(S x) noexcept { auto x_minus_1 = S{x.value() - Ones}; return x_minus_1; } - // todo subtract K unsafe using BitmaskMaker +// todo subtract K "saturated" using BitmaskMaker template constexpr static auto is_power_of_two(S x) noexcept { constexpr auto NBits = S::NBits; using T = typename S::type; + auto greater_than_0 = greaterEqual_MSB_off(x, S{0}); auto x_minus_1 = subtract_one_unsafe(x); - return equals(S{x_minus_1.value() & x.value()}, S{0}); + auto zero = equals(S{x_minus_1.value() & x.value()}, S{0}); + return greater_than_0 & zero; } +template +constexpr static +auto +modulo_power_of_two(const S x) noexcept { + static_assert(zoo::math::is_power_of_two()); + constexpr auto NBits = S::NBits; + using T = typename S::type; + constexpr auto N_minus_1 = N - 1; + constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; + T y = x.value() & N_in_lanes; + return S{y}; +} + +using S = zoo::swar::SWAR<4, uint16_t>; +static_assert(modulo_power_of_two<4>(S{0}).value() == 0); + +static_assert(modulo_power_of_two<4>(S{S::Literal, {0, 2, 4, 6}}).value() + == S{S::Literal, {0, 2, 0, 2}}.value()); + +static_assert(modulo_power_of_two<512>(S{S::Literal, {0, 2, 4, 6}}).value() + == S{S::Literal, {0, 2, 4, 6}}.value()); + +static_assert(modulo_power_of_two<512>(S{S::Literal, {0, 511, 512, 1024}}).value() + == S{S::Literal, {0, 511, 0, 0}}.value()); + } // namespace zoo::swar diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index fb265f50..f0d7ca72 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -5,10 +5,10 @@ #include "math.h" #include "zoo/swar/math.h" -#include -#include -#include -#include +// #include +// #include +// #include +// #include using namespace zoo; using namespace zoo::swar; From 0ca3600bc5d2dbd6aecd7caad1f89e9a93f8dddb Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 01:24:05 -0700 Subject: [PATCH 31/43] modulo works! --- inc/zoo/swar/math.h | 11 ----------- test/swar/BasicOperations.cpp | 14 +++++++++++++- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index 7801681f..734f0057 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -71,17 +71,6 @@ modulo_power_of_two(const S x) noexcept { return S{y}; } -using S = zoo::swar::SWAR<4, uint16_t>; -static_assert(modulo_power_of_two<4>(S{0}).value() == 0); - -static_assert(modulo_power_of_two<4>(S{S::Literal, {0, 2, 4, 6}}).value() - == S{S::Literal, {0, 2, 0, 2}}.value()); - -static_assert(modulo_power_of_two<512>(S{S::Literal, {0, 2, 4, 6}}).value() - == S{S::Literal, {0, 2, 4, 6}}.value()); - -static_assert(modulo_power_of_two<512>(S{S::Literal, {0, 511, 512, 1024}}).value() - == S{S::Literal, {0, 511, 0, 0}}.value()); } // namespace zoo::swar diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index f0d7ca72..26e33ef4 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -114,7 +114,7 @@ static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, == BS{BS::Literal, {F, T, F, T}}.value()); } -namespace math { +namespace math_test { using S = SWAR<8, u32>; using BS = BooleanSWAR<8, u32>; @@ -129,6 +129,18 @@ static_assert(is_power_of_two(S{S::Literal, {3, 7, 11, 101}}).value() static_assert(is_power_of_two(S{S::Literal, {2, 64, 128, 7}}).value() == BS{BS::Literal, {T, T, T, 0}}.value()); + +static_assert(modulo_power_of_two<4>(S{0}).value() == 0); + +static_assert(modulo_power_of_two<4>(S{S::Literal, {0, 2, 4, 6}}).value() + == S{S::Literal, {0, 2, 0, 2}}.value()); + +static_assert(modulo_power_of_two<512>(S{S::Literal, {0, 2, 4, 6}}).value() + == S{S::Literal, {0, 2, 4, 6}}.value()); + +static_assert(modulo_power_of_two<64>(S{S::Literal, {0, 1, 64, 65}}).value() + == S{S::Literal, {0, 1, 0, 1}}.value()); + } #undef F From f7ffe70a0e90f3f0e69b5bc9f08a84cc0ab99c12 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 01:24:46 -0700 Subject: [PATCH 32/43] snifae --- inc/zoo/swar/math.h | 3 +-- test/swar/BasicOperations.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index 734f0057..46a33247 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -60,9 +60,8 @@ constexpr static auto is_power_of_two(S x) noexcept { template constexpr static -auto +std::enable_if_t(), S> modulo_power_of_two(const S x) noexcept { - static_assert(zoo::math::is_power_of_two()); constexpr auto NBits = S::NBits; using T = typename S::type; constexpr auto N_minus_1 = N - 1; diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 26e33ef4..7f0b99b3 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -135,7 +135,7 @@ static_assert(modulo_power_of_two<4>(S{0}).value() == 0); static_assert(modulo_power_of_two<4>(S{S::Literal, {0, 2, 4, 6}}).value() == S{S::Literal, {0, 2, 0, 2}}.value()); -static_assert(modulo_power_of_two<512>(S{S::Literal, {0, 2, 4, 6}}).value() +static_assert(modulo_power_of_two<8>(S{S::Literal, {0, 2, 4, 6}}).value() == S{S::Literal, {0, 2, 4, 6}}.value()); static_assert(modulo_power_of_two<64>(S{S::Literal, {0, 1, 64, 65}}).value() From 53e56f507f6e88d13a62cc7f3a5b837e43ab4e08 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 01:25:19 -0700 Subject: [PATCH 33/43] better example --- test/swar/BasicOperations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 7f0b99b3..ebed3c1f 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -135,8 +135,8 @@ static_assert(modulo_power_of_two<4>(S{0}).value() == 0); static_assert(modulo_power_of_two<4>(S{S::Literal, {0, 2, 4, 6}}).value() == S{S::Literal, {0, 2, 0, 2}}.value()); -static_assert(modulo_power_of_two<8>(S{S::Literal, {0, 2, 4, 6}}).value() - == S{S::Literal, {0, 2, 4, 6}}.value()); +static_assert(modulo_power_of_two<8>(S{S::Literal, {0, 2, 4, 9}}).value() + == S{S::Literal, {0, 2, 4, 1}}.value()); static_assert(modulo_power_of_two<64>(S{S::Literal, {0, 1, 64, 65}}).value() == S{S::Literal, {0, 1, 0, 1}}.value()); From b62355a287118595c8de224afde61cdaab2cb1a2 Mon Sep 17 00:00:00 2001 From: thecppzoo Date: Sun, 26 May 2024 13:25:07 -0700 Subject: [PATCH 34/43] Update SWAR.h There ought to be a space separating a `struct` being defined and the `{` that begins the definition --- inc/zoo/swar/SWAR.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 7691a728..c0c98139 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -21,7 +21,7 @@ template struct Literals_t { }; template -constexpr Literals_t Literals{}; +constexpr Literals_t Literals {}; using u64 = uint64_t; using u32 = uint32_t; From bf93d56f9427ff2dc3f16a79ffccdd02f9091a9b Mon Sep 17 00:00:00 2001 From: thecppzoo Date: Sun, 26 May 2024 13:26:21 -0700 Subject: [PATCH 35/43] Update SWAR.h Reverting mistake, this is not a `struct` definition but a variable! --- inc/zoo/swar/SWAR.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index c0c98139..7691a728 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -21,7 +21,7 @@ template struct Literals_t { }; template -constexpr Literals_t Literals {}; +constexpr Literals_t Literals{}; using u64 = uint64_t; using u32 = uint32_t; From 79d3847a678c5aba794f18022bffa93982a0f4f9 Mon Sep 17 00:00:00 2001 From: thecppzoo Date: Sun, 26 May 2024 13:32:35 -0700 Subject: [PATCH 36/43] Attempt to sidestep MSVC bug --- test/swar/BasicOperations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 6728a11a..5869b3f2 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -37,7 +37,7 @@ static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255); static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15); static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); -static_assert(SWAR{Literals<32, u64>, {2, 1}}.value() == 0x00000002'00000001); +static_assert(SWAR{zoo::swar::Literals<32, zoo::swar::u64>, {2, 1}}.value() == 0x00000002'00000001); static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002); static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); From a585ad9cf4f9eeacce112a2c66359208080d21d3 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 17:10:50 -0700 Subject: [PATCH 37/43] camelCawe --- inc/zoo/meta/BitmaskMaker.h | 2 ++ inc/zoo/swar/math.h | 33 ++++++++++++++------------------- test/swar/BasicOperations.cpp | 16 ++++++++-------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/inc/zoo/meta/BitmaskMaker.h b/inc/zoo/meta/BitmaskMaker.h index 4c8008ac..1134773b 100644 --- a/inc/zoo/meta/BitmaskMaker.h +++ b/inc/zoo/meta/BitmaskMaker.h @@ -43,6 +43,8 @@ struct BitmaskMaker { static_assert(0xF0F0 == BitmaskMaker::value); static_assert(0xEDFEDFED == BitmaskMaker::value); + + }} // zoo::meta #endif diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index 46a33247..c8118bd4 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -1,46 +1,42 @@ #pragma once #include "SWAR.h" -#include -#include namespace zoo::math { template constexpr static std::enable_if_t, bool> -is_power_of_two(IntegerType x) noexcept { +isPowerOfTwo(IntegerType x) noexcept { return x > 0 && (x & (x - 1)) == 0; } template constexpr static std::enable_if_t, bool> -is_power_of_two() noexcept { - return is_power_of_two(X); +isPowerOfTwo() noexcept { + return isPowerOfTwo(X); } -static_assert(is_power_of_two()); +static_assert(isPowerOfTwo()); template constexpr static std::enable_if_t< std::is_integral_v && - is_power_of_two(), size_t> -modulo_power_of_two(IntegerType x) noexcept { + isPowerOfTwo(), size_t> +moduloPowerOfTwo(IntegerType x) noexcept { return x & (N - 1); } -static_assert(modulo_power_of_two<4>(0) == 0); -static_assert(modulo_power_of_two<8>(9) == 1); -static_assert(modulo_power_of_two<4096>(4097) == 1); +static_assert(moduloPowerOfTwo<4>(0) == 0); +static_assert(moduloPowerOfTwo<8>(9) == 1); +static_assert(moduloPowerOfTwo<4096>(4097) == 1); } -// SWAR power of two - namespace zoo::swar { template -constexpr static auto subtract_one_unsafe(S x) noexcept { +constexpr static auto subtractOneUnsafe(S x) noexcept { constexpr auto Ones = S::LeastSignificantBit; auto x_minus_1 = S{x.value() - Ones}; return x_minus_1; @@ -49,19 +45,19 @@ constexpr static auto subtract_one_unsafe(S x) noexcept { // todo subtract K "saturated" using BitmaskMaker template -constexpr static auto is_power_of_two(S x) noexcept { +constexpr static auto isPowerOfTwo(S x) noexcept { constexpr auto NBits = S::NBits; using T = typename S::type; auto greater_than_0 = greaterEqual_MSB_off(x, S{0}); - auto x_minus_1 = subtract_one_unsafe(x); + auto x_minus_1 = subtractOneUnsafe(x); auto zero = equals(S{x_minus_1.value() & x.value()}, S{0}); return greater_than_0 & zero; } template constexpr static -std::enable_if_t(), S> -modulo_power_of_two(const S x) noexcept { +std::enable_if_t(), S> +moduloPowerOfTwo(const S x) noexcept { constexpr auto NBits = S::NBits; using T = typename S::type; constexpr auto N_minus_1 = N - 1; @@ -70,6 +66,5 @@ modulo_power_of_two(const S x) noexcept { return S{y}; } - } // namespace zoo::swar diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index ebed3c1f..a223c5df 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -118,27 +118,27 @@ namespace math_test { using S = SWAR<8, u32>; using BS = BooleanSWAR<8, u32>; -static_assert(subtract_one_unsafe(S{S::Literal, {1, 3, 4, 8}}).value() +static_assert(subtractOneUnsafe(S{S::Literal, {1, 3, 4, 8}}).value() == S{S::Literal, {0, 2, 3, 7}}.value()); -static_assert(is_power_of_two(S{S::Literal, {1, 3, 4, 8}}).value() +static_assert(isPowerOfTwo(S{S::Literal, {1, 3, 4, 8}}).value() == BS{BS::Literal, {T, F, T, T}}.value()); -static_assert(is_power_of_two(S{S::Literal, {3, 7, 11, 101}}).value() +static_assert(isPowerOfTwo(S{S::Literal, {3, 7, 11, 101}}).value() == BS{BS::Literal, {F, F, F, F}}.value()); -static_assert(is_power_of_two(S{S::Literal, {2, 64, 128, 7}}).value() +static_assert(isPowerOfTwo(S{S::Literal, {2, 64, 128, 7}}).value() == BS{BS::Literal, {T, T, T, 0}}.value()); -static_assert(modulo_power_of_two<4>(S{0}).value() == 0); +static_assert(moduloPowerOfTwo<4>(S{0}).value() == 0); -static_assert(modulo_power_of_two<4>(S{S::Literal, {0, 2, 4, 6}}).value() +static_assert(moduloPowerOfTwo<4>(S{S::Literal, {0, 2, 4, 6}}).value() == S{S::Literal, {0, 2, 0, 2}}.value()); -static_assert(modulo_power_of_two<8>(S{S::Literal, {0, 2, 4, 9}}).value() +static_assert(moduloPowerOfTwo<8>(S{S::Literal, {0, 2, 4, 9}}).value() == S{S::Literal, {0, 2, 4, 1}}.value()); -static_assert(modulo_power_of_two<64>(S{S::Literal, {0, 1, 64, 65}}).value() +static_assert(moduloPowerOfTwo<64>(S{S::Literal, {0, 1, 64, 65}}).value() == S{S::Literal, {0, 1, 0, 1}}.value()); } From f1452cb5fc3170c92f9cafd843c9b0052a134820 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 17:12:47 -0700 Subject: [PATCH 38/43] indentatino --- inc/zoo/swar/math.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index c8118bd4..a8e0d2c6 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -7,14 +7,14 @@ template constexpr static std::enable_if_t, bool> isPowerOfTwo(IntegerType x) noexcept { - return x > 0 && (x & (x - 1)) == 0; + return x && (x & (x - 1)) == 0; } template constexpr static std::enable_if_t, bool> isPowerOfTwo() noexcept { - return isPowerOfTwo(X); + return isPowerOfTwo(X); } static_assert(isPowerOfTwo()); @@ -25,7 +25,7 @@ std::enable_if_t< std::is_integral_v && isPowerOfTwo(), size_t> moduloPowerOfTwo(IntegerType x) noexcept { - return x & (N - 1); + return x & (N - 1); } static_assert(moduloPowerOfTwo<4>(0) == 0); @@ -37,33 +37,33 @@ static_assert(moduloPowerOfTwo<4096>(4097) == 1); namespace zoo::swar { template constexpr static auto subtractOneUnsafe(S x) noexcept { - constexpr auto Ones = S::LeastSignificantBit; - auto x_minus_1 = S{x.value() - Ones}; - return x_minus_1; + constexpr auto Ones = S::LeastSignificantBit; + auto x_minus_1 = S{x.value() - Ones}; + return x_minus_1; } // todo subtract K unsafe using BitmaskMaker // todo subtract K "saturated" using BitmaskMaker template constexpr static auto isPowerOfTwo(S x) noexcept { - constexpr auto NBits = S::NBits; - using T = typename S::type; - auto greater_than_0 = greaterEqual_MSB_off(x, S{0}); - auto x_minus_1 = subtractOneUnsafe(x); - auto zero = equals(S{x_minus_1.value() & x.value()}, S{0}); - return greater_than_0 & zero; + constexpr auto NBits = S::NBits; + using T = typename S::type; + auto greater_than_0 = greaterEqual_MSB_off(x, S{0}); + auto x_minus_1 = subtractOneUnsafe(x); + auto zero = equals(S{x_minus_1.value() & x.value()}, S{0}); + return greater_than_0 & zero; } template constexpr static std::enable_if_t(), S> moduloPowerOfTwo(const S x) noexcept { - constexpr auto NBits = S::NBits; - using T = typename S::type; - constexpr auto N_minus_1 = N - 1; - constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; - T y = x.value() & N_in_lanes; - return S{y}; + constexpr auto NBits = S::NBits; + using T = typename S::type; + constexpr auto N_minus_1 = N - 1; + constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; + T y = x.value() & N_in_lanes; + return S{y}; } } // namespace zoo::swar From f45632af8925e9473301eaff3cb7ed8b116ca995 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 17:13:10 -0700 Subject: [PATCH 39/43] auto --- inc/zoo/swar/math.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index a8e0d2c6..585050ee 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -62,7 +62,7 @@ moduloPowerOfTwo(const S x) noexcept { using T = typename S::type; constexpr auto N_minus_1 = N - 1; constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; - T y = x.value() & N_in_lanes; + auto y = x.value() & N_in_lanes; return S{y}; } From 79148d38587672fee66178ac1855626a0906d0e9 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Sun, 26 May 2024 17:31:54 -0700 Subject: [PATCH 40/43] add utils --- inc/zoo/swar/SWAR.h | 4 ++++ inc/zoo/swar/math.h | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 7691a728..74758e34 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -16,6 +16,9 @@ namespace zoo { namespace swar { template struct SWAR; +template +struct BooleanSWAR; + template struct Literals_t { constexpr static void (SWAR::*value)() = nullptr; }; @@ -63,6 +66,7 @@ constexpr std::make_unsigned_t lsbIndex(T v) noexcept { template struct SWAR { using type = std::make_unsigned_t; + using Boolean = BooleanSWAR; constexpr static auto Literal = Literals; constexpr static inline type NBits = NBits_, diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index 585050ee..aefceb6b 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -58,13 +58,12 @@ template constexpr static std::enable_if_t(), S> moduloPowerOfTwo(const S x) noexcept { - constexpr auto NBits = S::NBits; - using T = typename S::type; constexpr auto N_minus_1 = N - 1; - constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; + constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; auto y = x.value() & N_in_lanes; return S{y}; } + } // namespace zoo::swar From 08a73e1b7a264e0e45fc42a349bfa30d334edd93 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Mon, 27 May 2024 00:10:35 -0700 Subject: [PATCH 41/43] tidy up --- inc/zoo/swar/math.h | 7 +------ test/swar/BasicOperations.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h index aefceb6b..d089fb15 100644 --- a/inc/zoo/swar/math.h +++ b/inc/zoo/swar/math.h @@ -16,7 +16,6 @@ std::enable_if_t, bool> isPowerOfTwo() noexcept { return isPowerOfTwo(X); } -static_assert(isPowerOfTwo()); template @@ -28,10 +27,6 @@ moduloPowerOfTwo(IntegerType x) noexcept { return x & (N - 1); } -static_assert(moduloPowerOfTwo<4>(0) == 0); -static_assert(moduloPowerOfTwo<8>(9) == 1); -static_assert(moduloPowerOfTwo<4096>(4097) == 1); - } namespace zoo::swar { @@ -48,7 +43,7 @@ template constexpr static auto isPowerOfTwo(S x) noexcept { constexpr auto NBits = S::NBits; using T = typename S::type; - auto greater_than_0 = greaterEqual_MSB_off(x, S{0}); + auto greater_than_0 = greaterEqual(x, S{0}); auto x_minus_1 = subtractOneUnsafe(x); auto zero = equals(S{x_minus_1.value() & x.value()}, S{0}); return greater_than_0 & zero; diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 76fc9422..ebd46ac6 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -1,4 +1,3 @@ -#include "zoo/swar/SWAR.h" #include "zoo/swar/associative_iteration.h" #include "catch2/catch.hpp" @@ -115,9 +114,13 @@ static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, } namespace math_test { +static_assert(math::isPowerOfTwo()); +static_assert(math::moduloPowerOfTwo<4>(0) == 0); +static_assert(math::moduloPowerOfTwo<8>(9) == 1); +static_assert(math::moduloPowerOfTwo<4096>(4097) == 1); + using S = SWAR<8, u32>; using BS = BooleanSWAR<8, u32>; - static_assert(subtractOneUnsafe(S{S::Literal, {1, 3, 4, 8}}).value() == S{S::Literal, {0, 2, 3, 7}}.value()); @@ -140,7 +143,6 @@ static_assert(moduloPowerOfTwo<8>(S{S::Literal, {0, 2, 4, 9}}).value() static_assert(moduloPowerOfTwo<64>(S{S::Literal, {0, 1, 64, 65}}).value() == S{S::Literal, {0, 1, 0, 1}}.value()); - } #undef F From 0228573695b38280c1ea8470b754fbcd979636a9 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Mon, 27 May 2024 00:11:41 -0700 Subject: [PATCH 42/43] weren't using that anyway --- inc/zoo/swar/SWAR.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 74758e34..7691a728 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -16,9 +16,6 @@ namespace zoo { namespace swar { template struct SWAR; -template -struct BooleanSWAR; - template struct Literals_t { constexpr static void (SWAR::*value)() = nullptr; }; @@ -66,7 +63,6 @@ constexpr std::make_unsigned_t lsbIndex(T v) noexcept { template struct SWAR { using type = std::make_unsigned_t; - using Boolean = BooleanSWAR; constexpr static auto Literal = Literals; constexpr static inline type NBits = NBits_, From 0829a7e96ce4967c5f87ed427098f4f0b56af341 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Mon, 27 May 2024 00:42:28 -0700 Subject: [PATCH 43/43] tidy up tests --- test/swar/BasicOperations.cpp | 82 +++++++++++++++++------------------ 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index ebd46ac6..b814f0fd 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -86,31 +86,29 @@ ARRAY_TEST(S8_64, 255, 255, 255, 255, 255, 255, 255, 255); ARRAY_TEST(S16_32, 65534, 65534); ARRAY_TEST(S16_64, 65534, 65534, 65534, 65534); -#define F false -#define T true using BS = BooleanSWAR<4, u16>; -static_assert(BS{Literals<4, u16>, {F, F, F, F}}.value() == 0b0000'0000'0000'0000); -static_assert(BS{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); -static_assert(BS{Literals<4, u16>, {F, T, F, F}}.value() == 0b0000'1000'0000'0000); -static_assert(BS{Literals<4, u16>, {F, F, T, F}}.value() == 0b0000'0000'1000'0000); -static_assert(BS{Literals<4, u16>, {F, F, F, T}}.value() == 0b0000'0000'0000'1000); -static_assert(BS{Literals<4, u16>, {T, F, F, F}}.value() == 0b1000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {0, 0, 0, 0}}.value() == 0b0000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {1, 0, 0, 0}}.value() == 0b1000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {0, 1, 0, 0}}.value() == 0b0000'1000'0000'0000); +static_assert(BS{Literals<4, u16>, {0, 0, 1, 0}}.value() == 0b0000'0000'1000'0000); +static_assert(BS{Literals<4, u16>, {0, 0, 0, 1}}.value() == 0b0000'0000'0000'1000); +static_assert(BS{Literals<4, u16>, {1, 0, 0, 0}}.value() == 0b1000'0000'0000'0000); namespace equality { using S = SWAR<8, u32>; using BS = BooleanSWAR<8, u32>; -static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, - S{S::Literal, {1, 2, 3, 4}}).value() - == BS{BS::Literal, {T, T, T, T}}.value()); - -static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, - S{S::Literal, {5, 6, 7, 8}}).value() - == BS{BS::Literal, {F, F, F, F}}.value()); - -static_assert(equals(S{S::Literal, {1, 2, 3, 4}}, - S{S::Literal, {5, 2, 7, 4}}).value() - == BS{BS::Literal, {F, T, F, T}}.value()); +template +constexpr auto laneWiseEqualsTest( + const typename S::type (&left)[S::Lanes], + const typename S::type (&right)[S::Lanes], + const bool (&expected)[S::Lanes]) { + return equals(S{S::Literal, left}, S{S::Literal, right}).value() + == BS{BS::Literal, expected}.value(); +} +static_assert(laneWiseEqualsTest({1, 2, 3, 4}, {1, 2, 3, 4}, {1, 1, 1, 1})); +static_assert(laneWiseEqualsTest({1, 2, 3, 4}, {5, 6, 7, 8}, {0, 0, 0, 0})); +static_assert(laneWiseEqualsTest({1, 2, 3, 4}, {5, 2, 7, 4}, {0, 1, 0, 1})); } namespace math_test { @@ -121,33 +119,31 @@ static_assert(math::moduloPowerOfTwo<4096>(4097) == 1); using S = SWAR<8, u32>; using BS = BooleanSWAR<8, u32>; -static_assert(subtractOneUnsafe(S{S::Literal, {1, 3, 4, 8}}).value() - == S{S::Literal, {0, 2, 3, 7}}.value()); - -static_assert(isPowerOfTwo(S{S::Literal, {1, 3, 4, 8}}).value() - == BS{BS::Literal, {T, F, T, T}}.value()); - -static_assert(isPowerOfTwo(S{S::Literal, {3, 7, 11, 101}}).value() - == BS{BS::Literal, {F, F, F, F}}.value()); - -static_assert(isPowerOfTwo(S{S::Literal, {2, 64, 128, 7}}).value() - == BS{BS::Literal, {T, T, T, 0}}.value()); - +template +constexpr auto powerOfTwoTest( + const typename S::type (&input)[S::Lanes], + const bool (&expected)[S::Lanes]) { + return isPowerOfTwo(S{S::Literal, input}).value() == BS{BS::Literal, expected}.value(); +} +static_assert(powerOfTwoTest({1, 2, 3, 4}, {1, 1, 0, 1})); +static_assert(powerOfTwoTest({2, 3, 64, 77}, {1, 0, 1, 0})); +static_assert(powerOfTwoTest({3, 65, 128, 0}, {0, 0, 1, 1})); +static_assert(powerOfTwoTest({256, 7, 11, 101}, {1, 0, 0, 0})); +static_assert(powerOfTwoTest({2, 64, 128, 7}, {1, 1, 1, 0})); + +template +constexpr auto moduloSwarTest( + const typename S::type (&input)[S::Lanes], + const typename S::type (&expected)[S::Lanes]) { + return moduloPowerOfTwo(S{S::Literal, input}).value() == S{S::Literal, expected}.value(); +} static_assert(moduloPowerOfTwo<4>(S{0}).value() == 0); - -static_assert(moduloPowerOfTwo<4>(S{S::Literal, {0, 2, 4, 6}}).value() - == S{S::Literal, {0, 2, 0, 2}}.value()); - -static_assert(moduloPowerOfTwo<8>(S{S::Literal, {0, 2, 4, 9}}).value() - == S{S::Literal, {0, 2, 4, 1}}.value()); - -static_assert(moduloPowerOfTwo<64>(S{S::Literal, {0, 1, 64, 65}}).value() - == S{S::Literal, {0, 1, 0, 1}}.value()); +static_assert(moduloSwarTest<4>({0, 2, 4, 6}, {0, 2, 0, 2})); +static_assert(moduloSwarTest<4>({1, 3, 5, 7}, {1, 3, 1, 3})); +static_assert(moduloSwarTest<8>({9, 8, 16, 7}, {1, 0, 0, 7})); +static_assert(moduloSwarTest<16>({17, 32, 64, 127}, {1, 0, 0, 15})); } -#undef F -#undef T - namespace Multiplication { static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value());