diff --git a/README.md b/README.md index 2efdedd9..08b24f58 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![C++ CI](https://github.com/thecppzoo/zoo/actions/workflows/master.yaml/badge.svg)](https://github.com/thecppzoo/zoo/actions/workflows/master.yaml) ## Build suggestion diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 508ba0ac..eecbcec3 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -76,14 +76,25 @@ struct SWAR { AllOnes = ~std::make_unsigned_t{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug? LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, MostSignificantBit = LeastSignificantBit << (NBits - 1), - LeastSignificantLaneMask = - sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits - ~T(0) : - ~(~T(0) << NBits), + LeastSignificantLaneMask = []() { + if constexpr (NBits < sizeof(T) * 8) { + return (T(1) << NBits) - 1; + } else { + return ~T(0); + } + }(), // Use LowerBits in favor of ~MostSignificantBit to not pollute // "don't care" bits when non-power-of-two bit lane sizes are supported LowerBits = MostSignificantBit - LeastSignificantBit; + static_assert(std::is_unsigned_v, + "You should not use a signed type as the base for a SWAR type. " + "If you have used `int` or `long`, please use `uint32_t` or `uint64_t` instead. " + "This type parameter is only used to determine the total width of the SWAR register. " + "The signed-ness of the type has no *intentional* semantic meaning to what you're defining and " + "furthermore, some bitwise operations are different for signed and unsigned types." 
+ ); + SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} constexpr explicit operator T() const noexcept { return m_v; } @@ -104,9 +115,13 @@ struct SWAR { SWAR_BINARY_OPERATORS_X_LIST #undef X + constexpr static T laneMask(int laneIndex) noexcept { + return LeastSignificantLaneMask << (NBits * laneIndex); + } + // Returns lane at position with other lanes cleared. - constexpr T isolateLane(int position) const noexcept { - return m_v & (LeastSignificantLaneMask << (NBits * position)); + constexpr T isolateLane(int laneIndex) const noexcept { + return m_v & laneMask(laneIndex); } // Returns lane value at position, in lane 0, rest of SWAR cleared. @@ -254,7 +269,7 @@ struct BooleanSWAR: SWAR { static constexpr auto MaskNonLSB = ~MaskLSB; static constexpr auto MaskNonMSB = ~MaskMSB; constexpr explicit BooleanSWAR(T v): Base(v) {} - + constexpr BooleanSWAR clear(int bit) const noexcept { constexpr auto Bit = T(1) << (NBits - 1); return this->m_v ^ (Bit << (NBits * bit)); } @@ -270,7 +285,7 @@ struct BooleanSWAR: SWAR { constexpr auto operator ~() const noexcept { return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this); } - + constexpr auto operator not() const noexcept { return BooleanSWAR(MaskMSB ^ *this); } @@ -395,7 +410,7 @@ greaterEqual(SWAR left, SWAR right) noexcept { using S = swar::SWAR; const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y const auto z = (x|h) - (y&~h); - // bitwise ternary median! + // bitwise ternary median! 
const auto t = h & ~median(x, ~y, z); return ~BooleanSWAR{static_cast(t)}; // ~(x= y } diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 587c5ce6..137028eb 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -1,7 +1,9 @@ #ifndef ZOO_SWAR_ASSOCIATIVE_ITERATION_H #define ZOO_SWAR_ASSOCIATIVE_ITERATION_H +#include "zoo/meta/BitmaskMaker.h" #include "zoo/swar/SWAR.h" +#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING @@ -260,7 +262,8 @@ template constexpr auto makeLaneMaskFromMSB(SWAR input) { using S = SWAR; auto msb = input & S{S::MostSignificantBit}; - auto msbCopiedToLSB = S{msb.value() >> (NB - 1)}; + B val = msb.value() >> (NB - 1); + auto msbCopiedToLSB = S{val}; return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB); } @@ -392,8 +395,13 @@ template< typename CountHalver > constexpr auto associativeOperatorIterated_regressive( - Base base, Base neutral, IterationCount count, IterationCount forSquaring, - Operator op, unsigned log2Count, CountHalver ch + Base base, + Base neutral, + IterationCount count, + IterationCount forSquaring, + Operator op, + unsigned log2Count, + CountHalver ch ) { auto result = neutral; if(!log2Count) { return result; } @@ -419,10 +427,12 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount( auto halver = [](auto counts) { auto msbCleared = counts & ~S{S::MostSignificantBit}; - return S{msbCleared.value() << 1}; + T res = msbCleared.value() << 1; + return S{res}; }; - multiplier = S{multiplier.value() << (NB - ActualBits)}; + T val = multiplier.value() << (NB - ActualBits); + multiplier = S{val}; return associativeOperatorIterated_regressive( multiplicand, S{0}, multiplier, S{S::MostSignificantBit}, operation, ActualBits, halver @@ -483,6 +493,34 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( ); } +/** Transforms a binary number into its unary representation (in binary). + * E.g. 
0b0011 (3) -> 0b0111 + * It seems that getting the lane width exactly is overflowy */ +template +constexpr auto binaryToUnary_Plural(S input) { + constexpr auto two = S{meta::BitmaskMaker::value}; + constexpr auto one = S::LeastSignificantBit; + constexpr auto max_size = S::LeastSignificantLaneMask; + typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; + return S{v}; +} + +template +constexpr auto rightShift_Plural(S input, S shifts) { + auto minimumMask = ~binaryToUnary_Plural(shifts); + auto inputMasked = input.value() & minimumMask.value(); + + typename S::type result = 0; + for (int i = 0; i < S::Lanes; i++) { + auto laneMask = S::laneMask(i); + auto currentShiftAmount = shifts.at(i); + auto masked = inputMasked & laneMask; + auto shifted = masked >> currentShiftAmount; + result |= shifted; + } + return S{result}; +} + template constexpr auto multiplication_OverflowUnsafe( SWAR multiplicand, diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1628e222..d61a3598 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -42,6 +42,11 @@ constexpr auto PrecisionFixtureTest = 0x89ABCDEF; constexpr auto Doubled = doublePrecision(SWAR<4, uint32_t>{PrecisionFixtureTest}); +static_assert(makeLaneMaskFromMSB(SWAR<4, uint16_t>{ + 0b1000'0000'1000'0000}).value() == + 0b1111'0000'1111'0000 +); + static_assert(0x090B0D0F == Doubled.even.value()); static_assert(0x080A0C0E == Doubled.odd.value()); static_assert(PrecisionFixtureTest == halvePrecision(Doubled.even, Doubled.odd).value()); @@ -357,7 +362,7 @@ TEST_CASE( const auto left = S2_16{0}.blitElement(1, i); const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); const auto test = S2_16{0}.blitElement(1, 2); - CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); } } SECTION("single") { @@ -365,7 +370,7 @@ TEST_CASE( const auto large = 
S4_32{0}.blitElement(1, i+1); const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); const auto test = S4_32{0}.blitElement(1, 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } SECTION("allLanes") { @@ -373,7 +378,7 @@ TEST_CASE( const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); const auto test = S4_32(S4_32::LeastSignificantBit * 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } } @@ -425,7 +430,7 @@ TEST_CASE( "BooleanSWAR MSBtoLaneMask", "[swar]" ) { - // BooleanSWAR as a mask: + // BooleanSWAR as a mask: auto bswar =BooleanSWAR<4, u32>(0x0808'0000); auto mask = S4_32(0x0F0F'0000); CHECK(bswar.MSBtoLaneMask().value() == mask.value()); @@ -452,6 +457,88 @@ TEST_CASE( CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); - CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); } + +template +constexpr static bool binaryToUnary_Plural_Test() { + return binaryToUnary_Plural(SWAR{Input}).value() == Expected; +}; + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0001'0010'0011'0011, + 
0b0001'0011'0111'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0000'0001'0010'0011, + 0b0000'0001'0011'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0100'0001'0010'0011, + 0b1111'0001'0011'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0000'0000'0000'0001, + 0b0000'0000'0000'0001 +>()); + +static_assert(binaryToUnary_Plural_Test<8, uint16_t, + 0b000000111'00000101, // 7 ' 5 + 0b001111111'00011111 // seven ones, five ones! +>()); + +template +constexpr static bool rightShift_Plural_Test() { + using S = SWAR; + return rightShift_Plural(S{Input}, S{Count}).value() == Expected; +}; + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0111'0111'0111'0111, // input + 0b0010'0010'0010'0010, // 2 ' 2 ' 2 ' 2 + 0b0001'0001'0001'0001 // notice: each input lane shifted right by two! +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0000'0001, + 0b0000'0000'1111'0000 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'1000'1000'1000, + 0b0100'0011'0010'0001, + 0b0000'0001'0010'0100 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b1111'1111'1111'1111, + 0b0001'0001'0001'0001, + 0b0111'0111'0111'0111 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0000'0000, + 0b0000'0000'1111'0001 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0001'0001, + 0b0000'0000'0111'0000 +>()); + +using S = SWAR<4, uint16_t>; +static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); +static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); +static_assert(S::laneMask(1) == 0b0000'0000'1111'0000); +static_assert(S::laneMask(2) == 0b0000'1111'0000'0000); +static_assert(S::laneMask(3) == 0b1111'0000'0000'0000); +static_assert(S{S::laneMask(3)}.at(3) == 0b0000'0000'0000'1111); + +