Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions inc/zoo/swar/SWAR.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ constexpr __uint128_t lsbIndex(__uint128_t v) noexcept {
}
#endif



/// Core abstraction around SIMD Within A Register (SWAR). Specifies 'lanes'
/// of NBits width against a type T, and provides an abstraction for performing
/// SIMD operations against that primitive type T treated as a SIMD register.
Expand Down Expand Up @@ -108,6 +110,17 @@ struct SWAR {
return result;
}

constexpr static auto evenLaneMask() {
using S = SWAR<NBits, T>;
static_assert(0 == S::Lanes % 2, "Only even number of elements supported");
using D = SWAR<NBits * 2, T>;
return S{(D::LeastSignificantBit << S::NBits) - D::LeastSignificantBit};
}

/// Complement of evenLaneMask(): every bit set in the odd-indexed lanes and
/// zero in the even-indexed ones.
constexpr static auto oddLaneMask() {
    constexpr auto EvenBits = evenLaneMask().value();
    // The cast brings the complement back to T in case ~ promoted a narrow T.
    return SWAR<NBits, T>{static_cast<T>(~EvenBits)};
}

template <typename Range>
constexpr static auto from(const Range &values) noexcept {
using std::begin; using std::end;
Expand Down Expand Up @@ -595,4 +608,7 @@ static_assert(
0x0706050403020100ull
);

// Lanes are counted from the least significant end: with four 4-bit lanes the
// even mask covers nibbles 0 and 2, the odd mask covers nibbles 1 and 3.
static_assert(0b0000'1111'0000'1111 == SWAR<4, u16>::evenLaneMask().value());
static_assert(0b1111'0000'1111'0000 == SWAR<4, u16>::oddLaneMask().value());

}}
65 changes: 55 additions & 10 deletions inc/zoo/swar/associative_iteration.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,14 +475,6 @@ struct SWAR_Pair{
SWAR<NB, T> even, odd;
};

/// Mask for a SWAR<NB, T> with all bits set in the even-indexed lanes and none
/// in the odd-indexed ones, e.g. 0x0F0F0F0F for 4-bit lanes in 32 bits.
/// Only available when the number of lanes is even.
template<int NB, typename T>
constexpr SWAR<NB, T> doublingMask() {
    using Narrow = SWAR<NB, T>;
    using Wide = SWAR<NB * 2, T>;
    static_assert(0 == Narrow::Lanes % 2, "Only even number of elements supported");
    // One set bit at the bottom of each pair of lanes, stretched into NB ones
    // by the shift-and-subtract.
    return Narrow{
        (Wide::LeastSignificantBit << NB) - Wide::LeastSignificantBit
    };
}

template<int NB, typename T>
constexpr auto doublePrecision(SWAR<NB, T> input) {
using S = SWAR<NB, T>;
Expand All @@ -491,7 +483,7 @@ constexpr auto doublePrecision(SWAR<NB, T> input) {
"Precision can only be doubled for SWARs of even element count"
);
using RV = SWAR<NB * 2, T>;
constexpr auto DM = doublingMask<NB, T>();
constexpr auto DM = SWAR<NB, T>::evenLaneMask();
return SWAR_Pair<NB * 2, T>{
RV{(input & DM).value()},
RV{(input.value() >> NB) & DM.value()}
Expand All @@ -503,13 +495,66 @@ constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) {
using S = SWAR<NB, T>;
static_assert(0 == NB % 2, "Only even lane-bitcounts supported");
using RV = SWAR<NB/2, T>;
constexpr auto HalvingMask = doublingMask<NB/2, T>();
constexpr auto HalvingMask = SWAR<NB/2, T>::evenLaneMask();
auto
evenHalf = RV{even.value()} & HalvingMask,
oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2};
return evenHalf | oddHalf;
}


/// Pair of per-lane values returned by fullMultiplication.
/// NOTE(review): the descriptions below presume multiplication_OverflowUnsafe
/// leaves the complete product in each 2*NB-bit lane -- confirm.
template <int NB, typename T> struct MultiplicationResult {
// Lower NB bits of each lane's widened product.
SWAR<NB, T> result;
// Upper NB bits of each lane's widened product, moved down into the lane.
SWAR<NB, T> overflow;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not overflow.

};

/// Lane-wise multiplication that reports both halves of every product.
///
/// Each operand is widened with doublePrecision into an even-lane and an
/// odd-lane SWAR of 2*NB-bit lanes, the widened operands are multiplied, and
/// the products are repacked into NB-bit lanes with halvePrecision.
/// \return {result, overflow}: `result` takes the lower NB bits of each
/// widened product lane, `overflow` the upper NB bits.
/// NOTE(review): presumes multiplication_OverflowUnsafe leaves the complete
/// product in each 2*NB-bit lane -- confirm against its definition.
template <int NB, typename T>
constexpr MultiplicationResult<NB, T>
fullMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) {
using S = SWAR<NB, T>; using D = SWAR<NB * 2, T>;

// Widen: even and odd source lanes each land in the low NB bits of a
// 2*NB-bit lane, leaving the high NB bits free for the product.
auto [l_even, l_odd] = doublePrecision(multiplicand);
auto [r_even, r_odd] = doublePrecision(multiplier);
auto res_even = multiplication_OverflowUnsafe(l_even, r_even);
auto res_odd = multiplication_OverflowUnsafe(l_odd, r_odd);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merge these declarations into a single auto, the idea is that in that way you are verifying they are all of the same type.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also todo signed multiplication


// Into the double precision world
constexpr auto HalfLane = S::NBits;
// Bits of the upper NB-bit half of every 2*NB-bit lane.
constexpr auto UpperHalfOfLanes = SWAR<S::NBits, T>::oddLaneMask().value();
// halvePrecision keeps the low half of each widened lane and interleaves
// the even and odd products back into NB-bit lanes.
auto res = halvePrecision(res_even, res_odd);

// High halves: isolate them, shift them down into the low half of their
// widened lane, then repack the same way as the low halves.
auto over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane};
auto over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane};
auto overflow_values = halvePrecision(over_even, over_odd);

return {res, overflow_values};
}

// Fixture for the compile-time checks that follow: 4-bit lanes in a u32.
// NOTE(review): this alias appears to sit at namespace scope in a header, so
// every includer would see `S` -- consider a more specific name or moving the
// checks into the test sources.
using S = SWAR<4, u32>;

// Odd lanes are the high nibble of every byte.
static_assert(S::oddLaneMask().value() == 0xF0F0'F0F0);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm aware these tests are not formatted nicely; this is just a draft for visibility.

// Even lanes are the low nibble of every byte.
static_assert(S::evenLaneMask().value() == 0x0F0F'0F0F);

// Low halves of single-lane products: 9 * 9 = 0x51 keeps 1, 3 * 7 = 0x15
// keeps 5.
static_assert(fullMultiplication(S{0x0009'0000}, S{0x0009'0000})
.result.value() == 0x0001'0000);
static_assert(fullMultiplication(S{0x0003'0000}, S{0x0007'0000})
.result.value() == 0x0005'0000);

// TODO: enable once the high-half output has been verified. The member is
// named `overflow`, and the expected high nibbles are
// 2 * 8 = 0x10 -> 1, 8 * 8 = 0x40 -> 4, 1 * 8 = 0x08 -> 0.
// static_assert(fullMultiplication(S{0x0002'0000}, S{0x0008'0000})
// .overflow.value() == 0x0001'0000);
//
// static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000})
// .overflow.value() == 0x0004'0000);
//
// static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000})
// .overflow.value() == 0x0000'0000);

// Several lanes at once: 8 * 7 = 0x38 keeps 8, 1 * 3 = 0x03 keeps 3,
// 2 * 2 = 0x04 keeps 4.
static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032})
.result.value() == 0x0008'0034);

}

#endif
2 changes: 1 addition & 1 deletion test/swar/BasicOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ static_assert(BooleanSWAR{Literals<4, u16>,
namespace Multiplication {

// negate() applied to the LeastSignificantBit pattern is expected to give a
// value equal to ~0, i.e. all ones.
static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value());
// For 4-bit lanes in 32 bits the even-lane mask is the low nibble of every byte.
static_assert(0x0F0F0F0F == doublingMask<4, uint32_t>().value());
static_assert(0x0F0F0F0F == SWAR<4, uint32_t>::evenLaneMask().value());

constexpr auto PrecisionFixtureTest = 0x89ABCDEF;
constexpr auto Doubled =
Expand Down
Loading