-
Notifications
You must be signed in to change notification settings - Fork 12
Add more multiplication primitives #107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
b4db29e
1261006
f976ae4
f0720bd
f928811
51f2987
35fffa7
b87b408
07be9f9
93e4bac
05468a2
ac45f1b
2302504
2214ac8
61a7506
7b41db0
5cf88df
3a65ed2
2b613ee
fa0667b
dea354a
81237a6
b792e30
5d41262
cfb1072
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -475,14 +475,6 @@ struct SWAR_Pair{ | |
SWAR<NB, T> even, odd; | ||
}; | ||
|
||
// Builds a SWAR<NB, T> mask from the lane layout of the double-width type | ||
// SWAR<NB * 2, T>: the value is (D::LeastSignificantBit << NB) minus | ||
// D::LeastSignificantBit. | ||
// NOTE(review): presumably D::LeastSignificantBit has bit 0 of every | ||
// 2*NB-bit lane set, which would make the result select the low NB bits of | ||
// each double-width lane (the even NB-bit lanes) -- confirm against the SWAR | ||
// definition. The callers shown in this diff switch to | ||
// SWAR<NB, T>::evenLaneMask() instead of this function. | ||
template<int NB, typename T> | ||
constexpr SWAR<NB, T> doublingMask() { | ||
using S = SWAR<NB, T>; | ||
// Pairing lanes two-by-two requires an even lane count. | ||
static_assert(0 == S::Lanes % 2, "Only even number of elements supported"); | ||
using D = SWAR<NB * 2, T>; | ||
return S{(D::LeastSignificantBit << NB) - D::LeastSignificantBit}; | ||
} | ||
|
||
template<int NB, typename T> | ||
constexpr auto doublePrecision(SWAR<NB, T> input) { | ||
using S = SWAR<NB, T>; | ||
|
@@ -491,7 +483,7 @@ constexpr auto doublePrecision(SWAR<NB, T> input) { | |
"Precision can only be doubled for SWARs of even element count" | ||
); | ||
using RV = SWAR<NB * 2, T>; | ||
constexpr auto DM = doublingMask<NB, T>(); | ||
constexpr auto DM = SWAR<NB, T>::evenLaneMask(); | ||
return SWAR_Pair<NB * 2, T>{ | ||
RV{(input & DM).value()}, | ||
RV{(input.value() >> NB) & DM.value()} | ||
|
@@ -503,13 +495,66 @@ constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) { | |
using S = SWAR<NB, T>; | ||
static_assert(0 == NB % 2, "Only even lane-bitcounts supported"); | ||
using RV = SWAR<NB/2, T>; | ||
constexpr auto HalvingMask = doublingMask<NB/2, T>(); | ||
constexpr auto HalvingMask = SWAR<NB/2, T>::evenLaneMask(); | ||
auto | ||
evenHalf = RV{even.value()} & HalvingMask, | ||
oddHalf = RV{(RV{odd.value()} & HalvingMask).value() << NB/2}; | ||
return evenHalf | oddHalf; | ||
} | ||
|
||
|
||
// Pair of SWAR values returned by fullMultiplication: for every NB-bit lane, | ||
// the two halves of the double-width product of that lane. | ||
template <int NB, typename T> struct MultiplicationResult { | ||
// Lower half of each lane's product, as packed by halvePrecision. | ||
SWAR<NB, T> result; | ||
// Upper half of each lane's product (the bits that do not fit in `result`). | ||
// NOTE(review): the review thread objects to calling this "overflow". | ||
SWAR<NB, T> overflow; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not overflow. |
||
}; | ||
|
||
// Lane-wise multiplication that keeps both halves of every product. | ||
// Both operands are widened with doublePrecision into even and odd lanes of | ||
// 2*NB bits, the widened lanes are multiplied with | ||
// multiplication_OverflowUnsafe, and each product is split back into NB-bit | ||
// lanes: `result` receives the lower half of every product and `overflow` | ||
// receives the upper half. | ||
// NOTE(review): the assertions visible in this file only use SWAR<4, u32>; | ||
// the review thread lists signed multiplication as a TODO. | ||
template <int NB, typename T> | ||
constexpr MultiplicationResult<NB, T> | ||
fullMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) { | ||
using S = SWAR<NB, T>; using D = SWAR<NB * 2, T>; | ||
|
||
// Widen the operands so every product has a 2*NB-bit lane to land in. | ||
auto [l_even, l_odd] = doublePrecision(multiplicand); | ||
auto [r_even, r_odd] = doublePrecision(multiplier); | ||
auto res_even = multiplication_OverflowUnsafe(l_even, r_even); | ||
auto res_odd = multiplication_OverflowUnsafe(l_odd, r_odd); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Merge these declarations into a single There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also todo signed multiplication |
||
|
||
// Into the double precision world | ||
// Shift distance used to move the upper half of a double-width lane down | ||
// (presumably S::NBits equals NB -- confirm against the SWAR definition). | ||
constexpr auto HalfLane = S::NBits; | ||
// Odd-lane mask of the NB-bit SWAR, applied to raw double-width values to | ||
// keep the upper half of each 2*NB-bit lane (0xF0F0'F0F0 for SWAR<4, u32>). | ||
constexpr auto UpperHalfOfLanes = SWAR<S::NBits, T>::oddLaneMask().value(); | ||
// Lower halves of the even/odd products, packed back into NB-bit lanes. | ||
auto res = halvePrecision(res_even, res_odd); | ||
|
||
// Isolate the upper half of each product and move it into the lower half | ||
// of its lane so halvePrecision can pack it the same way as `res`. | ||
auto over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane}; | ||
auto over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane}; | ||
auto overflow_values = halvePrecision(over_even, over_odd); | ||
|
||
return {res, overflow_values}; | ||
} | ||
|
||
// Compile-time checks below use 4-bit lanes packed in a u32. | ||
using S = SWAR<4, u32>; | ||
|
||
// The odd-lane mask selects the upper nibble of every byte. | ||
static_assert(S::oddLaneMask().value() == 0xF0F0'F0F0); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm aware these tests are not formatted nicely; I'm just making a draft for visibility. |
||
// The even-lane mask selects the lower nibble of every byte. | ||
static_assert(S::evenLaneMask().value() == 0x0F0F'0F0F); | ||
|
||
// 9 * 9 = 0x51: the lane keeps the low nibble, 1. | ||
static_assert(fullMultiplication(S{0x0009'0000}, S{0x0009'0000}) | ||
.result.value() == 0x0001'0000); | ||
// 3 * 7 = 0x15: the lane keeps the low nibble, 5. | ||
static_assert(fullMultiplication(S{0x0003'0000}, S{0x0007'0000}) | ||
.result.value() == 0x0005'0000); | ||
|
||
// NOTE(review): the disabled assertions below read a member named | ||
// `overflowed`, but MultiplicationResult declares it as `overflow`; the | ||
// expected values also look like they predate the upper-half semantics | ||
// implemented by fullMultiplication -- confirm before re-enabling. | ||
// static_assert(fullMultiplication(S{0x0002'0000}, S{0x0008'0000}) | ||
// .overflowed.value() == 0x0008'0000); | ||
// | ||
// static_assert(fullMultiplication(S{0x0008'0000}, S{0x0008'0000}) | ||
// .overflowed.value() == 0x0008'0000); | ||
// | ||
// static_assert(fullMultiplication(S{0x0001'0000}, S{0x0008'0000}) | ||
// .overflowed.value() == 0x0000'0000); | ||
|
||
// Several lanes at once: 8 * 7 = 0x38 -> 8, 1 * 3 = 0x03 -> 3, 2 * 2 = 0x04 -> 4. | ||
static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) | ||
.result.value() == 0x0008'0034); | ||
|
||
// Exact duplicate of the preceding assertion. | ||
static_assert(fullMultiplication(S{0x0008'0012}, S{0x0007'0032}) | ||
.result.value() == 0x0008'0034); | ||
|
||
} | ||
|
||
#endif |
Uh oh!
There was an error while loading. Please reload this page.