Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions inc/zoo/swar/associative_iteration.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,8 @@ constexpr auto halvePrecision(SWAR<NB, T> even, SWAR<NB, T> odd) {


template <int NB, typename T> struct MultiplicationResult {
SWAR<NB, T> result;
SWAR<NB, T> overflow;
SWAR<NB, T> lower;
SWAR<NB, T> upper;
Comment on lines +482 to +483
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merge

};

template <int NB, typename T>
Expand All @@ -497,11 +497,11 @@ wideningMultiplication(SWAR<NB, T> multiplicand, SWAR<NB, T> multiplier) {
constexpr auto
HalfLane = S::NBits,
UpperHalfOfLanes = SWAR<S::NBits, T>::oddLaneMask().value();
auto [res_even, res_odd] = doublingMultiplication(multiplicand, multiplier);
auto result = halvePrecision(res_even, res_odd);
auto [lower, upper] = doublingMultiplication(multiplicand, multiplier);
auto result = halvePrecision(lower, upper);
auto
over_even = D{(res_even.value() & UpperHalfOfLanes) >> HalfLane},
over_odd = D{(res_odd.value() & UpperHalfOfLanes) >> HalfLane};
over_even = D{(lower.value() & UpperHalfOfLanes) >> HalfLane},
over_odd = D{(upper.value() & UpperHalfOfLanes) >> HalfLane};
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intra-lane shift primitives allow you to provide the mask.
Please use those primitives instead of deploying the pick-axe of a hand-written mask-and-shift.

auto upper_lanes_overflow = halvePrecision(over_even, over_odd);
return {result, upper_lanes_overflow};
}
Expand Down
Loading