Skip to content

Commit 2666309

Browse files
authored
Merge pull request #546 from xtensor-stack/fix/fast-math-nearbyint
Make nearbyint generic implementation compatible with -ffast-math
2 parents 2d270b2 + 45cad81 commit 2666309

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

include/xsimd/arch/generic/xsimd_generic_math.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1718,8 +1718,17 @@ namespace xsimd {
17181718
batch_type s = bitofsign(self);
17191719
batch_type v = self ^ s;
17201720
batch_type t2n = constants::twotonmb<batch_type>();
1721+
// Under fast-math, reordering is possible and the compiler optimizes d
1722+
// to v. That's not what we want, so prevent compiler optimization here.
1723+
// FIXME: it may be better to emit a memory barrier here (?).
1724+
#ifdef __FAST_MATH__
1725+
volatile batch_type d0 = v + t2n;
1726+
batch_type d = *(batch_type*)(void*)(&d0) - t2n;
1727+
#else
17211728
batch_type d0 = v + t2n;
1722-
return s ^ select(v < t2n, d0 - t2n, v);
1729+
batch_type d = d0 - t2n;
1730+
#endif
1731+
return s ^ select(v < t2n, d, v);
17231732
}
17241733
}
17251734
template<class A> batch<float, A> nearbyint(batch<float, A> const& self, requires_arch<generic>) {

0 commit comments

Comments
 (0)