Skip to content

Commit 678b58b

Browse files
committed
implement floor and ceil with inline assembly on i586
1 parent e8cfc94 commit 678b58b

File tree

2 files changed

+63
-51
lines changed

2 files changed

+63
-51
lines changed

libm-test/src/precision.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -271,18 +271,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
271271

272272
impl MaybeOverride<(f64,)> for SpecialCase {
273273
fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
274-
if cfg!(x86_no_sse)
275-
&& ctx.base_name == BaseName::Ceil
276-
&& ctx.basis == CheckBasis::Musl
277-
&& input.0 < 0.0
278-
&& input.0 > -1.0
279-
&& expected == F::ZERO
280-
&& actual == F::ZERO
281-
{
282-
// musl returns -0.0, we return +0.0
283-
return XFAIL("i586 ceil signed zero");
284-
}
285-
286274
if cfg!(x86_no_sse)
287275
&& (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
288276
&& (expected - actual).abs() <= F::ONE
@@ -292,16 +280,6 @@ impl MaybeOverride<(f64,)> for SpecialCase {
292280
return XFAIL("i586 rint rounding mode");
293281
}
294282

295-
if cfg!(x86_no_sse)
296-
&& (ctx.fn_ident == Identifier::Ceil || ctx.fn_ident == Identifier::Floor)
297-
&& expected.eq_repr(F::NEG_ZERO)
298-
&& actual.eq_repr(F::ZERO)
299-
{
300-
// FIXME: the x87 implementations do not keep the distinction between -0.0 and 0.0.
301-
// See https://github.com/rust-lang/libm/pull/404#issuecomment-2572399955
302-
return XFAIL("i586 ceil/floor signed zero");
303-
}
304-
305283
if cfg!(x86_no_sse)
306284
&& (ctx.fn_ident == Identifier::Exp10 || ctx.fn_ident == Identifier::Exp2)
307285
{

libm/src/math/arch/i586.rs

Lines changed: 63 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,71 @@
11
//! Architecture-specific support for x86-32 without SSE2
2+
//!
3+
//! We use an alternative implementation on x86, because the
4+
//! main implementation fails with the x87 FPU used by
5+
//! Debian i386, probably due to excess precision issues.
6+
//!
7+
//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
8+
//! functions are implemented in this way.
29
3-
use super::super::fabs;
10+
// FIXME: when the MSRV allows, use naked functions instead.
411

5-
/// Use an alternative implementation on x86, because the
6-
/// main implementation fails with the x87 FPU used by
7-
/// debian i386, probably due to excess precision issues.
8-
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
9-
pub fn ceil(x: f64) -> f64 {
10-
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
11-
let truncated = x as i64 as f64;
12-
if truncated < x {
13-
return truncated + 1.0;
14-
} else {
15-
return truncated;
16-
}
17-
} else {
18-
return x;
12+
/// Rounds `x` to an integral value in the direction of +∞ using the x87 `frndint`
/// instruction.
///
/// The current x87 control word is stored, a modified copy (bit 11 set, bit 10 cleared)
/// is loaded while `frndint` runs, and the stored word is reloaded before the result is
/// written back into `x` and returned.
pub extern "C" fn ceil(mut x: f64) -> f64 {
13+
// We save and later restore the FPU control word.
// Both scratch slots are written by the asm (`fstcw` fills the stash, `movw` fills the
// temporary) before anything reads them, so they may start out uninitialized.
14+
let mut cw_stash = core::mem::MaybeUninit::<u16>::uninit();
15+
let mut cw_tmp = core::mem::MaybeUninit::<u16>::uninit();
// SAFETY: the asm touches memory only through the three pointers passed as operands,
// each of which refers to a local that is live for the whole block. The control word it
// modifies is reloaded from the stash before the block ends, and `dx` plus every x87
// register are declared as clobbers.
16+
unsafe {
17+
core::arch::asm!(
18+
"fstcw ({stash_ptr})", // Save the cw
19+
"movw ({stash_ptr}), %dx", // Copy the saved cw into dx for editing
20+
"orw $0x0800, %dx", // Set rounding control to 0b10 (+∞),
21+
"andw $0xfbff, %dx", // preserving other controls
22+
"movw %dx, ({cw_ptr})", // Write the edited cw to the temporary slot
23+
"fldcw ({cw_ptr})", // Apply the edited cw
24+
"fldl ({x_ptr})", // Push x to the stack
25+
"frndint", // Round
// NOTE(review): the original cw is restored before the store below; presumably this is
// fine because the value on the stack is already integral at this point — confirm.
26+
"fldcw ({stash_ptr})", // Restore cw
27+
"fstpl ({x_ptr})", // Save rounded x to mem
28+
cw_ptr = in(reg) &mut cw_tmp,
29+
stash_ptr = in(reg) &mut cw_stash,
30+
x_ptr = in(reg) &mut x,
31+
out("dx") _, // Cw scratch
32+
// All the x87 FPU stack is used, all registers must be clobbered
// (Rust's inline-asm rules require every st(N) to be listed as a clobber when the asm
// uses the x87 stack; the single value pushed by `fldl` is popped again by `fstpl`.)
33+
out("st(0)") _, out("st(1)") _, out("st(2)") _, out("st(3)") _,
34+
out("st(4)") _, out("st(5)") _, out("st(6)") _, out("st(7)") _,
35+
options(att_syntax)
36+
)
1937
}
38+
// `x` now holds the value stored by `fstpl`.
x
2039
}
2140

22-
/// Use an alternative implementation on x86, because the
23-
/// main implementation fails with the x87 FPU used by
24-
/// debian i386, probably due to excess precision issues.
25-
/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219.
26-
pub fn floor(x: f64) -> f64 {
27-
if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() {
28-
let truncated = x as i64 as f64;
29-
if truncated > x {
30-
return truncated - 1.0;
31-
} else {
32-
return truncated;
33-
}
34-
} else {
35-
return x;
41+
/// Rounds `x` to an integral value in the direction of -∞ using the x87 `frndint`
/// instruction.
///
/// The current x87 control word is stored, a modified copy (bit 10 set, bit 11 cleared)
/// is loaded while `frndint` runs, and the stored word is reloaded before the result is
/// written back into `x` and returned.
// NOTE: this `inline(never)` is load-bearing for making functions that use it panic-free.
42+
// Without it `rem_pio2_large` (and any function that uses it) will contain panics.
43+
#[inline(never)]
44+
pub extern "C" fn floor(mut x: f64) -> f64 {
45+
// We save and later restore the FPU control word.
// Both scratch slots are written by the asm (`fstcw` fills the stash, `movw` fills the
// temporary) before anything reads them, so they may start out uninitialized.
46+
let mut cw_stash = core::mem::MaybeUninit::<u16>::uninit();
47+
let mut cw_tmp = core::mem::MaybeUninit::<u16>::uninit();
// SAFETY: the asm touches memory only through the three pointers passed as operands,
// each of which refers to a local that is live for the whole block. The control word it
// modifies is reloaded from the stash before the block ends, and `dx` plus every x87
// register are declared as clobbers.
48+
unsafe {
49+
core::arch::asm!(
50+
"fstcw ({stash_ptr})", // Save the cw
51+
"movw ({stash_ptr}), %dx", // Copy the saved cw into dx for editing
52+
"orw $0x0400, %dx", // Set rounding control to 0b01 (-∞),
53+
"andw $0xf7ff, %dx", // preserving other controls
54+
"movw %dx, ({cw_ptr})", // Write the edited cw to the temporary slot
55+
"fldcw ({cw_ptr})", // Apply the edited cw
56+
"fldl ({x_ptr})", // Push x to the stack
57+
"frndint", // Round
// NOTE(review): the original cw is restored before the store below; presumably this is
// fine because the value on the stack is already integral at this point — confirm.
58+
"fldcw ({stash_ptr})", // Restore cw
59+
"fstpl ({x_ptr})", // Save rounded x to mem
60+
cw_ptr = in(reg) &mut cw_tmp,
61+
stash_ptr = in(reg) &mut cw_stash,
62+
x_ptr = in(reg) &mut x,
63+
out("dx") _, // Cw scratch
64+
// All the x87 FPU stack is used, all registers must be clobbered
// (Rust's inline-asm rules require every st(N) to be listed as a clobber when the asm
// uses the x87 stack; the single value pushed by `fldl` is popped again by `fstpl`.)
65+
out("st(0)") _, out("st(1)") _, out("st(2)") _, out("st(3)") _,
66+
out("st(4)") _, out("st(5)") _, out("st(6)") _, out("st(7)") _,
67+
options(att_syntax)
68+
)
3669
}
70+
// `x` now holds the value stored by `fstpl`.
x
3771
}

0 commit comments

Comments
 (0)