|
1 | 1 | //! Architecture-specific support for x86-32 without SSE2
|
| 2 | +//! |
| 3 | +//! We use an alternative implementation on x86 because the |
| 4 | +//! main implementation fails with the x87 FPU used by |
| 5 | +//! Debian i386, probably due to excess-precision issues. |
| 6 | +//! |
| 7 | +//! See <https://github.com/rust-lang/compiler-builtins/pull/976> for discussion on why these |
| 8 | +//! functions are implemented in this way. |
2 | 9 |
|
3 |
| -use super::super::fabs; |
| 10 | +// FIXME: when the MSRV allows, use naked functions instead. |
4 | 11 |
|
5 |
| -/// Use an alternative implementation on x86, because the |
6 |
| -/// main implementation fails with the x87 FPU used by |
7 |
| -/// debian i386, probably due to excess precision issues. |
8 |
| -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. |
9 |
| -pub fn ceil(x: f64) -> f64 { |
10 |
| - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { |
11 |
| - let truncated = x as i64 as f64; |
12 |
| - if truncated < x { |
13 |
| - return truncated + 1.0; |
14 |
| - } else { |
15 |
| - return truncated; |
16 |
| - } |
17 |
| - } else { |
18 |
| - return x; |
| 12 | +use core::mem::MaybeUninit; |
| 13 | + |
| 14 | +pub extern "C" fn ceil(mut x: f64) -> f64 { |
| 15 | + // We save and later restore the FPU control word. |
| 16 | + let mut cw_stash = MaybeUninit::<u16>::uninit(); |
| 17 | + let mut cw_tmp = MaybeUninit::<u16>::uninit(); |
| 18 | + unsafe { |
| 19 | + core::arch::asm!( |
| 20 | + "fstcw ({stash_ptr})", // Save the cw |
| 21 | + "movw ({stash_ptr}), %dx", // ... |
| 22 | + "orw $0x0800, %dx", // Set rounding control to 0b10 (+∞), |
| 23 | + "andw $0xfbff, %dx", // preserving other controls |
| 24 | + "movw %dx, ({cw_ptr})", // Apply cw |
| 25 | + "fldcw ({cw_ptr})", // ... |
| 26 | + "fldl ({x_ptr})", // Push x to the stack |
| 27 | + "frndint", // Round |
| 28 | + "fldcw ({stash_ptr})", // Restore cw |
| 29 | + "fstpl ({x_ptr})", // Save rounded x to mem |
| 30 | + cw_ptr = in(reg) &mut cw_tmp, |
| 31 | + stash_ptr = in(reg) &mut cw_stash, |
| 32 | + x_ptr = in(reg) &mut x, |
| 33 | + out("dx") _, // Cw scratch |
| 34 | + // All the x87 FPU stack is used, all registers must be clobbered |
| 35 | + out("st(0)") _, out("st(1)") _, out("st(2)") _, out("st(3)") _, |
| 36 | + out("st(4)") _, out("st(5)") _, out("st(6)") _, out("st(7)") _, |
| 37 | + options(att_syntax) |
| 38 | + ) |
19 | 39 | }
|
| 40 | + x |
20 | 41 | }
|
21 | 42 |
|
22 |
| -/// Use an alternative implementation on x86, because the |
23 |
| -/// main implementation fails with the x87 FPU used by |
24 |
| -/// debian i386, probably due to excess precision issues. |
25 |
| -/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. |
26 |
| -pub fn floor(x: f64) -> f64 { |
27 |
| - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { |
28 |
| - let truncated = x as i64 as f64; |
29 |
| - if truncated > x { |
30 |
| - return truncated - 1.0; |
31 |
| - } else { |
32 |
| - return truncated; |
33 |
| - } |
34 |
| - } else { |
35 |
| - return x; |
| 43 | +pub extern "C" fn floor(mut x: f64) -> f64 { |
| 44 | + // We save and later restore the FPU control word. |
| 45 | + let mut cw_stash = MaybeUninit::<u16>::uninit(); |
| 46 | + let mut cw_tmp = MaybeUninit::<u16>::uninit(); |
| 47 | + unsafe { |
| 48 | + core::arch::asm!( |
| 49 | + "fstcw ({stash_ptr})", // Save the cw |
| 50 | + "movw ({stash_ptr}), %dx", // ... |
| 51 | + "orw $0x0400, %dx", // Set rounding control to 0b01 (-∞), |
| 52 | + "andw $0xf7ff, %dx", // preserving other controls |
| 53 | + "movw %dx, ({cw_ptr})", // Apply cw |
| 54 | + "fldcw ({cw_ptr})", // ... |
| 55 | + "fldl ({x_ptr})", // Push x to the stack |
| 56 | + "frndint", // Round |
| 57 | + "fldcw ({stash_ptr})", // Restore cw |
| 58 | + "fstpl ({x_ptr})", // Save rounded x to mem |
| 59 | + cw_ptr = in(reg) &mut cw_tmp, |
| 60 | + stash_ptr = in(reg) &mut cw_stash, |
| 61 | + x_ptr = in(reg) &mut x, |
| 62 | + out("dx") _, // Cw scratch |
| 63 | + // All the x87 FPU stack is used, all registers must be clobbered |
| 64 | + out("st(0)") _, out("st(1)") _, out("st(2)") _, out("st(3)") _, |
| 65 | + out("st(4)") _, out("st(5)") _, out("st(6)") _, out("st(7)") _, |
| 66 | + options(att_syntax) |
| 67 | + ) |
36 | 68 | }
|
| 69 | + x |
37 | 70 | }
|
0 commit comments