From 269bc27a1f7ca8743a05b3073f97ef29a4673f7b Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Fri, 11 Jul 2025 13:12:02 +0200 Subject: [PATCH 1/2] special: trig: smaller polynomials for sinpi,cospi kernels for f16,f32 Regenerate the polynomials for polynomial approximation used in the implementation of `sinpi` and `cospi` kernels for `Float16` and `Float32`. The main difference is that smaller polynomials are used, which should help performance. There should be no loss in accuracy, as the kernels for `Float16` and `Float32` are "wide", computed in increased precision before rounding the final result back to the original narrow type. The current minimax polynomials for these wide kernels have unnecessarily great degree. Another difference, for `cospi`, is that coefficient 0 is not constrained to be exactly equal to `1` any more. This means the polynomial approximation is not exact for zero inputs any more, however this should be OK, considering the result is rounded to a narrower type before returning it to the user in any case. Relaxing the value of coefficient 0 possibly allows further minimizing the overall maximum relative error (if keeping the polynomial degree fixed). The polynomials for the kernel implementations are generated using Sollya. Apart from generating the minimax polynomial using the Remez algorithm, Sollya also takes care of rounding the big coefficients into machine precision in a way that loses less accuracy than with naive coefficient-wise rounding. All relevant Sollya code is included in relevant doc strings. --- base/special/trig.jl | 150 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 6 deletions(-) diff --git a/base/special/trig.jl b/base/special/trig.jl index 96a2da5f5b968..30317d07f3fe0 100644 --- a/base/special/trig.jl +++ b/base/special/trig.jl @@ -725,6 +725,146 @@ function acos(x::T) where T <: Union{Float32, Float64} end end +# Not used at run time, so prevent unnecessary specialization/inference. +Base.@nospecializeinfer function _exact_string_to_floating_point((@nospecialize type::Type{<:AbstractFloat}), string::String, precision::Int) + bf = BigFloat(string; precision) + f = type(bf) + (f == bf) || throw(ArgumentError("not exact")) + f +end + +""" + _sinpi_kernel_wide_polynomial_f16::Tuple{Vararg{Float32}} + +Floating-point constants defining a minimax polynomial, computed with Sollya like so: + +```sollya +f = sin(pi * x); +i = [-1/8, 1/4]; +m = [|1, 3, 5, 7|]; +d = [|single...|]; +p = fpminimax(f, m, d, i); +supnorm(p, f, i, relative, 1b-200); +e = abs(f(x) - p(x)) / abs(f(x)); +plot(e, i); +p; +``` + +The relative error of the polynomial produced by `fpminimax` is below `2.8e-8`. + +* Calculated with the `supnorm` call in the Sollya code above. + +* This value accounts for the representation of the coefficients in machine precision. + +* This value underestimates the complete error when evaluating the polynomial in machine precision. +""" +const _sinpi_kernel_wide_polynomial_f16 = ( + _exact_string_to_floating_point(Float32, "3.1415927410125732421875"), + _exact_string_to_floating_point(Float32, "-5.1677227020263671875"), + _exact_string_to_floating_point(Float32, "2.5502727031707763671875"), + _exact_string_to_floating_point(Float32, "-0.593835175037384033203125"), +) + +""" + _sinpi_kernel_wide_polynomial_f32::Tuple{Vararg{Float64}} + +Floating-point constants defining a minimax polynomial, computed with Sollya like so: + +```sollya +prec = 400; +f = sin(pi * x); +i = [-1/8, 1/4]; +m = [|1, 3, 5, 7, 9|]; +d = [|double...|]; +p = fpminimax(f, m, d, i); +supnorm(p, f, i, relative, 1b-200); +e = abs(f(x) - p(x)) / abs(f(x)); +plot(e, i); +p; +``` + +The relative error of the polynomial produced by `fpminimax` is below `4.6e-12`. + +* Calculated with the `supnorm` call in the Sollya code above. + +* This value accounts for the representation of the coefficients in machine precision. + +* This value underestimates the complete error when evaluating the polynomial in machine precision. +""" +const _sinpi_kernel_wide_polynomial_f32 = ( + _exact_string_to_floating_point(Float64, "3.141592653575500104778939203242771327495574951171875"), + _exact_string_to_floating_point(Float64, "-5.16771276881935026636938346200622618198394775390625"), + _exact_string_to_floating_point(Float64, "2.550162618972915407056234471383504569530487060546875"), + _exact_string_to_floating_point(Float64, "-0.599201360341684807764295328524895012378692626953125"), + _exact_string_to_floating_point(Float64, "8.099587813821683413006979890269576571881771087646484375e-2"), +) + +""" + _cospi_kernel_wide_polynomial_f16::Tuple{Vararg{Float32}} + +Floating-point constants defining a minimax polynomial, computed with Sollya like so: + +```sollya +f = cos(pi * x); +i = [-1/8, 1/4]; +m = [|0, 2, 4, 6|]; +d = [|single...|]; +p = fpminimax(f, m, d, i); +supnorm(p, f, i, relative, 1b-200); +e = abs(f(x) - p(x)) / abs(f(x)); +plot(e, i); +p; +``` + +The relative error of the polynomial produced by `fpminimax` is below `6.0e-8`. + +* Calculated with the `supnorm` call in the Sollya code above. + +* This value accounts for the representation of the coefficients in machine precision. + +* This value underestimates the complete error when evaluating the polynomial in machine precision. +""" +const _cospi_kernel_wide_polynomial_f16 = ( + _exact_string_to_floating_point(Float32, "0.999999940395355224609375"), + _exact_string_to_floating_point(Float32, "-4.934782505035400390625"), + _exact_string_to_floating_point(Float32, "4.05737972259521484375"), + _exact_string_to_floating_point(Float32, "-1.3042089939117431640625"), +) + +""" + _cospi_kernel_wide_polynomial_f32::Tuple{Vararg{Float64}} + +Floating-point constants defining a minimax polynomial, computed with Sollya like so: + +```sollya +prec = 400; +f = cos(pi * x); +i = [-1/8, 1/4]; +m = [|0, 2, 4, 6, 8|]; +d = [|double...|]; +p = fpminimax(f, m, d, i); +supnorm(p, f, i, relative, 1b-200); +e = abs(f(x) - p(x)) / abs(f(x)); +plot(e, i); +p; +``` + +The relative error of the polynomial produced by `fpminimax` is below `5.7e-11`. + +* Calculated with the `supnorm` call in the Sollya code above. + +* This value accounts for the representation of the coefficients in machine precision. + +* This value underestimates the complete error when evaluating the polynomial in machine precision. +""" +const _cospi_kernel_wide_polynomial_f32 = ( + _exact_string_to_floating_point(Float64, "0.99999999994393740099241085772518999874591827392578125"), + _exact_string_to_floating_point(Float64, "-4.934802158259088855629670433700084686279296875"), + _exact_string_to_floating_point(Float64, "4.05870692154277179497512406669557094573974609375"), + _exact_string_to_floating_point(Float64, "-1.3350359059651226711906701893894933164119720458984375"), + _exact_string_to_floating_point(Float64, "0.2312609234105421907035093909144052304327487945556640625"), +) + # Uses minimax polynomial of sin(π * x) for π * x in [0, .25] @inline function sinpi_kernel(x::Float64) sinpi_kernel_wide(x) @@ -742,8 +882,7 @@ end end @inline function sinpi_kernel_wide(x::Float32) x = Float64(x) - return x*evalpoly(x*x, (3.1415926535762266, -5.167712769188119, - 2.5501626483206374, -0.5992021090314925, 0.08100185277841528)) + return x*evalpoly(x*x, _sinpi_kernel_wide_polynomial_f32) end @inline function sinpi_kernel(x::Float16) @@ -751,7 +890,7 @@ end end @inline function sinpi_kernel_wide(x::Float16) x = Float32(x) - return x*evalpoly(x*x, (3.1415927f0, -5.1677127f0, 2.5501626f0, -0.5992021f0, 0.081001855f0)) + return x*evalpoly(x*x, _sinpi_kernel_wide_polynomial_f16) end # Uses minimax polynomial of cos(π * x) for π * x in [0, .25] @@ -773,15 +912,14 @@ end end @inline function cospi_kernel_wide(x::Float32) x = Float64(x) - return evalpoly(x*x, (1.0, -4.934802200541122, 4.058712123568637, - -1.3352624040152927, 0.23531426791507182, -0.02550710082498761)) + return evalpoly(x*x, _cospi_kernel_wide_polynomial_f32) end @inline function cospi_kernel(x::Float16) Float16(cospi_kernel_wide(x)) end @inline function cospi_kernel_wide(x::Float16) x = Float32(x) - return evalpoly(x*x, (1.0f0, -4.934802f0, 4.058712f0, -1.3352624f0, 0.23531426f0, -0.0255071f0)) + return evalpoly(x*x, _cospi_kernel_wide_polynomial_f16) end """ From ca39015689c278a085d972af38f843b1f5e2bbcb Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Sat, 12 Jul 2025 04:40:37 +0200 Subject: [PATCH 2/2] attempt to fix bootstrap --- base/special/trig.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/special/trig.jl b/base/special/trig.jl index 30317d07f3fe0..7f06a384912c8 100644 --- a/base/special/trig.jl +++ b/base/special/trig.jl @@ -726,7 +726,7 @@ function acos(x::T) where T <: Union{Float32, Float64} end # Not used at run time, so prevent unnecessary specialization/inference. -Base.@nospecializeinfer function _exact_string_to_floating_point((@nospecialize type::Type{<:AbstractFloat}), string::String, precision::Int) +Base.@nospecializeinfer function _exact_string_to_floating_point((@nospecialize type::Type{<:AbstractFloat}), string::String, precision::Int = 1000) bf = BigFloat(string; precision) f = type(bf) (f == bf) || throw(ArgumentError("not exact"))