Skip to content

Commit 848a168

Browse files
committed
move more functions to julia
1 parent ac84a40 commit 848a168

File tree

1 file changed

+93
-67
lines changed

1 file changed

+93
-67
lines changed

src/Quadmath.jl

Lines changed: 93 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -117,21 +117,31 @@ inttype(::Type{Float128}) = Int128
117117

118118
# conversion
119119
Float128(x::Float128) = x
120+
function Float128(x::T) where T <: Base.IEEEFloat
121+
if x===zero(x)
122+
return reinterpret(Float128, zero(UInt128))
123+
elseif x===-zero(x)
124+
return reinterpret(Float128, sign_mask(Float128))
125+
else
126+
s = UInt128(signbit(x))<<127
127+
if !isfinite(x)
128+
xf = reinterpret(Unsigned, x) & significand_mask(T)
129+
d = exponent_mask(Float128)
130+
else
131+
f, e = frexp(x)
132+
xf = reinterpret(Unsigned, f) & significand_mask(T)
133+
d = ((e+exponent_bias(Float128)-1) % UInt128) << significand_bits(Float128)
134+
end
135+
xu = UInt128(xf)<<(significand_bits(Float128)-significand_bits(T))
136+
return reinterpret(Float128, s|d|xu)
137+
end
138+
end
120139

121-
# Float64
122-
@assume_effects :foldable Float128(x::Float64) =
123-
Float128(@quad_ccall(quadoplib.__extenddftf2(x::Cdouble)::Cfloat128))
140+
# truncation
124141
@assume_effects :foldable Float64(x::Float128) =
125142
@quad_ccall(quadoplib.__trunctfdf2(x::Cfloat128)::Cdouble)
126-
127-
# Float32
128-
@assume_effects :foldable Float128(x::Float32) =
129-
Float128(@quad_ccall(quadoplib.__extendsftf2(x::Cfloat)::Cfloat128))
130143
@assume_effects :foldable Float32(x::Float128) =
131144
@quad_ccall(quadoplib.__trunctfsf2(x::Cfloat128)::Cfloat)
132-
133-
# Float16
134-
Float128(x::Float16) = Float128(Float32(x))
135145
Float16(x::Float128) = Float16(Float64(x)) # TODO: avoid double rounding
136146

137147
# TwicePrecision
@@ -192,75 +202,81 @@ Float128(x::Bool) = x ? Float128(1) : Float128(0)
192202
Float128(@quad_ccall(quadoplib.__negtf2(x::Cfloat128)::Cfloat128))
193203

194204
# Float128 -> Integer
195-
@assume_effects :foldable unsafe_trunc(::Type{Int32}, x::Float128) =
196-
@quad_ccall(quadoplib.__fixtfsi(x::Cfloat128)::Int32)
197-
198-
@assume_effects :foldable unsafe_trunc(::Type{Int64}, x::Float128) =
199-
@quad_ccall(quadoplib.__fixtfdi(x::Cfloat128)::Int64)
200-
201-
@assume_effects :foldable unsafe_trunc(::Type{UInt32}, x::Float128) =
202-
@quad_ccall(quadoplib.__fixunstfsi(x::Cfloat128)::UInt32)
203-
204-
@assume_effects :foldable unsafe_trunc(::Type{UInt64}, x::Float128) =
205-
@quad_ccall(quadoplib.__fixunstfdi(x::Cfloat128)::UInt64)
206-
207-
function unsafe_trunc(::Type{UInt128}, x::Float128)
205+
function unsafe_trunc(::Type{T}, x::Float128) where T<:Base.BitUnsigned
208206
xu = reinterpret(UInt128,x)
209207
k = (Int64(xu >> 112) & 0x07fff) - 16382 - 113
210208
xu = (xu & significand_mask(Float128)) | 0x0001_0000_0000_0000_0000_0000_0000_0000
211209
if k <= 0
212-
UInt128(xu >> -k)
210+
(xu >> -k) % T
213211
else
214-
UInt128(xu) << k
212+
(xu % T) << k
215213
end
216214
end
217-
function unsafe_trunc(::Type{Int128}, x::Float128)
218-
copysign(unsafe_trunc(UInt128,x) % Int128, x)
215+
function unsafe_trunc(::Type{T}, x::Float128) where T<:Base.BitSigned
216+
copysign(unsafe_trunc(unsigned(T),x) % T, x)
219217
end
220218
trunc(::Type{Signed}, x::Float128) = trunc(Int,x)
221219
trunc(::Type{Unsigned}, x::Float128) = trunc(Int,x)
222220
trunc(::Type{Integer}, x::Float128) = trunc(Int,x)
223221

224-
for Ti in (Int32, Int64, Int128, UInt32, UInt64, UInt128)
225-
if Ti <: Unsigned || sizeof(Ti) < sizeof(Float128)
226-
# Here `Float128(typemin(Ti))-1` is exact, so we can compare the lower-bound
227-
# directly. `Float128(typemax(Ti))+1` is either always exactly representable, or
228-
# rounded to `Inf` (e.g. when `Ti==UInt128 && Float128==Float32`).
229-
@eval begin
230-
function trunc(::Type{$Ti},x::Float128)
231-
if Float128(typemin($Ti)) - one(Float128) < x < Float128(typemax($Ti)) + one(Float128)
232-
return unsafe_trunc($Ti,x)
233-
else
234-
throw(InexactError(:trunc, $Ti, x))
235-
end
222+
for Ti in (UInt8, UInt16, UInt32, UInt64, UInt128)
223+
# Here `Float128(typemin(Ti))` is 0, so we can check the sign of the float
224+
# `Float128(typemax(Ti))+1` is exactly representable
225+
@eval begin
226+
function trunc(::Type{$Ti},x::Float128)
227+
if !signbit(x) && x < Float128(typemax($Ti)) + one(Float128)
228+
return unsafe_trunc($Ti,x)
229+
else
230+
throw(InexactError(:trunc, $Ti, x))
236231
end
237-
function (::Type{$Ti})(x::Float128)
238-
if (Float128(typemin($Ti)) <= x <= Float128(typemax($Ti))) && (round(x, RoundToZero) == x)
239-
return unsafe_trunc($Ti,x)
240-
else
241-
throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
242-
end
232+
end
233+
function (::Type{$Ti})(x::Float128)
234+
if (!signbit(x) && x <= Float128(typemax($Ti))) && (round(x, RoundToZero) == x)
235+
return unsafe_trunc($Ti,x)
236+
else
237+
throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
243238
end
244239
end
245-
else
246-
# Here `eps(Float128(typemin(Ti))) > 1`, so the only value which can be truncated to
247-
# `Float128(typemin(Ti)` is itself. Similarly, `Float128(typemax(Ti))` is inexact and will
248-
# be rounded up. This assumes that `Float128(typemin(Ti)) > -Inf`, which is true for
249-
# these types, but not for `Float16` or larger integer types.
250-
@eval begin
251-
function trunc(::Type{$Ti},x::Float128)
252-
if Float128(typemin($Ti)) <= x < Float128(typemax($Ti))
253-
return unsafe_trunc($Ti,x)
254-
else
255-
throw(InexactError(:trunc, $Ti, x))
256-
end
240+
end
241+
end
242+
for Ti in (Int8, Int16, Int32, Int64)
243+
# Here `Float128(typemin(Ti))-1` is exact, so we can compare the lower-bound
244+
# directly. `Float128(typemax(Ti))+1` is always exactly representable
245+
@eval begin
246+
function trunc(::Type{$Ti},x::Float128)
247+
if Float128(typemin($Ti)) - one(Float128) < x < Float128(typemax($Ti)) + one(Float128)
248+
return unsafe_trunc($Ti,x)
249+
else
250+
throw(InexactError(:trunc, $Ti, x))
257251
end
258-
function (::Type{$Ti})(x::Float128)
259-
if (Float128(typemin($Ti)) <= x < Float128(typemax($Ti))) && (round(x, RoundToZero) == x)
260-
return unsafe_trunc($Ti,x)
261-
else
262-
throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
263-
end
252+
end
253+
function (::Type{$Ti})(x::Float128)
254+
if (Float128(typemin($Ti)) <= x <= Float128(typemax($Ti))) && (round(x, RoundToZero) == x)
255+
return unsafe_trunc($Ti,x)
256+
else
257+
throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
258+
end
259+
end
260+
end
261+
end
262+
for Ti in (Int128,)
263+
# Here `eps(Float128(typemin(Ti))) > 1`, so the only value which can be truncated to
264+
# `Float128(typemin(Ti)` is itself. Similarly, `Float128(typemax(Ti))` is inexact and will
265+
# be rounded up. This assumes that `Float128(typemin(Ti)) > -Inf`, which is true for
266+
# Int128, but possibly not for larger integer types.
267+
@eval begin
268+
function trunc(::Type{$Ti},x::Float128)
269+
if Float128(typemin($Ti)) <= x < Float128(typemax($Ti))
270+
return unsafe_trunc($Ti,x)
271+
else
272+
throw(InexactError(:trunc, $Ti, x))
273+
end
274+
end
275+
function (::Type{$Ti})(x::Float128)
276+
if (Float128(typemin($Ti)) <= x < Float128(typemax($Ti))) && (round(x, RoundToZero) == x)
277+
return unsafe_trunc($Ti,x)
278+
else
279+
throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x))
264280
end
265281
end
266282
end
@@ -278,7 +294,10 @@ for f in (:acos, :acosh, :asin, :asinh, :atan, :atanh, :cosh, :cos,
278294
end
279295
end
280296

281-
@assume_effects :foldable abs(x::Float128) = Float128(@quad_ccall(libquadmath.fabsq(x::Cfloat128)::Cfloat128))
297+
function abs(x::Float128)
298+
# mask out sign
299+
reinterpret(Float128, reinterpret(UInt128, x)&(~(UInt128(1)<<127)))
300+
end
282301
@assume_effects :foldable round(x::Float128) = Float128(@quad_ccall(libquadmath.rintq(x::Cfloat128)::Cfloat128))
283302
round(x::Float128, r::RoundingMode{:Down}) = floor(x)
284303
round(x::Float128, r::RoundingMode{:Up}) = ceil(x)
@@ -317,9 +336,16 @@ sincos(x::Float128) = (sin(x), cos(x))
317336
Float128(@quad_ccall(libquadmath.fmaq(x::Cfloat128, y::Cfloat128, z::Cfloat128)::Cfloat128))
318337
end
319338

320-
@assume_effects :foldable isnan(x::Float128) = 0 != @quad_ccall(libquadmath.isnanq(x::Cfloat128)::Cint)
321-
@assume_effects :foldable isinf(x::Float128) = 0 != @quad_ccall(libquadmath.isinfq(x::Cfloat128)::Cint)
322-
@assume_effects :foldable isfinite(x::Float128) = 0 != @quad_ccall(libquadmath.finiteq(x::Cfloat128)::Cint)
339+
function isinf(x::Float128)
340+
xu = reinterpret(UInt128, x)
341+
# xu must be either 0x7fff_0000... or 0xffff_0000...
342+
return xu in (exponent_mask(Float128), exponent_mask(Float128)&~(UInt128(1)<<127))
343+
end
344+
function isfinite(x::Float128)
345+
xu = reinterpret(UInt128, x)
346+
return xu>>significand_bits(Float128)&0x7fff != 0x7fff
347+
end
348+
isnan(x::Float128) = !(isfinite(x) || isinf(x))
323349

324350
isinteger(x::Float128) = isfinite(x) && x === trunc(x)
325351

0 commit comments

Comments
 (0)