@@ -139,67 +139,28 @@ Float128(x::Base.TwicePrecision{Float64}) =
139
139
Float128 (x. hi) + Float128 (x. lo)
140
140
141
141
# integer -> Float128
142
# 32- and 64-bit integers are converted by calling the matching `__float*tf` routine in
# `quadoplib` through `@quad_ccall`; the `Cfloat128` result is then wrapped as a `Float128`.
# Each method is marked `:foldable` via `@assume_effects`.
@assume_effects :foldable function Float128(x::Int32)
    return Float128(@quad_ccall(quadoplib.__floatsitf(x::Int32)::Cfloat128))
end

@assume_effects :foldable function Float128(x::UInt32)
    return Float128(@quad_ccall(quadoplib.__floatunsitf(x::UInt32)::Cfloat128))
end

@assume_effects :foldable function Float128(x::Int64)
    return Float128(@quad_ccall(quadoplib.__floatditf(x::Int64)::Cfloat128))
end

@assume_effects :foldable function Float128(x::UInt64)
    return Float128(@quad_ccall(quadoplib.__floatunditf(x::UInt64)::Cfloat128))
end
153
-
154
# 8- and 16-bit integers are first widened to the 32-bit type of the same signedness and
# then forwarded to the corresponding 32-bit `Float128` method.
for (Narrow, Wide) in ((Int16, Int32), (Int8, Int32), (UInt16, UInt32), (UInt8, UInt32))
    @eval Float128(x::$Narrow) = Float128($Wide(x))
end
158
-
159
# Build the `Float128` bit pattern (as a `UInt128`, sign bit clear) for the unsigned
# integer `x`, rounding to nearest with ties to even when `x` has more than 113
# significant bits.
#
# `x` is expected to be nonzero: the exponent field is always filled in, so zero would
# not map to the all-zero encoding. Callers handle zero themselves.
function _Float128_bits(x::T) where T<:Base.BitUnsigned
    nbits = 8 * sizeof(T) - leading_zeros(x)  # index of the highest set bit (1-based)
    frac = if nbits <= 113
        # Everything fits in the significand: left-align the value and mask off the
        # leading bit, which is implicit in the encoding. No rounding is needed.
        widened = x % UInt128
        (widened << (113 - nbits)) & significand_mask(Float128)
    else
        dropped = nbits - 114
        # Keep one extra low bit below the significand to decide the rounding.
        kept = ((x >> dropped) % UInt128) & 0x0001_ffff_ffff_ffff_ffff_ffff_ffff_ffff
        # Round half up; a carry out of the significand lands in the exponent field
        # once the two parts are added below.
        rounded = (kept + 1) >> 1
        # Exact tie (the extra bit was the lowest set bit of `x`): clear the last bit
        # so the result rounds to even.
        rounded & ~UInt128(trailing_zeros(x) == dropped)
    end
    expo = ((nbits + exponent_bias(Float128) - 1) % UInt128) << significand_bits(Float128)
    return expo + frac
end
154
# Convert a built-in unsigned integer to `Float128` by assembling its bit pattern.
function Float128(x::T) where T<:Base.BitUnsigned
    # Zero is encoded as all-zero bits and bypasses `_Float128_bits`, which always
    # writes an exponent field.
    bits = iszero(x) ? UInt128(0) : _Float128_bits(x)
    return reinterpret(Float128, bits)
end
180
158
181
# Convert a built-in signed integer to `Float128`: encode the magnitude with
# `_Float128_bits`, then set the top (sign) bit for negative inputs.
function Float128(x::T) where T<:Base.BitSigned
    if iszero(x)
        return reinterpret(Float128, UInt128(0))
    end
    # `abs(typemin(T))` is still `typemin(T)`, but `% Unsigned` is unaffected by that.
    magnitude = abs(x) % Unsigned
    bits = _Float128_bits(magnitude)
    if signbit(x)
        bits |= UInt128(1) << 127  # sign bit
    end
    return reinterpret(Float128, bits)
end
204
165
205
166
# Float128 -> integer requires arithmetic, so is below
0 commit comments