StatProfilerHTML.jl report
Generated on Thu, 21 Dec 2023 12:59:22
File source code
Line Exclusive Inclusive Code
1 # This file is a part of Julia. License is MIT: https://julialang.org/license
2
3 # Twice-precision arithmetic.
4
5 # Necessary for creating nicely-behaved ranges like r = 0.1:0.1:0.3
6 # that return r[3] == 0.3. Otherwise, we have roundoff error due to
7 # 0.1 + 2*0.1 = 0.30000000000000004
8
9 """
10 hi, lo = splitprec(F::Type{<:AbstractFloat}, i::Integer)
11
12 Represent an integer `i` as a pair of floating-point numbers `hi` and
13 `lo` (of type `F`) such that:
14 - `widen(hi) + widen(lo) ≈ i`. It is exact if 1.5 * (number of precision bits in `F`) is greater than the number of bits in `i`.
15 - all bits in `hi` are more significant than any of the bits in `lo`
16 - `hi` can be exactly multiplied by the `hi` component of another call to `splitprec`.
17
18 In particular, while `convert(Float64, i)` can be lossy since Float64
19 has only 53 bits of precision, `splitprec(Float64, i)` is exact for
20 any Int64/UInt64.
21 """
22 function splitprec(::Type{F}, i::Integer) where {F<:AbstractFloat}
23 hi = truncbits(F(i), cld(precision(F), 2))
24 ihi = oftype(i, hi)
25 hi, F(i - ihi)
26 end
27
28 function truncmask(x::F, mask) where {F<:IEEEFloat}
29 reinterpret(F, mask & reinterpret(uinttype(F), x))
30 end
31 truncmask(x, mask) = x
32
33 function truncbits(x::F, nb) where {F<:IEEEFloat}
34 truncmask(x, typemax(uinttype(F)) << nb)
35 end
36 truncbits(x, nb) = x
37
38
39 ## Dekker arithmetic
40
41 """
42 hi, lo = canonicalize2(big, little)
43
44 Generate a representation where all the nonzero bits in `hi` are more
45 significant than any of the nonzero bits in `lo`. `big` must be larger
46 in absolute value than `little`.
47 """
48 function canonicalize2(big, little)
49 h = big+little
50 h, (big - h) + little
51 end
52
53 """
54 zhi, zlo = add12(x, y)
55
56 A high-precision representation of `x + y` for floating-point
57 numbers. Mathematically, `zhi + zlo = x + y`, where `zhi` contains the
58 most significant bits and `zlo` the least significant.
59
60 Because of the way floating-point numbers are printed, `lo` may not
61 look the way you might expect from the standpoint of decimal
62 representation, even though it is exact from the standpoint of binary
63 representation.
64
65 Example:
66 ```julia-repl
67 julia> 1.0 + 1.0001e-15
68 1.000000000000001
69
70 julia> big(1.0) + big(1.0001e-15)
71 1.000000000000001000100000000000020165767380775934141445417482375879192346701529
72
73 julia> hi, lo = Base.add12(1.0, 1.0001e-15)
74 (1.000000000000001, -1.1012302462515652e-16)
75
76 julia> big(hi) + big(lo)
77 1.000000000000001000100000000000020165767380775934141445417482375879192346701529
78 ```
79
80 `lo` differs from 1.0e-19 because `hi` is not exactly equal to
81 the first 16 decimal digits of the answer.
82 """
83 function add12(x::T, y::T) where {T}
84 1 (0 %)
1 (0 %) samples spent in add12
1 (100 %) (incl.) when called from unsafe_getindex line 483
1 (100 %) samples spent calling ifelse
x, y = ifelse(abs(y) > abs(x), (y, x), (x, y))
85 canonicalize2(x, y)
86 end
87 add12(x, y) = add12(promote(x, y)...)
88
89 """
90 zhi, zlo = mul12(x, y)
91
92 A high-precision representation of `x * y` for floating-point
93 numbers. Mathematically, `zhi + zlo = x * y`, where `zhi` contains the
94 most significant bits and `zlo` the least significant.
95
96 Example:
97 ```julia-repl
98 julia> x = Float32(π)
99 3.1415927f0
100
101 julia> x * x
102 9.869605f0
103
104 julia> Float64(x) * Float64(x)
105 9.869604950382893
106
107 julia> hi, lo = Base.mul12(x, x)
108 (9.869605f0, -1.140092f-7)
109
110 julia> Float64(hi) + Float64(lo)
111 9.869604950382893
112 ```
113 """
114 function mul12(x::T, y::T) where {T<:AbstractFloat}
115 (h, l) = Math.two_mul(x, y)
116 ifelse(!isfinite(h), (h, h), (h, l))
117 end
118 mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p)))
119 mul12(x, y) = mul12(promote(x, y)...)
120
121 """
122 zhi, zlo = div12(x, y)
123
124 A high-precision representation of `x / y` for floating-point
125 numbers. Mathematically, `zhi + zlo ≈ x / y`, where `zhi` contains the
126 most significant bits and `zlo` the least significant.
127
128 Example:
129 ```julia-repl
130 julia> x, y = Float32(π), 3.1f0
131 (3.1415927f0, 3.1f0)
132
133 julia> x / y
134 1.013417f0
135
136 julia> Float64(x) / Float64(y)
137 1.0134170444063078
138
139 julia> hi, lo = Base.div12(x, y)
140 (1.013417f0, 3.8867366f-8)
141
142 julia> Float64(hi) + Float64(lo)
143 1.0134170444063066
144 ```
145 """
146 function div12(x::T, y::T) where {T<:AbstractFloat}
147 # We lose precision if any intermediate calculation results in a subnormal.
148 # To prevent this from happening, standardize the values.
149 xs, xe = frexp(x)
150 ys, ye = frexp(y)
151 r = xs / ys
152 rh, rl = canonicalize2(r, -fma(r, ys, -xs)/ys)
153 ifelse(iszero(r) | !isfinite(r), (r, r), (ldexp(rh, xe-ye), ldexp(rl, xe-ye)))
154 end
155 div12(x::T, y::T) where {T} = (p = x / y; (p, zero(p)))
156 div12(x, y) = div12(promote(x, y)...)
157
158
159 ## TwicePrecision
160
161 """
162 TwicePrecision{T}(hi::T, lo::T)
163 TwicePrecision{T}((num, denom))
164
165 A number with twice the precision of `T`, e.g., quad-precision if `T =
166 Float64`.
167
168 !!! warning
169 `TwicePrecision` is an internal type used to increase the
170 precision of floating-point ranges, and not intended for external use.
171 If you encounter them in real code, the most likely explanation is
172 that you are directly accessing the fields of a range. Use
173 the function interface instead, `step(r)` rather than `r.step`
174
175 # Extended help
176
177 `hi` represents the high bits (most significant bits) and
178 `lo` the low bits (least significant bits). Rational values
179 `num//denom` can be approximated conveniently using the syntax
180 `TwicePrecision{T}((num, denom))`.
181
182 When used with `T<:Union{Float16,Float32,Float64}` to construct an "exact"
183 `StepRangeLen`, `ref` should be the range element with smallest
184 magnitude and `offset` set to the corresponding index. For
185 efficiency, multiplication of `step` by the index is not performed at
186 twice precision: `step.hi` should have enough trailing zeros in its
187 `bits` representation that `(0:len-1)*step.hi` is exact (has no
188 roundoff error). If `step` has an exact rational representation
189 `num//denom`, then you can construct `step` using
190
191 step = TwicePrecision{T}((num, denom), nb)
192
193 where `nb` is the number of trailing zero bits of `step.hi`. For
194 ranges, you can set `nb = ceil(Int, log2(len-1))`.
195 """
196 struct TwicePrecision{T}
197 hi::T # most significant bits
198 lo::T # least significant bits
199 end
200
201 TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T))
202
203 TwicePrecision{T}(x::TwicePrecision{T}) where {T} = x
204
205 function TwicePrecision{T}(x) where {T}
206 xT = T(x)
207 Δx = x - xT
208 TwicePrecision{T}(xT, T(Δx))
209 end
210
211 TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} =
212 TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...)
213
214 TwicePrecision(x) = TwicePrecision{typeof(x)}(x)
215
216 # Numerator/Denominator constructors
217 function TwicePrecision{T}(nd::Tuple{Integer,Integer}) where {T<:Union{Float16,Float32}}
218 n, d = nd
219 TwicePrecision{T}(n/d)
220 end
221
222 function TwicePrecision{T}(nd::Tuple{Any,Any}) where {T}
223 n, d = nd
224 TwicePrecision{T}(TwicePrecision{T}(n) / d)
225 end
226
227 function TwicePrecision{T}(nd::Tuple{I,I}, nb::Integer) where {T,I}
228 twiceprecision(TwicePrecision{T}(nd), nb)
229 end
230
231 # Fix #39798
232 # See steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
233 # step::Tuple{Integer,Integer}, nb::Integer,
234 # len::Integer, offset::Integer)
235 function TwicePrecision{T}(nd::Tuple{Integer,Integer}, nb::Integer) where T
236 twiceprecision(TwicePrecision{T}(nd), nb)
237 end
238
239 # Truncating constructors. Useful for generating values that can be
240 # exactly multiplied by small integers.
241 function twiceprecision(val::T, nb::Integer) where {T<:IEEEFloat}
242 hi = truncbits(val, nb)
243 TwicePrecision{T}(hi, val - hi)
244 end
245
246 function twiceprecision(val::TwicePrecision{T}, nb::Integer) where {T<:IEEEFloat}
247 hi = truncbits(val.hi, nb)
248 TwicePrecision{T}(hi, (val.hi - hi) + val.lo)
249 end
250
251 nbitslen(r::StepRangeLen) = nbitslen(eltype(r), length(r), r.offset)
252 nbitslen(::Type{T}, len, offset) where {T<:IEEEFloat} =
253 min(cld(precision(T), 2), nbitslen(len, offset))
254 # The +1 here is for safety, because the precision of the significand
255 # is 1 bit higher than the number that are explicitly stored.
256 nbitslen(len, offset) = len < 2 ? 0 : top_set_bit(max(offset-1, len-offset) - 1) + 1
257
258 eltype(::Type{TwicePrecision{T}}) where {T} = T
259
260 promote_rule(::Type{TwicePrecision{R}}, ::Type{TwicePrecision{S}}) where {R,S} =
261 TwicePrecision{promote_type(R,S)}
262 promote_rule(::Type{TwicePrecision{R}}, ::Type{S}) where {R,S<:Number} =
263 TwicePrecision{promote_type(R,S)}
264
265 (::Type{T})(x::TwicePrecision) where {T<:Number} = (T(x.hi) + T(x.lo))::T
266
267 convert(::Type{TwicePrecision{T}}, x::TwicePrecision{T}) where {T} = x
268 convert(::Type{TwicePrecision{T}}, x::TwicePrecision) where {T} =
269 TwicePrecision{T}(convert(T, x.hi), convert(T, x.lo))::TwicePrecision{T}
270
271 convert(::Type{T}, x::TwicePrecision) where {T<:Number} = T(x)::T
272 convert(::Type{TwicePrecision{T}}, x::Number) where {T} = TwicePrecision{T}(x)::TwicePrecision{T}
273
274 float(x::TwicePrecision{<:AbstractFloat}) = x
275 float(x::TwicePrecision) = TwicePrecision(float(x.hi), float(x.lo))
276
277 big(x::TwicePrecision) = big(x.hi) + big(x.lo)
278
279 -(x::TwicePrecision) = TwicePrecision(-x.hi, -x.lo)
280
281 function zero(::Type{TwicePrecision{T}}) where {T}
282 z = zero(T)
283 TwicePrecision{T}(z, z)
284 end
285
286 # Arithmetic
287
288 function +(x::TwicePrecision, y::Number)
289 s_hi, s_lo = add12(x.hi, y)
290 TwicePrecision(canonicalize2(s_hi, s_lo+x.lo)...)
291 end
292 +(x::Number, y::TwicePrecision) = y+x
293
294 function +(x::TwicePrecision{T}, y::TwicePrecision{T}) where T
295 r = x.hi + y.hi
296 s = abs(x.hi) > abs(y.hi) ? (((x.hi - r) + y.hi) + y.lo) + x.lo : (((y.hi - r) + x.hi) + x.lo) + y.lo
297 TwicePrecision(canonicalize2(r, s)...)
298 end
299 +(x::TwicePrecision, y::TwicePrecision) = +(promote(x, y)...)
300
301 -(x::TwicePrecision, y::TwicePrecision) = x + (-y)
302 -(x::TwicePrecision, y::Number) = x + (-y)
303 -(x::Number, y::TwicePrecision) = x + (-y)
304
305 function *(x::TwicePrecision, v::Number)
306 v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
307 x * TwicePrecision(oftype(x.hi*v, v))
308 end
309 function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
310 v == 0 && return TwicePrecision(x.hi*v, x.lo*v)
311 nb = top_set_bit(abs(v)-1)
312 u = truncbits(x.hi, nb)
313 TwicePrecision(canonicalize2(u*v, ((x.hi-u) + x.lo)*v)...)
314 end
315 *(v::Number, x::TwicePrecision) = x*v
316
317 function *(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T}
318 zh, zl = mul12(x.hi, y.hi)
319 ret = TwicePrecision{T}(canonicalize2(zh, (x.hi * y.lo + x.lo * y.hi) + zl)...)
320 ifelse(iszero(zh) | !isfinite(zh), TwicePrecision{T}(zh, zh), ret)
321 end
322 *(x::TwicePrecision, y::TwicePrecision) = *(promote(x, y)...)
323
324 function /(x::TwicePrecision, v::Number)
325 x / TwicePrecision(oftype(x.hi/v, v))
326 end
327
328 function /(x::TwicePrecision, y::TwicePrecision)
329 hi = x.hi / y.hi
330 uh, ul = mul12(hi, y.hi)
331 lo = ((((x.hi - uh) - ul) + x.lo) - hi*y.lo)/y.hi
332 ret = TwicePrecision(canonicalize2(hi, lo)...)
333 ifelse(iszero(hi) | !isfinite(hi), TwicePrecision(hi, hi), ret)
334 end
335
336 ## StepRangeLen
337
338 # Use TwicePrecision only for Float64; use Float64 for T<:Union{Float16,Float32}
339 # See also _linspace1
340 # Ratio-of-integers constructors
341 function steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
342 step::Tuple{Integer,Integer}, nb::Integer,
343 len::Integer, offset::Integer)
344 StepRangeLen(TwicePrecision{Float64}(ref),
345 TwicePrecision{Float64}(step, nb), len, offset)
346 end
347
348 function steprangelen_hp(::Type{T}, ref::Tuple{Integer,Integer},
349 step::Tuple{Integer,Integer}, nb::Integer,
350 len::Integer, offset::Integer) where {T<:IEEEFloat}
351 StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], len, offset)
352 end
353
354 # AbstractFloat constructors (can supply a single number or a 2-tuple
355 const F_or_FF = Union{AbstractFloat, Tuple{AbstractFloat,AbstractFloat}}
356 asF64(x::AbstractFloat) = Float64(x)
357 asF64(x::Tuple{AbstractFloat,AbstractFloat}) = Float64(x[1]) + Float64(x[2])
358
359 function steprangelen_hp(::Type{Float64}, ref::F_or_FF,
360 step::F_or_FF, nb::Integer,
361 len::Integer, offset::Integer)
362 StepRangeLen(TwicePrecision{Float64}(ref...),
363 twiceprecision(TwicePrecision{Float64}(step...), nb), len, offset)
364 end
365
366 function steprangelen_hp(::Type{T}, ref::F_or_FF,
367 step::F_or_FF, nb::Integer,
368 len::Integer, offset::Integer) where {T<:IEEEFloat}
369 StepRangeLen{T}(asF64(ref), asF64(step), len, offset)
370 end
371
372
373
374 StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T},
375 len::Integer, offset::Integer=1) where {T} =
376 StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}(ref, step, len, offset)
377
378 # Construct range for rational start=start_n/den, step=step_n/den
379 function floatrange(::Type{T}, start_n::Integer, step_n::Integer, len::Integer, den::Integer) where T
380 len = len + 0 # promote with Int
381 if len < 2 || step_n == 0
382 return steprangelen_hp(T, (start_n, den), (step_n, den), 0, len, oneunit(len))
383 end
384 # index of smallest-magnitude value
385 L = typeof(len)
386 imin = clamp(round(typeof(len), -start_n/step_n+1), oneunit(L), len)
387 # Compute smallest-magnitude element to 2x precision
388 ref_n = start_n+(imin-1)*step_n # this shouldn't overflow, so don't check
389 nb = nbitslen(T, len, imin)
390 steprangelen_hp(T, (ref_n, den), (step_n, den), nb, len, imin)
391 end
392
393 function (:)(start::T, step::T, stop::T) where T<:IEEEFloat
394 step == 0 && throw(ArgumentError("range step cannot be zero"))
395 # see if the inputs have exact rational approximations (and if so,
396 # perform all computations in terms of the rationals)
397 step_n, step_d = rat(step)
398 if step_d != 0 && T(step_n/step_d) == step
399 start_n, start_d = rat(start)
400 stop_n, stop_d = rat(stop)
401 if start_d != 0 && stop_d != 0 &&
402 T(start_n/start_d) == start && T(stop_n/stop_d) == stop
403 den = lcm_unchecked(start_d, step_d) # use same denominator for start and step
404 m = maxintfloat(T, Int)
405 if den != 0 && abs(start*den) <= m && abs(step*den) <= m && # will round succeed?
406 rem(den, start_d) == 0 && rem(den, step_d) == 0 # check lcm overflow
407 start_n = round(Int, start*den)
408 step_n = round(Int, step*den)
409 len = max(0, Int(div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d)))
410 # Integer ops could overflow, so check that this makes sense
411 if isbetween(start, start + (len-1)*step, stop + step/2) &&
412 !isbetween(start, start + len*step, stop)
413 # Return a 2x precision range
414 return floatrange(T, start_n, step_n, len, den)
415 end
416 end
417 end
418 end
419 # Fallback, taking start and step literally
420 # n.b. we use Int as the default length type for IEEEFloats
421 lf = (stop-start)/step
422 if lf < 0
423 len = 0
424 elseif lf == 0
425 len = 1
426 else
427 len = round(Int, lf) + 1
428 stop′ = start + (len-1)*step
429 # if we've overshot the end, subtract one:
430 len -= (start < stop < stop′) + (start > stop > stop′)
431 end
432 steprangelen_hp(T, start, step, 0, len, 1)
433 end
434
435 step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T<:AbstractFloat} = T(r.step)
436 step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T} = T(r.step)
437
438 range_start_step_length(a::Real, st::IEEEFloat, len::Integer) =
439 range_start_step_length(promote(a, st)..., len)
440
441 range_start_step_length(a::IEEEFloat, st::Real, len::Integer) =
442 range_start_step_length(promote(a, st)..., len)
443
444 range_start_step_length(a::IEEEFloat, st::IEEEFloat, len::Integer) =
445 range_start_step_length(promote(a, st)..., len)
446
447 function range_start_step_length(a::T, st::T, len::Integer) where T<:IEEEFloat
448 len = len + 0 # promote with Int
449 start_n, start_d = rat(a)
450 step_n, step_d = rat(st)
451 if start_d != 0 && step_d != 0 &&
452 T(start_n/start_d) == a && T(step_n/step_d) == st
453 den = lcm_unchecked(start_d, step_d)
454 m = maxintfloat(T, Int)
455 if abs(den*a) <= m && abs(den*st) <= m &&
456 rem(den, start_d) == 0 && rem(den, step_d) == 0
457 start_n = round(Int, den*a)
458 step_n = round(Int, den*st)
459 return floatrange(T, start_n, step_n, len, den)
460 end
461 end
462 steprangelen_hp(T, a, st, 0, len, 1)
463 end
464
465 range_step_stop_length(step::Real, stop::IEEEFloat, len::Integer) =
466 range_step_stop_length(promote(step, stop)..., len)
467
468 range_step_stop_length(step::IEEEFloat, stop::Real, len::Integer) =
469 range_step_stop_length(promote(step, stop)..., len)
470
471 function range_step_stop_length(step::IEEEFloat, stop::IEEEFloat, len::Integer)
472 r = range_start_step_length(stop, negate(step), len)
473 reverse(r)
474 end
475
476 # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact
477 function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T
478 # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12
479 @inline
480 i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
481 u = oftype(r.offset, i) - r.offset
482 shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
483 1 (0 %)
1 (0 %) samples spent in unsafe_getindex
1 (100 %) (incl.) when called from getindex line 956
1 (100 %) samples spent calling add12
x_hi, x_lo = add12(r.ref.hi, shift_hi)
484 1 (0 %)
1 (0 %) samples spent in unsafe_getindex
1 (100 %) (incl.) when called from getindex line 956
1 (100 %) samples spent calling +
T(x_hi + (x_lo + (shift_lo + r.ref.lo)))
485 end
486
487 function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer)
488 i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
489 u = oftype(r.offset, i) - r.offset
490 shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
491 x_hi, x_lo = add12(r.ref.hi, shift_hi)
492 x_hi, x_lo = add12(x_hi, x_lo + (shift_lo + r.ref.lo))
493 TwicePrecision(x_hi, x_lo)
494 end
495
496 function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{S}) where {T, S<:Integer}
497 @boundscheck checkbounds(r, s)
498 len = length(s)
499 L = typeof(len)
500 sstep = step_hp(s)
501 rstep = step_hp(r)
502 if S === Bool
503 #rstep *= one(sstep)
504 if len == 0
505 return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
506 elseif len == 1
507 if first(s)
508 return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
509 else
510 return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
511 end
512 else # len == 2
513 return StepRangeLen{T}(last(r), step(r), oneunit(L), oneunit(L))
514 end
515 else
516 soffset = round(L, (r.offset - first(s))/sstep + 1)
517 soffset = clamp(soffset, oneunit(L), len)
518 ioffset = L(first(s) + (soffset - oneunit(L)) * sstep)
519 if sstep == 1 || len < 2
520 newstep = rstep #* one(sstep)
521 else
522 newstep = rstep * sstep
523 newstep = twiceprecision(newstep, nbitslen(T, len, soffset))
524 end
525 soffset = max(oneunit(L), soffset)
526 if ioffset == r.offset
527 return StepRangeLen{T}(r.ref, newstep, len, soffset)
528 else
529 return StepRangeLen{T}(r.ref + (ioffset-r.offset)*rstep, newstep, len, soffset)
530 end
531 end
532 end
533
534 *(x::Real, r::StepRangeLen{<:Real,<:TwicePrecision}) =
535 StepRangeLen(x*r.ref, twiceprecision(x*r.step, nbitslen(r)), length(r), r.offset)
536 *(r::StepRangeLen{<:Real,<:TwicePrecision}, x::Real) = x*r
537 /(r::StepRangeLen{<:Real,<:TwicePrecision}, x::Real) =
538 StepRangeLen(r.ref/x, twiceprecision(r.step/x, nbitslen(r)), length(r), r.offset)
539
540 StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} = r
541
542 StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} =
543 _convertSRL(StepRangeLen{T,R,S,L}, r)
544
545 StepRangeLen{Float64}(r::StepRangeLen) =
546 _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
547 StepRangeLen{T}(r::StepRangeLen) where {T<:IEEEFloat} =
548 _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
549
550 StepRangeLen{Float64}(r::AbstractRange) =
551 _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
552 StepRangeLen{T}(r::AbstractRange) where {T<:IEEEFloat} =
553 _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
554
555 function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{<:Integer}) where {T,R,S,L}
556 StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
557 end
558
559 function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{<:Integer}) where {T,R,S,L}
560 StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
561 end
562
563 function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
564 # if start and step have a rational approximation in the old type,
565 # then we transfer that rational approximation to the new type
566 f, s = first(r), step(r)
567 start_n, start_d = rat(f)
568 step_n, step_d = rat(s)
569 if start_d != 0 && step_d != 0 &&
570 U(start_n/start_d) == f && U(step_n/step_d) == s
571 den = lcm_unchecked(start_d, step_d)
572 m = maxintfloat(T, Int)
573 if den != 0 && abs(f*den) <= m && abs(s*den) <= m &&
574 rem(den, start_d) == 0 && rem(den, step_d) == 0
575 start_n = round(Int, f*den)
576 step_n = round(Int, s*den)
577 return floatrange(T, start_n, step_n, L(length(r)), den)
578 end
579 end
580 return __convertSRL(StepRangeLen{T,R,S,L}, r)
581 end
582
583 function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{U}) where {T,R,S,L,U}
584 StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
585 end
586 function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
587 StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
588 end
589
590 function sum(r::StepRangeLen)
591 l = length(r)
592 # Compute the contribution of step over all indices.
593 # Indexes on opposite side of r.offset contribute with opposite sign,
594 # r.step * (sum(1:np) - sum(1:nn))
595 np, nn = l - r.offset, r.offset - 1 # positive, negative
596 # To prevent overflow in sum(1:n), multiply its factors by the step
597 sp, sn = sumpair(np), sumpair(nn)
598 W = widen(typeof(l))
599 Δn = W(sp[1]) * W(sp[2]) - W(sn[1]) * W(sn[2])
600 s = r.step * Δn
601 # Add in contributions of ref
602 ref = r.ref * l
603 convert(eltype(r), s + ref)
604 end
605 function sum(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision})
606 l = length(r)
607 # Compute the contribution of step over all indices.
608 # Indexes on opposite side of r.offset contribute with opposite sign,
609 # r.step * (sum(1:np) - sum(1:nn))
610 np, nn = l - r.offset, r.offset - 1 # positive, negative
611 # To prevent overflow in sum(1:n), multiply its factors by the step
612 sp, sn = sumpair(np), sumpair(nn)
613 tp = _tp_prod(r.step, sp[1], sp[2])
614 tn = _tp_prod(r.step, sn[1], sn[2])
615 s_hi, s_lo = add12(tp.hi, -tn.hi)
616 s_lo += tp.lo - tn.lo
617 # Add in contributions of ref
618 ref = r.ref * l
619 sm_hi, sm_lo = add12(s_hi, ref.hi)
620 add12(sm_hi, sm_lo + ref.lo)[1]
621 end
622
623 # sum(1:n) as a product of two integers
624 sumpair(n::Integer) = iseven(n) ? (n+1, n>>1) : (n, (n+1)>>1)
625
626 function +(r1::StepRangeLen{T,R}, r2::StepRangeLen{T,R}) where T where R<:TwicePrecision
627 len = length(r1)
628 (len == length(r2) ||
629 throw(DimensionMismatch("argument dimensions must match")))
630 if r1.offset == r2.offset
631 imid = r1.offset
632 ref = r1.ref + r2.ref
633 else
634 imid = round(typeof(len), (r1.offset+r2.offset)/2)
635 ref1mid = _getindex_hiprec(r1, imid)
636 ref2mid = _getindex_hiprec(r2, imid)
637 ref = ref1mid + ref2mid
638 end
639 step = twiceprecision(r1.step + r2.step, nbitslen(T, len, imid))
640 StepRangeLen{T,typeof(ref),typeof(step),typeof(len)}(ref, step, len, imid)
641 end
642
643 ## LinRange
644
645 # For Float16, Float32, and Float64, this returns a StepRangeLen
646 function range_start_stop_length(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
647 len = len + 0 # promote with Int
648 len < 2 && return _linspace1(T, start, stop, len)
649 if start == stop
650 return steprangelen_hp(T, start, zero(T), 0, len, 1)
651 end
652 # Attempt to find exact rational approximations
653 start_n, start_d = rat(start)
654 stop_n, stop_d = rat(stop)
655 if start_d != 0 && stop_d != 0
656 den = lcm_unchecked(start_d, stop_d)
657 m = maxintfloat(T, Int)
658 if den != 0 && abs(den*start) <= m && abs(den*stop) <= m
659 start_n = round(Int, den*start)
660 stop_n = round(Int, den*stop)
661 if T(start_n/den) == start && T(stop_n/den) == stop
662 return _linspace(T, start_n, stop_n, len, den)
663 end
664 end
665 end
666 _linspace(start, stop, len)
667 end
668
669 function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
670 len = len + 0 # promote with Int
671 (isfinite(start) && isfinite(stop)) || throw(ArgumentError("start and stop must be finite, got $start and $stop"))
672 # Find the index that returns the smallest-magnitude element
673 Δ, Δfac = stop-start, 1
674 if !isfinite(Δ) # handle overflow for large endpoints
675 Δ, Δfac = stop/len - start/len, len
676 end
677 tmin = -(start/Δ)/Δfac # t such that (1-t)*start + t*stop == 0
678 L = typeof(len)
679 lenn1 = len - oneunit(L)
680 imin = round(L, tmin*lenn1 + 1) # index approximately corresponding to t
681 if 1 < imin < len
682 # The smallest-magnitude element is in the interior
683 t = (imin - 1)/lenn1
684 ref = T((1-t)*start + t*stop)
685 step = imin-1 < len-imin ? (ref-start)/(imin-1) : (stop-ref)/(len-imin)
686 elseif imin <= 1
687 imin = oneunit(L)
688 ref = start
689 step = (Δ/(lenn1))*Δfac
690 else
691 imin = len
692 ref = stop
693 step = (Δ/(lenn1))*Δfac
694 end
695 if len == 2 && !isfinite(step)
696 # For very large endpoints where step overflows, exploit the
697 # split-representation to handle the overflow
698 return steprangelen_hp(T, start, (-start, stop), 0, len, oneunit(L))
699 end
700 # 2x calculations to get high precision endpoint matching while also
701 # preventing overflow in ref_hi+(i-offset)*step_hi
702 m, k = prevfloat(floatmax(T)), max(imin-1, len-imin)
703 step_hi_pre = clamp(step, max(-(m+ref)/k, (-m+ref)/k), min((m-ref)/k, (m+ref)/k))
704 nb = nbitslen(T, len, imin)
705 step_hi = truncbits(step_hi_pre, nb)
706 x1_hi, x1_lo = add12((1-imin)*step_hi, ref)
707 x2_hi, x2_lo = add12((len-imin)*step_hi, ref)
708 a, b = (start - x1_hi) - x1_lo, (stop - x2_hi) - x2_lo
709 step_lo = (b - a)/(len - 1)
710 ref_lo = a - (1 - imin)*step_lo
711 steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, len, imin)
712 end
713
714 # range for rational numbers, start = start_n/den, stop = stop_n/den
715 # Note this returns a StepRangeLen
716 _linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where {T<:IEEEFloat} = _linspace(T, start, stop, len, one(start))
717 function _linspace(::Type{T}, start_n::Integer, stop_n::Integer, len::Integer, den::Integer) where T<:IEEEFloat
718 len = len + 0 # promote with Int
719 len < 2 && return _linspace1(T, start_n/den, stop_n/den, len)
720 L = typeof(len)
721 start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, oneunit(L))
722 tmin = -start_n/(Float64(stop_n) - Float64(start_n))
723 imin = round(typeof(len), tmin*(len-1)+1)
724 imin = clamp(imin, oneunit(L), len)
725 W = widen(L)
726 start_n = W(start_n)
727 stop_n = W(stop_n)
728 ref_num = W(len-imin) * start_n + W(imin-1) * stop_n
729 ref_denom = W(len-1) * den
730 ref = (ref_num, ref_denom)
731 step_full = (stop_n - start_n, ref_denom)
732 steprangelen_hp(T, ref, step_full, nbitslen(T, len, imin), len, imin)
733 end
734
735 # For len < 2
736 function _linspace1(::Type{T}, start, stop, len::Integer) where T<:IEEEFloat
737 len >= 0 || throw(ArgumentError("range($start, stop=$stop, length=$len): negative length"))
738 if len <= 1
739 len == 1 && (start == stop || throw(ArgumentError("range($start, stop=$stop, length=$len): endpoints differ")))
740 # Ensure that first(r)==start and last(r)==stop even for len==0
741 # The output type must be consistent with steprangelen_hp
742 if T<:Union{Float32,Float16}
743 return StepRangeLen{T}(Float64(start), Float64(start) - Float64(stop), len, 1)
744 else # T == Float64
745 return StepRangeLen(TwicePrecision(start, zero(T)), TwicePrecision(start, -stop), len, 1)
746 end
747 end
748 throw(ArgumentError("should only be called for len < 2, got $len"))
749 end
750
751 ### Numeric utilities
752
753 # Approximate x with a rational representation as a pair of Int values.
754 # Guaranteed to return, but not guaranteed to return a precise answer.
755 # https://en.wikipedia.org/wiki/Continued_fraction#Best_rational_approximations
756 function rat(x)
757 y = x
758 a = d = 1
759 b = c = 0
760 m = maxintfloat(narrow(typeof(x)), Int)
761 while abs(y) <= m
762 f = trunc(Int, y)
763 y -= f
764 a, c = f*a + c, a
765 b, d = f*b + d, b
766 max(abs(a), abs(b)) <= convert(Int,m) || return c, d
767 oftype(x,a)/oftype(x,b) == x && break
768 y = inv(y)
769 end
770 return a, b
771 end
772
773 # This version of lcm does not check for overflows
774 lcm_unchecked(a::T, b::T) where T<:Integer = a * div(b, gcd(a, b))
775
776 narrow(::Type{T}) where {T<:AbstractFloat} = Float64
777 narrow(::Type{Float64}) = Float32
778 narrow(::Type{Float32}) = Float16
779 narrow(::Type{Float16}) = Float16
780
781 function _tp_prod(t::TwicePrecision, x, y...)
782 @inline
783 _tp_prod(t * x, y...)
784 end
785 _tp_prod(t::TwicePrecision) = t
786 <(x::TwicePrecision{T}, y::TwicePrecision{T}) where {T} =
787 x.hi < y.hi || ((x.hi == y.hi) & (x.lo < y.lo))
788
789 isbetween(a, x, b) = a <= x <= b || b <= x <= a