From 80b6c8b3e2f859b1688e86b6b20ea123196ed7c4 Mon Sep 17 00:00:00 2001
From: Lilith Orion Hafner
Date: Thu, 16 May 2024 06:50:20 -0500
Subject: [PATCH] Fix bug when rounding large numbers to floating point types (#54314)

- fix #52355 using option 4 (round to nearest representable integer)
- update docstrings *including documenting convert to Inf behavior even though Inf is not the "closest" floating point value*
- add some assorted tests

---------

Co-authored-by: mikmoore <95002244+mikmoore@users.noreply.github.com>
---
 base/essentials.jl |  5 +++--
 base/float.jl      | 13 +++++++++++++
 base/rounding.jl   | 13 ++++++++++++-
 test/rounding.jl   | 12 ++++++++++++
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/base/essentials.jl b/base/essentials.jl
index 4e8a22fd229a9..ecf5744c21722 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -399,8 +399,9 @@ Stacktrace:
 [...]
 ```
 
-If `T` is a [`AbstractFloat`](@ref) type,
-then it will return the closest value to `x` representable by `T`.
+If `T` is a [`AbstractFloat`](@ref) type, then it will return the
+closest value to `x` representable by `T`. Inf is treated as one
+ulp greater than `floatmax(T)` for purposes of determining nearest.
 
 ```jldoctest
 julia> x = 1/3
diff --git a/base/float.jl b/base/float.jl
index 64dcb8b807550..ff628f0ac7126 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -464,6 +464,19 @@ round(x::IEEEFloat, ::RoundingMode{:Down}) = floor_llvm(x)
 round(x::IEEEFloat, ::RoundingMode{:Up}) = ceil_llvm(x)
 round(x::IEEEFloat, ::RoundingMode{:Nearest}) = rint_llvm(x)
 
+rounds_up(x, ::RoundingMode{:Down}) = false
+rounds_up(x, ::RoundingMode{:Up}) = true
+rounds_up(x, ::RoundingMode{:ToZero}) = signbit(x)
+rounds_up(x, ::RoundingMode{:FromZero}) = !signbit(x)
+function _round_convert(::Type{T}, x_integer, x, r::Union{RoundingMode{:ToZero}, RoundingMode{:FromZero}, RoundingMode{:Up}, RoundingMode{:Down}}) where {T<:AbstractFloat}
+    x_t = convert(T, x_integer)
+    if rounds_up(x, r)
+        x_t < x ? nextfloat(x_t) : x_t
+    else
+        x_t > x ? prevfloat(x_t) : x_t
+    end
+end
+
 ## floating point promotions ##
 promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
 promote_rule(::Type{Float64}, ::Type{Float16}) = Float64
diff --git a/base/rounding.jl b/base/rounding.jl
index d80edda1e418f..98b4c30822245 100644
--- a/base/rounding.jl
+++ b/base/rounding.jl
@@ -338,6 +338,10 @@ The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the defau
 of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect
 results if the global rounding mode is changed (see [`rounding`](@ref)).
 
+When rounding to a floating point type, will round to integers representable by that type
+(and Inf) rather than true integers. Inf is treated as one ulp greater than the
+`floatmax(T)` for purposes of determining "nearest", similar to [`convert`](@ref).
+
 # Examples
 ```jldoctest
 julia> round(1.7)
@@ -363,6 +367,12 @@ julia> round(123.456; sigdigits=2)
 
 julia> round(357.913; sigdigits=4, base=2)
 352.0
+
+julia> round(Float16, typemax(UInt128))
+Inf16
+
+julia> floor(Float16, typemax(UInt128))
+Float16(6.55e4)
 ```
 
 !!! note
@@ -466,6 +476,7 @@ floor(::Type{T}, x) where T = round(T, x, RoundDown)
 ceil(::Type{T}, x) where T = round(T, x, RoundUp)
 round(::Type{T}, x) where T = round(T, x, RoundNearest)
 
-round(::Type{T}, x, r::RoundingMode) where T = convert(T, round(x, r))
+round(::Type{T}, x, r::RoundingMode) where T = _round_convert(T, round(x, r), x, r)
+_round_convert(::Type{T}, x_integer, x, r) where T = convert(T, x_integer)
 
 round(x::Integer, r::RoundingMode) = x
diff --git a/test/rounding.jl b/test/rounding.jl
index 045c834e63013..76b15ec1d9118 100644
--- a/test/rounding.jl
+++ b/test/rounding.jl
@@ -458,3 +458,15 @@ end
     @test_throws InexactError round(Int128, -Inf16)
     # More comprehensive testing is present in test/floatfuncs.jl
 end
+
+@testset "floor(<:AbstractFloat, large_number) (#52355)" begin
+    @test floor(Float32, 0xffff_ffff) == prevfloat(2f0^32) <= 0xffff_ffff
+    @test trunc(Float16, typemax(UInt128)) == floatmax(Float16)
+    @test round(Float16, typemax(UInt128)) == Inf16
+    for i in [-BigInt(floatmax(Float64)), -BigInt(floatmax(Float64))*100, BigInt(floatmax(Float64)), BigInt(floatmax(Float64))*100]
+        f = ceil(Float64, i)
+        @test f >= i
+        @test isinteger(f) || isinf(f)
+        @test prevfloat(f) < i
+    end
+end