From c6d44a31079264235fb8e72078c1a07301f76fcb Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Tue, 28 May 2024 18:49:14 -0400 Subject: [PATCH] fix double-counting and non-deterministic results in `summarysize` fixes #53061 Co-authored-by: Orestis Ousoultzoglou --- base/reflection.jl | 15 ++++++++++++--- base/summarysize.jl | 32 ++++++++++++++++++++++++-------- src/datatype.c | 33 +++++++++++++++++++++++++++++++++ test/misc.jl | 17 +++++++++++++++++ 4 files changed, 86 insertions(+), 11 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index 3b6e69801aa137..029a481e1686ca 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -529,6 +529,17 @@ function datatype_nfields(dt::DataType) return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields end +""" + Base.datatype_npointers(dt::DataType) -> Int + +Return the number of pointers in the layout of a datatype. +""" +function datatype_npointers(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers +end + """ Base.datatype_pointerfree(dt::DataType) -> Bool @@ -537,9 +548,7 @@ Can be called on any `isconcretetype`. """ function datatype_pointerfree(dt::DataType) @_foldable_meta - dt.layout == C_NULL && throw(UndefRefError()) - npointers = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers - return npointers == 0 + return datatype_npointers(dt) == 0 end """ diff --git a/base/summarysize.jl b/base/summarysize.jl index 25058247680995..408e50933b4b8d 100644 --- a/base/summarysize.jl +++ b/base/summarysize.jl @@ -8,6 +8,9 @@ struct SummarySize chargeall::Any end +nth_pointer_isdefined(obj, i::Int) = ccall(:jl_nth_pointer_isdefined, Cint, (Any, Csize_t), obj, i-1) != 0 +get_nth_pointer(obj, i::Int) = ccall(:jl_get_nth_pointer, Any, (Any, Csize_t), obj, i-1) + """ Base.summarysize(obj; exclude=Union{...}, chargeall=Union{...}) -> Int @@ -50,15 +53,28 @@ function summarysize(obj; val = x[i] end elseif isa(x, GenericMemory) - nf = length(x) - if @inbounds @inline isassigned(x, i) - val = x[i] + T = eltype(x) + if Base.allocatedinline(T) + np = datatype_npointers(T) + nf = length(x) * np + idx = (i-1) รท np + 1 + if @inbounds @inline isassigned(x, idx) + elt = x[idx] + p = (i-1) % np + 1 + if nth_pointer_isdefined(elt, p) + val = get_nth_pointer(elt, p) + end + end + else + nf = length(x) + if @inbounds @inline isassigned(x, i) + val = x[i] + end end else - nf = nfields(x) - ft = typeof(x).types - if !isbitstype(ft[i]) && isdefined(x, i) - val = getfield(x, i) + nf = datatype_npointers(typeof(x)) + if nth_pointer_isdefined(x, i) + val = get_nth_pointer(x, i) end end if nf > i @@ -82,7 +98,7 @@ end # and so is somewhat approximate. key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj) haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true) - if nfields(obj) > 0 + if datatype_npointers(typeof(obj)) > 0 push!(ss.frontier_x, obj) push!(ss.frontier_i, 1) end diff --git a/src/datatype.c b/src/datatype.c index abbec420bb617c..00dd60fcf234b5 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -2253,6 +2253,39 @@ JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field) return jl_field_offset(ty, field - 1); } +jl_value_t *get_nth_pointer(jl_value_t *v, size_t i) +{ + jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); + const jl_datatype_layout_t *ly = dt->layout; + uint32_t npointers = ly->npointers; + if (i >= npointers) + jl_bounds_error_int(v, i); + const uint8_t *ptrs8 = (const uint8_t *)jl_dt_layout_ptrs(ly); + const uint16_t *ptrs16 = (const uint16_t *)jl_dt_layout_ptrs(ly); + const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly); + uint32_t fld; + if (ly->flags.fielddesc_type == 0) + fld = ptrs8[i]; + else if (ly->flags.fielddesc_type == 1) + fld = ptrs16[i]; + else + fld = ptrs32[i]; + return ((jl_value_t**)v)[fld]; +} + +JL_DLLEXPORT jl_value_t *jl_get_nth_pointer(jl_value_t *v, size_t i) +{ + jl_value_t *ptrf = get_nth_pointer(v, i); + if (__unlikely(ptrf == NULL)) + jl_throw(jl_undefref_exception); + return ptrf; +} + +JL_DLLEXPORT int jl_nth_pointer_isdefined(jl_value_t *v, size_t i) +{ + return get_nth_pointer(v, i) != NULL; +} + #ifdef __cplusplus } #endif diff --git a/test/misc.jl b/test/misc.jl index 6c2f0f89135fd7..8622cf9bf1d335 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -573,6 +573,23 @@ end # issue #44780 @test summarysize(BigInt(2)^1000) > summarysize(BigInt(2)) +# issue #53061 +mutable struct S53061 + x::Union{Float64, Tuple{Float64, Float64}} + y::Union{Float64, Tuple{Float64, Float64}} +end +let s = S53061[S53061(rand(), (rand(),rand())) for _ in 1:10^4] + @test allequal(summarysize(s) for i in 1:10) +end +struct Z53061 + x::S53061 + y::Int64 +end +let z = Z53061[Z53061(S53061(rand(), (rand(),rand())), 0) for _ in 1:10^4] + @test allequal(summarysize(z) for i in 1:10) + @test abs(summarysize(z) - 640000)/640000 <= 0.01 +end + ## test conversion from UTF-8 to UTF-16 (for Windows APIs) # empty arrays