Skip to content

Commit

Permalink
fix double-counting and non-deterministic results in summarysize
Browse files Browse the repository at this point in the history
fixes #53061

Co-authored-by: Orestis Ousoultzoglou <[email protected]>
  • Loading branch information
JeffBezanson and xlxs4 committed May 29, 2024
1 parent 196dec6 commit c6d44a3
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 11 deletions.
15 changes: 12 additions & 3 deletions base/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,17 @@ function datatype_nfields(dt::DataType)
return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields
end

"""
Base.datatype_npointers(dt::DataType) -> Int
Return the number of pointers in the layout of a datatype.
"""
function datatype_npointers(dt::DataType)
@_foldable_meta
dt.layout == C_NULL && throw(UndefRefError())
return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers
end

"""
Base.datatype_pointerfree(dt::DataType) -> Bool
Expand All @@ -537,9 +548,7 @@ Can be called on any `isconcretetype`.
"""
function datatype_pointerfree(dt::DataType)
@_foldable_meta
dt.layout == C_NULL && throw(UndefRefError())
npointers = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers
return npointers == 0
return datatype_npointers(dt) == 0
end

"""
Expand Down
32 changes: 24 additions & 8 deletions base/summarysize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ struct SummarySize
chargeall::Any
end

nth_pointer_isdefined(obj, i::Int) = ccall(:jl_nth_pointer_isdefined, Cint, (Any, Csize_t), obj, i-1) != 0
get_nth_pointer(obj, i::Int) = ccall(:jl_get_nth_pointer, Any, (Any, Csize_t), obj, i-1)

"""
Base.summarysize(obj; exclude=Union{...}, chargeall=Union{...}) -> Int
Expand Down Expand Up @@ -50,15 +53,28 @@ function summarysize(obj;
val = x[i]
end
elseif isa(x, GenericMemory)
nf = length(x)
if @inbounds @inline isassigned(x, i)
val = x[i]
T = eltype(x)
if Base.allocatedinline(T)
np = datatype_npointers(T)
nf = length(x) * np
idx = (i-1) ÷ np + 1
if @inbounds @inline isassigned(x, idx)
elt = x[idx]
p = (i-1) % np + 1
if nth_pointer_isdefined(elt, p)
val = get_nth_pointer(elt, p)
end
end
else
nf = length(x)
if @inbounds @inline isassigned(x, i)
val = x[i]
end
end
else
nf = nfields(x)
ft = typeof(x).types
if !isbitstype(ft[i]) && isdefined(x, i)
val = getfield(x, i)
nf = datatype_npointers(typeof(x))
if nth_pointer_isdefined(x, i)
val = get_nth_pointer(x, i)
end
end
if nf > i
Expand All @@ -82,7 +98,7 @@ end
# and so is somewhat approximate.
key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj)
haskey(ss.seen, key) ? (return 0) : (ss.seen[key] = true)
if nfields(obj) > 0
if datatype_npointers(typeof(obj)) > 0
push!(ss.frontier_x, obj)
push!(ss.frontier_i, 1)
end
Expand Down
33 changes: 33 additions & 0 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -2253,6 +2253,39 @@ JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field)
return jl_field_offset(ty, field - 1);
}

jl_value_t *get_nth_pointer(jl_value_t *v, size_t i)
{
jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v);
const jl_datatype_layout_t *ly = dt->layout;
uint32_t npointers = ly->npointers;
if (i >= npointers)
jl_bounds_error_int(v, i);
const uint8_t *ptrs8 = (const uint8_t *)jl_dt_layout_ptrs(ly);
const uint16_t *ptrs16 = (const uint16_t *)jl_dt_layout_ptrs(ly);
const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly);
uint32_t fld;
if (ly->flags.fielddesc_type == 0)
fld = ptrs8[i];
else if (ly->flags.fielddesc_type == 1)
fld = ptrs16[i];
else
fld = ptrs32[i];
return ((jl_value_t**)v)[fld];
}

JL_DLLEXPORT jl_value_t *jl_get_nth_pointer(jl_value_t *v, size_t i)
{
jl_value_t *ptrf = get_nth_pointer(v, i);
if (__unlikely(ptrf == NULL))
jl_throw(jl_undefref_exception);
return ptrf;
}

JL_DLLEXPORT int jl_nth_pointer_isdefined(jl_value_t *v, size_t i)
{
return get_nth_pointer(v, i) != NULL;
}

#ifdef __cplusplus
}
#endif
17 changes: 17 additions & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,23 @@ end
# issue #44780
@test summarysize(BigInt(2)^1000) > summarysize(BigInt(2))

# issue #53061
mutable struct S53061
x::Union{Float64, Tuple{Float64, Float64}}
y::Union{Float64, Tuple{Float64, Float64}}
end
let s = S53061[S53061(rand(), (rand(),rand())) for _ in 1:10^4]
@test allequal(summarysize(s) for i in 1:10)
end
struct Z53061
x::S53061
y::Int64
end
let z = Z53061[Z53061(S53061(rand(), (rand(),rand())), 0) for _ in 1:10^4]
@test allequal(summarysize(z) for i in 1:10)
@test abs(summarysize(z) - 640000)/640000 <= 0.01
end

## test conversion from UTF-8 to UTF-16 (for Windows APIs)

# empty arrays
Expand Down

0 comments on commit c6d44a3

Please sign in to comment.