Skip to content

Commit

Permalink
issubset: don't create a Set out of dicts (#32003)
Browse files Browse the repository at this point in the history
  • Loading branch information
rfourquet authored and StefanKarpinski committed Aug 21, 2019
1 parent 0ef07f8 commit 828a2ec
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 9 deletions.
30 changes: 21 additions & 9 deletions base/abstractset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,15 @@ true
issubset, ,

function issubset(l, r)
if haslength(r)
rlen = length(r)
if isa(l, AbstractSet)
# check l for too many unique elements
length(l) > rlen && return false
if haslength(r) && (isa(l, AbstractSet) || !hasfastin(r))
rlen = length(r) # conditions above make this length computed only when needed
# check l for too many unique elements
if isa(l, AbstractSet) && length(l) > rlen
return false
end
# if r is big enough, convert it to a Set
# threshold empirically determined by repeatedly
# sampling using these two methods (see #26198)
if rlen > 70 && !isa(r, AbstractSet)
# when `in` would be too slow and r is big enough, convert it to a Set
# this threshold was empirically determined (cf. #26198)
if !hasfastin(r) && rlen > 70
return issubset(l, Set(r))
end
end
Expand All @@ -270,6 +269,19 @@ function issubset(l, r)
return true
end

"""
hasfastin(T)
Determine whether the computation `x ∈ collection` where `collection::T` can be considered
as a "fast" operation (typically constant or logarithmic complexity).
The definition `hasfastin(x) = hasfastin(typeof(x))` is provided for convenience so that instances
can be passed instead of types.
However the form that accepts a type argument should be defined for new types.
"""
hasfastin(::Type) = false
hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true
hasfastin(x) = hasfastin(typeof(x))

(l, r) = r l

## strict subset comparison
Expand Down
18 changes: 18 additions & 0 deletions test/sets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,24 @@ end
# symdiff must NOT uniquify
@test symdiff([1, 2, 1]) == symdiff!([1, 2, 1]) == [2]
@test symdiff([1, 2, 1], [2, 2]) == symdiff!([1, 2, 1], [2, 2]) == [2]

# Base.hasfastin
@test all(Base.hasfastin, Any[Dict(1=>2), Set(1), BitSet(1), 1:9, 1:2:9,
Dict, Set, BitSet, UnitRange, StepRange])
@test !any(Base.hasfastin, Any[[1, 2, 3], "123",
Array, String])

# tests for Dict
d1 = Dict(1=>nothing, 2=>nothing)
d2 = Dict(1=>nothing, 3=>nothing)
d3 = Dict(1=>nothing, 2=>nothing, 3=>nothing)
@test d3 == merge(d1, d2)
@test !issubset(d1, d2)
@test !issubset(d2, d1)
@test !issubset(d3, d1)
@test !issubset(d3, d2)
@test issubset(d1, d3)
@test issubset(d2, d3)
end

@testset "unique" begin
Expand Down

2 comments on commit 828a2ec

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

Please sign in to comment.