diff --git a/NEWS.md b/NEWS.md index e49a28fe0fd1b..32f5dba76ea3f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -58,6 +58,7 @@ Standard library changes constructing the range. ([#40382]) * TCP socket objects now expose `closewrite` functionality and support half-open mode usage ([#40783]). * Intersect returns a result with the eltype of the type-promoted eltypes of the two inputs ([#41769]). +* `Iterators.countfrom` now accepts any type that defines `+`. ([#37747]) #### InteractiveUtils * A new macro `@time_imports` for reporting any time spent importing packages and their dependencies ([#41612]) diff --git a/base/broadcast.jl b/base/broadcast.jl index af9a809e7c59b..d5daab992390c 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -1087,10 +1087,12 @@ broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::LinRange) = LinRange(-r.star # For #18336 we need to prevent promotion of the step type: broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange, x::Number) = range(first(r) + x, step=step(r), length=length(r)) broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::AbstractRange) = range(x + first(r), step=step(r), length=length(r)) -broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange, x::Real) = range(first(r) + x, last(r) + x, step=step(r)) -broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::Real) = range(x + first(r), x + last(r), step=step(r)) -broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, last(r) + x) -broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), x + last(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange, x::Integer) = range(first(r) + x, last(r) + x, step=step(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::OrdinalRange) = range(x + first(r), x + last(r), step=step(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Integer) = range(first(r) + 
x, last(r) + x) +broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::AbstractUnitRange) = range(x + first(r), x + last(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, length=length(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), length=length(r)) broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen{T}, x::Number) where T = StepRangeLen{typeof(T(r.ref)+x)}(r.ref + x, r.step, length(r), r.offset) broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::StepRangeLen{T}) where T = @@ -1101,9 +1103,10 @@ broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r1::AbstractRange, r2::Abstract broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r) - x, step=step(r), length=length(r)) broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x - first(r), step=-step(r), length=length(r)) -broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange, x::Real) = range(first(r) - x, last(r) - x, step=step(r)) -broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Real, r::OrdinalRange) = range(x - first(r), x - last(r), step=-step(r)) -broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Real) = range(first(r) - x, last(r) - x) +broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange, x::Integer) = range(first(r) - x, last(r) - x, step=step(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Integer, r::OrdinalRange) = range(x - first(r), x - last(r), step=-step(r)) +broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Integer) = range(first(r) - x, last(r) - x) +broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Real) = range(first(r) - x, length=length(r)) broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen{T}, x::Number) where T = StepRangeLen{typeof(T(r.ref)-x)}(r.ref - x, 
r.step, length(r), r.offset) broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::StepRangeLen{T}) where T = diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index d3d62c9b2dfaa..6008526799ca2 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -1316,7 +1316,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= compact.result[old_result_idx][:inst]), (compact.idx, active_bb) end -function maybe_erase_unused!(extra_worklist, compact, idx, callback = x->nothing) +function maybe_erase_unused!(extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, callback = x::SSAValue->nothing) stmt = compact.result[idx][:inst] stmt === nothing && return false if compact_exprtype(compact, SSAValue(idx)) === Bottom diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 07901f8c2f0a2..c61852125ca94 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -110,7 +110,8 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I end end -function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), pi_callback=(pi, idx)->false) +function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), + callback = (@nospecialize(pi), @nospecialize(idx)) -> false) while true if isa(defssa, OldSSAValue) if already_inserted(compact, defssa) @@ -124,7 +125,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA end def = compact[defssa] if isa(def, PiNode) - if pi_callback(def, defssa) + if callback(def, defssa) return defssa end def = def.val @@ -135,7 +136,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA end defssa = def elseif isa(def, AnySSAValue) - pi_callback(def, defssa) + callback(def, defssa) if isa(def, SSAValue) is_old(compact, defssa) && (def = OldSSAValue(def.id)) end @@ -148,12 +149,15 @@ function 
simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA end end -function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defidx), @nospecialize(typeconstraint) = types(compact)[defidx]) +function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), + @nospecialize(typeconstraint) = types(compact)[defssa]) callback = function (@nospecialize(pi), @nospecialize(idx)) - isa(pi, PiNode) && (typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ))) + if isa(pi, PiNode) + typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ)) + end return false end - def = simple_walk(compact, defidx, callback) + def = simple_walk(compact, defssa, callback) return Pair{Any, Any}(def, typeconstraint) end @@ -273,8 +277,10 @@ function is_getfield_captures(@nospecialize(def), compact::IncrementalCompact) return oc ⊑ Core.OpaqueClosure end -function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt), - @nospecialize(result_t), field::Int, leaves::Vector{Any}) +# try to compute lifted values that can replace a `getfield(x, field)` call +# where `x` is an immutable struct that is defined at any of `leaves` +function lift_leaves(compact::IncrementalCompact, + @nospecialize(result_t), field::Int, leaves::Vector{Any}) # For every leaf, the lifted value lifted_leaves = IdDict{Any, Any}() maybe_undef = false @@ -396,13 +402,13 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt), elseif isa(leaf, Union{Argument, Expr}) return nothing end - !ismutable(leaf) || return nothing + ismutable(leaf) && return nothing isdefined(leaf, field) || return nothing val = getfield(leaf, field) is_inlineable_constant(val) || return nothing lifted_leaves[leaf_key] = RefValue{Any}(quoted(val)) end - lifted_leaves, maybe_undef + return lifted_leaves, maybe_undef end make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? 
typ : MaybeUndef(typ) @@ -415,13 +421,11 @@ function lift_comparison!(compact::IncrementalCompact, idx::Int, typeconstraint = widenconst(c2) val = stmt.args[3] else - cmp = c2 + cmp = c2::Const typeconstraint = widenconst(c1) val = stmt.args[2] end - is_type_only = isdefined(typeof(cmp), :instance) - if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end @@ -497,7 +501,7 @@ function perform_lifting!(compact::IncrementalCompact, if is_old(compact, old_node_ssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end - if isa(val, Union{NewSSAValue, SSAValue, OldSSAValue}) + if isa(val, AnySSAValue) val = simple_walk(compact, val) end if val in keys(lifted_leaves) @@ -508,11 +512,12 @@ function perform_lifting!(compact::IncrementalCompact, continue end lifted_val = lifted_val.x - if isa(lifted_val, Union{NewSSAValue, SSAValue, OldSSAValue}) - lifted_val = simple_walk(compact, lifted_val, (pi, idx)->true) + if isa(lifted_val, AnySSAValue) + callback = (@nospecialize(pi), @nospecialize(idx)) -> true + lifted_val = simple_walk(compact, lifted_val, callback) end push!(new_node.values, lifted_val) - elseif isa(val, Union{NewSSAValue, SSAValue, OldSSAValue}) && val in keys(reverse_mapping) + elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) push!(new_node.edges, edge) push!(new_node.values, lifted_phis[reverse_mapping[val]].ssa) else @@ -532,14 +537,31 @@ function perform_lifting!(compact::IncrementalCompact, if stmt_val in keys(lifted_leaves) stmt_val = lifted_leaves[stmt_val] - elseif isa(stmt_val, Union{NewSSAValue, SSAValue, OldSSAValue}) && stmt_val in keys(reverse_mapping) + elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping) stmt_val = RefValue{Any}(lifted_phis[reverse_mapping[stmt_val]].ssa) end return stmt_val end -assertion_counter = 0 +""" + getfield_elim_pass!(ir::IRCode) -> newir::IRCode + +`getfield` elimination pass, a.k.a. 
Scalar Replacement of Aggregates optimization. + +This pass is based on a local alias analysis that collects field information by def-use chain walking. +It looks for struct allocation sites ("definitions"), and `getfield` calls as well as +`:foreigncall`s that preserve the structs ("usages"). If "definitions" have enough information, +then this pass will replace corresponding usages with lifted values. +`mutable struct`s require additional care and need to be handled separately from immutables. +For `mutable struct`s, `setfield!` calls also count as "definitions", and the pass should +give up the lifting conservatively when there are any "intermediate usages" that may escape +the mutable struct (e.g. a non-inlined generic function call that takes the mutable struct as +its argument). + +In the case where all usages are fully eliminated, the `struct` allocation may also be erased as +a result of dead code elimination. +""" function getfield_elim_pass!(ir::IRCode) compact = IncrementalCompact(ir) insertions = Vector{Any}() @@ -554,7 +576,6 @@ function getfield_elim_pass!(ir::IRCode) result_t = compact_exprtype(compact, SSAValue(idx)) is_getfield = is_setfield = false field_ordering = :unspecified - is_ccall = false # Step 1: Check whether the statement we're looking at is a getfield/setfield! 
if is_known_call(stmt, setfield!, compact) is_setfield = true @@ -610,8 +631,8 @@ function getfield_elim_pass!(ir::IRCode) old_preserves = stmt.args[(6+nccallargs):end] for (pidx, preserved_arg) in enumerate(old_preserves) isa(preserved_arg, SSAValue) || continue - let intermediaries = IdSet() - callback = function(@nospecialize(pi), ssa::AnySSAValue) + let intermediaries = IdSet{Int}() + callback = function (@nospecialize(pi), @nospecialize(ssa)) push!(intermediaries, ssa.id) return false end @@ -670,8 +691,8 @@ function getfield_elim_pass!(ir::IRCode) if ismutabletype(struct_typ) isa(def, SSAValue) || continue - let intermediaries = IdSet() - callback = function(@nospecialize(pi), ssa::AnySSAValue) + let intermediaries = IdSet{Int}() + callback = function (@nospecialize(pi), @nospecialize(ssa)) push!(intermediaries, ssa.id) return false end @@ -691,6 +712,8 @@ function getfield_elim_pass!(ir::IRCode) continue end + # perform SROA on immutable structs here on + if isa(def, Union{OldSSAValue, SSAValue}) def, typeconstraint = simple_walk_constraint(compact, def, typeconstraint) end @@ -703,7 +726,7 @@ function getfield_elim_pass!(ir::IRCode) field = try_compute_fieldidx(struct_typ, field) field === nothing && continue - r = lift_leaves(compact, stmt, result_t, field, leaves) + r = lift_leaves(compact, result_t, field, leaves) r === nothing && continue lifted_leaves, any_undef = r @@ -736,14 +759,13 @@ function getfield_elim_pass!(ir::IRCode) @assert val !== nothing end - global assertion_counter - assertion_counter::Int += 1 + # global assertion_counter + # assertion_counter::Int += 1 #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), val), nothing, 0, true) #continue compact[idx] = val === nothing ? 
nothing : val.x end - non_dce_finish!(compact) # Copy the use count, `simple_dce!` may modify it and for our predicate # below we need it consistent with the state of the IR here (after tracking @@ -874,11 +896,12 @@ function getfield_elim_pass!(ir::IRCode) end ir end +# assertion_counter = 0 function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int) # return whether this made a change if isa(compact.result[idx][:inst], PhiNode) - return maybe_erase_unused!(extra_worklist, compact, idx, val -> phi_uses[val.id] -= 1) + return maybe_erase_unused!(extra_worklist, compact, idx, val::SSAValue -> phi_uses[val.id] -= 1) else return maybe_erase_unused!(extra_worklist, compact, idx) end @@ -893,7 +916,7 @@ function count_uses(@nospecialize(stmt), uses::Vector{Int}) end end -function mark_phi_cycles(compact::IncrementalCompact, safe_phis::BitSet, phi::Int) +function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::BitSet, phi::Int) worklist = Int[] push!(worklist, phi) while !isempty(worklist) @@ -909,6 +932,11 @@ function mark_phi_cycles(compact::IncrementalCompact, safe_phis::BitSet, phi::In end end +""" + adce_pass!(ir::IRCode) -> newir::IRCode + +Aggressive Dead Code Elimination pass to eliminate code. 
+""" function adce_pass!(ir::IRCode) phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes)) all_phis = Int[] @@ -940,7 +968,7 @@ function adce_pass!(ir::IRCode) for phi in all_phis # Save any phi cycles that have non-phi uses if compact.used_ssas[phi] - phi_uses[phi] != 0 - mark_phi_cycles(compact, safe_phis, phi) + mark_phi_cycles!(compact, safe_phis, phi) end end for phi in all_phis diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 653923ace6e8e..c7c34edd84308 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -202,6 +202,10 @@ function verify_ir(ir::IRCode, print::Bool=true) @verify_error "SSAValue as assignment LHS" error("") end + if stmt.args[2] isa GlobalRef + # undefined GlobalRef as assignment RHS is OK + continue + end elseif stmt.head === :gc_preserve_end # We allow gc_preserve_end tokens to span across try/catch # blocks, which isn't allowed for regular SSA values, so diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 23045c65cc6bb..3be0965bcd834 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -377,26 +377,25 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb)) return Bool end # type-lattice for Const and PartialStruct wrappers - if (isa(typea, PartialStruct) || isa(typea, Const)) && - (isa(typeb, PartialStruct) || isa(typeb, Const)) && - widenconst(typea) === widenconst(typeb) + if ((isa(typea, PartialStruct) || isa(typea, Const)) && + (isa(typeb, PartialStruct) || isa(typeb, Const)) && + widenconst(typea) === widenconst(typeb)) - typea_nfields = nfields_tfunc(typea) - typeb_nfields = nfields_tfunc(typeb) - if !isa(typea_nfields, Const) || !isa(typeb_nfields, Const) || typea_nfields.val !== typeb_nfields.val + typea_nfields = nfields_tfunc(typea) + typeb_nfields = nfields_tfunc(typeb) + if !isa(typea_nfields, Const) || !isa(typeb_nfields, Const) || typea_nfields.val !== typeb_nfields.val return widenconst(typea) - end + 
end - type_nfields = typea_nfields.val::Int - fields = Vector{Any}(undef, type_nfields) - anyconst = false - for i = 1:type_nfields + type_nfields = typea_nfields.val::Int + fields = Vector{Any}(undef, type_nfields) + anyconst = false + for i = 1:type_nfields fields[i] = tmerge(getfield_tfunc(typea, Const(i)), getfield_tfunc(typeb, Const(i))) anyconst |= has_nontrivial_const_info(fields[i]) - end - return anyconst ? PartialStruct(widenconst(typea), fields) : - widenconst(typea) + end + return anyconst ? PartialStruct(widenconst(typea), fields) : widenconst(typea) end if isa(typea, PartialOpaque) && isa(typeb, PartialOpaque) && widenconst(typea) == widenconst(typeb) if !(typea.source === typeb.source && diff --git a/base/iterators.jl b/base/iterators.jl index c0a68a36d836e..5116bd450021f 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -592,8 +592,8 @@ IteratorSize(::Type{<:Rest{I}}) where {I} = rest_iteratorsize(IteratorSize(I)) # Count -- infinite counting -struct Count{S<:Number} - start::S +struct Count{T,S} + start::T step::S end @@ -613,11 +613,13 @@ julia> for v in Iterators.countfrom(5, 2) 9 ``` """ -countfrom(start::Number, step::Number) = Count(promote(start, step)...) -countfrom(start::Number) = Count(start, oneunit(start)) -countfrom() = Count(1, 1) +countfrom(start::T, step::S) where {T,S} = Count{typeof(start+step),S}(start, step) +countfrom(start::Number, step::Number) = Count(promote(start, step)...) 
+countfrom(start) = Count(start, oneunit(start)) +countfrom() = Count(1, 1) -eltype(::Type{Count{S}}) where {S} = S + +eltype(::Type{Count{T,S}}) where {T,S} = T iterate(it::Count, state=it.start) = (state, state + it.step) diff --git a/base/math.jl b/base/math.jl index d6cbaf6068a3f..caedb02ec127f 100644 --- a/base/math.jl +++ b/base/math.jl @@ -880,11 +880,39 @@ function frexp(x::T) where T<:IEEEFloat return reinterpret(T, xu), k end -rem(x::Float64, y::Float64, ::RoundingMode{:Nearest}) = - ccall((:remainder, libm),Float64,(Float64,Float64),x,y) -rem(x::Float32, y::Float32, ::RoundingMode{:Nearest}) = - ccall((:remainderf, libm),Float32,(Float32,Float32),x,y) -rem(x::Float16, y::Float16, r::RoundingMode{:Nearest}) = Float16(rem(Float32(x), Float32(y), r)) +# NOTE: This `rem` method is adapted from the msun `remainder` and `remainderf` +# functions, which are under the following license: +# +# Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +# +# Developed at SunSoft, a Sun Microsystems, Inc. business. +# Permission to use, copy, modify, and distribute this +# software is freely granted, provided that this notice +# is preserved. 
+function rem(x::T, p::T, ::RoundingMode{:Nearest}) where T<:IEEEFloat + (iszero(p) || !isfinite(x) || isnan(p)) && return T(NaN) + x == p && return copysign(zero(T), x) + oldx = x + x = abs(rem(x, 2p)) # 2p may overflow but that's okay + p = abs(p) + if p < 2 * floatmin(T) # Check whether dividing p by 2 will underflow + if 2x > p + x -= p + if 2x >= p + x -= p + end + end + else + p_half = p / 2 + if x > p_half + x -= p + if x >= p_half + x -= p + end + end + end + return flipsign(x, oldx) +end """ diff --git a/src/ccall.cpp b/src/ccall.cpp index d8cc164f8ae18..53f6f6a50ec15 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -28,8 +28,12 @@ static bool runtime_sym_gvs(jl_codegen_params_t &emission_context, const char *f symMap = &emission_context.symMapExe; } else if ((intptr_t)f_lib == (intptr_t)JL_LIBJULIA_INTERNAL_DL_LIBNAME) { + libptrgv = prepare_global_in(M, jldlli_var); + symMap = &emission_context.symMapDlli; + } + else if ((intptr_t)f_lib == (intptr_t)JL_LIBJULIA_DL_LIBNAME) { libptrgv = prepare_global_in(M, jldll_var); - symMap = &emission_context.symMapDl; + symMap = &emission_context.symMapDll; } else #endif @@ -570,10 +574,22 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va if (f_name != NULL) { // just symbol, default to JuliaDLHandle // will look in process symbol table + if (!llvmcall) { + void *symaddr; + std::string iname("i"); + iname += f_name; + if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0)) { #ifdef _OS_WINDOWS_ - if (!llvmcall) - f_lib = jl_dlfind_win32(f_name); + f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME; #endif + f_name = jl_symbol_name(jl_symbol(iname.c_str())); + } +#ifdef _OS_WINDOWS_ + else { + f_lib = jl_dlfind_win32(f_name); + } +#endif + } } else if (jl_is_cpointer_type(jl_typeof(ptr))) { fptr = *(void(**)(void))jl_data_ptr(ptr); @@ -1271,7 +1287,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) #ifdef _OS_WINDOWS_ if ((f_lib == 
JL_EXE_LIBNAME) || // preventing invalid pointer access (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) || - (!strcmp(f_lib, JL_LIBJULIA_DL_LIBNAME)) || + (f_lib == JL_LIBJULIA_DL_LIBNAME) || (!strcmp(f_lib, jl_crtdll_basename))) { // libjulia-like } @@ -1283,7 +1299,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) } return f_name && f_name == name; }; -#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), StringRef(#name)) +#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), StringRef(XSTR(name))) // emit arguments jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs); diff --git a/src/codegen.cpp b/src/codegen.cpp index a675ebd71cfc5..af01b54a9352f 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -402,6 +402,11 @@ static const auto jlexe_var = new JuliaVariable{ [](LLVMContext &C) { return T_pint8; }, }; static const auto jldll_var = new JuliaVariable{ + XSTR(jl_libjulia_handle), + true, + [](LLVMContext &C) { return T_pint8; }, +}; +static const auto jldlli_var = new JuliaVariable{ XSTR(jl_libjulia_internal_handle), true, [](LLVMContext &C) { return T_pint8; }, @@ -8089,7 +8094,8 @@ static void init_jit_functions(void) add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle); #ifdef _OS_WINDOWS_ add_named_global(jlexe_var, &jl_exe_handle); - add_named_global(jldll_var, &jl_libjulia_internal_handle); + add_named_global(jldll_var, &jl_libjulia_handle); + add_named_global(jldlli_var, &jl_libjulia_internal_handle); #endif global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, get_pjlvalue}, &jl_true); global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, get_pjlvalue}, &jl_false); @@ -8209,42 +8215,20 @@ extern "C" void jl_init_llvm(void) #endif // Parse command line flags after initialization - const char *const argv_tailmerge[] = {"", "-enable-tail-merge=0"}; // NOO TOUCHIE; NO TOUCH! 
See #922 - cl::ParseCommandLineOptions(sizeof(argv_tailmerge)/sizeof(argv_tailmerge[0]), argv_tailmerge, "disable-tail-merge\n"); -#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) - const char *const argv_copyprop[] = {"", "-disable-copyprop"}; // llvm bug 21743 - cl::ParseCommandLineOptions(sizeof(argv_copyprop)/sizeof(argv_copyprop[0]), argv_copyprop, "disable-copyprop\n"); -#endif -#if defined(_CPU_X86_) || defined(_CPU_X86_64_) - const char *const argv_avoidsfb[] = {"", "-x86-disable-avoid-SFB"}; // llvm bug 41629, see https://gist.github.com/vtjnash/192cab72a6cfc00256ff118238163b55 - cl::ParseCommandLineOptions(sizeof(argv_avoidsfb)/sizeof(argv_avoidsfb[0]), argv_avoidsfb, "disable-avoidsfb\n"); -#endif -#if JL_LLVM_VERSION >= 120000 - // https://reviews.llvm.org/rGc068e9c8c123e7f8c8f3feb57245a012ccd09ccf - Optional envValue = sys::Process::GetEnv("JULIA_LLVM_ARGS"); - if (envValue) { - SmallVector newArgv; - BumpPtrAllocator A; - StringSaver Saver(A); - newArgv.push_back(Saver.save("Julia").data()); - - // Parse the value of the environment variable into a "command line" - // and hand it off to ParseCommandLineOptions(). - cl::TokenizeGNUCommandLine(*envValue, Saver, newArgv); - int newArgc = static_cast(newArgv.size()); - cl::ParseCommandLineOptions(newArgc, &newArgv[0]); - } -#else - cl::ParseEnvironmentOptions("Julia", "JULIA_LLVM_ARGS"); -#endif - + StringMap &llvmopts = cl::getRegisteredOptions(); + const char *const argv[1] = {"julia"}; + cl::ParseCommandLineOptions(1, argv, "", nullptr, "JULIA_LLVM_ARGS"); + + // Set preferred non-default options + cl::Option *clopt; + clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922 + if (clopt->getNumOccurrences() == 0) + cl::ProvidePositionalOption(clopt, "0", 1); // if the patch adding this option has been applied, lower its limit to provide // better DAGCombiner performance. 
- auto &clOptions = cl::getRegisteredOptions(); - if (clOptions.find("combiner-store-merge-dependence-limit") != clOptions.end()) { - const char *const argv_smdl[] = {"", "-combiner-store-merge-dependence-limit=4"}; - cl::ParseCommandLineOptions(sizeof(argv_smdl)/sizeof(argv_smdl[0]), argv_smdl); - } + clopt = llvmopts.lookup("combiner-store-merge-dependence-limit"); + if (clopt && clopt->getNumOccurrences() == 0) + cl::ProvidePositionalOption(clopt, "4", 1); TargetOptions options = TargetOptions(); //options.PrintMachineCode = true; //Print machine code produced during JIT compiling @@ -8346,6 +8330,8 @@ extern "C" void jl_init_llvm(void) if (jl_using_perf_jitevents) jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); #endif + + cl::PrintOptionValues(); } extern "C" void jl_init_codegen(void) diff --git a/src/init.c b/src/init.c index 5fe2d388a40f3..49e61935100f8 100644 --- a/src/init.c +++ b/src/init.c @@ -653,15 +653,19 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) void *stack_lo, *stack_hi; jl_init_stack_limits(1, &stack_lo, &stack_hi); - // Load libjulia-internal (which contains this function), and libjulia, explicitly. 
jl_libjulia_internal_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1); - jl_libjulia_handle = jl_load_dynamic_library(JL_LIBJULIA_SONAME, JL_RTLD_DEFAULT, 1); #ifdef _OS_WINDOWS_ + jl_exe_handle = GetModuleHandleA(NULL); + jl_RTLD_DEFAULT_handle = jl_libjulia_internal_handle; + if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCWSTR)&jl_any_type, + (HMODULE*)&jl_libjulia_handle)) { + jl_error("could not load base module"); + } jl_ntdll_handle = jl_dlopen("ntdll.dll", 0); // bypass julia's pathchecking for system dlls jl_kernel32_handle = jl_dlopen("kernel32.dll", 0); jl_crtdll_handle = jl_dlopen(jl_crtdll_name, 0); jl_winsock_handle = jl_dlopen("ws2_32.dll", 0); - jl_exe_handle = GetModuleHandleA(NULL); JL_MUTEX_INIT(&jl_in_stackwalk); SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC); if (!SymInitialize(GetCurrentProcess(), "", 1)) { @@ -680,8 +684,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #endif #endif -#if \ - defined(JL_USE_INTEL_JITEVENTS) || \ +#if defined(JL_USE_INTEL_JITEVENTS) || \ defined(JL_USE_OPROFILE_JITEVENTS) || \ defined(JL_USE_PERF_JITEVENTS) const char *jit_profiling = getenv("ENABLE_JITPROFILING"); diff --git a/src/jitlayers.h b/src/jitlayers.h index 1d3a334639d21..89aaa7c94fec1 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -63,7 +63,8 @@ typedef struct { StringMap> libMapGV; #ifdef _OS_WINDOWS_ SymMapGV symMapExe; - SymMapGV symMapDl; + SymMapGV symMapDll; + SymMapGV symMapDlli; #endif SymMapGV symMapDefault; // Map from distinct callee's to its GOT entry. 
diff --git a/src/julia_internal.h b/src/julia_internal.h index 7bc0233fa31a2..6153f3e48820f 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1113,12 +1113,11 @@ STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) return *seed % max; } -// libuv stuff: -JL_DLLEXPORT extern void *jl_libjulia_handle; JL_DLLEXPORT extern void *jl_libjulia_internal_handle; JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle; #if defined(_OS_WINDOWS_) JL_DLLEXPORT extern void *jl_exe_handle; +JL_DLLEXPORT extern void *jl_libjulia_handle; extern void *jl_ntdll_handle; extern void *jl_kernel32_handle; extern void *jl_crtdll_handle; @@ -1137,12 +1136,8 @@ JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline( // Windows only #define JL_EXE_LIBNAME ((const char*)1) -#define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)2) -#if defined(JL_DEBUG_BUILD) -#define JL_LIBJULIA_DL_LIBNAME "libjulia-debug" -#else -#define JL_LIBJULIA_DL_LIBNAME "libjulia" -#endif +#define JL_LIBJULIA_DL_LIBNAME ((const char*)2) +#define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)3) const char *jl_dlfind_win32(const char *name); // libuv wrappers: diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index ebbc9668bf6a3..7d55090f19a65 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -36,7 +36,7 @@ void *jl_get_library_(const char *f_lib, int throw_err) return jl_exe_handle; if (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) return jl_libjulia_internal_handle; - if (strcmp(f_lib, JL_LIBJULIA_DL_LIBNAME) == 0) + if (f_lib == JL_LIBJULIA_DL_LIBNAME) return jl_libjulia_handle; #endif JL_LOCK(&libmap_lock); diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 20e064fc333bb..ad36e98488745 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -366,12 +366,12 @@ function _rdiv!(Dc::Diagonal, Db::Diagonal, Da::Diagonal) end ldiv!(Dc::Diagonal, Da::Diagonal, Db::Diagonal) = 
Diagonal(ldiv!(Dc.diag, Da, Db.diag)) -# (l/r)mul!, l/rdiv!, *, / and \ Optimization for AbstractTriangular. +# Optimizations for [l/r]mul!, l/rdiv!, *, / and \ between AbstractTriangular and Diagonal. # These functions are generally more efficient if we calculate the whole data field. -# The following code implements them in a unified patten to avoid missing. -function _setdiag!(data, f, x, ys...) - for i in 1:length(x) - data[i,i] = f(map(arg -> arg[i], (x, ys...))...) +# The following code implements them in a unified pattern to avoid missing. +function _setdiag!(data, f, diag, diag′...) + for i in 1:length(diag) + data[i,i] = f(map(x -> x[i], (diag, diag′...))...) end data end @@ -387,23 +387,23 @@ for Tri in (:UpperTriangular, :LowerTriangular) @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag)) end # 3-arg ldiv! - @eval ldiv!(out::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(out.data, D, A.data)) - @eval ldiv!(out::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(out.data, D, A.data), inv, D.diag)) - # 3-arg mul!: invoke 5-arg mul! rather than lmul! - @eval mul!(out::$Tri, A::Union{$Tri,$UTri}, D::Diagonal) = mul!(out, A, D, true, false) + @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data)) + @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag)) + # 3-arg mul!: fallback to 5-arg mul! rather than lmul! + @eval mul!(C::$Tri, A::Union{$Tri,$UTri}, D::Diagonal) = mul!(C, A, D, true, false) # 5-arg mul! - @eval @inline mul!(out::$Tri, D::Diagonal, A::$Tri, α::Number, β::Number) = - $Tri(mul!(out.data, D, A.data, α, β)) - @eval @inline function mul!(out::$Tri, D::Diagonal, A::$UTri, α::Number, β::Number) - diag′ = iszero(β) ? 
D.diag : diag(out) - data = mul!(out.data, D, A.data, α, β) + @eval @inline mul!(C::$Tri, D::Diagonal, A::$Tri, α::Number, β::Number) = $Tri(mul!(C.data, D, A.data, α, β)) + @eval @inline function mul!(C::$Tri, D::Diagonal, A::$UTri, α::Number, β::Number) + iszero(α) && return _rmul_or_fill!(C, β) + diag′ = iszero(β) ? D.diag : diag(C) + data = mul!(C.data, D, A.data, α, β) $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′)) end - @eval @inline mul!(out::$Tri, A::$Tri, D::Diagonal, α::Number, β::Number) = - $Tri(mul!(out.data, A.data, D, α, β)) - @eval @inline function mul!(out::$Tri, A::$UTri, D::Diagonal, α::Number, β::Number) - diag′ = iszero(β) ? D.diag : diag(out) - data = mul!(out.data, A.data, D, α, β) + @eval @inline mul!(C::$Tri, A::$Tri, D::Diagonal, α::Number, β::Number) = $Tri(mul!(C.data, A.data, D, α, β)) + @eval @inline function mul!(C::$Tri, A::$UTri, D::Diagonal, α::Number, β::Number) + iszero(α) && return _rmul_or_fill!(C, β) + diag′ = iszero(β) ? D.diag : diag(C) + data = mul!(C.data, A.data, D, α, β) $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′)) end end diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 8d85b1a1cc5c8..6e06f12ef4fe0 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -93,6 +93,26 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test v[2] == "1" @test isempty(v[3]) end + + let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options", "HOME" => homedir())) + @test v[1] + @test contains(v[2], r"print-options + = 1") + @test contains(v[2], r"combiner-store-merge-dependence-limit + = 4") + @test contains(v[2], r"enable-tail-merge + = 2") + @test isempty(v[3]) + end + let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -combiner-store-merge-dependence-limit=6", "HOME" => homedir())) + @test v[1] + @test contains(v[2], r"print-options + = 1") + @test contains(v[2], r"combiner-store-merge-dependence-limit + = 6") + 
@test contains(v[2], r"enable-tail-merge + = 1") + @test isempty(v[3]) + end + let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir())) + @test !v[1] + @test isempty(v[2]) + @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!" + end end let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` diff --git a/test/iterators.jl b/test/iterators.jl index 86c325a85b617..b16b75291c270 100644 --- a/test/iterators.jl +++ b/test/iterators.jl @@ -3,6 +3,7 @@ using Base.Iterators using Random using Base: IdentityUnitRange +using Dates: Date, Day @test Base.IteratorSize(Any) isa Base.SizeUnknown @@ -123,7 +124,7 @@ end # countfrom # --------- -let i = 0, k = 1 +let i = 0, k = 1, l = 0 for j = countfrom(0, 2) @test j == i*2 i += 1 @@ -134,6 +135,15 @@ let i = 0, k = 1 k += 1 k <= 10 || break end + # test that `start` promotes to `typeof(start+step)` + for j = countfrom(Int[0, 0], Float64[1.0, 2.0]) + @test j isa Vector{Float64} + @test j == l*[1, 2] + l += 1 + l <= 10 || break + end + # test with `start` and `step` having different types + @test collect(take(countfrom(Date(2020,12,25), Day(1)), 12)) == range(Date(2020,12,25), step=Day(1), length=12) end # take diff --git a/test/numbers.jl b/test/numbers.jl index aae3b5cf970b1..cf1eea2fc7365 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -2523,6 +2523,17 @@ end @test rem(T(-1.5), T(2), RoundNearest) == 0.5 @test rem(T(-1.5), T(2), RoundDown) == 0.5 @test rem(T(-1.5), T(2), RoundUp) == -1.5 + for mode in [RoundToZero, RoundNearest, RoundDown, RoundUp] + @test isnan(rem(T(1), T(0), mode)) + @test isnan(rem(T(Inf), T(2), mode)) + @test isnan(rem(T(1), T(NaN), mode)) + # FIXME: The broken case erroneously returns -Inf + @test rem(T(4), floatmin(T) * 2, mode) == 0 broken=(T == BigFloat && mode == RoundUp) + end + @test isequal(rem(nextfloat(typemin(T)), T(2), RoundToZero), -0.0) + @test 
isequal(rem(nextfloat(typemin(T)), T(2), RoundNearest), -0.0) + @test isequal(rem(nextfloat(typemin(T)), T(2), RoundDown), 0.0) + @test isequal(rem(nextfloat(typemin(T)), T(2), RoundUp), 0.0) end @testset "rem for $T RoundNearest" for T in (Int8, Int16, Int32, Int64, Int128) diff --git a/test/ranges.jl b/test/ranges.jl index e46207cc1481c..121e5a97cb517 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -687,14 +687,14 @@ end end @testset "broadcasted operations with scalars" for T in (Int, UInt, Int128) @test broadcast(-, T(1):3, 2) === T(1)-2:1 - @test broadcast(-, T(1):3, 0.25) === T(1)-0.25:3-0.25 + @test broadcast(-, T(1):3, 0.25) === range(T(1)-0.25, length=T(3)) == T(1)-0.25:3-0.25 @test broadcast(+, T(1):3) === T(1):3 @test broadcast(+, T(1):3, 2) === T(3):5 - @test broadcast(+, T(1):3, 0.25) === T(1)+0.25:3+0.25 + @test broadcast(+, T(1):3, 0.25) === range(T(1)+0.25, length=T(3)) == T(1)+0.25:3+0.25 @test broadcast(+, T(1):2:6, 1) === T(2):2:6 - @test broadcast(+, T(1):2:6, 0.3) === T(1)+0.3:2:5+0.3 + @test broadcast(+, T(1):2:6, 0.3) === range(T(1)+0.3, step=2, length=T(3)) == T(1)+0.3:2:5+0.3 @test broadcast(-, T(1):2:6, 1) === T(0):2:4 - @test broadcast(-, T(1):2:6, 0.3) === T(1)-0.3:2:5-0.3 + @test broadcast(-, T(1):2:6, 0.3) === range(T(1)-0.3, step=2, length=T(3)) == T(1)-0.3:2:5-0.3 is_unsigned = T <: Unsigned is_unsigned && @test length(broadcast(-, T(1):3, 2)) === length(T(1)-2:T(3)-2) @test broadcast(-, T(1):3) == -T(1):-T(1):-T(3) @@ -1534,6 +1534,11 @@ end @test @inferred(x .\ r) === 0.5:0.5:2.5 @test @inferred(2 .* (r .+ 1) .+ 2) == 6:2:14 + + # issue #42291 + @test length((1:5) .- 1/7) == 5 + @test length((1:5) .+ -1/7) == 5 + @test length(-1/7 .+ (1:5)) == 5 end @testset "Bad range calls" begin