diff --git a/src/array/ArrayBenchmarks.jl b/src/array/ArrayBenchmarks.jl
index 6730046f..998c4e54 100644
--- a/src/array/ArrayBenchmarks.jl
+++ b/src/array/ArrayBenchmarks.jl
@@ -195,6 +195,8 @@ for s in (5, 500)
     g["catnd_setind", s] = @benchmarkable perf_catnd_setind($s)
 end
 
+g["4467"] = @benchmarkable perf_cat_4467()  # register perf_cat_4467 in group g under the key "4467"
+
 ############################
 # in-place growth (#13977) #
 ############################
diff --git a/src/array/cat.jl b/src/array/cat.jl
index fa56244a..b3f66e9b 100644
--- a/src/array/cat.jl
+++ b/src/array/cat.jl
@@ -58,3 +58,8 @@ function perf_catnd_setind(n)
     C[1, :, (3n+1):4n, 1] = A
     return C
 end
+
+function perf_cat_4467()  # build a 5-element vector literal from locals of mixed numeric type
+    a, b, c, d, e = 1.0, 2.0, 3.0, 4.0, 5  # four Float64 values and one Int (presumably deliberate, to exercise promotion)
+    return [a, b, c, d, e]  # the vector literal promotes its elements to a common type
+end
diff --git a/src/io/IOBenchmarks.jl b/src/io/IOBenchmarks.jl
index 7ec44dbb..88448c4a 100644
--- a/src/io/IOBenchmarks.jl
+++ b/src/io/IOBenchmarks.jl
@@ -59,4 +59,27 @@ testdata_buf = serialized_buf(testdata)
 g["serialize", "Matrix{Float64}"] = @benchmarkable serialize(io, $testdata) setup=(io=IOBuffer())
 g["deserialize", "Matrix{Float64}"] = @benchmarkable (seek($testdata_buf, 0); deserialize($testdata_buf))
 
+function perf_skipchars_21109()  # call skipchars 1_000_001 times on a temp file that holds "G\n"
+    mktemp() do _, file  # the path is ignored; `file` is the open stream of the temp file
+        println(file, "G")  # 'G' is uppercase, so the islowercase predicate rejects it
+        flush(file)
+        seek(file, 0)  # rewind to the start before reading
+        @static if VERSION <= v"0.7.0-DEV.3495"  # compile-time switch on the skipchars argument order
+            skipchars(file, islowercase)  # stream-first form, used on the older versions
+        else
+            skipchars(islowercase, file)  # predicate-first form
+        end
+
+        for i in 1:1000000  # repeat the same call; presumably the stream stays positioned at 'G' each time
+            @static if VERSION <= v"0.7.0-DEV.3495"
+                skipchars(file, islowercase)
+            else
+                skipchars(islowercase, file)
+            end
+        end
+    end
+end
+
+SUITE["skipchars"] = @benchmarkable perf_skipchars_21109()  # registered directly on SUITE, not in group g
+
 end # module
diff --git a/src/misc/MiscellaneousBenchmarks.jl b/src/misc/MiscellaneousBenchmarks.jl
index 792c39d0..cf860602 100644
--- a/src/misc/MiscellaneousBenchmarks.jl
+++ b/src/misc/MiscellaneousBenchmarks.jl
@@ -122,4 +122,174 @@ g["macroexpand", "evalpoly"] = @benchmarkable macroexpand(@__MODULE__, $(Expr(:m
 
 ###########################################################################
 
+# Issue #12165
+
+struct FloatingPointDatatype  # immutable record of 12 fixed-width unsigned fields; presumably an HDF5-style float layout (see h5type)
+    class::UInt8
+    bitfield1::UInt8
+    bitfield2::UInt8
+    bitfield3::UInt8
+    size::UInt32  # 2, 4 and 8 in the h5type methods (presumably bytes — TODO confirm)
+    bitoffset::UInt16
+    bitprecision::UInt16  # 16, 32 and 64 in the h5type methods
+    exponentlocation::UInt8
+    exponentsize::UInt8
+    mantissalocation::UInt8
+    mantissasize::UInt8
+    exponentbias::UInt32
+end
+
+h5type(::Type{Float16}) =  # descriptor for Float16: size 2, bitprecision 16, exponentbias 0x0f
+    FloatingPointDatatype(0x00, 0x20, 0x0f, 0x00, UInt32(2), 0x0000, UInt16(16), UInt8(10), 0x05, 0x00, UInt32(10), 0x0000000f)
+h5type(::Type{Float32}) =  # descriptor for Float32: size 4, bitprecision 32, exponentbias 0x7f
+    FloatingPointDatatype(0x00, 0x20, 0x1f, 0x00, UInt32(4), 0x0000, UInt16(32), UInt8(23), 0x08, 0x00, UInt32(23), 0x0000007f)
+h5type(::Type{Float64}) =  # descriptor for Float64: size 8, bitprecision 64, exponentbias 0x3ff; the UInt32 mantissasize argument is converted to the UInt8 field
+    FloatingPointDatatype(0x00, 0x20, 0x3f, 0x00, UInt32(8), 0x0000, UInt16(64), UInt8(52), 0x0b, 0x00, UInt32(52), 0x000003ff)
+
+struct UnsupportedFeatureException <: Exception end  # field-less exception; thrown by jltype when no known descriptor matches
+
+function jltype(dt::FloatingPointDatatype)  # map a descriptor to 64, 32 or 16; throw if it matches none of the three
+    if dt == h5type(Float64)  # whole-struct comparison; no custom == method is visible in this diff
+        return 64
+    elseif dt == h5type(Float32)
+        return 32
+    elseif dt == h5type(Float16)  # Float16 is tested last, so it needs all three comparisons
+        return 16
+    else
+        throw(UnsupportedFeatureException())
+    end
+end
+
+x_16 = fill(h5type(Float16), 1000000)  # 1_000_000 copies of the Float16 descriptor
+x_32 = fill(h5type(Float32), 1000000)  # 1_000_000 copies of the Float32 descriptor
+x_64 = fill(h5type(Float64), 1000000)  # 1_000_000 copies of the Float64 descriptor
+
+function perf_jltype(x)  # sum jltype over every element of x
+    y = 0  # Int accumulator; jltype returns Int literals
+    for i = 1:length(x)
+        y += jltype(x[i])
+    end
+    y  # returned total
+end
+
+g = addgroup!(SUITE, "issue 12165")  # rebinds g to a new group on SUITE
+g["Float16"] = @benchmarkable perf_jltype($x_16)  # $ interpolates the array into the benchmark expression
+g["Float32"] = @benchmarkable perf_jltype($x_32)
+g["Float64"] = @benchmarkable perf_jltype($x_64)
+
+
+#########################################################################
+# issue #18129
+
+function perf_cheapest_insertion_18129(distmat::Matrix{T}, initpath::Vector{Int}) where {T<:Real}  # greedy cheapest-insertion; returns (path, cost)
+    check_square(distmat, "Distance matrix passed to cheapest_insertion must be square.")
+
+    n = size(distmat, 1)  # number of cities
+    path = copy(initpath)  # work on a copy so the caller's vector is not mutated
+
+    # collect cities to visit
+    visitus = setdiff(collect(1:n), initpath)
+
+    # helper for insertion cost
+    # tour cost change for inserting node k after the node at index after in the path
+    function inscost(k, after)  # closure over distmat and path
+        return distmat[path[after], k] +
+              distmat[k, path[after + 1]] -
+              distmat[path[after], path[after + 1]]
+    end
+
+    counter = 0  # counts evaluated (k, after) candidates; never read in this function
+    while !isempty(visitus)
+        bestCost = Inf
+        bestInsertion = (-1, -1)  # sentinel, replaced by the first candidate cheaper than Inf
+        for k in visitus
+            for after in 1:(length(path) - 1) # can't insert after end of path
+                counter += 1
+                c = inscost(k, after)
+                if c < bestCost  # strict <, so the earliest candidate wins ties
+                    bestCost = c
+                    bestInsertion = (k, after)
+                end
+            end
+        end
+        # bestInsertion now holds (k, after)
+        # insert into path, remove from to-do list
+        k, after = bestInsertion  # NOTE(review): still (-1, -1) if the inner loop never ran (path shorter than 2) — confirm callers
+        insert!(path, after + 1, k)
+        visitus = setdiff(visitus, k)  # builds a new collection without k on every round
+    end
+
+    return (path, pathcost(distmat, path))  # final path and its total cost
+end
+
+###
+# helpers
+###
+
+# make sure a passed distance matrix is square
+function check_square(m, msg)  # raises error(msg) when the first two dimensions of m differ
+    if size(m, 1) != size(m, 2)
+        error(msg)
+    end
+end
+
+# helper for readable one-line path costs
+# optionally specify the bounds for the subpath we want the cost of
+# defaults to the whole path
+# but when calculating reversed path costs can help to have subpath costs
+function pathcost(distmat::Matrix{T}, path::Vector{Int}, lb::Int = 1, ub::Int = length(path)) where {T<:Real}  # sum of consecutive edge costs for path[lb:ub]
+    cost = zero(T)  # accumulator starts in the matrix element type
+    for i in lb:(ub - 1)  # one step per consecutive pair; empty when ub <= lb
+        @inbounds cost += distmat[path[i], path[i+1]]  # no bounds check: assumes lb/ub lie within path and entries index distmat
+    end
+    return cost
+end
+
+dm = samerand(Float64, 300, 300)  # 300x300 Float64 matrix from samerand (defined outside this diff; presumably reproducible random data)
+SUITE["18129"] = @benchmarkable perf_cheapest_insertion_18129($dm, $([1, 1]))  # initial path [1, 1]: starts and ends at city 1
+
+
+###############################################################################
+# issue #20517
+
+function perf_dsum_20517(A::Matrix)  # B[j] = sum over k in 1:j of A[j,k]*A[k,j]; returns B
+    z = zero(A[1,1])  # additive zero of the element type; requires A to be non-empty
+    n = size(A,1)  # row count; the indexing below assumes A has at least n columns
+    B = Vector{typeof(z)}(undef, n)  # uninitialized; every slot is written in the loop
+
+    @inbounds for j in 1:n
+        B[j] = mapreduce(k -> A[j,k]*A[k,j], +, 1:j; init=z)  # the closure captures A and j
+    end
+    B
+end
+
+A = samerand(127,127)  # 127x127 input from samerand (defined outside this diff)
+SUITE["20517"] = @benchmarkable perf_dsum_20517($A)  # $A interpolates the matrix into the benchmark expression
+
+
+###############################################
+# issue #23042
+
+struct Foo_23042{T<:Number, A<:AbstractMatrix{T}}  # thin wrapper; both the element type and the concrete matrix type are parameters
+    data::A  # concretely typed field
+end
+
+Foo_23042(data::AbstractMatrix) = Foo_23042{eltype(data), typeof(data)}(data)  # outer constructor: derives both parameters from the argument
+
+
+function perf_copy_23042(a, b)  # copy b.data into a.data element by element; returns a
+    for i in 1:length(a.data)  # linear indices over the destination
+        @inbounds a.data[i] = b.data[i]  # no bounds check: assumes b.data has at least as many elements as a.data
+    end
+    a
+end
+
+g = addgroup!(SUITE, "23042")  # rebinds g to a new group on SUITE
+
+for T in (Float32, Float64, Complex{Float32}, Complex{Float64})  # one benchmark per element type
+    b = samerand(T, 128, 128)  # source matrix
+    a = similar(b)  # destination with the same element type and size; contents uninitialized
+    g[string(T)] = @benchmarkable perf_copy_23042($(Foo_23042(a)), $(Foo_23042(b)))  # wrappers are built once here and interpolated
+end
+
 end
diff --git a/src/scalar/ScalarBenchmarks.jl b/src/scalar/ScalarBenchmarks.jl
index 6b101019..664aa892 100644
--- a/src/scalar/ScalarBenchmarks.jl
+++ b/src/scalar/ScalarBenchmarks.jl
@@ -83,6 +83,87 @@ for X in (INTS..., Char, Bool)
     end
 end
 
+
+function perf_13786(k::Float64,τ::Float64,twopiN::Float64)  # returns sqrt(max(y, 0)) * (W*y + τ), with y = 1 - k*τ and W = getW(k, twopiN)
+  W = getW(k,twopiN)
+  y = 1.00-k*τ
+  sqy = y>0.0 ? sqrt(y) : 0.0  # guard: sqrt is taken only for positive y, otherwise 0.0
+  tof::Float64 = sqy * (W*y + τ)  # typed local
+  return tof
+end
+
+function getW(k::Float64,twopiN::Float64)  # piecewise evaluation of W; the branch is selected by the value of k
+     local sqrt2l= sqrt(2.0)  # sqrt(2), used in the branch thresholds below
+     local K2c0  = 0.7542472332656508  # K2c0..K2c8: coefficients for the branch where k is within 0.02 of sqrt(2)
+     local K2c1 = -0.2
+     local K2c2 = 0.08081220356417687
+     local K2c3 = -0.031746031746031744
+     local K2c4  = 0.012244273267299524
+     local K2c5 = -2.0/429.0
+     local K2c6 = 8.0 * sqrt(2.0)/6435.0
+     local K2c7 = -8.0/12155.0
+     local K2c8 = 8.0*sqrt(2.0)/46189.0
+     local k00 =0.3535533905932738  # k00..k08: coefficients for the small-|k| series branch
+     local k02  =3.00*k00*0.25
+     local k03  =-2.00/3.0
+     local k04  =15.0*sqrt(2.0)/128.0
+     local k05  =-2.0/5.0
+     local k06  =35.0*sqrt(2.0)/512.0
+     local k07  =-8/35.0
+     local k08  =315.0*sqrt(2.0)/8192.0
+     local twopi = 2.0*π
+
+    W = 0.0  # stays 0.0 if none of the branches below matches
+
+    @fastmath begin  # relaxed floating-point semantics apply to the whole branch block
+        if k<=-0.02  # closed form with acos, using twopi - t3
+          t2 = (k*k)
+          t3 = acos(t2 - 1.0)
+          t4 = 2.0 - t2
+          t6 = twopi - t3   + twopiN
+          t5 = 1.0/t4
+          W =   (t6 * sqrt(t5) - k)*t5
+        elseif (k>=0.02) & (k < (sqrt2l-0.02+twopiN))  # non-short-circuit &: both comparisons are always evaluated
+          t2 = (k*k)
+          t3 = acos(t2 - 1.00)
+          t4 = 2.00 - t2
+          t6 = (t3  + twopiN)
+          t5 = 1.00/t4
+          W =   (t6 * sqrt(t5) - k)*t5
+        elseif k<0.02 # reached only for -0.02 < k < 0.02: series in k, to improve convergence near k = 0
+          xt = twopiN+π
+          t2 = k*k
+          t3 = t2*k
+          t4 = t2*t2
+          t5 = t4*k
+          t6 = t3*t3
+          t7 = t5*t2
+          t8 = t4*t4
+          W =    xt*k00 - k + (xt*k02)*t2 + k03*t3 + (xt*k04)*t4 + k05*t5 + (xt*k06)*t6  + k07*t7 + (xt*k08)*t8
+        elseif (k> (sqrt2l-0.02) && k< (sqrt2l+0.02))  # k within 0.02 of sqrt(2): polynomial in xt = k - sqrt2l
+          xt = k-sqrt2l
+          t2 = xt*xt
+          t3 = t2*xt
+          t4 = t3*xt
+          t5 = t4*xt
+          t6 = t5*xt
+          t7 = t6*xt
+          t8 = t7*xt
+          W = K2c0 + K2c1*k + K2c2*t2 + K2c3*t3 + K2c4*t4 + K2c5*t5 + K2c6*t6 + K2c7*t7 + K2c8*t8  # NOTE(review): linear term uses k while the others use powers of xt — confirm intended
+        elseif k >= (sqrt2l+0.02)  # closed form with log
+          t7 = (k+1.0) * (k-1.00)
+          t3 =  log(t7 + sqrt(t7*t7-1.0))
+          t4 =  t7-1.0
+          t5 =  1.0/t4
+          W =   (-t3 * sqrt(t5) + k)*t5
+        end
+    end
+
+    return W
+end
+
+fstmth["13786"] = @benchmarkable perf_13786(0.4, 0.5, 0.0)  # k = 0.4 with twopiN = 0.0 takes getW's second branch (0.02 <= k < sqrt(2) - 0.02)
+
 #############
 # iteration #
 #############
diff --git a/src/tuple/TupleBenchmarks.jl b/src/tuple/TupleBenchmarks.jl
index 0423dbbf..bcf21e19 100644
--- a/src/tuple/TupleBenchmarks.jl
+++ b/src/tuple/TupleBenchmarks.jl
@@ -174,4 +174,22 @@ for (m, v) in zip((m2x2, m4x4, m8x8, m16x16), (v2, v4, v8, v16 ))
 end
 
 
+function _add( a::NTuple{4,Float32}, b::NTuple{4,Float32} )  # element-wise sum of two 4-tuples of Float32
+    (a[1]+b[1],a[2]+b[2],a[3]+b[3],a[4]+b[4])
+end
+
+function _mul( a::NTuple{4,Float32}, b::NTuple{4,Float32} )  # element-wise product of two 4-tuples of Float32
+   (a[1]*b[1],a[2]*b[2],a[3]*b[3],a[4]*b[4])
+end
+
+function _madd( a::NTuple{4,Float32}, b::NTuple{4,Float32}, c::NTuple{4,Float32} )  # element-wise a*b + c, composed from _mul and _add
+   _add(_mul(a,b),c)
+end
+
+perf_tuple_11899(t) = _madd(t, t, t)  # element-wise t*t + t via _madd
+g = addgroup!(SUITE, "misc", ["tuple"])  # group "misc"; the extra argument is presumably its tag list — confirm against BenchmarkTools
+t = (Float32(1.0), Float32(2.0), Float32(3.0), Float32(4.0))  # NTuple{4,Float32} input matching the _madd signature
+g["11899"] = @benchmarkable perf_tuple_11899($t)  # $t interpolates the tuple into the benchmark expression
+
+
 end # module