JuliaData · bkamins · Sep 19, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 13, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,7 +12,6 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.0'
           - '1.6'
           - '1' # automatically expands to the latest stable 1.x release of Julia
           - 'nightly'

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,10 @@
 # DataFrames.jl v1.4 Release Notes
 
+## Julia compatibility change
+
+* DataFrames.jl 1.4 requires Julia 1.6 or later
+  ([#3145](https://github.com/JuliaData/DataFrames.jl/pull/3145))
+
 ## New functionalities
 
 * `subset` and `subset!` now allow passing zero column selectors

diff --git a/Project.toml b/Project.toml
@@ -25,7 +25,7 @@ Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
 [compat]
 CategoricalArrays = "0.10.0"
-Compat = "3.46, 4.2"
+Compat = "4.2"
 DataAPI = "1.10"
 InvertedIndices = "1"
 IteratorInterfaceExtensions = "0.1.1, 1"
@@ -38,7 +38,7 @@ SortingAlgorithms = "0.1, 0.2, 0.3, 1"
 TableTraits = "0.4, 1"
 Tables = "1.8.1"
 Unitful = "1"
-julia = "1"
+julia = "1.6"
 
 [extras]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"

diff --git a/src/DataFrames.jl b/src/DataFrames.jl
@@ -87,23 +87,8 @@ export AbstractDataFrame,
        unstack,
        valuecols
 
-if VERSION >= v"1.1.0-DEV.792"
-    import Base.eachcol, Base.eachrow
-else
-    import Compat.eachcol, Compat.eachrow
-    export eachcol, eachrow
-end
-
-if VERSION < v"1.2"
-    export hasproperty
-end
-
-if isdefined(Base, :only)  # Introduced in 1.4.0
-    import Base.only
-else
-    import Compat.only
-    export only
-end
+using Base.Threads: @spawn
+using Base: ComposedFunction
 
 if isdefined(Base, :keepat!)  # Introduced in 1.7.0
     import Base.keepat!
@@ -112,36 +97,6 @@ else
     export keepat!
 end
 
-if isdefined(Base, :popat!)  # Introduced in 1.5.0
-    import Base.popat!
-else
-    import Compat.popat!
-    export popat!
-end
-
-if VERSION >= v"1.3"
-    using Base.Threads: @spawn
-else
-    # This is the definition of @async in Base
-    macro spawn(expr)
-        thunk = esc(:(()->($expr)))
-        var = esc(Base.sync_varname)
-        quote
-            local task = Task($thunk)
-            if $(Expr(:isdefined, var))
-                push!($var, task)
-            end
-            schedule(task)
-        end
-    end
-end
-
-if isdefined(Base, :ComposedFunction) # Julia >= 1.6.0-DEV.85
-    using Base: ComposedFunction
-else
-    using Compat: ComposedFunction
-end
-
 if VERSION >= v"1.9.0-DEV.1163"
     import Base: stack
 else

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -382,10 +382,6 @@ Return `true` if data frame `df` has zero rows, and `false` otherwise.
 """
 Base.isempty(df::AbstractDataFrame) = nrow(df) == 0
 
-if VERSION < v"1.6"
-    Base.firstindex(df::AbstractDataFrame, i::Integer) = first(axes(df, i))
-    Base.lastindex(df::AbstractDataFrame, i::Integer) = last(axes(df, i))
-end
 Base.axes(df::AbstractDataFrame, i::Integer) = Base.OneTo(size(df, i))
 
 """
@@ -496,7 +492,7 @@ end
 
 If `df` has a single row return it as a `DataFrameRow`; otherwise throw `ArgumentError`.
 """
-function only(df::AbstractDataFrame)
+function Base.only(df::AbstractDataFrame)
     nrow(df) != 1 && throw(ArgumentError("data frame must contain exactly 1 row"))
     return df[1, :]
 end

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -72,7 +72,7 @@ julia> eachrow(view(df, [4, 3], [2, 1]))
    2 │    13      3
 ```
 """
-eachrow(df::AbstractDataFrame) = DataFrameRows(df)
+Base.eachrow(df::AbstractDataFrame) = DataFrameRows(df)
 
 Base.IndexStyle(::Type{<:DataFrameRows}) = Base.IndexLinear()
 Base.size(itr::DataFrameRows) = (size(parent(itr), 1), )
@@ -172,7 +172,7 @@ julia> sum.(eachcol(df))
  50
 ```
 """
-eachcol(df::AbstractDataFrame) = DataFrameColumns(df)
+Base.eachcol(df::AbstractDataFrame) = DataFrameColumns(df)
 
 Base.IteratorSize(::Type{<:DataFrameColumns}) = Base.HasShape{1}()
 Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),)
@@ -191,10 +191,6 @@ Base.eltype(::Type{<:DataFrameColumns}) = AbstractVector
 Base.firstindex(itr::DataFrameColumns) = 1
 Base.lastindex(itr::DataFrameColumns) = length(itr)
 
-if VERSION < v"1.6"
-    Base.firstindex(itr::DataFrameColumns, i::Integer) = first(axes(itr, i))
-    Base.lastindex(itr::DataFrameColumns, i::Integer) = last(axes(itr, i))
-end
 Base.axes(itr::DataFrameColumns, i::Integer) = Base.OneTo(size(itr, i))
 
 Base.iterate(itr::DataFrameColumns, i::Integer=1) =

diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl
@@ -59,15 +59,6 @@ function ourshow(io::IO, x::Markdown.MD, truncstring::Int)
     return print(io, len < length(r) - 1 ? first(r, len)*'…' : first(r, len))
 end
 
-# AbstractChar: https://github.com/JuliaLang/julia/pull/34730 (1.5.0-DEV.261)
-# Irrational: https://github.com/JuliaLang/julia/pull/34741 (1.5.0-DEV.266)
-if VERSION < v"1.5.0-DEV.261" || VERSION < v"1.5.0-DEV.266"
-    function ourshow(io::IO, x::T, truncstring::Int) where T <: Union{AbstractChar, Irrational}
-        io = IOContext(io, :compact=>get(io, :compact, true), :typeinfo=>typeof(x))
-        show(io, x)
-    end
-end
-
 # For most data frames, especially wide, columns having the same element type
 # occur multiple times. batch_compacttype ensures that we compute string
 # representation of a specific column element type only once and then reuse it.

diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
@@ -198,15 +198,9 @@ struct DataFrame <: AbstractDataFrame
 
         # we write into columns as we know that it is guaranteed
         # that it was freshly allocated in the outer constructor
-        @static if VERSION >= v"1.4"
-            if copycols && len >= 1_000_000 && length(columns) > 1 && Threads.nthreads() > 1
-                @sync for i in eachindex(columns)
-                    Threads.@spawn columns[i] = _preprocess_column(columns[i], len, copycols)
-                end
-            else
-                for i in eachindex(columns)
-                    columns[i] = _preprocess_column(columns[i], len, copycols)
-                end
+        if copycols && len >= 1_000_000 && length(columns) > 1 && Threads.nthreads() > 1
+            @sync for i in eachindex(columns)
+                Threads.@spawn columns[i] = _preprocess_column(columns[i], len, copycols)
             end
         else
             for i in eachindex(columns)
@@ -546,20 +540,15 @@ function _threaded_getindex(selected_rows::AbstractVector,
                             selected_columns::AbstractVector,
                             df_columns::AbstractVector,
                             idx::AbstractIndex)
-    @static if VERSION >= v"1.4"
-        if length(selected_rows) >= 1_000_000 && Threads.nthreads() > 1
-            new_columns = Vector{AbstractVector}(undef, length(selected_columns))
-            @sync for i in eachindex(new_columns)
-                Threads.@spawn new_columns[i] = df_columns[selected_columns[i]][selected_rows]
-            end
-            return DataFrame(new_columns, idx, copycols=false)
-        else
-            return DataFrame(AbstractVector[df_columns[i][selected_rows] for i in selected_columns],
-                             idx, copycols=false)
+    if length(selected_rows) >= 1_000_000 && Threads.nthreads() > 1
+        new_columns = Vector{AbstractVector}(undef, length(selected_columns))
+        @sync for i in eachindex(new_columns)
+            Threads.@spawn new_columns[i] = df_columns[selected_columns[i]][selected_rows]
         end
+        return DataFrame(new_columns, idx, copycols=false)
     else
         return DataFrame(AbstractVector[df_columns[i][selected_rows] for i in selected_columns],
                          idx, copycols=false)
     end
 end
 
@@ -1109,7 +1098,7 @@ julia> df
    2 │     3      6
 ```
 """
-function popat!(df::DataFrame, i::Integer)
+function Base.popat!(df::DataFrame, i::Integer)
     i isa Bool && throw(ArgumentError("Invalid index of type Bool"))
     nt = NamedTuple(df[i, :])
     deleteat!(df, i)

diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl
@@ -385,10 +385,6 @@ Base.ndims(::Type{<:DataFrameRow}) = 1
 Base.firstindex(r::DataFrameRow) = 1
 Base.lastindex(r::DataFrameRow) = length(r)
 
-if VERSION < v"1.6"
-    Base.firstindex(r::DataFrameRow, i::Integer) = first(axes(r, i))
-    Base.lastindex(r::DataFrameRow, i::Integer) = last(axes(r, i))
-end
 Base.axes(r::DataFrameRow, i::Integer) = Base.OneTo(size(r, i))
 
 Base.iterate(r::DataFrameRow) = iterate(r, 1)

diff --git a/src/groupeddataframe/complextransforms.jl b/src/groupeddataframe/complextransforms.jl
@@ -264,12 +264,12 @@ function _combine_rows_with_first!((firstrow,)::Ref{Any},
     # Create up to one task per thread
     # This has lower overhead than creating one task per group,
     # but is optimal only if operations take roughly the same time for all groups
-    if VERSION >= v"1.4" && threads && isthreadsafe(outcols, incols)
+    if threads && isthreadsafe(outcols, incols)
         basesize = max(1, cld(len - 1, Threads.nthreads()))
         partitions = Iterators.partition(2:len, basesize)
     else
         partitions = (2:len,)
     end
     widen_type_lock = ReentrantLock()
     outcolsref = Ref{NTuple{<:Any, AbstractVector}}(outcols)
     type_widened = fill(false, length(partitions))
@@ -320,16 +320,9 @@ end
 
 # This needs to be in a separate function
 # to work around a crash due to JuliaLang/julia#29430
-if VERSION >= v"1.1.0-DEV.723"
-    @inline function do_append!(do_it, col, vals)
-        do_it && append!(col, vals)
-        return do_it
-    end
-else
-    @noinline function do_append!(do_it, col, vals)
-        do_it && append!(col, vals)
-        return do_it
-    end
+@inline function do_append!(do_it, col, vals)
+    do_it && append!(col, vals)
+    return do_it
 end
 
 _get_col(rows::AbstractDataFrame, j::Int) = rows[!, j]

diff --git a/src/groupeddataframe/fastaggregates.jl b/src/groupeddataframe/fastaggregates.jl
@@ -162,9 +162,7 @@ function groupreduce!_helper(res::AbstractVector, f, op, condf, adjust, checkemp
                              batches)
     for batch in batches
         # Allow other tasks to do garbage collection while this one runs
-        @static if VERSION >= v"1.4"
-            GC.safepoint()
-        end
+        GC.safepoint()
 
         @inbounds for i in batch
             gix = groups[i]
@@ -194,12 +192,8 @@ function groupreduce!(res::AbstractVector, f, op, condf, adjust, checkempty::Boo
         counts = Int[]
     end
     groups = gd.groups
-    @static if VERSION >= v"1.4"
-        batchsize = Threads.nthreads() > 1 ? 100_000 : typemax(Int)
-        batches = Iterators.partition(eachindex(incol, groups), batchsize)
-    else
-        batches = (eachindex(incol, groups),)
-    end
+    batchsize = Threads.nthreads() > 1 ? 100_000 : typemax(Int)
+    batches = Iterators.partition(eachindex(incol, groups), batchsize)
 
     groupreduce!_helper(res, f, op, condf, adjust, checkempty,
                              incol, groups, counts, batches)
@@ -254,12 +248,6 @@ groupreduce(f, op, condf::typeof(!ismissing), adjust, checkempty::Bool,
 (r::Reduce)(incol::AbstractVector, gd::GroupedDataFrame) =
     groupreduce((x, i) -> x, r.op, r.condf, r.adjust, r.checkempty, incol, gd)
 
-# this definition is missing in Julia 1.0 LTS and is required by aggregation for var
-# TODO: remove this when we drop 1.0 support
-if VERSION < v"1.1"
-    Base.zero(::Type{Missing}) = missing
-end
-
 function (agg::Aggregate{typeof(var)})(incol::AbstractVector, gd::GroupedDataFrame)
     means = groupreduce((x, i) -> x, Base.add_sum, agg.condf, /, false, incol, gd)
     z = zero(eltype(incol)) - zero(eltype(means))

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
@@ -524,10 +524,6 @@ Base.ndims(::Type{<:GroupedDataFrame}) = 1
 Base.firstindex(gd::GroupedDataFrame) = 1
 Base.lastindex(gd::GroupedDataFrame) = gd.ngroups
 
-if VERSION < v"1.6"
-    Base.firstindex(gd::GroupedDataFrame, i::Integer) = first(axes(gd, i))
-    Base.lastindex(gd::GroupedDataFrame, i::Integer) = last(axes(gd, i))
-end
 Base.axes(gd::GroupedDataFrame, i::Integer) = Base.OneTo(size(gd, i))
 
 Base.first(gd::GroupedDataFrame) = gd[1]
@@ -624,10 +620,6 @@ Base.ndims(::Type{<:GroupKey}) = 1
 Base.firstindex(key::GroupKey) = 1
 Base.lastindex(key::GroupKey) = length(key)
 
-if VERSION < v"1.6"
-    Base.firstindex(key::GroupKey, i::Integer) = first(axes(key, i))
-    Base.lastindex(key::GroupKey, i::Integer) = last(axes(key, i))
-end
 Base.axes(key::GroupKey, i::Integer) = Base.OneTo(size(key, i))
 
 Base.names(key::GroupKey) = string.(parent(key).cols)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
@@ -712,18 +712,10 @@ function _combine(gd::GroupedDataFrame,
         try
             wait(t)
         catch e
-            @static if VERSION > v"1.3"
-                if e isa TaskFailedException
-                    throw(t.exception)
-                else
-                    rethrow(e)
-                end
+            if e isa TaskFailedException
+                throw(t.exception)
             else
-                if e isa ErrorException
-                    throw(t.exception)
-                else
-                    rethrow(e)
-                end
+                rethrow(e)
             end
         end
     end