From a37dd26860d4ceb80586f9e00180568692d0084c Mon Sep 17 00:00:00 2001
From: pat-alt
Date: Thu, 5 Sep 2024 11:42:13 +0200
Subject: [PATCH] formatting

---
 .github/workflows/FormatCheck.yml         | 28 +++++++++
 .../ConformalPrediction.jl                |  2 +-
 src/ConformalPrediction/bar.jl            |  9 +--
 src/ConformalPrediction/classification.jl | 55 +++++++++++-------
 src/ConformalPrediction/regression.jl     |  4 +-
 .../counterfactuals.jl                    | 42 +++++---------
 src/CounterfactualExplations/data.jl      | 18 +++---
 src/CounterfactualExplations/models.jl    | 32 ++++------
 src/LaplaceRedux/LaplaceRedux.jl          | 58 ++++++++-----------
 9 files changed, 124 insertions(+), 124 deletions(-)
 create mode 100644 .github/workflows/FormatCheck.yml

diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml
new file mode 100644
index 0000000..dfb67ad
--- /dev/null
+++ b/.github/workflows/FormatCheck.yml
@@ -0,0 +1,28 @@
+name: Format Check
+
+on:
+  push:
+    branches:
+      - 'main'
+      - 'release-'
+    tags: ['*']
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: julia-actions/setup-julia@latest
+        with:
+          version: 1
+      - uses: actions/checkout@v1
+      - name: Install JuliaFormatter
+        run: |
+          using Pkg
+          Pkg.add("JuliaFormatter")
+        shell: julia --color=yes {0}
+      - name: Format code
+        run: |
+          using JuliaFormatter
+          format("."; verbose=true)
+        shell: julia --color=yes {0}
\ No newline at end of file
diff --git a/src/ConformalPrediction/ConformalPrediction.jl b/src/ConformalPrediction/ConformalPrediction.jl
index 3d77911..c1c9981 100644
--- a/src/ConformalPrediction/ConformalPrediction.jl
+++ b/src/ConformalPrediction/ConformalPrediction.jl
@@ -40,4 +40,4 @@ end
 include("regression.jl")
 include("bar.jl")
-include("classification.jl")
\ No newline at end of file
+include("classification.jl")
diff --git a/src/ConformalPrediction/bar.jl b/src/ConformalPrediction/bar.jl
index 8503187..94aee11 100644
--- a/src/ConformalPrediction/bar.jl
+++ b/src/ConformalPrediction/bar.jl
@@ -7,11 +7,7 @@ A `Plots.jl` recipe that can be used to visualize the set size distribution of a
 conformal predictor. In the regression case, prediction interval widths are stratified into discrete bins. It can be useful to plot the distribution of set sizes in order to visually assess how adaptive a conformal predictor is. For more adaptive predictors the distribution of set sizes is typically spread out more widely, which reflects that “the procedure is effectively distinguishing between easy and hard inputs”. This is desirable: when it is difficult to make predictions for a given sample, this should be reflected in the set size (or interval width in the regression case). Since ‘difficult’ lies on a spectrum that ranges from ‘very easy’ to ‘very difficult’, the set size should vary across the spectrum from ‘empty set’ to ‘all labels included’.
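A minimal usage sketch (hypothetical names, not part of this diff: `model` stands for any supported MLJ atomic model and `X`/`y` for the training data; `conformal_model` is ConformalPrediction.jl's wrapper constructor):

```julia
using ConformalPrediction, MLJ, Plots

# Wrap and fit a conformal model (assumed setup):
mach = machine(conformal_model(model), X, y)
fit!(mach)

# Dispatches to the recipe above and shows the set size distribution:
plot(mach.model, mach.fitresult, X)
```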
""" -@recipe function plot( - conf_model::ConformalModel, - fitresult, - X -) +@recipe function plot(conf_model::ConformalModel, fitresult, X) # Plot attributes: xtickfontsize --> 6 @@ -28,5 +24,4 @@ A `Plots.jl` recipe that can be used to visualize the set size distribution of a label --> "" x, y end - -end \ No newline at end of file +end diff --git a/src/ConformalPrediction/classification.jl b/src/ConformalPrediction/classification.jl index ad966f1..fd37c5e 100644 --- a/src/ConformalPrediction/classification.jl +++ b/src/ConformalPrediction/classification.jl @@ -90,20 +90,21 @@ In the case of univariate inputs or higher dimensional inputs, a stacked area pl # Predictions: ŷ = MLJBase.predict(conf_model, fitresult, Xraw) nout = length(levels(y)) - ŷ = - map(_y -> ismissing(_y) ? [0 for i = 1:nout] : pdf.(_y, levels(y)), ŷ) |> _y -> reduce(hcat, _y) + ŷ = (_y -> reduce(hcat, _y))(map( + _y -> ismissing(_y) ? [0 for i in 1:nout] : pdf.(_y, levels(y)), ŷ + )) ŷ = permutedims(ŷ) println(x) println(ŷ[sortperm(x), :]) # Area chart args = (x, ŷ) - data = cumsum(args[end], dims=2) + data = cumsum(args[end]; dims=2) x = length(args) == 1 ? (axes(data, 1)) : args[1] seriestype := :line for i in axes(data, 2) @series begin - fillrange := i > 1 ? data[:, i-1] : 0 + fillrange := i > 1 ? data[:, i - 1] : 0 x, data[:, i] end end @@ -114,8 +115,21 @@ In the case of univariate inputs or higher dimensional inputs, a stacked area pl # Setup: x1, x2, x1range, x2range, Z, xlims, ylims, _default_title = setup_contour_cp( - conf_model, fitresult, X, y, xlims, ylims, zoom, ntest, target, - plot_set_size, plot_classification_loss, plot_set_loss, temp, κ, loss_matrix, + conf_model, + fitresult, + X, + y, + xlims, + ylims, + zoom, + ntest, + target, + plot_set_size, + plot_classification_loss, + plot_set_loss, + temp, + κ, + loss_matrix, ) # Contour: @@ -136,13 +150,19 @@ In the case of univariate inputs or higher dimensional inputs, a stacked area pl x1[group_idx], x2[group_idx] end end - end - end function setup_contour_cp( - conf_model, fitresult, X, y, xlims, ylims, zoom, ntest, target, + conf_model, + fitresult, + X, + y, + xlims, + ylims, + zoom, + ntest, + target, plot_set_size, plot_classification_loss, plot_set_loss, @@ -201,23 +221,14 @@ function setup_contour_cp( elseif plot_classification_loss _target = categorical([target]; levels=levels(y)) z = ConformalPrediction.ConformalTraining.classification_loss( - conf_model, - fitresult, - [x1 x2], - _target; - temp=temp, - loss_matrix=loss_matrix, + conf_model, fitresult, [x1 x2], _target; temp=temp, loss_matrix=loss_matrix ) elseif plot_set_loss z = ConformalPrediction.ConformalTraining.smooth_size_loss( - conf_model, - fitresult, - [x1 x2]; - κ=κ, - temp=temp, + conf_model, fitresult, [x1 x2]; κ=κ, temp=temp ) else - z = ismissing(p̂) ? [missing for i = 1:length(levels(y))] : pdf.(p̂, levels(y)) + z = ismissing(p̂) ? 
[missing for i in 1:length(levels(y))] : pdf.(p̂, levels(y)) z = replace(z, 0 => missing) end push!(Z, z) @@ -226,4 +237,4 @@ function setup_contour_cp( Z = Z[findall(levels(y) .== target)[1][1], :] return x1, x2, x1range, x2range, Z, xlims, ylims, _default_title -end \ No newline at end of file +end diff --git a/src/ConformalPrediction/regression.jl b/src/ConformalPrediction/regression.jl index 4734026..e0ecaf5 100644 --- a/src/ConformalPrediction/regression.jl +++ b/src/ConformalPrediction/regression.jl @@ -56,11 +56,9 @@ A `Plots.jl` recipe that can be used to visualize the conformal predictions of a label := train_lab vec(x), vec(y) end - end function setup_ci(X, y, input_var, xlims, ylims, zoom) - Xraw = deepcopy(X) _names = get_names(Xraw) X = permutedims(MLJBase.matrix(X)) @@ -88,4 +86,4 @@ function setup_ci(X, y, input_var, xlims, ylims, zoom) xlims, ylims = generate_lims(x, y, xlims, ylims, zoom) return x, y, xlims, ylims, Xraw -end \ No newline at end of file +end diff --git a/src/CounterfactualExplations/counterfactuals.jl b/src/CounterfactualExplations/counterfactuals.jl index a2d2390..9f09188 100644 --- a/src/CounterfactualExplations/counterfactuals.jl +++ b/src/CounterfactualExplations/counterfactuals.jl @@ -21,10 +21,9 @@ Calling `Plots.plot` on a `CounterfactualExplanation` object will plot the train dim_red=:pca, plot_loss=false, loss_fun=nothing, - plot_up_to = nothing, - n_points = nothing, + plot_up_to=nothing, + n_points=nothing, ) - if !isnothing(n_points) if n_points < size(ce.data.X, 2) @info "Undersampling to $(n_points) points." @@ -35,7 +34,7 @@ Calling `Plots.plot` on a `CounterfactualExplanation` object will plot the train ce = deepcopy(ce) ce.data = DataPreprocessing.subsample(ce.data, n_points) else - xlims, ylims = nothing, nothing + xlims, ylims = nothing, nothing end # Asserts @@ -45,23 +44,14 @@ Calling `Plots.plot` on a `CounterfactualExplanation` object will plot the train xlims = get(plotattributes, :xlims, xlims) ylims = get(plotattributes, :ylims, ylims) ms = get(plotattributes, :markersize, 3) - mspath = ms*2 - msfinal = mspath*2 + mspath = ms * 2 + msfinal = mspath * 2 # Plot attributes linewidth --> 0.1 contour_series, X, y, xlims, ylims = setup_model_plot( - ce.M, - ce.data, - target, - length_out, - zoom, - dim_red, - plot_loss, - loss_fun, - xlims, - ylims, + ce.M, ce.data, target, length_out, zoom, dim_red, plot_loss, loss_fun, xlims, ylims ) xlims --> xlims @@ -95,10 +85,10 @@ Calling `Plots.plot` on a `CounterfactualExplanation` object will plot the train path_x, path_y = setup_ce_plot(ce) # Outer loop over number of counterfactuals: - for (num_counterfactual, X) in enumerate(eachslice(path_x, dims=3)) + for (num_counterfactual, X) in enumerate(eachslice(path_x; dims=3)) # Inner loop over counterfactual search steps: - steps = zip(eachcol(X), path_y) - for (i,(x,y)) in enumerate(steps) + steps = zip(eachcol(X), path_y) + for (i, (x, y)) in enumerate(steps) i <= max_iter || break _final_iter = i == length(steps) || i == max_iter _annotate = i == length(steps) && ce.num_counterfactuals > 1 @@ -106,9 +96,10 @@ Calling `Plots.plot` on a `CounterfactualExplanation` object will plot the train seriestype := :scatter markercolor := CategoricalArrays.levelcode.(y[num_counterfactual]) markersize := _final_iter ? msfinal : mspath - series_annotation := _annotate ? text("C$(num_counterfactual)", mspath) : nothing + series_annotation := + _annotate ? 
text("C$(num_counterfactual)", mspath) : nothing label := :none - x[1,:], x[2,:] + x[1, :], x[2, :] end end end @@ -131,12 +122,11 @@ animate_path(ce) """ function animate_path( ce::CounterfactualExplanation, - path = tempdir(); - plot_up_to::Union{Nothing,Int} = nothing, - legend = :topright, + path=tempdir(); + plot_up_to::Union{Nothing,Int}=nothing, + legend=:topright, kwrgs..., ) - max_iter = total_steps(ce) max_iter = if isnothing(plot_up_to) total_steps(ce) @@ -145,7 +135,7 @@ function animate_path( end max_iter += 1 - anim = @animate for t = 1:max_iter + anim = @animate for t in 1:max_iter plot(ce; plot_up_to=t, legend=legend, kwrgs...) end return anim diff --git a/src/CounterfactualExplations/data.jl b/src/CounterfactualExplations/data.jl index c9b030d..2f32ff4 100644 --- a/src/CounterfactualExplations/data.jl +++ b/src/CounterfactualExplations/data.jl @@ -1,6 +1,6 @@ using MLUtils -function embed(data::CounterfactualData, X::AbstractArray = nothing; dim_red::Symbol = :pca) +function embed(data::CounterfactualData, X::AbstractArray=nothing; dim_red::Symbol=:pca) # Training compressor: if typeof(data.input_encoder) <: MultivariateStats.AbstractDimensionalityReduction @@ -12,9 +12,9 @@ function embed(data::CounterfactualData, X::AbstractArray = nothing; dim_red::Sy else @info "Training model to compress data." if dim_red == :pca - tfn = MultivariateStats.fit(PCA, X_train; maxoutdim = 2) + tfn = MultivariateStats.fit(PCA, X_train; maxoutdim=2) elseif dim_red == :tsne - tfn = MultivariateStats.fit(TSNE, X_train; maxoutdim = 2) + tfn = MultivariateStats.fit(TSNE, X_train; maxoutdim=2) end data.input_encoder = nothing X = isnothing(X) ? X_train : X @@ -22,9 +22,9 @@ function embed(data::CounterfactualData, X::AbstractArray = nothing; dim_red::Sy end # Transforming: - X = typeof(X) <: Vector{<:Matrix} ? MLUtils.stack(X, dims = 2) : X + X = typeof(X) <: Vector{<:Matrix} ? MLUtils.stack(X; dims=2) : X if !isnothing(tfn) && !isnothing(X) - X = mapslices(x -> MultivariateStats.predict(tfn, x), X, dims = 1) + X = mapslices(x -> MultivariateStats.predict(tfn, x), X; dims=1) else X = isnothing(X) ? X_train : X end @@ -42,13 +42,13 @@ function embed_path(ce::CounterfactualExplanation) return embed(data_, path(ce)) end -function prepare_for_plotting(data::CounterfactualData; dim_red::Symbol = :pca) +function prepare_for_plotting(data::CounterfactualData; dim_red::Symbol=:pca) X, _ = DataPreprocessing.unpack_data(data) y = data.output_encoder.labels @assert size(X, 1) != 1 "Don't know how to plot 1-dimensional data." multi_dim = size(X, 1) > 2 if multi_dim - X = embed(data, X; dim_red = dim_red) + X = embed(data, X; dim_red=dim_red) end return X', y, multi_dim end @@ -58,10 +58,10 @@ end Calling `Plots.plot` on a `data::CounterfactualData` object will generate a scatter plot of the data. 
""" -@recipe function plot(data::CounterfactualData; dim_red = :pca) +@recipe function plot(data::CounterfactualData; dim_red=:pca) # Set up: - X, y, _ = prepare_for_plotting(data; dim_red = dim_red) + X, y, _ = prepare_for_plotting(data; dim_red=dim_red) # Scatter plot: for (i, x) in enumerate(unique(sort(y))) diff --git a/src/CounterfactualExplations/models.jl b/src/CounterfactualExplations/models.jl index 707de47..c42d8eb 100644 --- a/src/CounterfactualExplations/models.jl +++ b/src/CounterfactualExplations/models.jl @@ -19,12 +19,12 @@ Calling `Plots.plot` on a `AbstractFittedModel` will plot the model's prediction @recipe function plot( M::AbstractFittedModel, data::CounterfactualData; - target = nothing, - length_out = 100, - zoom = -0.1, - dim_red = :pca, - plot_loss = false, - loss_fun = nothing, + target=nothing, + length_out=100, + zoom=-0.1, + dim_red=:pca, + plot_loss=false, + loss_fun=nothing, ) # Asserts @@ -38,16 +38,7 @@ Calling `Plots.plot` on a `AbstractFittedModel` will plot the model's prediction linewidth --> 0.1 contour_series, X, y, xlims, ylims = setup_model_plot( - M, - data, - target, - length_out, - zoom, - dim_red, - plot_loss, - loss_fun, - xlims, - ylims, + M, data, target, length_out, zoom, dim_red, plot_loss, loss_fun, xlims, ylims ) xlims --> xlims @@ -69,7 +60,6 @@ Calling `Plots.plot` on a `AbstractFittedModel` will plot the model's prediction X[group_idx, 1], X[group_idx, 2] end end - end function setup_model_plot( @@ -99,7 +89,7 @@ function setup_model_plot( end target_encoded = data.output_encoder(target) - X, y, multi_dim = prepare_for_plotting(data; dim_red = dim_red) + X, y, multi_dim = prepare_for_plotting(data; dim_red=dim_red) # Surface range: zoom = zoom * maximum(abs.(X)) @@ -114,8 +104,8 @@ function setup_model_plot( else ylims = ylims .+ (zoom, -zoom) end - x_range = convert.(eltype(X), range(xlims[1]; stop = xlims[2], length = length_out)) - y_range = convert.(eltype(X), range(ylims[1]; stop = ylims[2], length = length_out)) + x_range = convert.(eltype(X), range(xlims[1]; stop=xlims[2], length=length_out)) + y_range = convert.(eltype(X), range(ylims[1]; stop=ylims[2], length=length_out)) plot_loss = plot_loss || !isnothing(loss_fun) @@ -158,7 +148,7 @@ function setup_model_plot( end function voronoi(X::AbstractMatrix, y::AbstractVector) - knnc = KNNClassifier(; K = 1) # KNNClassifier instantiation + knnc = KNNClassifier(; K=1) # KNNClassifier instantiation X = MLJBase.table(X) y = CategoricalArrays.categorical(y) knnc_mach = MLJBase.machine(knnc, X, y) diff --git a/src/LaplaceRedux/LaplaceRedux.jl b/src/LaplaceRedux/LaplaceRedux.jl index 5120669..4062be4 100644 --- a/src/LaplaceRedux/LaplaceRedux.jl +++ b/src/LaplaceRedux/LaplaceRedux.jl @@ -38,11 +38,10 @@ Calling `Plots.plot` on a `Laplace` object will plot the posterior predictive di # Plot attributes lw = get(plotattributes, :linewidth, 1) - lw_yhat = lw*2 - lw_contour = lw*0.1 + lw_yhat = lw * 2 + lw_contour = lw * 0.1 if la.likelihood == :regression - xrange, yrange, xlims, ylims = surface_range(X, y, xlims, ylims, zoom, length_out) xlims := xlims ylims := ylims @@ -66,11 +65,9 @@ Calling `Plots.plot` on a `Laplace` object will plot the posterior predictive di label --> "ytrain" vec(X), vec(y) end - end if la.likelihood == :classification - xrange, yrange, xlims, ylims = surface_range(X, xlims, ylims, zoom, length_out) xlims := xlims ylims := ylims @@ -95,15 +92,10 @@ Calling `Plots.plot` on a `Laplace` object will plot the posterior predictive di X[1, group_idx], X[2, group_idx] end 
     end
-
 end
-
 end

-function surface_range(
-    X::AbstractArray, y::AbstractArray,
-    xlims,ylims,zoom,length_out,
-)
+function surface_range(X::AbstractArray, y::AbstractArray, xlims, ylims, zoom, length_out)
 
     # Surface range:
     if isnothing(xlims)
@@ -116,14 +108,12 @@ function surface_range(
     else
         ylims = ylims .+ (zoom, -zoom)
     end
-    x_range = range(xlims[1]; stop = xlims[2], length = length_out)
-    y_range = range(ylims[1]; stop = ylims[2], length = length_out)
+    x_range = range(xlims[1]; stop=xlims[2], length=length_out)
+    y_range = range(ylims[1]; stop=ylims[2], length=length_out)
 
     return x_range, y_range, xlims, ylims
-
 end
 
-function surface_range(X::AbstractArray,xlims,ylims,zoom,length_out)
-
+function surface_range(X::AbstractArray, xlims, ylims, zoom, length_out)
     if isnothing(xlims)
         xlims = (minimum(X[1, :]), maximum(X[1, :])) .+ (zoom, -zoom)
     else
@@ -134,16 +124,15 @@ function surface_range(X::AbstractArray,xlims,ylims,zoom,length_out)
     else
         ylims = ylims .+ (zoom, -zoom)
     end
-    x_range = range(xlims[1]; stop = xlims[2], length = length_out)
-    y_range = range(ylims[1]; stop = ylims[2], length = length_out)
+    x_range = range(xlims[1]; stop=xlims[2], length=length_out)
+    y_range = range(ylims[1]; stop=ylims[2], length=length_out)
 
     return x_range, y_range, xlims, ylims
 end
 
 function get_contour(la::Laplace, x_range, y_range, link_approx, target, title)
-
     predict_ = function (la, X::AbstractVector)
-        z = LaplaceRedux.predict(la, X; link_approx = link_approx)
+        z = LaplaceRedux.predict(la, X; link_approx=link_approx)
         if LaplaceRedux.outdim(la) == 1 # binary
             z = [1.0 - z[1], z[1]]
         end
@@ -178,34 +167,33 @@ The intervals are taken in steps of 0.05 quantiles.
 - `samp_distr` -- an array of sampled distributions F(x_t) corresponding to the y_t stacked column-wise.
 - `n_bins` -- number of bins to use.
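A typical call might look as follows (a sketch: `la`, `y_cal` and `samp_distr` are assumed to come from an earlier Laplace fit and a held-out calibration split):

```julia
calibration_plot(la, y_cal, samp_distr; n_bins=20)
```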
""" -function calibration_plot(la::Laplace, y_cal, samp_distr; n_bins = 20) - quantiles = collect(range(0; stop = 1, length = n_bins + 1)) +function calibration_plot(la::Laplace, y_cal, samp_distr; n_bins=20) + quantiles = collect(range(0; stop=1, length=n_bins + 1)) # Create a new plot object p = plot() - plot!([0, 1], [0, 1], label = "Perfect calibration", linestyle = :dash, color = :black) + plot!([0, 1], [0, 1]; label="Perfect calibration", linestyle=:dash, color=:black) # Compute the empirical frequency if la.likelihood == :regression emp_freq = empirical_frequency_regression(y_cal, samp_distr; n_bins) - plot!(p, quantiles, emp_freq, color = :blue, label = "neural network") + plot!(p, quantiles, emp_freq; color=:blue, label="neural network") plot!( p, quantiles, - emp_freq, - fillrange = quantiles, - color = :lightblue, - label = "miscalibration area", + emp_freq; + fillrange=quantiles, + color=:lightblue, + label="miscalibration area", ) # Calculate the area between the curve and the diagonal area = trapz((quantiles), vec(abs.(emp_freq - quantiles))) annotate!( - 0.75, - 0.05, - ("Miscalibration area = $(round(area, digits=2))", 8, 11, :bottom), + 0.75, 0.05, ("Miscalibration area = $(round(area, digits=2))", 8, 11, :bottom) ) elseif la.likelihood == :classification - num_p_per_interval, emp_freq, bin_centers = - empirical_frequency_binary_classification(y_cal, samp_distr; n_bins) - plot!(bin_centers, emp_freq, label = "Observed average", lw = 2) + num_p_per_interval, emp_freq, bin_centers = empirical_frequency_binary_classification( + y_cal, samp_distr; n_bins + ) + plot!(bin_centers, emp_freq; label="Observed average", lw=2) end # Add labels and title @@ -216,5 +204,5 @@ function calibration_plot(la::Laplace, y_cal, samp_distr; n_bins = 20) ylims!(0, 1) # Show the plot - display(p) + return display(p) end