From 5d5a2d282d3c52c4b4d9fc01742aa92a595b7bb6 Mon Sep 17 00:00:00 2001
From: Seth Axen
Date: Tue, 8 Aug 2023 22:15:37 +0200
Subject: [PATCH] Convert ArviZ.jl into a meta-package (#297)

* Remove unneeded `@doc`
* Remove all unneeded dependencies
* Remove ArviZStats
* Remove R installation from CI
* Add PosteriorStats and Reexport as deps
* Reexport component packages
* Fix broken links
* Update stats API docs
* Add back PSIS compat bound
* Remove unused test dependencies
* Remove test Project.toml
* Add missing SampleChainsDynamicHMC compat
* Remove docs compat entry
* Add component packages as docs deps
  This is needed so that extensions can load the package with `using SomePackage`
* Correctly set up to build docstrings in extensions
* Build docs strictly
* Document meta-ness of package
* Build docs nightly
* Increment minor version number
* Add StatsBase for doctests
* Disable caching
* Try building docs directly
* Copy from InferenceObjects's docs build [no ci]
* Revert "Copy from InferenceObjects's docs build [no ci]"
  This reverts commit cafadf8872140cf92515c5fd26fd0b48f62fc613.
* Revert "Revert "Copy from InferenceObjects's docs build""
  This reverts commit 0cdfd99bcf7437f26a9f0b3e73d40090cef3489d.
* Revert "Disable caching"
  This reverts commit 6ccbadfbd8bd6a4adcae67d0429e6123e0a9b10f.
---
 .github/workflows/CompatHelper.yml      |   8 +-
 .github/workflows/ci.yml                |  13 -
 .github/workflows/documenter.yml        |   8 +-
 Project.toml                            |  49 +--
 docs/Project.toml                       |  23 +-
 docs/make.jl                            |  45 +-
 docs/src/api/stats.md                   |  16 +-
 docs/src/index.md                       |  16 +-
 docs/src/working_with_inference_data.md |   6 +-
 src/ArviZ.jl                            |  67 +--
 src/ArviZStats/ArviZStats.jl            |  59 ---
 src/ArviZStats/compare.jl               | 213 ----------
 src/ArviZStats/elpdresult.jl            |  72 ----
 src/ArviZStats/hdi.jl                   | 167 --------
 src/ArviZStats/loo.jl                   | 176 --------
 src/ArviZStats/loo_pit.jl               | 270 ------------
 src/ArviZStats/model_weights.jl         | 303 --------------
 src/ArviZStats/r2_score.jl              |  99 -----
 src/ArviZStats/summarystats.jl          | 254 ------------
 src/ArviZStats/utils.jl                 | 529 ------------------------
 src/ArviZStats/waic.jl                  | 113 -----
 src/conversions.jl                      |   2 +-
 test/ArviZStats/compare.jl              | 131 ------
 test/ArviZStats/hdi.jl                  | 127 ------
 test/ArviZStats/helpers.jl              |  47 ---
 test/ArviZStats/loo.jl                  | 192 ---------
 test/ArviZStats/loo_pit.jl              | 144 -------
 test/ArviZStats/model_weights.jl        | 141 -------
 test/ArviZStats/r2_score.jl             |  49 ---
 test/ArviZStats/runtests.jl             |  19 -
 test/ArviZStats/summarystats.jl         | 316 --------------
 test/ArviZStats/utils.jl                | 412 ------------------
 test/ArviZStats/waic.jl                 | 130 ------
 test/Project.toml                       |  40 --
 test/runtests.jl                        |   1 -
 35 files changed, 98 insertions(+), 4159 deletions(-)
 delete mode 100644 src/ArviZStats/ArviZStats.jl
 delete mode 100644 src/ArviZStats/compare.jl
 delete mode 100644 src/ArviZStats/elpdresult.jl
 delete mode 100644 src/ArviZStats/hdi.jl
 delete mode 100644 src/ArviZStats/loo.jl
 delete mode 100644 src/ArviZStats/loo_pit.jl
 delete mode 100644 src/ArviZStats/model_weights.jl
 delete mode 100644 src/ArviZStats/r2_score.jl
 delete mode 100644 src/ArviZStats/summarystats.jl
 delete mode 100644 src/ArviZStats/utils.jl
 delete mode 100644 src/ArviZStats/waic.jl
 delete mode 100644 test/ArviZStats/compare.jl
 delete mode 100644 test/ArviZStats/hdi.jl
 delete mode 100644 test/ArviZStats/helpers.jl
 delete mode 100644 test/ArviZStats/loo.jl
 delete mode 100644 test/ArviZStats/loo_pit.jl
 delete mode 100644 test/ArviZStats/model_weights.jl
 delete mode 100644 test/ArviZStats/r2_score.jl
 delete mode 100644 test/ArviZStats/runtests.jl
 delete mode
100644 test/ArviZStats/summarystats.jl delete mode 100644 test/ArviZStats/utils.jl delete mode 100644 test/ArviZStats/waic.jl delete mode 100644 test/Project.toml diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 6e479ff9..b888b26a 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -10,11 +10,7 @@ jobs: - name: Pkg.add("CompatHelper") run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - name: CompatHelper.main() - run: | - using CompatHelper - subdirs = ["", "test", "docs"] - CompatHelper.main(; subdirs) - shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_KEY }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + run: julia --color=yes -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1cbc21a..93a83da0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,19 +18,6 @@ jobs: os: [ubuntu-latest] steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v2 - if: matrix.os == 'ubuntu-latest' - - name: Set R lib path for RCall.jl - if: matrix.os == 'ubuntu-latest' - run: echo "LD_LIBRARY_PATH=$(R RHOME)/lib" >> $GITHUB_ENV - - name: Install R packages - if: matrix.os == 'ubuntu-latest' - run: | - install.packages("remotes") - remotes::install_github("stan-dev/loo") - shell: Rscript {0} - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} diff --git a/.github/workflows/documenter.yml b/.github/workflows/documenter.yml index 6cee7525..76225a65 100644 --- a/.github/workflows/documenter.yml +++ b/.github/workflows/documenter.yml @@ -4,6 +4,8 @@ on: branches: [main] tags: [v*] pull_request: + schedule: + - cron: "0 0 * * *" jobs: docs: @@ -13,8 +15,12 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 - uses: julia-actions/cache@v1 - - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-buildpkg@v1 + - name: Install dependencies + run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - uses: julia-actions/julia-docdeploy@v1 + with: + install-package: false env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/Project.toml b/Project.toml index f66a8e3f..d98c035b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,33 +1,16 @@ name = "ArviZ" uuid = "131c737c-5715-5e2e-ad31-c244f01c1dc7" authors = ["Seth Axen "] -version = "0.9.2" +version = "0.10.0" [deps] -DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0" -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" InferenceObjects = "b5cf5a8d-e756-4ee3-b014-01d49d192c00" -IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0" -Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" -OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PSIS = "ce719bf2-d5d0-4fb9-925d-10a81b42ad04" -PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -Printf = 
"de0858da-6303-5e67-8744-51eddeeeb8d7" -REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +PosteriorStats = "7f36be82-ad55-44ba-a5c0-b8b5480d7aa5" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Requires = "ae029012-a4dd-5104-9daa-d747884805df" -Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [weakdeps] MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" @@ -36,32 +19,30 @@ SampleChainsDynamicHMC = "6d9fd711-e8b2-4778-9c70-c1dfb499d4c4" [extensions] ArviZMCMCChainsExt = "MCMCChains" -ArviZSampleChainsExt = "SampleChains" ArviZSampleChainsDynamicHMCExt = ["SampleChains", "SampleChainsDynamicHMC"] +ArviZSampleChainsExt = "SampleChains" [compat] -DataInterpolations = "4" -DimensionalData = "0.23, 0.24" -Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" -DocStringExtensions = "0.8, 0.9" -InferenceObjects = "0.3.10" -IteratorInterfaceExtensions = "0.1.1, 1" -LogExpFunctions = "0.2.0, 0.3" +InferenceObjects = "0.3.11" MCMCChains = "6" MCMCDiagnosticTools = "0.3.4" -Optim = "1" -OrderedCollections = "1" +PosteriorStats = "0.1.1" PSIS = "0.9.1" -PrettyTables = "2.1, 2.2" +Reexport = "1" Requires = "0.5.2, 1.0" SampleChains = "0.5" -Setfield = "1" +SampleChainsDynamicHMC = "0.3" StatsBase = "0.32, 0.33, 0.34" -TableTraits = "0.4, 1" -Tables = "1" julia = "1.6" [extras] +DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SampleChains = "754583d1-7fc4-4dab-93b5-5eaca5c9622e" SampleChainsDynamicHMC = "6d9fd711-e8b2-4778-9c70-c1dfb499d4c4" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["DimensionalData", "MCMCChains", "OrderedCollections", "Random", "SampleChains", "SampleChainsDynamicHMC", "Test"] diff --git a/docs/Project.toml b/docs/Project.toml index bde922a9..31b84aa6 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -9,31 +9,16 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +InferenceObjects = "b5cf5a8d-e756-4ee3-b014-01d49d192c00" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJIteration = "614be32b-d00c-4edb-bd02-1eb411ab5e55" +PSIS = "ce719bf2-d5d0-4fb9-925d-10a81b42ad04" PlutoStaticHTML = "359b1769-a58e-495b-9770-312e911026ad" PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" +PosteriorStats = "7f36be82-ad55-44ba-a5c0-b8b5480d7aa5" SampleChains = "754583d1-7fc4-4dab-93b5-5eaca5c9622e" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[compat] -AlgebraOfGraphics = "0.6.9" -ArviZ = "0.9" -ArviZExampleData = "0.1.5" -CairoMakie = "0.8.9, 0.9, 0.10" -DataFrames = "1" -DimensionalData = "0.23, 0.24" -Distributions = "0.25" -EvoTrees = "0.14.8, 0.15" -Documenter = "0.27" -MCMCChains = "6.0" -MCMCDiagnosticTools = "0.3" -MLJBase = "0.21.6" -MLJIteration = "0.5.1" -PlutoStaticHTML = "4.0.5, 5, 6" -PlutoUI = "0.7" -SampleChains = "0.5" -Statistics = "1" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git 
a/docs/make.jl b/docs/make.jl index c4c32dfc..b05ee38a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -25,30 +25,50 @@ function download_asset(remote_fn, fn=remote_fn) ) end +function get_extension(mod::Module, submodule::Symbol) + if isdefined(Base, :get_extension) + return Base.get_extension(mod, submodule) + else + return getproperty(mod, submodule) + end +end + # download arviz-devs org logo assets download_asset("ArviZ.png", "logo.png") download_asset("ArviZ_white.png", "logo-dark.png") download_asset("favicon.ico") +InferenceObjectsMCMCDiagnosticToolsExt = get_extension( + InferenceObjects, :InferenceObjectsMCMCDiagnosticToolsExt +) +InferenceObjectsPosteriorStatsExt = get_extension( + InferenceObjects, :InferenceObjectsPosteriorStatsExt +) + +for subpkg in (InferenceObjects, MCMCDiagnosticTools, PosteriorStats, PSIS) + DocMeta.setdocmeta!(subpkg, :DocTestSetup, :(using $(Symbol(subpkg)))) +end DocMeta.setdocmeta!( - ArviZ.MCMCDiagnosticTools, :DocTestSetup, :(using ArviZ.MCMCDiagnosticTools); + InferenceObjectsMCMCDiagnosticToolsExt, :DocTestSetup, :(using MCMCDiagnosticTools) ) -DocMeta.setdocmeta!(ArviZ.InferenceObjects, :DocTestSetup, :(using ArviZ.InferenceObjects);) +DocMeta.setdocmeta!( + InferenceObjectsPosteriorStatsExt, :DocTestSetup, :(using PosteriorStats) +) + +modules = [ + ArviZ, + InferenceObjects, + InferenceObjectsMCMCDiagnosticToolsExt, + InferenceObjectsPosteriorStatsExt, + MCMCDiagnosticTools, + PosteriorStats, + PSIS, +] doctestfilters = [ r"\s+\"created_at\" => .*", # ignore timestamps in doctests ] -modules = [ArviZ, InferenceObjects, MCMCDiagnosticTools, PSIS] -if isdefined(Base, :get_extension) - # using Requires, these docstrings are automatically loaded, but as an extension we need - # to manually specify the module - push!( - modules, - Base.get_extension(InferenceObjects, :InferenceObjectsMCMCDiagnosticToolsExt), - ) -end - makedocs(; modules, sitename="ArviZ.jl", @@ -79,6 +99,7 @@ makedocs(; doctestfilters, linkcheck=true, analytics="G-W1G68W77YV", + strict=Documenter.except(:footnote, :missing_docs), ) deploydocs(; repo="github.com/arviz-devs/ArviZ.jl.git", devbranch="main", push_preview=true) diff --git a/docs/src/api/stats.md b/docs/src/api/stats.md index 03014a48..ce1e515c 100644 --- a/docs/src/api/stats.md +++ b/docs/src/api/stats.md @@ -8,6 +8,10 @@ Pages = ["stats.md"] ```@docs SummaryStats +default_summary_stats +default_stats +default_diagnostics +summarize summarystats ``` @@ -22,9 +26,11 @@ r2_score ## Pareto-smoothed importance sampling ```@docs -PSIS.PSISResult -PSIS.psis -PSIS.psis! +PSISResult +ess_is +PSISPlots.paretoshapeplot +psis +psis! ``` ## LOO and WAIC @@ -61,8 +67,8 @@ Stacking loo_pit ``` -### Utilities +## Utilities ```@docs -ArviZStats.smooth_data +PosteriorStats.smooth_data ``` diff --git a/docs/src/index.md b/docs/src/index.md index 86c1f97d..c9276fec 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,8 +1,20 @@ # [ArviZ.jl: Exploratory analysis of Bayesian models in Julia](@id arvizjl) -ArviZ.jl is a Julia package for exploratory analysis of Bayesian models. +ArviZ.jl is a Julia meta-package for exploratory analysis of Bayesian models. It is part of the [ArviZ project](https://www.arviz.org/), which also includes a related [Python package](https://python.arviz.org/). 
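In practice, a single `using ArviZ` now brings the re-exported APIs of the subpackages listed next into scope. A minimal illustrative sketch (assuming ArviZ ≥ 0.10 and the package versions pinned in this patch):

```julia
using ArviZ  # one `using` loads and re-exports the component packages

# each of these names is owned by a subpackage and merely re-exported by ArviZ:
ess, rhat, mcse                  # from MCMCDiagnosticTools
psis, PSISResult                 # from PSIS
loo, waic, hdi, compare          # from PosteriorStats
InferenceData, convert_to_inference_data, from_namedtuple  # from InferenceObjects
summarystats                     # from StatsBase (re-exported)
```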
+ArviZ consists of and re-exports the following subpackages, along with extensions integrating them with InferenceObjects: +- [InferenceObjects.jl](https://julia.arviz.org/InferenceObjects): a base package implementing the [`InferenceData`](@ref) type with utilities for building, saving, and working with it +- [MCMCDiagnosticTools.jl](https://julia.arviz.org/MCMCDiagnosticTools): diagnostics for Markov Chain Monte Carlo methods +- [PSIS.jl](https://julia.arviz.org/PSIS): Pareto-smoothed importance sampling +- [PosteriorStats.jl](https://julia.arviz.org/PosteriorStats): common statistical analyses for the Bayesian workflow + +Additional functionality can be loaded with the following packages: +- [ArviZExampleData.jl](https://julia.arviz.org/ArviZExampleData): example `InferenceData` objects, useful for demonstration and testing +- [ArviZPythonPlots.jl](https://julia.arviz.org/ArviZPythonPlots): Python ArviZ's library of plotting functions for Julia types + +See the navigation bar for more useful packages. + ## [Installation](@id installation) From the Julia REPL, type `]` to enter the Pkg REPL mode and run @@ -17,4 +29,4 @@ See the [API Overview](@ref api) for description of functions. ## [Extending ArviZ.jl](@id extendingarviz) -To use a custom data type with ArviZ.jl, simply overload [`convert_to_inference_data`](@ref) to convert your input(s) to an [`InferenceData`](@ref). +To use a custom data type with ArviZ.jl, simply overload [`InferenceObjects.convert_to_inference_data`](@ref) to convert your input(s) to an [`InferenceObjects.InferenceData`](@ref). diff --git a/docs/src/working_with_inference_data.md b/docs/src/working_with_inference_data.md index 1937aa23..c2e140cc 100644 --- a/docs/src/working_with_inference_data.md +++ b/docs/src/working_with_inference_data.md @@ -15,7 +15,7 @@ idata = load_example_data("centered_eight") ``` !!! info - `Dataset`s are [`DimensionalData.AbstractDimStack`](https://rafaqz.github.io/DimensionalData.jl/stable/api/#DimensionalData.AbstractDimStack)s and can be used identically. + `Dataset`s are [`DimensionalData.AbstractDimStack`](https://rafaqz.github.io/DimensionalData.jl/stable/reference/#DimensionalData.AbstractDimStack)s and can be used identically. The variables a `Dataset` contains are called "layers", and dimensions of the same name that appear in more than one layer within a `Dataset` must have the same indices. `InferenceData` behaves like a `NamedTuple` and can be used similarly. @@ -121,7 +121,7 @@ Let’s keep only chain 0 here. For the subset to take effect on all relevant `InferenceData` groups -- `posterior`, `sample_stats`, `log_likelihood`, and `posterior_predictive` -- we will index `InferenceData` instead of `Dataset`. Here we use DimensionalData's `At` selector. -Its [other selectors](https://rafaqz.github.io/DimensionalData.jl/stable/api/#Selectors) are also supported. +Its [other selectors](https://rafaqz.github.io/DimensionalData.jl/stable/reference/#selectors) are also supported. 
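Before restricting to chain 0 in the next example, a hedged aside: the same keyword pattern works with other label-based selections (the selector set itself comes from DimensionalData, not this page), for example selecting schools by name with `At`:

```julia
# select two schools by their coordinate labels; `At` matches labels exactly
idata[school=At(["Choate", "Deerfield"])]
```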
```@example wwid idata[chain=At(0)] @@ -179,7 +179,7 @@ dropdims(mean(post; dims=(:chain, :draw)); dims=(:chain, :draw)) ## Renaming a dimension -We can rename a dimension in a `Dataset` using DimensionalData's [`set`](https://rafaqz.github.io/DimensionalData.jl/stable/api/#DimensionalData.Dimensions.LookupArrays.set) method: +We can rename a dimension in a `Dataset` using DimensionalData's [`set`](https://rafaqz.github.io/DimensionalData.jl/stable/reference/#DimensionalData.Dimensions.LookupArrays.set) method: ```@example wwid theta_bis = set(post.theta; school=:school_bis) diff --git a/src/ArviZ.jl b/src/ArviZ.jl index ab4ae567..c2aa3bb7 100644 --- a/src/ArviZ.jl +++ b/src/ArviZ.jl @@ -1,32 +1,11 @@ module ArviZ -using Base: @__doc__ -using REPL -using OrderedCollections: OrderedDict -using DimensionalData: DimensionalData, Dimensions -using LogExpFunctions: logsumexp +using Reexport -import Base: - convert, - get, - getindex, - getproperty, - hash, - haskey, - iterate, - length, - propertynames, - setindex, - show, - write, - + -import Base.Docs: getdoc -using StatsBase: StatsBase -import Markdown: @doc_str - -using InferenceObjects - -using MCMCDiagnosticTools: +## Re-export component packages +@reexport using InferenceObjects +# only export recommended functions and corresponding utilities +@reexport using MCMCDiagnosticTools: MCMCDiagnosticTools, AutocovMethod, FFTAutocovMethod, @@ -37,46 +16,16 @@ using MCMCDiagnosticTools: mcse, rhat, rstar - -# Exports - -## Stats -export ArviZStats -export AbstractELPDResult, PSISLOOResult, WAICResult -export PSIS, PSISResult, psis, psis! -export elpd_estimates, information_criterion, loo, waic -export AbstractModelWeightsMethod, BootstrappedPseudoBMA, PseudoBMA, Stacking, model_weights -export ModelComparisonResult, compare -export SummaryStats, summarystats -export hdi, hdi!, loo_pit, r2_score - -## Diagnostics -export MCMCDiagnosticTools, AutocovMethod, FFTAutocovMethod, BDAAutocovMethod -export bfmi, ess, ess_rhat, mcse, rhat, rstar - -## InferenceObjects -export InferenceObjects, - Dataset, - InferenceData, - convert_to_dataset, - convert_to_inference_data, - from_dict, - from_namedtuple, - namedtuple_to_dataset - -## NetCDF I/O -export from_netcdf, to_netcdf +@reexport using PosteriorStats +@reexport using PSIS +@reexport using StatsBase: summarystats ## Conversions export from_mcmcchains, from_samplechains const EXTENSIONS_SUPPORTED = isdefined(Base, :get_extension) -const DEFAULT_SAMPLE_DIMS = Dimensions.key2dim((:chain, :draw)) include("utils.jl") -include("ArviZStats/ArviZStats.jl") -using .ArviZStats - include("conversions.jl") if !EXTENSIONS_SUPPORTED diff --git a/src/ArviZStats/ArviZStats.jl b/src/ArviZStats/ArviZStats.jl deleted file mode 100644 index d22ec997..00000000 --- a/src/ArviZStats/ArviZStats.jl +++ /dev/null @@ -1,59 +0,0 @@ -module ArviZStats - -using ArviZ: ArviZ -using DataInterpolations: DataInterpolations -using DimensionalData: DimensionalData, Dimensions -using Distributions: Distributions -using DocStringExtensions: FIELDS, FUNCTIONNAME, TYPEDEF, TYPEDFIELDS, SIGNATURES -using InferenceObjects: InferenceObjects -using IteratorInterfaceExtensions: IteratorInterfaceExtensions -using LinearAlgebra: mul!, norm -using LogExpFunctions: LogExpFunctions -using Markdown: @doc_str -using MCMCDiagnosticTools: MCMCDiagnosticTools -using Optim: Optim -using PrettyTables: PrettyTables -using Printf: Printf -using PSIS: PSIS, PSISResult, psis, psis! 
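The `EXTENSIONS_SUPPORTED` flag in the rewritten `src/ArviZ.jl` above keeps the pre-1.9 fallback working: on Julia ≥ 1.9 the MCMCChains/SampleChains glue code loads as package extensions, while older versions fall back to Requires.jl. A rough sketch of that fallback (the extension file path here is assumed for illustration, not taken from the patch):

```julia
const EXTENSIONS_SUPPORTED = isdefined(Base, :get_extension)

if !EXTENSIONS_SUPPORTED
    using Requires: @require
    function __init__()
        # UUID matches the [weakdeps] entry in Project.toml above
        @require MCMCChains="c7f686f2-ff18-58e9-bc7b-31028e88f75d" include(
            joinpath(@__DIR__, "..", "ext", "ArviZMCMCChainsExt.jl")
        )
    end
end
```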
-using Random: Random -using Setfield: Setfield -using Statistics: Statistics -using StatsBase: StatsBase, summarystats -using Tables: Tables -using TableTraits: TableTraits - -# PSIS -export PSIS, PSISResult, psis, psis! - -# LOO-CV -export AbstractELPDResult, PSISLOOResult, WAICResult -export elpd_estimates, information_criterion, loo, waic - -# Model weighting and comparison -export AbstractModelWeightsMethod, BootstrappedPseudoBMA, PseudoBMA, Stacking, model_weights -export ModelComparisonResult, compare - -# Summary statistics -export SummaryStats, summarystats - -# Others -export hdi, hdi!, loo_pit, r2_score - -# load for docstrings -using ArviZ: InferenceData, convert_to_dataset, ess - -const DEFAULT_INTERVAL_PROB = 0.94 -const INFORMATION_CRITERION_SCALES = (deviance=-2, log=1, negative_log=-1) - -include("utils.jl") -include("hdi.jl") -include("elpdresult.jl") -include("loo.jl") -include("waic.jl") -include("model_weights.jl") -include("compare.jl") -include("loo_pit.jl") -include("r2_score.jl") -include("summarystats.jl") - -end # module diff --git a/src/ArviZStats/compare.jl b/src/ArviZStats/compare.jl deleted file mode 100644 index 8a96994e..00000000 --- a/src/ArviZStats/compare.jl +++ /dev/null @@ -1,213 +0,0 @@ -""" - compare(models; kwargs...) - -Compare models based on their expected log pointwise predictive density (ELPD). - -`models` is a `Tuple`, `NamedTuple`, or `AbstractVector` whose values are either -[`AbstractELPDResult`](@ref) entries or any argument to `elpd_method`, which must produce an -`AbstractELPDResult`. - -The weights are returned in the same type of -collection. - -The argument may be any object with a `pairs` method where each value is either an -[`InferenceData`](@ref) or an [`AbstractELPDResult`](@ref). - -The ELPD is estimated either by Pareto smoothed importance sampling leave-one-out -cross-validation (LOO) or using the widely applicable information criterion (WAIC). -We recommend loo. Read more theory here - in a paper by some of the -leading authorities on model comparison dx.doi.org/10.1111/1467-9868.00353 - -# Arguments - - - `models`: a `Tuple`, `NamedTuple`, or `AbstractVector` whose values are either - [`AbstractELPDResult`](@ref) entries or any argument to `elpd_method`. - -# Keywords - - - `weights_method::AbstractModelWeightsMethod=Stacking()`: the method to be used to weight - the models. See [`model_weights`](@ref) for details - - + `elpd_method=loo`: a method that computes an `AbstractELPDResult` from an argument in - `models`. - - - `sort::Bool=true`: Whether to sort models by decreasing ELPD. - -# Returns - - - [`ModelComparisonResult`](@ref): A container for the model comparison results. - -# Examples - -Compare the centered and non centered models of the eight school problem using the defaults: -[`loo`](@ref) and [`Stacking`](@ref) weights: - -```jldoctest compare; filter = [r"└.*"] -julia> using ArviZ, ArviZExampleData - -julia> models = ( - centered=load_example_data("centered_eight"), - non_centered=load_example_data("non_centered_eight"), - ); - -julia> mc = compare(models) -┌ Warning: 1 parameters had Pareto shape values 0.7 < k ≤ 1. Resulting importance sampling estimates are likely to be unstable. -└ @ PSIS ~/.julia/packages/PSIS/... 
-ModelComparisonResult with Stacking weights - rank elpd elpd_mcse elpd_diff elpd_diff_mcse weight p ⋯ - non_centered 1 -31 1.4 0 0.0 1.0 0.9 ⋯ - centered 2 -31 1.4 0.06 0.067 0.0 0.9 ⋯ - 1 column omitted -``` - -Compare the same models from pre-computed PSIS-LOO results and computing -[`BootstrappedPseudoBMA`](@ref) weights: - -```jldoctest compare; setup = :(using Random; Random.seed!(23)) -julia> elpd_results = mc.elpd_result; - -julia> compare(elpd_results; weights_method=BootstrappedPseudoBMA()) -ModelComparisonResult with BootstrappedPseudoBMA weights - rank elpd elpd_mcse elpd_diff elpd_diff_mcse weight p ⋯ - non_centered 1 -31 1.4 0 0.0 0.52 0.9 ⋯ - centered 2 -31 1.4 0.06 0.067 0.48 0.9 ⋯ - 1 column omitted -``` -""" -function compare( - inputs; - weights_method::AbstractModelWeightsMethod=Stacking(), - elpd_method=loo, - model_names=_indices(inputs), - sort::Bool=true, -) - length(model_names) === length(inputs) || - throw(ArgumentError("Length of `model_names` must match length of `inputs`")) - elpd_results = map(Base.Fix1(_maybe_elpd_results, elpd_method), inputs) - weights = model_weights(weights_method, elpd_results) - perm = _sortperm(elpd_results; by=x -> elpd_estimates(x).elpd, rev=true) - i_elpd_max = first(perm) - elpd_max_i = elpd_estimates(elpd_results[i_elpd_max]; pointwise=true).elpd - elpd_diff_and_mcse = map(elpd_results) do r - elpd_diff_j = similar(elpd_max_i) - # workaround for named dimension packages that check dimension names are exact, for - # cases where dimension names differ - map!(-, elpd_diff_j, elpd_max_i, elpd_estimates(r; pointwise=true).elpd) - return _sum_and_se(elpd_diff_j) - end - elpd_diff = map(first, elpd_diff_and_mcse) - elpd_diff_mcse = map(last, elpd_diff_and_mcse) - rank = _assimilar(elpd_results, (1:length(elpd_results))[perm]) - result = ModelComparisonResult( - model_names, rank, elpd_diff, elpd_diff_mcse, weights, elpd_results, weights_method - ) - sort || return result - return _permute(result, perm) -end - -_maybe_elpd_results(elpd_method, x::AbstractELPDResult; kwargs...) = x -function _maybe_elpd_results(elpd_method, x; kwargs...) - elpd_result = elpd_method(x; kwargs...) - elpd_result isa AbstractELPDResult && return elpd_result - throw( - ErrorException( - "Return value of `elpd_method` must be an `AbstractELPDResult`, not `$(typeof(elpd_result))`.", - ), - ) -end - -""" - ModelComparisonResult - -Result of model comparison using ELPD. - -This struct implements the Tables and TableTraits interfaces. - -Each field returns a collection of the corresponding entry for each model: -$(FIELDS) -""" -struct ModelComparisonResult{E,N,R,W,ER,M} - "Names of the models, if provided." - name::N - "Ranks of the models (ordered by decreasing ELPD)" - rank::R - "ELPD of a model subtracted from the largest ELPD of any model" - elpd_diff::E - "Monte Carlo standard error of the ELPD difference" - elpd_diff_mcse::E - "Model weights computed with `weights_method`" - weight::W - """`AbstactELPDResult`s for each model, which can be used to access useful stats like - ELPD estimates, pointwise estimates, and Pareto shape values for PSIS-LOO""" - elpd_result::ER - "Method used to compute model weights with [`model_weights`](@ref)" - weights_method::M -end - -#### custom tabular show methods - -function Base.show(io::IO, mime::MIME"text/plain", r::ModelComparisonResult; kwargs...) - return _show(io, mime, r; kwargs...) -end -function Base.show(io::IO, mime::MIME"text/html", r::ModelComparisonResult; kwargs...) - return _show(io, mime, r; kwargs...) 
-end - -function _show(io::IO, mime::MIME, r::ModelComparisonResult; kwargs...) - row_labels = collect(r.name) - cols = Tables.columnnames(r)[2:end] - table = NamedTuple{cols}(Tables.columntable(r)) - - weights_method_name = _typename(r.weights_method) - weights = table.weight - digits_weights = ceil(Int, -log10(maximum(weights))) + 1 - weight_formatter = PrettyTables.ft_printf( - "%.$(digits_weights)f", findfirst(==(:weight), cols) - ) - return _show_prettytable( - io, - mime, - table; - title="ModelComparisonResult with $(weights_method_name) weights", - row_labels, - extra_formatters=(weight_formatter,), - kwargs..., - ) -end - -function _permute(r::ModelComparisonResult, perm) - return ModelComparisonResult( - (_permute(getfield(r, k), perm) for k in fieldnames(typeof(r))[1:(end - 1)])..., - r.weights_method, - ) -end - -#### Tables interface as column table - -Tables.istable(::Type{<:ModelComparisonResult}) = true -Tables.columnaccess(::Type{<:ModelComparisonResult}) = true -Tables.columns(r::ModelComparisonResult) = r -function Tables.columnnames(::ModelComparisonResult) - return ( - :name, :rank, :elpd, :elpd_mcse, :elpd_diff, :elpd_diff_mcse, :weight, :p, :p_mcse - ) -end -function Tables.getcolumn(r::ModelComparisonResult, i::Int) - return Tables.getcolumn(r, Tables.columnnames(r)[i]) -end -function Tables.getcolumn(r::ModelComparisonResult, nm::Symbol) - nm ∈ fieldnames(typeof(r)) && return getfield(r, nm) - if nm ∈ (:elpd, :elpd_mcse, :p, :p_mcse) - return map(e -> getproperty(elpd_estimates(e), nm), r.elpd_result) - end - throw(ArgumentError("Unrecognized column name $nm")) -end -Tables.rowaccess(::Type{<:ModelComparisonResult}) = true -Tables.rows(r::ModelComparisonResult) = Tables.rows(Tables.columntable(r)) - -IteratorInterfaceExtensions.isiterable(::ModelComparisonResult) = true -function IteratorInterfaceExtensions.getiterator(r::ModelComparisonResult) - return Tables.datavaluerows(Tables.columntable(r)) -end - -TableTraits.isiterabletable(::ModelComparisonResult) = true diff --git a/src/ArviZStats/elpdresult.jl b/src/ArviZStats/elpdresult.jl deleted file mode 100644 index 52f38c3f..00000000 --- a/src/ArviZStats/elpdresult.jl +++ /dev/null @@ -1,72 +0,0 @@ -""" -$(TYPEDEF) - -An abstract type representing the result of an ELPD computation. - -Every subtype stores estimates of both the expected log predictive density (`elpd`) and the -effective number of parameters `p`, as well as standard errors and pointwise estimates of -each, from which other relevant estimates can be computed. - -Subtypes implement the following functions: -- [`elpd_estimates`](@ref) -- [`information_criterion`](@ref) -""" -abstract type AbstractELPDResult end - -function _show_elpd_estimates( - io::IO, mime::MIME"text/plain", r::AbstractELPDResult; kwargs... -) - estimates = elpd_estimates(r) - table = map(Base.vect, NamedTuple{(:elpd, :elpd_mcse, :p, :p_mcse)}(estimates)) - _show_prettytable(io, mime, table; kwargs...) - return nothing -end - -""" - $(FUNCTIONNAME)(result::AbstractELPDResult; pointwise=false) -> (; elpd, elpd_mcse, lpd) - -Return the (E)LPD estimates from the `result`. -""" -function elpd_estimates end - -""" - $(FUNCTIONNAME)(elpd, scale::Symbol) - -Compute the information criterion for the given `scale` from the `elpd` estimate. - -`scale` must be one of $(keys(INFORMATION_CRITERION_SCALES)). 
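The scales are defined in the deleted `ArviZStats.jl` above as `(deviance=-2, log=1, negative_log=-1)`, so the conversion is a single multiplication. A worked example, using the ELPD value that appears in the `loo` doctests elsewhere in this patch:

```julia
scales = (deviance=-2, log=1, negative_log=-1)  # mirrors INFORMATION_CRITERION_SCALES
elpd = -31.0
scales.deviance * elpd       # 62.0 → classical deviance/IC scale (LOO-IC style)
scales.negative_log * elpd   # 31.0
scales.log * elpd            # -31.0, i.e. the ELPD itself
```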
- -See also: [`effective_number_of_parameters`](@ref), [`loo`](@ref), [`waic`](@ref) -""" -function information_criterion(estimates, scale::Symbol) - scale_value = INFORMATION_CRITERION_SCALES[scale] - return scale_value * estimates.elpd -end - -""" - $(FUNCTIONNAME)(result::AbstractELPDResult, scale::Symbol; pointwise=false) - -Compute information criterion for the given `scale` from the existing ELPD `result`. - -`scale` must be one of $(keys(INFORMATION_CRITERION_SCALES)). - -If `pointwise=true`, then pointwise estimates are returned. -""" -function information_criterion( - result::AbstractELPDResult, scale::Symbol; pointwise::Bool=false -) - return information_criterion(elpd_estimates(result; pointwise), scale) -end - -function _lpd_pointwise(log_likelihood, dims) - ndraws = prod(Base.Fix1(size, log_likelihood), dims) - lpd = LogExpFunctions.logsumexp(log_likelihood; dims) - T = eltype(lpd) - return dropdims(lpd; dims) .- log(T(ndraws)) -end - -function _elpd_estimates_from_pointwise(pointwise) - elpd, elpd_mcse = _sum_and_se(pointwise.elpd) - p, p_mcse = _sum_and_se(pointwise.p) - return (; elpd, elpd_mcse, p, p_mcse) -end diff --git a/src/ArviZStats/hdi.jl b/src/ArviZStats/hdi.jl deleted file mode 100644 index 141a2e40..00000000 --- a/src/ArviZStats/hdi.jl +++ /dev/null @@ -1,167 +0,0 @@ -# this pattern ensures that the type is completely specified at compile time -const HDI_BOUND_DIM = Dimensions.format( - Dimensions.Dim{:hdi_bound}([:lower, :upper]), Base.OneTo(2) -) - -""" - hdi(samples::AbstractArray{<:Real}; prob=$(DEFAULT_INTERVAL_PROB)) -> (; lower, upper) - -Estimate the unimodal highest density interval (HDI) of `samples` for the probability `prob`. - -The HDI is the minimum width Bayesian credible interval (BCI). That is, it is the smallest -possible interval containing at least `(100*prob)`% of the draws.[^Hyndman1996] - -`samples` is an array of shape `(draws[, chains[, params...]])`. If multiple parameters are -present, then `lower` and `upper` are arrays with the shape `(params...,)`, computed -separately for each marginal. - -This implementation uses the algorithm of [^ChenShao1999]. - -!!! note - Any default value of `prob` is arbitrary. The default value of - `prob=$(DEFAULT_INTERVAL_PROB)` instead of a more common default like `prob=0.95` is - chosen to reminder the user of this arbitrariness. - -[^Hyndman1996]: Rob J. Hyndman (1996) Computing and Graphing Highest Density Regions, - Amer. Stat., 50(2): 120-6. - DOI: [10.1080/00031305.1996.10474359](https://doi.org/10.1080/00031305.1996.10474359) - [jstor](https://doi.org/10.2307/2684423). -[^ChenShao1999]: Ming-Hui Chen & Qi-Man Shao (1999) - Monte Carlo Estimation of Bayesian Credible and HPD Intervals, - J Comput. Graph. Stat., 8:1, 69-92. - DOI: [10.1080/10618600.1999.10474802](https://doi.org/10.1080/00031305.1996.10474359) - [jstor](https://doi.org/10.2307/1390921). 
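The Chen–Shao construction referenced above reduces to sorting the draws and sliding a window that must contain `floor(prob * n) + 1` of them, keeping the narrowest candidate. A self-contained sketch for a vector of draws (the deleted implementation further below additionally handles multidimensional arrays, NaNs, and edge cases):

```julia
# minimal HDI sketch following the Chen–Shao construction
function hdi_sketch(x::AbstractVector{<:Real}; prob=0.94)
    n = length(x)
    m = floor(Int, prob * n) + 1        # number of draws the interval must contain
    xs = sort(x)
    lowers = xs[1:(n - m + 1)]          # candidate lower bounds
    uppers = xs[m:n]                    # matching upper bounds
    i = argmin(uppers .- lowers)        # narrowest candidate window
    return (lower=lowers[i], upper=uppers[i])
end

hdi_sketch(randn(2_000); prob=0.83)     # narrowest interval containing ~83% of the draws
```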
- -# Examples - -Here we calculate the 83% HDI for a normal random variable: - -```jldoctest hdi; setup = :(using Random; Random.seed!(78)) -julia> using ArviZ - -julia> x = randn(2_000); - -julia> hdi(x; prob=0.83) |> pairs -pairs(::NamedTuple) with 2 entries: - :lower => -1.38266 - :upper => 1.25982 -``` - -We can also calculate the HDI for a 3-dimensional array of samples: - -```jldoctest hdi; setup = :(using Random; Random.seed!(67)) -julia> x = randn(1_000, 1, 1) .+ reshape(0:5:10, 1, 1, :); - -julia> hdi(x) |> pairs -pairs(::NamedTuple) with 2 entries: - :lower => [-1.9674, 3.0326, 8.0326] - :upper => [1.90028, 6.90028, 11.9003] -``` -""" -function hdi(x::AbstractArray{<:Real}; kwargs...) - xcopy = similar(x) - copyto!(xcopy, x) - return hdi!(xcopy; kwargs...) -end - -""" - hdi!(samples::AbstractArray{<:Real}; prob=$(DEFAULT_INTERVAL_PROB)) -> (; lower, upper) - -A version of [hdi](@ref) that sorts `samples` in-place while computing the HDI. -""" -function hdi!(x::AbstractArray{<:Real}; prob::Real=DEFAULT_INTERVAL_PROB) - 0 < prob < 1 || throw(DomainError(prob, "HDI `prob` must be in the range `(0, 1)`.]")) - return _hdi!(x, prob) -end - -function _hdi!(x::AbstractVector{<:Real}, prob::Real) - isempty(x) && throw(ArgumentError("HDI cannot be computed for an empty array.")) - n = length(x) - interval_length = floor(Int, prob * n) + 1 - if any(isnan, x) || interval_length == n - lower, upper = extrema(x) - else - npoints_to_check = n - interval_length + 1 - sort!(x) - lower_range = @views x[begin:(begin - 1 + npoints_to_check)] - upper_range = @views x[(begin - 1 + interval_length):end] - lower, upper = argmax(Base.splat(-), zip(lower_range, upper_range)) - end - return (; lower, upper) -end -_hdi!(x::AbstractMatrix{<:Real}, prob::Real) = _hdi!(vec(x), prob) -function _hdi!(x::AbstractArray{<:Real}, prob::Real) - ndims(x) > 0 || - throw(ArgumentError("HDI cannot be computed for a 0-dimensional array.")) - axes_out = _param_axes(x) - lower = similar(x, axes_out) - upper = similar(x, axes_out) - for (i, x_slice) in zip(eachindex(lower), _eachparam(x)) - lower[i], upper[i] = _hdi!(x_slice, prob) - end - return (; lower, upper) -end - -""" - hdi(data::InferenceData; prob=$DEFAULT_INTERVAL_PROB) -> Dataset - hdi(data::Dataset; prob=$DEFAULT_INTERVAL_PROB) -> Dataset - -Calculate the highest density interval (HDI) for each parameter in the data. - -# Examples - -Calculate HDI for all parameters in the `posterior` group of an `InferenceData`: - -```jldoctest hdi_infdata -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> hdi(idata) -Dataset with dimensions: - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. Paul's, Mt. Hermon] Unordered, - Dim{:hdi_bound} Categorical{Symbol} Symbol[:lower, :upper] ForwardOrdered -and 3 layers: - :mu Float64 dims: Dim{:hdi_bound} (2) - :theta Float64 dims: Dim{:school}, Dim{:hdi_bound} (8×2) - :tau Float64 dims: Dim{:hdi_bound} (2) -``` - -We can also calculate the HDI for a subset of variables: - -```jldoctest hdi_infdata -julia> hdi(idata.posterior[(:theta,)]).theta -8×2 DimArray{Float64,2} theta with dimensions: - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. Paul's, Mt. 
Hermon] Unordered, - Dim{:hdi_bound} Categorical{Symbol} Symbol[:lower, :upper] ForwardOrdered - :lower :upper - "Choate" -4.56375 17.1324 - "Deerfield" -4.31055 14.2535 - "Phillips Andover" -7.76922 13.6755 - "Phillips Exeter" -4.48955 14.6635 - "Hotchkiss" -6.46991 11.7191 - "Lawrenceville" -7.04111 12.2087 - "St. Paul's" -3.09262 16.2685 - "Mt. Hermon" -5.85834 16.0143 -``` -""" -hdi(data::InferenceObjects.InferenceData; kwargs...) = hdi(data.posterior; kwargs...) -function hdi(data::InferenceObjects.Dataset; kwargs...) - results = map(DimensionalData.data(data), DimensionalData.layerdims(data)) do var, dims - x = _draw_chains_params_array(DimensionalData.DimArray(var, dims)) - r = hdi(x; kwargs...) - lower, upper = map(Base.Fix2(_as_dimarray, x), r) - return cat(lower, upper; dims=HDI_BOUND_DIM) - end - dims = Dimensions.combinedims( - Dimensions.otherdims(data, InferenceObjects.DEFAULT_SAMPLE_DIMS), HDI_BOUND_DIM - ) - return DimensionalData.rebuild( - data; - data=map(parent, results), - dims, - layerdims=map(Dimensions.dims, results), - refdims=(), - metadata=DimensionalData.NoMetadata(), - ) -end diff --git a/src/ArviZStats/loo.jl b/src/ArviZStats/loo.jl deleted file mode 100644 index 0a728d82..00000000 --- a/src/ArviZStats/loo.jl +++ /dev/null @@ -1,176 +0,0 @@ -""" -$(SIGNATURES) - -Results of Pareto-smoothed importance sampling leave-one-out cross-validation (PSIS-LOO). - -See also: [`loo`](@ref), [`AbstractELPDResult`](@ref) - -$(FIELDS) -""" -struct PSISLOOResult{E,P,R<:PSIS.PSISResult} <: AbstractELPDResult - "Estimates of the expected log pointwise predictive density (ELPD) and effective number of parameters (p)" - estimates::E - "Pointwise estimates" - pointwise::P - "Pareto-smoothed importance sampling (PSIS) results" - psis_result::R -end - -function elpd_estimates(r::PSISLOOResult; pointwise::Bool=false) - return pointwise ? r.pointwise : r.estimates -end - -function Base.show(io::IO, mime::MIME"text/plain", result::PSISLOOResult; kwargs...) - _show_elpd_estimates(io, mime, result; title="PSISLOOResult with estimates", kwargs...) - println(io) - println(io) - print(io, "and ") - show(io, mime, result.psis_result) - return nothing -end - -""" - loo(log_likelihood; reff=nothing, kwargs...) -> PSISLOOResult{<:NamedTuple,<:NamedTuple} - -Compute the Pareto-smoothed importance sampling leave-one-out cross-validation (PSIS-LOO). -[^Vehtari2017][^LOOFAQ] - -`log_likelihood` must be an array of log-likelihood values with shape -`(chains, draws[, params...])`. - -# Keywords - - - `reff::Union{Real,AbstractArray{<:Real}}`: The relative effective sample size(s) of the - _likelihood_ values. If an array, it must have the same data dimensions as the - corresponding log-likelihood variable. If not provided, then this is estimated using - [`ess`](@ref). - - `kwargs`: Remaining keywords are forwarded to [`psis`](@ref). - -See also: [`PSISLOOResult`](@ref), [`waic`](@ref) - -[^Vehtari2017]: Vehtari, A., Gelman, A. & Gabry, J. - Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. - Stat Comput 27, 1413–1432 (2017). - doi: [10.1007/s11222-016-9696-4](https://doi.org/10.1007/s11222-016-9696-4) - arXiv: [1507.04544](https://arxiv.org/abs/1507.04544) -[^LOOFAQ]: Aki Vehtari. Cross-validation FAQ. 
https://mc-stan.org/loo/articles/online-only/faq.html - -# Examples - -Manually compute ``R_\\mathrm{eff}`` and calculate PSIS-LOO of a model: - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> log_like = PermutedDimsArray(idata.log_likelihood.obs, (:draw, :chain, :school)); - -julia> reff = ess(log_like; kind=:basic, split_chains=1, relative=true); - -julia> loo(log_like; reff) -PSISLOOResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.34 - -and PSISResult with 500 draws, 4 chains, and 8 parameters -Pareto shape (k) diagnostic values: - Count Min. ESS - (-Inf, 0.5] good 7 (87.5%) 151 - (0.5, 0.7] okay 1 (12.5%) 446 -``` -""" -loo(ll::AbstractArray; kwargs...) = _loo(ll; kwargs...) - -""" - loo(data::Dataset; [var_name::Symbol,] kwargs...) -> PSISLOOResult{<:NamedTuple,<:Dataset} - loo(data::InferenceData; [var_name::Symbol,] kwargs...) -> PSISLOOResult{<:NamedTuple,<:Dataset} - -Compute PSIS-LOO from log-likelihood values in `data`. - -If more than one log-likelihood variable is present, then `var_name` must be provided. - -# Examples - -Calculate PSIS-LOO of a model: - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> loo(idata) -PSISLOOResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.34 - -and PSISResult with 500 draws, 4 chains, and 8 parameters -Pareto shape (k) diagnostic values: - Count Min. ESS - (-Inf, 0.5] good 6 (75.0%) 135 - (0.5, 0.7] okay 2 (25.0%) 421 -``` -""" -function loo( - data::Union{InferenceObjects.InferenceData,InferenceObjects.Dataset}; - var_name::Union{Symbol,Nothing}=nothing, - kwargs..., -) - log_like = _draw_chains_params_array(log_likelihood(data, var_name)) - result = loo(log_like; kwargs...) - pointwise = Dimensions.rebuild( - ArviZ.convert_to_dataset(result.pointwise; default_dims=()); - metadata=DimensionalData.NoMetadata(), - ) - return PSISLOOResult(result.estimates, pointwise, result.psis_result) -end - -function _psis_loo_setup(log_like, _reff; kwargs...) - if _reff === nothing - # normalize log likelihoods to improve numerical stability of ESS estimate - like = LogExpFunctions.softmax(log_like; dims=(1, 2)) - reff = MCMCDiagnosticTools.ess(like; kind=:basic, split_chains=1, relative=true) - else - reff = _reff - end - # smooth importance weights - psis_result = PSIS.psis(-log_like, reff; kwargs...) - return psis_result -end - -function _loo(log_like; reff=nothing, kwargs...) - _check_log_likelihood(log_like) - psis_result = _psis_loo_setup(log_like, reff; kwargs...) 
- return _loo(log_like, psis_result) -end -function _loo(log_like, psis_result, dims=(1, 2)) - # compute pointwise estimates - lpd_i = _maybe_scalar(_lpd_pointwise(log_like, dims)) - elpd_i, elpd_se_i = map( - _maybe_scalar, _elpd_loo_pointwise_and_se(psis_result, log_like, dims) - ) - p_i = lpd_i - elpd_i - pointwise = (; - elpd=elpd_i, - elpd_mcse=elpd_se_i, - p=p_i, - reff=psis_result.reff, - pareto_shape=psis_result.pareto_shape, - ) - - # combine estimates - estimates = _elpd_estimates_from_pointwise(pointwise) - - return PSISLOOResult(estimates, pointwise, psis_result) -end - -function _elpd_loo_pointwise_and_se(psis_result::PSIS.PSISResult, log_likelihood, dims) - log_norm = LogExpFunctions.logsumexp(psis_result.log_weights; dims) - log_weights = psis_result.log_weights .- log_norm - elpd_i = _log_mean(log_likelihood, log_weights; dims) - elpd_i_se = _se_log_mean(log_likelihood, log_weights; dims, log_mean=elpd_i) - return ( - elpd=_maybe_scalar(dropdims(elpd_i; dims)), - elpd_se=_maybe_scalar(dropdims(elpd_i_se; dims) ./ sqrt.(psis_result.reff)), - ) -end diff --git a/src/ArviZStats/loo_pit.jl b/src/ArviZStats/loo_pit.jl deleted file mode 100644 index a6b0bcaf..00000000 --- a/src/ArviZStats/loo_pit.jl +++ /dev/null @@ -1,270 +0,0 @@ -""" - loo_pit(y, y_pred, log_weights; kwargs...) -> Union{Real,AbstractArray} - -Compute leave-one-out probability integral transform (LOO-PIT) checks. - -# Arguments - - - `y`: array of observations with shape `(params...,)` - - `y_pred`: array of posterior predictive samples with shape `(draws, chains, params...)`. - - `log_weights`: array of normalized log LOO importance weights with shape - `(draws, chains, params...)`. - -# Keywords - - - `is_discrete`: If not provided, then it is set to `true` iff elements of `y` and `y_pred` - are all integer-valued. If `true`, then data are smoothed using [`smooth_data`](@ref) to - make them non-discrete before estimating LOO-PIT values. - - `kwargs`: Remaining keywords are forwarded to `smooth_data` if data is discrete. - -# Returns - - - `pitvals`: LOO-PIT values with same size as `y`. If `y` is a scalar, then `pitvals` is a - scalar. - -LOO-PIT is a marginal posterior predictive check. If ``y_{-i}`` is the array ``y`` of -observations with the ``i``th observation left out, and ``y_i^*`` is a posterior prediction -of the ``i``th observation, then the LOO-PIT value for the ``i``th observation is defined as - -```math -P(y_i^* \\le y_i \\mid y_{-i}) = \\int_{-\\infty}^{y_i} p(y_i^* \\mid y_{-i}) \\mathrm{d} y_i^* -``` - -The LOO posterior predictions and the corresponding observations should have similar -distributions, so if conditional predictive distributions are well-calibrated, then all -LOO-PIT values should be approximately uniformly distributed on ``[0, 1]``.[^Gabry2019] - -[^Gabry2019]: Gabry, J., Simpson, D., Vehtari, A., Betancourt, M. & Gelman, A. - Visualization in Bayesian Workflow. - J. R. Stat. Soc. Ser. A Stat. Soc. 182, 389–402 (2019). - doi: [10.1111/rssa.12378](https://doi.org/10.1111/rssa.12378) - arXiv: [1709.01449](https://arxiv.org/abs/1709.01449) - -# Examples - -Calculate LOO-PIT values using as test quantity the observed values themselves. 
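Conceptually, each LOO-PIT value is a weighted empirical CDF of the posterior-predictive draws evaluated at the observation, with the normalized PSIS log-weights supplying the leave-one-out reweighting. A hedged single-observation sketch (the deleted `_loo_pit` further below is the array version of this), after which the doctest computes the same quantity through `loo_pit` directly:

```julia
using LogExpFunctions: logsumexp  # a dependency dropped from ArviZ by this patch; assumed installed

# one LOO-PIT value for observation y, given predictive draws and normalized PSIS log-weights
function loo_pit_one(y::Real, y_pred::AbstractArray{<:Real}, log_weights::AbstractArray{<:Real})
    sel = log_weights[y_pred .≤ y]               # log-weights of draws not exceeding y
    return isempty(sel) ? 0.0 : clamp(exp(logsumexp(sel)), 0, 1)
end
```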
- -```jldoctest loo_pit1 -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> log_weights = loo(idata; var_name=:obs).psis_result.log_weights; - -julia> loo_pit( - idata.observed_data.obs, - permutedims(idata.posterior_predictive.obs, (:draw, :chain, :school)), - log_weights, - ) -8-element DimArray{Float64,1} with dimensions: - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. Paul's, Mt. Hermon] Unordered - "Choate" 0.943511 - "Deerfield" 0.63797 - "Phillips Andover" 0.316697 - "Phillips Exeter" 0.582252 - "Hotchkiss" 0.295321 - "Lawrenceville" 0.403318 - "St. Paul's" 0.902508 - "Mt. Hermon" 0.655275 -``` - -Calculate LOO-PIT values using as test quantity the square of the difference between -each observation and `mu`. - -```jldoctest loo_pit1 -julia> using DimensionalData, Statistics - -julia> T = idata.observed_data.obs .- only(median(idata.posterior.mu; dims=(:draw, :chain))); - -julia> T_pred = permutedims( - broadcast_dims(-, idata.posterior_predictive.obs, idata.posterior.mu), - (:draw, :chain, :school), - ); - -julia> loo_pit(T .^ 2, T_pred .^ 2, log_weights) -8-element DimArray{Float64,1} with dimensions: - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. Paul's, Mt. Hermon] Unordered - "Choate" 0.873577 - "Deerfield" 0.243686 - "Phillips Andover" 0.357563 - "Phillips Exeter" 0.149908 - "Hotchkiss" 0.435094 - "Lawrenceville" 0.220627 - "St. Paul's" 0.775086 - "Mt. Hermon" 0.296706 -``` -""" -function loo_pit( - y::Union{AbstractArray,Number}, - y_pred::AbstractArray, - log_weights::AbstractArray; - is_discrete::Union{Bool,Nothing}=nothing, - kwargs..., -) - sample_dims = (1, 2) - size(y) == size(y_pred)[3:end] || - throw(ArgumentError("data dimensions of `y` and `y_pred` must have the size")) - size(log_weights) == size(y_pred) || - throw(ArgumentError("`log_weights` and `y_pred` must have same size")) - _is_discrete = if is_discrete === nothing - all(isinteger, y) && all(isinteger, y_pred) - else - is_discrete - end - if _is_discrete - is_discrete === nothing && - @warn "All data and predictions are integer-valued. Smoothing data before running `loo_pit`." - y_smooth = smooth_data(y; kwargs...) - y_pred_smooth = smooth_data(y_pred; dims=_otherdims(y_pred, sample_dims), kwargs...) - return _loo_pit(y_smooth, y_pred_smooth, log_weights) - else - return _loo_pit(y, y_pred, log_weights) - end -end - -""" - loo_pit(idata::InferenceData, log_weights; kwargs...) -> DimArray - -Compute LOO-PIT values using existing normalized log LOO importance weights. - -# Keywords - - - `y_name`: Name of observed data variable in `idata.observed_data`. If not provided, then - the only observed data variable is used. - - `y_pred_name`: Name of posterior predictive variable in `idata.posterior_predictive`. - If not provided, then `y_name` is used. - - `kwargs`: Remaining keywords are forwarded to [`loo_pit`](@ref). - -# Examples - -Calculate LOO-PIT values using already computed log weights. - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> loo_result = loo(idata; var_name=:obs); - -julia> loo_pit(idata, loo_result.psis_result.log_weights; y_name=:obs) -8-element DimArray{Float64,1} loo_pit_obs with dimensions: - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. Paul's, Mt. 
Hermon] Unordered - "Choate" 0.943511 - "Deerfield" 0.63797 - "Phillips Andover" 0.316697 - "Phillips Exeter" 0.582252 - "Hotchkiss" 0.295321 - "Lawrenceville" 0.403318 - "St. Paul's" 0.902508 - "Mt. Hermon" 0.655275 -``` -""" -function loo_pit( - idata::InferenceObjects.InferenceData, - log_weights::AbstractArray; - y_name::Union{Symbol,Nothing}=nothing, - y_pred_name::Union{Symbol,Nothing}=nothing, - kwargs..., -) - (_y_name, y), (_, _y_pred) = observations_and_predictions(idata, y_name, y_pred_name) - y_pred = _draw_chains_params_array(_y_pred) - pitvals = loo_pit(y, y_pred, log_weights; kwargs...) - return DimensionalData.rebuild(pitvals; name=Symbol("loo_pit_$(_y_name)")) -end - -""" - loo_pit(idata::InferenceData; kwargs...) -> DimArray - -Compute LOO-PIT from groups in `idata` using PSIS-LOO. - -See also: [`loo`](@ref), [`psis`](@ref) - -# Keywords - - - `y_name`: Name of observed data variable in `idata.observed_data`. If not provided, then - the only observed data variable is used. - - `y_pred_name`: Name of posterior predictive variable in `idata.posterior_predictive`. - If not provided, then `y_name` is used. - - `log_likelihood_name`: Name of log-likelihood variable in `idata.log_likelihood`. - If not provided, then `y_name` is used if `idata` has a `log_likelihood` group, - otherwise the only variable is used. - - `reff::Union{Real,AbstractArray{<:Real}}`: The relative effective sample size(s) of the - _likelihood_ values. If an array, it must have the same data dimensions as the - corresponding log-likelihood variable. If not provided, then this is estimated using - [`ess`](@ref). - - `kwargs`: Remaining keywords are forwarded to [`loo_pit`](@ref). - -# Examples - -Calculate LOO-PIT values using as test quantity the observed values themselves. - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> loo_pit(idata; y_name=:obs) -8-element DimArray{Float64,1} loo_pit_obs with dimensions: - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. Paul's, Mt. Hermon] Unordered - "Choate" 0.943511 - "Deerfield" 0.63797 - "Phillips Andover" 0.316697 - "Phillips Exeter" 0.582252 - "Hotchkiss" 0.295321 - "Lawrenceville" 0.403318 - "St. Paul's" 0.902508 - "Mt. Hermon" 0.655275 -``` -""" -function loo_pit( - idata::InferenceObjects.InferenceData; - y_name::Union{Symbol,Nothing}=nothing, - y_pred_name::Union{Symbol,Nothing}=nothing, - log_likelihood_name::Union{Symbol,Nothing}=nothing, - reff=nothing, - kwargs..., -) - (_y_name, y), (_, _y_pred) = observations_and_predictions(idata, y_name, y_pred_name) - y_pred = _draw_chains_params_array(_y_pred) - if log_likelihood_name === nothing - if haskey(idata, :log_likelihood) - _log_like = log_likelihood(idata.log_likelihood, _y_name) - elseif haskey(idata, :sample_stats) && haskey(idata.sample_stats, :log_likelihood) - _log_like = idata.sample_stats.log_likelihood - else - throw(ArgumentError("There must be a `log_likelihood` group in `idata`")) - end - else - _log_like = log_likelihood(idata.log_likelihood, log_likelihood_name) - end - log_like = _draw_chains_params_array(_log_like) - psis_result = _psis_loo_setup(log_like, reff) - pitvals = loo_pit(y, y_pred, psis_result.log_weights; kwargs...) 
- return DimensionalData.rebuild(pitvals; name=Symbol("loo_pit_$(_y_name)")) -end - -function _loo_pit(y::Number, y_pred, log_weights) - return @views exp.(LogExpFunctions.logsumexp(log_weights[y_pred .≤ y])) -end -function _loo_pit(y::AbstractArray, y_pred, log_weights) - sample_dims = (1, 2) - T = typeof(exp(zero(float(eltype(log_weights))))) - pitvals = similar(y, T) - param_dims = _otherdims(log_weights, sample_dims) - # work around for `eachslices` not supporting multiple dims in older Julia versions - map!( - pitvals, - y, - CartesianIndices(map(Base.Fix1(axes, y_pred), param_dims)), - CartesianIndices(map(Base.Fix1(axes, log_weights), param_dims)), - ) do yi, i1, i2 - yi_pred = @views y_pred[:, :, i1] - lwi = @views log_weights[:, :, i2] - init = T(-Inf) - sel_iter = Iterators.flatten(( - init, (lwi_j for (lwi_j, yi_pred_j) in zip(lwi, yi_pred) if yi_pred_j ≤ yi) - )) - return clamp(exp(LogExpFunctions.logsumexp(sel_iter)), 0, 1) - end - return pitvals -end diff --git a/src/ArviZStats/model_weights.jl b/src/ArviZStats/model_weights.jl deleted file mode 100644 index 83302a43..00000000 --- a/src/ArviZStats/model_weights.jl +++ /dev/null @@ -1,303 +0,0 @@ -const DEFAULT_STACKING_OPTIMIZER = Optim.LBFGS() - -""" -$(TYPEDEF) - -An abstract type representing methods for computing model weights. - -Subtypes implement [`model_weights`](@ref)`(method, elpd_results)`. -""" -abstract type AbstractModelWeightsMethod end - -""" - model_weights(elpd_results; method=Stacking()) - model_weights(method::AbstractModelWeightsMethod, elpd_results) - -Compute weights for each model in `elpd_results` using `method`. - -`elpd_results` is a `Tuple`, `NamedTuple`, or `AbstractVector` with -[`AbstractELPDResult`](@ref) entries. The weights are returned in the same type of -collection. - -[`Stacking`](@ref) is the recommended approach, as it performs well even when the true data -generating process is not included among the candidate models. See [^YaoVehtari2018] for -details. - -See also: [`AbstractModelWeightsMethod`](@ref), [`compare`](@ref) - -[^YaoVehtari2018]: Yuling Yao, Aki Vehtari, Daniel Simpson, and Andrew Gelman. - Using Stacking to Average Bayesian Predictive Distributions. - 2018. Bayesian Analysis. 13, 3, 917–1007. - doi: [10.1214/17-BA1091](https://doi.org/10.1214/17-BA1091) - arXiv: [1704.02030](https://arxiv.org/abs/1704.02030) - -# Examples - -Compute [`Stacking`](@ref) weights for two models: - -```jldoctest model_weights; filter = [r"└.*"] -julia> using ArviZ, ArviZExampleData - -julia> models = ( - centered=load_example_data("centered_eight"), - non_centered=load_example_data("non_centered_eight"), - ); - -julia> elpd_results = map(loo, models); -┌ Warning: 1 parameters had Pareto shape values 0.7 < k ≤ 1. Resulting importance sampling estimates are likely to be unstable. -└ @ PSIS ~/.julia/packages/PSIS/... 
- -julia> model_weights(elpd_results; method=Stacking()) |> pairs -pairs(::NamedTuple) with 2 entries: - :centered => 5.34175e-19 - :non_centered => 1.0 -``` - -Now we compute [`BootstrappedPseudoBMA`](@ref) weights for the same models: - -```jldoctest model_weights; setup = :(using Random; Random.seed!(94)) -julia> model_weights(elpd_results; method=BootstrappedPseudoBMA()) |> pairs -pairs(::NamedTuple) with 2 entries: - :centered => 0.483723 - :non_centered => 0.516277 -``` -""" -function model_weights(elpd_results; method::AbstractModelWeightsMethod=Stacking()) - return model_weights(method, elpd_results) -end - -# Akaike-type weights are defined as exp(-AIC/2), normalized to 1, which on the log-score -# IC scale is equivalent to softmax -akaike_weights!(w, elpds) = LogExpFunctions.softmax!(w, elpds) -_akaike_weights(elpds) = _softmax(elpds) - -""" -$(TYPEDEF) - -Model weighting method using pseudo Bayesian Model Averaging (pseudo-BMA) and Akaike-type -weighting. - - PseudoBMA(; regularize=false) - PseudoBMA(regularize) - -Construct the method with optional regularization of the weights using the standard error of -the ELPD estimate. - -!!! note - - This approach is not recommended, as it produces unstable weight estimates. It is - recommended to instead use [`BootstrappedPseudoBMA`](@ref) to stabilize the weights - or [`Stacking`](@ref). For details, see [^YaoVehtari2018]. - -[^YaoVehtari2018]: Yuling Yao, Aki Vehtari, Daniel Simpson, and Andrew Gelman. - Using Stacking to Average Bayesian Predictive Distributions. - 2018. Bayesian Analysis. 13, 3, 917–1007. - doi: [10.1214/17-BA1091](https://doi.org/10.1214/17-BA1091) - arXiv: [1704.02030](https://arxiv.org/abs/1704.02030) - -See also: [`Stacking`](@ref) -""" -struct PseudoBMA <: AbstractModelWeightsMethod - regularize::Bool -end -PseudoBMA(; regularize::Bool=false) = PseudoBMA(regularize) - -function model_weights(method::PseudoBMA, elpd_results) - elpds = map(elpd_results) do result - est = elpd_estimates(result) - method.regularize || return est.elpd - return est.elpd - est.elpd_mcse / 2 - end - return _akaike_weights(elpds) -end - -""" -$(TYPEDEF) - -Model weighting method using pseudo Bayesian Model Averaging using Akaike-type weighting -with the Bayesian bootstrap (pseudo-BMA+)[^YaoVehtari2018]. - -The Bayesian bootstrap stabilizes the model weights. - - BootstrappedPseudoBMA(; rng=Random.default_rng(), samples=1_000, alpha=1) - BootstrappedPseudoBMA(rng, samples, alpha) - -Construct the method. - -$(TYPEDFIELDS) - -See also: [`Stacking`](@ref) - -[^YaoVehtari2018]: Yuling Yao, Aki Vehtari, Daniel Simpson, and Andrew Gelman. - Using Stacking to Average Bayesian Predictive Distributions. - 2018. Bayesian Analysis. 13, 3, 917–1007. - doi: [10.1214/17-BA1091](https://doi.org/10.1214/17-BA1091) - arXiv: [1704.02030](https://arxiv.org/abs/1704.02030) -""" -struct BootstrappedPseudoBMA{R<:Random.AbstractRNG,T<:Real} <: AbstractModelWeightsMethod - "The random number generator to use for the Bayesian bootstrap" - rng::R - "The number of samples to draw for bootstrapping" - samples::Int - """The shape parameter in the Dirichlet distribution used for the Bayesian bootstrap. 
- The default (1) corresponds to a uniform distribution on the simplex.""" - alpha::T -end -function BootstrappedPseudoBMA(; - rng::Random.AbstractRNG=Random.default_rng(), samples::Int=1_000, alpha::Real=1 -) - return BootstrappedPseudoBMA(rng, samples, alpha) -end - -function model_weights(method::BootstrappedPseudoBMA, elpd_results) - _elpd = vec(elpd_estimates(first(values(elpd_results)); pointwise=true).elpd) - α = similar(_elpd) - n = length(α) - rng = method.rng - α_dist = Distributions.Dirichlet(n, method.alpha) - ic_mat = _elpd_matrix(elpd_results) - elpd_mean = similar(ic_mat, axes(ic_mat, 2)) - weights_mean = zero(elpd_mean) - w = similar(weights_mean) - for _ in 1:(method.samples) - _model_weights_bootstrap!(w, elpd_mean, α, rng, α_dist, ic_mat) - weights_mean .+= w - end - weights_mean ./= method.samples - return _assimilar(elpd_results, weights_mean) -end - -function _model_weights_bootstrap!(w, elpd_mean, α, rng, α_dist, ic_mat) - Random.rand!(rng, α_dist, α) - mul!(elpd_mean, ic_mat', α) - elpd_mean .*= length(α) - akaike_weights!(w, elpd_mean) - return w -end - -""" -$(TYPEDEF) - -Model weighting using stacking of predictive distributions[^YaoVehtari2018]. - - Stacking(; optimizer=Optim.LBFGS(), options=Optim.Options() - Stacking(optimizer[, options]) - -Construct the method, optionally customizing the optimization. - -$(TYPEDFIELDS) - -See also: [`BootstrappedPseudoBMA`](@ref) - -[^YaoVehtari2018]: Yuling Yao, Aki Vehtari, Daniel Simpson, and Andrew Gelman. - Using Stacking to Average Bayesian Predictive Distributions. - 2018. Bayesian Analysis. 13, 3, 917–1007. - doi: [10.1214/17-BA1091](https://doi.org/10.1214/17-BA1091) - arXiv: [1704.02030](https://arxiv.org/abs/1704.02030) -""" -struct Stacking{O<:Optim.AbstractOptimizer} <: AbstractModelWeightsMethod - """The optimizer to use for the optimization of the weights. The optimizer must support - projected gradient optimization viae a `manifold` field.""" - optimizer::O - """The Optim options to use for the optimization of the weights.""" - options::Optim.Options - - function Stacking( - optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options() - ) - hasfield(typeof(optimizer), :manifold) || - throw(ArgumentError("The optimizer must have a `manifold` field.")) - _optimizer = Setfield.@set optimizer.manifold = Optim.Sphere() - return new{typeof(_optimizer)}(_optimizer, options) - end -end -function Stacking(; - optimizer::Optim.AbstractOptimizer=DEFAULT_STACKING_OPTIMIZER, - options::Optim.Options=Optim.Options(), -) - return Stacking(optimizer, options) -end - -function model_weights(method::Stacking, elpd_pairs) - ic_mat = _elpd_matrix(elpd_pairs) - exp_ic_mat = exp.(ic_mat) - _, weights = _model_weights_stacking(exp_ic_mat, method.optimizer, method.options) - return _assimilar(elpd_pairs, weights) -end -function _model_weights_stacking(exp_ic_mat, optimizer, options) - # set up optimization objective - objective = InplaceStackingOptimObjective(exp_ic_mat) - - # set up initial point on optimization manifold - w0 = similar(exp_ic_mat, axes(exp_ic_mat, 2)) - fill!(w0, 1//length(w0)) - x0 = _initial_point(objective, w0) - - # optimize - sol = Optim.optimize(Optim.only_fg!(objective), x0, optimizer, options) - - # check convergence - Optim.converged(sol) || - @warn "Optimization of stacking weights failed to converge after $(Optim.iterations(sol)) iterations." 
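The Bayesian-bootstrap weighting implemented above can be illustrated in a few lines. The following is a minimal editorial sketch, not the package API or part of this patch; the helper name `pseudo_bma_plus` and the toy ELPD matrix are invented for illustration, mirroring `_model_weights_bootstrap!`:

```julia
using Distributions, LogExpFunctions, Random

# Sketch of pseudo-BMA+: draw Dirichlet(alpha) weights over observations, form each model's
# weighted mean ELPD (rescaled by n, as in `_model_weights_bootstrap!`), convert to
# Akaike-type weights with softmax, and average over bootstrap replicates.
function pseudo_bma_plus(elpd_pointwise::AbstractMatrix; samples=1_000, alpha=1.0,
                         rng=Random.default_rng())
    n, nmodels = size(elpd_pointwise)        # observations × models
    w_mean = zeros(nmodels)
    for _ in 1:samples
        α = rand(rng, Dirichlet(n, alpha))   # Bayesian bootstrap weights over observations
        z = n .* (elpd_pointwise' * α)       # weighted mean ELPD per model, on the sum scale
        w_mean .+= softmax(z)                # Akaike-type weights for this replicate
    end
    return w_mean ./ samples
end

pseudo_bma_plus(randn(100, 2))  # toy pointwise ELPDs for two hypothetical models
```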
- - # return solution and weights - w = _final_point(objective, sol.minimizer) - return sol, w -end - -function _elpd_matrix(elpd_results) - elpd_values = map(elpd_results) do result - return vec(elpd_estimates(result; pointwise=true).elpd) - end - elpd_mat = reduce(hcat, elpd_values) - return elpd_mat -end - -# Optimize on the probability simplex by converting the problem to optimization on the unit -# sphere, optimizing with projected gradients, and mapping the solution back to the simplex. -# When the objective function on the simplex is convex, each global minimizer on the sphere -# maps to the global minimizer on the simplex, but the optimization manifold is simple, and -# no inequality constraints exist. -# Q Li, D McKenzie, W Yin. "From the simplex to the sphere: faster constrained optimization -# using the Hadamard parametrization." Inf. Inference. 12.3 (2023): iaad017. -# doi: 10.1093/imaiai/iaad017. arXiv: 2112.05273 -struct InplaceStackingOptimObjective{E,C} - exp_ic_mat::E - cache::C -end -function InplaceStackingOptimObjective(exp_ic_mat) - cache = ( - similar(exp_ic_mat, axes(exp_ic_mat, 1)), similar(exp_ic_mat, axes(exp_ic_mat, 2)) - ) - return InplaceStackingOptimObjective(exp_ic_mat, cache) -end -function (obj::InplaceStackingOptimObjective)(F, G, x) - exp_ic_mat = obj.exp_ic_mat - cache, w = obj.cache - _sphere_to_simplex!(w, x) - mul!(cache, exp_ic_mat, w) - cache .= inv.(cache) - if G !== nothing - mul!(G, exp_ic_mat', cache) - G .*= -1 - _∇sphere_to_simplex!(G, x) - end - if F !== nothing - return sum(log, cache) - end - return nothing -end -_initial_point(::InplaceStackingOptimObjective, w0) = _simplex_to_sphere(w0) -_final_point(::InplaceStackingOptimObjective, x) = _sphere_to_simplex(x) - -# if ∑xᵢ² = 1 and wᵢ = xᵢ², then w is on the probability simplex -_sphere_to_simplex(x) = x .^ 2 -function _sphere_to_simplex!(w, x) - w .= x .^ 2 - return w -end -_simplex_to_sphere(x) = sqrt.(x) -function _∇sphere_to_simplex!(∂x, x) - ∂x .*= 2 .* x - return ∂x -end diff --git a/src/ArviZStats/r2_score.jl b/src/ArviZStats/r2_score.jl deleted file mode 100644 index e175fb6c..00000000 --- a/src/ArviZStats/r2_score.jl +++ /dev/null @@ -1,99 +0,0 @@ -""" - r2_score(y_true::AbstractVector, y_pred::AbstractVecOrMat) -> (; r2, r2_std) - -``R²`` for linear Bayesian regression models.[^GelmanGoodrich2019] - -# Arguments - - - `y_true`: Observed data of length `noutputs` - - `y_pred`: Predicted data with size `(ndraws[, nchains], noutputs)` - -[^GelmanGoodrich2019]: Andrew Gelman, Ben Goodrich, Jonah Gabry & Aki Vehtari (2019) - R-squared for Bayesian Regression Models, The American Statistician, - 73:3, 307-309, - DOI: [10.1080/00031305.2018.1549100](https://doi.org/10.1080/00031305.2018.1549100). - -# Examples - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("regression1d"); - -julia> y_true = idata.observed_data.y; - -julia> y_pred = PermutedDimsArray(idata.posterior_predictive.y, (:draw, :chain, :y_dim_0)); - -julia> r2_score(y_true, y_pred) |> pairs -pairs(::NamedTuple) with 2 entries: - :r2 => 0.683197 - :r2_std => 0.0368838 -``` -""" -function r2_score(y_true, y_pred) - r_squared = r2_samples(y_true, y_pred) - return NamedTuple{(:r2, :r2_std)}(StatsBase.mean_and_std(r_squared; corrected=false)) -end - -""" - r2_score(idata::InferenceData; y_name, y_pred_name) -> (; r2, r2_std) - -Compute ``R²`` from `idata`, automatically formatting the predictions to the correct shape.
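The per-draw ``R²`` described above reduces to a ratio of variances across data points. A minimal sketch, assuming plain arrays rather than the `InferenceData` path handled here (the helper name `r2_draws` and the toy data are invented):

```julia
using Statistics

# Per draw: R² = var(fit) / (var(fit) + var(residuals)), variances taken across data points.
function r2_draws(y::AbstractVector, ypred::AbstractMatrix)  # ypred is draws × observations
    var_fit = var(ypred; dims=2, corrected=false)
    var_res = var(ypred .- y'; dims=2, corrected=false)
    return vec(var_fit ./ (var_fit .+ var_res))
end

y = randn(20)                        # toy observations
ypred = y' .+ 0.3 .* randn(500, 20)  # toy predictions: 500 draws × 20 observations
r2 = r2_draws(y, ypred)
mean(r2), std(r2; corrected=false)   # cf. the (; r2, r2_std) summary returned by r2_score
```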
- -# Keywords - - - `y_name`: Name of observed data variable in `idata.observed_data`. If not provided, then - the only observed data variable is used. - - `y_pred_name`: Name of posterior predictive variable in `idata.posterior_predictive`. - If not provided, then `y_name` is used. - -# Examples - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("regression10d"); - -julia> r2_score(idata) |> pairs -pairs(::NamedTuple) with 2 entries: - :r2 => 0.998385 - :r2_std => 0.000100621 -``` -""" -function r2_score( - idata::InferenceObjects.InferenceData; - y_name::Union{Symbol,Nothing}=nothing, - y_pred_name::Union{Symbol,Nothing}=nothing, -) - (_, y), (_, y_pred) = observations_and_predictions(idata, y_name, y_pred_name) - return r2_score(y, _draw_chains_params_array(y_pred)) -end - -""" - r2_samples(y_true::AbstractVector, y_pred::AbstractMatrix) -> AbstractVector - -``R²`` samples for Bayesian regression models. Only valid for linear models. - -See also [`r2_score`](@ref). - -# Arguments - - - `y_true`: Observed data of length `noutputs` - - `y_pred`: Predicted data with size `(ndraws[, nchains], noutputs)` -""" -function r2_samples(y_true::AbstractVector, y_pred::AbstractArray) - @assert ndims(y_pred) ∈ (2, 3) - corrected = false - dims = ndims(y_pred) - - var_y_est = dropdims(Statistics.var(y_pred; corrected, dims); dims) - y_true_reshape = reshape(y_true, ntuple(one, ndims(y_pred) - 1)..., :) - var_residual = dropdims(Statistics.var(y_pred .- y_true_reshape; corrected, dims); dims) - - # allocate storage for type-stability - T = typeof(first(var_y_est) / first(var_residual)) - sample_axes = ntuple(Base.Fix1(axes, y_pred), ndims(y_pred) - 1) - r_squared = similar(y_pred, T, sample_axes) - r_squared .= var_y_est ./ (var_y_est .+ var_residual) - return r_squared -end diff --git a/src/ArviZStats/summarystats.jl b/src/ArviZStats/summarystats.jl deleted file mode 100644 index 2377cd76..00000000 --- a/src/ArviZStats/summarystats.jl +++ /dev/null @@ -1,254 +0,0 @@ -const DEFAULT_METRIC_DIM = Dimensions.key2dim(:_metric) - -""" -$(SIGNATURES) - -A container for a column table of values computed by [`summarystats`](@ref). - -This object implements the Tables and TableTraits interfaces and has a custom `show` method. - -$(FIELDS) -""" -struct SummaryStats{D<:NamedTuple} - """The summary statistics for each variable, with the first entry containing the - variable names""" - data::D -end - -# forward key interfaces from its parent -Base.parent(stats::SummaryStats) = getfield(stats, :data) -Base.propertynames(stats::SummaryStats) = propertynames(parent(stats)) -Base.getproperty(stats::SummaryStats, nm::Symbol) = getproperty(parent(stats), nm) -Base.keys(stats::SummaryStats) = keys(parent(stats)) -Base.haskey(stats::SummaryStats, nm::Symbol) = haskey(parent(stats), nm) -Base.length(stats::SummaryStats) = length(parent(stats)) -Base.getindex(stats::SummaryStats, i::Int) = getindex(parent(stats), i) -Base.getindex(stats::SummaryStats, nm::Symbol) = getindex(parent(stats), nm) -function Base.iterate(stats::SummaryStats, i::Int=firstindex(parent(stats))) - return iterate(parent(stats), i) -end - -#### custom tabular show methods - -function Base.show(io::IO, mime::MIME"text/plain", stats::SummaryStats; kwargs...) - return _show(io, mime, stats; kwargs...) -end -function Base.show(io::IO, mime::MIME"text/html", stats::SummaryStats; kwargs...) - return _show(io, mime, stats; kwargs...) -end - -function _show(io::IO, mime::MIME, stats::SummaryStats; kwargs...) 
- data = NamedTuple{eachindex(stats)[2:end]}(parent(stats)) - rhat_formatter = _prettytables_rhat_formatter(data) - extra_formatters = rhat_formatter === nothing ? () : (rhat_formatter,) - return _show_prettytable( - io, - mime, - data; - title="SummaryStats", - row_labels=stats.variable, - extra_formatters, - kwargs..., - ) -end - -#### Tables interface as column table - -Tables.istable(::Type{<:SummaryStats}) = true -Tables.columnaccess(::Type{<:SummaryStats}) = true -Tables.columns(s::SummaryStats) = s -Tables.columnnames(s::SummaryStats) = Tables.columnnames(parent(s)) -Tables.getcolumn(s::SummaryStats, i::Int) = Tables.getcolumn(parent(s), i) -Tables.getcolumn(s::SummaryStats, nm::Symbol) = Tables.getcolumn(parent(s), nm) -Tables.rowaccess(::Type{<:SummaryStats}) = true -Tables.rows(s::SummaryStats) = Tables.rows(parent(s)) -Tables.schema(s::SummaryStats) = Tables.schema(parent(s)) - -IteratorInterfaceExtensions.isiterable(::SummaryStats) = true -function IteratorInterfaceExtensions.getiterator(s::SummaryStats) - return Tables.datavaluerows(Tables.columntable(s)) -end - -TableTraits.isiterabletable(::SummaryStats) = true - -""" - summarystats(data::InferenceData; group=:posterior, kwargs...) - summarystats(data::Dataset; kwargs...) - -Compute summary statistics and diagnostics on the `data`. - -# Keywords - - - `prob_interval::Real`: The value of the `prob` argument to [`hdi`](@ref) used to compute - the highest density interval. Defaults to $(DEFAULT_INTERVAL_PROB). - - `return_type::Type`: The type of object to return. Valid options are [`Dataset`](@ref) - and [`SummaryStats`](@ref). Defaults to `SummaryStats`. - - `metric_dim`: The dimension name or type to use for the computed metrics. Only used - if `return_type` is `Dataset`. Defaults to `$(sprint(show, "text/plain", DEFAULT_METRIC_DIM))`. - - `compact_labels::Bool`: Whether to use compact names for the variables. Only used if - `return_type` is `SummaryStats`. Defaults to `true`. - - `kind::Symbol`: Whether to compute just statistics (`:stats`), just diagnostics - (`:diagnostics`), or both (`:both`). Defaults to `:both`. - -# Examples - -Compute the summary statistics and diagnostics on posterior draws of the centered eight -model: - -```jldoctest summarystats -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> summarystats(idata.posterior[(:mu, :tau)]) -SummaryStats - mean std hdi_3% hdi_97% mcse_mean mcse_std ess_tail ess_bulk rha ⋯ - mu 4.5 3.5 -1.62 10.7 0.23 0.11 659 241 1.0 ⋯ - tau 4.1 3.1 0.896 9.67 0.26 0.17 38 67 1.0 ⋯ - 1 column omitted -``` - -Compute just the statistics on all variables: - -```jldoctest summarystats -julia> summarystats(idata.posterior; kind=:stats) -SummaryStats - mean std hdi_3% hdi_97% - mu 4.49 3.49 -1.62 10.7 - theta[Choate] 6.46 5.87 -4.56 17.1 - theta[Deerfield] 5.03 4.88 -4.31 14.3 - theta[Phillips Andover] 3.94 5.69 -7.77 13.7 - theta[Phillips Exeter] 4.87 5.01 -4.49 14.7 - theta[Hotchkiss] 3.67 4.96 -6.47 11.7 - theta[Lawrenceville] 3.97 5.19 -7.04 12.2 - theta[St. Paul's] 6.58 5.11 -3.09 16.3 - theta[Mt. Hermon] 4.77 5.74 -5.86 16.0 - tau 4.12 3.10 0.896 9.67 -``` - -Compute the statistics and diagnostics from the posterior group of an `InferenceData` and -store in a `Dataset`: - -```jldoctest summarystats -julia> summarystats(idata; return_type=Dataset) -Dataset with dimensions: - Dim{:_metric} Categorical{String} String[mean, std, …, ess_bulk, rhat] Unordered, - Dim{:school} Categorical{String} String[Choate, Deerfield, …, St. 
Paul's, Mt. Hermon] Unordered -and 3 layers: - :mu Float64 dims: Dim{:_metric} (9) - :theta Float64 dims: Dim{:school}, Dim{:_metric} (8×9) - :tau Float64 dims: Dim{:_metric} (9) -``` -""" -function StatsBase.summarystats( - data::InferenceObjects.InferenceData; group::Symbol=:posterior, kwargs... -) - return summarystats(data[group]; kwargs...) -end -function StatsBase.summarystats( - data::InferenceObjects.Dataset; return_type::Type=SummaryStats, kwargs... -) - return _summarize(return_type, data; kwargs...) -end - -function _summarize( - ::Type{InferenceObjects.Dataset}, - data::InferenceObjects.Dataset; - kind::Symbol=:both, - prob_interval::Real=DEFAULT_INTERVAL_PROB, - metric_dim=DEFAULT_METRIC_DIM, -) - dims = Dimensions.dims(data, InferenceObjects.DEFAULT_SAMPLE_DIMS) - stats = [ - "mean" => (data -> dropdims(Statistics.mean(data; dims); dims)), - "std" => (data -> dropdims(Statistics.std(data; dims); dims)), - _interval_prob_to_strings("hdi", prob_interval) => - (data -> hdi(data; prob=prob_interval)), - ] - diagnostics = [ - "mcse_mean" => (data -> MCMCDiagnosticTools.mcse(data; kind=Statistics.mean)), - "mcse_std" => (data -> MCMCDiagnosticTools.mcse(data; kind=Statistics.std)), - "ess_tail" => (data -> MCMCDiagnosticTools.ess(data; kind=:tail)), - ("ess_bulk", "rhat") => (data -> MCMCDiagnosticTools.ess_rhat(data)), - ] - metrics = if kind === :both - vcat(stats, diagnostics) - elseif kind === :stats - stats - elseif kind === :diagnostics - diagnostics - else - error("Invalid value for `kind`: $kind") - end - metric_vals = map(metrics) do (_, f) - f(data) - end - metric_names = collect(Iterators.flatten(map(_astuple ∘ first, metrics))) - cat_dim = Dimensions.rebuild(Dimensions.basedims(metric_dim), metric_names) - ds = cat(metric_vals...; dims=cat_dim)::InferenceObjects.Dataset - return DimensionalData.rebuild(ds; metadata=DimensionalData.NoMetadata(), refdims=dims) -end -function _summarize( - ::Type{SummaryStats}, - data::InferenceObjects.Dataset; - compact_labels::Bool=true, - metric_dim=DEFAULT_METRIC_DIM, - kwargs..., -) - ds = _summarize(InferenceObjects.Dataset, data; metric_dim, kwargs...) - table = _as_flat_table(ds, metric_dim; compact_labels) - return SummaryStats(table) -end - -function _interval_prob_to_strings(interval_type, prob; digits=2) - α = (1 - prob) / 2 - perc_lower = string(round(100 * α; digits)) - perc_upper = string(round(100 * (1 - α); digits)) - return map((perc_lower, perc_upper)) do s - s = replace(s, r"\.0+$" => "") - return "$(interval_type)_$s%" - end -end - -function _as_flat_table(ds, dim; compact_labels::Bool=true) - row_table = Iterators.map(_indices_iterator(ds, dim)) do (var, indices) - var_select = isempty(indices) ? var : view(var, indices...) 
- return ( - variable=_indices_to_name(var, indices, compact_labels), - _arr_to_namedtuple(var_select)..., - ) - end - return Tables.columntable(row_table) -end - -function _indices_iterator(ds::DimensionalData.AbstractDimStack, dims) - return Iterators.flatten( - Iterators.map(Base.Fix2(_indices_iterator, dims), DimensionalData.layers(ds)) - ) -end -function _indices_iterator(var::DimensionalData.AbstractDimArray, dims) - dims_flatten = Dimensions.otherdims(var, dims) - isempty(dims_flatten) && return ((var, ()),) - indices_iter = DimensionalData.DimKeys(dims_flatten) - return zip(Iterators.cycle((var,)), indices_iter) -end - -function _arr_to_namedtuple(arr::DimensionalData.AbstractDimVector) - ks = Tuple(map(Symbol, DimensionalData.lookup(arr, 1))) - return NamedTuple{ks}(Tuple(arr)) -end - -function _indices_to_name(var, dims, compact) - name = DimensionalData.name(var) - isempty(dims) && return string(name) - elements = if compact - map(string ∘ Dimensions.val ∘ Dimensions.val, dims) - else - map(dims) do d - val = Dimensions.val(Dimensions.val(d)) - val_str = sprint(show, "text/plain", val) - return "$(Dimensions.name(d))=At($val_str)" - end - end - return "$name[" * join(elements, ',') * "]" -end diff --git a/src/ArviZStats/utils.jl b/src/ArviZStats/utils.jl deleted file mode 100644 index b04aed99..00000000 --- a/src/ArviZStats/utils.jl +++ /dev/null @@ -1,529 +0,0 @@ -""" - log_likelihood(data::InferenceData[, var_name]) -> DimArray - log_likelihood(data::Dataset[, var_name]) -> DimArray - -Get the log-likelihood array for the specified variable in `data`. - -`var_name` must be provided if the `log_likelihood` group has more than one variable. - -To support older InferenceData versions, if the `log_likelihood` group is not present, then -the `sample_stats` group is checked for a `log_likelihood` variable or for `var_name` if -provided -""" -function log_likelihood( - data::InferenceObjects.InferenceData, var_name::Union{Symbol,Nothing}=nothing -) - if haskey(data, :log_likelihood) - return log_likelihood(data.log_likelihood, var_name) - elseif haskey(data, :sample_stats) - # for old InferenceData versions, log-likelihood was stored in sample_stats - _var_name = var_name === nothing ? :log_likelihood : var_name - return log_likelihood(data.sample_stats, _var_name) - else - throw(ArgumentError("Data must contain `log_likelihood` or `sample_stats` group")) - end -end -function log_likelihood( - log_like::InferenceObjects.Dataset, var_name::Union{Symbol,Nothing}=nothing -) - if !(var_name === nothing) - haskey(log_like, var_name) || - throw(ArgumentError("Variable `$(var_name)` not found in group")) - return log_like[var_name] - else - var_names = keys(log_like) - length(var_names) == 1 || throw( - ArgumentError( - "`var_name` must be specified if there are multiple variables in group" - ), - ) - return log_like[first(var_names)] - end -end - -function _check_log_likelihood(x) - if any(!isfinite, x) - @warn "All log likelihood values must be finite, but some are not." - end - return nothing -end - -function _only_observed_data_key(idata::InferenceObjects.InferenceData; var_name=nothing) - haskey(idata, :observed_data) || - throw(ArgumentError("Data must contain an `observed_data` group.")) - ks = keys(idata.observed_data) - isempty(ks) && throw(ArgumentError("`observed_data` group must not be empty.")) - if length(ks) > 1 - msg = "More than one observed data variable: $(ks)." - if var_name !== nothing - msg = "$msg `$var_name` must be specified." 
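To illustrate the lookup order documented for `log_likelihood` above, here is a hedged usage sketch against the pre-patch layout, in which `ArviZStats` is still a submodule of `ArviZ`; the array shapes and variable names are invented:

```julia
using ArviZ

ll = randn(500, 4, 8)  # toy log-likelihood draws: draws × chains × observations

# current layout: a dedicated log_likelihood group
idata_new = InferenceData(; log_likelihood=convert_to_dataset((; obs=ll)))
ArviZ.ArviZStats.log_likelihood(idata_new)  # single variable, so no var_name needed

# legacy layout: falls back to sample_stats.log_likelihood
idata_old = InferenceData(; sample_stats=convert_to_dataset((; log_likelihood=ll)))
ArviZ.ArviZStats.log_likelihood(idata_old)
```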
- end - throw(ArgumentError(msg)) - end - return first(ks) -end - -# get name of group and group itself most likely to contain posterior predictive draws -function _post_pred_or_post_name_group(idata) - haskey(idata, :posterior_predictive) && - return :posterior_predictive => idata.posterior_predictive - haskey(idata, :posterior) && return :posterior => idata.posterior - throw(ArgumentError("No `posterior_predictive` or `posterior` group")) -end - -""" - observations_and_predictions(data::InferenceData[, y_name[, y_pred_name]]) - -Get arrays of observations and predictions for the specified variable in `data`. - -If `y_name` and/or `y_pred_name` is not provided, then they are inferred from the data. -Generally this requires that either there is a single variable in `observed_data` or that -there is only one variable in `posterior` or `posterior_predictive` that has a matching name -in `observed_data`, optionally with the suffix `_pred`. - -The return value has the structure `(y_name => y, y_pred_name => y_pred)`, where `y_name` -and `y_pred_name` are the actual names found. -""" -function observations_and_predictions end -function observations_and_predictions( - idata::InferenceObjects.InferenceData, y_name::Union{Symbol,Nothing}=nothing -) - return observations_and_predictions(idata, y_name, nothing) -end -function observations_and_predictions( - idata::InferenceObjects.InferenceData, y_name::Symbol, y_pred_name::Symbol -) - haskey(idata, :observed_data) || - throw(ArgumentError("Data must contain `observed_data` group")) - y = idata.observed_data[y_name] - _, post_pred = _post_pred_or_post_name_group(idata) - y_pred = post_pred[y_pred_name] - return (y_name => y, y_pred_name => y_pred) -end -function observations_and_predictions( - idata::InferenceObjects.InferenceData, ::Nothing, y_pred_name::Symbol -) - y_name = _only_observed_data_key(idata; var_name=:y_name) - y = idata.observed_data[y_name] - _, post_pred = _post_pred_or_post_name_group(idata) - y_pred = post_pred[y_pred_name] - return (y_name => y, y_pred_name => y_pred) -end -function observations_and_predictions( - idata::InferenceObjects.InferenceData, y_name::Symbol, ::Nothing -) - haskey(idata, :observed_data) || - throw(ArgumentError("Data must contain `observed_data` group")) - observed_data = idata.observed_data - y = observed_data[y_name] - post_pred_name, post_pred = _post_pred_or_post_name_group(idata) - y_pred_names = (y_name, Symbol("$(y_name)_pred")) - for y_pred_name in y_pred_names - if haskey(post_pred, y_pred_name) - y_pred = post_pred[y_pred_name] - return (y_name => y, y_pred_name => y_pred) - end - end - throw( - ArgumentError( - "Could not find names $y_pred_names in group `$post_pred_name`. 
`y_pred_name` must be specified.", - ), - ) -end -function observations_and_predictions( - idata::InferenceObjects.InferenceData, ::Nothing, ::Nothing -) - haskey(idata, :observed_data) || - throw(ArgumentError("Data must contain `observed_data` group")) - observed_data = idata.observed_data - obs_keys = keys(observed_data) - if length(obs_keys) == 1 - y_name = first(obs_keys) - return observations_and_predictions(idata, y_name, nothing) - else - _, post_pred = _post_pred_or_post_name_group(idata) - var_name_pairs = filter( - !isnothing, - map(obs_keys) do k - for k_pred in (k, Symbol("$(k)_pred")) - haskey(post_pred, k_pred) && return (k, k_pred) - end - return nothing - end, - ) - if length(var_name_pairs) == 1 - y_name, y_pred_name = first(var_name_pairs) - y = observed_data[y_name] - y_pred = post_pred[y_pred_name] - return (y_name => y, y_pred_name => y_pred) - else - throw( - ArgumentError( - "No unique pair of variable names. `y_name` and/or `y_pred_name` must be specified.", - ), - ) - end - end -end - -""" - smooth_data(y; dims=:, interp_method=CubicSpline, offset_frac=0.01) - -Smooth `y` along `dims` using `interp_method`. - -`interp_method` is a 2-argument callabale that takes the arguments `y` and `x` and returns -a DataInterpolations.jl interpolation method, defaulting to a cubic spline interpolator. - -`offset_frac` is the fraction of the length of `y` to use as an offset when interpolating. -""" -function smooth_data( - y; - dims::Union{Int,Tuple{Int,Vararg{Int}},Colon}=Colon(), - interp_method=DataInterpolations.CubicSpline, - offset_frac=1//100, -) - T = float(eltype(y)) - y_interp = similar(y, T) - n = dims isa Colon ? length(y) : prod(Base.Fix1(size, y), dims) - x = range(0, 1; length=n) - x_interp = range(0 + offset_frac, 1 - offset_frac; length=n) - _smooth_data!(y_interp, interp_method, y, x, x_interp, dims) - return y_interp -end - -function _smooth_data!(y_interp, interp_method, y, x, x_interp, ::Colon) - interp = interp_method(vec(y), x) - interp(vec(y_interp), x_interp) - return y_interp -end -function _smooth_data!(y_interp, interp_method, y, x, x_interp, dims) - for (y_interp_i, y_i) in zip( - _eachslice(y_interp; dims=_otherdims(y_interp, dims)), - _eachslice(y; dims=_otherdims(y, dims)), - ) - interp = interp_method(vec(y_i), x) - interp(vec(y_interp_i), x_interp) - end - return y_interp -end - -Base.@pure _typename(::T) where {T} = T.name.name - -_astuple(x) = (x,) -_astuple(x::Tuple) = x - -function _assimilar(x::AbstractArray, y) - z = similar(x, eltype(y)) - z .= y - return z -end -_assimilar(x::AbstractArray, y::NamedTuple) = _assimilar(x, values(y)) -function _assimilar(x::Tuple, y) - z = NTuple{length(x),eltype(y)}(y) - return z -end -function _assimilar(x::NamedTuple, y) - z = NamedTuple{fieldnames(typeof(x))}(_assimilar(values(x), y)) - return z -end - -_sortperm(x; kwargs...) = sortperm(collect(x); kwargs...) - -_permute(x::AbstractVector, p::AbstractVector) = x[p] -_permute(x::Tuple, p::AbstractVector) = x[p] -function _permute(x::NamedTuple, p::AbstractVector) - return NamedTuple{_permute(keys(x), p)}(_permute(values(x), p)) -end - -# TODO: try to find a way to do this that works for arrays with named indices -_indices(x) = keys(x) - -# eachslice-like iterator that accepts multiple dimensions and has a `size` even for older -# Julia versions -@static if VERSION ≥ v"1.9-" - _eachslice(x; dims) = eachslice(x; dims) -else - function _eachslice(x; dims) - _dims = _astuple(dims) - alldims_perm = (_otherdims(x, _dims)..., _dims...) 
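The smoothing used for discrete data (see `smooth_data` above) just refits a cubic spline on a slightly inset grid. A minimal sketch for the vector case, assuming DataInterpolations.jl; the helper name `smooth_vector` and the toy counts are invented:

```julia
using DataInterpolations

function smooth_vector(y::AbstractVector; offset_frac=0.01)
    n = length(y)
    x = range(0, 1; length=n)
    spline = CubicSpline(float.(y), x)                       # fit the raw values on a unit grid
    x_inset = range(offset_frac, 1 - offset_frac; length=n)
    return spline.(x_inset)                                  # re-evaluate on the inset grid
end

smooth_vector(rand(0:10, 25))  # toy discrete counts, smoothed to non-integer values
```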
- dims_axes = map(Base.Fix1(axes, x), _dims) - other_dims = ntuple(_ -> Colon(), ndims(x) - length(_dims)) - xperm = PermutedDimsArray(x, alldims_perm) - return Base.Iterators.map(CartesianIndices(dims_axes)) do i - return view(xperm, other_dims..., i) - end - end -end -_eachslice(x::DimensionalData.AbstractDimArray; dims) = eachslice(x; dims) - -_alldims(x) = ntuple(identity, ndims(x)) - -_otherdims(x, dims) = filter(∉(dims), _alldims(x)) - -_param_dims(x::AbstractArray) = ntuple(i -> i + 2, max(0, ndims(x) - 2)) - -_param_axes(x::AbstractArray) = map(Base.Fix1(axes, x), _param_dims(x)) - -function _params_array(x::AbstractArray, param_dim::Int=3) - param_dim > 0 || throw(ArgumentError("param_dim must be positive")) - sample_sizes = ntuple(Base.Fix1(size, x), param_dim - 1) - return reshape(x, sample_sizes..., :) -end - -function _eachparam(x::AbstractArray, param_dim::Int=3) - return eachslice(_params_array(x, param_dim); dims=param_dim) -end - -_as_dimarray(x::DimensionalData.AbstractDimArray, ::DimensionalData.AbstractDimArray) = x -function _as_dimarray(x::Union{Real,Missing}, arr::DimensionalData.AbstractDimArray) - return Dimensions.rebuild(arr, fill(x), ()) -end - -_maybe_scalar(x) = x -_maybe_scalar(x::AbstractArray{<:Any,0}) = x[] - -function _draw_chains_params_array(x::DimensionalData.AbstractDimArray) - sample_dims = Dimensions.dims(x, InferenceObjects.DEFAULT_SAMPLE_DIMS) - param_dims = Dimensions.otherdims(x, sample_dims) - dims_combined = Dimensions.combinedims(sample_dims, param_dims) - Dimensions.dimsmatch(Dimensions.dims(x), dims_combined) && return x - return PermutedDimsArray(x, dims_combined) -end - -_logabssubexp(x, y) = LogExpFunctions.logsubexp(reverse(minmax(x, y))...) - -# softmax with support for other mappable iterators -_softmax(x::AbstractArray) = LogExpFunctions.softmax(x) -function _softmax(x) - nrm = LogExpFunctions.logsumexp(x) - return map(x) do xi - return exp(xi - nrm) - end -end - -# compute sum and estimate of standard error of sum -function _sum_and_se(x; dims=:) - s = sum(x; dims) - n = dims isa Colon ? length(x) : prod(Base.Fix1(size, x), dims) - se = StatsBase.std(x; dims) * sqrt(oftype(one(eltype(s)), n)) - return s, se -end -_sum_and_se(x::Number; kwargs...) = (x, oftype(float(x), NaN)) - -function _log_mean(logx, log_weights; dims=:) - log_expectand = logx .+ log_weights - return LogExpFunctions.logsumexp(log_expectand; dims) -end - -function _se_log_mean( - logx, log_weights; dims=:, log_mean=_log_mean(logx, log_weights; dims) -) - # variance of mean estimated using self-normalized importance weighting - # Art B. Owen. (2013) Monte Carlo theory, methods and examples. eq. 9.9 - log_expectand = @. 2 * (log_weights + _logabssubexp(logx, log_mean)) - log_var_mean = LogExpFunctions.logsumexp(log_expectand; dims) - # use delta method to asymptotically map variance of mean to variance of logarithm of mean - se_log_mean = @. exp(log_var_mean / 2 - log_mean) - return se_log_mean -end - -""" - sigdigits_matching_se(x, se; sigdigits_max=7, scale=2) -> Int - -Get number of significant digits of `x` so that the last digit of `x` is the first digit of -`se*scale`. 
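For example (an editorial worked illustration with made-up numbers): with `x = 12.345`, `se = 0.12`, and the default `scale = 2`, `se * scale = 0.24` has its first significant digit in the tenths place, so the tenths digit is the last one kept in `x`, giving 3 significant digits:

```julia
using Printf

# floor(log10(12.345)) - floor(log10(0.24)) + 1 == 1 - (-1) + 1 == 3 significant digits
@sprintf("%#.3g", 12.345)  # "12.3" (before the trailing-decimal-point cleanup in _printf_with_sigdigits below)
```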
-""" -function sigdigits_matching_se(x::Real, se::Real; sigdigits_max::Int=7, scale::Real=2) - (iszero(x) || !isfinite(x) || !isfinite(se) || !isfinite(scale)) && return 0 - sigdigits_max ≥ 0 || throw(ArgumentError("`sigdigits_max` must be non-negative")) - se ≥ 0 || throw(ArgumentError("`se` must be non-negative")) - iszero(se) && return sigdigits_max - scale > 0 || throw(ArgumentError("`scale` must be positive")) - first_digit_x = floor(Int, log10(abs(x))) - last_digit_x = floor(Int, log10(se * scale)) - sigdigits_x = first_digit_x - last_digit_x + 1 - return clamp(sigdigits_x, 0, sigdigits_max) -end - -# format a number with the given number of significant digits -# - chooses scientific or decimal notation by whichever is most appropriate -# - shows trailing zeros if significant -# - removes trailing decimal point if no significant digits after decimal point -function _printf_with_sigdigits(v::Real, sigdigits) - s = sprint(Printf.format, Printf.Format("%#.$(sigdigits)g"), v) - return replace(s, r"\.$" => "") -end - -# -# PrettyTables formatter utility functions -# - -""" - ft_printf_sigdigits(sigdigits[, columns]) - -Use Printf to format the elements in the `columns` to the number of `sigdigits`. - -If `sigdigits` is a `Real`, and `columns` is not specified (or is empty), then the -formatting will be applied to the entire table. -Otherwise, if `sigdigits` is a `Real` and `columns` is a vector, then the elements in the -columns will be formatted to the number of `sigdigits`. -""" -function ft_printf_sigdigits(sigdigits::Int, columns::AbstractVector{Int}=Int[]) - if isempty(columns) - return (v, _, _) -> begin - v isa Real || return v - return _printf_with_sigdigits(v, sigdigits) - end - else - return (v, _, j) -> begin - v isa Real || return v - for col in columns - col == j && return _printf_with_sigdigits(v, sigdigits) - end - return v - end - end -end - -""" - ft_printf_sigdigits_matching_se(se_vals[, columns]; kwargs...) - -Use Printf to format the elements in the `columns` to sigdigits based on the standard error -column in `se_vals`. - -All values are formatted with Printf to the number of significant digits determined by -[`sigdigits_matching_se`](@ref). `kwargs` are forwarded to that function. - -`se_vals` must be the same length as any of the columns in the table. -If `columns` is a non-empty vector, then the formatting is only applied to those columns. -Otherwise, the formatting is applied to the entire table. -""" -function ft_printf_sigdigits_matching_se( - se_vals::AbstractVector, columns::AbstractVector{Int}=Int[]; kwargs... -) - if isempty(columns) - return (v, i, _) -> begin - v isa Real || return v - sigdigits = sigdigits_matching_se(v, se_vals[i]; kwargs...) - return _printf_with_sigdigits(v, sigdigits) - end - else - return (v, i, j) -> begin - v isa Real || return v - for col in columns - if col == j - sigdigits = sigdigits_matching_se(v, se_vals[i]; kwargs...) 
- return _printf_with_sigdigits(v, sigdigits) - end - end - return v - end - end -end - -function _prettytables_rhat_formatter(data) - cols = findall(x -> x === :rhat, keys(data)) - isempty(cols) && return nothing - return PrettyTables.ft_printf("%.2f", cols) -end - -function _prettytables_integer_formatter(data) - cols = findall(v -> eltype(v) <: Integer, values(data)) - isempty(cols) && return nothing - return PrettyTables.ft_printf("%d", cols) -end - -# formatting functions for special columns -# see https://ronisbr.github.io/PrettyTables.jl/stable/man/formatters/ -function _default_prettytables_formatters(data; sigdigits_se=2, sigdigits_default=3) - formatters = [] - for (i, k) in enumerate(keys(data)) - for mcse_key in (Symbol("mcse_$k"), Symbol("$(k)_mcse")) - if haskey(data, mcse_key) - push!(formatters, ft_printf_sigdigits_matching_se(data[mcse_key], [i])) - continue - end - end - end - mcse_cols = findall(keys(data)) do k - s = string(k) - return startswith(s, "mcse_") || endswith(s, "_mcse") - end - isempty(mcse_cols) || push!(formatters, ft_printf_sigdigits(sigdigits_se, mcse_cols)) - ess_cols = findall(_is_ess_label, keys(data)) - isempty(ess_cols) || push!(formatters, PrettyTables.ft_printf("%d", ess_cols)) - ft_integer = _prettytables_integer_formatter(data) - ft_integer === nothing || push!(formatters, ft_integer) - push!(formatters, ft_printf_sigdigits(sigdigits_default)) - return formatters -end - -function _show_prettytable( - io::IO, data; sigdigits_se=2, sigdigits_default=3, extra_formatters=(), kwargs... -) - formatters = ( - extra_formatters..., - _default_prettytables_formatters(data; sigdigits_se, sigdigits_default)..., - ) - alignment = fill(:r, length(data)) - for (i, v) in enumerate(values(data)) - if !(eltype(v) <: Real) - alignment[i] = :l - end - end - kwargs_new = merge( - ( - show_subheader=false, - vcrop_mode=:middle, - show_omitted_cell_summary=true, - row_label_alignment=:l, - formatters, - alignment, - ), - kwargs, - ) - PrettyTables.pretty_table(io, data; kwargs_new...) - return nothing -end - -function _show_prettytable( - io::IO, - ::MIME"text/plain", - data; - title_crayon=PrettyTables.Crayon(), - hlines=:none, - vlines=:none, - newline_at_end=false, - kwargs..., -) - alignment_anchor_regex = Dict( - i => [r"\.", r"e", r"^NaN$", r"Inf$"] for (i, (k, v)) in enumerate(pairs(data)) if - (eltype(v) <: Real && !(eltype(v) <: Integer) && !_is_ess_label(k)) - ) - alignment_anchor_fallback = :r - alignment_anchor_fallback_override = Dict( - i => :r for (i, k) in enumerate(keys(data)) if _is_ess_label(k) - ) - return _show_prettytable( - io, - data; - backend=Val(:text), - title_crayon, - hlines, - vlines, - newline_at_end, - alignment_anchor_regex, - alignment_anchor_fallback, - alignment_anchor_fallback_override, - kwargs..., - ) -end -function _show_prettytable( - io::IO, ::MIME"text/html", data; minify=true, max_num_of_rows=25, kwargs... -) - return _show_prettytable( - io, data; backend=Val(:html), minify, max_num_of_rows, kwargs... - ) -end - -_is_ess_label(k::Symbol) = ((k === :ess) || startswith(string(k), "ess_")) diff --git a/src/ArviZStats/waic.jl b/src/ArviZStats/waic.jl deleted file mode 100644 index e3718bc0..00000000 --- a/src/ArviZStats/waic.jl +++ /dev/null @@ -1,113 +0,0 @@ -""" -$(SIGNATURES) - -Results of computing the widely applicable information criterion (WAIC). 
- -See also: [`waic`](@ref), [`AbstractELPDResult`](@ref) - -$(FIELDS) -""" -struct WAICResult{E,P} <: AbstractELPDResult - "Estimates of the expected log pointwise predictive density (ELPD) and effective number of parameters (p)" - estimates::E - "Pointwise estimates" - pointwise::P -end - -function elpd_estimates(r::WAICResult; pointwise::Bool=false) - return pointwise ? r.pointwise : r.estimates -end - -function Base.show(io::IO, mime::MIME"text/plain", result::WAICResult; kwargs...) - _show_elpd_estimates(io, mime, result; title="WAICResult with estimates", kwargs...) - return nothing -end - -""" - waic(log_likelihood::AbstractArray) -> WAICResult{<:NamedTuple,<:NamedTuple} - -Compute the widely applicable information criterion (WAIC).[^Watanabe2010][^Vehtari2017][^LOOFAQ] - -`log_likelihood` must be an array of log-likelihood values with shape -`(chains, draws[, params...])`. - -See also: [`WAICResult`](@ref), [`loo`](@ref) - -[^Watanabe2010]: Watanabe, S. Asymptotic Equivalence of Bayes Cross Validation and Widely Applicable Information Criterion in Singular Learning Theory. 11(116):3571−3594, 2010. https://jmlr.csail.mit.edu/papers/v11/watanabe10a.html -[^Vehtari2017]: Vehtari, A., Gelman, A. & Gabry, J. - Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. - Stat Comput 27, 1413–1432 (2017). - doi: [10.1007/s11222-016-9696-4](https://doi.org/10.1007/s11222-016-9696-4) - arXiv: [1507.04544](https://arxiv.org/abs/1507.04544) -[^LOOFAQ]: Aki Vehtari. Cross-validation FAQ. https://mc-stan.org/loo/articles/online-only/faq.html - -# Examples - -Calculate WAIC of a model: - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> log_like = PermutedDimsArray(idata.log_likelihood.obs, (:draw, :chain, :school)); - -julia> waic(log_like) -WAICResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.33 -``` -""" -waic(ll::AbstractArray) = _waic(ll) - -""" - waic(data::Dataset; [var_name::Symbol]) -> WAICResult{<:NamedTuple,<:Dataset} - waic(data::InferenceData; [var_name::Symbol]) -> WAICResult{<:NamedTuple,<:Dataset} - -Compute WAIC from log-likelihood values in `data`. - -If more than one log-likelihood variable is present, then `var_name` must be provided. 
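As a rough cross-check of what `waic` computes (an editorial sketch, not the package API; the helper name `waic_by_hand` and the toy array are invented, following the `_waic` implementation below):

```julia
using LogExpFunctions, Statistics

function waic_by_hand(log_like::AbstractArray{<:Real,3})  # draws × chains × observations
    ndraws = size(log_like, 1) * size(log_like, 2)
    # log pointwise predictive density: log of the posterior-mean likelihood per observation
    lpd = dropdims(logsumexp(log_like; dims=(1, 2)); dims=(1, 2)) .- log(ndraws)
    # effective number of parameters: posterior variance of the log-likelihood
    p = dropdims(var(log_like; dims=(1, 2)); dims=(1, 2))
    elpd = lpd .- p
    n = length(elpd)
    return (elpd=sum(elpd), elpd_mcse=std(elpd) * sqrt(n), p=sum(p), p_mcse=std(p) * sqrt(n))
end

waic_by_hand(randn(500, 4, 8))  # toy log-likelihood values
```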
- -# Examples - -Calculate WAIC of a model: - -```jldoctest -julia> using ArviZ, ArviZExampleData - -julia> idata = load_example_data("centered_eight"); - -julia> waic(idata) -WAICResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.33 -``` -""" -function waic( - data::Union{InferenceObjects.InferenceData,InferenceObjects.Dataset}; - var_name::Union{Symbol,Nothing}=nothing, -) - result = waic(log_likelihood(data, var_name)) - log_like = _draw_chains_params_array(log_likelihood(data, var_name)) - result = waic(log_like) - pointwise = Dimensions.rebuild( - ArviZ.convert_to_dataset(result.pointwise; default_dims=()); - metadata=DimensionalData.NoMetadata(), - ) - return WAICResult(result.estimates, pointwise) -end - -function _waic(log_like, dims=(1, 2)) - _check_log_likelihood(log_like) - - # compute pointwise estimates - lpd_i = _lpd_pointwise(log_like, dims) - p_i = _maybe_scalar(dropdims(Statistics.var(log_like; corrected=true, dims); dims)) - elpd_i = lpd_i - p_i - pointwise = (elpd=elpd_i, p=p_i) - - # combine estimates - estimates = _elpd_estimates_from_pointwise(pointwise) - - return WAICResult(estimates, pointwise) -end diff --git a/src/conversions.jl b/src/conversions.jl index 1e4e80d2..535cddb3 100644 --- a/src/conversions.jl +++ b/src/conversions.jl @@ -1,4 +1,4 @@ -@doc doc""" +""" from_mcmcchains(posterior::MCMCChains.Chains; kwargs...) -> InferenceData from_mcmcchains(; kwargs...) -> InferenceData from_mcmcchains( diff --git a/test/ArviZStats/compare.jl b/test/ArviZStats/compare.jl deleted file mode 100644 index 31aa2734..00000000 --- a/test/ArviZStats/compare.jl +++ /dev/null @@ -1,131 +0,0 @@ -using Test -using ArviZ -using ArviZExampleData -using DimensionalData -using IteratorInterfaceExtensions -using Tables -using TableTraits - -function _isequal(x::ModelComparisonResult, y::ModelComparisonResult) - return Tables.columntable(x) == Tables.columntable(y) -end - -@testset "compare" begin - eight_schools_data = ( - centered=load_example_data("centered_eight"), - non_centered=load_example_data("non_centered_eight"), - ) - eight_schools_loo_results = map(loo, eight_schools_data) - mc1 = @inferred ModelComparisonResult compare(eight_schools_loo_results) - - @testset "basic checks" begin - @test mc1.name == (:non_centered, :centered) - @test mc1.rank == (non_centered=1, centered=2) - @test _isapprox( - mc1.elpd_diff, - ( - non_centered=0.0, - centered=( - eight_schools_loo_results.non_centered.estimates.elpd - - eight_schools_loo_results.centered.estimates.elpd - ), - ), - ) - @test mc1.elpd_diff.non_centered == 0.0 - @test mc1.elpd_diff.centered > 0 - @test mc1.weight == NamedTuple{(:non_centered, :centered)}( - model_weights(eight_schools_loo_results) - ) - @test mc1.elpd_result == - NamedTuple{(:non_centered, :centered)}(eight_schools_loo_results) - - @test_throws ArgumentError compare(eight_schools_loo_results; model_names=[:foo]) - @test_throws ErrorException compare(eight_schools_data; elpd_method=x -> nothing) - end - - @testset "keywords are forwarded" begin - @test _isequal(compare(eight_schools_data), mc1) - mc2 = compare(eight_schools_loo_results; weights_method=PseudoBMA()) - @test !_isequal(mc2, compare(eight_schools_loo_results)) - @test mc2.weights_method === PseudoBMA() - mc3 = compare(eight_schools_loo_results; sort=false) - for k in filter(!=(:weights_method), propertynames(mc1)) - if k === :name - @test getproperty(mc3, k) == reverse(getproperty(mc1, k)) - else - @test getproperty(mc3, k) == - NamedTuple{(:centered, :non_centered)}(getproperty(mc1, 
k)) - end - end - mc3 = compare(eight_schools_loo_results; model_names=[:a, :b]) - @test mc3.name == [:b, :a] - mc4 = compare(eight_schools_data; elpd_method=waic) - @test !_isequal(mc4, mc2) - end - - @testset "ModelComparisonResult" begin - @testset "Tables interface" begin - @test Tables.istable(typeof(mc1)) - @test Tables.columnaccess(typeof(mc1)) - @test Tables.columns(mc1) == mc1 - @test Tables.columnnames(mc1) == ( - :name, - :rank, - :elpd, - :elpd_mcse, - :elpd_diff, - :elpd_diff_mcse, - :weight, - :p, - :p_mcse, - ) - table = Tables.columntable(mc1) - for k in (:name, :rank, :elpd_diff, :elpd_diff_mcse, :weight) - @test getproperty(table, k) == collect(getproperty(mc1, k)) - end - for k in (:elpd, :elpd_mcse, :p, :p_mcse) - @test getproperty(table, k) == - collect(map(x -> getproperty(x.estimates, k), mc1.elpd_result)) - end - for (i, k) in enumerate(Tables.columnnames(mc1)) - @test Tables.getcolumn(mc1, i) == Tables.getcolumn(mc1, k) - end - @test_throws ArgumentError Tables.getcolumn(mc1, :foo) - @test Tables.rowaccess(typeof(mc1)) - @test map(NamedTuple, Tables.rows(mc1)) == - map(NamedTuple, Tables.rows(Tables.columntable(mc1))) - end - - @testset "TableTraits interface" begin - @test IteratorInterfaceExtensions.isiterable(mc1) - @test TableTraits.isiterabletable(mc1) - nt = collect(Iterators.take(IteratorInterfaceExtensions.getiterator(mc1), 1))[1] - @test isequal( - nt, - (; (k => Tables.getcolumn(mc1, k)[1] for k in Tables.columnnames(mc1))...), - ) - nt = collect(Iterators.take(IteratorInterfaceExtensions.getiterator(mc1), 2))[2] - @test isequal( - nt, - (; (k => Tables.getcolumn(mc1, k)[2] for k in Tables.columnnames(mc1))...), - ) - end - - @testset "show" begin - mc5 = compare(eight_schools_loo_results; weights_method=PseudoBMA()) - @test sprint(show, "text/plain", mc1) == """ - ModelComparisonResult with Stacking weights - rank elpd elpd_mcse elpd_diff elpd_diff_mcse weight p p_mcse - non_centered 1 -31 1.4 0 0.0 1.0 0.9 0.32 - centered 2 -31 1.4 0.06 0.067 0.0 0.9 0.34""" - - @test sprint(show, "text/plain", mc5) == """ - ModelComparisonResult with PseudoBMA weights - rank elpd elpd_mcse elpd_diff elpd_diff_mcse weight p p_mcse - non_centered 1 -31 1.4 0 0.0 0.52 0.9 0.32 - centered 2 -31 1.4 0.06 0.067 0.48 0.9 0.34""" - - @test startswith(sprint(show, "text/html", mc1), " l ≤ x ≤ u, x) ≥ interval_length - else - @test sum(x -> l ≤ x ≤ u, x) == interval_length - end - xsort = sort(x) - lind = 1:(n - interval_length + 1) - uind = interval_length:n - @assert all(collect(uind) .- collect(lind) .+ 1 .== interval_length) - @test minimum(xsort[uind] - xsort[lind]) ≈ u - l - - @test hdi!(copy(x); prob) == r - end - end - - @testset "edge cases and errors" begin - @testset "NaNs returned if contains NaNs" begin - x = randn(1000) - x[3] = NaN - @test isequal(hdi(x), (lower=NaN, upper=NaN)) - end - - @testset "errors for empty array" begin - x = Float64[] - @test_throws ArgumentError hdi(x) - end - - @testset "errors for 0-dimensional array" begin - x = fill(1.0) - @test_throws ArgumentError hdi(x) - end - - @testset "test errors when prob is not in (0, 1)" begin - x = randn(1_000) - @testset for prob in (0, 1, -0.1, 1.1, NaN) - @test_throws DomainError hdi(x; prob) - end - end - end - - @testset "AbstractArray consistent with AbstractVector" begin - @testset for sz in ((100, 2), (100, 2, 3), (100, 2, 3, 4)), - prob in (0.72, 0.81), - T in (Float32, Float64, Int64) - - x = T <: Integer ? 
rand(T(1):T(30), sz) : randn(T, sz) - r = @inferred hdi(x; prob) - if ndims(x) == 2 - @test r isa NamedTuple{(:lower, :upper),NTuple{2,T}} - @test r == hdi(vec(x); prob) - else - @test r isa NamedTuple{(:lower, :upper),NTuple{2,Array{T,length(sz) - 2}}} - r_slices = dropdims( - mapslices(x -> hdi(x; prob), x; dims=(1, 2)); dims=(1, 2) - ) - @test r.lower == first.(r_slices) - @test r.upper == last.(r_slices) - end - - @test hdi!(copy(x); prob) == r - end - end - - @testset "OffsetArray" begin - @testset for n in (100, 1_000), prob in (0.732, 0.864), T in (Float32, Float64) - x = randn(T, (n, 2, 3, 4)) - xoff = OffsetArray(x, (-1, 2, -3, 4)) - r = hdi(x; prob) - roff = @inferred hdi(xoff; prob) - @test roff isa NamedTuple{(:lower, :upper),<:NTuple{2,OffsetMatrix{T}}} - @test axes(roff.lower) == (axes(xoff, 3), axes(xoff, 4)) - @test axes(roff.upper) == (axes(xoff, 3), axes(xoff, 4)) - @test collect(roff.lower) == r.lower - @test collect(roff.upper) == r.upper - end - end - - @testset "Dataset/InferenceData" begin - nt = (x=randn(1000, 3), y=randn(1000, 3, 4), z=randn(1000, 3, 4, 2)) - posterior = convert_to_dataset(nt) - posterior_perm = convert_to_dataset(( - x=permutedims(posterior.x), - y=permutedims(posterior.y, (3, 2, 1)), - z=permutedims(posterior.z, (3, 2, 4, 1)), - )) - idata = InferenceData(; posterior) - @testset for prob in (0.76, 0.93) - if VERSION ≥ v"1.9" - r1 = @inferred hdi(posterior; prob) - else - r1 = hdi(posterior; prob) - end - r1_perm = hdi(posterior_perm; prob) - for k in (:x, :y, :z) - rk = hdi(posterior[k]; prob) - @test r1[k][hdi_bound=At(:lower)] == rk.lower - @test r1[k][hdi_bound=At(:upper)] == rk.upper - # equality check is safe because these are always data values - @test r1_perm[k][hdi_bound=At(:lower)] == rk.lower - @test r1_perm[k][hdi_bound=At(:upper)] == rk.upper - end - if VERSION ≥ v"1.9" - r2 = @inferred hdi(idata; prob) - else - r2 = hdi(idata; prob) - end - @test r1 == r2 - end - end -end diff --git a/test/ArviZStats/helpers.jl b/test/ArviZStats/helpers.jl deleted file mode 100644 index 7d9e41e8..00000000 --- a/test/ArviZStats/helpers.jl +++ /dev/null @@ -1,47 +0,0 @@ -using RCall - -r_loo_installed() = !isempty(rcopy(R"system.file(package='loo')")) - -# R loo with our API -function loo_r(log_likelihood; reff=nothing) - R"require('loo')" - if reff === nothing - reff = rcopy(R"loo::relative_eff(exp($(log_likelihood)))") - end - result = R"loo::loo($log_likelihood, r_eff=$reff)" - estimates = rcopy(R"$(result)$estimates") - estimates = ( - elpd=estimates[1, 1], - elpd_mcse=estimates[1, 2], - p=estimates[2, 1], - p_mcse=estimates[2, 2], - ) - pointwise = rcopy(R"$(result)$pointwise") - pointwise = ( - elpd=pointwise[:, 1], - elpd_mcse=pointwise[:, 2], - p=pointwise[:, 3], - reff=reff, - pareto_shape=pointwise[:, 5], - ) - return (; estimates, pointwise) -end - -# R loo with our API -function waic_r(log_likelihood) - R"require('loo')" - result = R"loo::waic($log_likelihood)" - estimates = rcopy(R"$(result)$estimates") - estimates = ( - elpd=estimates[1, 1], - elpd_mcse=estimates[1, 2], - p=estimates[2, 1], - p_mcse=estimates[2, 2], - ) - pointwise = rcopy(R"$(result)$pointwise") - pointwise = (elpd=pointwise[:, 1], p=pointwise[:, 2]) - return (; estimates, pointwise) -end - -_isapprox(x::AbstractArray, y::AbstractArray; kwargs...) = isapprox(x, y; kwargs...) -_isapprox(x, y; kwargs...) 
= all(map((x, y) -> isapprox(x, y; kwargs...), x, y)) diff --git a/test/ArviZStats/loo.jl b/test/ArviZStats/loo.jl deleted file mode 100644 index e37ab33d..00000000 --- a/test/ArviZStats/loo.jl +++ /dev/null @@ -1,192 +0,0 @@ -using Test -using ArviZ -using ArviZ.ArviZStats -using ArviZExampleData -using DimensionalData -using Logging: SimpleLogger, with_logger - -include("helpers.jl") - -@testset "loo" begin - @testset "core functionality" begin - @testset for sz in ((1000, 4), (1000, 4, 2), (100, 4, 2, 3)), - T in (Float32, Float64), - TA in (Array, DimArray) - - atol_perm = cbrt(eps(T)) - - log_likelihood = randn(T, sz) - if TA === DimArray - log_likelihood = DimArray( - log_likelihood, (:draw, :chain, :param1, :param2)[1:length(sz)] - ) - end - loo_result = - TA === DimArray ? loo(log_likelihood) : @inferred(loo(log_likelihood)) - @test loo_result isa ArviZStats.PSISLOOResult - estimates = elpd_estimates(loo_result) - pointwise = elpd_estimates(loo_result; pointwise=true) - @testset "return types and values as expected" begin - @test estimates isa NamedTuple{(:elpd, :elpd_mcse, :p, :p_mcse),NTuple{4,T}} - @test pointwise isa - NamedTuple{(:elpd, :elpd_mcse, :p, :reff, :pareto_shape)} - if length(sz) == 2 - @test eltype(pointwise) === T - else - @test eltype(pointwise) <: TA{T,length(sz) - 2} - end - @test loo_result.psis_result isa PSIS.PSISResult - @test loo_result.psis_result.reff == pointwise.reff - @test loo_result.psis_result.pareto_shape == pointwise.pareto_shape - end - @testset "information criterion" begin - @test information_criterion(loo_result, :log) == estimates.elpd - @test information_criterion(loo_result, :negative_log) == -estimates.elpd - @test information_criterion(loo_result, :deviance) == -2 * estimates.elpd - @test information_criterion(loo_result, :log; pointwise=true) == - pointwise.elpd - @test information_criterion(loo_result, :negative_log; pointwise=true) == - -pointwise.elpd - @test information_criterion(loo_result, :deviance; pointwise=true) == - -2 * pointwise.elpd - end - - TA === DimArray && @testset "consistency with InferenceData argument" begin - idata1 = InferenceData(; log_likelihood=Dataset((; x=log_likelihood))) - loo_result1 = loo(idata1) - @test isequal(loo_result1.estimates, loo_result.estimates) - @test loo_result1.pointwise isa Dataset - if length(sz) == 2 - @test issetequal( - keys(loo_result1.pointwise), - (:elpd, :elpd_mcse, :p, :reff, :pareto_shape), - ) - else - @test loo_result1.pointwise.elpd == loo_result.pointwise.elpd - @test loo_result1.pointwise.elpd_mcse == loo_result.pointwise.elpd_mcse - @test loo_result1.pointwise.p == loo_result.pointwise.p - @test loo_result1.pointwise.reff == loo_result.pointwise.reff - @test loo_result1.pointwise.pareto_shape == - loo_result.pointwise.pareto_shape - end - - ll_perm = permutedims( - log_likelihood, (ntuple(x -> x + 2, length(sz) - 2)..., 2, 1) - ) - idata2 = InferenceData(; log_likelihood=Dataset((; y=ll_perm))) - loo_result2 = loo(idata2) - @test loo_result2.estimates.elpd ≈ loo_result1.estimates.elpd atol = - atol_perm - @test isapprox( - loo_result2.estimates.elpd_mcse, - loo_result1.estimates.elpd_mcse; - nans=true, - atol=atol_perm, - ) - @test loo_result2.estimates.p ≈ loo_result1.estimates.p atol = atol_perm - @test isapprox( - loo_result2.estimates.p_mcse, - loo_result1.estimates.p_mcse; - nans=true, - atol=atol_perm, - ) - @test isapprox( - loo_result2.pointwise.elpd_mcse, - loo_result1.pointwise.elpd_mcse; - nans=true, - atol=atol_perm, - ) - @test loo_result2.pointwise.p ≈ 
loo_result1.pointwise.p atol = atol_perm - @test loo_result2.pointwise.reff ≈ loo_result1.pointwise.reff atol = - atol_perm - @test loo_result2.pointwise.pareto_shape ≈ - loo_result1.pointwise.pareto_shape atol = atol_perm - end - end - end - @testset "keywords forwarded" begin - log_likelihood = convert_to_dataset((x=randn(1000, 4, 2, 3), y=randn(1000, 4, 3))) - @test loo(log_likelihood; var_name=:x).estimates == loo(log_likelihood.x).estimates - @test loo(log_likelihood; var_name=:y).estimates == loo(log_likelihood.y).estimates - @test loo(log_likelihood; var_name=:x, reff=0.5).pointwise.reff == fill(0.5, 2, 3) - end - @testset "errors" begin - log_likelihood = convert_to_dataset((x=randn(1000, 4, 2, 3), y=randn(1000, 4, 3))) - @test_throws ArgumentError loo(log_likelihood) - @test_throws ArgumentError loo(log_likelihood; var_name=:z) - @test_throws DimensionMismatch loo(log_likelihood; var_name=:x, reff=rand(2)) - end - @testset "warnings" begin - io = IOBuffer() - log_likelihood = randn(100, 4) - @testset for bad_val in (NaN, -Inf, Inf) - log_likelihood[1] = bad_val - result = with_logger(SimpleLogger(io)) do - loo(log_likelihood) - end - msg = String(take!(io)) - @test occursin("Warning:", msg) - end - - io = IOBuffer() - log_likelihood = randn(100, 4) - @testset for bad_reff in (NaN, 0, Inf) - result = with_logger(SimpleLogger(io)) do - loo(log_likelihood; reff=bad_reff) - end - msg = String(take!(io)) - @test occursin("Warning:", msg) - end - - io = IOBuffer() - log_likelihood = randn(5, 1) - result = with_logger(SimpleLogger(io)) do - loo(log_likelihood) - end - msg = String(take!(io)) - @test occursin("Warning:", msg) - end - @testset "show" begin - idata = load_example_data("centered_eight") - # regression test - @test sprint(show, "text/plain", loo(idata)) == """ - PSISLOOResult with estimates - elpd elpd_mcse p p_mcse - -31 1.4 0.9 0.34 - - and PSISResult with 500 draws, 4 chains, and 8 parameters - Pareto shape (k) diagnostic values: - Count Min. ESS - (-Inf, 0.5] good 6 (75.0%) 135 - (0.5, 0.7] okay 2 (25.0%) 421""" - end - @testset "agrees with R loo" begin - if r_loo_installed() - @testset for ds_name in ["centered_eight", "non_centered_eight"] - idata = load_example_data(ds_name) - log_likelihood = idata.log_likelihood.obs - data_dims = otherdims(log_likelihood, (:draw, :chain)) - log_likelihood = permutedims(log_likelihood, (:draw, :chain, data_dims...)) - reff_rand = rand(data_dims) - @testset for reff in (nothing, reff_rand) - result_r = loo_r(log_likelihood; reff) - result = loo(log_likelihood; reff) - @test result.estimates.elpd ≈ result_r.estimates.elpd - @test result.estimates.elpd_mcse ≈ result_r.estimates.elpd_mcse - @test result.estimates.p ≈ result_r.estimates.p - @test result.estimates.p_mcse ≈ result_r.estimates.p_mcse - @test result.pointwise.elpd ≈ result_r.pointwise.elpd - # increased tolerance for elpd_mcse, since we use a different approach - @test result.pointwise.elpd_mcse ≈ result_r.pointwise.elpd_mcse rtol = - 0.01 - @test result.pointwise.p ≈ result_r.pointwise.p - @test result.pointwise.reff ≈ result_r.pointwise.reff - @test result.pointwise.pareto_shape ≈ result_r.pointwise.pareto_shape - end - end - else - @warn "Skipping consistency tests against R loo::loo, since loo is not installed." 
- @test_broken false - end - end -end diff --git a/test/ArviZStats/loo_pit.jl b/test/ArviZStats/loo_pit.jl deleted file mode 100644 index f4093413..00000000 --- a/test/ArviZStats/loo_pit.jl +++ /dev/null @@ -1,144 +0,0 @@ -using Test -using ArviZ -using ArviZ.ArviZStats -using DimensionalData -using Distributions -using StatsBase - -@testset "loo_pit" begin - @testset "scalar data" begin - ndraws = 100 - nchains = 3 - y = randn() - y_pred = randn(ndraws, nchains) - weights = rand(ndraws, nchains) - log_weights = log.(weights) .- log(sum(weights)) - pitvals = @inferred loo_pit(y, y_pred, log_weights) - @test pitvals isa typeof(y) - @test 0 <= pitvals <= 1 - @test pitvals ≈ mean(y_pred .≤ y, StatsBase.weights(weights)) - end - @testset "array data" begin - ndraws = 100 - nchains = 3 - @testset for sz in ((100,), (5, 4)), T in (Float32, Float64) - y = randn(T, sz...) - y_pred = randn(T, ndraws, nchains, sz...) - weights = rand(T, ndraws, nchains, sz...) - weights ./= sum(weights; dims=(1, 2)) - log_weights = log.(weights) - pitvals = @inferred loo_pit(y, y_pred, log_weights) - @test pitvals isa typeof(y) - @test size(pitvals) == sz - @test all(p -> 0 ≤ p ≤ 1, pitvals) - pitvals_exp = dropdims( - sum((y_pred .≤ reshape(y, 1, 1, sz...)) .* weights; dims=(1, 2)); - dims=(1, 2), - ) - @test pitvals ≈ pitvals_exp - end - end - @testset "discrete data" begin - ndraws = 1_000 - nchains = 3 - dists = Binomial.(10:10:100, 0.25) - d = product_distribution(dists) - y = rand(d) - y_sample = rand(d, ndraws * nchains) - y_pred = reshape(transpose(y_sample), ndraws, nchains, length(y)) - loglike = mapslices(yi -> logpdf.(dists, yi), y_pred; dims=3) - log_weights = psis(loglike).log_weights - pit_vals = loo_pit( - ArviZStats.smooth_data(y; dims=1), - ArviZStats.smooth_data(y_pred; dims=3), - log_weights, - ) - ϵ = sqrt(eps()) - @test loo_pit(y, y_pred, log_weights) == pit_vals - @test loo_pit(y, y_pred, log_weights; is_discrete=true) == pit_vals - @test loo_pit(y, y_pred, log_weights; is_discrete=false) != pit_vals - @test !(loo_pit(y .+ ϵ, y_pred, log_weights) ≈ pit_vals) - @test loo_pit(y .+ ϵ, y_pred, log_weights; is_discrete=true) ≈ pit_vals - @test !(loo_pit(y, y_pred .+ ϵ, log_weights) ≈ pit_vals) - @test loo_pit(y, y_pred .+ ϵ, log_weights; is_discrete=true) ≈ pit_vals - end - @testset "DimArray data" begin - draw_dim = Dim{:draw}(1:100) - chain_dim = Dim{:chain}(0:2) - sample_dims = (draw_dim, chain_dim) - param_dims = (Dim{:param1}(1:2), Dim{:param2}([:a, :b, :c])) - all_dims = (sample_dims..., param_dims...) - y = DimArray(randn(size(param_dims)...), param_dims) - y_pred = DimArray(randn(size(all_dims)...), all_dims) - weights = DimArray(rand(size(all_dims)...), all_dims) - weights ./= sum(weights; dims=(:draw, :chain)) - log_weights = log.(weights) - pitvals = @inferred loo_pit(y, y_pred, log_weights) - @test pitvals isa typeof(y) - @test all(p -> 0 ≤ p ≤ 1, pitvals) - @test DimensionalData.data(pitvals) == - loo_pit(map(DimensionalData.data, (y, y_pred, log_weights))...) - end - @testset "from InferenceData" begin - draw_dim = Dim{:draw}(1:100) - chain_dim = Dim{:chain}(0:2) - sample_dims = (draw_dim, chain_dim) - param_dims = (Dim{:param1}(1:2), Dim{:param2}([:a, :b, :c])) - all_dims = (sample_dims..., param_dims...) 
- y = DimArray(randn(size(param_dims)...), param_dims) - z = DimArray(fill(randn()), ()) - y_pred = DimArray(randn(size(all_dims)...), all_dims) - log_like = DimArray(randn(size(all_dims)...), all_dims) - log_weights = loo(log_like).psis_result.log_weights - pit_vals = loo_pit(y, y_pred, log_weights) - - idata1 = InferenceData(; - observed_data=Dataset((; y)), - posterior_predictive=Dataset((; y=y_pred)), - log_likelihood=Dataset((; y=log_like)), - ) - @test_throws Exception loo_pit(idata1; y_name=:z) - @test_throws Exception loo_pit(idata1; y_pred_name=:z) - @test_throws Exception loo_pit(idata1; log_likelihood_name=:z) - @test loo_pit(idata1) == pit_vals - VERSION ≥ v"1.7" && @inferred loo_pit(idata1) - @test loo_pit(idata1; y_name=:y) == pit_vals - @test loo_pit(idata1; y_name=:y, y_pred_name=:y, log_likelihood_name=:y) == pit_vals - - idata2 = InferenceData(; - observed_data=Dataset((; z, y)), - posterior_predictive=Dataset((; y_pred)), - log_likelihood=Dataset((; log_like)), - ) - @test_throws ArgumentError loo_pit(idata2) - @test_throws ArgumentError loo_pit( - idata2; y_name=:z, y_pred_name=:y_pred, log_likelihood_name=:log_like - ) - @test_throws ArgumentError loo_pit(idata2; y_name=:y, y_pred_name=:y_pred) - @test loo_pit(idata2; y_name=:y, log_likelihood_name=:log_like) == pit_vals - @test loo_pit( - idata2; y_name=:y, y_pred_name=:y_pred, log_likelihood_name=:log_like - ) == pit_vals - idata3 = InferenceData(; - observed_data=Dataset((; y)), - posterior_predictive=Dataset((; y=y_pred)), - sample_stats=Dataset((; log_likelihood=log_like)), - ) - @test loo_pit(idata3) == pit_vals - VERSION ≥ v"1.7" && @inferred loo_pit(idata3) - - all_dims_perm = (param_dims..., reverse(sample_dims)...) - idata4 = InferenceData(; - observed_data=Dataset((; y)), - posterior_predictive=Dataset((; y=permutedims(y_pred, all_dims_perm))), - log_likelihood=Dataset((; y=permutedims(log_like, all_dims_perm))), - ) - @test loo_pit(idata4) ≈ pit_vals - VERSION ≥ v"1.7" && @inferred loo_pit(idata4) - - idata5 = InferenceData(; - observed_data=Dataset((; y)), posterior_predictive=Dataset((; y=y_pred)) - ) - @test_throws ArgumentError loo_pit(idata5) - end -end diff --git a/test/ArviZStats/model_weights.jl b/test/ArviZStats/model_weights.jl deleted file mode 100644 index 69a8ffe7..00000000 --- a/test/ArviZStats/model_weights.jl +++ /dev/null @@ -1,141 +0,0 @@ -using Test -using ArviZ -using DimensionalData -using FiniteDifferences -using LinearAlgebra -using Optim -using Random - -struct DummyOptimizer <: Optim.AbstractOptimizer end - -@testset "model_weights" begin - function test_model_weights(weights_method) - @testset "weights are same collection as arguments" begin - elpd_results_tuple = map(loo, (randn(1000, 4, 2, 3), randn(1000, 4, 2, 3))) - weights_tuple = @inferred model_weights(weights_method(), elpd_results_tuple) - @test weights_tuple isa NTuple{2,Float64} - @test sum(weights_tuple) ≈ 1 - - elpd_results_nt = NamedTuple{(:x, :y)}(elpd_results_tuple) - weights_nt = @inferred model_weights(weights_method(), elpd_results_nt) - @test weights_nt isa NamedTuple{(:x, :y),NTuple{2,Float64}} - @test _isapprox(values(weights_nt), weights_tuple) - - elpd_results_da = DimArray(collect(elpd_results_tuple), Dim{:model}([:x, :y])) - weights_da = @inferred model_weights(weights_method(), elpd_results_da) - @test weights_da isa DimArray - @test Dimensions.dimsmatch(weights_da, elpd_results_da) - @test _isapprox(weights_da, collect(weights_tuple)) - end - - @testset "weights invariant to order" begin - elpd_results = 
map( - waic, (randn(1000, 4, 10), randn(1000, 4, 10), randn(1000, 4, 10)) - ) - weights1 = model_weights(weights_method(), elpd_results) - weights2 = model_weights(weights_method(), reverse(elpd_results)) - T = eltype(weights1) - @test _isapprox(weights1, reverse(weights2); atol=sqrt(eps(T))) - end - - @testset "identical models get the same weights" begin - ll = randn(1000, 4, 10) - result = waic(ll) - elpd_results = fill(result, 3) - weights = model_weights(weights_method(), elpd_results) - @test sum(weights) ≈ 1 - @test weights ≈ fill(weights[1], length(weights)) - end - - @testset "better model gets higher weight" begin - elpd_results = ( - non_centered=loo(load_example_data("non_centered_eight")), - centered=loo(load_example_data("centered_eight")), - ) - weights = model_weights(weights_method(), elpd_results) - @test sum(weights) ≈ 1 - @test weights[1] > weights[2] - end - end - - @testset "PseudoBMA" begin - @test !PseudoBMA().regularize - @test PseudoBMA(true) === PseudoBMA(; regularize=true) - - test_model_weights(PseudoBMA) - - @testset "regularization is respected" begin - elpd_results = map(waic, [randn(1000, 4, 2, 3) for _ in 1:2]) - weights_reg = model_weights(PseudoBMA(true), elpd_results) - weights_nonreg = model_weights(PseudoBMA(false), elpd_results) - @test !(weights_reg ≈ weights_nonreg) - end - end - @testset "BootstrappedPseudoBMA" begin - test_model_weights() do - # use the same seed for every run - rng = MersenneTwister(37) - BootstrappedPseudoBMA(; rng) - end - - @testset "number of samples can be configured" begin - elpd_results = map(waic, [randn(1000, 4, 2, 3) for _ in 1:2]) - rng = MersenneTwister(64) - weights1 = model_weights(BootstrappedPseudoBMA(; rng, samples=10), elpd_results) - rng = MersenneTwister(64) - weights2 = model_weights( - BootstrappedPseudoBMA(; rng, samples=100), elpd_results - ) - @test !(weights1 ≈ weights2) - end - end - @testset "Stacking" begin - @testset "InplaceStackingOptimObjective" begin - E = rand(20, 10) - obj = ArviZStats.InplaceStackingOptimObjective(E) - @test obj.cache isa NTuple{2,Vector{Float64}} - @test length(obj.cache[1]) == size(E, 1) - @test length(obj.cache[2]) == size(E, 2) - - x = normalize(randn(size(E, 2))) - - # test derivatives with finite differences - grad_exp = FiniteDifferences.grad( - central_fdm(5, 1), x -> obj(true, nothing, x), x - )[1] - grad = similar(x) - obj(true, grad, x) - @test grad ≈ grad_exp - - grad = similar(x) - obj(nothing, grad, x) - @test grad ≈ grad_exp - - @test @allocated(obj(true, nothing, x)) ≤ 32 - @test @allocated(obj(true, grad, x)) == @allocated(obj(true, nothing, x)) - end - - @testset "constructor errors if invalid optimizer provided" begin - @test_throws ArgumentError Stacking(; optimizer=DummyOptimizer()) - end - - @testset "stacking is default" begin - elpd_results = map(waic, [randn(1000, 4, 2, 3) for _ in 1:2]) - @test model_weights(elpd_results) == model_weights(Stacking(), elpd_results) - end - - test_model_weights(Stacking) - - @testset "alternate optimizer options are used" begin - elpd_results = map(waic, [randn(1000, 4, 2, 3) for _ in 1:10]) - weights1 = model_weights(Stacking(), elpd_results) - weights2 = model_weights(Stacking(), elpd_results) - optimizer = GradientDescent() - weights3 = model_weights(Stacking(; optimizer), elpd_results) - options = Optim.Options(; iterations=2) - weights4 = model_weights(Stacking(; options), elpd_results) - @test weights3 != weights1 == weights2 != weights4 - @test weights3 ≈ weights1 - end - end -end diff --git 
a/test/ArviZStats/r2_score.jl b/test/ArviZStats/r2_score.jl deleted file mode 100644 index a3c7208f..00000000 --- a/test/ArviZStats/r2_score.jl +++ /dev/null @@ -1,49 +0,0 @@ -using ArviZ -using ArviZExampleData -using GLM -using Statistics -using Test - -@testset "r2_score/r2_sample" begin - @testset "basic" begin - n = 100 - @testset for T in (Float32, Float64), - sz in (300, (100, 3)), - σ in T.((2, 1, 0.5, 0.1)) - - x = range(T(0), T(1); length=n) - slope = T(2) - intercept = T(3) - y = @. slope * x + intercept + randn(T) * σ - x_reshape = length(sz) == 1 ? x' : reshape(x, 1, 1, :) - y_pred = slope .* x_reshape .+ intercept .+ randn(T, sz..., n) .* σ - - r2_val = @inferred r2_score(y, y_pred) - @test r2_val isa NamedTuple{(:r2, :r2_std),NTuple{2,T}} - r2_draws = @inferred ArviZStats.r2_samples(y, y_pred) - @test r2_val.r2 ≈ mean(r2_draws) - @test r2_val.r2_std ≈ std(r2_draws; corrected=false) - - # check rough consistency with GLM - res = lm(@formula(y ~ 1 + x), (; x=Float64.(x), y=Float64.(y))) - @test r2_val.r2 ≈ r2(res) rtol = 1 - end - end - - @testset "InferenceData inputs" begin - @testset for name in ("regression1d", "regression10d") - idata = load_example_data(name) - VERSION ≥ v"1.9" && @inferred r2_score(idata) - r2_val = r2_score(idata) - @test r2_val == r2_score( - idata.observed_data.y, - PermutedDimsArray(idata.posterior_predictive.y, (:draw, :chain, :y_dim_0)), - ) - @test r2_val == r2_score(idata; y_name=:y) - @test r2_val == r2_score(idata; y_pred_name=:y) - @test r2_val == r2_score(idata; y_name=:y, y_pred_name=:y) - @test_throws Exception r2_score(idata; y_name=:z) - @test_throws Exception r2_score(idata; y_pred_name=:z) - end - end -end diff --git a/test/ArviZStats/runtests.jl b/test/ArviZStats/runtests.jl deleted file mode 100644 index 30a47e86..00000000 --- a/test/ArviZStats/runtests.jl +++ /dev/null @@ -1,19 +0,0 @@ -using ArviZ, Random, Statistics, Test -using ArviZ.ArviZStats -using ArviZExampleData -using Random - -Random.seed!(97) - -@testset "ArviZStats" begin - include("helpers.jl") - include("utils.jl") - include("hdi.jl") - include("loo.jl") - include("loo_pit.jl") - include("waic.jl") - include("model_weights.jl") - include("compare.jl") - include("r2_score.jl") - include("summarystats.jl") -end diff --git a/test/ArviZStats/summarystats.jl b/test/ArviZStats/summarystats.jl deleted file mode 100644 index a348b35b..00000000 --- a/test/ArviZStats/summarystats.jl +++ /dev/null @@ -1,316 +0,0 @@ -using ArviZ -using ArviZExampleData -using DimensionalData -using IteratorInterfaceExtensions -using Statistics -using Tables -using TableTraits -using Test - -_maybescalar(x) = x -_maybescalar(x::AbstractArray{<:Any,0}) = x[] - -_maybevec(x::AbstractArray) = vec(x) -_maybevec(x) = x - -@testset "summary statistics" begin - @testset "summarystats" begin - @testset "return_type=Dataset" begin - data = (x=randn(100, 2), y=randn(100, 2, 3), z=randn(100, 2, 2, 3)) - ds = convert_to_dataset( - data; - dims=(y=[:a], z=[:b, :c]), - coords=(a=["q", "r", "s"], b=[0, 1], c=[:m, :n, :o]), - ) - idata1 = InferenceData(; posterior=ds) - idata2 = InferenceData(; prior=ds) - - stats = @inferred Dataset summarystats(ds; return_type=Dataset) - @test parent(stats) == parent(summarystats(idata1; return_type=Dataset)) - @test parent(stats) == - parent(summarystats(idata2; group=:prior, return_type=Dataset)) - - @test issetequal(keys(stats), keys(ds)) - @test isempty(DimensionalData.metadata(stats)) - @test hasdim(stats, :_metric) - @test lookup(stats, :_metric) == [ - "mean", - "std", - 
"hdi_3%", - "hdi_97%", - "mcse_mean", - "mcse_std", - "ess_tail", - "ess_bulk", - "rhat", - ] - @test view(stats; _metric=At("mean")) == - dropdims(mean(ds; dims=(:draw, :chain)); dims=(:draw, :chain)) - @test view(stats; _metric=At("std")) == - dropdims(std(ds; dims=(:draw, :chain)); dims=(:draw, :chain)) - hdi_ds = hdi(ds) - @test stats[_metric=At("hdi_3%")] == hdi_ds[hdi_bound=1] - @test stats[_metric=At("hdi_97%")] == hdi_ds[hdi_bound=2] - @test view(stats; _metric=At("mcse_mean")) == mcse(ds; kind=mean) - @test view(stats; _metric=At("mcse_std")) == mcse(ds; kind=std) - @test view(stats; _metric=At("ess_tail")) == ess(ds; kind=:tail) - @test view(stats; _metric=At("ess_bulk")) == ess(ds; kind=:bulk) - @test view(stats; _metric=At("rhat")) == rhat(ds) - - stats2 = summarystats(ds; return_type=Dataset, prob_interval=0.8) - @test lookup(stats2, :_metric) == [ - "mean", - "std", - "hdi_10%", - "hdi_90%", - "mcse_mean", - "mcse_std", - "ess_tail", - "ess_bulk", - "rhat", - ] - hdi_ds2 = hdi(ds; prob=0.8) - @test stats2[_metric=At("hdi_10%")] == hdi_ds2[hdi_bound=1] - @test stats2[_metric=At("hdi_90%")] == hdi_ds2[hdi_bound=2] - - stats3 = summarystats(ds; return_type=Dataset, kind=:stats) - @test lookup(stats3, :_metric) == ["mean", "std", "hdi_3%", "hdi_97%"] - @test stats3 == stats[_metric=At(["mean", "std", "hdi_3%", "hdi_97%"])] - - stats4 = summarystats(ds; return_type=Dataset, kind=:diagnostics) - @test lookup(stats4, :_metric) == - ["mcse_mean", "mcse_std", "ess_tail", "ess_bulk", "rhat"] - @test stats4 == stats[_metric=At([ - "mcse_mean", "mcse_std", "ess_tail", "ess_bulk", "rhat" - ])] - - stats5 = summarystats( - ds; - return_type=Dataset, - metric_dim=:__metric, - kind=:stats, - prob_interval=0.95, - ) - @test lookup(stats5, :__metric) == ["mean", "std", "hdi_2.5%", "hdi_97.5%"] - end - - @testset "_indices_iterator" begin - data = (x=randn(3), y=randn(3, 3), z=randn(3, 2, 3)) - ds = namedtuple_to_dataset( - data; - dims=(x=[:s], y=[:s, :a], z=[:s, :b, :c]), - coords=(a=["q", "r", "s"], b=[0, 1], c=[:m, :n, :o]), - default_dims=(), - ) - var_inds = collect(ArviZStats._indices_iterator(ds, :s)) - @test length(var_inds) == 1 + size(ds.y, :a) + size(ds.z, :b) * size(ds.z, :c) - @test var_inds[1] == (data.x, ()) - @test first.(var_inds[2:4]) == fill(data.y, size(ds.y, :a)) - @test last.(var_inds[2:4]) == vec(DimensionalData.DimKeys(otherdims(ds.y, :s))) - @test first.(var_inds[5:end]) == fill(data.z, size(ds.z, :b) * size(ds.z, :c)) - @test last.(var_inds[5:end]) == - vec(DimensionalData.DimKeys(otherdims(ds.z, :s))) - end - - @testset "_indices_to_name" begin - data = (x=randn(3), y=randn(3, 3), z=randn(3, 2, 3)) - ds = namedtuple_to_dataset( - data; - dims=(x=[:s], y=[:s, :a], z=[:s, :b, :c]), - coords=(a=["q", "r", "s"], b=[0, 1], c=[:m, :n, :o]), - default_dims=(), - ) - - @test ArviZStats._indices_to_name(ds.x, (), true) == "x" - @test ArviZStats._indices_to_name(ds.x, (), false) == "x" - y_names = map(DimensionalData.DimKeys(dims(ds.y, :a))) do d - return ArviZStats._indices_to_name(ds.y, d, true) - end - @test y_names == ["y[q]", "y[r]", "y[s]"] - y_names_verbose = map(DimensionalData.DimKeys(dims(ds.y, :a))) do d - return ArviZStats._indices_to_name(ds.y, d, false) - end - @test y_names_verbose == ["y[a=At(\"q\")]", "y[a=At(\"r\")]", "y[a=At(\"s\")]"] - z_names = vec( - map(DimensionalData.DimKeys(dims(ds.z, (:b, :c)))) do d - return ArviZStats._indices_to_name(ds.z, d, true) - end, - ) - @test z_names == ["z[0,m]", "z[1,m]", "z[0,n]", "z[1,n]", "z[0,o]", "z[1,o]"] - 
z_names_verbose = vec( - map(DimensionalData.DimKeys(dims(ds.z, (:b, :c)))) do d - return ArviZStats._indices_to_name(ds.z, d, false) - end, - ) - @test z_names_verbose == [ - "z[b=At(0),c=At(:m)]", - "z[b=At(1),c=At(:m)]", - "z[b=At(0),c=At(:n)]", - "z[b=At(1),c=At(:n)]", - "z[b=At(0),c=At(:o)]", - "z[b=At(1),c=At(:o)]", - ] - end - - @testset "return_type=SummaryStats" begin - data = (x=randn(100, 2), y=randn(100, 2, 3), z=randn(100, 2, 2, 3)) - ds = convert_to_dataset( - data; - dims=(y=[:a], z=[:b, :c]), - coords=(a=["q", "r", "s"], b=[0, 1], c=[:m, :n, :o]), - ) - - stats = @inferred SummaryStats summarystats(ds) - stats_data = parent(stats) - @test stats_data.variable == [ - "x", - "y[q]", - "y[r]", - "y[s]", - "z[0,m]", - "z[1,m]", - "z[0,n]", - "z[1,n]", - "z[0,o]", - "z[1,o]", - ] - @test issetequal( - keys(stats_data), - [ - :variable, - :mean, - :std, - Symbol("hdi_3%"), - Symbol("hdi_97%"), - :mcse_mean, - :mcse_std, - :ess_tail, - :ess_bulk, - :rhat, - ], - ) - - # check a handful of values - @test stats_data.mean == reduce( - vcat, map(vec, DimensionalData.layers(mean(ds; dims=(:draw, :chain)))) - ) - @test stats_data.std == reduce( - vcat, map(vec, DimensionalData.layers(std(ds; dims=(:draw, :chain)))) - ) - hdi_ds = hdi(ds) - @test stats_data[Symbol("hdi_3%")] == - reduce(vcat, map(_maybevec, DimensionalData.layers(hdi_ds[hdi_bound=1]))) - @test stats_data[Symbol("hdi_97%")] == - reduce(vcat, map(_maybevec, DimensionalData.layers(hdi_ds[hdi_bound=2]))) - @test stats_data.mcse_mean == - reduce(vcat, map(_maybevec, DimensionalData.layers(mcse(ds; kind=mean)))) - @test stats_data.mcse_std == - reduce(vcat, map(_maybevec, DimensionalData.layers(mcse(ds; kind=std)))) - @test stats_data.ess_bulk == - reduce(vcat, map(_maybevec, DimensionalData.layers(ess(ds)))) - @test stats_data.rhat == - reduce(vcat, map(_maybevec, DimensionalData.layers(rhat(ds)))) - - stats2 = summarystats(ds; prob_interval=0.8, kind=:stats) - @test issetequal( - keys(parent(stats2)), - [:variable, :mean, :std, Symbol("hdi_10%"), Symbol("hdi_90%")], - ) - end - end - - @testset "SummaryStats" begin - data = ( - variable=["a", "bb", "ccc", "d", "e"], - est=randn(5), - mcse_est=randn(5), - rhat=rand(5), - ess=rand(5), - ) - - stats = @inferred SummaryStats(data) - - @testset "basic interfaces" begin - @test parent(stats) === data - @test propertynames(stats) == propertynames(data) - for k in propertynames(stats) - @test getproperty(stats, k) == getproperty(data, k) - end - @test keys(stats) == keys(data) - for k in keys(stats) - @test haskey(stats, k) == haskey(data, k) - @test getindex(stats, k) == getindex(data, k) - end - @test !haskey(stats, :foo) - @test length(stats) == length(data) - for i in 1:length(data) - @test getindex(stats, i) == getindex(data, i) - end - @test Base.iterate(stats) == Base.iterate(data) - @test Base.iterate(stats, 2) == Base.iterate(data, 2) - end - - @testset "Tables interface" begin - @test Tables.istable(typeof(stats)) - @test Tables.columnaccess(typeof(stats)) - @test Tables.columns(stats) === stats - @test Tables.columnnames(stats) == keys(stats) - table = Tables.columntable(stats) - @test table == data - for (i, k) in enumerate(Tables.columnnames(stats)) - @test Tables.getcolumn(stats, i) == Tables.getcolumn(stats, k) - end - @test_throws ErrorException Tables.getcolumn(stats, :foo) - @test Tables.rowaccess(typeof(stats)) - @test Tables.rows(stats) == Tables.rows(parent(stats)) - @test Tables.schema(stats) == Tables.schema(parent(stats)) - end - - @testset "TableTraits 
interface" begin - @test IteratorInterfaceExtensions.isiterable(stats) - @test TableTraits.isiterabletable(stats) - nt = collect(Iterators.take(IteratorInterfaceExtensions.getiterator(stats), 1))[1] - @test isequal( - nt, - (; - ( - k => Tables.getcolumn(stats, k)[1] for - k in Tables.columnnames(stats) - )... - ), - ) - nt = collect(Iterators.take(IteratorInterfaceExtensions.getiterator(stats), 2))[2] - @test isequal( - nt, - (; - ( - k => Tables.getcolumn(stats, k)[2] for - k in Tables.columnnames(stats) - )... - ), - ) - end - - @testset "show" begin - data = ( - variable=["a", "bb", "ccc", "d", "e"], - est=[111.11, 1.2345e-6, 5.4321e8, Inf, NaN], - mcse_est=[0.0012345, 5.432e-5, 2.1234e5, Inf, NaN], - rhat=vcat(1.009, 1.011, 0.99, Inf, NaN), - ess=vcat(312.45, 23.32, 1011.98, Inf, NaN), - ess_bulk=vcat(9.2345, 876.321, 999.99, Inf, NaN), - ) - stats = SummaryStats(data) - @test sprint(show, "text/plain", stats) == """ - SummaryStats - est mcse_est rhat ess ess_bulk - a 111.110 0.0012 1.01 312 9 - bb 1.e-06 5.4e-05 1.01 23 876 - ccc 5.432e+08 2.1e+05 0.99 1012 1000 - d Inf Inf Inf Inf Inf - e NaN NaN NaN NaN NaN""" - - @test startswith(sprint(show, "text/html", stats), " randn(10),)...); default_dims=() - ) - pred = namedtuple_to_dataset((; (y_pred_name => randn(100, 4, 10),)...)) - y = observed_data[y_name] - y_pred = pred[y_pred_name] - idata = InferenceData(; observed_data, pred_group => pred) - - obs_pred_exp = (y_name => y, y_pred_name => y_pred) - @testset for y_name_hint in (y_name, nothing), - y_pred_name_hint in (y_pred_name, nothing) - - obs_pred = @inferred ArviZStats.observations_and_predictions( - idata, y_name_hint, y_pred_name_hint - ) - @test obs_pred == obs_pred_exp - end - - @test_throws Exception ArviZStats.observations_and_predictions( - idata, :foo, :bar - ) - @test_throws Exception ArviZStats.observations_and_predictions( - idata, :foo, nothing - ) - @test_throws Exception ArviZStats.observations_and_predictions( - idata, nothing, :bar - ) - end - end - - @testset "unique pairs" begin - @testset for pred_group in (:posterior, :posterior_predictive), - y_name in (:y, :z), - y_pred_name in (y_name, Symbol("$(y_name)_pred")) - - observed_data = namedtuple_to_dataset( - (; (y_name => randn(10), :w => randn(3))...); default_dims=() - ) - pred = namedtuple_to_dataset((; - (y_pred_name => randn(100, 4, 10), :q => randn(100, 4, 2))... 
- )) - y = observed_data[y_name] - y_pred = pred[y_pred_name] - idata = InferenceData(; observed_data, pred_group => pred) - - obs_pred_exp = (y_name => y, y_pred_name => y_pred) - @testset for y_name_hint in (y_name, nothing), - y_pred_name_hint in (y_pred_name, nothing) - - y_name_hint === nothing && y_pred_name_hint !== nothing && continue - obs_pred = ArviZStats.observations_and_predictions( - idata, y_name_hint, y_pred_name_hint - ) - @test obs_pred == obs_pred_exp - end - - @test_throws Exception ArviZStats.observations_and_predictions( - idata, :foo, :bar - ) - @test_throws Exception ArviZStats.observations_and_predictions( - idata, :foo, nothing - ) - @test_throws Exception ArviZStats.observations_and_predictions( - idata, nothing, :bar - ) - end - end - - @testset "non-unique names" begin - @testset for pred_group in (:posterior, :posterior_predictive), - pred_suffix in ("", "_pred") - - y_pred_name = Symbol("y$pred_suffix") - z_pred_name = Symbol("z$pred_suffix") - observed_data = namedtuple_to_dataset( - (; (:y => randn(10), :z => randn(5))...); default_dims=() - ) - pred = namedtuple_to_dataset((; - (z_pred_name => randn(100, 4, 5), y_pred_name => randn(100, 4, 10))... - )) - y = observed_data.y - z = observed_data.z - y_pred = pred[y_pred_name] - z_pred = pred[z_pred_name] - idata = InferenceData(; observed_data, pred_group => pred) - - @testset for (name, pred_name) in ((:y, y_pred_name), (:z, z_pred_name)), - pred_name_hint in (pred_name, nothing) - - @test ArviZStats.observations_and_predictions( - idata, name, pred_name_hint - ) == (name => observed_data[name], pred_name => pred[pred_name]) - end - - @test_throws ArgumentError ArviZStats.observations_and_predictions(idata) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, nothing, nothing - ) - @test_throws ErrorException ArviZStats.observations_and_predictions( - idata, :foo, :bar - ) - @test_throws ErrorException ArviZStats.observations_and_predictions( - idata, :foo, nothing - ) - end - end - - @testset "missing groups" begin - observed_data = namedtuple_to_dataset((; y=randn(10)); default_dims=()) - idata = InferenceData(; observed_data) - @test_throws ArgumentError ArviZStats.observations_and_predictions(idata) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, :y, :y_pred - ) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, :y, nothing - ) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, nothing, :y_pred - ) - - posterior_predictive = namedtuple_to_dataset((; y_pred=randn(100, 4, 10))) - idata = InferenceData(; posterior_predictive) - @test_throws ArgumentError ArviZStats.observations_and_predictions(idata) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, :y, :y_pred - ) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, :y, nothing - ) - @test_throws ArgumentError ArviZStats.observations_and_predictions( - idata, nothing, :y_pred - ) - end - end - - @testset "_assimilar" begin - @testset for x in ([8, 2, 5], (8, 2, 5), (; a=8, b=2, c=5)) - @test @inferred(ArviZStats._assimilar((x=1.0, y=2.0, z=3.0), x)) == - (x=8, y=2, z=5) - @test @inferred(ArviZStats._assimilar((randn(3)...,), x)) == (8, 2, 5) - dim = Dim{:foo}(["a", "b", "c"]) - y = DimArray(randn(3), dim) - @test @inferred(ArviZStats._assimilar(y, x)) == DimArray([8, 2, 5], dim) - end - end - - @testset "_sortperm/_permute" begin - @testset for (x, y) in ( - [3, 1, 4, 2] => [1, 2, 3, 4], - 
(3, 1, 4, 2) => (1, 2, 3, 4), - (x=3, y=1, z=4, w=2) => (y=1, w=2, x=3, z=4), - ) - perm = ArviZStats._sortperm(x) - @test perm == [2, 4, 1, 3] - @test ArviZStats._permute(x, perm) == y - end - end - - @testset "_eachslice" begin - x = randn(2, 3, 4) - slices = ArviZStats._eachslice(x; dims=(3, 1)) - @test size(slices) == (size(x, 3), size(x, 1)) - slices = collect(slices) - for i in axes(x, 3), j in axes(x, 1) - @test slices[i, j] == x[j, :, i] - end - - @test ArviZStats._eachslice(x; dims=2) == ArviZStats._eachslice(x; dims=(2,)) - - if VERSION ≥ v"1.9-" - for dims in ((3, 1), (2, 3), 3) - @test ArviZStats._eachslice(x; dims) === eachslice(x; dims) - end - end - - da = DimArray(x, (Dim{:a}(1:2), Dim{:b}(['x', 'y', 'z']), Dim{:c}(0:3))) - for dims in (2, (1, 3), (3, 1), (2, 3), (:c, :a)) - @test ArviZStats._eachslice(da; dims) === eachslice(da; dims) - end - end - - @testset "_draw_chains_params_array" begin - chaindim = Dim{:chain}(1:4) - drawdim = Dim{:draw}(1:2:200) - paramdim1 = Dim{:param1}(0:1) - paramdim2 = Dim{:param2}([:a, :b, :c]) - dims = (drawdim, chaindim, paramdim1, paramdim2) - x = DimArray(randn(size(dims)), dims) - xperm = permutedims(x, (chaindim, drawdim, paramdim1, paramdim2)) - @test @inferred ArviZStats._draw_chains_params_array(xperm) ≈ x - xperm = permutedims(x, (paramdim1, chaindim, drawdim, paramdim2)) - @test @inferred ArviZStats._draw_chains_params_array(xperm) ≈ x - xperm = permutedims(x, (paramdim1, drawdim, paramdim2, chaindim)) - @test @inferred ArviZStats._draw_chains_params_array(xperm) ≈ x - end - - @testset "_logabssubexp" begin - x, y = rand(2) - @test @inferred(ArviZStats._logabssubexp(log(x), log(y))) ≈ log(abs(x - y)) - @test ArviZStats._logabssubexp(log(y), log(x)) ≈ log(abs(y - x)) - end - - @testset "_sum_and_se" begin - @testset for n in (100, 1_000), scale in (1, 5) - x = randn(n) * scale - s, se = @inferred ArviZ.ArviZStats._sum_and_se(x) - @test s ≈ sum(x) - @test se ≈ StatsBase.sem(x) * n - - x = randn(n, 10) * scale - s, se = @inferred ArviZ.ArviZStats._sum_and_se(x; dims=1) - @test s ≈ sum(x; dims=1) - @test se ≈ mapslices(StatsBase.sem, x; dims=1) * n - - x = randn(10, n) * scale - s, se = @inferred ArviZ.ArviZStats._sum_and_se(x; dims=2) - @test s ≈ sum(x; dims=2) - @test se ≈ mapslices(StatsBase.sem, x; dims=2) * n - end - @testset "::Number" begin - @test isequal(ArviZ.ArviZStats._sum_and_se(2), (2, NaN)) - @test isequal(ArviZ.ArviZStats._sum_and_se(3.5f0; dims=()), (3.5f0, NaN32)) - end - end - - @testset "_log_mean" begin - x = rand(1000) - logx = log.(x) - w = rand(1000) - w ./= sum(w) - logw = log.(w) - @test ArviZStats._log_mean(logx, logw) ≈ log(mean(x, StatsBase.fweights(w))) - x = rand(1000, 4) - logx = log.(x) - @test ArviZStats._log_mean(logx, logw; dims=1) ≈ - log.(mean(x, StatsBase.fweights(w); dims=1)) - end - - @testset "_se_log_mean" begin - ndraws = 1_000 - @testset for n in (1_000, 10_000), scale in (1, 5) - x = rand(n) * scale - w = rand(n) - w = StatsBase.weights(w ./ sum(w)) - logx = log.(x) - logw = log.(w) - se = @inferred ArviZStats._se_log_mean(logx, logw) - se_exp = std(log(mean(rand(n) * scale, w)) for _ in 1:ndraws) - @test se ≈ se_exp rtol = 1e-1 - end - end - - @testset "sigdigits_matching_se" begin - @test ArviZStats.sigdigits_matching_se(123.456, 0.01) == 5 - @test ArviZStats.sigdigits_matching_se(123.456, 1) == 3 - @test ArviZStats.sigdigits_matching_se(123.456, 0.0001) == 7 - @test ArviZStats.sigdigits_matching_se(1e5, 0.1) == 7 - @test ArviZStats.sigdigits_matching_se(1e5, 0.2; scale=5) == 6 - @test 
ArviZStats.sigdigits_matching_se(1e4, 0.5) == 5 - @test ArviZStats.sigdigits_matching_se(1e4, 0.5; scale=1) == 6 - @test ArviZStats.sigdigits_matching_se(1e5, 0.1; sigdigits_max=2) == 2 - - # errors - @test_throws ArgumentError ArviZStats.sigdigits_matching_se(123.456, -1) - @test_throws ArgumentError ArviZStats.sigdigits_matching_se( - 123.456, 1; sigdigits_max=-1 - ) - @test_throws ArgumentError ArviZStats.sigdigits_matching_se(123.456, 1; scale=-1) - - # edge cases - @test ArviZStats.sigdigits_matching_se(0.0, 1) == 0 - @test ArviZStats.sigdigits_matching_se(NaN, 1) == 0 - @test ArviZStats.sigdigits_matching_se(Inf, 1) == 0 - @test ArviZStats.sigdigits_matching_se(100, 1; scale=Inf) == 0 - @test ArviZStats.sigdigits_matching_se(100, Inf) == 0 - @test ArviZStats.sigdigits_matching_se(100, 0) == 7 - @test ArviZStats.sigdigits_matching_se(100, 0; sigdigits_max=2) == 2 - end - - @testset "_printf_with_sigdigits" begin - @test ArviZStats._printf_with_sigdigits(123.456, 1) == "1.e+02" - @test ArviZStats._printf_with_sigdigits(-123.456, 1) == "-1.e+02" - @test ArviZStats._printf_with_sigdigits(123.456, 2) == "1.2e+02" - @test ArviZStats._printf_with_sigdigits(-123.456, 2) == "-1.2e+02" - @test ArviZStats._printf_with_sigdigits(123.456, 3) == "123" - @test ArviZStats._printf_with_sigdigits(-123.456, 3) == "-123" - @test ArviZStats._printf_with_sigdigits(123.456, 4) == "123.5" - @test ArviZStats._printf_with_sigdigits(-123.456, 4) == "-123.5" - @test ArviZStats._printf_with_sigdigits(123.456, 5) == "123.46" - @test ArviZStats._printf_with_sigdigits(-123.456, 5) == "-123.46" - @test ArviZStats._printf_with_sigdigits(123.456, 6) == "123.456" - @test ArviZStats._printf_with_sigdigits(-123.456, 6) == "-123.456" - @test ArviZStats._printf_with_sigdigits(123.456, 7) == "123.4560" - @test ArviZStats._printf_with_sigdigits(-123.456, 7) == "-123.4560" - @test ArviZStats._printf_with_sigdigits(123.456, 8) == "123.45600" - @test ArviZStats._printf_with_sigdigits(0.00000123456, 1) == "1.e-06" - @test ArviZStats._printf_with_sigdigits(0.00000123456, 2) == "1.2e-06" - end - - @testset "ft_printf_sigdigits" begin - @testset "all columns" begin - @testset for sigdigits in 1:5 - ft1 = ArviZStats.ft_printf_sigdigits(sigdigits) - for i in 1:10, j in 1:5 - v = randn() - @test ft1(v, i, j) == ArviZStats._printf_with_sigdigits(v, sigdigits) - @test ft1("foo", i, j) == "foo" - end - end - end - @testset "subset of columns" begin - @testset for sigdigits in 1:5 - ft = ArviZStats.ft_printf_sigdigits(sigdigits, [2, 3]) - for i in 1:10, j in 1:5 - v = randn() - if j ∈ [2, 3] - @test ft(v, i, j) == ArviZStats._printf_with_sigdigits(v, sigdigits) - else - @test ft(v, i, j) === v - end - @test ft("foo", i, j) == "foo" - end - end - end - end - - @testset "ft_printf_sigdigits_matching_se" begin - @testset "all columns" begin - @testset for scale in 1:3 - se = rand(5) - ft = ArviZStats.ft_printf_sigdigits_matching_se(se; scale) - for i in eachindex(se), j in 1:5 - v = randn() - sigdigits = ArviZStats.sigdigits_matching_se(v, se[i]; scale) - @test ft(v, i, j) == ArviZStats._printf_with_sigdigits(v, sigdigits) - @test ft("foo", i, j) == "foo" - end - end - end - - @testset "subset of columns" begin - @testset for scale in 1:3 - se = rand(5) - ft = ArviZStats.ft_printf_sigdigits_matching_se(se, [2, 3]; scale) - for i in eachindex(se), j in 1:5 - v = randn() - if j ∈ [2, 3] - sigdigits = ArviZStats.sigdigits_matching_se(v, se[i]; scale) - @test ft(v, i, j) == ArviZStats._printf_with_sigdigits(v, sigdigits) - @test ft("foo", i, j) 
== "foo" - else - @test ft(v, i, j) === v - end - end - end - end - end -end diff --git a/test/ArviZStats/waic.jl b/test/ArviZStats/waic.jl deleted file mode 100644 index 3473f49d..00000000 --- a/test/ArviZStats/waic.jl +++ /dev/null @@ -1,130 +0,0 @@ -using Test -using ArviZ -using ArviZ.ArviZStats -using ArviZExampleData -using DimensionalData -using Logging: SimpleLogger, with_logger - -include("helpers.jl") - -@testset "waic" begin - @testset "core functionality" begin - @testset for sz in ((1000, 4), (1000, 4, 2), (100, 4, 2, 3)), - T in (Float32, Float64), - TA in (Array, DimArray) - - atol_perm = cbrt(eps(T)) - - log_likelihood = randn(T, sz) - if TA === DimArray - log_likelihood = DimArray( - log_likelihood, (:draw, :chain, :param1, :param2)[1:length(sz)] - ) - end - waic_result = - TA === DimArray ? waic(log_likelihood) : @inferred(waic(log_likelihood)) - @test waic_result isa ArviZStats.WAICResult - estimates = elpd_estimates(waic_result) - pointwise = elpd_estimates(waic_result; pointwise=true) - @testset "return types and values as expected" begin - @test estimates isa NamedTuple{(:elpd, :elpd_mcse, :p, :p_mcse),NTuple{4,T}} - @test pointwise isa NamedTuple{(:elpd, :p)} - if length(sz) == 2 - @test eltype(pointwise) === T - else - @test eltype(pointwise) <: TA{T,length(sz) - 2} - end - end - @testset "information criterion" begin - @test information_criterion(waic_result, :log) == estimates.elpd - @test information_criterion(waic_result, :negative_log) == -estimates.elpd - @test information_criterion(waic_result, :deviance) == -2 * estimates.elpd - @test information_criterion(waic_result, :log; pointwise=true) == - pointwise.elpd - @test information_criterion(waic_result, :negative_log; pointwise=true) == - -pointwise.elpd - @test information_criterion(waic_result, :deviance; pointwise=true) == - -2 * pointwise.elpd - end - - TA === DimArray && @testset "consistency with InferenceData argument" begin - idata1 = InferenceData(; log_likelihood=Dataset((; x=log_likelihood))) - waic_result1 = waic(idata1) - @test isequal(waic_result1.estimates, waic_result.estimates) - @test waic_result1.pointwise isa Dataset - if length(sz) == 2 - @test issetequal(keys(waic_result1.pointwise), (:elpd, :p)) - else - @test waic_result1.pointwise.elpd == waic_result.pointwise.elpd - @test waic_result1.pointwise.p == waic_result.pointwise.p - end - - ll_perm = permutedims( - log_likelihood, (ntuple(x -> x + 2, length(sz) - 2)..., 2, 1) - ) - idata2 = InferenceData(; log_likelihood=Dataset((; y=ll_perm))) - waic_result2 = waic(idata2) - @test waic_result2.estimates.elpd ≈ waic_result1.estimates.elpd atol = - atol_perm - @test isapprox( - waic_result2.estimates.elpd_mcse, - waic_result1.estimates.elpd_mcse; - nans=true, - atol=atol_perm, - ) - @test waic_result2.estimates.p ≈ waic_result1.estimates.p atol = - atol_perm - @test isapprox( - waic_result2.estimates.p_mcse, - waic_result1.estimates.p_mcse; - nans=true, - atol=atol_perm, - ) - @test waic_result2.pointwise.p ≈ waic_result1.pointwise.p atol = - atol_perm - end - end - end - @testset "warnings" begin - io = IOBuffer() - log_likelihood = randn(100, 4) - @testset for bad_val in (NaN, -Inf, Inf) - log_likelihood[1] = bad_val - result = with_logger(SimpleLogger(io)) do - waic(log_likelihood) - end - msg = String(take!(io)) - @test occursin("Warning:", msg) - end - end - @testset "show" begin - idata = load_example_data("centered_eight") - # regression test - @test sprint(show, "text/plain", waic(idata)) == """ - WAICResult with estimates - elpd 
elpd_mcse p p_mcse - -31 1.4 0.9 0.33""" - end - @testset "agrees with R waic" begin - if r_loo_installed() - @testset for ds_name in ["centered_eight", "non_centered_eight"] - idata = load_example_data(ds_name) - log_likelihood = idata.log_likelihood.obs - data_dims = otherdims(log_likelihood, (:draw, :chain)) - log_likelihood = permutedims(log_likelihood, (:draw, :chain, data_dims...)) - reff_rand = rand(data_dims) - result_r = waic_r(log_likelihood) - result = waic(log_likelihood) - @test result.estimates.elpd ≈ result_r.estimates.elpd - @test result.estimates.elpd_mcse ≈ result_r.estimates.elpd_mcse - @test result.estimates.p ≈ result_r.estimates.p - @test result.estimates.p_mcse ≈ result_r.estimates.p_mcse - @test result.pointwise.elpd ≈ result_r.pointwise.elpd - @test result.pointwise.p ≈ result_r.pointwise.p - end - else - @warn "Skipping consistency tests against R loo::waic, since loo is not installed." - @test_broken false - end - end -end diff --git a/test/Project.toml b/test/Project.toml deleted file mode 100644 index eb933a07..00000000 --- a/test/Project.toml +++ /dev/null @@ -1,40 +0,0 @@ -[deps] -ArviZExampleData = "2f96bb34-afd9-46ae-bcd0-9b2d4372fe3c" -DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" -GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" -MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" -OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -RCall = "6f49c342-dc21-5d91-9882-a32aef131414" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SampleChains = "754583d1-7fc4-4dab-93b5-5eaca5c9622e" -SampleChainsDynamicHMC = "6d9fd711-e8b2-4778-9c70-c1dfb499d4c4" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[compat] -ArviZExampleData = "0.1.5" -DimensionalData = "0.20, 0.21, 0.22, 0.23, 0.24" -Distributions = "0.25" -FiniteDifferences = "0.12" -GLM = "1" -IteratorInterfaceExtensions = "0.1.1, 1" -MCMCChains = "6.0" -OffsetArrays = "1" -Optim = "1" -OrderedCollections = "1" -RCall = "0.13" -SampleChains = "0.5" -SampleChainsDynamicHMC = "0.3" -StatsBase = "0.32, 0.33, 0.34" -TableTraits = "0.4, 1" -Tables = "1" diff --git a/test/runtests.jl b/test/runtests.jl index 79971f8c..14b03173 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,6 @@ using Test @testset "ArviZ" begin include("helpers.jl") include("test_utils.jl") - include("ArviZStats/runtests.jl") include("test_samplechains.jl") include("test_mcmcchains.jl") end
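
The hunks above remove the tests for the stats API (loo, waic, loo_pit, model_weights, r2_score, summarystats and the formatting helpers) together with the test environment, leaving only the top-level test includes. As a rough caller's-eye reference, the snippet below is a minimal sketch assembled from the same calls those deleted tests make; it assumes `loo`, `waic`, `model_weights`, and `summarystats` remain callable after `using ArviZ` (e.g. via the reexported component packages) and that `load_example_data` comes from ArviZExampleData, and it is illustrative only, not part of the patch.

```julia
# Illustrative sketch (assumed to remain available after this change);
# the calls mirror usage in the deleted test files.
using ArviZ, ArviZExampleData, Random

Random.seed!(97)

# Log-likelihood draws shaped (draw, chain, observation), as in the deleted tests.
log_like_a = randn(1_000, 4, 8)
log_like_b = randn(1_000, 4, 8)

loo_a = loo(log_like_a)    # PSIS-LOO; loo_a.estimates has elpd, elpd_mcse, p, p_mcse
waic_b = waic(log_like_b)  # WAIC result with analogous estimates/pointwise fields

# Model-averaging weights from a named collection of ELPD results
# (stacking is the default method exercised by the deleted tests).
wts = model_weights((a=loo_a, b=loo(log_like_b)))

# Summary statistics and LOO on a bundled example dataset.
idata = load_example_data("centered_eight")
summarystats(idata.posterior)
loo(idata)
```

The weights returned by `model_weights` keep the shape of the input collection (here a NamedTuple) and sum to one, which is the behaviour the removed model_weights tests checked.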