From 1ef80a958293dc1e3460fa67535c0cf0058d45a4 Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Sat, 19 Aug 2023 23:00:28 -0500
Subject: [PATCH 1/3] better error when formula variables missing from dataset

---
 src/linearmixedmodel.jl | 11 ++++++++---
 test/pls.jl             |  7 +++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl
index 3548ab780..6ee58867c 100644
--- a/src/linearmixedmodel.jl
+++ b/src/linearmixedmodel.jl
@@ -57,6 +57,11 @@ function LinearMixedModel(
     f::FormulaTerm, tbl::Tables.ColumnTable; contrasts=Dict{Symbol,Any}(), wts=[],
     σ=nothing, amalgamate=true,
 )
+    fvars = StatsModels.termvars(f)
+    tvars = Tables.columnnames(tbl)
+    fvars ⊆ tvars  ||
+        throw(ArgumentError("The following formula variables are not present in the table: $(setdiff(fvars, tvars))"))
+
     # TODO: perform missing_omit() after apply_schema() when improved
     # missing support is in a StatsModels release
     tbl, _ = StatsModels.missing_omit(tbl, f)
@@ -354,14 +359,14 @@ end
 
 """
     confint(pr::MixedModelProfile; level::Real=0.95)
-    
+
 Compute profile confidence intervals for (fixed effects) coefficients, with confidence level `level` (by default 95%).
 
 !!! note
-    The API guarantee is for a Tables.jl compatible table. The exact return type is an 
+    The API guarantee is for a Tables.jl compatible table. The exact return type is an
     implementation detail and may change in a future minor release without being considered
     breaking.
-  
+
 """
 function StatsBase.confint(m::MixedModel{T}; level=0.95) where {T}
     cutoff = sqrt.(quantile(Chisq(1), level))
diff --git a/test/pls.jl b/test/pls.jl
index 4902555e3..41e12a6ce 100644
--- a/test/pls.jl
+++ b/test/pls.jl
@@ -271,6 +271,13 @@ end
     lrt = likelihoodratiotest(models(:pastes)...)
     @test length(lrt.deviance) == length(lrt.formulas) == length(lrt.models )== 2
     @test first(lrt.tests.pvalues) ≈ 0.5233767966395597 atol=0.0001
+
+    @testset "missing variables in formula" begin
+        ae = ArgumentError("The following formula variables are not present in the table: [:reaction, :joy, :subj]")
+        @test_throws(ae,
+                     fit(MixedModel, @formula(reaction ~ 1 + joy + (1|subj)), dataset(:pastes)))
+
+    end
 end
 
 @testset "InstEval" begin

From 0dd98dc72453a90cf69ee2c52da8f7c465a71497 Mon Sep 17 00:00:00 2001
From: Phillip Alday <me@phillipalday.com>
Date: Sat, 19 Aug 2023 23:03:01 -0500
Subject: [PATCH 2/3] NEWS and version bump

---
 NEWS.md      | 7 ++++++-
 Project.toml | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 871f45f46..995d069ff 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,10 @@
+MixedModels v4.18.0 Release Notes
+==============================
+* More user-friendly error messages when a formula contains variables not in the data. [#707]
+
 MixedModels v4.17.0 Release Notes
 ==============================
-* New kwarg `amalgamate` can be used to disable amalgation of random effects terms sharing a single grouping variable. Generally, `amalgamate=false` will result in a slower fit, but may improve convergence in some pathological cases. Note that this feature is experimental and changes to it are **not** considered breakings. [#673]
+* **EXPERIMENTAL** New kwarg `amalgamate` can be used to disable amalgamation of random effects terms sharing a single grouping variable. Generally, `amalgamate=false` will result in a slower fit, but may improve convergence in some pathological cases. Note that this feature is experimental and changes to it are **not** considered breaking. [#673]
 * More informative error messages when passing a `Distribution` or `Link` type instead of the desired instance. [#698]
 * More informative error message on the intentional decision not to define methods for the coefficient of determination. [#698]
 * **EXPERIMENTAL** Return `finitial` when PIRLS drifts into a portion of the parameter space that yields a (numerically) invalid covariance matrix. This recovery strategy may be removed in a future release. [#616]
@@ -456,3 +460,4 @@ Package dependencies
 [#694]: https://github.com/JuliaStats/MixedModels.jl/issues/694
 [#698]: https://github.com/JuliaStats/MixedModels.jl/issues/698
 [#703]: https://github.com/JuliaStats/MixedModels.jl/issues/703
+[#707]: https://github.com/JuliaStats/MixedModels.jl/issues/707
diff --git a/Project.toml b/Project.toml
index 0aceb5fa4..e55fbdcec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MixedModels"
 uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
 author = ["Phillip Alday <me@phillipalday.com>", "Douglas Bates <dmbates@gmail.com>", "Jose Bayoan Santiago Calderon <jbs3hp@virginia.edu>"]
-version = "4.17.0"
+version = "4.18.0"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"

From 03772bcf8f88ba3825096a684d5001350ff0081f Mon Sep 17 00:00:00 2001
From: Phillip Alday <palday@users.noreply.github.com>
Date: Sun, 20 Aug 2023 04:06:11 +0000
Subject: [PATCH 3/3] JuliaFormatter

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 src/linearmixedmodel.jl | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl
index 6ee58867c..aeed811b2 100644
--- a/src/linearmixedmodel.jl
+++ b/src/linearmixedmodel.jl
@@ -59,8 +59,12 @@ function LinearMixedModel(
 )
     fvars = StatsModels.termvars(f)
     tvars = Tables.columnnames(tbl)
-    fvars ⊆ tvars  ||
-        throw(ArgumentError("The following formula variables are not present in the table: $(setdiff(fvars, tvars))"))
+    fvars ⊆ tvars ||
+        throw(
+            ArgumentError(
+                "The following formula variables are not present in the table: $(setdiff(fvars, tvars))",
+            ),
+        )
 
     # TODO: perform missing_omit() after apply_schema() when improved
     # missing support is in a StatsModels release