From f6158ea53b90a60899ee8171c04dc1f978fb8723 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Mon, 26 Feb 2024 00:30:51 -0800
Subject: [PATCH 01/10] finally.hh: include works by itself; mark as nodiscard

---
 src/libutil/finally.hh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/libutil/finally.hh b/src/libutil/finally.hh
index db654301fdd..4cae20a3689 100644
--- a/src/libutil/finally.hh
+++ b/src/libutil/finally.hh
@@ -1,11 +1,13 @@
 #pragma once
 ///@file
 
+#include <utility>
+
 /**
  * A trivial class to run a function at the end of a scope.
  */
 template<typename Fn>
-class Finally
+class [[nodiscard("Finally values must be used")]] Finally
 {
 private:
     Fn fun;

From 76aced691552e193e0225af40f8acf484cfeaefe Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Mon, 26 Feb 2024 01:21:54 -0800
Subject: [PATCH 02/10] finally.hh: delete copy constructor which is a bad idea

---
 src/libutil/finally.hh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/libutil/finally.hh b/src/libutil/finally.hh
index 4cae20a3689..f9f0195a155 100644
--- a/src/libutil/finally.hh
+++ b/src/libutil/finally.hh
@@ -11,8 +11,15 @@ class [[nodiscard("Finally values must be used")]] Finally
 {
 private:
     Fn fun;
+    bool movedFrom = false;
 
 public:
     Finally(Fn fun) : fun(std::move(fun)) { }
-    ~Finally() { fun(); }
+    // Copying Finallys is definitely not a good idea and will cause them to be
+    // called twice.
+    Finally(Finally &other) = delete;
+    Finally(Finally &&other) : fun(std::move(other.fun)) {
+        other.movedFrom = true;
+    }
+    ~Finally() { if (!movedFrom) fun(); }
 };

From 70a6ce139bd39f915a6c2c499d741e2c27557dc0 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Mon, 26 Feb 2024 00:34:11 -0800
Subject: [PATCH 03/10] refactor: move readline stuff into its own file

This is in direct preparation for an automation mode of nix repl.
---
 src/libcmd/repl-interacter.cc | 175 ++++++++++++++++++++++++++++++++++
 src/libcmd/repl-interacter.hh |  48 ++++++++++
 src/libcmd/repl.cc            | 175 ++--------------------------------
 src/libcmd/repl.hh            |   5 -
 4 files changed, 232 insertions(+), 171 deletions(-)
 create mode 100644 src/libcmd/repl-interacter.cc
 create mode 100644 src/libcmd/repl-interacter.hh

diff --git a/src/libcmd/repl-interacter.cc b/src/libcmd/repl-interacter.cc
new file mode 100644
index 00000000000..9aa1f7bb93d
--- /dev/null
+++ b/src/libcmd/repl-interacter.cc
@@ -0,0 +1,175 @@
+#include "file-system.hh"
+#include "libcmd/repl.hh"
+#include <cstdio>
+
+#ifdef USE_READLINE
+#include <readline/history.h>
+#include <readline/readline.h>
+#else
+// editline < 1.15.2 don't wrap their API for C++ usage
+// (added in https://github.com/troglobit/editline/commit/91398ceb3427b730995357e9d120539fb9bb7461).
+// This results in linker errors due to to name-mangling of editline C symbols.
+// For compatibility with these versions, we wrap the API here
+// (wrapping multiple times on newer versions is no problem).
+extern "C" {
+#include <editline.h>
+}
+#endif
+
+#include "signals.hh"
+#include "finally.hh"
+#include "repl-interacter.hh"
+
+namespace nix {
+
+namespace {
+// Used to communicate to NixRepl::getLine whether a signal occurred in ::readline.
+volatile sig_atomic_t g_signal_received = 0;
+
+void sigintHandler(int signo)
+{
+    g_signal_received = signo;
+}
+};
+
+static detail::ReplCompleterMixin * curRepl; // ugly
+
+static char * completionCallback(char * s, int * match)
+{
+    auto possible = curRepl->completePrefix(s);
+    if (possible.size() == 1) {
+        *match = 1;
+        auto * res = strdup(possible.begin()->c_str() + strlen(s));
+        if (!res)
+            throw Error("allocation failure");
+        return res;
+    } else if (possible.size() > 1) {
+        auto checkAllHaveSameAt = [&](size_t pos) {
+            auto & first = *possible.begin();
+            for (auto & p : possible) {
+                if (p.size() <= pos || p[pos] != first[pos])
+                    return false;
+            }
+            return true;
+        };
+        size_t start = strlen(s);
+        size_t len = 0;
+        while (checkAllHaveSameAt(start + len))
+            ++len;
+        if (len > 0) {
+            *match = 1;
+            auto * res = strdup(std::string(*possible.begin(), start, len).c_str());
+            if (!res)
+                throw Error("allocation failure");
+            return res;
+        }
+    }
+
+    *match = 0;
+    return nullptr;
+}
+
+static int listPossibleCallback(char * s, char *** avp)
+{
+    auto possible = curRepl->completePrefix(s);
+
+    if (possible.size() > (INT_MAX / sizeof(char *)))
+        throw Error("too many completions");
+
+    int ac = 0;
+    char ** vp = nullptr;
+
+    auto check = [&](auto * p) {
+        if (!p) {
+            if (vp) {
+                while (--ac >= 0)
+                    free(vp[ac]);
+                free(vp);
+            }
+            throw Error("allocation failure");
+        }
+        return p;
+    };
+
+    vp = check((char **) malloc(possible.size() * sizeof(char *)));
+
+    for (auto & p : possible)
+        vp[ac++] = check(strdup(p.c_str()));
+
+    *avp = vp;
+
+    return ac;
+}
+
+ReadlineLikeInteracter::Guard ReadlineLikeInteracter::init(detail::ReplCompleterMixin * repl)
+{
+    // Allow nix-repl specific settings in .inputrc
+    rl_readline_name = "nix-repl";
+    try {
+        createDirs(dirOf(historyFile));
+    } catch (SystemError & e) {
+        logWarning(e.info());
+    }
+#ifndef USE_READLINE
+    el_hist_size = 1000;
+#endif
+    read_history(historyFile.c_str());
+    auto oldRepl = curRepl;
+    curRepl = repl;
+    Guard restoreRepl([oldRepl] { curRepl = oldRepl; });
+#ifndef USE_READLINE
+    rl_set_complete_func(completionCallback);
+    rl_set_list_possib_func(listPossibleCallback);
+#endif
+    return restoreRepl;
+}
+
+bool ReadlineLikeInteracter::getLine(std::string & input, const std::string & prompt)
+{
+    struct sigaction act, old;
+    sigset_t savedSignalMask, set;
+
+    auto setupSignals = [&]() {
+        act.sa_handler = sigintHandler;
+        sigfillset(&act.sa_mask);
+        act.sa_flags = 0;
+        if (sigaction(SIGINT, &act, &old))
+            throw SysError("installing handler for SIGINT");
+
+        sigemptyset(&set);
+        sigaddset(&set, SIGINT);
+        if (sigprocmask(SIG_UNBLOCK, &set, &savedSignalMask))
+            throw SysError("unblocking SIGINT");
+    };
+    auto restoreSignals = [&]() {
+        if (sigprocmask(SIG_SETMASK, &savedSignalMask, nullptr))
+            throw SysError("restoring signals");
+
+        if (sigaction(SIGINT, &old, 0))
+            throw SysError("restoring handler for SIGINT");
+    };
+
+    setupSignals();
+    char * s = readline(prompt.c_str());
+    Finally doFree([&]() { free(s); });
+    restoreSignals();
+
+    if (g_signal_received) {
+        g_signal_received = 0;
+        input.clear();
+        return true;
+    }
+
+    if (!s)
+        return false;
+    input += s;
+    input += '\n';
+    return true;
+}
+
+ReadlineLikeInteracter::~ReadlineLikeInteracter()
+{
+    write_history(historyFile.c_str());
+}
+
+};
diff --git a/src/libcmd/repl-interacter.hh b/src/libcmd/repl-interacter.hh
new file mode 100644
index 00000000000..e549bab36ab
--- /dev/null
+++ b/src/libcmd/repl-interacter.hh
@@ -0,0 +1,48 @@
+#pragma once
+/// @file
+
+#include "finally.hh"
+#include "types.hh"
+#include <functional>
+#include <string>
+
+namespace nix {
+
+namespace detail {
+/** Provides the completion hooks for the repl, without exposing its complete
+ * internals. */
+struct ReplCompleterMixin {
+    virtual StringSet completePrefix(const std::string & prefix) = 0;
+};
+};
+
+enum class ReplPromptType {
+    ReplPrompt,
+    ContinuationPrompt,
+};
+
+class ReplInteracter
+{
+public:
+    using Guard = Finally<std::function<void()>>;
+
+    virtual Guard init(detail::ReplCompleterMixin * repl) = 0;
+    /** Returns a boolean of whether the interacter got EOF */
+    virtual bool getLine(std::string & input, const std::string & prompt) = 0;
+    virtual ~ReplInteracter(){};
+};
+
+class ReadlineLikeInteracter : public virtual ReplInteracter
+{
+    std::string historyFile;
+public:
+    ReadlineLikeInteracter(std::string historyFile)
+        : historyFile(historyFile)
+    {
+    }
+    virtual Guard init(detail::ReplCompleterMixin * repl) override;
+    virtual bool getLine(std::string & input, const std::string & prompt) override;
+    virtual ~ReadlineLikeInteracter() override;
+};
+
+};
diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc
index 8b83608fa89..8af3c5ff393 100644
--- a/src/libcmd/repl.cc
+++ b/src/libcmd/repl.cc
@@ -3,32 +3,17 @@
 #include <cstring>
 #include <climits>
 
-#include <setjmp.h>
-
-#ifdef USE_READLINE
-#include <readline/history.h>
-#include <readline/readline.h>
-#else
-// editline < 1.15.2 don't wrap their API for C++ usage
-// (added in https://github.com/troglobit/editline/commit/91398ceb3427b730995357e9d120539fb9bb7461).
-// This results in linker errors due to to name-mangling of editline C symbols.
-// For compatibility with these versions, we wrap the API here
-// (wrapping multiple times on newer versions is no problem).
-extern "C" {
-#include <editline.h>
-}
-#endif
-
+#include "libcmd/repl-interacter.hh"
 #include "repl.hh"
 
 #include "ansicolor.hh"
-#include "signals.hh"
 #include "shared.hh"
 #include "eval.hh"
 #include "eval-cache.hh"
 #include "eval-inline.hh"
 #include "eval-settings.hh"
 #include "attr-path.hh"
+#include "signals.hh"
 #include "store-api.hh"
 #include "log-store.hh"
 #include "common-eval-args.hh"
@@ -38,7 +23,6 @@ extern "C" {
 #include "flake/flake.hh"
 #include "flake/lockfile.hh"
 #include "users.hh"
-#include "terminal.hh"
 #include "editor-for.hh"
 #include "finally.hh"
 #include "markdown.hh"
@@ -75,6 +59,7 @@ enum class ProcessLineResult {
 
 struct NixRepl
     : AbstractNixRepl
+    , detail::ReplCompleterMixin
     #if HAVE_BOEHMGC
     , gc
     #endif
@@ -90,17 +75,16 @@ struct NixRepl
     int displ;
     StringSet varNames;
 
-    const Path historyFile;
+    std::unique_ptr<ReplInteracter> interacter;
 
     NixRepl(const SearchPath & searchPath, nix::ref<Store> store,ref<EvalState> state,
             std::function<AnnotatedValues()> getValues);
-    virtual ~NixRepl();
+    virtual ~NixRepl() = default;
 
     ReplExitStatus mainLoop() override;
     void initEnv() override;
 
-    StringSet completePrefix(const std::string & prefix);
-    bool getLine(std::string & input, const std::string & prompt);
+    virtual StringSet completePrefix(const std::string & prefix) override;
     StorePath getDerivationPath(Value & v);
     ProcessLineResult processLine(std::string line);
 
@@ -143,14 +127,8 @@ NixRepl::NixRepl(const SearchPath & searchPath, nix::ref<Store> store, ref<EvalS
     , debugTraceIndex(0)
     , getValues(getValues)
     , staticEnv(new StaticEnv(nullptr, state->staticBaseEnv.get()))
-    , historyFile(getDataDir() + "/nix/repl-history")
-{
-}
-
-
-NixRepl::~NixRepl()
+    , interacter(make_unique<ReadlineLikeInteracter>(getDataDir() + "/nix/repl-history"))
 {
-    write_history(historyFile.c_str());
 }
 
 void runNix(Path program, const Strings & args,
@@ -169,79 +147,6 @@ void runNix(Path program, const Strings & args,
     return;
 }
 
-static NixRepl * curRepl; // ugly
-
-static char * completionCallback(char * s, int *match) {
-  auto possible = curRepl->completePrefix(s);
-  if (possible.size() == 1) {
-    *match = 1;
-    auto *res = strdup(possible.begin()->c_str() + strlen(s));
-    if (!res) throw Error("allocation failure");
-    return res;
-  } else if (possible.size() > 1) {
-    auto checkAllHaveSameAt = [&](size_t pos) {
-      auto &first = *possible.begin();
-      for (auto &p : possible) {
-        if (p.size() <= pos || p[pos] != first[pos])
-          return false;
-      }
-      return true;
-    };
-    size_t start = strlen(s);
-    size_t len = 0;
-    while (checkAllHaveSameAt(start + len)) ++len;
-    if (len > 0) {
-      *match = 1;
-      auto *res = strdup(std::string(*possible.begin(), start, len).c_str());
-      if (!res) throw Error("allocation failure");
-      return res;
-    }
-  }
-
-  *match = 0;
-  return nullptr;
-}
-
-static int listPossibleCallback(char *s, char ***avp) {
-  auto possible = curRepl->completePrefix(s);
-
-  if (possible.size() > (INT_MAX / sizeof(char*)))
-    throw Error("too many completions");
-
-  int ac = 0;
-  char **vp = nullptr;
-
-  auto check = [&](auto *p) {
-    if (!p) {
-      if (vp) {
-        while (--ac >= 0)
-          free(vp[ac]);
-        free(vp);
-      }
-      throw Error("allocation failure");
-    }
-    return p;
-  };
-
-  vp = check((char **)malloc(possible.size() * sizeof(char*)));
-
-  for (auto & p : possible)
-    vp[ac++] = check(strdup(p.c_str()));
-
-  *avp = vp;
-
-  return ac;
-}
-
-namespace {
-    // Used to communicate to NixRepl::getLine whether a signal occurred in ::readline.
-    volatile sig_atomic_t g_signal_received = 0;
-
-    void sigintHandler(int signo) {
-        g_signal_received = signo;
-    }
-}
-
 static std::ostream & showDebugTrace(std::ostream & out, const PosTable & positions, const DebugTrace & dt)
 {
     if (dt.isError)
@@ -281,24 +186,7 @@ ReplExitStatus NixRepl::mainLoop()
 
     loadFiles();
 
-    // Allow nix-repl specific settings in .inputrc
-    rl_readline_name = "nix-repl";
-    try {
-        createDirs(dirOf(historyFile));
-    } catch (SystemError & e) {
-        logWarning(e.info());
-    }
-#ifndef USE_READLINE
-    el_hist_size = 1000;
-#endif
-    read_history(historyFile.c_str());
-    auto oldRepl = curRepl;
-    curRepl = this;
-    Finally restoreRepl([&] { curRepl = oldRepl; });
-#ifndef USE_READLINE
-    rl_set_complete_func(completionCallback);
-    rl_set_list_possib_func(listPossibleCallback);
-#endif
+    auto _guard = interacter->init(static_cast<detail::ReplCompleterMixin *>(this));
 
     std::string input;
 
@@ -307,7 +195,7 @@ ReplExitStatus NixRepl::mainLoop()
         logger->pause();
         // When continuing input from previous lines, don't print a prompt, just align to the same
         // number of chars as the prompt.
-        if (!getLine(input, input.empty() ? "nix-repl> " : "          ")) {
+        if (!interacter->getLine(input, input.empty() ? "nix-repl> " : "          ")) {
             // Ctrl-D should exit the debugger.
             state->debugStop = false;
             logger->cout("");
@@ -356,51 +244,6 @@ ReplExitStatus NixRepl::mainLoop()
     }
 }
 
-
-bool NixRepl::getLine(std::string & input, const std::string & prompt)
-{
-    struct sigaction act, old;
-    sigset_t savedSignalMask, set;
-
-    auto setupSignals = [&]() {
-        act.sa_handler = sigintHandler;
-        sigfillset(&act.sa_mask);
-        act.sa_flags = 0;
-        if (sigaction(SIGINT, &act, &old))
-            throw SysError("installing handler for SIGINT");
-
-        sigemptyset(&set);
-        sigaddset(&set, SIGINT);
-        if (sigprocmask(SIG_UNBLOCK, &set, &savedSignalMask))
-            throw SysError("unblocking SIGINT");
-    };
-    auto restoreSignals = [&]() {
-        if (sigprocmask(SIG_SETMASK, &savedSignalMask, nullptr))
-            throw SysError("restoring signals");
-
-        if (sigaction(SIGINT, &old, 0))
-            throw SysError("restoring handler for SIGINT");
-    };
-
-    setupSignals();
-    char * s = readline(prompt.c_str());
-    Finally doFree([&]() { free(s); });
-    restoreSignals();
-
-    if (g_signal_received) {
-        g_signal_received = 0;
-        input.clear();
-        return true;
-    }
-
-    if (!s)
-      return false;
-    input += s;
-    input += '\n';
-    return true;
-}
-
-
 StringSet NixRepl::completePrefix(const std::string & prefix)
 {
     StringSet completions;
diff --git a/src/libcmd/repl.hh b/src/libcmd/repl.hh
index 21aa8bfc7cf..aac79ec7404 100644
--- a/src/libcmd/repl.hh
+++ b/src/libcmd/repl.hh
@@ -3,11 +3,6 @@
 
 #include "eval.hh"
 
-#if HAVE_BOEHMGC
-#define GC_INCLUDE_NEW
-#include <gc/gc_cpp.h>
-#endif
-
 namespace nix {
 
 struct AbstractNixRepl

From ea31b8a117e0a2e18809fd3921209d106d9040c8 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Mon, 26 Feb 2024 00:43:44 -0800
Subject: [PATCH 04/10] refactor: repl prompts are now the job of the
 interacter

---
 src/libcmd/repl-interacter.cc | 19 +++++++++++++++----
 src/libcmd/repl-interacter.hh |  4 ++--
 src/libcmd/repl.cc            |  2 +-
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/libcmd/repl-interacter.cc b/src/libcmd/repl-interacter.cc
index 9aa1f7bb93d..3e34ecdb6ad 100644
--- a/src/libcmd/repl-interacter.cc
+++ b/src/libcmd/repl-interacter.cc
@@ -1,5 +1,3 @@
-#include "file-system.hh"
-#include "libcmd/repl.hh"
 #include <cstdio>
 
 #ifdef USE_READLINE
@@ -19,6 +17,8 @@ extern "C" {
 #include "signals.hh"
 #include "finally.hh"
 #include "repl-interacter.hh"
+#include "file-system.hh"
+#include "libcmd/repl.hh"
 
 namespace nix {
 
@@ -124,7 +124,18 @@ ReadlineLikeInteracter::Guard ReadlineLikeInteracter::init(detail::ReplCompleter
     return restoreRepl;
 }
 
-bool ReadlineLikeInteracter::getLine(std::string & input, const std::string & prompt)
+static constexpr const char * promptForType(ReplPromptType promptType)
+{
+    switch (promptType) {
+    case ReplPromptType::ReplPrompt:
+        return "nix-repl> ";
+    case ReplPromptType::ContinuationPrompt:
+        return "          ";
+    }
+    assert(false);
+}
+
+bool ReadlineLikeInteracter::getLine(std::string & input, ReplPromptType promptType)
 {
     struct sigaction act, old;
     sigset_t savedSignalMask, set;
@@ -150,7 +161,7 @@ bool ReadlineLikeInteracter::getLine(std::string & input, const std::string & pr
     };
 
     setupSignals();
-    char * s = readline(prompt.c_str());
+    char * s = readline(promptForType(promptType));
     Finally doFree([&]() { free(s); });
     restoreSignals();
 
diff --git a/src/libcmd/repl-interacter.hh b/src/libcmd/repl-interacter.hh
index e549bab36ab..cc70efd0729 100644
--- a/src/libcmd/repl-interacter.hh
+++ b/src/libcmd/repl-interacter.hh
@@ -28,7 +28,7 @@ public:
 
     virtual Guard init(detail::ReplCompleterMixin * repl) = 0;
     /** Returns a boolean of whether the interacter got EOF */
-    virtual bool getLine(std::string & input, const std::string & prompt) = 0;
+    virtual bool getLine(std::string & input, ReplPromptType promptType) = 0;
     virtual ~ReplInteracter(){};
 };
 
@@ -41,7 +41,7 @@ public:
     {
     }
     virtual Guard init(detail::ReplCompleterMixin * repl) override;
-    virtual bool getLine(std::string & input, const std::string & prompt) override;
+    virtual bool getLine(std::string & input, ReplPromptType promptType) override;
     virtual ~ReadlineLikeInteracter() override;
 };
 
diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc
index 8af3c5ff393..228d66f5e2a 100644
--- a/src/libcmd/repl.cc
+++ b/src/libcmd/repl.cc
@@ -195,7 +195,7 @@ ReplExitStatus NixRepl::mainLoop()
         logger->pause();
         // When continuing input from previous lines, don't print a prompt, just align to the same
         // number of chars as the prompt.
-        if (!interacter->getLine(input, input.empty() ? "nix-repl> " : "          ")) {
+        if (!interacter->getLine(input, input.empty() ? ReplPromptType::ReplPrompt : ReplPromptType::ContinuationPrompt)) {
             // Ctrl-D should exit the debugger.
             state->debugStop = false;
             logger->cout("");

From 9bbf0adc3363f5c0d28c75fcd5d43f9c01681f2e Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Tue, 12 Mar 2024 19:18:43 -0700
Subject: [PATCH 05/10] Implement a parser for a literate testing system for
 the repl

This parser can be reused for other purposes. It's inspired by
https://bitheap.org/cram/

Although eelco's impostor exists https://github.com/mobusoperandi/eelco,
it is not very nice to depend on out of tree testing frameworks with no
way to customize them.
---
 Makefile                                      |   1 +
 .../repl_characterization/.gitignore          |   1 +
 .../repl_characterization/data/basic.ast      |  11 ++
 .../repl_characterization/data/basic.test     |  11 ++
 .../functional/repl_characterization/local.mk |  17 ++
 .../repl_characterization.cc                  |  42 +++++
 tests/unit/libutil-support/local.mk           |   3 +-
 .../tests/cli-literate-parser.cc              | 174 ++++++++++++++++++
 .../tests/cli-literate-parser.hh              | 127 +++++++++++++
 .../unit/libutil-support/tests/debug-char.hh  |  24 +++
 10 files changed, 410 insertions(+), 1 deletion(-)
 create mode 100644 tests/functional/repl_characterization/.gitignore
 create mode 100644 tests/functional/repl_characterization/data/basic.ast
 create mode 100644 tests/functional/repl_characterization/data/basic.test
 create mode 100644 tests/functional/repl_characterization/local.mk
 create mode 100644 tests/functional/repl_characterization/repl_characterization.cc
 create mode 100644 tests/unit/libutil-support/tests/cli-literate-parser.cc
 create mode 100644 tests/unit/libutil-support/tests/cli-literate-parser.hh
 create mode 100644 tests/unit/libutil-support/tests/debug-char.hh

diff --git a/Makefile b/Makefile
index c3dc83c7722..24180a9d54c 100644
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,7 @@ makefiles += \
   tests/functional/git-hashing/local.mk \
   tests/functional/dyn-drv/local.mk \
   tests/functional/test-libstoreconsumer/local.mk \
+  tests/functional/repl_characterization/local.mk \
   tests/functional/plugins/local.mk
 endif
 
diff --git a/tests/functional/repl_characterization/.gitignore b/tests/functional/repl_characterization/.gitignore
new file mode 100644
index 00000000000..4c6412c2f37
--- /dev/null
+++ b/tests/functional/repl_characterization/.gitignore
@@ -0,0 +1 @@
+test-repl-characterization
diff --git a/tests/functional/repl_characterization/data/basic.ast b/tests/functional/repl_characterization/data/basic.ast
new file mode 100644
index 00000000000..d494b00aa68
--- /dev/null
+++ b/tests/functional/repl_characterization/data/basic.ast
@@ -0,0 +1,11 @@
+Commentary "meow meow meow"
+Command "command"
+Output "output output one"
+Output ""
+Output ""
+Output "output output two"
+Commentary "meow meow"
+Command "command two"
+Output "output output output"
+Commentary "commentary"
+Output "output output output"
diff --git a/tests/functional/repl_characterization/data/basic.test b/tests/functional/repl_characterization/data/basic.test
new file mode 100644
index 00000000000..d6b8427b4e9
--- /dev/null
+++ b/tests/functional/repl_characterization/data/basic.test
@@ -0,0 +1,11 @@
+meow meow meow
+  nix-repl> command
+  output output one
+
+
+  output output two
+meow meow
+  nix-repl> command two
+  output output output
+commentary
+  output output output
diff --git a/tests/functional/repl_characterization/local.mk b/tests/functional/repl_characterization/local.mk
new file mode 100644
index 00000000000..e7b36c99c3b
--- /dev/null
+++ b/tests/functional/repl_characterization/local.mk
@@ -0,0 +1,17 @@
+programs += test-repl-characterization
+
+test-repl-characterization_DIR := $(d)
+
+test-repl-characterization_ENV := _NIX_TEST_UNIT_DATA=$(d)/data
+
+# do not install
+test-repl-characterization_INSTALL_DIR :=
+
+test-repl-characterization_SOURCES := \
+  $(wildcard $(d)/*.cc) \
+
+test-repl-characterization_CXXFLAGS += -I src/libutil -I tests/unit/libutil-support
+
+test-repl-characterization_LIBS = libutil libutil-test-support
+
+test-repl-characterization_LDFLAGS = $(THREAD_LDFLAGS) $(SODIUM_LIBS) $(EDITLINE_LIBS) $(BOOST_LDFLAGS) $(LOWDOWN_LIBS) $(GTEST_LIBS)
diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc
new file mode 100644
index 00000000000..5b73e7a8984
--- /dev/null
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@@ -0,0 +1,42 @@
+#include <gtest/gtest.h>
+
+#include <string>
+#include <string_view>
+#include <optional>
+#include <unistd.h>
+
+#include "tests/characterization.hh"
+#include "tests/cli-literate-parser.hh"
+
+using namespace std::string_literals;
+
+namespace nix {
+
+static constexpr const char * REPL_PROMPT = "nix-repl> ";
+
+class ReplSessionTest : public CharacterizationTest
+{
+    Path unitTestData = getUnitTestData();
+
+public:
+    Path goldenMaster(std::string_view testStem) const override
+    {
+        return unitTestData + "/" + testStem;
+    }
+};
+
+TEST_F(ReplSessionTest, parses)
+{
+    writeTest("basic.ast", [this]() {
+        const std::string content = readFile(goldenMaster("basic.test"));
+        auto parser = CLILiterateParser{REPL_PROMPT};
+        parser.feed(content);
+
+        std::ostringstream out{};
+        for (auto & bit : parser.syntax()) {
+            out << bit.print() << "\n";
+        }
+        return out.str();
+    });
+}
+};
diff --git a/tests/unit/libutil-support/local.mk b/tests/unit/libutil-support/local.mk
index 5f7835c9f61..1c0ff8f6bae 100644
--- a/tests/unit/libutil-support/local.mk
+++ b/tests/unit/libutil-support/local.mk
@@ -14,6 +14,7 @@ libutil-test-support_SOURCES := $(wildcard $(d)/tests/*.cc)
 
 libutil-test-support_CXXFLAGS += $(libutil-tests_EXTRA_INCLUDES)
 
-libutil-test-support_LIBS = libutil
+# libexpr so we can steal their string printer from print.cc
+libutil-test-support_LIBS = libutil libexpr
 
 libutil-test-support_LDFLAGS := $(THREAD_LDFLAGS) -lrapidcheck
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc
new file mode 100644
index 00000000000..16a263f9b7d
--- /dev/null
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@@ -0,0 +1,174 @@
+#include "cli-literate-parser.hh"
+#include "libexpr/print.hh"
+#include "debug-char.hh"
+#include "types.hh"
+#include "util.hh"
+#include <iostream>
+#include <memory>
+#include <boost/algorithm/string/trim.hpp>
+
+using namespace std::string_literals;
+
+namespace nix {
+
+static constexpr const bool DEBUG_PARSER = false;
+
+constexpr auto CLILiterateParser::stateDebug(State const & s) -> const char *
+{
+    return std::visit(
+        overloaded{// clang-format off
+            [](Indent const&) -> const char * { return "indent"; },
+            [](Commentary const&) -> const char * { return "indent"; },
+            [](Prompt const&) -> const char * { return "prompt"; },
+            [](Command const&) -> const char * { return "command"; },
+            [](OutputLine const&) -> const char * { return "output_line"; }},
+        // clang-format on
+        s);
+}
+
+auto CLILiterateParser::Node::print() const -> std::string
+{
+    std::ostringstream s{};
+    switch (kind) {
+    case NodeKind::COMMENTARY:
+        s << "Commentary ";
+        break;
+    case NodeKind::COMMAND:
+        s << "Command ";
+        break;
+    case NodeKind::OUTPUT:
+        s << "Output ";
+        break;
+    }
+    printLiteralString(s, this->text);
+    return s.str();
+}
+
+void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os)
+{
+    for (auto & node : nodes) {
+        *os << node.print() << "\\n";
+    }
+}
+
+auto CLILiterateParser::parse(std::string prompt, std::string_view const & input, size_t indent) -> std::vector<Node>
+{
+    CLILiterateParser p{std::move(prompt), indent};
+    p.feed(input);
+    return CLILiterateParser::intoSyntax(std::move(p));
+}
+
+auto CLILiterateParser::intoSyntax(CLILiterateParser && inst) -> std::vector<Node>
+{
+    return std::move(inst.syntax_);
+}
+
+CLILiterateParser::CLILiterateParser(std::string prompt, size_t indent)
+    : state_(indent == 0 ? State(Prompt{}) : State(Indent{}))
+    , prompt_(prompt)
+    , indent_(indent)
+    , lastWasOutput_(false)
+    , syntax_(std::vector<Node>{})
+{
+    assert(!prompt.empty());
+}
+
+void CLILiterateParser::feed(char c)
+{
+    if constexpr (DEBUG_PARSER) {
+        std::cout << stateDebug(state_) << " " << DebugChar{c} << "\n";
+    }
+
+    if (c == '\n') {
+        onNewline();
+        return;
+    }
+
+    std::visit(
+        overloaded{
+            [&](Indent & s) {
+                if (c == ' ') {
+                    if (++s.pos >= indent_) {
+                        transition(Prompt{});
+                    }
+                } else {
+                    transition(Commentary{AccumulatingState{.lineAccumulator = std::string{c}}});
+                }
+            },
+            [&](Prompt & s) {
+                if (s.pos >= prompt_.length()) {
+                    transition(Command{AccumulatingState{.lineAccumulator = std::string{c}}});
+                    return;
+                } else if (c == prompt_[s.pos]) {
+                    // good prompt character
+                    ++s.pos;
+                } else {
+                    // didn't match the prompt, so it must have actually been output.
+                    s.lineAccumulator.push_back(c);
+                    transition(OutputLine{AccumulatingState{.lineAccumulator = std::move(s.lineAccumulator)}});
+                    return;
+                }
+                s.lineAccumulator.push_back(c);
+            },
+            [&](AccumulatingState & s) { s.lineAccumulator.push_back(c); }},
+        state_);
+}
+
+void CLILiterateParser::onNewline()
+{
+    State lastState = std::move(state_);
+    bool newLastWasOutput = false;
+
+    syntax_.push_back(std::visit(
+        overloaded{
+            [&](Indent & s) {
+                // XXX: technically this eats trailing spaces
+
+                // a newline following output is considered part of that output
+                if (lastWasOutput_) {
+                    newLastWasOutput = true;
+                    return Node::mkOutput("");
+                }
+                return Node::mkCommentary("");
+            },
+            [&](Commentary & s) { return Node::mkCommentary(std::move(s.lineAccumulator)); },
+            [&](Command & s) { return Node::mkCommand(std::move(s.lineAccumulator)); },
+            [&](OutputLine & s) {
+                newLastWasOutput = true;
+                return Node::mkOutput(std::move(s.lineAccumulator));
+            },
+            [&](Prompt & s) {
+                // INDENT followed by newline is also considered a blank output line
+                return Node::mkOutput(std::move(s.lineAccumulator));
+            }},
+        lastState));
+
+    transition(Indent{});
+    lastWasOutput_ = newLastWasOutput;
+}
+
+void CLILiterateParser::feed(std::string_view s)
+{
+    for (char ch : s) {
+        feed(ch);
+    }
+}
+
+void CLILiterateParser::transition(State new_state)
+{
+    // When we expect INDENT and we are parsing without indents, commentary
+    // cannot exist, so we want to transition directly into PROMPT before
+    // resuming normal processing.
+    if (Indent * i = std::get_if<Indent>(&new_state); i != nullptr && indent_ == 0) {
+        new_state = Prompt{AccumulatingState{}, i->pos};
+    }
+
+    state_ = new_state;
+}
+
+auto CLILiterateParser::syntax() const -> std::vector<Node> const &
+{
+    return syntax_;
+}
+
+};
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh
new file mode 100644
index 00000000000..f63a74658e7
--- /dev/null
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@@ -0,0 +1,127 @@
+#pragma once
+///@file
+
+#include <compare>
+#include <memory>
+#include <sstream>
+#include <variant>
+#include <vector>
+#include <string>
+
+namespace nix {
+/*
+ * A DFA parser for literate test cases for CLIs.
+ *
+ * FIXME: implement merging of these, so you can auto update cases that have
+ * comments.
+ *
+ * Format:
+ * COMMENTARY
+ * INDENT PROMPT COMMAND
+ * INDENT OUTPUT
+ *
+ * e.g.
+ * commentary commentary commentary
+ *   nix-repl> :t 1
+ *   an integer
+ *
+ * Yields:
+ * Commentary "commentary commentary commentary"
+ * Command ":t 1"
+ * Output "an integer"
+ *
+ * Note: one Output line is generated for each line of the sources, because
+ * this is effectively necessary to be able to align them in the future to
+ * auto-update tests.
+ */
+class CLILiterateParser
+{
+public:
+
+    enum class NodeKind {
+        COMMENTARY,
+        COMMAND,
+        OUTPUT,
+    };
+
+    struct Node
+    {
+        NodeKind kind;
+        std::string text;
+        std::strong_ordering operator<=>(Node const &) const = default;
+
+        static Node mkCommentary(std::string text)
+        {
+            return Node{.kind = NodeKind::COMMENTARY, .text = text};
+        }
+
+        static Node mkCommand(std::string text)
+        {
+            return Node{.kind = NodeKind::COMMAND, .text = text};
+        }
+
+        static Node mkOutput(std::string text)
+        {
+            return Node{.kind = NodeKind::OUTPUT, .text = text};
+        }
+
+        auto print() const -> std::string;
+    };
+
+    CLILiterateParser(std::string prompt, size_t indent = 2);
+
+    auto syntax() const -> std::vector<Node> const &;
+
+    /** Feeds a character into the parser */
+    void feed(char c);
+
+    /** Feeds a string into the parser */
+    void feed(std::string_view s);
+
+    /** Parses an input in a non-streaming fashion */
+    static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
+
+    /** Consumes a CLILiterateParser and gives you the syntax out of it */
+    static auto intoSyntax(CLILiterateParser && inst) -> std::vector<Node>;
+
+private:
+
+    struct AccumulatingState
+    {
+        std::string lineAccumulator;
+    };
+    struct Indent
+    {
+        size_t pos = 0;
+    };
+    struct Commentary : public AccumulatingState
+    {};
+    struct Prompt : AccumulatingState
+    {
+        size_t pos = 0;
+    };
+    struct Command : public AccumulatingState
+    {};
+    struct OutputLine : public AccumulatingState
+    {};
+
+    using State = std::variant<Indent, Commentary, Prompt, Command, OutputLine>;
+    State state_;
+
+    constexpr static auto stateDebug(State const&) -> const char *;
+
+    const std::string prompt_;
+    const size_t indent_;
+
+    /** Last line was output, so we consider a blank to be part of the output */
+    bool lastWasOutput_;
+
+    std::vector<Node> syntax_;
+
+    void transition(State newState);
+    void onNewline();
+};
+
+// Override gtest printing for lists of nodes
+void PrintTo(std::vector<CLILiterateParser::Node> const & nodes, std::ostream * os);
+};
diff --git a/tests/unit/libutil-support/tests/debug-char.hh b/tests/unit/libutil-support/tests/debug-char.hh
new file mode 100644
index 00000000000..765d8553fc8
--- /dev/null
+++ b/tests/unit/libutil-support/tests/debug-char.hh
@@ -0,0 +1,24 @@
+///@file
+#include <ostream>
+#include <boost/io/ios_state.hpp>
+
+namespace nix {
+
+struct DebugChar
+{
+    char c;
+};
+
+inline std::ostream & operator<<(std::ostream & s, DebugChar c)
+{
+    boost::io::ios_flags_saver _ifs(s);
+
+    if (isprint(c.c)) {
+        s << static_cast<char>(c.c);
+    } else {
+        s << std::hex << "0x" << (static_cast<unsigned int>(c.c) & 0xff);
+    }
+    return s;
+}
+
+}

From 103a3670f99d3f7303bd536200b35bb74436b10b Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Tue, 12 Mar 2024 19:18:43 -0700
Subject: [PATCH 06/10] Implement a repl characterization test system

This allows for automating using the repl without needing a PTY, with
very easy to write test files.
---
 .../data/basic_repl.test                      |  60 +++++++
 .../repl_characterization.cc                  |  56 ++++++-
 .../repl_characterization/test-session.cc     | 157 ++++++++++++++++++
 .../repl_characterization/test-session.hh     |  69 ++++++++
 .../tests/cli-literate-parser.cc              |  73 ++++++++
 .../tests/cli-literate-parser.hh              |   7 +
 .../tests/terminal-code-eater.cc              |  85 ++++++++++
 .../tests/terminal-code-eater.hh              |  29 ++++
 8 files changed, 535 insertions(+), 1 deletion(-)
 create mode 100644 tests/functional/repl_characterization/data/basic_repl.test
 create mode 100644 tests/functional/repl_characterization/test-session.cc
 create mode 100644 tests/functional/repl_characterization/test-session.hh
 create mode 100644 tests/unit/libutil-support/tests/terminal-code-eater.cc
 create mode 100644 tests/unit/libutil-support/tests/terminal-code-eater.hh

diff --git a/tests/functional/repl_characterization/data/basic_repl.test b/tests/functional/repl_characterization/data/basic_repl.test
new file mode 100644
index 00000000000..a8dea6d7cbf
--- /dev/null
+++ b/tests/functional/repl_characterization/data/basic_repl.test
@@ -0,0 +1,60 @@
+  nix-repl> 1 + 1
+  2
+
+  nix-repl> :doc builtins.head
+  Synopsis: builtins.head list
+
+      Return the first element of a list; abort evaluation if
+      the argument isn’t a list or is an empty list. You can
+      test whether a list is empty by comparing it with [].
+
+  nix-repl> f = a: "" + a
+
+Expect the trace to not contain any traceback:
+
+  nix-repl> f 2
+  error:
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
+
+  nix-repl> :te
+  showing error traces
+
+Expect the trace to have traceback:
+
+  nix-repl> f 2
+  error:
+         … from call site
+           at «string»:1:1:
+              1| f 2
+               | ^
+
+         … while calling anonymous lambda
+           at «string»:1:2:
+              1|  a: "" + a
+               |  ^
+
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
+
+Turning it off should also work:
+
+  nix-repl> :te
+  not showing error traces
+
+  nix-repl> f 2
+  error:
+         … while evaluating a path segment
+           at «string»:1:10:
+              1|  a: "" + a
+               |          ^
+
+         error: cannot coerce an integer to a string: 2
diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc
index 5b73e7a8984..d0e2c0017a0 100644
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@@ -2,9 +2,11 @@
 
 #include <string>
 #include <string_view>
-#include <optional>
 #include <unistd.h>
 
+#include "test-session.hh"
+#include "util.hh"
+#include "file-system.hh"
 #include "tests/characterization.hh"
 #include "tests/cli-literate-parser.hh"
 
@@ -14,6 +16,18 @@ namespace nix {
 
 static constexpr const char * REPL_PROMPT = "nix-repl> ";
 
+// ASCII ENQ character
+static constexpr const char * AUTOMATION_PROMPT = "\x05";
+
+static std::string_view trimOutLog(std::string_view outLog)
+{
+    const std::string trailer = "\n"s + AUTOMATION_PROMPT;
+    if (outLog.ends_with(trailer)) {
+        outLog.remove_suffix(trailer.length());
+    }
+    return outLog;
+}
+
 class ReplSessionTest : public CharacterizationTest
 {
     Path unitTestData = getUnitTestData();
@@ -23,6 +37,40 @@ class ReplSessionTest : public CharacterizationTest
     {
         return unitTestData + "/" + testStem;
     }
+
+    void runReplTest(std::string_view const & content, std::vector<std::string> extraArgs = {}) const
+    {
+        auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);
+
+        Strings args{"--quiet", "repl", "--quiet", "--extra-experimental-features", "repl-automation"};
+        args.insert(args.end(), extraArgs.begin(), extraArgs.end());
+
+        // TODO: why the fuck does this need two --quiets
+        auto process = RunningProcess::start("nix", args);
+        auto session = TestSession{AUTOMATION_PROMPT, std::move(process)};
+
+        for (auto & bit : syntax) {
+            if (bit.kind != CLILiterateParser::NodeKind::COMMAND) {
+                continue;
+            }
+
+            if (!session.waitForPrompt()) {
+                ASSERT_TRUE(false);
+            }
+            session.runCommand(bit.text);
+        }
+        if (!session.waitForPrompt()) {
+            ASSERT_TRUE(false);
+        }
+        session.close();
+
+        auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, trimOutLog(session.outLog), 0);
+
+        CLILiterateParser::tidyOutputForComparison(parsedOutLog);
+        CLILiterateParser::tidyOutputForComparison(syntax);
+
+        ASSERT_EQ(parsedOutLog, syntax);
+    }
 };
 
 TEST_F(ReplSessionTest, parses)
@@ -39,4 +87,10 @@ TEST_F(ReplSessionTest, parses)
         return out.str();
     });
 }
+
+TEST_F(ReplSessionTest, repl_basic)
+{
+    readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
+}
+
 };
diff --git a/tests/functional/repl_characterization/test-session.cc b/tests/functional/repl_characterization/test-session.cc
new file mode 100644
index 00000000000..e0de3dac3ed
--- /dev/null
+++ b/tests/functional/repl_characterization/test-session.cc
@@ -0,0 +1,157 @@
+#include <iostream>
+
+#include "test-session.hh"
+#include "processes.hh"
+#include "tests/debug-char.hh"
+#include "util.hh"
+
+namespace nix {
+
+static constexpr const bool DEBUG_REPL_PARSER = false;
+
+RunningProcess RunningProcess::start(std::string executable, Strings args)
+{
+    args.push_front(executable);
+
+    Pipe procStdin{};
+    Pipe procStdout{};
+
+    procStdin.create();
+    procStdout.create();
+
+    // This is separate from runProgram2 because we have different IO requirements
+    pid_t pid = startProcess([&]() {
+        if (dup2(procStdout.writeSide.get(), STDOUT_FILENO) == -1)
+            throw SysError("dupping stdout");
+        if (dup2(procStdin.readSide.get(), STDIN_FILENO) == -1)
+            throw SysError("dupping stdin");
+        procStdin.writeSide.close();
+        procStdout.readSide.close();
+        if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1)
+            throw SysError("dupping stderr");
+        execvp(executable.c_str(), stringsToCharPtrs(args).data());
+        throw SysError("exec did not happen");
+    });
+
+    procStdout.writeSide.close();
+    procStdin.readSide.close();
+
+    return RunningProcess{
+        .pid = pid,
+        .procStdin = std::move(procStdin),
+        .procStdout = std::move(procStdout),
+    };
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunneeded-internal-declaration"
+#endif
+std::ostream & operator<<(std::ostream & os, ReplOutputParser::State s)
+{
+    switch (s) {
+    case ReplOutputParser::State::Prompt:
+        os << "prompt";
+        break;
+    case ReplOutputParser::State::Context:
+        os << "context";
+        break;
+    }
+    return os;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+void ReplOutputParser::transition(State new_state, char responsible_char, bool wasPrompt)
+{
+    if constexpr (DEBUG_REPL_PARSER) {
+        std::cerr << "transition " << new_state << " for " << DebugChar{responsible_char}
+                  << (wasPrompt ? " [prompt]" : "") << "\n";
+    }
+    state = new_state;
+    pos_in_prompt = 0;
+}
+
+bool ReplOutputParser::feed(char c)
+{
+    if (c == '\n') {
+        transition(State::Prompt, c);
+        return false;
+    }
+    switch (state) {
+    case State::Context:
+        break;
+    case State::Prompt:
+        if (pos_in_prompt == prompt.length() - 1 && prompt[pos_in_prompt] == c) {
+            transition(State::Context, c, true);
+            return true;
+        }
+        if (pos_in_prompt >= prompt.length() - 1 || prompt[pos_in_prompt] != c) {
+            transition(State::Context, c);
+            break;
+        }
+        pos_in_prompt++;
+        break;
+    }
+    return false;
+}
+
+/** Waits for the prompt and then returns if a prompt was found */
+bool TestSession::waitForPrompt()
+{
+    std::vector<char> buf(1024);
+
+    for (;;) {
+        ssize_t res = read(proc.procStdout.readSide.get(), buf.data(), buf.size());
+
+        if (res < 0) {
+            throw SysError("read");
+        }
+        if (res == 0) {
+            return false;
+        }
+
+        bool foundPrompt = false;
+        for (ssize_t i = 0; i < res; ++i) {
+            // foundPrompt = foundPrompt || outputParser.feed(buf[i]);
+            bool wasEaten = true;
+            eater.feed(buf[i], [&](char c) {
+                wasEaten = false;
+                foundPrompt = outputParser.feed(buf[i]) || foundPrompt;
+
+                outLog.push_back(c);
+            });
+
+            if constexpr (DEBUG_REPL_PARSER) {
+                std::cerr << "raw " << DebugChar{buf[i]} << (wasEaten ? " [eaten]" : "") << "\n";
+            }
+        }
+
+        if (foundPrompt) {
+            return true;
+        }
+    }
+}
+
+void TestSession::close()
+{
+    proc.procStdin.close();
+    proc.procStdout.close();
+}
+
+void TestSession::runCommand(std::string command)
+{
+    if constexpr (DEBUG_REPL_PARSER)
+        std::cerr << "runCommand " << command << "\n";
+    command += "\n";
+    // We have to feed a newline into the output parser, since Nix might not
+    // give us a newline before a prompt in all cases (it might clear line
+    // first, e.g.)
+    outputParser.feed('\n');
+    // Echo is disabled, so we have to make our own
+    outLog.append(command);
+    writeFull(proc.procStdin.writeSide.get(), command, false);
+}
+
+};
diff --git a/tests/functional/repl_characterization/test-session.hh b/tests/functional/repl_characterization/test-session.hh
new file mode 100644
index 00000000000..69d05cb50dc
--- /dev/null
+++ b/tests/functional/repl_characterization/test-session.hh
@@ -0,0 +1,69 @@
+#pragma once
+///@file
+
+#include <sched.h>
+#include <string>
+
+#include "file-descriptor.hh"
+#include "tests/terminal-code-eater.hh"
+
+namespace nix {
+
+struct RunningProcess
+{
+    pid_t pid;
+    Pipe procStdin;
+    Pipe procStdout;
+
+    static RunningProcess start(std::string executable, Strings args);
+};
+
+/** DFA that catches repl prompts */
+class ReplOutputParser
+{
+public:
+    ReplOutputParser(std::string prompt)
+        : prompt(prompt)
+    {
+        assert(!prompt.empty());
+    }
+    /** Feeds in a character and returns whether this is an open prompt */
+    bool feed(char c);
+
+    enum class State {
+        Prompt,
+        Context,
+    };
+
+private:
+    State state = State::Prompt;
+    size_t pos_in_prompt = 0;
+    std::string const prompt;
+
+    void transition(State state, char responsible_char, bool wasPrompt = false);
+};
+
+struct TestSession
+{
+    RunningProcess proc;
+    ReplOutputParser outputParser;
+    TerminalCodeEater eater;
+    std::string outLog;
+    std::string prompt;
+
+    TestSession(std::string prompt, RunningProcess && proc)
+        : proc(std::move(proc))
+        , outputParser(prompt)
+        , eater({})
+        , outLog({})
+        , prompt(prompt)
+    {
+    }
+
+    bool waitForPrompt();
+
+    void runCommand(std::string command);
+
+    void close();
+};
+};
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.cc b/tests/unit/libutil-support/tests/cli-literate-parser.cc
index 16a263f9b7d..221a3aebe02 100644
--- a/tests/unit/libutil-support/tests/cli-literate-parser.cc
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.cc
@@ -171,4 +171,77 @@ auto CLILiterateParser::syntax() const -> std::vector<Node> const &
     return syntax_;
 }
 
+auto CLILiterateParser::unparse(const std::string & prompt, const std::vector<Node> & syntax, size_t indent)
+    -> std::string
+{
+    std::string indent_str(indent, ' ');
+    std::ostringstream out{};
+
+    for (auto & node : syntax) {
+        switch (node.kind) {
+        case NodeKind::COMMENTARY:
+            // TODO: should not ignore commentary
+            break;
+        case NodeKind::COMMAND:
+            out << indent_str << prompt << node.text << "\n";
+            break;
+        case NodeKind::OUTPUT:
+            out << indent_str << node.text << "\n";
+            break;
+        }
+    }
+
+    return out.str();
+}
+
+void CLILiterateParser::tidyOutputForComparison(std::vector<Node> & syntax)
+{
+    std::vector<Node> newSyntax{};
+
+    // Eat trailing newlines, so assume that the very end was actually a command
+    bool lastWasCommand = true;
+    bool newLastWasCommand = true;
+
+    auto v = std::ranges::reverse_view(syntax);
+
+    for (auto it = v.begin(); it != v.end(); ++it) {
+        Node item = *it;
+
+        lastWasCommand = newLastWasCommand;
+        // chomp commentary
+        if (item.kind == NodeKind::COMMENTARY) {
+            continue;
+        }
+
+        if (item.kind == NodeKind::COMMAND) {
+            newLastWasCommand = true;
+
+            if (item.text == "") {
+                // chomp empty commands
+                continue;
+            }
+        }
+
+        if (item.kind == NodeKind::OUTPUT) {
+            // TODO: horrible
+            bool nextIsCommand =
+                (it + 1 == v.end()) ? false : (it + 1)->kind == NodeKind::COMMAND;
+            std::string trimmedText = boost::algorithm::trim_right_copy(item.text);
+            if ((lastWasCommand || nextIsCommand) && trimmedText == "") {
+                // chomp empty text above or directly below commands
+                continue;
+            }
+
+            // real output, stop chomping
+            newLastWasCommand = false;
+
+            item = Node::mkOutput(std::move(trimmedText));
+        }
+        newSyntax.push_back(std::move(item));
+    }
+
+    std::reverse(newSyntax.begin(), newSyntax.end());
+    syntax = std::move(newSyntax);
+}
+
 };
diff --git a/tests/unit/libutil-support/tests/cli-literate-parser.hh b/tests/unit/libutil-support/tests/cli-literate-parser.hh
index f63a74658e7..c17a1fd18b7 100644
--- a/tests/unit/libutil-support/tests/cli-literate-parser.hh
+++ b/tests/unit/libutil-support/tests/cli-literate-parser.hh
@@ -81,9 +81,16 @@ public:
     /** Parses an input in a non-streaming fashion */
     static auto parse(std::string prompt, std::string_view const & input, size_t indent = 2) -> std::vector<Node>;
 
+    /** Returns, losslessly, the string that would have generated a syntax tree */
+    static auto unparse(std::string const & prompt, std::vector<Node> const & syntax, size_t indent = 2) -> std::string;
+
     /** Consumes a CLILiterateParser and gives you the syntax out of it */
     static auto intoSyntax(CLILiterateParser && inst) -> std::vector<Node>;
 
+    /** Tidies syntax to remove trailing whitespace from outputs and remove any
+     * empty prompts */
+    static void tidyOutputForComparison(std::vector<Node> & syntax);
+
 private:
 
     struct AccumulatingState
diff --git a/tests/unit/libutil-support/tests/terminal-code-eater.cc b/tests/unit/libutil-support/tests/terminal-code-eater.cc
new file mode 100644
index 00000000000..51e1d565ef7
--- /dev/null
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.cc
@@ -0,0 +1,85 @@
+#include "terminal-code-eater.hh"
+#include "debug-char.hh"
+#include <assert.h>
+#include <cstdint>
+#include <iostream>
+
+namespace nix {
+
+static constexpr const bool DEBUG_EATER = false;
+
+void TerminalCodeEater::feed(char c, std::function<void(char)> on_char)
+{
+    auto isParamChar = [](char v) -> bool { return v >= 0x30 && v <= 0x3f; };
+    auto isIntermediateChar = [](char v) -> bool { return v >= 0x20 && v <= 0x2f; };
+    auto isFinalChar = [](char v) -> bool { return v >= 0x40 && v <= 0x7e; };
+    if constexpr (DEBUG_EATER) {
+        std::cerr << "eater" << DebugChar{c} << "\n";
+    }
+
+    switch (state) {
+    case State::ExpectESC:
+        switch (c) {
+        case '\e':
+            transition(State::ExpectESCSeq);
+            return;
+        // Just eat \r, since it is part of clearing a line
+        case '\r':
+            return;
+        }
+        if constexpr (DEBUG_EATER) {
+            std::cerr << "eater uneat" << DebugChar{c} << "\n";
+        }
+        on_char(c);
+        break;
+    case State::ExpectESCSeq:
+        switch (c) {
+        // CSI
+        case '[':
+            transition(State::InCSIParams);
+            return;
+        default:
+            transition(State::ExpectESC);
+            return;
+        }
+        break;
+    // https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
+    // A CSI sequence is: CSI [\x30-\x3f]* [\x20-\x2f]* [\x40-\x7e]
+    //                        ^ params     ^ intermediates ^ final byte
+    case State::InCSIParams:
+        if (isFinalChar(c)) {
+            transition(State::ExpectESC);
+            return;
+        } else if (isIntermediateChar(c)) {
+            transition(State::InCSIIntermediates);
+            return;
+        } else if (isParamChar(c)) {
+            return;
+        } else {
+            // Corrupt escape sequence? Throw an assert, for now.
+            // transition(State::ExpectESC);
+            assert(false && "Corrupt terminal escape sequence");
+            return;
+        }
+        break;
+    case State::InCSIIntermediates:
+        if (isFinalChar(c)) {
+            transition(State::ExpectESC);
+            return;
+        } else if (isIntermediateChar(c)) {
+            return;
+        } else {
+            // Corrupt escape sequence? Throw an assert, for now.
+            // transition(State::ExpectESC);
+            assert(false && "Corrupt terminal escape sequence in intermediates");
+            return;
+        }
+        break;
+    }
+}
+
+void TerminalCodeEater::transition(State new_state)
+{
+    state = new_state;
+}
+};
diff --git a/tests/unit/libutil-support/tests/terminal-code-eater.hh b/tests/unit/libutil-support/tests/terminal-code-eater.hh
new file mode 100644
index 00000000000..d904bcc2069
--- /dev/null
+++ b/tests/unit/libutil-support/tests/terminal-code-eater.hh
@@ -0,0 +1,29 @@
+#pragma once
+/// @file
+
+#include <functional>
+
+namespace nix {
+
+/** DFA that eats terminal escapes
+ *
+ * See: https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
+ */
+class TerminalCodeEater
+{
+public:
+    void feed(char c, std::function<void(char)> on_char);
+
+private:
+    enum class State {
+        ExpectESC,
+        ExpectESCSeq,
+        InCSIParams,
+        InCSIIntermediates,
+    };
+
+    State state = State::ExpectESC;
+
+    void transition(State new_state);
+};
+};

From f24aea283b73b9bd8a5a35654b95e140cfd058c8 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Tue, 12 Mar 2024 19:18:43 -0700
Subject: [PATCH 07/10] Add regression tests for #9917, #9918

---
 .../data/regression_9917.nix                     |  9 +++++++++
 .../data/regression_9917.test                    | 14 ++++++++++++++
 .../data/regression_9918.nix                     |  5 +++++
 .../data/regression_9918.test                    | 16 ++++++++++++++++
 .../repl_characterization.cc                     | 11 +++++++++++
 5 files changed, 55 insertions(+)
 create mode 100644 tests/functional/repl_characterization/data/regression_9917.nix
 create mode 100644 tests/functional/repl_characterization/data/regression_9917.test
 create mode 100644 tests/functional/repl_characterization/data/regression_9918.nix
 create mode 100644 tests/functional/repl_characterization/data/regression_9918.test

diff --git a/tests/functional/repl_characterization/data/regression_9917.nix b/tests/functional/repl_characterization/data/regression_9917.nix
new file mode 100644
index 00000000000..ab49ce3a228
--- /dev/null
+++ b/tests/functional/repl_characterization/data/regression_9917.nix
@@ -0,0 +1,9 @@
+let
+  a = builtins.trace "before inner break" (
+    builtins.break { msg = "hello"; }
+  );
+  b = builtins.trace "before outer break" (
+    builtins.break a
+  );
+in
+  b
diff --git a/tests/functional/repl_characterization/data/regression_9917.test b/tests/functional/repl_characterization/data/regression_9917.test
new file mode 100644
index 00000000000..a37ea5805c8
--- /dev/null
+++ b/tests/functional/repl_characterization/data/regression_9917.test
@@ -0,0 +1,14 @@
+https://github.com/NixOS/nix/pull/9917 (Enter debugger more reliably in let expressions and function calls)
+
+This test ensures that continues don't skip opportunities to enter the debugger.
+  trace: before outer break
+  info: breakpoint reached
+
+  nix-repl> :c
+  trace: before inner break
+  info: breakpoint reached
+
+  nix-repl> :c
+
+  nix-repl> msg
+  "hello"
diff --git a/tests/functional/repl_characterization/data/regression_9918.nix b/tests/functional/repl_characterization/data/regression_9918.nix
new file mode 100644
index 00000000000..3860ce70e0e
--- /dev/null
+++ b/tests/functional/repl_characterization/data/regression_9918.nix
@@ -0,0 +1,5 @@
+let
+  r = [];
+  x = builtins.throw r;
+in
+  x
diff --git a/tests/functional/repl_characterization/data/regression_9918.test b/tests/functional/repl_characterization/data/regression_9918.test
new file mode 100644
index 00000000000..4a1931122e2
--- /dev/null
+++ b/tests/functional/repl_characterization/data/regression_9918.test
@@ -0,0 +1,16 @@
+  error:
+         … while evaluating the error message passed to builtin.throw
+
+         error: cannot coerce a list to a string: [ ]
+
+We expect to be able to see locals like r in the debugger:
+
+  nix-repl> r
+  [ ]
+
+  nix-repl> :env
+  Env level 0
+  static: x r
+
+  Env level 1
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc
index d0e2c0017a0..e32a62e7879 100644
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@@ -93,4 +93,15 @@ TEST_F(ReplSessionTest, repl_basic)
     readTest("basic_repl.test", [this](std::string input) { runReplTest(input); });
 }
 
+#define DEBUGGER_TEST(name) \
+    TEST_F(ReplSessionTest, name) \
+    { \
+        readTest(#name ".test", [this](std::string input) { \
+            runReplTest(input, {"--debugger", "-f", goldenMaster(#name ".nix")}); \
+        }); \
+    }
+
+DEBUGGER_TEST(regression_9918);
+DEBUGGER_TEST(regression_9917);
+
 };

From b6b5272f6dcdf0ad56b85e8f369e1e5736032368 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Tue, 12 Mar 2024 19:18:43 -0700
Subject: [PATCH 08/10] repl_characterization: eat newlines after commands and
 source-dir paths

This is because they are unrepresentable in the source files with
commentary but not in the output, so we should just eat them in
normalization. It's ok.
---
 .../repl_characterization/data/basic.ast      |  6 ++++++
 .../repl_characterization/data/basic.test     |  6 ++++++
 .../data/basic_tidied.ast                     | 10 ++++++++++
 .../repl_characterization.cc                  | 19 ++++++++++++++++++-
 4 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 tests/functional/repl_characterization/data/basic_tidied.ast

diff --git a/tests/functional/repl_characterization/data/basic.ast b/tests/functional/repl_characterization/data/basic.ast
index d494b00aa68..af70e149bc1 100644
--- a/tests/functional/repl_characterization/data/basic.ast
+++ b/tests/functional/repl_characterization/data/basic.ast
@@ -9,3 +9,9 @@ Command "command two"
 Output "output output output"
 Commentary "commentary"
 Output "output output output"
+Output ""
+Commentary "the blank below should be chomped"
+Command "command three"
+Commentary ""
+Output "meow output"
+Output ""
diff --git a/tests/functional/repl_characterization/data/basic.test b/tests/functional/repl_characterization/data/basic.test
index d6b8427b4e9..d62bacbbc9f 100644
--- a/tests/functional/repl_characterization/data/basic.test
+++ b/tests/functional/repl_characterization/data/basic.test
@@ -9,3 +9,9 @@ meow meow
   output output output
 commentary
   output output output
+
+the blank below should be chomped
+  nix-repl> command three
+
+  meow output
+
diff --git a/tests/functional/repl_characterization/data/basic_tidied.ast b/tests/functional/repl_characterization/data/basic_tidied.ast
new file mode 100644
index 00000000000..878065a5cce
--- /dev/null
+++ b/tests/functional/repl_characterization/data/basic_tidied.ast
@@ -0,0 +1,10 @@
+Command "command"
+Output "output output one"
+Output ""
+Output ""
+Output "output output two"
+Command "command two"
+Output "output output output"
+Output "output output output"
+Command "command three"
+Output "meow output"
diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc
index e32a62e7879..1287e689061 100644
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@@ -3,6 +3,7 @@
 #include <string>
 #include <string_view>
 #include <unistd.h>
+#include <boost/algorithm/string/replace.hpp>
 
 #include "test-session.hh"
 #include "util.hh"
@@ -64,7 +65,10 @@ class ReplSessionTest : public CharacterizationTest
         }
         session.close();
 
-        auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, trimOutLog(session.outLog), 0);
+        auto replacedOutLog = boost::algorithm::replace_all_copy(session.outLog, unitTestData, "TEST_DATA");
+        auto cleanedOutLog = trimOutLog(replacedOutLog);
+
+        auto parsedOutLog = CLILiterateParser::parse(AUTOMATION_PROMPT, cleanedOutLog, 0);
 
         CLILiterateParser::tidyOutputForComparison(parsedOutLog);
         CLILiterateParser::tidyOutputForComparison(syntax);
@@ -86,6 +90,19 @@ TEST_F(ReplSessionTest, parses)
         }
         return out.str();
     });
+
+    writeTest("basic_tidied.ast", [this]() {
+        const std::string content = readFile(goldenMaster("basic.test"));
+        auto syntax = CLILiterateParser::parse(REPL_PROMPT, content);
+
+        CLILiterateParser::tidyOutputForComparison(syntax);
+
+        std::ostringstream out{};
+        for (auto & bit : syntax) {
+            out << bit.print() << "\n";
+        }
+        return out.str();
+    });
 }
 
 TEST_F(ReplSessionTest, repl_basic)

From 36ca1c030cb706b7faabad93ad656766ab167b7e Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Tue, 12 Mar 2024 19:18:43 -0700
Subject: [PATCH 09/10] repl_characterization: Also verify the stack trace
 exists

---
 .../data/regression_9917.test                 | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/functional/repl_characterization/data/regression_9917.test b/tests/functional/repl_characterization/data/regression_9917.test
index a37ea5805c8..44ca951b5a6 100644
--- a/tests/functional/repl_characterization/data/regression_9917.test
+++ b/tests/functional/repl_characterization/data/regression_9917.test
@@ -8,6 +8,26 @@ This test ensures that continues don't skip opportunities to enter the debugger.
   trace: before inner break
   info: breakpoint reached
 
+  nix-repl> :bt
+
+  0: error: breakpoint reached
+  «none»:0
+  1: while calling a function
+  TEST_DATA/regression_9917.nix:3:5
+
+       2|   a = builtins.trace "before inner break" (
+       3|     builtins.break { msg = "hello"; }
+        |     ^
+       4|   );
+
+  2: while calling a function
+  TEST_DATA/regression_9917.nix:2:7
+
+       1| let
+       2|   a = builtins.trace "before inner break" (
+        |       ^
+       3|     builtins.break { msg = "hello"; }
+
   nix-repl> :c
 
   nix-repl> msg

From 62eecdb0a1fc8f22f567711ca204895a55cb06a4 Mon Sep 17 00:00:00 2001
From: Jade Lovelace <software@lfcode.ca>
Date: Tue, 12 Mar 2024 19:18:43 -0700
Subject: [PATCH 10/10] Test that :st does ... something

---
 .../repl_characterization/data/stack_vars.nix |  9 +++
 .../data/stack_vars.test                      | 74 +++++++++++++++++++
 .../repl_characterization.cc                  |  1 +
 3 files changed, 84 insertions(+)
 create mode 100644 tests/functional/repl_characterization/data/stack_vars.nix
 create mode 100644 tests/functional/repl_characterization/data/stack_vars.test

diff --git a/tests/functional/repl_characterization/data/stack_vars.nix b/tests/functional/repl_characterization/data/stack_vars.nix
new file mode 100644
index 00000000000..1476bc251c9
--- /dev/null
+++ b/tests/functional/repl_characterization/data/stack_vars.nix
@@ -0,0 +1,9 @@
+let
+  a = builtins.trace "before inner break" (
+    let meow' = 3; in builtins.break { msg = "hello"; }
+  );
+  b = builtins.trace "before outer break" (
+    let meow = 2; in builtins.break a
+  );
+in
+  b
diff --git a/tests/functional/repl_characterization/data/stack_vars.test b/tests/functional/repl_characterization/data/stack_vars.test
new file mode 100644
index 00000000000..0537f9c0329
--- /dev/null
+++ b/tests/functional/repl_characterization/data/stack_vars.test
@@ -0,0 +1,74 @@
+  trace: before outer break
+  info: breakpoint reached
+
+Here we are in the outer break and the let of "meow". st should show meow there
+as it is in scope.
+  nix-repl> :st
+
+  0: error: breakpoint reached
+  «none»:0
+  Env level 0
+  static: meow
+
+  Env level 1
+  static: a b
+
+  Env level 2
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
+
+  nix-repl> meow
+  2
+
+If we :st past the frame in the backtrace with the meow in it, the meow should not be there.
+
+  nix-repl> :st 3
+
+  3: while calling a function
+  TEST_DATA/stack_vars.nix:5:7
+
+       4|   );
+       5|   b = builtins.trace "before outer break" (
+        |       ^
+       6|     let meow = 2; in builtins.break a
+
+  Env level 0
+  static: a b
+
+  Env level 1
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
+
+  nix-repl> :c
+  trace: before inner break
+  info: breakpoint reached
+
+  nix-repl> :st
+
+  0: error: breakpoint reached
+  «none»:0
+  Env level 0
+  static: meow'
+
+  Env level 1
+  static: a b
+
+  Env level 2
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
+
+  nix-repl> meow'
+  3
+
+  nix-repl> :st 3
+
+  3: while calling a function
+  TEST_DATA/stack_vars.nix:2:7
+
+       1| let
+       2|   a = builtins.trace "before inner break" (
+        |       ^
+       3|     let meow' = 3; in builtins.break { msg = "hello"; }
+
+  Env level 0
+  static: a b
+
+  Env level 1
+  builtins true false null scopedImport import isNull break abort throw derivationStrict placeholder baseNameOf dirOf removeAttrs map toString fetchMercurial fetchTree fetchTarball fetchGit fromTOML derivation
diff --git a/tests/functional/repl_characterization/repl_characterization.cc b/tests/functional/repl_characterization/repl_characterization.cc
index 1287e689061..c466156a913 100644
--- a/tests/functional/repl_characterization/repl_characterization.cc
+++ b/tests/functional/repl_characterization/repl_characterization.cc
@@ -120,5 +120,6 @@ TEST_F(ReplSessionTest, repl_basic)
 
 DEBUGGER_TEST(regression_9918);
 DEBUGGER_TEST(regression_9917);
+DEBUGGER_TEST(stack_vars);
 
 };