diff --git a/.gitignore b/.gitignore
index ba72453bb..55841367d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,12 +16,19 @@ bin/
 .*.swp
 
 # Other files
-*.jar
-*.class
 *.er
 *.log
 *.bck
 *.so
+*.patch
+
+# Binaries
+*.jar
+*.class
+
+# Archives
+*.gz
+*.zip
 
 # Serialised models
 *.ser
@@ -29,3 +36,12 @@ bin/
 # Temporary stuff
 junk/*
 .DS_Store
+.ipynb_checkpoints
+
+# Profiling files
+*.jfr
+*.iprof
+*.jfc
+
+# Tutorial files
+tutorials/*.svm
diff --git a/Common/LibSVM/src/main/java/org/tribuo/common/libsvm/LibSVMTrainer.java b/Common/LibSVM/src/main/java/org/tribuo/common/libsvm/LibSVMTrainer.java
index 37e28a489..0885d6d13 100644
--- a/Common/LibSVM/src/main/java/org/tribuo/common/libsvm/LibSVMTrainer.java
+++ b/Common/LibSVM/src/main/java/org/tribuo/common/libsvm/LibSVMTrainer.java
@@ -131,6 +131,7 @@ protected LibSVMTrainer() {}
     /**
      * Constructs a LibSVMTrainer from the parameters.
      * @param parameters The SVM parameters.
+     * @param seed The RNG seed.
      */
     protected LibSVMTrainer(SVMParameters<T> parameters, long seed) {
         this.parameters = parameters.getParameters();
diff --git a/Core/src/main/java/org/tribuo/Model.java b/Core/src/main/java/org/tribuo/Model.java
index 1f42fbcf8..8c0e3f5d3 100644
--- a/Core/src/main/java/org/tribuo/Model.java
+++ b/Core/src/main/java/org/tribuo/Model.java
@@ -300,20 +300,21 @@ public String toString() {
 
     /**
      * Casts the model to the specified output type, assuming it is valid.
-     * <p>
      * If it's not valid, throws {@link ClassCastException}.
-     * @param inputModel The model to cast.
+     * <p>
+     * This method is intended for use on a deserialized model to restore its
+     * generic type in a safe way.
      * @param outputType The output type to cast to.
-     * @param <T> The output type.
+     * @param <U> The output type.
      * @return The model cast to the correct value.
      */
-    public static <T extends Output<T>> Model<T> castModel(Model<?> inputModel, Class<T> outputType) {
-        if (inputModel.validate(outputType)) {
+    public <U extends Output<U>> Model<U> castModel(Class<U> outputType) {
+        if (validate(outputType)) {
             @SuppressWarnings("unchecked") // guarded by validate
-            Model<T> castedModel = (Model<T>) inputModel;
+            Model<U> castedModel = (Model<U>) this;
             return castedModel;
         } else {
-            throw new ClassCastException("Attempted to cast model to " + outputType.getName() + " which is not valid for model " + inputModel.toString());
+            throw new ClassCastException("Attempted to cast model to " + outputType.getName() + " which is not valid for model " + this.toString());
         }
     }
     
diff --git a/Core/src/main/java/org/tribuo/ensemble/BaggingTrainer.java b/Core/src/main/java/org/tribuo/ensemble/BaggingTrainer.java
index 5f20c43e0..765e07dd2 100644
--- a/Core/src/main/java/org/tribuo/ensemble/BaggingTrainer.java
+++ b/Core/src/main/java/org/tribuo/ensemble/BaggingTrainer.java
@@ -49,6 +49,7 @@
  * "The Elements of Statistical Learning"
  * Springer 2001. <a href="http://web.stanford.edu/~hastie/ElemStatLearn/">PDF</a>
  * </pre>
+ * @param <T> The prediction type.
  */
 public class BaggingTrainer<T extends Output<T>> implements Trainer<T> {
     
@@ -177,6 +178,7 @@ public EnsembleModel<T> train(Dataset<T> examples, Map<String, Provenance> runPr
      * @param labelIDs The output domain.
      * @param randInt A random int from an rng instance
      * @param runProvenance Provenance for this instance.
+     * @param invocationCount The invocation count for the inner trainer.
      * @return The trained ensemble member.
      */
     protected Model<T> trainSingleModel(Dataset<T> examples, ImmutableFeatureMap featureIDs, ImmutableOutputInfo<T> labelIDs, int randInt, Map<String,Provenance> runProvenance, int invocationCount) {
diff --git a/Core/src/main/java/org/tribuo/ensemble/EnsembleCombiner.java b/Core/src/main/java/org/tribuo/ensemble/EnsembleCombiner.java
index af3a11e01..ca79a0eb8 100644
--- a/Core/src/main/java/org/tribuo/ensemble/EnsembleCombiner.java
+++ b/Core/src/main/java/org/tribuo/ensemble/EnsembleCombiner.java
@@ -79,9 +79,10 @@ default ONNXNode exportCombiner(ONNXNode input) {
      * will be required to provide ONNX support.
      * @param input the node to be ensembled according to this implementation.
      * @param weight The node of weights for ensembling.
+     * @param <U> The type of the weights input reference.
      * @return The leaf node of the graph of operations added to ensemble input.
      */
-    default <T extends ONNXRef<?>> ONNXNode exportCombiner(ONNXNode input, T weight) {
+    default <U extends ONNXRef<?>> ONNXNode exportCombiner(ONNXNode input, U weight) {
         Logger.getLogger(this.getClass().getName()).severe("Tried to export an ensemble combiner to ONNX format, but this is not implemented.");
         throw new IllegalStateException("This ensemble cannot be exported as the combiner '" + this.getClass() + "' uses the default implementation of EnsembleCombiner.exportCombiner.");
     }
diff --git a/Data/src/main/java/org/tribuo/data/sql/SQLDBConfig.java b/Data/src/main/java/org/tribuo/data/sql/SQLDBConfig.java
index a9b5c9907..bfba02f4a 100644
--- a/Data/src/main/java/org/tribuo/data/sql/SQLDBConfig.java
+++ b/Data/src/main/java/org/tribuo/data/sql/SQLDBConfig.java
@@ -72,7 +72,7 @@ private SQLDBConfig() {}
     /**
      * Constructs a SQL database configuration.
      * <p>
-     * Note it is recommended that wallet based connections are used rather than this constructor using {@link SQLDBConfig(String,Map)}.
+     * Note it is recommended that wallet based connections are used rather than this constructor using {@link #SQLDBConfig(String,Map)}.
      * @param connectionString The connection string.
      * @param username The username.
      * @param password The password.
@@ -87,7 +87,7 @@ public SQLDBConfig(String connectionString, String username, String password, Ma
     /**
      * Constructs a SQL database configuration.
      * <p>
-     * Note it is recommended that wallet based connections are used rather than this constructor using {@link SQLDBConfig(String,Map)}.
+     * Note it is recommended that wallet based connections are used rather than this constructor using {@link #SQLDBConfig(String,Map)}.
      * @param host The host to connect to.
      * @param port The port to connect on.
      * @param db The db name.
diff --git a/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModel.java b/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModel.java
index 904254dc5..ef633b86f 100644
--- a/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModel.java
+++ b/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModel.java
@@ -309,6 +309,7 @@ public static ConfigFileAuthenticationDetailsProvider makeAuthProvider(Path conf
      * @param configFile      The OCI configuration file, if null use the default file.
      * @param endpointURL     The endpoint URL.
      * @param outputConverter The converter for the specified output type.
+     * @param <T> The output type.
      * @return An OCIModel ready to score new inputs.
      */
     public static <T extends Output<T>> OCIModel<T> createOCIModel(OutputFactory<T> factory,
@@ -332,6 +333,7 @@ public static <T extends Output<T>> OCIModel<T> createOCIModel(OutputFactory<T>
      * @param profileName     The profile name in the OCI configuration file, if null uses the default profile.
      * @param endpointURL     The endpoint URL.
      * @param outputConverter The converter for the specified output type.
+     * @param <T> The output type.
      * @return An OCIModel ready to score new inputs.
      */
     public static <T extends Output<T>> OCIModel<T> createOCIModel(OutputFactory<T> factory,
diff --git a/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModelCLI.java b/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModelCLI.java
index 6337a8e6d..81db75aa9 100644
--- a/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModelCLI.java
+++ b/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIModelCLI.java
@@ -64,7 +64,7 @@ private static void createModelAndDeploy(OCIModelOptions options) throws IOExcep
         // Load the Tribuo model
         Model<Label> model;
         try (ObjectInputStream ois = new ObjectInputStream(Files.newInputStream(options.modelPath))) {
-            model = Model.castModel((Model<?>) ois.readObject(),Label.class);
+            model = ((Model<?>)ois.readObject()).castModel(Label.class);
         }
         if (!(model instanceof ONNXExportable)) {
             throw new IllegalArgumentException("Model not ONNXExportable, received " + model.toString());
diff --git a/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIUtil.java b/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIUtil.java
index 312ae3f75..bc3ee48a0 100644
--- a/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIUtil.java
+++ b/Interop/OCI/src/main/java/org/tribuo/interop/oci/OCIUtil.java
@@ -373,6 +373,7 @@ public static <T extends Output<T>, U extends Model<T> & ONNXExportable> String
     /**
      * Creates the OCI DS model artifact zip file.
      * @param onnxFile The ONNX file to create.
+     * @param config The model artifact configuration.
      * @return The path referring to the zip file.
      * @throws IOException If the file could not be created or the ONNX file could not be read.
      */
diff --git a/Json/src/main/java/org/tribuo/json/StripProvenance.java b/Json/src/main/java/org/tribuo/json/StripProvenance.java
index dbdbdab7a..925b49807 100644
--- a/Json/src/main/java/org/tribuo/json/StripProvenance.java
+++ b/Json/src/main/java/org/tribuo/json/StripProvenance.java
@@ -16,18 +16,15 @@
 
 package org.tribuo.json;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.SerializationFeature;
 import com.oracle.labs.mlrg.olcut.config.ConfigurationManager;
 import com.oracle.labs.mlrg.olcut.config.Option;
 import com.oracle.labs.mlrg.olcut.config.Options;
 import com.oracle.labs.mlrg.olcut.config.UsageException;
-import com.oracle.labs.mlrg.olcut.config.json.JsonProvenanceModule;
+import com.oracle.labs.mlrg.olcut.config.json.JsonProvenanceSerialization;
 import com.oracle.labs.mlrg.olcut.provenance.ListProvenance;
 import com.oracle.labs.mlrg.olcut.provenance.ObjectProvenance;
 import com.oracle.labs.mlrg.olcut.provenance.Provenance;
 import com.oracle.labs.mlrg.olcut.provenance.ProvenanceUtil;
-import com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance;
 import com.oracle.labs.mlrg.olcut.provenance.primitives.HashProvenance;
 import com.oracle.labs.mlrg.olcut.util.IOUtil;
 import com.oracle.labs.mlrg.olcut.util.LabsLogFormatter;
@@ -315,11 +312,8 @@ public static <T extends Output<T>> void main(String[] args) {
             ModelProvenance oldProvenance = input.getProvenance();
 
             logger.info("Marshalling provenance and creating JSON.");
-            List<ObjectMarshalledProvenance> list = ProvenanceUtil.marshalProvenance(oldProvenance);
-            ObjectMapper mapper = new ObjectMapper();
-            mapper.registerModule(new JsonProvenanceModule());
-            mapper.enable(SerializationFeature.INDENT_OUTPUT);
-            String jsonResult = mapper.writeValueAsString(list);
+            JsonProvenanceSerialization jsonProvenanceSerialization = new JsonProvenanceSerialization(true);
+            String jsonResult = jsonProvenanceSerialization.marshalAndSerialize(oldProvenance);
 
             logger.info("Hashing JSON file");
             MessageDigest digest = o.hashType.getDigest();
@@ -340,8 +334,7 @@ public static <T extends Output<T>> void main(String[] args) {
 
             ModelProvenance newProvenance = tuple.provenance;
             logger.info("Marshalling provenance and creating JSON.");
-            List<ObjectMarshalledProvenance> newList = ProvenanceUtil.marshalProvenance(newProvenance);
-            String newJsonResult = mapper.writeValueAsString(newList);
+            String newJsonResult = jsonProvenanceSerialization.marshalAndSerialize(newProvenance);
 
             logger.info("Old provenance = \n" + jsonResult);
             logger.info("New provenance = \n" + newJsonResult);
diff --git a/README.md b/README.md
index 9903f6656..398aaf60e 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,9 @@ architectures on Windows 10, macOS and Linux (RHEL/OL/CentOS 7+), as these are
 supported platforms for the native libraries with which we interface. If you're
 interested in another platform and wish to use one of the native library
 interfaces (ONNX Runtime, TensorFlow, and XGBoost), we recommend reaching out
-to the developers of those libraries.
+to the developers of those libraries. Note the reproducibility package
+requires Java 17, and as such is not part of the `tribuo-all` Maven Central
+deployment.
 
 ## Documentation
 
@@ -85,6 +87,7 @@ Tribuo has implementations or interfaces for:
 |Algorithm|Implementation|Notes|
 |---|---|---|
 |Linear models|Tribuo|Uses SGD and allows any gradient optimizer|
+|Factorization Machines|Tribuo|Uses SGD and allows any gradient optimizer|
 |CART|Tribuo||
 |SVM-SGD|Tribuo|An implementation of the Pegasos algorithm|
 |Adaboost.SAMME|Tribuo|Can use any Tribuo classification trainer as the base learner|
@@ -109,6 +112,7 @@ output.
 |Algorithm|Implementation|Notes|
 |---|---|---|
 |Linear models|Tribuo|Uses SGD and allows any gradient optimizer|
+|Factorization Machines|Tribuo|Uses SGD and allows any gradient optimizer|
 |CART|Tribuo||
 |Lasso|Tribuo|Using the LARS algorithm|
 |Elastic Net|Tribuo|Using the co-ordinate descent algorithm|
@@ -124,6 +128,7 @@ algorithms over time.
 
 |Algorithm|Implementation|Notes|
 |---|---|---|
+|HDBSCAN\*|Tribuo||
 |K-Means|Tribuo|Includes both sequential and parallel backends, and the K-Means++ initialisation algorithm|
 
 ### Anomaly Detection
@@ -146,7 +151,9 @@ more multi-label specific implementations over time.
 |Algorithm|Implementation|Notes|
 |---|---|---|
 |Independent wrapper|Tribuo|Converts a multi-class classification algorithm into a multi-label one by producing a separate classifier for each label|
+|Classifier Chains|Tribuo|Provides classifier chains and randomized classifier chain ensembles using any of Tribuo's multi-class classification algorithms|
 |Linear models|Tribuo|Uses SGD and allows any gradient optimizer|
+|Factorization Machines|Tribuo|Uses SGD and allows any gradient optimizer|
 
 ### Interfaces
 
@@ -158,10 +165,10 @@ discuss how it would fit into Tribuo.
 Currently we have interfaces to:
 
 * [LibLinear](https://github.com/bwaldvogel/liblinear-java) - via the LibLinear-java port of the original [LibLinear](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) (v2.43).
-* [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) - using the pure Java transformed version of the C++ implementation (v3.24).
-* [ONNX Runtime](https://onnxruntime.ai) - via the Java API contributed by our group (v1.7.0).
-* [TensorFlow](https://tensorflow.org) - Using [TensorFlow Java](https://github.com/tensorflow/java) v0.3.1 (based on TensorFlow v2.4.1). This allows the training and deployment of TensorFlow models entirely in Java.
-* [XGBoost](https://xgboost.ai) - via the built in XGBoost4J API (v1.4.1).
+* [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) - using the pure Java transformed version of the C++ implementation (v3.25).
+* [ONNX Runtime](https://onnxruntime.ai) - via the Java API contributed by our group (v1.9.0).
+* [TensorFlow](https://tensorflow.org) - Using [TensorFlow Java](https://github.com/tensorflow/java) v0.4.0 (based on TensorFlow v2.7.0). This allows the training and deployment of TensorFlow models entirely in Java.
+* [XGBoost](https://xgboost.ai) - via the built in XGBoost4J API (v1.5.0).
 
 ## Binaries
 
@@ -187,7 +194,7 @@ implementation ("org.tribuo:tribuo-all:4.1.0@pom") {
 ```
 
 The `tribuo-all` dependency is a pom which depends on all the Tribuo
-subprojects.
+subprojects except for the reproducibility project which requires Java 17.
 
 Most of Tribuo is pure Java and thus cross-platform, however some of the
 interfaces link to libraries which use native code. Those interfaces
@@ -197,11 +204,13 @@ are supplied. If you need support for a specific platform, reach out to the
 maintainers of those projects. As of the 4.1 release these native packages
 all provide x86\_64 binaries for Windows, macOS and Linux. It is also possible
 to compile each package for macOS ARM64 (i.e., Apple Silicon), though there are
-no binaries available on Maven Central for that platform.
+no binaries available on Maven Central for that platform. When developing
+on an ARM platform you can select the `arm` profile in Tribuo's pom.xml to
+disable the native library tests.
 
 Individual jars are published for each Tribuo module. It is preferable to
 depend only on the modules necessary for the specific project. This prevents
-your code from unnecessarily pulling in large dependencies like TensorFlow
+your code from unnecessarily pulling in large dependencies like TensorFlow.
 
 ## Compiling from source
 
diff --git a/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXContext.java b/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXContext.java
index 8a175e5d3..420850730 100644
--- a/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXContext.java
+++ b/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXContext.java
@@ -56,7 +56,7 @@ public ONNXContext() {
      * ONNXContext instance. All inputs must belong to the calling instance of ONNXContext. This is the root method for
      * constructing ONNXNodes which all other methods on ONNXContext and {@code ONNXRef} call.
      * @param op An ONNXOperator to add to the graph, taking {@code inputs} as input.
-     * @param inputs A list of {@ONNXRef}s created by this instance of ONNXContext.
+     * @param inputs A list of {@link ONNXRef}s created by this instance of ONNXContext.
      * @param outputs A list of names that the output nodes of {@code op} should take.
      * @param attributes A map of attributes of the operation, passed to {@link ONNXOperators#build(ONNXContext, String, String, Map)}.
      * @param <T> The ONNXRef type of inputs
@@ -82,7 +82,7 @@ public <T extends ONNXRef<?>> List<ONNXNode> operation(ONNXOperators op,
      * IllegalStateException if the operator has multiple outputs. The graph elements created by the operation are added
      * to the calling ONNXContext instance. All inputs must belong to the calling instance of ONNXContext.
      * @param op An ONNXOperator to add to the graph, taking {@code inputs} as input.
-     * @param inputs A list of {@ONNXRef}s created by this instance of ONNXContext.
+     * @param inputs A list of {@link ONNXRef}s created by this instance of ONNXContext.
      * @param outputName Name that the output node of {@code op} should take.
      * @param attributes A map of attributes of the operation, passed to {@link ONNXOperators#build(ONNXContext, String, String, Map)}.
      * @param <T> The ONNXRef type of inputs
@@ -102,7 +102,7 @@ public <T extends ONNXRef<?>> ONNXNode operation(ONNXOperators op, List<T> input
      * IllegalStateException if the operator has multiple outputs. The graph elements created by the operation are added
      * to the calling ONNXContext instance. All inputs must belong to the calling instance of ONNXContext.
      * @param op An ONNXOperator to add to the graph, taking {@code inputs} as input.
-     * @param inputs A list of {@ONNXRef}s created by this instance of ONNXContext.
+     * @param inputs A list of {@link ONNXRef}s created by this instance of ONNXContext.
      * @param outputName Name that the output node of {@code op} should take.
      * @param <T> The ONNXRef type of inputs
      * @return An {@link ONNXNode} that is the output nodes of {@code op}.
diff --git a/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXRef.java b/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXRef.java
index 5f7d26bf7..197dea39f 100644
--- a/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXRef.java
+++ b/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/ONNXRef.java
@@ -36,7 +36,7 @@
  * can thus be passed around without needing to pass their governing context as well.
  * <p>
  * N.B. This class will be sealed once the library is updated past Java 8. Users should not subclass this class.
- * @param <T>
+ * @param <T> The protobuf type this reference generates.
  */
 public abstract class ONNXRef<T extends GeneratedMessageV3> {
     // Unfortunately there is no other shared supertype for OnnxML protobufs
@@ -44,19 +44,36 @@ public abstract class ONNXRef<T extends GeneratedMessageV3> {
     private final String baseName;
     protected final ONNXContext context;
 
-
+    /**
+     * Creates an ONNXRef for the specified context, protobuf and name.
+     * @param context The ONNXContext we're operating in.
+     * @param backRef The protobuf reference.
+     * @param baseName The name of this reference.
+     */
     ONNXRef(ONNXContext context, T backRef, String baseName) {
         this.context = context;
         this.backRef = backRef;
         this.baseName = baseName;
     }
 
+    /**
+     * Gets the output name of this object.
+     * @return The output name.
+     */
     public abstract String getReference();
 
+    /**
+     * Gets the base name of this object.
+     * @return The base name.
+     */
     public String getBaseName() {
         return baseName;
     }
 
+    /**
+     * The context this reference operates in.
+     * @return The context.
+     */
     public ONNXContext onnxContext() {
         return context;
     }
@@ -66,7 +83,7 @@ public ONNXContext onnxContext() {
      * as the first argument to {@code inputs}, with {@code otherInputs} append as subsequent arguments. The other
      * arguments behave as in the analogous method on ONNXContext.
      * @param op An ONNXOperator to add to the graph, taking {@code inputs} as input.
-     * @param otherInputs A list of {@ONNXRef}s created by this instance of ONNXContext.
+     * @param otherInputs A list of {@link ONNXRef}s created by this instance of ONNXContext.
      * @param outputs A list of names that the output nodes of {@code op} should take.
      * @param attributes A map of attributes of the operation, passed to {@link ONNXOperators#build(ONNXContext, String, String, Map)}.
      * @return a list of {@link ONNXNode}s that are the output nodes of {@code op}.
@@ -199,7 +216,7 @@ public <Ret extends ONNXRef<?>> Ret assignTo(Ret output) {
     /**
      * Casts this ONNXRef to a different type using the {@link ONNXOperators#CAST} operation, and returning the output
      * node of that op. Currently supports only float, double, int, and long, which are specified by their respective
-     * {@link Class} objects (eg. {@link float.class}). Throws {@link IllegalArgumentException} when an unsupported cast
+     * {@link Class} objects (e.g., {@code float.class}). Throws {@link IllegalArgumentException} when an unsupported cast
      * is requested.
      * @param clazz The class object specifying the type to cast to.
      * @return An ONNXRef representing this object cast into the requested type.
diff --git a/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/package-info.java b/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/package-info.java
index 4abf7339f..1c2aeedb4 100644
--- a/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/package-info.java
+++ b/Util/ONNXExport/src/main/java/org/tribuo/util/onnx/package-info.java
@@ -15,7 +15,7 @@
  */
 
 /**
- * Interfaces and utilities for writing <a href="https://onnx.ai>ONNX</a> models from Java.
+ * Interfaces and utilities for writing <a href="https://onnx.ai">ONNX</a> models from Java.
  * <p>
  * Developed to support <a href="https://tribuo.org">Tribuo</a>, but can be used to export
  * other machine learning models from JVM languages.
diff --git a/docs/Architecture.md b/docs/Architecture.md
index 054db2f22..3bee56c57 100644
--- a/docs/Architecture.md
+++ b/docs/Architecture.md
@@ -421,3 +421,67 @@ that Tribuo has no knowledge of the true feature names, and the system
 transparently hashes the inputs. The feature names tend to be particularly
 sensitive when working with NLP problems. For example, without such hashing,
 bigrams would appear in the feature domains.
+
+## ONNX Export
+
+From v4.2 Tribuo supports exporting some models in the [ONNX](https://onnx.ai)
+model format. The ONNX format is a cross-platform model exchange format which
+can be loaded in by many different machine learning libraries. Tribuo supports
+inference on ONNX models via ONNX Runtime. Models which can be exported
+implement the `ONNXExportable` interface, which provides methods for
+constructing the ONNX protobuf and serializing it to disk. As of the release of
+4.2, a subset of Tribuo's models are supported: linear models, sparse linear
+models, LibSVM models, factorization machines, and ensembles thereof. We plan
+to expand the set of exportable models in future releases. It is unlikely that
+Tribuo will support direct ONNX export of TensorFlow models, however this can
+be achieved by saving the Tribuo trained model in TensorFlow Saved Model
+format, and then using the Python
+[tf2onnx](https://github.com/onnx/tensorflow-onnx) project to convert that into
+an ONNX file.
+
+### ONNX and provenance
+
+Tribuo-exported ONNX files contain the Tribuo model provenance, stored as a 
+protobuf in the metadata field "TRIBUO\_PROVENANCE". If the model is loaded
+back into Tribuo via ONNX Runtime, then the model provenance can be recovered
+from the file, allowing the reproducibility system and the model tracking
+features to work.
+
+### ONNX and deployment
+
+The ONNX format is widely supported in industry and across cloud providers.
+Many hardware accelerators and edge computing vendors provide ONNX support for
+their inference platforms, and this allows Tribuo-trained models to be widely
+deployed after they have been exported. Tribuo provides an interface to [OCI
+Data Science Model
+Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm)
+which deploys an ONNX model on [Oracle Cloud](https://www.oracle.com/cloud/),
+and also can wrap a model deployment REST endpoint so it appears as a Tribuo
+Model, allowing cloud deployment and inference from Tribuo. ONNX models are
+also supported by [Oracle Machine Learning
+Services](https://docs.oracle.com/en/database/oracle/machine-learning/omlss/index.html),
+and many other cloud providers also provide ONNX model inference services which
+can be used with exported Tribuo ONNX models.
+
+## Reproducibility
+
+From v4.2 Tribuo has a built-in reproducibility system for non-sequence Models.
+This accepts a `Model` or `ModelProvenance` instance, automatically extracts
+the configuration from the instance and then retrains the model, using the
+data loading pipeline and training hyperparameters specified in the model provenance.
+The system produces a diff of the reproduced model's provenance against the
+original provenance, highlighting areas where the new model may behave differently
+to the old one (e.g., showing if the number of features differs, or if the data
+files have changed).
+
+This is useful to check the validity of deployed production models, and to allow
+easy comparison between a production model and one trained on current data. Over
+time we plan to expand this system to support experimenting with different model
+hyperparameters and training data configurations, tracking all this information
+using the provenance built into Tribuo.
+
+The reproducibility system requires Java 17, and as such is not included in the
+`tribuo-all` Maven Central target. It is designed to be used in a development
+environment rather than deployed in a production system like the rest of
+Tribuo.  As Tribuo migrates to newer versions of Java, we will consider
+providing a jlink'd version of this utility.
diff --git a/docs/FAQs.md b/docs/FAQs.md
index 4882dc276..2fc1847d0 100644
--- a/docs/FAQs.md
+++ b/docs/FAQs.md
@@ -90,7 +90,7 @@ libraries, and given Python's lax approach to typing, those methods are only
 part of the API by convention rather that being enforced by the type system. In
 Tribuo, we've separated training from prediction. Tribuo's fit method is called
 "train" and lives on the `Trainer` interface, whereas Tribuo's "predict" method
-lives on the Model class. Tribuo uses the same predict call to produce both the
+lives on the `Model` class. Tribuo uses the same predict call to produce both the
 outputs and the scores for those outputs. Its predict method is the equivalent
 of both "predict" and "predict\_proba" in scikit-learn. We made this separation
 between training and prediction so as to enable the type system to act as a
diff --git a/docs/HelperPrograms.md b/docs/HelperPrograms.md
index b256f115e..b92019766 100644
--- a/docs/HelperPrograms.md
+++ b/docs/HelperPrograms.md
@@ -59,6 +59,14 @@ operations best done in user code if they are required, however we consider
 `StripProvenance` part of the supported API as it performs a complex function
 and is best expressed as a standalone program.
 
+### OCIModelCLI
+
+Found in tribuo-oci, `org.tribuo.interop.oci.OCIModelCLI` can deploy a Tribuo
+multi-class classification model in OCI Data Science using the model deployment
+API. It provides a CLI wrapper around the functions in
+`org.tribuo.interop.oci.OCIUtil`, which can deploy classification, regression
+and multi-label classification models to OCI.
+
 ### PreprocessAndSerialize
 
 Found in tribuo-data, `org.tribuo.data.PreprocessAndSerialize` loads in a
diff --git a/docs/Internals.md b/docs/Internals.md
index 4026dab49..c5c189ce7 100644
--- a/docs/Internals.md
+++ b/docs/Internals.md
@@ -67,6 +67,8 @@ provenance built into their models and evaluations.
 
 ## Tracing a training and evaluation run
 
+This section describes the internal process of a training and evaluation run.
+
 ### DataSource 
 `Example`s are created in a `DataSource`. Preferably they are created with a
 `Feature` list as this ensures the O(n log n) sort cost is paid once, rather than
diff --git a/docs/PackageOverview.md b/docs/PackageOverview.md
index 81696141d..b51e76647 100644
--- a/docs/PackageOverview.md
+++ b/docs/PackageOverview.md
@@ -55,7 +55,7 @@ a math library, and common modules shared across prediction types.
     are always applied at prediction time.
   - `util` - Utilities for basic operations such as for working with arrays and
     random samples.
-- Data - (artifactID `tribuo-data`, package root: `org.tribuo.data`) provides classes which deal with sampled data, columnar data, csv
+- Data - (artifactID: `tribuo-data`, package root: `org.tribuo.data`) provides classes which deal with sampled data, columnar data, csv
   files and text inputs. The user is encouraged to provide their own text
 processing infrastructure implementation, as the one here is fairly basic.
   - `columnar` - The columnar package provides many useful base classes for
@@ -66,9 +66,9 @@ processing infrastructure implementation, as the one here is fairly basic.
     working with JDBC sources.
   - `text` - Text processing infrastructure interfaces and an example
     implementation.
-- Json - (artifactID `tribuo-json`, package root: `org.tribuo.json`) provides functionality
+- Json - (artifactID: `tribuo-json`, package root: `org.tribuo.json`) provides functionality
 for loading from json data sources, and for stripping provenance out of a model.
-- Math - (artifactID `tribuo-math`, package root: `org.tribuo.math`) provides a linear algebra library for working with both sparse
+- Math - (artifactID: `tribuo-math`, package root: `org.tribuo.math`) provides a linear algebra library for working with both sparse
  and dense vectors and matrices.
   - `kernel` - a set of kernel functions for use in the SGD package (and elsewhere).
   - `la` - a linear algebra library containing functions used in the
@@ -79,6 +79,20 @@ should be considered the default algorithm since it works best across the
 widest range of linear SGD problems.
   - `util` - various util classes for working with arrays, vectors and matrices.
 
+## Util libraries
+
+There are 3 utility libraries which are used by Tribuo but do not depend
+on other parts of it.
+
+- InformationTheory - (artifactID: `tribuo-util-infotheory`, package root: `org.tribuo.util.infotheory`) provides discrete information theoretic functions suitable
+for computing clustering metrics, feature selection and structure learning.
+- ONNXExport - (artifactID: `tribuo-util-onnx`, package root: `org.tribuo.util.onnx`) provides infrastructure for building ONNX graphs from Java.
+This package is suitable for use in other JVM libraries which want to write ONNX models, and provides additional type safety and usability over
+directly writing the protobufs.
+- Tokenization - (artifactID: `tribuo-util-tokenization`, package root: `org.tribuo.util.tokens`) provides a tokenization API suitable 
+for feature extraction or information retrieval, along with several tokenizer implementations, including a wordpiece implementation
+suitable for use with models like BERT.
+
 ## Multi-class Classification
 
 Multi-class classification is the act of assigning a single label from a set of
@@ -93,7 +107,7 @@ labels to a test example.  The classification module has several submodules:
 | LibLinear | `tribuo-classification-liblinear` | `org.tribuo.classification.liblinear` | A wrapper around the LibLinear-java library. This provides linear-SVMs and other l1 or l2 regularised linear classifiers. |
 | LibSVM | `tribuo-classification-libsvm` | `org.tribuo.classification.libsvm` | A wrapper around the Java version of LibSVM. This provides linear & kernel SVMs with sigmoid, gaussian and polynomial kernels. |
 | Multinomial Naive Bayes | `tribuo-classification-mnnaivebayes` | `org.tribuo.classification.mnb` | An implementation of a multinomial naive bayes classifier. Since it aims to store a compact in-memory representation of the model, it only keeps track of weights for observed feature/class pairs. |
-| SGD | `tribuo-classification-sgd` | `org.tribuo.classification.sgd` | An implementation of stochastic gradient descent based classifiers. It includes a linear package for logistic regression and linear-SVM (using log and hinge losses, respectively), a kernel package for training a kernel-SVM using the Pegasos algorithm, and a crf package for training a linear-chain CRF. These implementations depend upon the stochastic gradient optimisers in the main Math package. The linear and crf packages can use any of the provided gradient optimisers, which enforce various different kinds of regularisation or convergence metrics. This is the preferred package for linear classification and for sequence classification due to the speed and scalability of the SGD approach. |
+| SGD | `tribuo-classification-sgd` | `org.tribuo.classification.sgd` | An implementation of stochastic gradient descent based classifiers. It includes a linear package for logistic regression and linear-SVM (using log and hinge losses, respectively), a kernel package for training a kernel-SVM using the Pegasos algorithm, a crf package for training a linear-chain CRF, and an fm package for training pairwise factorization machines. These implementations depend upon the stochastic gradient optimisers in the main Math package. The linear, fm, and crf packages can use any of the provided gradient optimisers, which enforce various different kinds of regularisation or convergence metrics. This is the preferred package for linear classification and for sequence classification due to the speed and scalability of the SGD approach. |
 | XGBoost | `tribuo-classification-xgboost` | `org.tribuo.classification.xgboost` | A wrapper around the XGBoost Java API. XGBoost requires a C library accessed via JNI.  XGBoost is a scalable implementation of gradient boosted trees. |
 
 ## Multi-label Classification
@@ -111,7 +125,7 @@ convert a classification trainer into a multi-label trainer.
 | Folder | ArtifactID | Package root | Description |
 | --- | --- | --- | --- |
 | Core | `tribuo-multilabel-core` | `org.tribuo.multilabel` | Contains an Output subclass for multi-label prediction, evaluation code for checking the performance of a multi-label model, and a basic implementation of independent binary predictions. It also contains implementations of Classifier Chains and Classifier Chain Ensembles, which are more powerful ensemble techniques for multi-label prediction tasks. |
-| SGD | `tribuo-multilabel-sgd` | `org.tribuo.multilabel.sgd` | An implementation of stochastic gradient descent based classifiers. It includes a linear package for independent logistic regression and linear-SVM (using log and hinge losses, respectively) for each output label. These implementations depend upon the stochastic gradient optimisers in the main Math package. The linear package can use any of the provided gradient optimisers, which enforce various different kinds of regularisation or convergence metrics. |
+| SGD | `tribuo-multilabel-sgd` | `org.tribuo.multilabel.sgd` | An implementation of stochastic gradient descent based classifiers. It includes a linear package for independent logistic regression and linear-SVM (using log and hinge losses, respectively), along with factorization machines using either loss for each output label. These implementations depend upon the stochastic gradient optimisers in the main Math package. The linear and fm packages can use any of the provided gradient optimisers, which enforce various different kinds of regularisation or convergence metrics. |
 
 ## Regression
 
@@ -124,7 +138,7 @@ This package provides several modules:
 | LibLinear | `tribuo-regression-liblinear` | `org.tribuo.regression.liblinear` | A wrapper around the LibLinear-java library. This provides linear-SVMs and other l1 or l2 regularised linear regressions. |
 | LibSVM | `tribuo-regression-libsvm` | `org.tribuo.regression.libsvm` | A wrapper around the Java version of LibSVM. This provides linear & kernel SVRs with sigmoid, gaussian and polynomial kernels. |
 | RegressionTrees | `tribuo-regression-tree` | `org.tribuo.regression.rtree` | An implementation of two types of CART regression trees. The first type builds a separate tree per output dimension, while the second type builds a single tree for all outputs. |
-| SGD | `tribuo-regression-sgd` | `org.tribuo.regression.sgd` | An implementation of stochastic gradient descent for linear regression. It uses the main Math package's set of gradient optimisers, which allow for various regularisation and descent algorithms. |
+| SGD | `tribuo-regression-sgd` | `org.tribuo.regression.sgd` | An implementation of stochastic gradient descent for linear regression and factorization machine regression. It uses the main Math package's set of gradient optimisers, which allow for various regularisation and descent algorithms. |
 | SLM | `tribuo-regression-slm` | `org.tribuo.regression.slm` | An implementation of sparse linear models. It includes a co-ordinate descent implementation of ElasticNet, a LARS implementation, a LASSO implementation using LARS, and a couple of sequential forward selection algorithms. |
 | XGBoost | `tribuo-regression-xgboost` | `org.tribuo.regression.xgboost` | A wrapper around the XGBoost Java API. XGBoost requires a C library accessed via JNI. |
 
@@ -137,6 +151,7 @@ one cluster. This package provides two modules:
 | Folder | ArtifactID | Package root | Description |
 | --- | --- | --- | --- |
 | Core | `tribuo-clustering-core` | `org.tribuo.clustering` | Contains the Output subclass for use with clustering data, as well as the evaluation code for measuring clustering performance. |
+| HDBSCAN | `tribuo-clustering-hdbscan` | `org.tribuo.clustering.hdbscan` | An implementation of HDBSCAN, a non-parametric density based clustering algorithm. |
 | KMeans | `tribuo-clustering-kmeans` | `org.tribuo.clustering.kmeans` | An implementation of K-Means using the Java 8 Stream API for parallelisation, along with the K-Means++ initialization algorithm. |
 
 ## Anomaly Detection
@@ -165,15 +180,22 @@ Randomized Trees (ExtraTrees).
 Tribuo supports loading a number of third party models which were trained
 outside the system (even in other programming languages) and scoring them from
 Java using Tribuo's infrastructure. Currently, we support loading ONNX,
-TensorFlow and XGBoost models.
+TensorFlow and XGBoost models. Additionally, we support wrapping an
+[OCI Data Science](https://www.oracle.com/data-science/cloud-infrastructure-data-science.html) 
+model deployment in a Tribuo model.
 
+- OCI - Supports deploying Tribuo models to OCI Data Science, and wrapping OCI
+  Data Science models in Tribuo external models to allow them to be served with 
+other Tribuo models.
 - ONNX - [ONNX](https://onnx.ai) (Open Neural Network eXchange) format is used
   by several deep learning systems as an export format, and there are
 converters from systems like scikit-learn to the ONNX format.  Tribuo provides
 a wrapper around Microsoft's [ONNX Runtime](https://onnxruntime.ai) that can
 score ONNX models on both CPU and GPU platforms. ONNX support is found in the
 `tribuo-onnx` artifact in the `org.tribuo.interop.onnx` package which also
-provides a feature extractor that uses BERT embedding models.
+provides a feature extractor that uses BERT embedding models. This package can
+load Tribuo-exported ONNX models and extract the stored Tribuo provenance
+objects from those models.
 - TensorFlow - Tribuo supports loading [TensorFlow](https://tensorflow.org)'s
   frozen graphs and saved models and scoring them.
 - XGBoost - Tribuo supports loading [XGBoost](https://xgboost.ai)
@@ -181,8 +203,8 @@ provides a feature extractor that uses BERT embedding models.
 
 ## TensorFlow
 
-Tribuo includes experimental support for TensorFlow-Java 0.3.1 (using
-TensorFlow 2.4.1) in the `tribuo-tensorflow` artifact in the
+Tribuo includes experimental support for TensorFlow-Java 0.4.0 (using
+TensorFlow 2.7.0) in the `tribuo-tensorflow` artifact in the
 `org.tribuo.interop.tensorflow` package. Models can be defined using
 TensorFlow-Java's graph construction mechanisms, and Tribuo will manage the
 gradient optimizer output function and loss function. It includes a Java
diff --git a/docs/Roadmap.md b/docs/Roadmap.md
index c0c2401a0..85f2cae2a 100644
--- a/docs/Roadmap.md
+++ b/docs/Roadmap.md
@@ -28,7 +28,8 @@ specific operations (though this can be achieved today using `DatasetView` and p
 - Make `Example`s immutable after they've been added to a `Dataset`. This is likely to be a breaking change.
 - Add support for global feature transformations, like normalizing to a unit vector, applying PCA and others.
 - Integrate with a plotting library.
-- ONNX format model export.
+- ONNX format model export. 
+    - In 4.2 we support exporting linear models, sparse linear models, factorization machines, liblinear, libsvm and ensembles containing the previously listed models.
 
 ## Internals
 
@@ -53,22 +54,28 @@ examples, or examples which didn't have suitable features for the model).
 
 ## New ML algorithms or parameters
 
-- ~~Add K-Means++ initialisation for K-Means.~~ Integrated in Tribuo 4.1.
+- ~~Add K-Means++ initialisation for K-Means.~~ 
+    - Integrated in Tribuo 4.1.
 - ~~Add extra parameters to the tree trainers to allow for an ExtraTrees style ensemble, and to 
-specify a minimum purity decrease requirement.~~ Integrated in Tribuo 4.1.
+specify a minimum purity decrease requirement.~~ 
+    - Integrated in Tribuo 4.1.
 - Gaussian Processes.
 - Vowpal Wabbit interface.
 - Feature selection. We already have several feature selection algorithms implemented 
 in a Tribuo compatible interface, but the codebase isn't quite ready for release.
 - Support word embedding features.
-- ~~Support contextualised word embeddings (through the ONNX or TensorFlow interfaces).~~ ONNX support for BERT embeddings is integrated in Tribuo 4.1.
-- More complex Multi-Label prediction algorithms.
+- ~~Support contextualised word embeddings (through the ONNX or TensorFlow interfaces).~~ 
+    - ONNX support for BERT embeddings is integrated in Tribuo 4.1.
+- ~~More complex Multi-Label prediction algorithms.~~
     - A Multi-Label linear SGD is integrated in Tribuo 4.1.
-    - Classifier chains and classifier chain ensembles are planned for Tribuo 4.2.
+    - Multi-label factorization machines are integrated in Tribuo 4.2.
+    - Classifier chains and classifier chain ensembles are integrated in Tribuo 4.2.
 - More anomaly detection algorithms.
     - LibLinear based anomaly detection is integrated in Tribuo 4.1.
 - More clustering algorithms.
-- Factorization machines for classification.
+    - Added HDBSCAN in Tribuo 4.2.
+- ~~Factorization machines for classification and regression.~~ 
+    - Integrated in Tribuo 4.2.
 
 ## Performance
 
@@ -84,5 +91,9 @@ in a Tribuo compatible interface, but the codebase isn't quite ready for release
 
 ## Documentation
 
-- Fill out the javadoc so it exists for all public and protected methods, including constructors.
-- Add more tutorials. Note: Tribuo 4.0.2 adds tutorials for external model loading and columnar data processing, and 4.1 adds tutorials for TensorFlow and document classification
+- Fill out the javadoc so it exists for all public and protected methods, including constructors. 
+    - Javadoc for all public methods and fields is present in Tribuo 4.2.
+- Add more tutorials. 
+    - Tribuo 4.0.2 adds tutorials for external model loading and columnar data processing.
+    - Tribuo 4.1 adds tutorials for TensorFlow and document classification.
+    - Tribuo 4.2 adds tutorials for multi-label classification, ONNX export, and model reproducibility.
diff --git a/docs/Security.md b/docs/Security.md
index 924aefa5c..8a7b0f641 100644
--- a/docs/Security.md
+++ b/docs/Security.md
@@ -44,7 +44,7 @@ native code inside an application container like a JavaEE or JakartaEE server.
 Multiple instances of Tribuo running inside separate containers may cause
 issues with JNI library loading due to ClassLoader security considerations.
 
-## Configuration
+## SecurityManager configuration
 Tribuo uses [OLCUT](https://github.com/oracle/olcut)'s configuration and
 provenance systems, which use reflection to construct and inspect classes.
 Therefore, when running with a Java security manager, you need to give the
@@ -52,7 +52,7 @@ OLCUT jar appropriate permissions. We have tested this set of permissions,
 which allows the configuration and provenance systems to work:
 
     // OLCUT permissions
-    grant codeBase "file:/path/to/olcut/olcut-core-5.1.6.jar" {
+    grant codeBase "file:/path/to/olcut/olcut-core-5.2.0.jar" {
             permission java.lang.RuntimePermission "accessDeclaredMembers";
             permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
             permission java.util.logging.LoggingPermission "control";
@@ -68,7 +68,12 @@ This scope should be narrowed based on your requirements. If you need to save
 an OLCUT configuration, you will also need to add write permissions for the
 save location.
 
-Similar file read and write permissions are necessary for Tribuo to be able to
+Tribuo uses `ForkJoinPool` for parallelism, which requires the `modifyThread`
+and `modifyThreadGroup` privileges when running under a `java.lang.SecurityManager`.
+Therefore classes which have parallel execution inside will require those
+permissions in addition to the ones listed for OLCUT above.
+
+File read and write permissions are necessary for Tribuo to be able to
 load and save models; therefore, you'll need to grant Tribuo those permissions
 using a similar snippet when running with a security manager.
 
diff --git a/docs/example-configs/all-classification-config.xml b/docs/example-configs/all-classification-config.xml
new file mode 100644
index 000000000..a0a9365c5
--- /dev/null
+++ b/docs/example-configs/all-classification-config.xml
@@ -0,0 +1,173 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<!--
+    Description:
+        Configuration for classification trainers.
+-->
+
+<config>
+    <!-- ensembles -->
+    <component name="votecombiner" type="org.tribuo.classification.ensemble.VotingCombiner"/>
+
+    <component name="adaboost" type="org.tribuo.classification.ensemble.AdaBoostTrainer">
+        <property name="innerTrainer" value="cart"/>
+        <property name="numMembers" value="10"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <component name="bagging" type="org.tribuo.ensemble.BaggingTrainer">
+        <property name="innerTrainer" value="cart"/>
+        <property name="combiner" value="votecombiner"/>
+        <property name="numMembers" value="10"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <component name="rf" type="org.tribuo.common.tree.RandomForestTrainer">
+        <property name="innerTrainer" value="cart-rf"/>
+        <property name="combiner" value="votecombiner"/>
+        <property name="numMembers" value="5"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <component name="extra" type="org.tribuo.common.tree.ExtraTreesTrainer">
+        <property name="innerTrainer" value="cart-extra"/>
+        <property name="combiner" value="votecombiner"/>
+        <property name="numMembers" value="5"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <!-- trees -->
+    <component name="cart" type="org.tribuo.classification.dtree.CARTClassificationTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="1.0"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="gini"/>
+    </component>
+
+    <component name="cart-rf" type="org.tribuo.classification.dtree.CARTClassificationTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="0.5"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="gini"/>
+    </component>
+
+    <component name="cart-extra" type="org.tribuo.classification.dtree.CARTClassificationTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="1.0"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="gini"/>
+        <property name="useRandomSplitPoints" value="true"/>
+    </component>
+
+    <component name="gini" type="org.tribuo.classification.dtree.impurity.GiniIndex"/>
+    <component name="entropy" type="org.tribuo.classification.dtree.impurity.Entropy"/>
+
+    <!-- liblinear -->
+    <component name="liblinear" type="org.tribuo.classification.liblinear.LibLinearClassificationTrainer">
+        <property name="trainerType" value="liblinear-algorithm"/>
+        <property name="cost" value="1.0"/>
+        <property name="terminationCriterion" value="0.1"/>
+    </component>
+
+    <component name="liblinear-algorithm" type="org.tribuo.classification.liblinear.LinearClassificationType">
+        <property name="type" value="L2R_L1LOSS_SVC_DUAL"/>
+    </component>
+
+    <!-- libsvm -->
+    <component name="libsvm" type="org.tribuo.classification.libsvm.LibSVMClassificationTrainer">
+        <property name="svmType" value="nu"/>
+        <property name="kernelType" value="RBF"/>
+        <property name="nu" value="0.5"/>
+        <property name="cost" value="1.0"/>
+        <property name="gamma" value="1.0"/>
+        <property name="shrinking" value="true"/>
+        <property name="probability" value="true"/>
+    </component>
+
+    <component name="nu" type="org.tribuo.classification.libsvm.SVMClassificationType">
+        <property name="type" value="NU_SVC"/>
+    </component>
+
+    <!-- Naive Bayes -->
+    <component name="mnb" type="org.tribuo.classification.mnb.MultinomialNaiveBayesTrainer">
+        <property name="alpha" value="1.0"/>
+    </component>
+
+    <!-- SGD based models -->
+    <component name="logistic" type="org.tribuo.classification.sgd.linear.LinearSGDTrainer">
+        <property name="objective" value="log"/>
+        <property name="optimiser" value="adagradparam"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="100"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+    </component>
+
+    <component name="fm" type="org.tribuo.classification.sgd.fm.FMClassificationTrainer">
+        <property name="objective" value="log"/>
+        <property name="optimiser" value="adagrad"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="1000"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+        <property name="factorizedDimSize" value="5"/>
+        <property name="variance" value="0.5"/>
+    </component>
+
+    <component name="log" type="org.tribuo.classification.sgd.objectives.LogMulticlass"/>
+
+    <component name="adagradparam" type="org.tribuo.math.optimisers.ParameterAveraging">
+        <property name="optimiser" value="adagrad"/>
+    </component>
+
+    <component name="adagrad" type="org.tribuo.math.optimisers.AdaGrad">
+        <property name="initialLearningRate" value="1.0"/>
+        <property name="epsilon" value="0.1"/>
+    </component>
+
+    <component name="kernel-svm" type="org.tribuo.classification.sgd.kernel.KernelSVMTrainer">
+        <property name="kernel" value="rbf-kernel"/>
+        <property name="lambda" value="0.001"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <component name="rbf-kernel" type="org.tribuo.math.kernel.RBF">
+        <property name="gamma" value="0.5"/>
+    </component>
+
+    <!-- xgboost -->
+    <component name="xgboost" type="org.tribuo.classification.xgboost.XGBoostClassificationTrainer">
+        <property name="numTrees" value="20"/>
+        <property name="eta" value="0.5"/>
+        <property name="gamma" value="0.1"/>
+        <property name="maxDepth" value="5"/>
+        <property name="minChildWeight" value="1.0"/>
+        <property name="subsample" value="1.0"/>
+        <property name="nThread" value="6"/>
+        <property name="seed" value="1"/>
+    </component>
+
+    <!-- k-nn -->
+    <component name="3-nn" type="org.tribuo.common.nearest.KNNTrainer">
+        <property name="k" value="3"/>
+        <property name="distance" value="L2"/>
+        <property name="combiner" value="votecombiner"/>
+    </component>
+
+</config>
\ No newline at end of file
diff --git a/docs/example-configs/all-multilabel-config.xml b/docs/example-configs/all-multilabel-config.xml
new file mode 100644
index 000000000..9e8ee9534
--- /dev/null
+++ b/docs/example-configs/all-multilabel-config.xml
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<!--
+    Description:
+        Configuration for each multi-label trainer.
+-->
+
+<config>
+    <!-- this label trainer is used in classifier chains and binary relevance -->
+    <component name="logistic" type="org.tribuo.classification.sgd.linear.LinearSGDTrainer">
+        <property name="objective" value="log"/>
+        <property name="optimiser" value="adagrad"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="100"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+    </component>
+
+    <component name="log" type="org.tribuo.classification.sgd.objectives.LogMulticlass"/>
+
+    <component name="adagrad" type="org.tribuo.math.optimisers.AdaGrad">
+        <property name="initialLearningRate" value="1.0"/>
+        <property name="epsilon" value="0.1"/>
+    </component>
+    <!-- end of label trainer -->
+
+    <component name="multi-label-factory" type="org.tribuo.multilabel.MultiLabelFactory"/>
+
+    <!-- sgd based models -->
+    <component name="ml-logistic" type="org.tribuo.multilabel.sgd.linear.LinearSGDTrainer">
+        <property name="objective" value="bce"/>
+        <property name="optimiser" value="adam"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="1000"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+    </component>
+
+    <component name="ml-fm" type="org.tribuo.multilabel.sgd.fm.FMMultiLabelTrainer">
+        <property name="objective" value="bce"/>
+        <property name="optimiser" value="adam"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="1000"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+        <property name="factorizedDimSize" value="5"/>
+        <property name="variance" value="0.5"/>
+    </component>
+
+    <component name="bce" type="org.tribuo.multilabel.sgd.objectives.BinaryCrossEntropy"/>
+
+    <component name="adam" type="org.tribuo.math.optimisers.Adam"/>
+
+    <!-- ensembles -->
+    <component name="ml-cc" type="org.tribuo.multilabel.baseline.ClassifierChainTrainer">
+        <property name="innerTrainer" value="logistic"/>
+        <property name="randomOrder" value="true"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <component name="ml-br" type="org.tribuo.multilabel.baseline.IndependentMultiLabelTrainer">
+        <property name="innerTrainer" value="logistic"/>
+    </component>
+
+    <component name="ml-cc-ensemble" type="org.tribuo.multilabel.ensemble.CCEnsembleTrainer">
+        <property name="innerTrainer" value="logistic"/>
+        <property name="numMembers" value="10"/>
+        <property name="seed" value="12345"/>
+    </component>
+
+    <!-- k-nn -->
+    <component name="ml-3-nn" type="org.tribuo.common.nearest.KNNTrainer">
+        <property name="k" value="3"/>
+        <property name="distance" value="L2"/>
+        <property name="combiner" value="combiner-ml"/>
+    </component>
+
+    <component name="combiner-ml" type="org.tribuo.multilabel.ensemble.MultiLabelVotingCombiner"/>
+</config>
diff --git a/docs/example-configs/all-regression-config.xml b/docs/example-configs/all-regression-config.xml
new file mode 100644
index 000000000..493d51680
--- /dev/null
+++ b/docs/example-configs/all-regression-config.xml
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<!--
+    Description:
+        Configuration for each regression trainer.
+-->
+
+<config>
+    <!-- liblinear -->
+    <component name="liblinear" type="org.tribuo.regression.liblinear.LibLinearRegressionTrainer">
+        <property name="trainerType" value="liblinear-algorithm"/>
+        <property name="cost" value="1.0"/>
+        <property name="terminationCriterion" value="0.1"/>
+        <property name="epsilon" value="0.5"/>
+        <property name="maxIterations" value="1000"/>
+    </component>
+
+    <component name="liblinear-algorithm" type="org.tribuo.regression.liblinear.LinearRegressionType">
+        <property name="type" value="L2R_L2LOSS_SVR_DUAL"/>
+    </component>
+
+    <!-- libsvm -->
+    <component name="libsvm" type="org.tribuo.regression.libsvm.LibSVMRegressionTrainer">
+        <property name="svmType" value="nu"/>
+        <property name="kernelType" value="RBF"/>
+        <property name="nu" value="0.5"/>
+        <property name="gamma" value="0.5"/>
+        <property name="p" value="0.5"/>
+        <property name="shrinking" value="true"/>
+        <property name="probability" value="true"/>
+    </component>
+
+    <component name="nu" type="org.tribuo.regression.libsvm.SVMRegressionType">
+        <property name="type" value="NU_SVR"/>
+    </component>
+
+    <!-- sgd based models -->
+    <component name="linear" type="org.tribuo.regression.sgd.linear.LinearSGDTrainer">
+        <property name="objective" value="squared"/>
+        <property name="optimiser" value="adagrad"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="1000"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+    </component>
+
+    <component name="fm" type="org.tribuo.regression.sgd.fm.FMRegressionTrainer">
+        <property name="objective" value="squared"/>
+        <property name="optimiser" value="adagrad"/>
+        <property name="epochs" value="10"/>
+        <property name="loggingInterval" value="1000"/>
+        <property name="minibatchSize" value="1"/>
+        <property name="seed" value="1"/>
+        <property name="standardise" value="true"/>
+        <property name="factorizedDimSize" value="5"/>
+        <property name="variance" value="0.5"/>
+    </component>
+
+    <component name="squared" type="org.tribuo.regression.sgd.objectives.SquaredLoss"/>
+
+    <component name="adagrad" type="org.tribuo.math.optimisers.AdaGrad">
+        <property name="initialLearningRate" value="1.0"/>
+        <property name="epsilon" value="0.1"/>
+    </component>
+
+    <!-- sparse linear models -->
+    <component name="enet" type="org.tribuo.regression.slm.ElasticNetCDTrainer">
+        <property name="alpha" value="1.0"/>
+        <property name="l1Ratio" value="0.5"/>
+        <property name="tolerance" value="1e-4"/>
+        <property name="maxIterations" value="500"/>
+        <property name="randomise" value="false"/>
+        <property name="seed" value="0"/>
+    </component>
+
+    <component name="lars" type="org.tribuo.regression.slm.LARSTrainer">
+        <property name="maxNumFeatures" value="10"/>
+    </component>
+
+    <!-- xgboost -->
+    <component name="xgboost-reg" type="org.tribuo.regression.xgboost.XGBoostRegressionTrainer">
+        <property name="numTrees" value="20"/>
+        <property name="eta" value="0.5"/>
+        <property name="gamma" value="0.1"/>
+        <property name="maxDepth" value="5"/>
+        <property name="minChildWeight" value="1.0"/>
+        <property name="subsample" value="1.0"/>
+        <property name="nThread" value="6"/>
+        <property name="seed" value="1"/>
+    </component>
+
+    <!-- tree based models -->
+    <component name="cart-reg" type="org.tribuo.regression.rtree.CARTRegressionTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="1.0"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="mse"/>
+    </component>
+    <component name="joint-cart-reg" type="org.tribuo.regression.rtree.CARTJointRegressionTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="1.0"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="mse"/>
+    </component>
+    <component name="mse" type="org.tribuo.regression.rtree.impurity.MeanSquaredError"/>
+    <component name="mae" type="org.tribuo.regression.rtree.impurity.MeanAbsoluteError"/>
+
+    <!-- tree ensembles -->
+    <component name="rf-reg" type="org.tribuo.common.tree.RandomForestTrainer">
+        <property name="innerTrainer" value="cart-reg-rf"/>
+        <property name="numMembers" value="10"/>
+        <property name="seed" value="12345"/>
+        <property name="combiner" value="reg-combiner"/>
+    </component>
+
+    <component name="cart-reg-rf" type="org.tribuo.regression.rtree.CARTRegressionTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="0.5"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="mse"/>
+    </component>
+
+    <component name="extra-reg" type="org.tribuo.common.tree.ExtraTreesTrainer">
+        <property name="innerTrainer" value="cart-reg-extra"/>
+        <property name="numMembers" value="10"/>
+        <property name="seed" value="12345"/>
+        <property name="combiner" value="reg-combiner"/>
+    </component>
+
+    <component name="cart-reg-extra" type="org.tribuo.regression.rtree.CARTRegressionTrainer">
+        <property name="maxDepth" value="8"/>
+        <property name="fractionFeaturesInSplit" value="1.0"/>
+        <property name="useRandomSplitPoints" value="true"/>
+        <property name="seed" value="12345"/>
+        <property name="impurity" value="mse"/>
+    </component>
+
+    <!-- k-nn -->
+    <component name="3-nn" type="org.tribuo.common.nearest.KNNTrainer">
+        <property name="k" value="3"/>
+        <property name="distance" value="L2"/>
+        <property name="combiner" value="reg-combiner"/>
+    </component>
+
+    <component name="reg-combiner" type="org.tribuo.regression.ensemble.AveragingCombiner"/>
+
+    <component name="regression-factory" type="org.tribuo.regression.RegressionFactory"/>
+</config>
diff --git a/docs/jep-290-filter.txt b/docs/jep-290-filter.txt
index 6de2395b2..3d004a0ca 100644
--- a/docs/jep-290-filter.txt
+++ b/docs/jep-290-filter.txt
@@ -1 +1 @@
-org.tribuo.**;libsvm.svm_model;libsvm.svm_parameter;libsvm.svm_node;de.bwaldvogel.liblinear.Model;de.bwaldvogel.liblinear.SolverType;java.util.**;java.lang.*;!*;
+org.tribuo.**;com.oracle.labs.mlrg.olcut.util.*;com.oracle.labs.mlrg.olcut.provenance.**;com.oracle.labs.mlrg.olcut.config.*;libsvm.svm_model;libsvm.svm_parameter;libsvm.svm_node;de.bwaldvogel.liblinear.Model;de.bwaldvogel.liblinear.SolverType;java.util.**;java.io.File;java.nio.file.Path;java.net.URL;java.time.*;java.lang.*;!*
diff --git a/docs/release-notes/tribuo-v4-1-1-release-notes.md b/docs/release-notes/tribuo-v4-1-1-release-notes.md
new file mode 100644
index 000000000..55619c68b
--- /dev/null
+++ b/docs/release-notes/tribuo-v4-1-1-release-notes.md
@@ -0,0 +1,48 @@
+# Tribuo v4.1.1 Release Notes
+
+This is the first patch release for Tribuo v4.1. The main fixes in this release
+are to the multi-dimensional output regression support, and to support the use
+of KMeans and KNN models when running under a restrictive `SecurityManager`.
+Additionally this release pulls in TensorFlow-Java 0.4.0 which upgrades the
+TensorFlow native library to 2.7.0 fixing several CVEs. Note those CVEs may not
+be applicable to TensorFlow-Java, as many of them relate to Python codepaths
+which are not included in TensorFlow-Java. Note the TensorFlow upgrade is a
+breaking API change as graph initialization is handled differently in this
+release, which causes unavoidable changes in Tribuo's TF API.
+
+## Multi-dimensional Regression fix
+
+In Tribuo 4.1.0 and earlier there is a severe bug in multi-dimensional
+regression models (i.e., regression tasks with multiple output dimensions).
+Models other than `LinearSGDModel` and `SparseLinearModel` (apart from when
+using the `ElasticNetCDTrainer`) have a bug in how the output dimension indices
+are constructed, and may produce incorrect outputs for all dimensions (as the
+output will be for a different dimension than the one named in the `Regressor`
+object). This has been fixed, and loading in models trained in earlier versions
+of Tribuo will patch the model to rearrange the dimensions appropriately.
+Unfortunately this fix cannot be applied to tree based models, and so all
+multi-output regression tree based models should be retrained using Tribuo 4.1.1
+or newer as they are irretrievably corrupt. Additionally when using standardization in
+multi-output regression LibSVM models dimensions past the first dimension have
+the model improperly stored and will also need to be retrained with Tribuo 4.1.1 or newer.
+See [#177](https://github.com/oracle/tribuo/pull/177) for more details.
+
+## Bug fixes
+
+- NPE fix for LIME explanations using models which don't support per class weights ([#157](https://github.com/oracle/tribuo/pull/157)).
+- Fixing a bug in multi-label evaluation which swapped FP for FN ([#167](https://github.com/oracle/tribuo/pull/167)).
+- Fixing LibSVM and LibLinear so they have reproducible behaviour ([#172](https://github.com/oracle/tribuo/pull/172)).
+- Provenance fix for TransformTrainer and an extra factory for XGBoostExternalModel so you can make them from an in memory booster ([#176](https://github.com/oracle/tribuo/pull/176)).
+- Fix multidimensional regression ([#177](https://github.com/oracle/tribuo/pull/177)) (fixes regression ids, fixes libsvm so it emits correct standardized models, adds support for per dimension feature weights in XGBoostRegressionModel).
+- Normalize LibSVMDataSource paths consistently in the provenance ([#181](https://github.com/oracle/tribuo/pull/181)).
+- KMeans and KNN now run correctly when using OpenSearch's SecurityManager ([#197](https://github.com/oracle/tribuo/pull/197)).
+- TensorFlow-Java 0.4.0 ([#195](https://github.com/oracle/tribuo/pull/195)).
+
+
+## Contributors
+
+- Adam Pocock ([@Craigacp](https://github.com/Craigacp))
+- Jack Sullivan ([@JackSullivan](https://github.com/JackSullivan))
+- Philip Ogren ([@pogren](https://github.com/pogren))
+- Jeffrey Alexander ([@jhalexand](https://github.com/jhalexand))
+
diff --git a/docs/release-notes/tribuo-v4-2-release-notes.md b/docs/release-notes/tribuo-v4-2-release-notes.md
new file mode 100644
index 000000000..3f64f4faf
--- /dev/null
+++ b/docs/release-notes/tribuo-v4-2-release-notes.md
@@ -0,0 +1,174 @@
+# Tribuo v4.2 Release Notes
+
+Tribuo 4.2 adds new models, ONNX export for several types of models, a
+reproducibility framework for recreating Tribuo models, easy deployment of
+Tribuo models on Oracle Cloud, along with several smaller improvements and bug
+fixes. We've added more tutorials covering the new features along with
+multi-label classification, and further expanded the javadoc to cover all
+public methods.
+
+In Tribuo 4.1.0 and earlier there is a severe bug in multi-dimensional
+regression models (i.e., regression tasks with multiple output dimensions).
+Models other than `LinearSGDModel` and `SparseLinearModel` (apart from when
+using the `ElasticNetCDTrainer`) have a bug in how the output dimension indices
+are constructed, and may produce incorrect outputs for all dimensions (as the
+output will be for a different dimension than the one named in the `Regressor`
+object). This has been fixed, and loading in models trained in earlier versions
+of Tribuo will patch the model to rearrange the dimensions appropriately.
+Unfortunately this fix cannot be applied to tree based models, and so all
+multi-output regression tree based models should be retrained using Tribuo 4.2
+as they are irretrievably corrupt. Additionally when using standardization in
+multi-output regression LibSVM models dimensions past the first dimension have
+the model improperly stored and will also need to be retrained with Tribuo 4.2.
+See [#177](https://github.com/oracle/tribuo/pull/177) for more details.
+
+Note the KMeans implementation had several internal changes to support running
+with a `java.lang.SecurityManager` which will break any subclasses of `KMeansTrainer`.
+In most cases changing the signature of any overridden `mStep` method to match
+the new signature, and allowing the `fjp` argument to be null in single threaded 
+execution will fix the subclass.
+
+## New models
+
+In this release we've added [Factorization
+Machines](https://www.computer.org/csdl/proceedings-article/icdm/2010/4256a995/12OmNwMFMfl),
+[Classifier
+Chains](https://link.springer.com/content/pdf/10.1007/s10994-011-5256-5.pdf)
+and
+[HDBSCAN\*](https://link.springer.com/chapter/10.1007/978-3-642-37456-2_14).
+Factorization machines are a powerful non-linear predictor which uses a
+factorized approximation to learn a per output feature-feature interaction term
+in addition to a linear model. We've added Factorization Machines for
+multi-class classification, multi-label classification and regression.
+Classifier chains are an ensemble approach to multi-label classification which
+given a specific ordering of the labels learns a chain of classifiers where
+each classifier gets the features along with the predicted labels from earlier
+in the chain. We also added ensembles of randomly ordered classifier chains
+which work well in situations when the ground truth label ordering is unknown
+(i.e., most of the time).  HDBSCAN is a hierarchical density based clustering
+algorithm which chooses the number of clusters based on properties of the data
+rather than as a hyperparameter. The Tribuo implementation can cluster a
+dataset, and then at prediction time it provides the cluster the given
+datapoint would be in without modifying the cluster structure.
+
+- Classifier Chains ([#149](https://github.com/oracle/tribuo/pull/149)), which
+  also adds the Jaccard score as a multi-label evaluation metric, and a
+  multi-label voting combiner for use in multi-label ensembles.
+- Factorization machines ([#179](https://github.com/oracle/tribuo/pull/179)).
+- HDBSCAN ([#196](https://github.com/oracle/tribuo/pull/196)).
+
+## ONNX Export
+
+The [ONNX](https://onnx.ai) format is a cross-platform and cross-library model
+exchange format. Tribuo can already serve ONNX models via its [ONNX
+Runtime](https://onnxruntime.ai) interface, and now has the ability to export
+models in ONNX format for serving on edge devices, in cloud services, or in
+other languages like Python or C#.
+
+In this release Tribuo supports exporting linear models (multi-class
+classification, multi-label classification and regression), sparse linear
+regression models, factorization machines (multi-class classification,
+multi-label classification and regression), LibLinear models (multi-class
+classification and regression), LibSVM models (multi-class classification and
+regression), along with ensembles of those models, including arbitrary levels
+of ensemble nesting. We plan to expand this coverage to more models over time,
+however for TensorFlow we recommend users export those models as a Saved Model
+and use the Python tf2onnx converter.
+
+Tribuo models exported in ONNX format preserve their provenance information in
+a metadata field which is accessible when the ONNX model is loaded back into
+Tribuo. The provenance is stored as a protobuf so could be read from other
+libraries or platforms if necessary.
+
+The ONNX export support is in a separate module with no dependencies, and could
+be used elsewhere on the JVM to support generating ONNX graphs. We welcome
+contributions to build out the ONNX support in that module.
+
+- ONNX export for LinearSGDModels
+  ([#154](https://github.com/oracle/tribuo/pull/154)), which also adds a
+  multi-label output transformer for scoring multi-label ONNX models.
+- ONNX export for SparseLinearModel ([#163](https://github.com/oracle/tribuo/pull/163)).
+- Add provenance to ONNX exported models ([#182](https://github.com/oracle/tribuo/pull/182)).
+- Refactor ONNX tensor creation ([#187](https://github.com/oracle/tribuo/pull/187)).
+- ONNX ensemble export support ([#186](https://github.com/oracle/tribuo/pull/186)).
+- ONNX export for LibSVM and LibLinear ([#191](https://github.com/oracle/tribuo/pull/191)).
+- Refactor ONNX support to improve type safety ([#199](https://github.com/oracle/tribuo/pull/199)).
+- Extract ONNX support into separate module ([#TBD](https://github.com/oracle/tribuo/pull/)).
+
+## Reproducibility Framework
+
+Tribuo has strong model metadata support via its provenance system which
+records how models, datasets and evaluations are created. In this release we
+enhance this support by adding a push-button reproduction framework which
+accepts either a model provenance or a model object and rebuilds the complete
+training pipeline, ensuring consistent usage of RNGs and other mutable state.
+
+This allows Tribuo to easily rebuild models to see if updated datasets could
+change performance, or even if the model is actually reproducible (which may be
+required for regulatory reasons).  Over time we hope to expand this support
+into a full experimental framework, allowing models to be rebuilt with
+hyperparameter or data changes as part of the data science process or for
+debugging models in production.
+
+This framework was written by Joseph Wonsil and Prof. Margo Seltzer at the
+University of British Columbia as part of a collaboration between Prof. Seltzer
+and Oracle Labs. We're excited to continue working with Joe, Margo and the rest
+of the lab at UBC, as this is excellent work.
+
+Note the reproducibility framework module requires Java 16 or greater, and is
+thus not included in the `tribuo-all` meta-module.
+
+- Reproducibility framework ([#185](https://github.com/oracle/tribuo/pull/185), with minor changes in [#189](https://github.com/oracle/tribuo/pull/189) and [#190](https://github.com/oracle/tribuo/pull/190)).
+
+## OCI Data Science Integration
+
+[Oracle Cloud Data
+Science](https://www.oracle.com/data-science/cloud-infrastructure-data-science.html)
+is a platform for building and deploying models in Oracle Cloud.  The model
+deployment functionality wraps a Python runtime and deploys models with an
+auto-scaler at a REST endpoint. In this release we've added support for
+deploying Tribuo models which are ONNX exportable directly to OCI DS, allowing
+scale-out deployments of models from the JVM. We also added an `OCIModel`
+wrapper which scores Tribuo `Example` objects using a deployed model's REST
+endpoint, allowing easy use of cloud resources for ML on the JVM.
+
+- Oracle Cloud Data Science integration ([#200](https://github.com/oracle/tribuo/pull/200)).
+
+## Small improvements
+
+- Date field processor and locale support in metadata extractors ([#148](https://github.com/oracle/tribuo/pull/148))
+- Multi-output response processor allowing loading different formats of multi-label and multi-dimensional regression datasets ([#150](https://github.com/oracle/tribuo/pull/150))
+- ARM dev profile for compiling Tribuo on ARM platforms ([#152](https://github.com/oracle/tribuo/pull/152))
+- Refactor CSVLoader so it uses CSVDataSource and parses CSV files using RowProcessor, allowing an easy transition to more complex columnar extraction ([#153](https://github.com/oracle/tribuo/pull/153))
+- Configurable anomaly demo data source ([#160](https://github.com/oracle/tribuo/pull/160))
+- Configurable clustering demo data source ([#161](https://github.com/oracle/tribuo/pull/161))
+- Configurable classification demo data source ([#162](https://github.com/oracle/tribuo/pull/162)) 
+- Multi-Label tutorial and configurable multi-label demo data source ([#166](https://github.com/oracle/tribuo/pull/166)), plus a fix in [#168](https://github.com/oracle/tribuo/pull/168) after [#167](https://github.com/oracle/tribuo/pull/167)
+- Add javadoc for all public methods and fields ([#175](https://github.com/oracle/tribuo/pull/175)) (also fixes a bug in Util.vectorNorm)
+- Add hooks for model equality checks to trees and LibSVM models ([#183](https://github.com/oracle/tribuo/pull/183)) (also fixes a bug in liblinear get top features)
+- XGBoost 1.5.0 ([#192](https://github.com/oracle/tribuo/pull/192))
+- TensorFlow Java 0.4.0 ([#195](https://github.com/oracle/tribuo/pull/195)) (note this changes Tribuo's TF API slightly as TF-Java 0.4.0 has a different method of initializing the session)
+- KMeans now uses dense vectors when appropriate, speeding up training ([#201](https://github.com/oracle/tribuo/pull/201))
+- Documentation updates, ONNX and reproducibility tutorials ([#205](https://github.com/oracle/tribuo/pull/205))
+
+## Bug fixes
+
+- NPE fix for LIME explanations using models which don't support per class weights ([#157](https://github.com/oracle/tribuo/pull/157))
+- Fixing a bug in multi-label evaluation which swapped FP for FN ([#167](https://github.com/oracle/tribuo/pull/167))
+- Persist CSVDataSource headers in the provenance ([#171](https://github.com/oracle/tribuo/pull/171))
+- Fixing LibSVM and LibLinear so they have reproducible behaviour ([#172](https://github.com/oracle/tribuo/pull/172))
+- Provenance fix for TransformTrainer and an extra factory for XGBoostExternalModel so you can make them from an in memory booster ([#176](https://github.com/oracle/tribuo/pull/176))
+- Fix multidimensional regression ([#177](https://github.com/oracle/tribuo/pull/177)) (fixes regression ids, fixes libsvm so it emits correct standardized models, adds support for per dimension feature weights in XGBoostRegressionModel)
+- Fix provenance generation for FieldResponseProcessor and BinaryResponseProcessor ([#178](https://github.com/oracle/tribuo/pull/178)) 
+- Normalize LibSVMDataSource paths consistently in the provenance ([#181](https://github.com/oracle/tribuo/pull/181))
+- KMeans and KNN now run correctly when using OpenSearch's SecurityManager ([#197](https://github.com/oracle/tribuo/pull/197))
+
+## Contributors
+
+- Adam Pocock ([@Craigacp](https://github.com/Craigacp))
+- Jack Sullivan ([@JackSullivan](https://github.com/JackSullivan))
+- Joseph Wonsil ([@jwons](https://github.com/jwons))
+- Philip Ogren ([@pogren](https://github.com/pogren))
+- Jeffrey Alexander ([@jhalexand](https://github.com/jhalexand))
+- Geoff Stewart ([@geoffreydstewart](https://github.com/geoffreydstewart))
+
diff --git a/pom.xml b/pom.xml
index 06a57076f..aa5b22859 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,6 +214,12 @@
                 <artifactId>maven-javadoc-plugin</artifactId>
                 <version>3.3.1</version>
                 <configuration>
+                    <additionalJOptions>
+                        <additionalJOption>-Xmaxerrs</additionalJOption>
+                        <additionalJOption>65536</additionalJOption>
+                        <additionalJOption>-Xmaxwarns</additionalJOption>
+                        <additionalJOption>65536</additionalJOption>
+                    </additionalJOptions>
                     <source>8</source>
                     <show>protected</show>
                     <notimestamp>true</notimestamp>
@@ -288,6 +294,12 @@
                         </goals>
                         <phase>site</phase>
                         <configuration>
+                            <additionalJOptions>
+                                <additionalJOption>-Xmaxerrs</additionalJOption>
+                                <additionalJOption>65536</additionalJOption>
+                                <additionalJOption>-Xmaxwarns</additionalJOption>
+                                <additionalJOption>65536</additionalJOption>
+                            </additionalJOptions>
                             <overview>./Core/src/main/javadoc/overview.html</overview>
                             <bottom>Copyright &#169; 2015&#x2013;2021 Oracle and/or its affiliates. All rights reserved.</bottom>
                             <groups>
diff --git a/tutorials/README.md b/tutorials/README.md
index 0524d6c0a..a51c8334e 100644
--- a/tutorials/README.md
+++ b/tutorials/README.md
@@ -5,8 +5,9 @@ These tutorials require the [IJava](https://github.com/SpencerPark/IJava) Jupyte
 The tutorials expect the data and required jars to be in the same directory as the notebooks. The dataset download
 links are given in the tutorial, and Tribuo's jars are on Maven Central, attached to the GitHub release, or you
 can build it yourself with `mvn clean package` using Apache Maven.
-The code in them should work on Java 8 with the addition of types to replace the use of the `var` keyword
-added in Java 10, and replacing the collections factories introduced in Java 9.
+In most cases code in them should work on Java 8 with the addition of types to replace the use of the `var` keyword
+added in Java 10, and replacing the collections factories introduced in Java 9, with the exception of the reproducibility
+tutorial which requires Java 16+ as the reproducibility package uses newer Java features.
 
 The tutorials cover:
 - [Intro classification with Irises](irises-tribuo-v4.ipynb)
@@ -20,3 +21,5 @@ The tutorials cover:
 - [Document classification and extracting features from text](document-classification-tribuo-v4.ipynb)
 - [Importing third-party models](external-models-tribuo-v4.ipynb)
 - [Training and deploying TensorFlow models](tensorflow-tribuo-v4.ipynb)
+- [ONNX export and deployment](onnx-export-tribuo-v4.ipynb)
+- [Model reproducibility](reproducibility-tribuo-v4.ipynb)
diff --git a/tutorials/configuration-tribuo-v4.ipynb b/tutorials/configuration-tribuo-v4.ipynb
index f54cf227c..0ab70c3d5 100644
--- a/tutorials/configuration-tribuo-v4.ipynb
+++ b/tutorials/configuration-tribuo-v4.ipynb
@@ -89,6 +89,13 @@
     "ConfigurationManager.addFileFormatFactory(new JsonConfigFactory())"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "OLCUT supports XML, JSON, [edn](https://github.com/edn-format/edn), and [protobuf](https://developers.google.com/protocol-buffers) format configuration files. It also supports serialization for `Provenance` objects in XML, JSON, and protobuf formats."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -131,7 +138,7 @@
    "source": [
     "var className = \"org.tribuo.classification.sgd.linear.LinearSGDTrainer\";\n",
     "var clazz = (Class<? extends Configurable>) Class.forName(className);\n",
-    "Map map = DescribeConfigurable.generateFieldInfo(clazz);\n",
+    "var map = DescribeConfigurable.generateFieldInfo(clazz);\n",
     "\n",
     "var output = DescribeConfigurable.generateDescription(map);\n",
     "\n",
@@ -183,13 +190,6 @@
     "System.out.println(writer.toString(\"UTF-8\"));"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "At the moment using it from the REPL is missing some type information in `DescribeConfigurable.generateFieldInfo`, we'll fix that in the next OLCUT release."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -774,6 +774,8 @@
    "source": [
     "We can see that both models perform identically. This is because our provenance system records the RNG seeds used at all points, and Tribuo is scrupulous about how and when it uses PRNGs. If you find a model reconstruction that gives a different answer (unless you're using XGBoost or TensorFlow, both of which have some non-determinism beyond our control) then file an issue on our GitHub as that's a bug.\n",
     "\n",
+    "We provide a simple push-button replication facility in the `tribuo-reproducibility` project, see the tutorial on reproducibility for more details.\n",
+    "\n",
     "## What else lives in the Provenance?\n",
     "\n",
     "These evaluations have provenance in the same way the models do, and we can use a pretty printer in OLCUT to make it a little more human readable.\n",
@@ -1093,7 +1095,7 @@
    "metadata": {},
    "source": [
     "## Conclusion\n",
-    "We've taken a closer look at Tribuo's configuration and provenance systems, showing how to train a model using a configuration file, how to inspect the model's provenance, extract it's configuration, and finally how to combine that extracted configuration with other programmatic elements of the Tribuo library (in this case the feature transformation system). We saw that the provenance combines both the configuration of the trainer and the datasource, along with runtime information extracted from the dataset itself (e.g., timestamps and file hashes).\n",
+    "We've taken a closer look at Tribuo's configuration and provenance systems, showing how to train a model using a configuration file, how to inspect the model's provenance, extract its configuration, and finally how to combine that extracted configuration with other programmatic elements of the Tribuo library (in this case the feature transformation system). We saw that the provenance combines both the configuration of the trainer and the datasource, along with runtime information extracted from the dataset itself (e.g., timestamps and file hashes). Tribuo's provenance objects are also persisted in ONNX model files exported from Tribuo, and these provenances can be recovered later using Tribuo's `ONNXExternalModel` class which provides ONNX model inference. For more details on ONNX export see the ONNX export and deployment tutorial.\n",
     "\n",
     "Tribuo's configuration system is integrated into a CLI options/arguments parsing system, which can be used to override elements from the configuration file. The values from the options are then stored in the `ConfigurationManager` and appear in the provenance and downstream configuration objects as expected. Tribuo also provides a redaction system for configuration files (e.g., to ensure a password isn't stored in the provenance) and for provenance objects themselves (e.g., to remove the data provenance from a trained model), which aids model deployment to untrusted or less trusted systems."
    ]
diff --git a/tutorials/external-models-tribuo-v4.ipynb b/tutorials/external-models-tribuo-v4.ipynb
index a3d0aa50d..f9cf61689 100644
--- a/tutorials/external-models-tribuo-v4.ipynb
+++ b/tutorials/external-models-tribuo-v4.ipynb
@@ -5,7 +5,7 @@
    "metadata": {},
    "source": [
     "# Working with external models\n",
-    "Tribuo can load in models trained in third party systems and deploy them alongside native Tribuo models. In Tribuo 4.1 we support models trained externally in [XGBoost](https://xgboost.ai), [TensorFlow](https://tensorflow.org) frozen graphs & saved models, and models stored in ONNX (Open Neural Network eXchange) format. The latter is particularly interesting for Tribuo as many libraries can export models in ONNX format, such as [scikit-learn](https://scikit-learn.org), [pytorch](https://pytorch.org), TensorFlow among others. For a more complete list of the supported onnx models you can look at the [ONNX website](https://onnx.ai). Tribuo's ONNX support is supplied by [ONNX Runtime](https://microsoft.github.io/onnxruntime/), using the Java interface our group in Oracle Labs contributed to that project.\n",
+    "Tribuo can load in models trained in third party systems and deploy them alongside native Tribuo models. In Tribuo 4.1+ we support models trained externally in [XGBoost](https://xgboost.ai), [TensorFlow](https://tensorflow.org) frozen graphs & saved models, and models stored in ONNX (Open Neural Network eXchange) format. The latter is particularly interesting for Tribuo as many libraries can export models in ONNX format, such as [scikit-learn](https://scikit-learn.org), [pytorch](https://pytorch.org), and TensorFlow, among others. For a more complete list of the supported ONNX models you can look at the [ONNX website](https://onnx.ai). Tribuo's ONNX support is supplied by [ONNX Runtime](https://microsoft.github.io/onnxruntime/), using the Java interface our group in Oracle Labs contributed to that project. Tribuo 4.2 added support for exporting models in ONNX format, and those models can be loaded back into Tribuo using our ONNX Runtime interface.\n",
     "\n",
     "In this tutorial we'll look at loading in models trained in XGBoost, scikit-learn and pytorch, all for MNIST and we'll deploy them next to a logistic regression model trained in Tribuo. We discuss using external TensorFlow models in the [TensorFlow tutorial](https://github.com/oracle/tribuo/blob/main/tutorials/tensorflow-tribuo-v4.ipynb), as TensorFlow brings it's own complexities. Note these models all depend on native libraries, which are available for x86\\_64 platforms on Windows, Linux and macOS. Both ONNX Runtime and XGBoost support macOS arm64 (i.e., Apple Silicon Macs), but you'll need to compile those from source and add them to Tribuo's class path to make this tutorial run on that platform.\n",
     "\n",
@@ -453,7 +453,7 @@
     "## Conclusion\n",
     "We saw how to load in externally trained models in multiple formats, and how to deploy those models alongside Tribuo's native models. We also looked at how ONNX models can accept different tensor shapes as inputs, and used Tribuo's mechanisms for converting an `Example` into either a vector or a tensor depending on if the external model expected a vector or an image as an input.\n",
     "\n",
-    "Given how useful the ONNX model import code is, allowing Tribuo to load in many different kinds of models trained in many different libraries, it's natural to ask what support Tribuo has for exporting ONNX models. At the moment we don't support exporting Tribuo's native models to ONNX format, but we're investigating how to do this purely from Java, and we hope to be able to do this in a future release."
+    "Given how useful the ONNX model import code is, allowing Tribuo to load in many different kinds of models trained in many different libraries, it's natural to ask what support Tribuo has for exporting ONNX models. As of 4.2 Tribuo can export linear models, sparse linear models, LibLinear, LibSVM, factorization machines, and ensembles thereof. We plan to expand this to cover more of Tribuo's models over time."
    ]
   }
  ],
@@ -469,7 +469,7 @@
    "mimetype": "text/x-java-source",
    "name": "Java",
    "pygments_lexer": "java",
-   "version": "17-ea+22-1964"
+   "version": "17+35-LTS-2724"
   }
  },
  "nbformat": 4,
diff --git a/tutorials/irises-tribuo-v4.ipynb b/tutorials/irises-tribuo-v4.ipynb
index 7f06b253f..fa10fe888 100644
--- a/tutorials/irises-tribuo-v4.ipynb
+++ b/tutorials/irises-tribuo-v4.ipynb
@@ -336,17 +336,82 @@
      "text": [
       "TrainTestSplitter(\n",
       "\tclass-name = org.tribuo.evaluation.TrainTestSplitter\n",
-      "\tsource = CSVLoader(\n",
-      "\t\t\tclass-name = org.tribuo.data.csv.CSVLoader\n",
+      "\tsource = CSVDataSource(\n",
+      "\t\t\tclass-name = org.tribuo.data.csv.CSVDataSource\n",
+      "\t\t\theaders = List[\n",
+      "\t\t\t\tsepalLength\n",
+      "\t\t\t\tsepalWidth\n",
+      "\t\t\t\tpetalLength\n",
+      "\t\t\t\tpetalWidth\n",
+      "\t\t\t\tspecies\n",
+      "\t\t\t]\n",
+      "\t\t\trowProcessor = RowProcessor(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.data.columnar.RowProcessor\n",
+      "\t\t\t\t\tmetadataExtractors = List[]\n",
+      "\t\t\t\t\tfieldProcessorList = List[\n",
+      "\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\tfieldName = petalLength\n",
+      "\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\tfieldName = petalWidth\n",
+      "\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\tfieldName = sepalWidth\n",
+      "\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\tfieldName = sepalLength\n",
+      "\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t]\n",
+      "\t\t\t\t\tfeatureProcessors = List[]\n",
+      "\t\t\t\t\tresponseProcessor = FieldResponseProcessor(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.response.FieldResponseProcessor\n",
+      "\t\t\t\t\t\t\tuppercase = false\n",
+      "\t\t\t\t\t\t\tfieldNames = List[\n",
+      "\t\t\t\t\t\t\t\tspecies\n",
+      "\t\t\t\t\t\t\t]\n",
+      "\t\t\t\t\t\t\tdefaultValues = List[\n",
+      "\t\t\t\t\t\t\t\t\n",
+      "\t\t\t\t\t\t\t]\n",
+      "\t\t\t\t\t\t\tdisplayField = false\n",
+      "\t\t\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\thost-short-name = ResponseProcessor\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\tweightExtractor = FieldExtractor(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.FieldExtractor\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\treplaceNewlinesWithSpaces = true\n",
+      "\t\t\t\t\tregexMappingProcessors = Map{}\n",
+      "\t\t\t\t\thost-short-name = RowProcessor\n",
+      "\t\t\t\t)\n",
+      "\t\t\tquote = \"\n",
+      "\t\t\toutputRequired = true\n",
       "\t\t\toutputFactory = LabelFactory(\n",
       "\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
       "\t\t\t\t)\n",
-      "\t\t\tresponse-name = species\n",
       "\t\t\tseparator = ,\n",
-      "\t\t\tquote = \"\n",
-      "\t\t\tpath = file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\n",
-      "\t\t\tfile-modified-time = 1999-12-14T15:12:39-05:00\n",
+      "\t\t\tdataPath = /Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\n",
       "\t\t\tresource-hash = 0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\n",
+      "\t\t\tfile-modified-time = 1999-12-14T15:12:39-05:00\n",
+      "\t\t\tdatasource-creation-time = 2021-11-01T12:52:18.814629-04:00\n",
+      "\t\t\thost-short-name = DataSource\n",
       "\t\t)\n",
       "\ttrain-proportion = 0.7\n",
       "\tseed = 1\n",
@@ -365,7 +430,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We can see the model was trained on a datasource which was split in two, using a specific random seed & split percentage. The original datasource was a CSV file, and the file modified time and SHA-256 hash are recorded too.\n",
+    "We can see the model was trained on a datasource which was split in two, using a specific random seed & split percentage. The original datasource was a CSV file, and the file modified time and SHA-256 hash are recorded too. As of Tribuo v4.2, `CSVLoader` generates a `CSVDataSource`, which allows simpler migration to more complex columnar processing than the old method and produces more accurate provenance information suitable for automatic reproduction of models.\n",
     "\n",
     "We can similarly inspect the trainer provenance to find out about the training algorithm."
    ]
@@ -397,7 +462,7 @@
       "\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n",
       "\t\t\thost-short-name = LabelObjective\n",
       "\t\t)\n",
-      "\ttribuo-version = 4.1.0\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
       "\ttrain-invocation-count = 0\n",
       "\tis-sequence = false\n",
       "\thost-short-name = Trainer\n",
@@ -458,7 +523,7 @@
       "    \"tribuo-version\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"tribuo-version\",\n",
-      "      \"value\" : \"4.1.0\",\n",
+      "      \"value\" : \"4.2.0-SNAPSHOT\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
@@ -466,7 +531,7 @@
       "    \"java-version\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"java-version\",\n",
-      "      \"value\" : \"17-ea\",\n",
+      "      \"value\" : \"17\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
@@ -490,7 +555,7 @@
       "    \"trained-at\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"trained-at\",\n",
-      "      \"value\" : \"2021-05-24T12:27:10.387150-04:00\",\n",
+      "      \"value\" : \"2021-11-01T12:52:19.228195-04:00\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
@@ -553,7 +618,7 @@
       "    \"tribuo-version\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"tribuo-version\",\n",
-      "      \"value\" : \"4.1.0\",\n",
+      "      \"value\" : \"4.2.0-SNAPSHOT\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
@@ -612,7 +677,7 @@
       "    \"tribuo-version\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"tribuo-version\",\n",
-      "      \"value\" : \"4.1.0\",\n",
+      "      \"value\" : \"4.2.0-SNAPSHOT\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
@@ -731,8 +796,8 @@
       "    \"source\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"source\",\n",
-      "      \"value\" : \"csvloader-6\",\n",
-      "      \"provenance-class\" : \"org.tribuo.data.csv.CSVLoader$CSVLoaderProvenance\",\n",
+      "      \"value\" : \"csvdatasource-6\",\n",
+      "      \"provenance-class\" : \"org.tribuo.data.csv.CSVDataSource$CSVDataSourceProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : true\n",
       "    },\n",
@@ -825,31 +890,70 @@
       "  }\n",
       "}, {\n",
       "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
-      "  \"object-name\" : \"csvloader-6\",\n",
-      "  \"object-class-name\" : \"org.tribuo.data.csv.CSVLoader\",\n",
-      "  \"provenance-class\" : \"org.tribuo.data.csv.CSVLoader$CSVLoaderProvenance\",\n",
+      "  \"object-name\" : \"csvdatasource-6\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.csv.CSVDataSource\",\n",
+      "  \"provenance-class\" : \"org.tribuo.data.csv.CSVDataSource$CSVDataSourceProvenance\",\n",
       "  \"map\" : {\n",
       "    \"resource-hash\" : {\n",
-      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
-      "      \"key\" : \"resource-hash\",\n"
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "      \"key\" : \"resource-hash\",\n",
       "      \"value\" : \"0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.HashProvenance\",\n",
       "      \"additional\" : \"SHA256\",\n",
       "      \"is-reference\" : false\n",
       "    },\n",
-      "    \"path\" : {\n",
+      "    \"headers\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ListMarshalledProvenance\",\n",
+      "      \"list\" : [ {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"headers\",\n",
+      "        \"value\" : \"sepalLength\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"headers\",\n",
+      "        \"value\" : \"sepalWidth\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"headers\",\n",
+      "        \"value\" : \"petalLength\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"headers\",\n",
+      "        \"value\" : \"petalWidth\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"headers\",\n",
+      "        \"value\" : \"species\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      } ]\n",
+      "    },\n",
+      "    \"rowProcessor\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
-      "      \"key\" : \"path\",\n",
-      "      \"value\" : \"file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\",\n",
-      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.URLProvenance\",\n",
+      "      \"key\" : \"rowProcessor\",\n",
+      "      \"value\" : \"rowprocessor-7\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
       "      \"additional\" : \"\",\n",
-      "      \"is-reference\" : false\n",
+      "      \"is-reference\" : true\n",
       "    },\n",
       "    \"file-modified-time\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
@@ -867,18 +971,26 @@
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
       "    },\n",
-      "    \"response-name\" : {\n",
+      "    \"outputRequired\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
-      "      \"key\" : \"response-name\",\n",
-      "      \"value\" : \"species\",\n",
-      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"key\" : \"outputRequired\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"datasource-creation-time\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"datasource-creation-time\",\n",
+      "      \"value\" : \"2021-11-01T12:52:18.814629-04:00\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.DateTimeProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
       "    },\n",
       "    \"outputFactory\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"outputFactory\",\n",
-      "      \"value\" : \"labelfactory-7\",\n",
+      "      \"value\" : \"labelfactory-15\",\n",
       "      \"provenance-class\" : \"org.tribuo.classification.LabelFactory$LabelFactoryProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : true\n",
@@ -891,10 +1003,123 @@
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
       "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"DataSource\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n",
+      "      \"value\" : \"org.tribuo.data.csv.CSVDataSource\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"dataPath\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"dataPath\",\n",
+      "      \"value\" : \"/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.FileProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"rowprocessor-7\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.RowProcessor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "  \"map\" : {\n",
+      "    \"metadataExtractors\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ListMarshalledProvenance\",\n",
+      "      \"list\" : [ ]\n",
+      "    },\n",
+      "    \"fieldProcessorList\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ListMarshalledProvenance\",\n",
+      "      \"list\" : [ {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"fieldProcessorList\",\n",
+      "        \"value\" : \"doublefieldprocessor-9\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : true\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"fieldProcessorList\",\n",
+      "        \"value\" : \"doublefieldprocessor-10\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : true\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"fieldProcessorList\",\n",
+      "        \"value\" : \"doublefieldprocessor-11\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : true\n",
+      "      }, {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"fieldProcessorList\",\n",
+      "        \"value\" : \"doublefieldprocessor-12\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : true\n",
+      "      } ]\n",
+      "    },\n",
+      "    \"featureProcessors\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ListMarshalledProvenance\",\n",
+      "      \"list\" : [ ]\n",
+      "    },\n",
+      "    \"responseProcessor\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"responseProcessor\",\n",
+      "      \"value\" : \"fieldresponseprocessor-13\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : true\n",
+      "    },\n",
+      "    \"weightExtractor\" : {\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"weightExtractor\",\n",
+      "      \"value\" : \"fieldextractor-14\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.NullConfiguredProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : true\n",
+      "    },\n",
+      "    \"replaceNewlinesWithSpaces\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"replaceNewlinesWithSpaces\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"regexMappingProcessors\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.MapMarshalledProvenance\",\n",
+      "      \"map\" : { }\n",
+      "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"RowProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
       "    \"class-name\" : {\n",
       "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
       "      \"key\" : \"class-name\",\n",
-      "      \"value\" : \"org.tribuo.data.csv.CSVLoader\",\n",
+      "      \"value\" : \"org.tribuo.data.columnar.RowProcessor\",\n",
       "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
       "      \"additional\" : \"\",\n",
       "      \"is-reference\" : false\n",
@@ -902,7 +1127,7 @@
       "  }\n",
       "}, {\n",
       "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
-      "  \"object-name\" : \"labelfactory-7\",\n",
+      "  \"object-name\" : \"labelfactory-15\",\n",
       "  \"object-class-name\" : \"org.tribuo.classification.LabelFactory\",\n",
       "  \"provenance-class\" : \"org.tribuo.classification.LabelFactory$LabelFactoryProvenance\",\n",
       "  \"map\" : {\n",
@@ -915,6 +1140,284 @@
       "      \"is-reference\" : false\n",
       "    }\n",
       "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"doublefieldprocessor-9\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "  \"map\" : {\n",
+      "    \"fieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"fieldName\",\n",
+      "      \"value\" : \"petalLength\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"onlyFieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"onlyFieldName\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"throwOnInvalid\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"throwOnInvalid\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"FieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n",
+      "      \"value\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"doublefieldprocessor-10\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "  \"map\" : {\n",
+      "    \"fieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"fieldName\",\n",
+      "      \"value\" : \"petalWidth\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"onlyFieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"onlyFieldName\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"throwOnInvalid\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"throwOnInvalid\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"FieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n",
+      "      \"value\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"doublefieldprocessor-11\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "  \"map\" : {\n",
+      "    \"fieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"fieldName\",\n",
+      "      \"value\" : \"sepalWidth\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"onlyFieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"onlyFieldName\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"throwOnInvalid\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"throwOnInvalid\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"FieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "      \"value\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"doublefieldprocessor-12\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "  \"map\" : {\n",
+      "    \"fieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"fieldName\",\n",
+      "      \"value\" : \"sepalLength\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"onlyFieldName\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"onlyFieldName\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"throwOnInvalid\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"throwOnInvalid\",\n",
+      "      \"value\" : \"true\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"FieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n",
+      "      \"value\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"fieldresponseprocessor-13\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.processors.response.FieldResponseProcessor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl\",\n",
+      "  \"map\" : {\n",
+      "    \"uppercase\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"uppercase\",\n",
+      "      \"value\" : \"false\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"fieldNames\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ListMarshalledProvenance\",\n",
+      "      \"list\" : [ {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"fieldNames\",\n",
+      "        \"value\" : \"species\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      } ]\n",
+      "    },\n",
+      "    \"defaultValues\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ListMarshalledProvenance\",\n",
+      "      \"list\" : [ {\n",
+      "        \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "        \"key\" : \"defaultValues\",\n",
+      "        \"value\" : \"\",\n",
+      "        \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "        \"additional\" : \"\",\n",
+      "        \"is-reference\" : false\n",
+      "      } ]\n",
+      "    },\n",
+      "    \"displayField\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"displayField\",\n",
+      "      \"value\" : \"false\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.BooleanProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"outputFactory\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"outputFactory\",\n",
+      "      \"value\" : \"labelfactory-15\",\n",
+      "      \"provenance-class\" : \"org.tribuo.classification.LabelFactory$LabelFactoryProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : true\n",
+      "    },\n",
+      "    \"host-short-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"host-short-name\",\n",
+      "      \"value\" : \"ResponseProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    },\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n",
+      "      \"value\" : \"org.tribuo.data.columnar.processors.response.FieldResponseProcessor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
+      "}, {\n",
+      "  \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.ObjectMarshalledProvenance\",\n",
+      "  \"object-name\" : \"fieldextractor-14\",\n",
+      "  \"object-class-name\" : \"org.tribuo.data.columnar.FieldExtractor\",\n",
+      "  \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.impl.NullConfiguredProvenance\",\n",
+      "  \"map\" : {\n",
+      "    \"class-name\" : {\n",
+      "      \"marshalled-class\" : \"com.oracle.labs.mlrg.olcut.provenance.io.SimpleMarshalledProvenance\",\n",
+      "      \"key\" : \"class-name\",\n",
+      "      \"value\" : \"org.tribuo.data.columnar.FieldExtractor\",\n",
+      "      \"provenance-class\" : \"com.oracle.labs.mlrg.olcut.provenance.primitives.StringProvenance\",\n",
+      "      \"additional\" : \"\",\n",
+      "      \"is-reference\" : false\n",
+      "    }\n",
+      "  }\n",
       "} ]\n"
      ]
     }
@@ -940,7 +1443,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=CSV(class-name=org.tribuo.data.csv.CSVLoader,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),response-name=species,separator=,,quote=\",path=file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,file-modified-time=1999-12-14T15:12:39-05:00,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC]),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.1.0),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.1.0,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-05-24T12:27:10.387150-04:00,instance-values={},tribuo-version=4.1.0,java-version=17-ea,os-name=Mac OS X,os-arch=x86_64)\n"
+      "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=DataSource(class-name=org.tribuo.data.csv.CSVDataSource,headers=[sepalLength, sepalWidth, petalLength, petalWidth, species],rowProcessor=RowProcessor(class-name=org.tribuo.data.columnar.RowProcessor,metadataExtractors=[],fieldProcessorList=[FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), 
FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor)],featureProcessors=[],responseProcessor=ResponseProcessor(class-name=org.tribuo.data.columnar.processors.response.FieldResponseProcessor,uppercase=false,fieldNames=[species],defaultValues=[],displayField=false,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),host-short-name=ResponseProcessor),weightExtractor=null,replaceNewlinesWithSpaces=true,regexMappingProcessors={},host-short-name=RowProcessor),quote=\",outputRequired=true,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),separator=,,dataPath=/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC],file-modified-time=1999-12-14T15:12:39-05:00,datasource-creation-time=2021-11-01T12:52:18.814629-04:00,host-short-name=DataSource),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-11-01T12:52:19.228195-04:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=17,os-name=Mac OS X,os-arch=x86_64)\n"
      ]
     }
    ],
@@ -965,27 +1468,77 @@
      "output_type": "stream",
      "text": [
       "{\n",
-      "  \"tribuo-version\" : \"4.1.0\",\n",
+      "  \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n",
       "  \"dataset-provenance\" : {\n",
       "    \"num-features\" : \"4\",\n",
       "    \"num-examples\" : \"45\",\n",
       "    \"num-outputs\" : \"3\",\n",
-      "    \"tribuo-version\" : \"4.1.0\",\n",
+      "    \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n",
       "    \"datasource\" : {\n",
       "      \"train-proportion\" : \"0.7\",\n",
       "      \"seed\" : \"1\",\n",
       "      \"size\" : \"150\",\n",
       "      \"source\" : {\n",
       "        \"resource-hash\" : \"0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\",\n",
-      "        \"path\" : \"file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\",\n",
+      "        \"headers\" : [ \"sepalLength\", \"sepalWidth\", \"petalLength\", \"petalWidth\", \"species\" ],\n",
+      "        \"rowProcessor\" : {\n",
+      "          \"metadataExtractors\" : [ ],\n",
+      "          \"fieldProcessorList\" : [ {\n",
+      "            \"fieldName\" : \"petalLength\",\n",
+      "            \"onlyFieldName\" : \"true\",\n",
+      "            \"throwOnInvalid\" : \"true\",\n",
+      "            \"host-short-name\" : \"FieldProcessor\",\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "          }, {\n",
+      "            \"fieldName\" : \"petalWidth\",\n",
+      "            \"onlyFieldName\" : \"true\",\n",
+      "            \"throwOnInvalid\" : \"true\",\n",
+      "            \"host-short-name\" : \"FieldProcessor\",\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "          }, {\n",
+      "            \"fieldName\" : \"sepalWidth\",\n",
+      "            \"onlyFieldName\" : \"true\",\n",
+      "            \"throwOnInvalid\" : \"true\",\n",
+      "            \"host-short-name\" : \"FieldProcessor\",\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "          }, {\n",
+      "            \"fieldName\" : \"sepalLength\",\n",
+      "            \"onlyFieldName\" : \"true\",\n",
+      "            \"throwOnInvalid\" : \"true\",\n",
+      "            \"host-short-name\" : \"FieldProcessor\",\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "          } ],\n",
+      "          \"featureProcessors\" : [ ],\n",
+      "          \"responseProcessor\" : {\n",
+      "            \"uppercase\" : \"false\",\n",
+      "            \"fieldNames\" : [ \"species\" ],\n",
+      "            \"defaultValues\" : [ \"\" ],\n",
+      "            \"displayField\" : \"false\",\n",
+      "            \"outputFactory\" : {\n",
+      "              \"class-name\" : \"org.tribuo.classification.LabelFactory\"\n",
+      "            },\n",
+      "            \"host-short-name\" : \"ResponseProcessor\",\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.processors.response.FieldResponseProcessor\"\n",
+      "          },\n",
+      "          \"weightExtractor\" : {\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.FieldExtractor\"\n",
+      "          },\n",
+      "          \"replaceNewlinesWithSpaces\" : \"true\",\n",
+      "          \"regexMappingProcessors\" : { },\n",
+      "          \"host-short-name\" : \"RowProcessor\",\n",
+      "          \"class-name\" : \"org.tribuo.data.columnar.RowProcessor\"\n",
+      "        },\n",
       "        \"file-modified-time\" : \"1999-12-14T15:12:39-05:00\",\n",
       "        \"quote\" : \"\\\"\",\n",
-      "        \"response-name\" : \"species\",\n",
+      "        \"outputRequired\" : \"true\",\n",
+      "        \"datasource-creation-time\" : \"2021-11-01T12:52:18.814629-04:00\",\n",
       "        \"outputFactory\" : {\n",
       "          \"class-name\" : \"org.tribuo.classification.LabelFactory\"\n",
       "        },\n",
       "        \"separator\" : \",\",\n",
-      "        \"class-name\" : \"org.tribuo.data.csv.CSVLoader\"\n",
+      "        \"host-short-name\" : \"DataSource\",\n",
+      "        \"class-name\" : \"org.tribuo.data.csv.CSVDataSource\",\n",
+      "        \"dataPath\" : \"/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\"\n",
       "      },\n",
       "      \"class-name\" : \"org.tribuo.evaluation.TrainTestSplitter\",\n",
       "      \"is-train\" : \"false\"\n",
@@ -998,11 +1551,11 @@
       "  \"class-name\" : \"org.tribuo.provenance.EvaluationProvenance\",\n",
       "  \"model-provenance\" : {\n",
       "    \"instance-values\" : { },\n",
-      "    \"tribuo-version\" : \"4.1.0\",\n",
-      "    \"java-version\" : \"17-ea\",\n",
+      "    \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n",
+      "    \"java-version\" : \"17\",\n",
       "    \"trainer\" : {\n",
       "      \"seed\" : \"12345\",\n",
-      "      \"tribuo-version\" : \"4.1.0\",\n",
+      "      \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n",
       "      \"minibatchSize\" : \"1\",\n",
       "      \"train-invocation-count\" : \"0\",\n",
       "      \"is-sequence\" : \"false\",\n",
@@ -1024,28 +1577,78 @@
       "      }\n",
       "    },\n",
       "    \"os-arch\" : \"x86_64\",\n",
-      "    \"trained-at\" : \"2021-05-24T12:27:10.387150-04:00\",\n",
+      "    \"trained-at\" : \"2021-11-01T12:52:19.228195-04:00\",\n",
       "    \"os-name\" : \"Mac OS X\",\n",
       "    \"dataset\" : {\n",
       "      \"num-features\" : \"4\",\n",
       "      \"num-examples\" : \"105\",\n",
       "      \"num-outputs\" : \"3\",\n",
-      "      \"tribuo-version\" : \"4.1.0\",\n",
+      "      \"tribuo-version\" : \"4.2.0-SNAPSHOT\",\n",
       "      \"datasource\" : {\n",
       "        \"train-proportion\" : \"0.7\",\n",
       "        \"seed\" : \"1\",\n",
       "        \"size\" : \"150\",\n",
       "        \"source\" : {\n",
       "          \"resource-hash\" : \"0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\",\n",
-      "          \"path\" : \"file:/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\",\n",
+      "          \"headers\" : [ \"sepalLength\", \"sepalWidth\", \"petalLength\", \"petalWidth\", \"species\" ],\n",
+      "          \"rowProcessor\" : {\n",
+      "            \"metadataExtractors\" : [ ],\n",
+      "            \"fieldProcessorList\" : [ {\n",
+      "              \"fieldName\" : \"petalLength\",\n",
+      "              \"onlyFieldName\" : \"true\",\n",
+      "              \"throwOnInvalid\" : \"true\",\n",
+      "              \"host-short-name\" : \"FieldProcessor\",\n",
+      "              \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "            }, {\n",
+      "              \"fieldName\" : \"petalWidth\",\n",
+      "              \"onlyFieldName\" : \"true\",\n",
+      "              \"throwOnInvalid\" : \"true\",\n",
+      "              \"host-short-name\" : \"FieldProcessor\",\n",
+      "              \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "            }, {\n",
+      "              \"fieldName\" : \"sepalWidth\",\n",
+      "              \"onlyFieldName\" : \"true\",\n",
+      "              \"throwOnInvalid\" : \"true\",\n",
+      "              \"host-short-name\" : \"FieldProcessor\",\n",
+      "              \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "            }, {\n",
+      "              \"fieldName\" : \"sepalLength\",\n",
+      "              \"onlyFieldName\" : \"true\",\n",
+      "              \"throwOnInvalid\" : \"true\",\n",
+      "              \"host-short-name\" : \"FieldProcessor\",\n",
+      "              \"class-name\" : \"org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\"\n",
+      "            } ],\n",
+      "            \"featureProcessors\" : [ ],\n",
+      "            \"responseProcessor\" : {\n",
+      "              \"uppercase\" : \"false\",\n",
+      "              \"fieldNames\" : [ \"species\" ],\n",
+      "              \"defaultValues\" : [ \"\" ],\n",
+      "              \"displayField\" : \"false\",\n",
+      "              \"outputFactory\" : {\n",
+      "                \"class-name\" : \"org.tribuo.classification.LabelFactory\"\n",
+      "              },\n",
+      "              \"host-short-name\" : \"ResponseProcessor\",\n",
+      "              \"class-name\" : \"org.tribuo.data.columnar.processors.response.FieldResponseProcessor\"\n",
+      "            },\n",
+      "            \"weightExtractor\" : {\n",
+      "              \"class-name\" : \"org.tribuo.data.columnar.FieldExtractor\"\n",
+      "            },\n",
+      "            \"replaceNewlinesWithSpaces\" : \"true\",\n",
+      "            \"regexMappingProcessors\" : { },\n",
+      "            \"host-short-name\" : \"RowProcessor\",\n",
+      "            \"class-name\" : \"org.tribuo.data.columnar.RowProcessor\"\n",
+      "          },\n",
       "          \"file-modified-time\" : \"1999-12-14T15:12:39-05:00\",\n",
       "          \"quote\" : \"\\\"\",\n",
-      "          \"response-name\" : \"species\",\n",
+      "          \"outputRequired\" : \"true\",\n",
+      "          \"datasource-creation-time\" : \"2021-11-01T12:52:18.814629-04:00\",\n",
       "          \"outputFactory\" : {\n",
       "            \"class-name\" : \"org.tribuo.classification.LabelFactory\"\n",
       "          },\n",
       "          \"separator\" : \",\",\n",
-      "          \"class-name\" : \"org.tribuo.data.csv.CSVLoader\"\n",
+      "          \"host-short-name\" : \"DataSource\",\n",
+      "          \"class-name\" : \"org.tribuo.data.csv.CSVDataSource\",\n",
+      "          \"dataPath\" : \"/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\"\n",
       "        },\n",
       "        \"class-name\" : \"org.tribuo.evaluation.TrainTestSplitter\",\n",
       "        \"is-train\" : \"true\"\n",
@@ -1080,7 +1683,7 @@
    "metadata": {},
    "source": [
     "## Loading and saving models\n",
-    "Tribuo uses Java Serialization to save and load models. Models and Datasets are `java.io.Serializable` and can be written to input and output streams in the usual manner. Here we'll go through saving and loading the model we just trained, but the procedure is the same for all other Tribuo models.\n",
+    "Tribuo uses Java Serialization to save and load models. Models and Datasets are `java.io.Serializable` and can be written to input and output streams in the usual manner. Here we'll go through saving and loading the model we just trained, but the procedure is the same for all other Tribuo models. We're going to save this out into the tutorials directory as this model file is used in the reproducibility tutorial.\n",
     "\n",
     "First we save the model out using an `ObjectOutputStream`."
    ]
@@ -1091,7 +1694,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "File tmpFile = File.createTempFile(\"irisModel\",\"ser\");\n",
+    "File tmpFile = new File(\"iris-lr-model.ser\");\n",
     "try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(tmpFile))) {\n",
     "    oos.writeObject(irisModel);\n",
     "}"
@@ -1112,7 +1715,7 @@
    },
    "outputs": [],
    "source": [
-    "String filterPattern = Files.readAllLines(Paths.get(\"../docs/jep-290-allowlist.txt\")).get(0);\n",
+    "String filterPattern = Files.readAllLines(Paths.get(\"../docs/jep-290-filter.txt\")).get(0);\n",
     "ObjectInputFilter filter = ObjectInputFilter.Config.createFilter(filterPattern);\n",
     "Model<?> loadedModel;\n",
     "try (ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(tmpFile)))) {\n",
@@ -1125,7 +1728,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "As Tribuo's models are generically typed, and Java's generics are erased, this requires an unchecked cast to apply the right type to the model. Tribuo has a mechanism for validating that the type is correct, `model.validate(Class<? extends Output<?>>)` which returns true if the supplied class is the same as the internal output type stored in this model."
+    "As Tribuo's models are generically typed, and Java's generics are erased, this requires an unchecked cast to apply the right type to the model. Tribuo has a mechanism for validating that the type is correct, `model.validate(Class<? extends Output<?>>)`, which returns true if the supplied class is the same as the internal output type stored in this model. There's also `model.castModel(Class<U extends Output<U>>)`, which wraps up the validate check and either casts the model appropriately or throws `ClassCastException` if the type is invalid."
    ]
   },
   {
@@ -1199,7 +1802,7 @@
    "mimetype": "text/x-java-source",
    "name": "Java",
    "pygments_lexer": "java",
-   "version": "17-ea+22-1964"
+   "version": "17+35-LTS-2724"
   }
  },
  "nbformat": 4,
diff --git a/tutorials/onnx-export-tribuo-v4.ipynb b/tutorials/onnx-export-tribuo-v4.ipynb
new file mode 100644
index 000000000..77e25c14b
--- /dev/null
+++ b/tutorials/onnx-export-tribuo-v4.ipynb
@@ -0,0 +1,952 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Model export and deployment tutorial\n",
+    "\n",
+    "Tribuo works best as a library which provides training and deployment inside the JVM where the application is running. However, sometimes you need to deploy models elsewhere, either in another programming environment like Python or in a cloud service. To support these use cases, many of Tribuo's models can be exported as [ONNX](https://onnx.ai) models, a cross-platform model exchange format. ONNX is widely supported across industry, for edge devices, hardware accelerators, and cloud services. Tribuo also supports loading in ONNX models and scoring them as native Tribuo models; for more information on that see the external models tutorial.\n",
+    "\n",
+    "This tutorial will show how to export models in ONNX format, how to recover the provenance information from Tribuo-exported ONNX models, and how to deploy an ONNX model in [OCI Data Science](https://www.oracle.com/data-science/cloud-infrastructure-data-science.html), though of course other cloud providers support ONNX models too. We'll show how to export a factorization machine, create an ensemble of a factorization machine along with some other models, and export the ensemble. Then we'll discuss how to interact with the provenance of an exported model, before concluding with deploying that model to OCI.\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "This tutorial requires ONNX Runtime to score the exported models, so by default it will only run on x86\\_64 platforms. ONNX Runtime can be compiled on ARM64 platforms, but that binary is not in the Maven Central jar Tribuo depends on, so it will need to be compiled from scratch to run the tutorial on ARM.\n",
+    "\n",
+    "We're going to use MNIST as the example dataset for this tutorial, so you'll need to download it if you haven't already.\n",
+    "\n",
+    "First the training set:\n",
+    "\n",
+    "`wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz`\n",
+    "\n",
+    "`wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz`\n",
+    "\n",
+    "Then the test set:\n",
+    "\n",
+    "`wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz`\n",
+    "\n",
+    "`wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz`\n",
+    "\n",
+    "As usual we'll load in some jars for classification problems, along with Tribuo's ONNX Runtime and OCI interfaces."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n",
+    "%jars ./tribuo-oci-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n",
+    "%jars ./tribuo-onnx-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n",
+    "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import java.nio.file.Files;\n",
+    "import java.nio.file.Paths;\n",
+    "\n",
+    "import org.tribuo.*;\n",
+    "import org.tribuo.classification.*;\n",
+    "import org.tribuo.classification.ensemble.*;\n",
+    "import org.tribuo.classification.evaluation.*;\n",
+    "import org.tribuo.classification.sgd.fm.FMClassificationTrainer;\n",
+    "import org.tribuo.classification.sgd.linear.*;\n",
+    "import org.tribuo.classification.sgd.objectives.LogMulticlass;\n",
+    "import org.tribuo.ensemble.*;\n",
+    "import org.tribuo.data.csv.CSVLoader;\n",
+    "import org.tribuo.datasource.IDXDataSource;\n",
+    "import org.tribuo.evaluation.TrainTestSplitter;\n",
+    "import org.tribuo.interop.onnx.*;\n",
+    "import org.tribuo.math.optimisers.*;\n",
+    "import org.tribuo.interop.oci.*;\n",
+    "import org.tribuo.util.onnx.*;\n",
+    "import org.tribuo.util.Util;\n",
+    "import com.oracle.bmc.ConfigFileReader;\n",
+    "import com.oracle.bmc.auth.ConfigFileAuthenticationDetailsProvider;\n",
+    "import com.oracle.bmc.datascience.DataScienceClient;\n",
+    "import com.oracle.labs.mlrg.olcut.provenance.ProvenanceUtil;\n",
+    "import com.oracle.labs.mlrg.olcut.util.Pair;\n",
+    "\n",
+    "import ai.onnxruntime.*;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then we'll load in MNIST."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MNIST train size = 60000, number of features = 717, number of classes = 10\n",
+      "MNIST test size = 10000, number of features = 668, number of classes = 10\n"
+     ]
+    }
+   ],
+   "source": [
+    "var labelFactory = new LabelFactory();\n",
+    "var labelEvaluator = new LabelEvaluator();\n",
+    "var mnistTrainSource = new IDXDataSource<>(Paths.get(\"train-images-idx3-ubyte.gz\"),Paths.get(\"train-labels-idx1-ubyte.gz\"),labelFactory);\n",
+    "var mnistTestSource = new IDXDataSource<>(Paths.get(\"t10k-images-idx3-ubyte.gz\"),Paths.get(\"t10k-labels-idx1-ubyte.gz\"),labelFactory);\n",
+    "var mnistTrain = new MutableDataset<>(mnistTrainSource);\n",
+    "var mnistTest = new MutableDataset<>(mnistTestSource);\n",
+    "System.out.println(String.format(\"MNIST train size = %d, number of features = %d, number of classes = %d\",mnistTrain.size(),mnistTrain.getFeatureMap().size(),mnistTrain.getOutputInfo().size()));\n",
+    "System.out.println(String.format(\"MNIST test size = %d, number of features = %d, number of classes = %d\",mnistTest.size(),mnistTest.getFeatureMap().size(),mnistTest.getOutputInfo().size()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Exporting a single classification model\n",
+    "\n",
+    "We're going to train a multi-class [Factorization Machine](https://ieeexplore.ieee.org/document/5694074), which is a non-linear model that approximates all the non-linear feature interactions with a small per-feature embedding vector. It's similar to a logistic regression with an additional feature-feature interaction term, one per output label. In Tribuo, Factorization Machines can be trained using stochastic gradient descent, using the standard SGD algorithms Tribuo uses for other models. We're going to use AdaGrad as it's usually a good baseline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var fmLabelTrainer = new FMClassificationTrainer(new LogMulticlass(),  // Loss function\n",
+    "                                                 new AdaGrad(0.1,0.1), // Gradient optimiser\n",
+    "                                                 5,                    // Number of training epochs\n",
+    "                                                 30000,                // Logging interval\n",
+    "                                                 Trainer.DEFAULT_SEED, // RNG seed\n",
+    "                                                 6,                    // Factor size\n",
+    "                                                 0.1                   // Factor initialisation variance\n",
+    "                                                 );"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After defining the model we train it as usual. Factorization machines take a little longer to train than logistic regression does, but not excessively so."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training factorization machine took (00:00:18:816)\n"
+     ]
+    }
+   ],
+   "source": [
+    "var fmStartTime = System.currentTimeMillis();\n",
+    "var fmMNIST = fmLabelTrainer.train(mnistTrain);\n",
+    "var fmEndTime = System.currentTimeMillis();\n",
+    "System.out.println(\"Training factorization machine took \" + Util.formatDuration(fmStartTime,fmEndTime));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And then we evaluate it using Tribuo's built-in evaluation system."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Scoring factorization machine took (00:00:00:475)\n",
+      "Class                           n          tp          fn          fp      recall        prec          f1\n",
+      "0                             980         959          21          31       0.979       0.969       0.974\n",
+      "1                           1,135       1,120          15          22       0.987       0.981       0.984\n",
+      "2                           1,032         976          56          57       0.946       0.945       0.945\n",
+      "3                           1,010         952          58          39       0.943       0.961       0.952\n",
+      "4                             982         952          30          49       0.969       0.951       0.960\n",
+      "5                             892         857          35          63       0.961       0.932       0.946\n",
+      "6                             958         920          38          30       0.960       0.968       0.964\n",
+      "7                           1,028         969          59          36       0.943       0.964       0.953\n",
+      "8                             974         916          58          57       0.940       0.941       0.941\n",
+      "9                           1,009         951          58          44       0.943       0.956       0.949\n",
+      "Total                      10,000       9,572         428         428\n",
+      "Accuracy                                                                    0.957\n",
+      "Micro Average                                                               0.957       0.957       0.957\n",
+      "Macro Average                                                               0.957       0.957       0.957\n",
+      "Balanced Error Rate                                                         0.043\n",
+      "               0       1       2       3       4       5       6       7       8       9\n",
+      "0            959       0       0       0       1       2       7       4       4       3\n",
+      "1              0   1,120       4       1       3       0       3       0       4       0\n",
+      "2              6       5     976       7       7       2       5       8      14       2\n",
+      "3              0       2      15     952       0      19       1       3      14       4\n",
+      "4              3       3       7       1     952       0       4       1       1      10\n",
+      "5              3       1       0       6       1     857       5       5      13       1\n",
+      "6              8       2       7       2       7      11     920       1       0       0\n",
+      "7              2       5      13       5       4       4       0     969       4      22\n",
+      "8              2       1       9       9      11      15       4       5     916       2\n",
+      "9              7       3       2       8      15      10       1       9       3     951\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "fmStartTime = System.currentTimeMillis();\n",
+    "var mnistFMEval = labelEvaluator.evaluate(fmMNIST,mnistTest);\n",
+    "fmEndTime = System.currentTimeMillis();\n",
+    "System.out.println(\"Scoring factorization machine took \" + Util.formatDuration(fmStartTime,fmEndTime));\n",
+    "System.out.println(mnistFMEval.toString());\n",
+    "System.out.println(mnistFMEval.getConfusionMatrix().toString());"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We get about 96% accuracy on MNIST, which is pretty good for a fairly simple model. Now let's export it to ONNX, then we'll load it back in via Tribuo's ONNX Runtime interface and compare the performance. We'll use this model in the reproducibility tutorial, so we'll save it to disk in the tutorials folder.\n",
+    "\n",
+    "Tribuo `Model`s which support ONNX export implement the `ONNXExportable` interface which defines methods for constructing an ONNX protobuf and saving it to disk."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var fmMNISTPath = Paths.get(\".\",\"fm-mnist.onnx\");\n",
+    "fmMNIST.saveONNXModel(\"org.tribuo.tutorials.onnxexport.fm\", // namespace for the model\n",
+    "                      0,                                    // model version number\n",
+    "                      fmMNISTPath                           // path to save the model\n",
+    "                      );"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To load an ONNX model we need to define the mapping between Tribuo's feature names and the indices that the ONNX model understands. Fortunately for models exported from Tribuo we already have that information, as it is stored in the feature and output maps. We'll extract it into the general form that `ONNXExternalModel` expects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Map<String, Integer> mnistFeatureMap = new HashMap<>();\n",
+    "for (VariableInfo f : fmMNIST.getFeatureIDMap()){\n",
+    "    VariableIDInfo id = (VariableIDInfo) f;\n",
+    "    mnistFeatureMap.put(id.getName(),id.getID());\n",
+    "}\n",
+    "Map<Label, Integer> mnistOutputMap = new HashMap<>();\n",
+    "for (Pair<Integer,Label> l : fmMNIST.getOutputIDInfo()) {\n",
+    "    mnistOutputMap.put(l.getB(), l.getA());\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we'll define a test function that compares two sets of predictions. ONNX Runtime uses single precision for computations while Tribuo uses double precision, so the prediction scores are never bitwise equal and we need to compare them within a tolerance."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "public boolean checkPredictions(List<Prediction<Label>> nativePredictions, List<Prediction<Label>> onnxPredictions, double delta) {\n",
+    "    for (int i = 0; i < nativePredictions.size(); i++) {\n",
+    "        Prediction<Label> tribuo = nativePredictions.get(i);\n",
+    "        Prediction<Label> external = onnxPredictions.get(i);\n",
+    "        // Check the predicted label\n",
+    "        if (!tribuo.getOutput().getLabel().equals(external.getOutput().getLabel())) {\n",
+    "            System.out.println(\"At index \" + i + \" predictions are not equal - \"\n",
+    "                    + tribuo.getOutput().getLabel() + \" and \"\n",
+    "                    + external.getOutput().getLabel());\n",
+    "            return false;\n",
+    "        }\n",
+    "        // Check the maximum score\n",
+    "        if (Math.abs(tribuo.getOutput().getScore() - external.getOutput().getScore()) > delta) {\n",
+    "            System.out.println(\"At index \" + i + \" predictions are not equal - \"\n",
+    "                    + tribuo.getOutput() + \" and \"\n",
+    "                    + external.getOutput());\n",
+    "            return false;\n",
+    "        }\n",
+    "        // Check the score distribution\n",
+    "        for (Map.Entry<String, Label> l : tribuo.getOutputScores().entrySet()) {\n",
+    "            Label other = external.getOutputScores().get(l.getKey());\n",
+    "            if (other == null) {\n",
+    "                System.out.println(\"At index \" + i + \" failed to find label \" + l.getKey() + \" in ORT prediction.\");\n",
+    "                return false;\n",
+    "            } else {\n",
+    "                if (Math.abs(l.getValue().getScore() - other.getScore()) > delta) {\n",
+    "                    System.out.println(\"At index \" + i + \" predictions are not equal - \"\n",
+    "                            + tribuo.getOutputScores() + \" and \"\n",
+    "                            + external.getOutputScores());\n",
+    "                    return false;\n",
+    "                }\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "    return true;\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then we'll construct the `ONNXExternalModel` loading our freshly created ONNX model using the feature and output mappings we built earlier. First we create a `SessionOptions` which controls the model inference. By default it uses a single thread on one CPU, but by setting values in the options object before building the external model we can make it run on multiple threads, use GPUs or other accelerator hardware supported by ONNX Runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var ortEnv = OrtEnvironment.getEnvironment();\n",
+    "var sessionOpts = new OrtSession.SessionOptions();\n",
+    "var denseTransformer = new DenseTransformer();\n",
+    "var labelTransformer = new LabelTransformer();\n",
+    "ONNXExternalModel<Label> onnxFM = ONNXExternalModel.createOnnxModel(labelFactory, mnistFeatureMap, mnistOutputMap,\n",
+    "                    denseTransformer, labelTransformer, sessionOpts, fmMNISTPath, \"input\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An `ONNXExternalModel` is a Tribuo model so we can use the same evaluation infrastructure."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Scoring ONNX factorization machine took (00:00:01:578)\n",
+      "Class                           n          tp          fn          fp      recall        prec          f1\n",
+      "0                             980         959          21          31       0.979       0.969       0.974\n",
+      "1                           1,135       1,120          15          22       0.987       0.981       0.984\n",
+      "2                           1,032         976          56          57       0.946       0.945       0.945\n",
+      "3                           1,010         952          58          39       0.943       0.961       0.952\n",
+      "4                             982         952          30          49       0.969       0.951       0.960\n",
+      "5                             892         857          35          63       0.961       0.932       0.946\n",
+      "6                             958         920          38          30       0.960       0.968       0.964\n",
+      "7                           1,028         969          59          36       0.943       0.964       0.953\n",
+      "8                             974         916          58          57       0.940       0.941       0.941\n",
+      "9                           1,009         951          58          44       0.943       0.956       0.949\n",
+      "Total                      10,000       9,572         428         428\n",
+      "Accuracy                                                                    0.957\n",
+      "Micro Average                                                               0.957       0.957       0.957\n",
+      "Macro Average                                                               0.957       0.957       0.957\n",
+      "Balanced Error Rate                                                         0.043\n",
+      "               0       1       2       3       4       5       6       7       8       9\n",
+      "0            959       0       0       0       1       2       7       4       4       3\n",
+      "1              0   1,120       4       1       3       0       3       0       4       0\n",
+      "2              6       5     976       7       7       2       5       8      14       2\n",
+      "3              0       2      15     952       0      19       1       3      14       4\n",
+      "4              3       3       7       1     952       0       4       1       1      10\n",
+      "5              3       1       0       6       1     857       5       5      13       1\n",
+      "6              8       2       7       2       7      11     920       1       0       0\n",
+      "7              2       5      13       5       4       4       0     969       4      22\n",
+      "8              2       1       9       9      11      15       4       5     916       2\n",
+      "9              7       3       2       8      15      10       1       9       3     951\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "var onnxStartTime = System.currentTimeMillis();\n",
+    "var mnistONNXEval = labelEvaluator.evaluate(onnxFM,mnistTest);\n",
+    "var onnxEndTime = System.currentTimeMillis();\n",
+    "System.out.println(\"Scoring ONNX factorization machine took \" + Util.formatDuration(onnxStartTime,onnxEndTime));\n",
+    "System.out.println(mnistONNXEval.toString());\n",
+    "System.out.println(mnistONNXEval.getConfusionMatrix().toString());"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The two models evaluate the same, but they could be producing slightly different probability values, so let's check it using our more precise comparison function. `checkPredictions` will log any divergence it finds, and it returns true if the predictions match and false if they differ. We're going to use a delta of 1e-5, and consider differences below that threshold to be irrelevant."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predictions are equal - true\n"
+     ]
+    }
+   ],
+   "source": [
+    "System.out.println(\"Predictions are equal - \" + \n",
+    "                    checkPredictions(mnistFMEval.getPredictions(), mnistONNXEval.getPredictions(), 1e-5));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An important part of a Tribuo model is the provenance. We don't want to lose that information when exporting models to ONNX format, so we encode the provenance in the ONNX protobuf. It uses the marshalled provenance format from OLCUT, and the protos are available in OLCUT so they could be parsed in other systems. As a result when loading in a Tribuo-exported ONNX model the `ONNXExternalModel` class has two provenance objects, one for the `ONNXExternalModel` itself, and one for the original Model object.\n",
+    "\n",
+    "Let's examine both of these provenances. First the one for the `ONNXExternalModel`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ONNXExternalModel provenance:\n",
+      "ONNXExternalModel(\n",
+      "\tclass-name = org.tribuo.interop.onnx.ONNXExternalModel\n",
+      "\tdataset = Dataset(\n",
+      "\t\t\tclass-name = org.tribuo.Dataset\n",
+      "\t\t\tdatasource = DataSource(\n",
+      "\t\t\t\t\tdescription = unknown-external-data\n",
+      "\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\tdatasource-creation-time = 2021-12-14T15:03:45.571121-05:00\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttransformations = List[]\n",
+      "\t\t\tis-sequence = false\n",
+      "\t\t\tis-dense = false\n",
+      "\t\t\tnum-examples = -1\n",
+      "\t\t\tnum-features = 717\n",
+      "\t\t\tnum-outputs = 10\n",
+      "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\t\t)\n",
+      "\ttrainer = Trainer(\n",
+      "\t\t\tclass-name = org.tribuo.Trainer\n",
+      "\t\t\tfileModifiedTime = 2021-12-14T15:03:44.423-05:00\n",
+      "\t\t\tmodelHash = 5934A79EA0B7A569DF2A42F08BE2DBED1C3E7D25A90C33D811D77502AEEFA431\n",
+      "\t\t\tlocation = file:/Users/apocock/Development/Tribuo/tutorials/./fm-mnist.onnx\n",
+      "\t\t)\n",
+      "\ttrained-at = 2021-12-14T15:03:45.568284-05:00\n",
+      "\tinstance-values = Map{\n",
+      "\t\tmodel-domain=org.tribuo.tutorials.onnxexport.fm\n",
+      "\t\tmodel-graphname=FMClassificationModel\n",
+      "\t\tmodel-description=factorization-machine-model - Model(class-name=org.tribuo.classification.sgd.fm.FMClassificationModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=DataSource(class-name=org.tribuo.datasource.IDXDataSource,outputPath=/Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),featuresPath=/Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz,features-file-modified-time=2000-07-21T14:20:24-04:00,output-resource-hash=SHA-256[3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C],datasource-creation-time=2021-12-14T15:03:23.159717-05:00,output-file-modified-time=2000-07-21T14:20:27-04:00,idx-feature-type=UBYTE,features-resource-hash=SHA-256[440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609],host-short-name=DataSource),transformations=[],is-sequence=false,is-dense=false,num-examples=60000,num-features=717,num-outputs=10,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.fm.FMClassificationTrainer,seed=12345,variance=0.1,minibatchSize=1,factorizedDimSize=6,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=0.1,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=30000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-12-14T15:03:43.488204-05:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=11.0.10,os-name=Mac OS X,os-arch=x86_64)\n",
+      "\t\tmodel-producer=Tribuo\n",
+      "\t\tmodel-version=0\n",
+      "\t\tinput-name=input\n",
+      "\t}\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\tjava-version = 11.0.10\n",
+      "\tos-name = Mac OS X\n",
+      "\tos-arch = x86_64\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "System.out.println(\"ONNXExternalModel provenance:\\n\" + ProvenanceUtil.formattedProvenanceString(onnxFM.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This has the location the ONNX file was loaded from, a hash of the file, and timestamps for both the ONNX file and the model object wrapping it.\n",
+    "\n",
+    "Now let's look at the original Model provenance:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ONNX file provenance:\n",
+      "FMClassificationModel(\n",
+      "\tclass-name = org.tribuo.classification.sgd.fm.FMClassificationModel\n",
+      "\tdataset = MutableDataset(\n",
+      "\t\t\tclass-name = org.tribuo.MutableDataset\n",
+      "\t\t\tdatasource = IDXDataSource(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.datasource.IDXDataSource\n",
+      "\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\toutputPath = /Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz\n",
+      "\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz\n",
+      "\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:20:24-04:00\n",
+      "\t\t\t\t\toutput-resource-hash = 3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C\n",
+      "\t\t\t\t\tdatasource-creation-time = 2021-12-14T15:03:23.159717-05:00\n",
+      "\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:27-04:00\n",
+      "\t\t\t\t\tidx-feature-type = UBYTE\n",
+      "\t\t\t\t\tfeatures-resource-hash = 440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609\n",
+      "\t\t\t\t\thost-short-name = DataSource\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttransformations = List[]\n",
+      "\t\t\tis-sequence = false\n",
+      "\t\t\tis-dense = false\n",
+      "\t\t\tnum-examples = 60000\n",
+      "\t\t\tnum-features = 717\n",
+      "\t\t\tnum-outputs = 10\n",
+      "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\t\t)\n",
+      "\ttrainer = FMClassificationTrainer(\n",
+      "\t\t\tclass-name = org.tribuo.classification.sgd.fm.FMClassificationTrainer\n",
+      "\t\t\tseed = 12345\n",
+      "\t\t\tvariance = 0.1\n",
+      "\t\t\tminibatchSize = 1\n",
+      "\t\t\tfactorizedDimSize = 6\n",
+      "\t\t\tshuffle = true\n",
+      "\t\t\tepochs = 5\n",
+      "\t\t\toptimiser = AdaGrad(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.math.optimisers.AdaGrad\n",
+      "\t\t\t\t\tepsilon = 0.1\n",
+      "\t\t\t\t\tinitialLearningRate = 0.1\n",
+      "\t\t\t\t\tinitialValue = 0.0\n",
+      "\t\t\t\t\thost-short-name = StochasticGradientOptimiser\n",
+      "\t\t\t\t)\n",
+      "\t\t\tloggingInterval = 30000\n",
+      "\t\t\tobjective = LogMulticlass(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n",
+      "\t\t\t\t\thost-short-name = LabelObjective\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\t\t\ttrain-invocation-count = 0\n",
+      "\t\t\tis-sequence = false\n",
+      "\t\t\thost-short-name = Trainer\n",
+      "\t\t)\n",
+      "\ttrained-at = 2021-12-14T15:03:43.488204-05:00\n",
+      "\tinstance-values = Map{}\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\tjava-version = 11.0.10\n",
+      "\tos-name = Mac OS X\n",
+      "\tos-arch = x86_64\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "System.out.println(\"ONNX file provenance:\\n\" + ProvenanceUtil.formattedProvenanceString(onnxFM.getTribuoProvenance().get()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can also check that the provenance extracted from the ONNX file is the same as the provenance in the original model object."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Provenances are equal\n"
+     ]
+    }
+   ],
+   "source": [
+    "var equality = fmMNIST.getProvenance().equals(onnxFM.getTribuoProvenance().get()) ? \"equal\" : \"not equal\";\n",
+    "System.out.println(\"Provenances are \" + equality);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Exporting an ensemble\n",
+    "\n",
+    "Tribuo allows the creation of arbitrary ensembles, and these are usually powerful models which are useful to deploy. So we're going to make a 3 element voting ensemble out of our factorization machine along with two other models and export that to ONNX as well. The other models are a logistic regression and a smaller factorization machine, but we could use any classification model supported by Tribuo, including another ensemble. As this is a small ensemble of similar models our goal is to demonstrate the functionality rather than improve performance on MNIST too much."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var lrTrainer = new LogisticRegressionTrainer();\n",
+    "var smallFMTrainer = new FMClassificationTrainer(new LogMulticlass(),  // Loss function\n",
+    "                                                 new AdaGrad(0.1,0.1), // Gradient optimiser\n",
+    "                                                 2,                    // Number of training epochs\n",
+    "                                                 30000,                // Logging interval\n",
+    "                                                 42L,                  // RNG seed\n",
+    "                                                 3,                    // Factor size\n",
+    "                                                 0.1                   // Factor initialisation variance\n",
+    "                                                 );\n",
+    "var lrModel = lrTrainer.train(mnistTrain);\n",
+    "var smallFMModel = smallFMTrainer.train(mnistTrain);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Tribuo's `WeightedEnsembleModel` class allows the creation of arbitrary ensembles with or without voting weights. We're going to create an unweighted ensemble of our three models using the standard `VotingCombiner` which takes a majority vote between the three models, with ties broken by the first label."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var ensemble = WeightedEnsembleModel.createEnsembleFromExistingModels(\"ensemble\", // Model name\n",
+    "                                           List.of(fmMNIST,lrModel,smallFMModel), // Ensemble members\n",
+    "                                           new VotingCombiner());                 // Combination operator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Scoring ensemble took (00:00:00:880)\n",
+      "Class                           n          tp          fn          fp      recall        prec          f1\n",
+      "0                             980         965          15          43       0.985       0.957       0.971\n",
+      "1                           1,135       1,119          16          34       0.986       0.971       0.978\n",
+      "2                           1,032         979          53          86       0.949       0.919       0.934\n",
+      "3                           1,010         926          84          38       0.917       0.961       0.938\n",
+      "4                             982         937          45          49       0.954       0.950       0.952\n",
+      "5                             892         837          55          49       0.938       0.945       0.942\n",
+      "6                             958         922          36          32       0.962       0.966       0.964\n",
+      "7                           1,028         978          50          52       0.951       0.950       0.950\n",
+      "8                             974         918          56          98       0.943       0.904       0.923\n",
+      "9                           1,009         917          92          21       0.909       0.978       0.942\n",
+      "Total                      10,000       9,498         502         502\n",
+      "Accuracy                                                                    0.950\n",
+      "Micro Average                                                               0.950       0.950       0.950\n",
+      "Macro Average                                                               0.949       0.950       0.949\n",
+      "Balanced Error Rate                                                         0.051\n",
+      "               0       1       2       3       4       5       6       7       8       9\n",
+      "0            965       0       0       1       0       2       7       3       2       0\n",
+      "1              0   1,119       5       0       0       0       5       1       5       0\n",
+      "2              7       5     979       4       5       1       3       7      20       1\n",
+      "3              3       3      29     926       1      14       0       8      25       1\n",
+      "4              3       2      11       1     937       0       3       1      11      13\n",
+      "5              8       1       2       9       3     837      10       5      17       0\n",
+      "6              8       2       5       3       2      14     922       0       2       0\n",
+      "7              2       9      21       3       6       1       0     978       2       6\n",
+      "8              5       4      10       7      10       9       2       9     918       0\n",
+      "9              7       8       3      10      22       8       2      18      14     917\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "var ensembleStartTime = System.currentTimeMillis();\n",
+    "var ensembleEval = labelEvaluator.evaluate(ensemble,mnistTest);\n",
+    "var ensembleEndTime = System.currentTimeMillis();\n",
+    "System.out.println(\"Scoring ensemble took \" + Util.formatDuration(ensembleStartTime,ensembleEndTime));\n",
+    "System.out.println(ensembleEval.toString());\n",
+    "System.out.println(ensembleEval.getConfusionMatrix().toString());"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As before, we use the `saveONNXModel` method on the `ONNXExportable` interface to write out the model. Note if one of the ensemble members isn't `ONNXExportable` then you'll get a runtime exception out of this call."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var ensemblePath = Paths.get(\".\",\"ensemble-mnist.onnx\");\n",
+    "ensemble.saveONNXModel(\"org.tribuo.tutorials.onnxexport.ensemble\", // namespace for the model\n",
+    "                      0,                                           // model version number\n",
+    "                      ensemblePath                                 // path to save the model\n",
+    "                      );"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can load this model into `ONNXExternalModel` as well:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Scoring ONNX ensemble took (00:00:01:901)\n",
+      "Predictions are equal - true\n"
+     ]
+    }
+   ],
+   "source": [
+    "var onnxEnsemble = ONNXExternalModel.createOnnxModel(labelFactory, mnistFeatureMap, mnistOutputMap,\n",
+    "                    denseTransformer, labelTransformer, sessionOpts, ensemblePath, \"input\");\n",
+    "onnxStartTime = System.currentTimeMillis();\n",
+    "var mnistONNXEnsembleEval = labelEvaluator.evaluate(onnxEnsemble,mnistTest);\n",
+    "onnxEndTime = System.currentTimeMillis();\n",
+    "System.out.println(\"Scoring ONNX ensemble took \" + Util.formatDuration(onnxStartTime,onnxEndTime));\n",
+    "System.out.println(\"Predictions are equal - \" + \n",
+    "                    checkPredictions(ensembleEval.getPredictions(), mnistONNXEnsembleEval.getPredictions(), 1e-5));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Deploying the model\n",
+    "\n",
+    "This portion of the tutorial describes how to deploy the ONNX model on OCI Data Science, using their model deployment service. ONNX models can also be deployed in many other environments, including machine learning services from other cloud providers, a functions-as-a-service offering using something like ONNX Runtime, or [Oracle Machine Learning Services](https://blogs.oracle.com/machinelearning/post/introducing-oracle-machine-learning-services).\n",
+    "\n",
+    "Tribuo's OCI Data Science support comes in two parts, a set of static methods for deploying models on the cloud, and the `OCIModel` class which wraps a model endpoint and allows using it as a normal Tribuo model. Underneath the covers we're going to use an OCI DS conda environment which contains ONNX Runtime in Python, and use that to make predictions from our model trained in Java.\n",
+    "\n",
+    "To run this part of the tutorial you'll need to have configured your access to OCI Data Science (if you've not done this before then you can see a tutorial on how to do that [here](https://github.com/oracle/oci-data-science-ai-samples/blob/master/labs/MLSummit21/lab-0-tenancy-setup.md)), set up authentication to allow [CLI access to OCI](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm) and you'll need the compartment & project ids for the OCI Data Science project you want to deploy into."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// Set these variables appropriately for your OCI account\n",
+    "var compartmentID = \"your-oci-compartment-id\";\n",
+    "var projectID = \"your-oci-ds-project-id\";\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we'll instantiate the DS client, and build the config object which captures all the information about the model we're uploading. The models are run inside a [conda environment](https://docs.oracle.com/en-us/iaas/data-science/using/conda_understand_environments.htm), and you need to select one which contains ONNX Runtime 1.6.0 or newer (as Tribuo emits ONNX models using Opset 13, which is supported in ONNX Runtime 1.6+). This can either be a custom one you've created, or one provided by OCI Data Science."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "// Instantiate the client\n",
+    "var provider = new ConfigFileAuthenticationDetailsProvider(ConfigFileReader.parseDefault());\n",
+    "var dsClient = new DataScienceClient(provider);\n",
+    "\n",
+    "// Instantiate an ObjectMapper for parsing the REST calls\n",
+    "var objMapper = OCIUtil.createObjectMapper();\n",
+    "\n",
+    "// Select the conda environment\n",
+    "var condaName = \"dataexpl_p37_cpu_v3\"; // Also referred to as the \"slug\" in the OCI DS docs\n",
+    "var condaPath = \"oci://service-conda-packs@id19sfcrra6z/service_pack/cpu/Data Exploration and Manipulation for CPU Python 3.7/3.0/dataexpl_p37_cpu_v3\";\n",
+    "\n",
+    "// Instantiate the model configuration\n",
+    "var dsConfig = new OCIUtil.OCIDSConfig(compartmentID,projectID);\n",
+    "var modelConfig = new OCIUtil.OCIModelArtifactConfig(dsConfig,          // Data Science config\n",
+    "                                             \"tribuo-tutorial-model\",   // Model name\n",
+    "                                             \"A factorization machine\", // Model description\n",
+    "                                             \"org.tribuo.tutorial.test\",// ONNX model domain\n",
+    "                                             0,                         // ONNX model version\n",
+    "                                             condaName,                 // Conda environment name\n",
+    "                                             condaPath);                // Conda environment path on object storage"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can now upload the model into OCI Data Science. The `createModel` method has an overload that accepts an ONNX file on disk, or you can pass in any model which implements `ONNXExportable`. Tribuo takes care of setting the model metadata according to the information it can extract from the `Model` object, and it automatically generates the necessary python script and yaml file which control the model's environment in the deployment. Note models are distinct from model deployments, so a single model artifact can be deployed multiple times with different endpoints, VM sizes and scaling parameters. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var modelID = OCIUtil.createModel(fmMNIST,dsClient,objMapper,modelConfig);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `modelID` is the reference for the model artifact stored in Oracle Cloud, and we'll need this to create a deployment wrapping the model.\n",
+    "\n",
+    "To specify the model deployment configuration there's an `OCIModelDeploymentConfig` wrapper class which contains the model ID, the model deployment name, the VM shape, maximum number of VM instances to create, and the bandwidth available for that model. At time of writing OCI DS supports the `VM.Standard2` shapes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var deployConfig = new OCIUtil.OCIModelDeploymentConfig(dsConfig,modelID,\"tribuo-tutorial-deployment\",\"VM.Standard2.1\",10,1);\n",
+    "\n",
+    "var deployURL = OCIUtil.deploy(deployConfig,dsClient,objMapper);\n",
+    "System.out.println(deployURL);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Model deployments take a few minutes, so you'll need to wait a while if you've been following along with the tutorial. The deployment progress can be checked on the OCI console for the data science project you are using.\n",
+    "\n",
+    "Once the deployment has finished, we can wrap it in an `OCIModel` and then check it's the same as the factorization machine we deployed. An `OCIModel` is a subclass of `ExternalModel` in the same way that externally trained ONNX models are, so we need to supply the mapping between Tribuo's feature domain & the feature indices expected by the model, the output domain mapping, and an `OCIOutputConverter` instance which can convert the prediction matrix into Tribuo's `Prediction` objects. As we've deployed a factorization machine for MNIST, we'll use `OCILabelConverter`, and the mappings are the same as the ones we used for the ONNX model earlier."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var ociLabelConverter = new OCILabelConverter(true);\n",
+    "var ociModel = OCIModel.createOCIModel(labelFactory,mnistFeatureMap, mnistOutputMap, \n",
+    "                                       Paths.get(\"~/.oci/config\"), // OCI authentication config\n",
+    "                                       deployURL,                  // Model endpoint URL\n",
+    "                                       ociLabelConverter);         // Output converter"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As `OCIModel` is a Tribuo model we can evaluate it using our standard tools."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Scoring OCI model took (00:01:06:960)\n",
+      "Class                           n          tp          fn          fp      recall        prec          f1\n",
+      "0                             980         959          21          31       0.979       0.969       0.974\n",
+      "1                           1,135       1,120          15          22       0.987       0.981       0.984\n",
+      "2                           1,032         976          56          57       0.946       0.945       0.945\n",
+      "3                           1,010         952          58          39       0.943       0.961       0.952\n",
+      "4                             982         952          30          49       0.969       0.951       0.960\n",
+      "5                             892         857          35          63       0.961       0.932       0.946\n",
+      "6                             958         920          38          30       0.960       0.968       0.964\n",
+      "7                           1,028         969          59          36       0.943       0.964       0.953\n",
+      "8                             974         916          58          57       0.940       0.941       0.941\n",
+      "9                           1,009         951          58          44       0.943       0.956       0.949\n",
+      "Total                      10,000       9,572         428         428\n",
+      "Accuracy                                                                    0.957\n",
+      "Micro Average                                                               0.957       0.957       0.957\n",
+      "Macro Average                                                               0.957       0.957       0.957\n",
+      "Balanced Error Rate                                                         0.043\n",
+      "               0       1       2       3       4       5       6       7       8       9\n",
+      "0            959       0       0       0       1       2       7       4       4       3\n",
+      "1              0   1,120       4       1       3       0       3       0       4       0\n",
+      "2              6       5     976       7       7       2       5       8      14       2\n",
+      "3              0       2      15     952       0      19       1       3      14       4\n",
+      "4              3       3       7       1     952       0       4       1       1      10\n",
+      "5              3       1       0       6       1     857       5       5      13       1\n",
+      "6              8       2       7       2       7      11     920       1       0       0\n",
+      "7              2       5      13       5       4       4       0     969       4      22\n",
+      "8              2       1       9       9      11      15       4       5     916       2\n",
+      "9              7       3       2       8      15      10       1       9       3     951\n",
+      "\n",
+      "Predictions are equal - true\n"
+     ]
+    }
+   ],
+   "source": [
+    "var ociStartTime = System.currentTimeMillis();\n",
+    "var ociEval = labelEvaluator.evaluate(ociModel,mnistTest);\n",
+    "var ociEndTime = System.currentTimeMillis();\n",
+    "System.out.println(\"Scoring OCI model took \" + Util.formatDuration(ociStartTime,ociEndTime));\n",
+    "System.out.println(ociEval.toString());\n",
+    "System.out.println(ociEval.getConfusionMatrix().toString());\n",
+    "\n",
+    "System.out.println(\"Predictions are equal - \" + \n",
+    "                    checkPredictions(ociEval.getPredictions(), mnistFMEval.getPredictions(), 1e-5));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see that the model performs identically to the Tribuo version, though it takes a little longer as each call to predict incurs some network latency.\n",
+    "\n",
+    "## Conclusion\n",
+    "\n",
+    "We've looked at exporting models out of Tribuo in ONNX format, where they can be used in different languages, runtimes and deployed in cloud environments like OCI Data Science. Over time we plan to expand Tribuo's support for ONNX export to cover more models. Tribuo's ONNX support is a separate module from the rest of Tribuo and could be used to build ONNX models in other packages on the JVM. If you're interested in expanding the support for ONNX in Java, you can open a [GitHub issue](https://github.com/oracle/tribuo/issues) for Tribuo, or you can talk to the ONNX community in their [Slack workspace](https://onnx.ai/slack.html)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Java",
+   "language": "java",
+   "name": "java"
+  },
+  "language_info": {
+   "codemirror_mode": "java",
+   "file_extension": ".jshell",
+   "mimetype": "text/x-java-source",
+   "name": "Java",
+   "pygments_lexer": "java",
+   "version": "11.0.10+8-LTS-162"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tutorials/reproducibility-tribuo-v4.ipynb b/tutorials/reproducibility-tribuo-v4.ipynb
new file mode 100644
index 000000000..a5c6a8565
--- /dev/null
+++ b/tutorials/reproducibility-tribuo-v4.ipynb
@@ -0,0 +1,803 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Reproducibility Tutorial\n",
+    "\n",
+    "Reproducibility of ML models and evaluations is frequently a problem across many ML systems. It's usually two problems: the first is a description of the computation that was executed, and the second is a method of replaying that computation. In Tribuo we built our provenance system to make our models *self-describing*, by which we mean they capture a complete description of the computation that produced them, solving the first issue. In v4.2 we added an automated reproducibility system which consumes the provenance data and retrains the model. As well as the reproducibility system we also added a mechanism for diffing provenance objects, allowing easy comparison between the reproduced and original models. This is useful because the models are only guaranteed to be identical if the data is the same, and any differences in the data will show up in the data provenance object.\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "Before running this tutorial, please run the irises classification and ONNX export tutorials to build the two models that we're going to reproduce.\n",
+    "\n",
+    "We're going to load in the classification jar, the ONNX jar, the JSON jar, and the reproducibility jar. Note the reproducibility jar is written in Java 16, and so this tutorial requires Java 16 or later. Then we'll import the necessary classes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%jars ./tribuo-classification-experiments-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n",
+    "%jars ./tribuo-onnx-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n",
+    "%jars ./tribuo-json-4.2.0-SNAPSHOT-jar-with-dependencies.jar\n",
+    "%jars ./tribuo-reproducibility-4.2.0-SNAPSHOT.jar"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import org.tribuo.*;\n",
+    "import org.tribuo.classification.*;\n",
+    "import org.tribuo.classification.evaluation.*;\n",
+    "import org.tribuo.classification.sgd.fm.*;\n",
+    "import org.tribuo.classification.sgd.linear.*;\n",
+    "import org.tribuo.datasource.*;\n",
+    "import org.tribuo.interop.onnx.*;\n",
+    "import org.tribuo.reproducibility.*;\n",
+    "import com.oracle.labs.mlrg.olcut.provenance.*;\n",
+    "import com.oracle.labs.mlrg.olcut.util.*;\n",
+    "import ai.onnxruntime.*;\n",
+    "\n",
+    "import java.nio.file.*;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reproducing a Tribuo Model\n",
+    "\n",
+    "The reproducibility system works on Tribuo `Model` or `ModelProvenance` objects. When using the `ModelProvenance` the system loads in the original training data, processes and transforms it according to the columnar processing and transforms applied, then rebuilds the original trainer including its RNG state, before passing the data into the train method and returning the reproduced model. When using the `Model` object, it performs the same steps as for a `ModelProvenance` and then compares the feature and output domains to provide more information about any differences between the feature and output domains used by the model. Over time we plan to expand the validation applied to the reproduced model to show if the features have different ranges or histograms.\n",
+    "\n",
+    "We're going to load in the Irises logistic regression model trained in the first tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "linear-sgd-model - Model(class-name=org.tribuo.classification.sgd.linear.LinearSGDModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=SplitDataSourceProvenance(className=org.tribuo.evaluation.TrainTestSplitter,innerSourceProvenance=DataSource(class-name=org.tribuo.data.csv.CSVDataSource,headers=[sepalLength, sepalWidth, petalLength, petalWidth, species],rowProcessor=RowProcessor(class-name=org.tribuo.data.columnar.RowProcessor,metadataExtractors=[],fieldProcessorList=[FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=petalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalWidth,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor), 
FieldProcessor(class-name=org.tribuo.data.columnar.processors.field.DoubleFieldProcessor,fieldName=sepalLength,onlyFieldName=true,throwOnInvalid=true,host-short-name=FieldProcessor)],featureProcessors=[],responseProcessor=ResponseProcessor(class-name=org.tribuo.data.columnar.processors.response.FieldResponseProcessor,uppercase=false,fieldNames=[species],defaultValues=[],displayField=false,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),host-short-name=ResponseProcessor),weightExtractor=null,replaceNewlinesWithSpaces=true,regexMappingProcessors={},host-short-name=RowProcessor),quote=\",outputRequired=true,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),separator=,,dataPath=/Users/apocock/Development/Tribuo/tutorials/bezdekIris.data,resource-hash=SHA-256[0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC],file-modified-time=1999-12-14T15:12:39-05:00,datasource-creation-time=2021-11-01T12:52:18.814629-04:00,host-short-name=DataSource),trainProportion=0.7,seed=1,size=150,isTrain=true),transformations=[],is-sequence=false,is-dense=true,num-examples=105,num-features=4,num-outputs=3,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.linear.LogisticRegressionTrainer,seed=12345,minibatchSize=1,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=1.0,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=1000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-11-01T12:52:19.228195-04:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=17,os-name=Mac OS X,os-arch=x86_64)\n"
+     ]
+    }
+   ],
+   "source": [
+    "File irisModelFile = new File(\"iris-lr-model.ser\");\n",
+    "String filterPattern = Files.readAllLines(Paths.get(\"../docs/jep-290-filter.txt\")).get(0);\n",
+    "ObjectInputFilter filter = ObjectInputFilter.Config.createFilter(filterPattern);\n",
+    "LinearSGDModel loadedModel;\n",
+    "try (ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new FileInputStream(irisModelFile)))) {\n",
+    "    ois.setObjectInputFilter(filter);\n",
+    "    loadedModel = (LinearSGDModel) ois.readObject();\n",
+    "}\n",
+    "\n",
+    "System.out.println(loadedModel.toString());"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The reproducibility system lives in the `ReproUtil` class. This class is constructed with either a `Model`, or a `ModelProvenance` along with the `Class<T extends Output<T>>` for the output class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var repro = new ReproUtil<>(loadedModel);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can separately rebuild the dataset and the trainer, though note if you mutate the objects returned by these methods then you won't get the exact same model back from the reproduction. We're still working on the API for the reproducibility system and expect to make this API more robust over time."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MutableDataset(\n",
+      "\tclass-name = org.tribuo.MutableDataset\n",
+      "\tdatasource = TrainTestSplitter(\n",
+      "\t\t\tclass-name = org.tribuo.evaluation.TrainTestSplitter\n",
+      "\t\t\tsource = CSVDataSource(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.data.csv.CSVDataSource\n",
+      "\t\t\t\t\theaders = List[\n",
+      "\t\t\t\t\t\tsepalLength\n",
+      "\t\t\t\t\t\tsepalWidth\n",
+      "\t\t\t\t\t\tpetalLength\n",
+      "\t\t\t\t\t\tpetalWidth\n",
+      "\t\t\t\t\t\tspecies\n",
+      "\t\t\t\t\t]\n",
+      "\t\t\t\t\trowProcessor = RowProcessor(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.RowProcessor\n",
+      "\t\t\t\t\t\t\tmetadataExtractors = List[]\n",
+      "\t\t\t\t\t\t\tfieldProcessorList = List[\n",
+      "\t\t\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t\tfieldName = petalLength\n",
+      "\t\t\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t\tfieldName = petalWidth\n",
+      "\t\t\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t\tfieldName = sepalWidth\n",
+      "\t\t\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\t\tDoubleFieldProcessor(\n",
+      "\t\t\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.field.DoubleFieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t\tfieldName = sepalLength\n",
+      "\t\t\t\t\t\t\t\t\t\t\tonlyFieldName = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\tthrowOnInvalid = true\n",
+      "\t\t\t\t\t\t\t\t\t\t\thost-short-name = FieldProcessor\n",
+      "\t\t\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\t]\n",
+      "\t\t\t\t\t\t\tfeatureProcessors = List[]\n",
+      "\t\t\t\t\t\t\tresponseProcessor = FieldResponseProcessor(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.processors.response.FieldResponseProcessor\n",
+      "\t\t\t\t\t\t\t\t\tuppercase = false\n",
+      "\t\t\t\t\t\t\t\t\tfieldNames = List[\n",
+      "\t\t\t\t\t\t\t\t\t\tspecies\n",
+      "\t\t\t\t\t\t\t\t\t]\n",
+      "\t\t\t\t\t\t\t\t\tdefaultValues = List[\n",
+      "\t\t\t\t\t\t\t\t\t\t\n",
+      "\t\t\t\t\t\t\t\t\t]\n",
+      "\t\t\t\t\t\t\t\t\tdisplayField = false\n",
+      "\t\t\t\t\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\t\t\thost-short-name = ResponseProcessor\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\tweightExtractor = FieldExtractor(\n",
+      "\t\t\t\t\t\t\t\t\tclass-name = org.tribuo.data.columnar.FieldExtractor\n",
+      "\t\t\t\t\t\t\t\t)\n",
+      "\t\t\t\t\t\t\treplaceNewlinesWithSpaces = true\n",
+      "\t\t\t\t\t\t\tregexMappingProcessors = Map{}\n",
+      "\t\t\t\t\t\t\thost-short-name = RowProcessor\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\tquote = \"\n",
+      "\t\t\t\t\toutputRequired = true\n",
+      "\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\tseparator = ,\n",
+      "\t\t\t\t\tdataPath = /Users/apocock/Development/Tribuo/tutorials/bezdekIris.data\n",
+      "\t\t\t\t\tresource-hash = 0FED2A99DB77EC533A62DC66894D3EC6DF3B58B6A8F3CF4A6B47E4086B7F97DC\n",
+      "\t\t\t\t\tfile-modified-time = 1999-12-14T15:12:39-05:00\n",
+      "\t\t\t\t\tdatasource-creation-time = 2021-11-03T09:51:39.561821-04:00\n",
+      "\t\t\t\t\thost-short-name = DataSource\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttrain-proportion = 0.7\n",
+      "\t\t\tseed = 1\n",
+      "\t\t\tsize = 150\n",
+      "\t\t\tis-train = true\n",
+      "\t\t)\n",
+      "\ttransformations = List[]\n",
+      "\tis-sequence = false\n",
+      "\tis-dense = true\n",
+      "\tnum-examples = 105\n",
+      "\tnum-features = 4\n",
+      "\tnum-outputs = 3\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "var dataset = repro.recoverDataset();\n",
+    "\n",
+    "System.out.println(ProvenanceUtil.formattedProvenanceString(dataset.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Our irises dataset was loaded in using the `CSVLoader` and split with a 70/30 train test split, and we can see that the reproduced training dataset has been split just as we expect."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LogisticRegressionTrainer(\n",
+      "\tclass-name = org.tribuo.classification.sgd.linear.LogisticRegressionTrainer\n",
+      "\tseed = 12345\n",
+      "\tminibatchSize = 1\n",
+      "\tshuffle = true\n",
+      "\tepochs = 5\n",
+      "\toptimiser = AdaGrad(\n",
+      "\t\t\tclass-name = org.tribuo.math.optimisers.AdaGrad\n",
+      "\t\t\tepsilon = 0.1\n",
+      "\t\t\tinitialLearningRate = 1.0\n",
+      "\t\t\tinitialValue = 0.0\n",
+      "\t\t\thost-short-name = StochasticGradientOptimiser\n",
+      "\t\t)\n",
+      "\tloggingInterval = 1000\n",
+      "\tobjective = LogMulticlass(\n",
+      "\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n",
+      "\t\t\thost-short-name = LabelObjective\n",
+      "\t\t)\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\ttrain-invocation-count = 0\n",
+      "\tis-sequence = false\n",
+      "\thost-short-name = Trainer\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "var trainer = repro.recoverTrainer();\n",
+    "System.out.println(ProvenanceUtil.formattedProvenanceString(trainer.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The irises model is a logistic regression, using seed `12345` and it's the first model trained by that trainer (as `train-invocation-count` is zero)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var reproduction = repro.reproduceFromModel();\n",
+    "var reproducedModel = (LinearSGDModel) reproduction.model();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can compare the reproduced model's provenance to the one in the original model using our diff tool, however as Tribuo records construction timestamps they will not be identical."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "  \"dataset\" : {\n",
+      "    \"datasource\" : {\n",
+      "      \"source\" : {\n",
+      "        \"datasource-creation-time\" : {\n",
+      "          \"original\" : \"2021-11-01T12:52:18.814629-04:00\",\n",
+      "          \"reproduced\" : \"2021-11-03T09:51:39.561821-04:00\"\n",
+      "        }\n",
+      "      }\n",
+      "    }\n",
+      "  },\n",
+      "  \"trained-at\" : {\n",
+      "    \"original\" : \"2021-11-01T12:52:19.228195-04:00\",\n",
+      "    \"reproduced\" : \"2021-11-03T09:51:39.842601-04:00\"\n",
+      "  }\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "System.out.println(ReproUtil.diffProvenance(loadedModel.getProvenance(),reproducedModel.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see that the timestamps are a little different, though the precise difference will depend on when you ran the irises tutorial. You may also see differences in the JVM or other machine provenance if you ran that tutorial on a different machine. If the irises dataset grows a new feature or additional rows in the same file, then the diff will show that the datasets have different numbers of features or samples, and that the file has a different hash.\n",
+    "\n",
+    "For some models we can easily compare the model contents, e.g., for the logistic regression we can directly compare the model weights."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Weights are equal = true\n"
+     ]
+    }
+   ],
+   "source": [
+    "var originalWeights = loadedModel.getWeightsCopy();\n",
+    "var reproducedWeights = reproducedModel.getWeightsCopy();\n",
+    "\n",
+    "System.out.println(\"Weights are equal = \" + originalWeights.equals(reproducedWeights));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reproducing an ONNX exported Tribuo Model\n",
+    "\n",
+    "Tribuo models can be exported into the [ONNX](https://onnx.ai) format. When Tribuo models are exported the model provenance is stored as a metadata field in the ONNX file. This doesn't affect anything which serves the ONNX model, but allows Tribuo to load the provenance back in if the model is loaded in as an `ONNXExternalModel` which is Tribuo's class for loading in ONNX models.\n",
+    "\n",
+    "To load a model in as an `ONNXExternalModel` we need to define the feature and label mappings which should be written out separately when the ONNX model is exported. We're going to cheat slightly and get them from the MNIST training set itself."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var labelFactory = new LabelFactory();\n",
+    "var mnistTrainSource = new IDXDataSource<>(Paths.get(\"train-images-idx3-ubyte.gz\"),Paths.get(\"train-labels-idx1-ubyte.gz\"),labelFactory);\n",
+    "var mnistTestSource = new IDXDataSource<>(Paths.get(\"t10k-images-idx3-ubyte.gz\"),Paths.get(\"t10k-labels-idx1-ubyte.gz\"),labelFactory);\n",
+    "var mnistTrain = new MutableDataset<>(mnistTrainSource);\n",
+    "var mnistTest = new MutableDataset<>(mnistTestSource);\n",
+    "\n",
+    "Map<String, Integer> mnistFeatureMap = new HashMap<>();\n",
+    "for (VariableInfo f : mnistTrain.getFeatureIDMap()){\n",
+    "    VariableIDInfo id = (VariableIDInfo) f;\n",
+    "    mnistFeatureMap.put(id.getName(),id.getID());\n",
+    "}\n",
+    "Map<Label, Integer> mnistOutputMap = new HashMap<>();\n",
+    "for (Pair<Integer,Label> l : mnistTrain.getOutputIDInfo()) {\n",
+    "    mnistOutputMap.put(l.getB(), l.getA());\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now let's load in the ONNX file:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var ortEnv = OrtEnvironment.getEnvironment();\n",
+    "var sessionOpts = new OrtSession.SessionOptions();\n",
+    "var denseTransformer = new DenseTransformer();\n",
+    "var labelTransformer = new LabelTransformer();\n",
+    "var mnistModelPath = Paths.get(\".\",\"fm-mnist.onnx\");\n",
+    "ONNXExternalModel<Label> onnx = ONNXExternalModel.createOnnxModel(labelFactory, mnistFeatureMap, mnistOutputMap,\n",
+    "                    denseTransformer, labelTransformer, sessionOpts, mnistModelPath, \"input\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This model has two provenance objects, one from the creation of the `ONNXExternalModel`, and one from the original training run in Tribuo which is persisted inside the ONNX file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ONNXExternalModel(\n",
+      "\tclass-name = org.tribuo.interop.onnx.ONNXExternalModel\n",
+      "\tdataset = Dataset(\n",
+      "\t\t\tclass-name = org.tribuo.Dataset\n",
+      "\t\t\tdatasource = DataSource(\n",
+      "\t\t\t\t\tdescription = unknown-external-data\n",
+      "\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\tdatasource-creation-time = 2021-11-03T09:51:50.151668-04:00\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttransformations = List[]\n",
+      "\t\t\tis-sequence = false\n",
+      "\t\t\tis-dense = false\n",
+      "\t\t\tnum-examples = -1\n",
+      "\t\t\tnum-features = 717\n",
+      "\t\t\tnum-outputs = 10\n",
+      "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\t\t)\n",
+      "\ttrainer = Trainer(\n",
+      "\t\t\tclass-name = org.tribuo.Trainer\n",
+      "\t\t\tfileModifiedTime = 2021-10-26T17:51:36.243-04:00\n",
+      "\t\t\tmodelHash = 8DD82B31BD7CFC1C520942590E173AED07AF33C97C32021EE94738FA9FF4CC89\n",
+      "\t\t\tlocation = file:/Users/apocock/Development/Tribuo/tutorials/./fm-mnist.onnx\n",
+      "\t\t)\n",
+      "\ttrained-at = 2021-11-03T09:51:50.149558-04:00\n",
+      "\tinstance-values = Map{\n",
+      "\t\tmodel-domain=org.tribuo.tutorials.onnxexport.fm\n",
+      "\t\tmodel-graphname=FMClassificationModel\n",
+      "\t\tmodel-description=factorization-machine-model - Model(class-name=org.tribuo.classification.sgd.fm.FMClassificationModel,dataset=Dataset(class-name=org.tribuo.MutableDataset,datasource=DataSource(class-name=org.tribuo.datasource.IDXDataSource,outputPath=/Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz,outputFactory=OutputFactory(class-name=org.tribuo.classification.LabelFactory),featuresPath=/Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz,features-file-modified-time=2000-07-21T14:20:24-04:00,output-resource-hash=SHA-256[3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C],datasource-creation-time=2021-10-26T17:51:22.314557-04:00,output-file-modified-time=2000-07-21T14:20:27-04:00,idx-feature-type=UBYTE,features-resource-hash=SHA-256[440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609],host-short-name=DataSource),transformations=[],is-sequence=false,is-dense=false,num-examples=60000,num-features=717,num-outputs=10,tribuo-version=4.2.0-SNAPSHOT),trainer=Trainer(class-name=org.tribuo.classification.sgd.fm.FMClassificationTrainer,seed=12345,variance=0.1,minibatchSize=1,factorizedDimSize=6,shuffle=true,epochs=5,optimiser=StochasticGradientOptimiser(class-name=org.tribuo.math.optimisers.AdaGrad,epsilon=0.1,initialLearningRate=0.1,initialValue=0.0,host-short-name=StochasticGradientOptimiser),loggingInterval=30000,objective=LabelObjective(class-name=org.tribuo.classification.sgd.objectives.LogMulticlass,host-short-name=LabelObjective),tribuo-version=4.2.0-SNAPSHOT,train-invocation-count=0,is-sequence=false,host-short-name=Trainer),trained-at=2021-10-26T17:51:35.432511-04:00,instance-values={},tribuo-version=4.2.0-SNAPSHOT,java-version=17-ea,os-name=Mac OS X,os-arch=x86_64)\n",
+      "\t\tmodel-producer=Tribuo\n",
+      "\t\tmodel-version=0\n",
+      "\t\tinput-name=input\n",
+      "\t}\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\tjava-version = 17\n",
+      "\tos-name = Mac OS X\n",
+      "\tos-arch = x86_64\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "System.out.println(ProvenanceUtil.formattedProvenanceString(onnx.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `ONNXExternalModel` provenance has a lot of placeholders in it, as you might expect given the information is not always present in ONNX files.\n",
+    "\n",
+    "We can load the Tribuo model provenance using `getTribuoProvenance()`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "FMClassificationModel(\n",
+      "\tclass-name = org.tribuo.classification.sgd.fm.FMClassificationModel\n",
+      "\tdataset = MutableDataset(\n",
+      "\t\t\tclass-name = org.tribuo.MutableDataset\n",
+      "\t\t\tdatasource = IDXDataSource(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.datasource.IDXDataSource\n",
+      "\t\t\t\t\toutputFactory = LabelFactory(\n",
+      "\t\t\t\t\t\t\tclass-name = org.tribuo.classification.LabelFactory\n",
+      "\t\t\t\t\t\t)\n",
+      "\t\t\t\t\toutputPath = /Users/apocock/Development/Tribuo/tutorials/train-labels-idx1-ubyte.gz\n",
+      "\t\t\t\t\tfeaturesPath = /Users/apocock/Development/Tribuo/tutorials/train-images-idx3-ubyte.gz\n",
+      "\t\t\t\t\tfeatures-file-modified-time = 2000-07-21T14:20:24-04:00\n",
+      "\t\t\t\t\toutput-resource-hash = 3552534A0A558BBED6AED32B30C495CCA23D567EC52CAC8BE1A0730E8010255C\n",
+      "\t\t\t\t\tdatasource-creation-time = 2021-10-26T17:51:22.314557-04:00\n",
+      "\t\t\t\t\toutput-file-modified-time = 2000-07-21T14:20:27-04:00\n",
+      "\t\t\t\t\tidx-feature-type = UBYTE\n",
+      "\t\t\t\t\tfeatures-resource-hash = 440FCABF73CC546FA21475E81EA370265605F56BE210A4024D2CA8F203523609\n",
+      "\t\t\t\t\thost-short-name = DataSource\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttransformations = List[]\n",
+      "\t\t\tis-sequence = false\n",
+      "\t\t\tis-dense = false\n",
+      "\t\t\tnum-examples = 60000\n",
+      "\t\t\tnum-features = 717\n",
+      "\t\t\tnum-outputs = 10\n",
+      "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\t\t)\n",
+      "\ttrainer = FMClassificationTrainer(\n",
+      "\t\t\tclass-name = org.tribuo.classification.sgd.fm.FMClassificationTrainer\n",
+      "\t\t\tseed = 12345\n",
+      "\t\t\tvariance = 0.1\n",
+      "\t\t\tminibatchSize = 1\n",
+      "\t\t\tfactorizedDimSize = 6\n",
+      "\t\t\tshuffle = true\n",
+      "\t\t\tepochs = 5\n",
+      "\t\t\toptimiser = AdaGrad(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.math.optimisers.AdaGrad\n",
+      "\t\t\t\t\tepsilon = 0.1\n",
+      "\t\t\t\t\tinitialLearningRate = 0.1\n",
+      "\t\t\t\t\tinitialValue = 0.0\n",
+      "\t\t\t\t\thost-short-name = StochasticGradientOptimiser\n",
+      "\t\t\t\t)\n",
+      "\t\t\tloggingInterval = 30000\n",
+      "\t\t\tobjective = LogMulticlass(\n",
+      "\t\t\t\t\tclass-name = org.tribuo.classification.sgd.objectives.LogMulticlass\n",
+      "\t\t\t\t\thost-short-name = LabelObjective\n",
+      "\t\t\t\t)\n",
+      "\t\t\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\t\t\ttrain-invocation-count = 0\n",
+      "\t\t\tis-sequence = false\n",
+      "\t\t\thost-short-name = Trainer\n",
+      "\t\t)\n",
+      "\ttrained-at = 2021-10-26T17:51:35.432511-04:00\n",
+      "\tinstance-values = Map{}\n",
+      "\ttribuo-version = 4.2.0-SNAPSHOT\n",
+      "\tjava-version = 17-ea\n",
+      "\tos-name = Mac OS X\n",
+      "\tos-arch = x86_64\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "var tribuoProvenance = onnx.getTribuoProvenance().get();\n",
+    "System.out.println(ProvenanceUtil.formattedProvenanceString(tribuoProvenance));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "From this provenance we can see that the model is a factorization machine trained on MNIST (as expected). So now we can build a `ReproUtil` and rebuild the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "var mnistRepro = new ReproUtil<>(tribuoProvenance,Label.class);\n",
+    "\n",
+    "var reproducedMNISTModel = mnistRepro.reproduceFromProvenance();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can diff the two provenances:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "  \"dataset\" : {\n",
+      "    \"datasource\" : {\n",
+      "      \"datasource-creation-time\" : {\n",
+      "        \"original\" : \"2021-10-26T17:51:22.314557-04:00\",\n",
+      "        \"reproduced\" : \"2021-11-03T09:51:56.746038-04:00\"\n",
+      "      }\n",
+      "    }\n",
+      "  },\n",
+      "  \"java-version\" : {\n",
+      "    \"original\" : \"17-ea\",\n",
+      "    \"reproduced\" : \"17\"\n",
+      "  },\n",
+      "  \"trained-at\" : {\n",
+      "    \"original\" : \"2021-10-26T17:51:35.432511-04:00\",\n",
+      "    \"reproduced\" : \"2021-11-03T09:52:10.606727-04:00\"\n",
+      "  }\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "System.out.println(ReproUtil.diffProvenance(tribuoProvenance, reproducedMNISTModel.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As before, the diff is not very interesting: we're using the same files, so only the creation timestamps and the Java version (`17-ea` versus `17`) differ. Checking the model weights is tricky with an ONNX model, so we can instead check that the predictions are the same (though Tribuo computes in doubles and ONNX Runtime uses floats, so the answers are slightly different). We'll borrow the `checkPredictions` function from the ONNX export tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "public boolean checkPredictions(List<Prediction<Label>> nativePredictions, List<Prediction<Label>> onnxPredictions, double delta) {\n",
+    "    for (int i = 0; i < nativePredictions.size(); i++) {\n",
+    "        Prediction<Label> tribuo = nativePredictions.get(i);\n",
+    "        Prediction<Label> external = onnxPredictions.get(i);\n",
+    "        // Check the predicted label\n",
+    "        if (!tribuo.getOutput().getLabel().equals(external.getOutput().getLabel())) {\n",
+    "            System.out.println(\"At index \" + i + \" predictions are not equal - \"\n",
+    "                    + tribuo.getOutput().getLabel() + \" and \"\n",
+    "                    + external.getOutput().getLabel());\n",
+    "            return false;\n",
+    "        }\n",
+    "        // Check the maximum score\n",
+    "        if (Math.abs(tribuo.getOutput().getScore() - external.getOutput().getScore()) > delta) {\n",
+    "            System.out.println(\"At index \" + i + \" predictions are not equal - \"\n",
+    "                    + tribuo.getOutput() + \" and \"\n",
+    "                    + external.getOutput());\n",
+    "            return false;\n",
+    "        }\n",
+    "        // Check the score distribution\n",
+    "        for (Map.Entry<String, Label> l : tribuo.getOutputScores().entrySet()) {\n",
+    "            Label other = external.getOutputScores().get(l.getKey());\n",
+    "            if (other == null) {\n",
+    "                System.out.println(\"At index \" + i + \" failed to find label \" + l.getKey() + \" in ORT prediction.\");\n",
+    "                return false;\n",
+    "            } else {\n",
+    "                if (Math.abs(l.getValue().getScore() - other.getScore()) > delta) {\n",
+    "                    System.out.println(\"At index \" + i + \" predictions are not equal - \"\n",
+    "                            + tribuo.getOutputScores() + \" and \"\n",
+    "                            + external.getOutputScores());\n",
+    "                    return false;\n",
+    "                }\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "    return true;\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can make predictions from both models and compare the outputs:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predictions are equal = true\n"
+     ]
+    }
+   ],
+   "source": [
+    "var onnxPredictions = onnx.predict(mnistTest);\n",
+    "var reproducedPredictions = reproducedMNISTModel.predict(mnistTest);\n",
+    "\n",
+    "System.out.println(\"Predictions are equal = \" + checkPredictions(reproducedPredictions,onnxPredictions,1e-5));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Working with provenance diffs\n",
+    "\n",
+    "We can use the provenance diff methods to compute diffs for unrelated models too. We're going to train a logistic regression on MNIST and compare the model provenance against the ONNX factorization machine we just used."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "  \"class-name\" : {\n",
+      "    \"original\" : \"org.tribuo.classification.sgd.fm.FMClassificationModel\",\n",
+      "    \"reproduced\" : \"org.tribuo.classification.sgd.linear.LinearSGDModel\"\n",
+      "  },\n",
+      "  \"dataset\" : {\n",
+      "    \"datasource\" : {\n",
+      "      \"datasource-creation-time\" : {\n",
+      "        \"original\" : \"2021-10-26T17:51:22.314557-04:00\",\n",
+      "        \"reproduced\" : \"2021-11-03T09:51:47.929133-04:00\"\n",
+      "      }\n",
+      "    }\n",
+      "  },\n",
+      "  \"java-version\" : {\n",
+      "    \"original\" : \"17-ea\",\n",
+      "    \"reproduced\" : \"17\"\n",
+      "  },\n",
+      "  \"trained-at\" : {\n",
+      "    \"original\" : \"2021-10-26T17:51:35.432511-04:00\",\n",
+      "    \"reproduced\" : \"2021-11-03T09:52:20.359019-04:00\"\n",
+      "  },\n",
+      "  \"trainer\" : {\n",
+      "    \"class-name\" : {\n",
+      "      \"original\" : \"org.tribuo.classification.sgd.fm.FMClassificationTrainer\",\n",
+      "      \"reproduced\" : \"org.tribuo.classification.sgd.linear.LogisticRegressionTrainer\"\n",
+      "    },\n",
+      "    \"loggingInterval\" : {\n",
+      "      \"original\" : \"30000\",\n",
+      "      \"reproduced\" : \"1000\"\n",
+      "    },\n",
+      "    \"optimiser\" : {\n",
+      "      \"initialLearningRate\" : {\n",
+      "        \"original\" : \"0.1\",\n",
+      "        \"reproduced\" : \"1.0\"\n",
+      "      }\n",
+      "    },\n",
+      "    \"factorizedDimSize\" : {\n",
+      "      \"original\" : \"6\"\n",
+      "    },\n",
+      "    \"variance\" : {\n",
+      "      \"original\" : \"0.1\"\n",
+      "    }\n",
+      "  }\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "var lrTrainer = new LogisticRegressionTrainer();\n",
+    "var lrModel = lrTrainer.train(mnistTrain);\n",
+    "\n",
+    "System.out.println(ReproUtil.diffProvenance(tribuoProvenance, lrModel.getProvenance()));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This diff is longer than the others we've seen, as expected for two different models with different trainers. The dataset section is mostly empty, as both models are trained on an unmodified MNIST training set. The `FMClassificationTrainer` and `LogisticRegressionTrainer` show more differences, but as both are SGD-based models there are many common fields. They share fields like a loss function (both used `LogMulticlass`), a gradient optimiser (both used `AdaGrad`), the number of training epochs, and the minibatch size. They used different learning rates (which appear in the diff under `optimiser`) and different logging intervals (`loggingInterval`), and the factorization machine also has a few extra parameters not found in the logistic regression, `factorizedDimSize` and `variance`, which are reported as just having an `original` value, meaning they are only found in the first provenance and not the second.\n",
+    "\n",
+    "The current diff format is JSON, and is designed to be easily human-readable. We have left the design of a navigable diff object, which would be easily inspectable from code, to future work once we have a better understanding of how people want to use the generated diffs."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion\n",
+    "\n",
+    "We showed how to load in Tribuo models and reproduce them using our automated reproducibility system. The system executes the same computations as the original training, which in most cases results in an identical model. We have noted that there are some differences between gradient-descent-based models that are trained on ARM and x86 architectures due to underlying differences in the JVM, but otherwise the reproductions are exact. Over time we plan to expand this reproducibility system into a full experimental framework, allowing models to be rebuilt using different datasets, data transformations or training hyperparameters while holding all other parameters constant."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Java",
+   "language": "java",
+   "name": "java"
+  },
+  "language_info": {
+   "codemirror_mode": "java",
+   "file_extension": ".jshell",
+   "mimetype": "text/x-java-source",
+   "name": "Java",
+   "pygments_lexer": "java",
+   "version": "17+35-LTS-2724"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}