Merge pull request #10 from pat-alt/develop
Develop
pat-alt authored Oct 15, 2022
2 parents 042cc39 + 987aa82 commit 66bd956
Showing 31 changed files with 803 additions and 356 deletions.
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ on:
push:
branches:
- main
- develop
tags: ['*']
pull_request:
concurrency:
3 changes: 3 additions & 0 deletions Project.toml
@@ -5,6 +5,9 @@ version = "0.1.0"

[deps]
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
PkgTemplates = "14b8a8f1-9102-5b29-a752-f990bacb7fe1"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
44 changes: 28 additions & 16 deletions README.md
@@ -30,7 +30,7 @@ using Pkg
Pkg.add(url="https://github.com/pat-alt/ConformalPrediction.jl")
```

## Usage Example - Regression 🔍
## Usage Example - Inductive Conformal Regression 🔍

To illustrate the intended use of the package, let’s have a quick look at a simple regression problem. Using [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) we first generate some synthetic data and then determine indices for our training, calibration and test data:

@@ -40,35 +40,47 @@ X, y = MLJ.make_regression(1000, 2)
train, calibration, test = partition(eachindex(y), 0.4, 0.4)
```

We then train a boosted tree ([EvoTrees](https://github.com/Evovest/EvoTrees.jl)) and follow the standard [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) training procedure.
We then train a decision tree ([DecisionTree](https://github.com/JuliaAI/DecisionTree.jl)) and follow the standard [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) training procedure.

``` julia
EvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees
model = EvoTreeRegressor()
mach = machine(model, X, y)
fit!(mach, rows=train)
DecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree
model = DecisionTreeRegressor()
```

To turn our conventional machine into a conformal machine, we just need to declare it as such and then calibrate it using our calibration data:
To turn our conventional model into a conformal model, we just need to declare it as such by using the `conformal_model` wrapper function. The generated conformal model instance can then be wrapped in data to create a *machine*, following standard MLJ convention. By default, `conformal_model` instantiates a `SimpleInductiveRegressor`.

Fitting an Inductive Conformal Predictor with `fit!` trains the underlying machine learning model, but it does not compute nonconformity scores. That is because Inductive Conformal Predictors rely on a separate set of calibration data. Consequently, conformal models of type `InductiveConformalModel <: ConformalModel` require a separate calibration step before they can be used for conformal prediction. This step is implemented by calling the generic `calibrate!` method on the model instance.

``` julia
using ConformalPrediction
conf_mach = conformal_machine(mach)
calibrate!(conf_mach, selectrows(X, calibration), y[calibration])
conf_model = conformal_model(model)
mach = machine(conf_model, X, y)
fit!(mach, rows=train)
calibrate!(conf_model, selectrows(X, calibration), y[calibration])
```

Predictions can then be computed using the generic `predict` method. The code below produces predictions for a random subset of test samples:

``` julia
predict(conf_mach, selectrows(X, rand(test,5)))
predict(conf_model, selectrows(X, rand(test,5)))
```

5-element Vector{Vector{Pair{String, Vector{Float64}}}}:
["lower" => [-2.5656268495995658], "upper" => [1.4558014252276577]]
["lower" => [-2.5656268495995658], "upper" => [1.4558014252276577]]
["lower" => [-2.5656268495995658], "upper" => [1.4558014252276577]]
["lower" => [-3.906072026876036], "upper" => [0.11535624795118737]]
["lower" => [-1.9725646439635294], "upper" => [2.048863630863694]]
╭────────────────────────────────────────────────────────────────────╮
│ │
│ (1) ["lower" => [0.3963962694045419], "upper" => │
│ [1.0933093154587168]] │
│ (2) ["lower" => [0.819397821856154], "upper" => │
│ [1.516310867910329]] │
│ (3) ["lower" => [-0.6332868767933615], "upper" => │
│ [0.06362616926081349]] │
│ (4) ["lower" => [0.7215947047552422], "upper" => │
│ [1.4185077508094173]] │
│ (5) ["lower" => [2.0323107892753947], "upper" => │
│ [2.7292238353295697]] │
│ │
│ │
│ │
╰──────────────────────────────────────────────────────── 5 items ───╯
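As a sanity check on intervals like the ones above, one can estimate their empirical coverage on the held-out test set. The sketch below is not part of this commit; it assumes the `conf_model`, `X`, `y` and `test` objects defined in the preceding snippets, as well as the `"lower"`/`"upper"` output format shown:

``` julia
# Hypothetical sketch: estimate empirical coverage of the prediction
# intervals on the test set (assumes objects from the snippets above).
preds = predict(conf_model, selectrows(X, test))
covered = map(zip(preds, y[test])) do (p, yi)
    d = Dict(p)                      # "lower" => [l], "upper" => [u]
    d["lower"][1] <= yi <= d["upper"][1]
end
println("Empirical coverage: ", sum(covered) / length(covered))
```

For a well-calibrated conformal predictor, the printed fraction should be close to the nominal coverage level implied by the chosen significance level.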

## Contribute 🛠

12 changes: 5 additions & 7 deletions README.qmd
@@ -4,17 +4,15 @@ format:
variant: -raw_html
wrap: none
self-contained: true
execute:
freeze: auto
echo: true
eval: true
output: false
crossref:
fig-prefix: Figure
tbl-prefix: Table
bibliography: https://raw.githubusercontent.com/pat-alt/bib/main/bib.bib
output: asis
execute:
eval: true
echo: true
output: false
freeze: auto # re-render only when source changes
jupyter: julia-1.7
---

# ConformalPrediction
@@ -1,7 +1,7 @@
{
"hash": "23e5ff6ddc8b19eba4e8290b20658f33",
"result": {
"markdown": "---\nformat:\n commonmark:\n variant: '-raw_html'\n wrap: none\n self-contained: true\ncrossref:\n fig-prefix: Figure\n tbl-prefix: Table\nbibliography: 'https://raw.githubusercontent.com/pat-alt/bib/main/bib.bib'\noutput: asis\nexecute:\n output: false\n freeze: auto\n eval: true\n echo: true\n---\n\n# Classification Tutorial\n\n[INCOMPLETE]\n\nWe firstly generate some synthetic data with three classes and partition it into a training set, a calibration set and a test set:\n\n::: {.cell execution_count=1}\n``` {.julia .cell-code}\nusing MLJ\nX, y = MLJ.make_blobs(1000, 2, centers=3, cluster_std=2)\ntrain, calibration, test = partition(eachindex(y), 0.4, 0.4)\n```\n:::\n\n\nFollowing the standard [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) procedure, we train a boosted tree for the classification task:\n\n::: {.cell execution_count=2}\n``` {.julia .cell-code}\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier() \nmach = machine(model, X, y)\nfit!(mach, rows=train)\n```\n:::\n\n\nNext we instantiate our conformal machine and calibrate using the calibration data:\n\n::: {.cell execution_count=3}\n``` {.julia .cell-code}\nusing ConformalPrediction\nconf_mach = conformal_machine(mach)\ncalibrate!(conf_mach, selectrows(X, calibration), y[calibration])\n```\n:::\n\n\nUsing the generic `predict` method we can generate prediction sets like so:\n\n::: {.cell execution_count=4}\n``` {.julia .cell-code}\npredict(conf_mach, selectrows(X, rand(test,5)))\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```\n╭──────────────────────────────────────────────────────────────────────────╮\n│ │\n│ (1) Pair[1 => missing, 2 => 0.6448661054062889, 3 => missing] │\n│ (2) Pair[1 => missing, 2 => missing, 3 => 0.8197529347049547] │\n│ (3) Pair[1 => missing, 2 => 0.8229512785953512, 3 => missing] │\n│ (4) Pair[1 => missing, 2 => 0.7858778376049668, 3 => missing] │\n│ (5) Pair[1 => missing, 2 => missing, 3 => 
0.8197529347049547] │\n│ │\n│ │\n╰────────────────────────────────────────────────────────────── 5 items ───╯\n```\n:::\n:::\n\n\n",
"markdown": "---\nformat:\n commonmark:\n variant: '-raw_html'\n wrap: none\n self-contained: true\ncrossref:\n fig-prefix: Figure\n tbl-prefix: Table\nbibliography: 'https://raw.githubusercontent.com/pat-alt/bib/main/bib.bib'\noutput: asis\nexecute:\n output: false\n freeze: auto\n eval: true\n echo: true\n---\n\n# Classification Tutorial\n\n[INCOMPLETE]\n\nWe firstly generate some synthetic data with three classes and partition it into a training set, a calibration set and a test set:\n\n::: {.cell execution_count=1}\n``` {.julia .cell-code}\nusing MLJ\nX, y = MLJ.make_blobs(1000, 2, centers=3, cluster_std=2)\ntrain, calibration, test = partition(eachindex(y), 0.4, 0.4)\n```\n:::\n\n\nFollowing the standard [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) procedure, we train a decision tree for the classification task:\n\n::: {.cell execution_count=2}\n``` {.julia .cell-code}\nEvoTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\nmodel = DecisionTreeClassifier() \nmodel = machine(model, X, y)\nfit!(model, rows=train)\n```\n:::\n\n\nNext we instantiate our conformal model and calibrate using the calibration data:\n\n::: {.cell execution_count=3}\n``` {.julia .cell-code}\nusing ConformalPrediction\nconformal_model = conformal_model(model)\ncalibrate!(conf_model, selectrows(X, calibration), y[calibration])\n```\n:::\n\n\nUsing the generic `predict` method we can generate prediction sets like so:\n\n::: {.cell execution_count=4}\n``` {.julia .cell-code}\npredict(conf_model, selectrows(X, rand(test,5)))\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```\n╭──────────────────────────────────────────────────────────────────────────╮\n│ │\n│ (1) Pair[1 => missing, 2 => 0.6448661054062889, 3 => missing] │\n│ (2) Pair[1 => missing, 2 => missing, 3 => 0.8197529347049547] │\n│ (3) Pair[1 => missing, 2 => 0.8229512785953512, 3 => missing] │\n│ (4) Pair[1 => missing, 2 => 0.7858778376049668, 3 => missing] │\n│ (5) Pair[1 => missing, 2 
=> missing, 3 => 0.8197529347049547] │\n│ │\n│ │\n╰────────────────────────────────────────────────────────────── 5 items ───╯\n```\n:::\n:::\n\n\n",
"supporting": [
"simple_files"
],
10 changes: 10 additions & 0 deletions _freeze/docs/src/index/execute-results/md.json
@@ -0,0 +1,10 @@
{
"hash": "c56dcfed5fce5fece3f8dd90b08af0bd",
"result": {
"markdown": "```@meta\nCurrentModule = ConformalPrediction\n```\n\n# ConformalPrediction\n\nDocumentation for [ConformalPrediction.jl](https://github.com/pat-alt/ConformalPrediction.jl).\n\n\n\n`ConformalPrediction.jl` is a package for Uncertainty Quantification (UQ) through Conformal Prediction (CP) in Julia. It is designed to work with supervised models trained in [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/). Conformal Prediction is distribution-free, easy-to-understand, easy-to-use and model-agnostic. \n\n## Disclaimer ⚠️\n\nThis package is in its very early stages of development. In fact, I've built this package largely to gain a better understanding of the topic myself. So far only the most simple approaches have been implemented:\n\n- Naive method for regression.\n- LABEL approach for classification [@sadinle2019least].\n\nI have only tested it for a few of the supervised models offered by [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/).\n\n## Installation 🚩\n\nYou can install the first stable release from the general registry:\n\n```julia\nusing Pkg\nPkg.add(\"ConformalPrediction\")\n```\n\nThe development version can be installed as follows:\n\n```julia\nusing Pkg\nPkg.add(url=\"https://github.com/pat-alt/ConformalPrediction.jl\")\n```\n\n## Usage Example - Inductive Conformal Regression 🔍\n\nTo illustrate the intended use of the package, let's have a quick look at a simple regression problem. 
Using [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) we first generate some synthetic data and then determine indices for our training, calibration and test data:\n\n::: {.cell execution_count=2}\n``` {.julia .cell-code}\nusing MLJ\nX, y = MLJ.make_regression(1000, 2)\ntrain, calibration, test = partition(eachindex(y), 0.4, 0.4)\n```\n:::\n\n\nWe then train a decision tree ([DecisionTree](https://github.com/Evovest/DecisionTree.jl)) and follow the standard [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/) training procedure.\n\n::: {.cell execution_count=3}\n``` {.julia .cell-code}\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\nmodel = DecisionTreeRegressor() \n```\n:::\n\n\nTo turn our conventional machine into a conformal model, we just need to declare it as such by using `conformal_model` wrapper function. The generated conformal model instance can wrapped in data to create a *machine* following standard MLJ convention. By default that function instantiates a `SimpleInductiveRegressor`. \n\nFitting Inductive Conformal Predictors using `fit!` trains the underlying machine learning model, but it does not compute nonconformity scores. That is because Inductive Conformal Predictors rely on a separate set of calibration data. Consequently, conformal models of type `InductiveConformalModel <: ConformalModel` require a separate calibration step to be trained for conformal prediction. This can be implemented by calling the generic `calibrate!` method on the model instance. \n\n::: {.cell execution_count=4}\n``` {.julia .cell-code}\nusing ConformalPrediction\nconf_model = conformal_model(model)\nmach = machine(conf_model, X, y)\nfit!(mach, rows=train)\ncalibrate!(conf_model, selectrows(X, calibration), y[calibration])\n```\n:::\n\n\nPredictions can then be computed using the generic `predict` method. 
The code below produces predictions a random subset of test samples:\n\n::: {.cell execution_count=5}\n``` {.julia .cell-code}\npredict(conf_model, selectrows(X, rand(test,5)))\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```\n╭────────────────────────────────────────────────────────────────────╮\n│ │\n│ (1) [\"lower\" => [0.27243371134520067], \"upper\" => │\n│ [1.0198357965554317]] │\n│ (2) [\"lower\" => [0.6621889092109277], \"upper\" => │\n│ [1.4095909944211586]] │\n│ (3) [\"lower\" => [0.6835568713212139], \"upper\" => │\n│ [1.430958956531445]] │\n│ (4) [\"lower\" => [0.6835568713212139], \"upper\" => │\n│ [1.430958956531445]] │\n│ (5) [\"lower\" => [0.005568859502752321], \"upper\" => │\n│ [0.7529709447129833]] │\n│ │\n│ │\n│ │\n╰──────────────────────────────────────────────────────── 5 items ───╯\n```\n:::\n:::\n\n\n## Contribute 🛠\n\nContributions are welcome! Please follow the [SciML ColPrac guide](https://github.com/SciML/ColPrac).\n\n## References 🎓\n\n",
"supporting": [
"index_files"
],
"filters": []
}
}
10 changes: 10 additions & 0 deletions _quarto.yml
@@ -2,5 +2,15 @@ project:
title: "ConformalPrediction.jl"
execute-dir: project

crossref:
fig-prefix: Figure
tbl-prefix: Table
bibliography: https://raw.githubusercontent.com/pat-alt/bib/main/bib.bib

execute:
freeze: auto
echo: true
eval: true
output: false


27 changes: 25 additions & 2 deletions docs/Manifest.toml
@@ -2,7 +2,7 @@

julia_version = "1.8.1"
manifest_format = "2.0"
project_hash = "5ff5a6a704a15c1ad1e3ac182ca933ebf64a0761"
project_hash = "a9c53b8831f0d9c33b8a54796051d54892c81b58"

[[deps.ANSIColoredPrinters]]
git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
@@ -21,6 +21,11 @@ git-tree-sha1 = "69f7020bd72f069c219b5e8c236c1fa90d2cb409"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.2.1"

[[deps.AbstractTrees]]
git-tree-sha1 = "5c0b629df8a5566a06f5fef5100b53ea56e465a0"
uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
version = "0.4.2"

[[deps.Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f"
@@ -158,7 +163,7 @@ uuid = "ed09eef8-17a6-5b46-8889-db040fac31e3"
version = "0.3.2"

[[deps.ConformalPrediction]]
deps = ["MLJ", "Statistics"]
deps = ["MLJ", "MLJBase", "MLJModelInterface", "Statistics"]
path = ".."
uuid = "98bfc277-1877-43dc-819b-a3e38c30242f"
version = "0.1.0"
@@ -205,6 +210,12 @@ version = "1.0.0"
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"

[[deps.DecisionTree]]
deps = ["AbstractTrees", "DelimitedFiles", "LinearAlgebra", "Random", "ScikitLearnBase", "Statistics"]
git-tree-sha1 = "fb3f7ff27befb9877bee84076dd9173185d7d86a"
uuid = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
version = "0.11.2"

[[deps.DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
@@ -684,6 +695,12 @@ git-tree-sha1 = "f68deea1f25727f24a4afa9f941763e6fc44f5af"
uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
version = "0.20.19"

[[deps.MLJDecisionTreeInterface]]
deps = ["DecisionTree", "MLJModelInterface", "Random", "Tables"]
git-tree-sha1 = "d0d682ef8504e1ab705f10307c587239ebb20c4d"
uuid = "c6f25543-311c-4c74-83dc-3ea6d1015661"
version = "0.2.5"

[[deps.MLJEnsembles]]
deps = ["CategoricalArrays", "CategoricalDistributions", "ComputationalResources", "Distributed", "Distributions", "MLJBase", "MLJModelInterface", "ProgressMeter", "Random", "ScientificTypesBase", "StatsBase"]
git-tree-sha1 = "ed2f724be26d0023cade9d59b55da93f528c3f26"
@@ -1004,6 +1021,12 @@ git-tree-sha1 = "a8e18eb383b5ecf1b5e6fc237eb39255044fd92b"
uuid = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
version = "3.0.0"

[[deps.ScikitLearnBase]]
deps = ["LinearAlgebra", "Random", "Statistics"]
git-tree-sha1 = "7877e55c1523a4b336b433da39c8e8c08d2f221f"
uuid = "6e75b9c4-186b-50bd-896f-2d2496a4843e"
version = "0.5.0"

[[deps.Scratch]]
deps = ["Dates"]
git-tree-sha1 = "f94f779c94e58bf9ea243e77a37e16d9de9126bd"
3 changes: 2 additions & 1 deletion docs/Project.toml
@@ -1,8 +1,9 @@
[deps]
ConformalPrediction = "98bfc277-1877-43dc-819b-a3e38c30242f"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DecisionTree = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
PlotThemes = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
5 changes: 5 additions & 0 deletions docs/_metadata.yml
@@ -0,0 +1,5 @@
format:
commonmark:
variant: -raw_html
wrap: none
self-contained: true
