From 2ae75fa998d6aeb61b2e14426bbda2fff37316e9 Mon Sep 17 00:00:00 2001
From: Jonas Rembser
Date: Fri, 28 Jun 2019 01:04:08 +0200
Subject: [PATCH] speed comparison with m2c added

---
 README.md                      | 28 +++++++++++++++++++++------
 benchmark/benchmark-01-m2c.cpp | 35 ++++++++++++++++++++++++++++++++++
 benchmark/benchmark-01.py      | 10 ++++++++++
 3 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 benchmark/benchmark-01-m2c.cpp

diff --git a/README.md b/README.md
index f4b9e0d..cce00c3 100644
--- a/README.md
+++ b/README.md
@@ -82,13 +82,29 @@ So far, FastForest has been bencharked against the inference engine in the xgboo
 C) and the [TMVA framework](https://root.cern.ch/tmva). For every engine, the same tree
 ensemble of 1000 trees is used, and inference is done on a single thread.
 
-| Engine | Benchmark time |
-| :------ | ---------------: |
-| __FastForest__ (g++ (GCC) 9.1.0) | 0.58 s |
-| __xgboost__ 0.82 in __Python__ 3.7.3 | 2.6 s |
-| ROOT 6.16/00 __TMVA__ | 3.8 s |
+| Engine | Benchmark time |
+| :------ | ---------------: |
+| __FastForest__ (g++ (GCC) 9.1.0) | 0.58 s |
+| [__m2cgen__](https://github.com/BayesWitnesses/m2cgen) | 1.3 s |
+| [__xgboost__](https://xgboost.readthedocs.io/en/latest/python/python_api.html) 0.82 in __Python__ 3.7.3 | 2.6 s |
+| ROOT 6.16/00 [__TMVA__](https://root.cern.ch/tmva) | 3.8 s |
 
 The benchmak can be reproduced with the files found in the [benchmark directory](benchmark). The python scripts have to be
-run first as they also train and save the models.
+run first as they also train and save the models. The input type in the code generated by __m2cgen__ was changed from
+`double` to `float` for a fairer comparison with __FastForest__. The tests were performed on an Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz.
+
+### Serialization
+
+A FastForest can be serialized to its own binary format. The binary format exactly reflects the memory layout of the
+FastForest class, so saving and loading are as fast as possible. Serialization to a file is done with the `save`
+method.
+```C++
+fastForest.save("forest.bin");
+```
+The serialized FastForest can be read back with its constructor, this time the one that does not take a reference to a
+vector of feature names.
+```C++
+FastForest fastForest("forest.bin");
+```
diff --git a/benchmark/benchmark-01-m2c.cpp b/benchmark/benchmark-01-m2c.cpp
new file mode 100644
index 0000000..27fcacc
--- /dev/null
+++ b/benchmark/benchmark-01-m2c.cpp
@@ -0,0 +1,35 @@
+// compile with g++ -o benchmark-01-m2c benchmark-01-m2c.cpp
+
+#include "model.c"
+
+#include <algorithm>
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+int main() {
+    const int n = 100000;
+
+    std::vector<float> input(5 * n);
+    std::vector<double> scores(n);
+
+    // fill the input matrix with uniform random numbers in [-5, 5)
+    std::generate(input.begin(), input.end(), std::rand);
+    for (auto& x : input) {
+        x = float(x) / RAND_MAX * 10 - 5;
+    }
+
+    clock_t begin = clock();
+    for (int i = 0; i < n; ++i) {
+        score(input.data() + i * 5, &scores[i]);
+    }
+    double average = std::accumulate(scores.begin(), scores.end(), 0.0) / scores.size();
+    std::cout << average << std::endl;
+
+    clock_t end = clock();
+    double elapsedSecs = double(end - begin) / CLOCKS_PER_SEC;
+
+    std::cout << "Wall time for inference: " << elapsedSecs << " s" << std::endl;
+}
diff --git a/benchmark/benchmark-01.py b/benchmark/benchmark-01.py
index e7c6802..4389eeb 100644
--- a/benchmark/benchmark-01.py
+++ b/benchmark/benchmark-01.py
@@ -2,8 +2,12 @@
 from sklearn.datasets import make_classification
 import numpy as np
 import time
+import sys
 
 import xgboost2tmva
+import m2cgen as m2c
+
+sys.setrecursionlimit(1000000)
 
 X, y = make_classification(n_samples=10000, n_features=5, random_state=42,
                            n_classes=2, weights=[0.5])
@@ -12,9 +16,15 @@
 model._Booster.dump_model("model.txt")
 model._Booster.save_model("model.bin")
 
+# export to TMVA-style XML file
 input_variables = [("f"+str(i), "F") for i in range(5)]
 xgboost2tmva.convert_model(model._Booster.get_dump(), input_variables, "model.xml")
 
+# export to hardcoded C
+code = m2c.export_to_c(model)
+with open("model.c", "w") as c_file:
+    c_file.write(code)
+
 X_test = np.random.uniform(-5, 5, size=(100000, 5))
 
 start_time = time.time()
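The hunk above stops right before the timing code in `benchmark-01.py`, which is not touched by this patch. Purely as an illustration of what is being measured, here is a minimal sketch of such a timing block, assuming `model` is the trained `XGBClassifier` and the score is taken from `predict_proba`; the names and the exact prediction call are assumptions, not the script's actual code.
```python
# Hypothetical sketch, not part of the patch: assumes `model` and `X_test`
# are the objects defined earlier in benchmark-01.py.
start_time = time.time()

scores = model.predict_proba(X_test)[:, 1]  # probability of the positive class
print(scores.mean())

print("Wall time for inference: {0} s".format(time.time() - start_time))
```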