From 2ae75fa998d6aeb61b2e14426bbda2fff37316e9 Mon Sep 17 00:00:00 2001
From: Jonas Rembser
Date: Fri, 28 Jun 2019 01:04:08 +0200
Subject: [PATCH] speed comparison with m2c added

---
 README.md                      | 28 +++++++++++++++++++++------
 benchmark/benchmark-01-m2c.cpp | 35 ++++++++++++++++++++++++++++++++++
 benchmark/benchmark-01.py      | 10 ++++++++++
 3 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 benchmark/benchmark-01-m2c.cpp

diff --git a/README.md b/README.md
index f4b9e0d..cce00c3 100644
--- a/README.md
+++ b/README.md
@@ -82,13 +82,29 @@ So far, FastForest has been bencharked against the inference engine in the xgboo
 C) and the [TMVA framework](https://root.cern.ch/tmva). For every engine, the same tree
 ensemble of 1000 trees is used, and inference is done on a single thread.
 
-| Engine | Benchmark time |
-| :------ | ---------------: |
-| __FastForest__ (g++ (GCC) 9.1.0) | 0.58 s |
-| __xgboost__ 0.82 in __Python__ 3.7.3 | 2.6 s |
-| ROOT 6.16/00 __TMVA__ | 3.8 s |
+| Engine | Benchmark time |
+| :------ | ---------------: |
+| __FastForest__ (g++ (GCC) 9.1.0) | 0.58 s |
+| [__m2cgen__](https://github.com/BayesWitnesses/m2cgen) | 1.3 s |
+| [__xgboost__](https://xgboost.readthedocs.io/en/latest/python/python_api.html) 0.82 in __Python__ 3.7.3 | 2.6 s |
+| ROOT 6.16/00 [__TMVA__](https://root.cern.ch/tmva) | 3.8 s |
 
 The benchmak can be reproduced with the files found in the [benchmark directory](benchmark). The python scripts have to be
-run first as they also train and save the models.
+run first as they also train and save the models. The input type in the code generated by __m2cgen__ was changed from
+`double` to `float` for a fairer comparison with __FastForest__. The tests were performed on an Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz.
+
+### Serialization
+
+A FastForest can be serialized to its own binary format. The binary format exactly reflects the memory layout of the
+FastForest class, so saving and loading are as fast as possible. Serialization to a file is done with the `save`
+method.
+```C++
+fastForest.save("forest.bin");
+```
+The serialized FastForest can be read back with its constructor, this time the one that does not take a reference to a
+vector of feature names.
+```C++
+FastForest fastForest("forest.bin");
+```
diff --git a/benchmark/benchmark-01-m2c.cpp b/benchmark/benchmark-01-m2c.cpp
new file mode 100644
index 0000000..27fcacc
--- /dev/null
+++ b/benchmark/benchmark-01-m2c.cpp
@@ -0,0 +1,35 @@
+// compile with g++ -o benchmark-01-m2c benchmark-01-m2c.cpp
+
+#include "model.c"
+
+#include <algorithm>
+#include <cstdlib>
+#include <ctime>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+int main() {
+    const int n = 100000;
+
+    std::vector<float> input(5 * n);
+    std::vector<double> scores(n);
+
+    // fill the input matrix with uniform random numbers in [-5, 5)
+    std::generate(input.begin(), input.end(), std::rand);
+    for (auto& x : input) {
+        x = float(x) / RAND_MAX * 10 - 5;
+    }
+
+    clock_t begin = clock();
+    for (int i = 0; i < n; ++i) {
+        score(input.data() + i * 5, &scores[i]);
+    }
+    double average = std::accumulate(scores.begin(), scores.end(), 0.0) / scores.size();
+    std::cout << average << std::endl;
+
+    clock_t end = clock();
+    double elapsedSecs = double(end - begin) / CLOCKS_PER_SEC;
+
+    std::cout << "Wall time for inference: " << elapsedSecs << " s" << std::endl;
+}
diff --git a/benchmark/benchmark-01.py b/benchmark/benchmark-01.py
index e7c6802..4389eeb 100644
--- a/benchmark/benchmark-01.py
+++ b/benchmark/benchmark-01.py
@@ -2,8 +2,12 @@
 from sklearn.datasets import make_classification
 import numpy as np
 import time
+import sys
 
 import xgboost2tmva
+import m2cgen as m2c
+
+sys.setrecursionlimit(1000000)
 
 X, y = make_classification(n_samples=10000, n_features=5, random_state=42,
                            n_classes=2, weights=[0.5])
@@ -12,9 +16,15 @@
 model._Booster.dump_model("model.txt")
 model._Booster.save_model("model.bin")
 
+# export to TMVA-style XML file
 input_variables = [("f"+str(i), "F") for i in range(5)]
 xgboost2tmva.convert_model(model._Booster.get_dump(), input_variables, "model.xml")
 
+# export to hardcoded C
+code = m2c.export_to_c(model)
+with open("model.c", "w") as c_file:
+    c_file.write(code)
+
 X_test = np.random.uniform(-5, 5, size=(100000, 5))
 
 start_time = time.time()
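The hunk above stops right before the timing code in `benchmark-01.py`, which is not touched by this patch. Purely as an illustration of what is being measured, here is a minimal sketch of such a timing block, assuming `model` is the trained `XGBClassifier` and the score is taken from `predict_proba`; the names and the exact prediction call are assumptions, not the script's actual code.
```python
# Hypothetical sketch, not part of the patch: assumes `model` and `X_test`
# are the objects defined earlier in benchmark-01.py.
start_time = time.time()

scores = model.predict_proba(X_test)[:, 1]  # probability of the positive class
print(scores.mean())

print("Wall time for inference: {0} s".format(time.time() - start_time))
```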