From d85abc05762008cc8c853c0a6e37e05c4bd153ca Mon Sep 17 00:00:00 2001 From: Ruby Date: Mon, 12 Apr 2021 08:57:15 +0900 Subject: [PATCH] Preparing for OSS release --- .github/workflows/main.yml | 24 +++ .gitignore | 13 ++ LICENSE | 19 ++ MANIFEST.in | 7 + README.md | 93 +++++++++ example_outputs/decision_tree/output.vcl | 30 +++ example_outputs/decision_tree/source.py | 15 ++ example_outputs/linear_regression/output.vcl | 171 +++++++++++++++ example_outputs/linear_regression/source.py | 10 + example_outputs/nusvr/output.vcl | 209 +++++++++++++++++++ example_outputs/nusvr/source.py | 11 + m2vcl/__init__.py | 25 +++ m2vcl/vcl/__init__.py | 0 m2vcl/vcl/code_generator.py | 150 +++++++++++++ m2vcl/vcl/interpreter.py | 77 +++++++ m2vcl/vcl/mixins.py | 27 +++ pyproject.toml | 3 + setup.py | 27 +++ tests/__init__.py | 0 tests/test_vcl_returning.py | 44 ++++ tox.ini | 28 +++ 21 files changed, 983 insertions(+) create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 example_outputs/decision_tree/output.vcl create mode 100644 example_outputs/decision_tree/source.py create mode 100644 example_outputs/linear_regression/output.vcl create mode 100644 example_outputs/linear_regression/source.py create mode 100644 example_outputs/nusvr/output.vcl create mode 100644 example_outputs/nusvr/source.py create mode 100644 m2vcl/__init__.py create mode 100644 m2vcl/vcl/__init__.py create mode 100644 m2vcl/vcl/code_generator.py create mode 100644 m2vcl/vcl/interpreter.py create mode 100644 m2vcl/vcl/mixins.py create mode 100644 pyproject.toml create mode 100644 setup.py create mode 100644 tests/__init__.py create mode 100644 tests/test_vcl_returning.py create mode 100644 tox.ini diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..3b39a49 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,24 @@ +name: CI 
Tests + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a57f7ea --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# general things to ignore +build/ +dist/ +*.egg-info/ +*.egg +*.py[cod] +__pycache__/ +*.so +*~ + +# due to using tox and pytest +.tox +.cache \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..40e7419 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2021 Ruby Nealon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..62eab1e --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,7 @@ + +include pyproject.toml +include *.md +include LICENSE +recursive-include src *.py +recursive-include example_outputs *.py +recursive-include example_outputs *.vcl \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1784404 --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# m2vcl + +![CI Tests status badge](https://github.com/rubyroobs/m2vcl/workflows/CI%20Tests/badge.svg) + +Experimental extension of [m2cgen](https://github.com/BayesWitnesses/m2cgen) to export statistical models to [Varnish Configuration Language](https://varnish-cache.org/docs/trunk/users-guide/vcl.html), for use in the Varnish cache. Right now Fastly-flavored VCL is the only target supported, though this could theoretically partially target core Varnish in the future. + +## Examples + +For code examples and their generated VCL outputs, see the [example_outputs](https://github.com/rubyroobs/m2vcl/tree/master/example_outputs) directory. + +## Usage + +Use `export_to_vcl` to export to Fastly-flavored VCL. The `export_to_vcl` function takes the arguments `indent` (defaults to 4, indent size in the generated VCL) and `sub_name` (defaults to `score`, the prefix for the generated subroutine and input/output header names). Inputs for the subroutine can be set on the headers `req.http.<sub_name>_input_<index>` and outputs will be set on the headers `req.http.<sub_name>_output_<index>`. 
+ +A working demo is available in [this Fastly fiddle](https://fiddle.fastlydemo.net/fiddle/754b1898), with the source provided below: + +### Generating Python code + +``` +from sklearn.model_selection import train_test_split +from sklearn.datasets import load_iris +from sklearn.tree import DecisionTreeClassifier + +import m2vcl + +iris = load_iris() +X = iris.data +y = iris.target +X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=0) + +clf = DecisionTreeClassifier(max_leaf_nodes=3, random_state=0) +clf.fit(X_train, y_train) +print(m2vcl.export_to_vcl(clf)) +``` + +### Output VCL + +``` +sub score { + declare local var.input_3 FLOAT; + set var.input_3 = std.atof(req.http.score_input_3); + + declare local var.input_2 FLOAT; + set var.input_2 = std.atof(req.http.score_input_2); + + declare local var.var0_0 FLOAT; + declare local var.var0_1 FLOAT; + declare local var.var0_2 FLOAT; + if (var.input_3 <= 0.800000011920929) { + set var.var0_0 = 1.0; + set var.var0_1 = 0.0; + set var.var0_2 = 0.0; + } else { + if (var.input_2 <= 4.950000047683716) { + set var.var0_0 = 0.0; + set var.var0_1 = 0.9166666666666666; + set var.var0_2 = 0.08333333333333333; + } else { + set var.var0_0 = 0.0; + set var.var0_1 = 0.02564102564102564; + set var.var0_2 = 0.9743589743589743; + } + } + set req.http.score_output_0 = var.var0_0; + set req.http.score_output_1 = var.var0_1; + set req.http.score_output_2 = var.var0_2; + return; +} +``` + +### VCL Usage + +``` +# VCL_DELIVER +set req.http.score_input_2 = "1.23456789"; +set req.http.score_input_3 = "9.87654321"; +call score; +set resp.http.Score-Result-0 = req.http.score_output_0; +set resp.http.Score-Result-1 = req.http.score_output_1; +set resp.http.Score-Result-2 = req.http.score_output_2; +``` + +## Known limitations + +* Precision is limited due to limitations of Fastly, and will be lost for each subroutine the AST is broken down into due to the required float -> string -> float conversion. 
+* Only tested with a small subset of models i.e. highly experimental - make sure to sanity check outputs + +## Todo + +* Improve test coverage by performing end to end testing on Fastly +* Create tests for more models +* Support core Varnish (may require a VMOD to provide equivalent functionality of [Fastly's math trig](https://developer.fastly.com/reference/vcl/functions/math-trig/)) \ No newline at end of file diff --git a/example_outputs/decision_tree/output.vcl b/example_outputs/decision_tree/output.vcl new file mode 100644 index 0000000..7ecd908 --- /dev/null +++ b/example_outputs/decision_tree/output.vcl @@ -0,0 +1,30 @@ +sub score { + declare local var.input_3 FLOAT; + set var.input_3 = std.atof(req.http.score_input_3); + + declare local var.input_2 FLOAT; + set var.input_2 = std.atof(req.http.score_input_2); + + declare local var.var0_0 FLOAT; + declare local var.var0_1 FLOAT; + declare local var.var0_2 FLOAT; + if (var.input_3 <= 0.800000011920929) { + set var.var0_0 = 1.0; + set var.var0_1 = 0.0; + set var.var0_2 = 0.0; + } else { + if (var.input_2 <= 4.950000047683716) { + set var.var0_0 = 0.0; + set var.var0_1 = 0.9166666666666666; + set var.var0_2 = 0.08333333333333333; + } else { + set var.var0_0 = 0.0; + set var.var0_1 = 0.02564102564102564; + set var.var0_2 = 0.9743589743589743; + } + } + set req.http.score_output_0 = var.var0_0; + set req.http.score_output_1 = var.var0_1; + set req.http.score_output_2 = var.var0_2; + return; +} \ No newline at end of file diff --git a/example_outputs/decision_tree/source.py b/example_outputs/decision_tree/source.py new file mode 100644 index 0000000..c9c0b25 --- /dev/null +++ b/example_outputs/decision_tree/source.py @@ -0,0 +1,15 @@ +from sklearn.model_selection import train_test_split +from sklearn.datasets import load_iris +from sklearn.tree import DecisionTreeClassifier + +import m2vcl + +iris = load_iris() +X = iris.data +y = iris.target +X_train, X_test, y_train, y_test = train_test_split( + X, y, 
random_state=0) + +clf = DecisionTreeClassifier(max_leaf_nodes=3, random_state=0) +clf.fit(X_train, y_train) +print(m2vcl.export_to_vcl(clf)) diff --git a/example_outputs/linear_regression/output.vcl b/example_outputs/linear_regression/output.vcl new file mode 100644 index 0000000..23b34f9 --- /dev/null +++ b/example_outputs/linear_regression/output.vcl @@ -0,0 +1,171 @@ +sub score { + declare local var.input_0 FLOAT; + set var.input_0 = std.atof(req.http.score_input_0); + + declare local var.input_1 FLOAT; + set var.input_1 = std.atof(req.http.score_input_1); + + declare local var.input_2 FLOAT; + set var.input_2 = std.atof(req.http.score_input_2); + + declare local var.input_3 FLOAT; + set var.input_3 = std.atof(req.http.score_input_3); + + declare local var.input_4 FLOAT; + set var.input_4 = std.atof(req.http.score_input_4); + + declare local var.input_5 FLOAT; + set var.input_5 = std.atof(req.http.score_input_5); + + declare local var.input_6 FLOAT; + set var.input_6 = std.atof(req.http.score_input_6); + + declare local var.input_7 FLOAT; + set var.input_7 = std.atof(req.http.score_input_7); + + declare local var.input_8 FLOAT; + set var.input_8 = std.atof(req.http.score_input_8); + + declare local var.input_9 FLOAT; + set var.input_9 = std.atof(req.http.score_input_9); + + declare local var.input_10 FLOAT; + set var.input_10 = std.atof(req.http.score_input_10); + + declare local var.input_11 FLOAT; + set var.input_11 = std.atof(req.http.score_input_11); + + declare local var.input_12 FLOAT; + set var.input_12 = std.atof(req.http.score_input_12); + + declare local var.var0_0 FLOAT; + set var.var0_0 = var.input_0; + set var.var0_0 *= -0.10801135783679545; + declare local var.var1_0 FLOAT; + set var.var1_0 = var.var0_0; + declare local var.var2_0 FLOAT; + set var.var2_0 = 36.459488385090125; + set var.var2_0 += var.var1_0; + declare local var.var3_0 FLOAT; + set var.var3_0 = var.var2_0; + declare local var.var4_0 FLOAT; + set var.var4_0 = var.input_1; + set 
var.var4_0 *= 0.04642045836688176; + declare local var.var5_0 FLOAT; + set var.var5_0 = var.var4_0; + declare local var.var6_0 FLOAT; + set var.var6_0 = var.var3_0; + set var.var6_0 += var.var5_0; + declare local var.var7_0 FLOAT; + set var.var7_0 = var.var6_0; + declare local var.var8_0 FLOAT; + set var.var8_0 = var.input_2; + set var.var8_0 *= 0.02055862636707862; + declare local var.var9_0 FLOAT; + set var.var9_0 = var.var8_0; + declare local var.var10_0 FLOAT; + set var.var10_0 = var.var7_0; + set var.var10_0 += var.var9_0; + declare local var.var11_0 FLOAT; + set var.var11_0 = var.var10_0; + declare local var.var12_0 FLOAT; + set var.var12_0 = var.input_3; + set var.var12_0 *= 2.6867338193448966; + declare local var.var13_0 FLOAT; + set var.var13_0 = var.var12_0; + declare local var.var14_0 FLOAT; + set var.var14_0 = var.var11_0; + set var.var14_0 += var.var13_0; + declare local var.var15_0 FLOAT; + set var.var15_0 = var.var14_0; + declare local var.var16_0 FLOAT; + set var.var16_0 = var.input_4; + set var.var16_0 *= -17.766611228300167; + declare local var.var17_0 FLOAT; + set var.var17_0 = var.var16_0; + declare local var.var18_0 FLOAT; + set var.var18_0 = var.var15_0; + set var.var18_0 += var.var17_0; + declare local var.var19_0 FLOAT; + set var.var19_0 = var.var18_0; + declare local var.var20_0 FLOAT; + set var.var20_0 = var.input_5; + set var.var20_0 *= 3.809865206809212; + declare local var.var21_0 FLOAT; + set var.var21_0 = var.var20_0; + declare local var.var22_0 FLOAT; + set var.var22_0 = var.var19_0; + set var.var22_0 += var.var21_0; + declare local var.var23_0 FLOAT; + set var.var23_0 = var.var22_0; + declare local var.var24_0 FLOAT; + set var.var24_0 = var.input_6; + set var.var24_0 *= 0.0006922246403425021; + declare local var.var25_0 FLOAT; + set var.var25_0 = var.var24_0; + declare local var.var26_0 FLOAT; + set var.var26_0 = var.var23_0; + set var.var26_0 += var.var25_0; + declare local var.var27_0 FLOAT; + set var.var27_0 = var.var26_0; + 
declare local var.var28_0 FLOAT; + set var.var28_0 = var.input_7; + set var.var28_0 *= -1.475566845600255; + declare local var.var29_0 FLOAT; + set var.var29_0 = var.var28_0; + declare local var.var30_0 FLOAT; + set var.var30_0 = var.var27_0; + set var.var30_0 += var.var29_0; + declare local var.var31_0 FLOAT; + set var.var31_0 = var.var30_0; + declare local var.var32_0 FLOAT; + set var.var32_0 = var.input_8; + set var.var32_0 *= 0.30604947898517226; + declare local var.var33_0 FLOAT; + set var.var33_0 = var.var32_0; + declare local var.var34_0 FLOAT; + set var.var34_0 = var.var31_0; + set var.var34_0 += var.var33_0; + declare local var.var35_0 FLOAT; + set var.var35_0 = var.var34_0; + declare local var.var36_0 FLOAT; + set var.var36_0 = var.input_9; + set var.var36_0 *= -0.01233459391657437; + declare local var.var37_0 FLOAT; + set var.var37_0 = var.var36_0; + declare local var.var38_0 FLOAT; + set var.var38_0 = var.var35_0; + set var.var38_0 += var.var37_0; + declare local var.var39_0 FLOAT; + set var.var39_0 = var.var38_0; + declare local var.var40_0 FLOAT; + set var.var40_0 = var.input_10; + set var.var40_0 *= -0.9527472317072923; + declare local var.var41_0 FLOAT; + set var.var41_0 = var.var40_0; + declare local var.var42_0 FLOAT; + set var.var42_0 = var.var39_0; + set var.var42_0 += var.var41_0; + declare local var.var43_0 FLOAT; + set var.var43_0 = var.var42_0; + declare local var.var44_0 FLOAT; + set var.var44_0 = var.input_11; + set var.var44_0 *= 0.009311683273793711; + declare local var.var45_0 FLOAT; + set var.var45_0 = var.var44_0; + declare local var.var46_0 FLOAT; + set var.var46_0 = var.var43_0; + set var.var46_0 += var.var45_0; + declare local var.var47_0 FLOAT; + set var.var47_0 = var.var46_0; + declare local var.var48_0 FLOAT; + set var.var48_0 = var.input_12; + set var.var48_0 *= -0.5247583778554923; + declare local var.var49_0 FLOAT; + set var.var49_0 = var.var48_0; + declare local var.var50_0 FLOAT; + set var.var50_0 = var.var47_0; + set 
var.var50_0 += var.var49_0; + set req.http.score_output_0 = var.var50_0; + return; +} \ No newline at end of file diff --git a/example_outputs/linear_regression/source.py b/example_outputs/linear_regression/source.py new file mode 100644 index 0000000..f0d1956 --- /dev/null +++ b/example_outputs/linear_regression/source.py @@ -0,0 +1,10 @@ +from sklearn.datasets import load_boston +from sklearn.linear_model import LinearRegression + +import m2vcl + +boston = load_boston() +X, y = boston.data, boston.target +estimator = LinearRegression() +estimator.fit(X, y) +print(m2vcl.export_to_vcl(estimator)) diff --git a/example_outputs/nusvr/output.vcl b/example_outputs/nusvr/output.vcl new file mode 100644 index 0000000..695a3c7 --- /dev/null +++ b/example_outputs/nusvr/output.vcl @@ -0,0 +1,209 @@ +sub score { + declare local var.input_0 FLOAT; + set var.input_0 = std.atof(req.http.score_input_0); + + declare local var.input_1 FLOAT; + set var.input_1 = std.atof(req.http.score_input_1); + + declare local var.input_2 FLOAT; + set var.input_2 = std.atof(req.http.score_input_2); + + declare local var.input_3 FLOAT; + set var.input_3 = std.atof(req.http.score_input_3); + + declare local var.input_4 FLOAT; + set var.input_4 = std.atof(req.http.score_input_4); + + declare local var.var0_0 FLOAT; + set var.var0_0 = 0.144043571160878; + set var.var0_0 -= var.input_0; + declare local var.var1_0 FLOAT; + set var.var1_0 = 1.454273506962975; + set var.var1_0 -= var.input_1; + declare local var.var2_0 FLOAT; + set var.var2_0 = math.exp2(2.0); + set var.var2_0 += math.exp2(2.0); + declare local var.var3_0 FLOAT; + set var.var3_0 = var.var2_0; + declare local var.var4_0 FLOAT; + set var.var4_0 = 0.7610377251469934; + set var.var4_0 -= var.input_2; + declare local var.var5_0 FLOAT; + set var.var5_0 = var.var3_0; + set var.var5_0 += math.exp2(2.0); + declare local var.var6_0 FLOAT; + set var.var6_0 = var.var5_0; + declare local var.var7_0 FLOAT; + set var.var7_0 = 0.12167501649282841; + set 
var.var7_0 -= var.input_3; + declare local var.var8_0 FLOAT; + set var.var8_0 = var.var6_0; + set var.var8_0 += math.exp2(2.0); + declare local var.var9_0 FLOAT; + set var.var9_0 = var.var8_0; + declare local var.var10_0 FLOAT; + set var.var10_0 = 0.44386323274542566; + set var.var10_0 -= var.input_4; + declare local var.var11_0 FLOAT; + set var.var11_0 = var.var9_0; + set var.var11_0 += math.exp2(2.0); + declare local var.var12_0 FLOAT; + set var.var12_0 = var.var11_0; + declare local var.var13_0 FLOAT; + set var.var13_0 = -0.18941318870892684; + set var.var13_0 *= var.var12_0; + declare local var.var14_0 FLOAT; + set var.var14_0 = math.exp(var.var13_0); + set var.var14_0 *= 0.1728104625243277; + declare local var.var15_0 FLOAT; + set var.var15_0 = var.var14_0; + declare local var.var16_0 FLOAT; + set var.var16_0 = 0.6424068952791205; + set var.var16_0 += var.var15_0; + declare local var.var17_0 FLOAT; + set var.var17_0 = var.var16_0; + declare local var.var18_0 FLOAT; + set var.var18_0 = -1.4543656745987648; + set var.var18_0 -= var.input_0; + declare local var.var19_0 FLOAT; + set var.var19_0 = 0.04575851730144607; + set var.var19_0 -= var.input_1; + declare local var.var20_0 FLOAT; + set var.var20_0 = math.exp2(2.0); + set var.var20_0 += math.exp2(2.0); + declare local var.var21_0 FLOAT; + set var.var21_0 = var.var20_0; + declare local var.var22_0 FLOAT; + set var.var22_0 = -0.1871838500258336; + set var.var22_0 -= var.input_2; + declare local var.var23_0 FLOAT; + set var.var23_0 = var.var21_0; + set var.var23_0 += math.exp2(2.0); + declare local var.var24_0 FLOAT; + set var.var24_0 = var.var23_0; + declare local var.var25_0 FLOAT; + set var.var25_0 = 1.5327792143584575; + set var.var25_0 -= var.input_3; + declare local var.var26_0 FLOAT; + set var.var26_0 = var.var24_0; + set var.var26_0 += math.exp2(2.0); + declare local var.var27_0 FLOAT; + set var.var27_0 = var.var26_0; + declare local var.var28_0 FLOAT; + set var.var28_0 = 1.469358769900285; + set 
var.var28_0 -= var.input_4; + declare local var.var29_0 FLOAT; + set var.var29_0 = var.var27_0; + set var.var29_0 += math.exp2(2.0); + declare local var.var30_0 FLOAT; + set var.var30_0 = var.var29_0; + declare local var.var31_0 FLOAT; + set var.var31_0 = -0.18941318870892684; + set var.var31_0 *= var.var30_0; + declare local var.var32_0 FLOAT; + set var.var32_0 = math.exp(var.var31_0); + set var.var32_0 *= 0.31665278164397415; + declare local var.var33_0 FLOAT; + set var.var33_0 = var.var32_0; + declare local var.var34_0 FLOAT; + set var.var34_0 = var.var17_0; + set var.var34_0 += var.var33_0; + declare local var.var35_0 FLOAT; + set var.var35_0 = var.var34_0; + declare local var.var36_0 FLOAT; + set var.var36_0 = 0.1549474256969163; + set var.var36_0 -= var.input_0; + declare local var.var37_0 FLOAT; + set var.var37_0 = 0.37816251960217356; + set var.var37_0 -= var.input_1; + declare local var.var38_0 FLOAT; + set var.var38_0 = math.exp2(2.0); + set var.var38_0 += math.exp2(2.0); + declare local var.var39_0 FLOAT; + set var.var39_0 = var.var38_0; + declare local var.var40_0 FLOAT; + set var.var40_0 = -0.8877857476301128; + set var.var40_0 -= var.input_2; + declare local var.var41_0 FLOAT; + set var.var41_0 = var.var39_0; + set var.var41_0 += math.exp2(2.0); + declare local var.var42_0 FLOAT; + set var.var42_0 = var.var41_0; + declare local var.var43_0 FLOAT; + set var.var43_0 = -1.980796468223927; + set var.var43_0 -= var.input_3; + declare local var.var44_0 FLOAT; + set var.var44_0 = var.var42_0; + set var.var44_0 += math.exp2(2.0); + declare local var.var45_0 FLOAT; + set var.var45_0 = var.var44_0; + declare local var.var46_0 FLOAT; + set var.var46_0 = -0.3479121493261526; + set var.var46_0 -= var.input_4; + declare local var.var47_0 FLOAT; + set var.var47_0 = var.var45_0; + set var.var47_0 += math.exp2(2.0); + declare local var.var48_0 FLOAT; + set var.var48_0 = var.var47_0; + declare local var.var49_0 FLOAT; + set var.var49_0 = -0.18941318870892684; + set 
var.var49_0 *= var.var48_0; + declare local var.var50_0 FLOAT; + set var.var50_0 = math.exp(var.var49_0); + set var.var50_0 *= 0.01053675583169808; + declare local var.var51_0 FLOAT; + set var.var51_0 = var.var50_0; + declare local var.var52_0 FLOAT; + set var.var52_0 = var.var35_0; + set var.var52_0 += var.var51_0; + declare local var.var53_0 FLOAT; + set var.var53_0 = var.var52_0; + declare local var.var54_0 FLOAT; + set var.var54_0 = 0.15634896910398005; + set var.var54_0 -= var.input_0; + declare local var.var55_0 FLOAT; + set var.var55_0 = 1.2302906807277207; + set var.var55_0 -= var.input_1; + declare local var.var56_0 FLOAT; + set var.var56_0 = math.exp2(2.0); + set var.var56_0 += math.exp2(2.0); + declare local var.var57_0 FLOAT; + set var.var57_0 = var.var56_0; + declare local var.var58_0 FLOAT; + set var.var58_0 = 1.2023798487844113; + set var.var58_0 -= var.input_2; + declare local var.var59_0 FLOAT; + set var.var59_0 = var.var57_0; + set var.var59_0 += math.exp2(2.0); + declare local var.var60_0 FLOAT; + set var.var60_0 = var.var59_0; + declare local var.var61_0 FLOAT; + set var.var61_0 = -0.3873268174079523; + set var.var61_0 -= var.input_3; + declare local var.var62_0 FLOAT; + set var.var62_0 = var.var60_0; + set var.var62_0 += math.exp2(2.0); + declare local var.var63_0 FLOAT; + set var.var63_0 = var.var62_0; + declare local var.var64_0 FLOAT; + set var.var64_0 = -0.30230275057533557; + set var.var64_0 -= var.input_4; + declare local var.var65_0 FLOAT; + set var.var65_0 = var.var63_0; + set var.var65_0 += math.exp2(2.0); + declare local var.var66_0 FLOAT; + set var.var66_0 = var.var65_0; + declare local var.var67_0 FLOAT; + set var.var67_0 = -0.18941318870892684; + set var.var67_0 *= var.var66_0; + declare local var.var68_0 FLOAT; + set var.var68_0 = math.exp(var.var67_0); + set var.var68_0 *= -0.49999999999999994; + declare local var.var69_0 FLOAT; + set var.var69_0 = var.var68_0; + declare local var.var70_0 FLOAT; + set var.var70_0 = var.var53_0; + 
set var.var70_0 += var.var69_0; + set req.http.score_output_0 = var.var70_0; + return; +} \ No newline at end of file diff --git a/example_outputs/nusvr/source.py b/example_outputs/nusvr/source.py new file mode 100644 index 0000000..7924399 --- /dev/null +++ b/example_outputs/nusvr/source.py @@ -0,0 +1,11 @@ +from sklearn.svm import NuSVR +import numpy as np +import m2vcl + +n_samples, n_features = 10, 5 +np.random.seed(0) +y = np.random.randn(n_samples) +X = np.random.randn(n_samples, n_features) +regr = NuSVR(C=1.0, nu=0.1) +regr.fit(X, y) +print(m2vcl.export_to_vcl(regr)) diff --git a/m2vcl/__init__.py b/m2vcl/__init__.py new file mode 100644 index 0000000..2955228 --- /dev/null +++ b/m2vcl/__init__.py @@ -0,0 +1,25 @@ + +from m2cgen.exporters import _export +from .vcl.interpreter import FastlyVCLInterpreter + + +def export_to_vcl(model, indent=4, sub_name="score"): + """ + Generates a VCL code representation of the given model. + Parameters + ---------- + model : object + The model object that should be transpiled into code. + indent : int, optional + The size of indents in the generated code. + sub_name : string, optional + Name of the subroutine in the generated code. 
+ Returns + ------- + code : string + """ + interpreter = FastlyVCLInterpreter( + indent=indent, + sub_name=sub_name + ) + return _export(model, interpreter) diff --git a/m2vcl/vcl/__init__.py b/m2vcl/vcl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/m2vcl/vcl/code_generator.py b/m2vcl/vcl/code_generator.py new file mode 100644 index 0000000..6c6015f --- /dev/null +++ b/m2vcl/vcl/code_generator.py @@ -0,0 +1,150 @@ +import contextlib + +from m2cgen.interpreters.code_generator import CLikeCodeGenerator +from m2cgen.interpreters.code_generator import CodeTemplate + + +class FastlyVCLCodeGenerator(CLikeCodeGenerator): + + tpl_var_declaration = CodeTemplate( + "declare local {var_name} FLOAT;") + tpl_var_assignment = CodeTemplate("set {var_name} = {value};") + tpl_array_index_access = CodeTemplate("{array_name}_{index}") + tpl_infix_expression = CodeTemplate("{left} {op} {right}") + + def __init__(self, indent=4, + id_prefix="score"): + self.id_prefix = self.header_var(id_prefix) + super().__init__(indent) + + def header_var(self, name): + return f"req.http.{name}" + + def reset_state(self): + super().reset_state() + self._sub_prefix = self.id_prefix + self._sub_start_pos = 0 + self._output_size = 1 + self._input_idxs = {} + self._var_sizes = {} + + def get_var_name(self): + var_name = f"var.var{self._var_idx}" + self._var_idx += 1 + return var_name + + def add_var_declaration(self, size): + var_name = self.get_var_name() + + self._var_sizes[var_name] = size + for i in range(size): + self.add_code_line( + self.tpl_var_declaration(var_name=f"{var_name}_{i}") + ) + + if size == 1: + return var_name + "_0" + + return var_name + + def add_return_statement(self, value): + if value in self._var_sizes: + # this wont work if an array is passed directly to value + # todo: check if that is a possibility + self._output_size = self._var_sizes[value] + + if self._output_size > 1: + for i in range(self._output_size): + self.add_code_line( + 
self.tpl_var_assignment( + var_name=f"{self._sub_prefix}_output_{i}", + value=f"{value}_{i}" + ) + ) + else: + self.add_code_line(self.tpl_var_assignment( + var_name=f"{self._sub_prefix}_output_0", value=f"{value}")) + + self.add_code_line("return;") + + def add_var_assignment(self, var_name, value, value_size): + for i in range(value_size): + if var_name in self._var_sizes: + var = f"{var_name}_{i}" + else: + var = var_name + self.add_code_line( + self.tpl_var_assignment( + var_name=var, + value=(value[i] if value_size > 1 else value) + ) + ) + + def add_function_def(self, name, args, is_vector_output): + if args != [(True, "input")]: + print(args) + raise "Unexpected args given from FastlyVCLInterpreter" + + self.add_code_line(f"sub {name} {{") + self._sub_start_pos = self._code_buf.tell() + self.increase_indent() + + @contextlib.contextmanager + def function_definition(self, name, args, is_vector_output): + if self._sub_prefix != self.header_var(name): + self._sub_prefix = f"{self._sub_prefix}_{name}" + + self.add_function_def(name, args, is_vector_output) + yield + self.add_block_termination() + + self._code_buf.seek(0) + content = self._code_buf.read(self._sub_start_pos) + for index, var_name in self._input_idxs.items(): + content += str(self._indent * " ") + \ + self.tpl_var_declaration(var_name=var_name) + "\n" + content += str(self._indent * " ") + content += self.tpl_var_assignment( + var_name=f"{var_name}", + value=f"std.atof({self.id_prefix}_input_{index})" + ) + content += "\n\n" + + content += self._code_buf.read() + self._code_buf.seek(0) + self._code_buf.write(content) + + def infix_expression(self, left, right, op): + if op == "=": + raise "Assignment should be done with add_var_assignment" + elif op in ["+", "-", "*", "/", "%", "|", "&", "^", "<<", ">>", "rol"]: + # this could be optimized + var_name = self.add_var_declaration(1) + self.add_var_assignment(var_name, left, 1) + self.add_code_line(f"set {var_name} {op}= {right};") + return var_name + 
+ return self.tpl_infix_expression(left=left, right=right, op=op) + + def sub_invocation(self, sub_name): + self.add_code_line(f"call {sub_name};") + var_name = self.add_var_declaration(1) + self.add_var_assignment( + var_name, + f"std.atof({self.id_prefix}_{sub_name}_output_0)", + 1 + ) + return var_name + + def array_index_access(self, array_name, index): + if array_name == "input": + var_name = f"var.input_{index}" + self._input_idxs[index] = var_name + return var_name + + return super().array_index_access(array_name, index) + + def vector_init(self, values): + # vectors must be assigned value by value + # this is done in add_var_assignment + return values diff --git a/m2vcl/vcl/interpreter.py b/m2vcl/vcl/interpreter.py new file mode 100644 index 0000000..4a15e47 --- /dev/null +++ b/m2vcl/vcl/interpreter.py @@ -0,0 +1,77 @@ +from m2cgen.interpreters import mixins +from m2cgen.interpreters.interpreter import ImperativeToCodeInterpreter +from .code_generator import FastlyVCLCodeGenerator +from .mixins import VCLSubroutinesMixin + + +class FastlyVCLInterpreter(ImperativeToCodeInterpreter, + mixins.LinearAlgebraMixin, + mixins.BinExpressionDepthTrackingMixin, + VCLSubroutinesMixin): + + # VCL does not allow x = y * z style operations + bin_depth_threshold = 1 + + ast_size_check_frequency = 2 + ast_size_per_subroutine_threshold = 200 + + abs_function_name = "math.trunc" + atan_function_name = "math.atan" + exponent_function_name = "math.exp" + exp2_function_name = "math.exp2" + logarithm_function_name = "math.log" + log1p_function_name = "math.log1p" + sqrt_function_name = "math.sqrt" + tanh_function_name = "math.tanh" + + def __init__(self, indent=4, sub_name="score", + *args, **kwargs): + self.indent = indent + self.sub_name = sub_name + + super().__init__(None, *args, **kwargs) + + def interpret(self, expr): + top_cg = self.create_code_generator() + + self.enqueue_subroutine(self.sub_name, expr) + self.process_subroutine_queue(top_cg) + + return 
top_cg.finalize_and_get_generated_code() + + def create_code_generator(self): + return FastlyVCLCodeGenerator( + indent=self.indent, + id_prefix=self.sub_name, + ) + + def interpret_pow_expr(self, expr, **kwargs): + # Fastly VCL doesn't have a math.pow, so hack around this + base_result = self._do_interpret(expr.base_expr, **kwargs) + exp_result = self._do_interpret(expr.exp_expr, **kwargs) + + if exp_result == "2.0": + return self._cg.function_invocation( + self.exp2_function_name, exp_result) + + var_name = self._cg.add_var_declaration(1) + self._cg.add_var_assignment( + var_name, + self._cg.function_invocation( + self.logarithm_function_name, + exp_result + ), + 1 + ) + self._cg.add_code_line( + f"set {var_name} *= {base_result};" + ) + self._cg.add_var_assignment( + var_name, + self._cg.function_invocation( + self.exponent_function_name, + var_name + ), + 1 + ) + return var_name diff --git a/m2vcl/vcl/mixins.py b/m2vcl/vcl/mixins.py new file mode 100644 index 0000000..e919248 --- /dev/null +++ b/m2vcl/vcl/mixins.py @@ -0,0 +1,27 @@ +from m2cgen import ast +from m2cgen.interpreters.interpreter import BaseToCodeInterpreter +from m2cgen.interpreters.mixins import SubroutinesMixin + + +class VCLSubroutinesMixin(SubroutinesMixin): + + def _pre_interpret_hook(self, expr, ast_size_check_counter=0, **kwargs): + if isinstance(expr, ast.BinExpr) and not expr.to_reuse: + frequency = self._adjust_ast_check_frequency(expr) + self.ast_size_check_frequency = min( + frequency, self.ast_size_check_frequency) + + ast_size_check_counter += 1 + if ast_size_check_counter >= self.ast_size_check_frequency: + ast_size_check_counter = 0 + ast_size = ast.count_exprs(expr) + if ast_size > self.ast_size_per_subroutine_threshold: + sub_name = self._get_subroutine_name() + self.enqueue_subroutine(sub_name, expr) + return self._cg.sub_invocation( + sub_name + ), kwargs + + kwargs['ast_size_check_counter'] = ast_size_check_counter + + return BaseToCodeInterpreter._pre_interpret_hook(self, 
expr, **kwargs) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..73a7fd1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=40.8.0", "wheel"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ed938cd --- /dev/null +++ b/setup.py @@ -0,0 +1,27 @@ +from setuptools import setup +import pathlib + +here = pathlib.Path(__file__).parent.resolve() +long_description = (here / 'README.md').read_text(encoding='utf-8') + +setup( + name='m2vcl', + version='0.0.1', + description='Export statistical models to VCL, for the Varnish cache.', + url='https://github.com/rubyroobs/m2vcl', + long_description=long_description, + long_description_content_type='text/markdown', + author='Ruby Nealon', + author_email='ruby@ruby.sh', + packages=['m2vcl', 'm2vcl.vcl'], + python_requires=">=3.6", + install_requires=[ + "m2cgen", + ], + extras_require={ + 'test': ['scikit-learn', 'numpy'], + }, + project_urls={ + 'Bug Reports': 'https://github.com/rubyroobs/m2vcl/issues', + }, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_vcl_returning.py b/tests/test_vcl_returning.py new file mode 100644 index 0000000..8048d8f --- /dev/null +++ b/tests/test_vcl_returning.py @@ -0,0 +1,44 @@ +import unittest + +from sklearn.model_selection import train_test_split +from sklearn.datasets import load_iris, load_boston +from sklearn.linear_model import LinearRegression +from sklearn.tree import DecisionTreeClassifier +from sklearn.svm import NuSVR +import numpy as np +import m2vcl + + +class TestVCLReturning(unittest.TestCase): + + def test_vcl_returning_for_boston_linear_regression(self): + boston = load_boston() + X, y = boston.data, boston.target + estimator = LinearRegression() + estimator.fit(X, y) + + self.assertNotEqual(m2vcl.export_to_vcl(estimator), "") + + def 
test_vcl_returning_for_iris_decision_tree(self): + iris = load_iris() + X = iris.data + y = iris.target + X_train, X_test, y_train, y_test = train_test_split( + X, y, random_state=0) + + clf = DecisionTreeClassifier(max_leaf_nodes=3, random_state=0) + clf.fit(X_train, y_train) + self.assertNotEqual(m2vcl.export_to_vcl(clf), "") + + def test_vcl_returning_for_nusvr(self): + n_samples, n_features = 10, 5 + np.random.seed(0) + y = np.random.randn(n_samples) + X = np.random.randn(n_samples, n_features) + regr = NuSVR(C=1.0, nu=0.1) + regr.fit(X, y) + self.assertNotEqual(m2vcl.export_to_vcl(regr), "") + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..c7fd43a --- /dev/null +++ b/tox.ini @@ -0,0 +1,28 @@ +[tox] +envlist = py{36,37,38,39} +minversion = 3.3.0 +isolated_build = true + +[gh-actions] +python = + 3.6: py36 + 3.7: py37 + 3.8: py38 + 3.9: py39 + +[testenv] +deps = + check-manifest >= 0.42 + flake8 + pytest + scikit-learn + numpy +commands = + check-manifest --ignore 'tox.ini,tests/**' + python setup.py check -m -s + flake8 . + py.test tests {posargs} + +[flake8] +exclude = .tox,*.egg,build,data +select = E,W,F \ No newline at end of file