
[WIP] optimisation experiments #65

Closed
wants to merge 7 commits

Conversation

@rom1504 (Member) commented Mar 8, 2016

  • somehow eval("produceArgs(typeArgs)") brings a 1.5x improvement

@rom1504 (Member, Author) commented Mar 8, 2016

Actually I can't see any noticeable improvement in the benchmark with that change.

@rom1504 (Member, Author) commented Mar 8, 2016

I think actually generating optimized code for each datatype would be interesting.

edit: I mean mostly the extended types.

@rom1504 (Member, Author) commented Mar 10, 2016

Currently testing with:

var Benchmark = require('benchmark');
var ProtoDef = require("protodef").ProtoDef;
var readi8=require("./dist/datatypes/numeric")["i8"][0];

var proto=new ProtoDef();

var buf=new Buffer([0x3d]);
//var v=proto.types.i8[0];
var suite = new Benchmark.Suite;


suite
  .add('readInt8', function() {
    buf.readInt8(0);
  })
  .add('protodef read i8', function() {
    proto.read(buf,0,"i8");
  })
  .add('protodef parsePacketBuffer i8', function() {
    proto.parsePacketBuffer("i8",buf);
  })
  .add('protodef read i8 raw', function() {
    readi8(buf,0);
  })
  .add('protodef read i8 types', function() {
    proto.readI8(buf,0);
    //v(buf,0);
  })
  .on('cycle', function(event) {
    console.log(String(event.target));
  })
  .run({ 'async': false });

See #28 for details.

@rom1504 force-pushed the optimisation_experiments branch from 21aa8bf to 950e6b6 on March 10, 2016
@rom1504 (Member, Author) commented Mar 10, 2016

Next: optimizing containers

Bench:

var Benchmark = require('benchmark');
var ProtoDef = require("protodef").ProtoDef;
var proto=new ProtoDef();

var buf=new Buffer([0x1d,0x2d,0x3d,0x4d]);
var suite = new Benchmark.Suite;


suite
  .add('readContainer', function() {
    return  {
      value: {
        a: buf.readInt8(0),
        b: buf.readInt8(1),
        c: buf.readInt8(2),
        d: buf.readInt8(3)
      },
      size:4
    };
  })
  .add('protodef readContainer', function() {
    proto.readContainer(buf,0,[
      {
        "name":"a",
        "type":"i8"
      },
      {
        "name":"b",
        "type":"i8"
      },
      {
        "name":"c",
        "type":"i8"
      },
      {
        "name":"d",
        "type":"i8"
      }
    ],{});
  })
  .on('cycle', function(event) {
    console.log(String(event.target));
  })
  .run({ 'async': false });

Current results:

readContainer x 21,070,919 ops/sec ±5.89% (69 runs sampled)
protodef readContainer x 341,468 ops/sec ±7.33% (71 runs sampled)

Pretty bad.

Plan: use addType and remove the need to pass typeArgs around. (see #28 (comment))

@rom1504 (Member, Author) commented Mar 10, 2016

var Benchmark = require('benchmark');
var ProtoDef = require("protodef").ProtoDef;
var proto=new ProtoDef();

var buf=new Buffer([0x1d,0x2d,0x3d,0x4d]);
var suite = new Benchmark.Suite;

proto.addType("myContainer",["container",[
  {
    "name":"a",
    "type":"i8"
  },
  {
    "name":"b",
    "type":"i8"
  },
  {
    "name":"c",
    "type":"i8"
  },
  {
    "name":"d",
    "type":"i8"
  }
]]);


suite
  .add('readContainer', function() {
    return  {
      value: {
        a: buf.readInt8(0),
        b: buf.readInt8(1),
        c: buf.readInt8(2),
        d: buf.readInt8(3)
      },
      size:4
    };
  })
  .add('protodef readContainer', function() {
    proto.readMyContainer(buf,0,{},{});
  })
  .on('cycle', function(event) {
    console.log(String(event.target));
  })
  .run({ 'async': false });

Current perf after adding a custom type is even worse:

readContainer x 24,837,841 ops/sec ±4.22% (81 runs sampled)
protodef readContainer x 105,848 ops/sec ±6.75% (69 runs sampled)

@rom1504 (Member, Author) commented Mar 10, 2016

function myCustomContainer(buffer, offset, typeArgs, context){
  var size=0;
  var value={};

  var result;
  result=proto.readI8(buffer,offset+size);
  value["a"]=result.value;
  size+=result.size;
  result=proto.readI8(buffer,offset+size);
  value["b"]=result.value;
  size+=result.size;
  result=proto.readI8(buffer,offset+size);
  value["c"]=result.value;
  size+=result.size;
  result=proto.readI8(buffer,offset+size);
  value["d"]=result.value;
  size+=result.size;

  return {
    value:value,
    size:size
  }
}

is really fast.

Even faster than the baseline for some reason. (30M ops/sec)

Edit: ah, the reason it was faster is that I hadn't enabled the noAssert option in the baseline. Anyway, pretty good; now to generate this.
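For reference, the noAssert flag mentioned here was a Node 4/5-era Buffer option; a small sketch of the difference (modern Node ignores the flag, so both calls behave the same there):

```javascript
// Node 4/5-era Buffer read methods accepted a second `noAssert` argument
// that skipped bounds checking (modern Node ignores the flag entirely).
// Both calls read the same byte; on old Node the second skipped checks.
var buf = Buffer.from([0x3d]); // the PR-era scripts use `new Buffer(...)`

var checked = buf.readInt8(0);         // bounds-checked read
var unchecked = buf.readInt8(0, true); // `true` was noAssert on old Node

console.log(checked, unchecked); // 61 61
```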

@rom1504 (Member, Author) commented Mar 10, 2016

function containerReaderBuild(typeArgs)
{
  var s="function o(proto){";
  s+="return function generated_(buffer, offset){\n";
  s+="var size=0;\n";
  s+="var value={};\n";
  s+="var result;\n";
  typeArgs.forEach(o => {
    s+="result=proto.read"+capitalizeFirstLetter(o.type)+"(buffer,offset+size);\n";
    s+='value["'+o.name+'"]=result.value;'+"\n";
    s+='size+=result.size;'+"\n";
  });
  s+="return {value:value,size:size}\n";
  s+="}\n";
  s+="}\n";
  s+="o;";
  return eval(s);
}

var generated=containerReaderBuild(types)(proto);

console.log(generated(buf,0));

works nicely:

readContainer x 46,689,187 ops/sec ±0.75% (88 runs sampled)
protodef myCustomContainer x 36,781,035 ops/sec ±1.16% (89 runs sampled)
protodef generated container x 37,217,551 ops/sec ±0.59% (92 runs sampled)
protodef readContainer x 140,785 ops/sec ±1.75% (83 runs sampled)

@rom1504 (Member, Author) commented Mar 10, 2016

Apparently http://sweetjs.org/ is a good lib for generating js.

@rom1504 (Member, Author) commented Mar 10, 2016

Ah, something I learned here: using `this.`, or eval'ing code that references a global variable, makes things slow. (That's why I have that o(proto) wrapper function there.)
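A minimal sketch of that pattern, with a hypothetical fakeProto standing in for the real ProtoDef instance: the eval'd source declares a wrapper o(proto), so the generated reader resolves proto as a local closure binding rather than a global or a `this` property:

```javascript
// The eval'd string defines o(proto) and ends with the expression `o;`,
// so eval returns the wrapper; calling it binds proto locally.
function buildReader() {
  var s = "function o(proto) {" +
          "  return function generated(buffer, offset) {" +
          "    return proto.readI8(buffer, offset);" +
          "  };" +
          "}; o;";
  return eval(s);
}

// hypothetical stand-in for a ProtoDef instance with a readI8 method
var fakeProto = {
  readI8: function (buffer, offset) {
    return { value: buffer.readInt8(offset), size: 1 };
  }
};

var read = buildReader()(fakeProto);
console.log(read(Buffer.from([0x3d]), 0)); // { value: 61, size: 1 }
```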

@roblabla (Collaborator) commented

Sweetjs is a great idea, but I'm having a hard time understanding how to implement it.

Oh and here's a rewritten version using es6 goodies :D

const containerReaderBuild = (typeArgs) => eval(`(function (proto) {
  function generate(buffer, offset) {
    var size=0;
    var value={};
    var result;
    ${typeArgs.reduce((old, o) => old + `
    result = proto.read${capitalizeFirstLetter(o.type)}(buffer, offset + size);
    value['${o.name}'] = result.value;
    size += result.size;
    `, "")}
    return {value:value,size:size};
  }
  return generate; // needed: without it the builder yields undefined
});`);
const generated = containerReaderBuild(types)(proto);
console.log(generated(buf,0));

@rom1504 (Member, Author) commented Mar 10, 2016

Yeah using that es6 syntax makes that code look much better.
My version was just a first approach, trying to figure out if it improves speed. And it does, so I'm going to make it actually work for the full container thingy next. (that includes extendType and stuff)
And then trying to figure out how to make the code looks ok. Using es6 might be enough :)

Edit: I don't know how exactly would sweetjs be used here either, it's just something that's been advised to me. I might try to figure that out later.

@rom1504 (Member, Author) commented Mar 10, 2016

Adding back the context destroys the perf (20x slower). Got to figure out a way to add it back while keeping good perf.

@rom1504 (Member, Author) commented Mar 11, 2016

This is only 2 times slower than the manual version (which doesn't have context handling!):

const containerReaderBuild = (typeArgs) => eval(
  `((proto) => {
  return (buffer, offset, context) => {
    var size=0;
    var value={};
    var value2={};
    value[".."]=context;
    var result;
    ${typeArgs.reduce((old, o) =>  old + `
    result = proto.read${capitalizeFirstLetter(o.type)}(buffer, offset + size);
    ${o.anon
    ? `if(result.value !== undefined) 
    Object.keys(result.value).forEach(key => value2[key] = value[key] = result.value[key]);`
    : `value2['${o.name}'] = value['${o.name}'] = result.value;`
    }
    size += result.size;
    `, "")}
    return {value:value2,size:size};
  }
});`);
const generated = containerReaderBuild(types)(proto);
console.log(generated(buf,0));

I guess it can be ok.

@rom1504 (Member, Author) commented Mar 11, 2016

I might consider trying to predict at compile time whether context handling is necessary or not. (It's possible if some meta information is added to each type: for example, numeric types don't need context.)

@roblabla (Collaborator) commented

No need for meta information: you can get the number of arguments a function takes using Function.prototype.length. For instance, numeric.readInt.length should return 3, whereas structures.containers.length should return 5. This should be enough to guess whether context is needed or not.
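A quick illustration of that trick (the two reader signatures below are hypothetical stand-ins for protodef's datatypes, not its actual functions):

```javascript
// Function.prototype.length counts the declared parameters, so it can
// distinguish readers that take a context argument from those that don't.
function readInt(buffer, offset, typeArgs) { /* context-free reader */ }
function readContainer(buffer, offset, typeArgs, rootNode, context) { /* needs context */ }

console.log(readInt.length);       // 3
console.log(readContainer.length); // 5

// hypothetical compile-time check mirroring the comment above
var needsContext = readContainer.length > 3;
console.log(needsContext); // true
```

One caveat: `length` only counts parameters before the first default or rest parameter, so readers using those would be miscounted.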

@rom1504 (Member, Author) commented Mar 11, 2016

Oh yeah indeed.

@roblabla (Collaborator) commented

Concerning context, what we have right now is a poorly optimized tree in the form of nested hash tables. That obviously sucks performance-wise. It's also terrible over-engineering on my end.

Thinking back, there is a simple and elegant solution that should be implementable. The idea is to have only one context object, created at read time. Instead of creating a new context, the container passes along the original context plus a namespace key.

readContainer(buffer, offset, typeArgs, context, ns) {
  var results = {value: {}, size: 0};
  typeArgs.forEach(({type, name, anon}) => {
    tryDoc(() => {
      var readResults = this.read(buffer, offset, type, context, ns + "." + name);
      results.size += readResults.size;
      offset += readResults.size;
      results.value[name] = readResults.value;
    }, name ? name : "unknown");
  });
  return results;
}

This design isn't ideal because it adds an additional arg. I'm thinking maybe of using an es6 "Map" and having those who need to add a namespace somehow override their prototype or something. But the basic idea is there.

@rom1504 (Member, Author) commented Mar 11, 2016

context, ns + ".name"

How can this work ? The this.read called need the values read in that container: they need to be added to the context.

@roblabla (Collaborator) commented

fail. var readResults = this.read(buffer, offset, type, context, ns + "." + name); was what I meant.

@rom1504 (Member, Author) commented Mar 11, 2016

Yeah, but that's not my problem. What does context contain? It's currently filled in readContainer and used in stuff called by readContainer (for example the compareTo of switch, or the countBy of array).
Where would the context values come from / be added in your proposal?

@roblabla (Collaborator) commented

This is why you shouldn't code at 3am.

readContainer(buffer, offset, typeArgs, context, ns) {
  var results = {value: {}, size: 0};
  typeArgs.forEach(({type, name, anon}) => {
    tryDoc(() => {
      var readResults = this.read(buffer, offset, type, context, `${ns}.${name}`);
      results.size += readResults.size;
      offset += readResults.size;
      results.value[name] = readResults.value;
      context[`${ns}.${name}`] = readResults.value;
    }, name ? name : "unknown");
  });
  return results;
}

this does introduce some complexity with regards to anon (which I conveniently left out here).
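The flat, namespace-keyed context being proposed can be sketched in miniature (readPair and the field list are illustrative, not the real protodef API):

```javascript
// One context object shared by the whole read, with values stored under
// "ns.name" paths instead of nested hashes with ".." parent links.
function readI8(buffer, offset) {
  return { value: buffer.readInt8(offset), size: 1 };
}

// hypothetical container of two i8 fields writing into the shared context
function readPair(buffer, offset, context, ns) {
  var size = 0, value = {};
  ["a", "b"].forEach(function (name) {
    var r = readI8(buffer, offset + size);
    value[name] = r.value;
    context[ns + "." + name] = r.value; // flat lookup, no tree walking
    size += r.size;
  });
  return { value: value, size: size };
}

var context = {};
console.log(readPair(Buffer.from([0x1d, 0x2d]), 0, context, "root"));
console.log(context["root.a"], context["root.b"]); // 29 45
```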

@roblabla (Collaborator) commented

Well, looking back, context is implemented as a hack in readContainer (basically hijacking the return result) that should be fairly optimal. Durr 😅 . So I was wondering, what exactly did you add back to destroy the perf? The results[".."] = / delete results[".."]?

@rom1504 (Member, Author) commented Mar 11, 2016

yes results[".."] = / delete results[".."]
For some reason that makes the perf 20x slower. I guess it's about some kind of v8 deoptimization.

Having a second object to store the results, which doesn't contain ".." and isn't passed around in the this.read makes things only 2x slower. I believe that's because it's "more pure" that way, less potential side effect which v8 optimizer would need to take into account (passing the actual value object in the context as we currently do would allow this.read functions to change the value object. We don't need that)

Indeed doing context[``${ns}.${name}``] = readResults.value; probably works too. And it's probably cleaner than having to mess with ...

@rom1504 (Member, Author) commented Mar 14, 2016

Getting the same result as the manual one with:

const containerReaderBuild = (typeArgs) => {
  const requireContext=typeArgs.filter(o => proto[`read${capitalizeFirstLetter(o.type)}`].length==3).length>0;
  return eval(`((proto) => {
    return (buffer, offset${requireContext ? `,context`:``}) => {
      var size=0;
      var value2={};
      ${requireContext ? `
      var value={};
      value[".."]=context;
      ` :``}
      var result;
      ${typeArgs.reduce((old, o) =>  old + `
      result = proto.read${capitalizeFirstLetter(o.type)}(buffer, offset + size${requireContext ? `,value`:``});
      ${o.anon
    ? `if(result.value !== undefined)
      Object.keys(result.value).forEach(key => ${requireContext ? `value[key]=` : ``}value2[key] = result.value[key]);`
    : `${requireContext ? `value['${o.name}'] =` : ``} value2['${o.name}'] = result.value;`
    }
      size += result.size;
      `, "")}
      return {value:value2,size:size};
    }
  });`);
};
const generated = containerReaderBuild(types)(proto);
console.log(generated(buf,0));

readContainer x 14,898,324 ops/sec ±5.78% (78 runs sampled)
protodef generated container x 14,793,789 ops/sec ±3.42% (84 runs sampled)

@rom1504 (Member, Author) commented Mar 14, 2016

It's not very pretty, but I don't think that's really a problem, and the way the code looks can be improved if needed.

@rom1504 (Member, Author) commented Mar 17, 2016

Interesting: https://github.com/mafintosh/is-my-json-valid is the second-fastest JSON schema validator, and it also uses code generation. The way it generates JS might be interesting.

@rom1504 (Member, Author) commented Mar 17, 2016

@rom1504 (Member, Author) commented Mar 20, 2016

An idea: the current master is a ProtoDef format interpreter. What I'm trying to build here is a ProtoDef format compiler. It might be interesting to keep both (in separate repos, I guess): the interpreter code stays more readable, and the compiler's output can be checked against it more easily.

@rom1504 (Member, Author) commented Apr 11, 2016

Apparently mineflayer tends to consume 20% CPU on big servers, and a proxy with 20 clients tends to lag.

I'll try to make progress on this so everything using protodef can be faster.

@rom1504 force-pushed the optimisation_experiments branch from 950e6b6 to a774cb4 on April 14, 2016
@rom1504 (Member, Author) commented Apr 14, 2016

Example of code produced by elixir-protodef: https://gist.github.com/hansihe/b5b1754e2447c50680fc

@rom1504 (Member, Author) commented Apr 14, 2016

Consider generating a single function for a given type, to eliminate function calls completely and also eliminate the context object.
(This is what elixir-protodef does.)

@rom1504 (Member, Author) commented Apr 14, 2016

Maybe I should just start from scratch and see if there's a convenient ESTree builder module I could use.

@rom1504 (Member, Author) commented Apr 14, 2016

https://github.com/davidbonnet/astring could be interesting

@rom1504 (Member, Author) commented Apr 16, 2016

Using https://github.com/babel/babel/blob/master/packages/babel-generator/README.md seems like a good way to go at it.

I also considered using a more appropriate language to do json -> language X, but OCaml, for example, doesn't seem to have an ESTree generator, only a SpiderMonkey AST one, so I'm not sure it's really worth it.

There's the possibility of doing it in Elixir, but I don't know whether Elixir is well suited to that kind of thing in a general way.

@rom1504 (Member, Author) commented Apr 16, 2016

After some investigation, this is what babel-register uses to compile at runtime: https://github.com/babel/babel/blob/4c371132ae7321f6d08567eab54a59049e07f246/packages/babel-register/src/node.js#L102

The m._compile call seems to be doing the code -> runnable transformation. I believe it's part of Node. Going to investigate this a bit more; maybe there's something better than eval.

https://github.com/nodejs/node/blob/7c9a691ee7d13ccf1c883e27fdb96eccc8b73d5e/test/sequential/test-module-loading.js

https://github.com/nodejs/node/blob/b488b19eaf2b2e7a3ca5eccd2445e245847a5f76/lib/module.js#L464

Ok, so it seems all require does is call vm.runInThisContext, nothing much better than eval: https://github.com/nodejs/node/blob/b488b19eaf2b2e7a3ca5eccd2445e245847a5f76/lib/module.js#L500

https://nodejs.org/api/vm.html#vm_vm_runinthiscontext_code_options

Ok, so my conclusion from these investigations is that there is really nothing wrong with eval'ing code, since that's what require does.

@rom1504 (Member, Author) commented Apr 16, 2016

Next step: looking into Babel helpers and their visitor pattern, to see if they are useful only for transforming ESTree to ESTree or if they could be useful here.

babel-template may be interesting: it's used in some of Babel's code to turn an ES6 string into an ESTree.

Example: https://github.com/babel/babel/blob/master/packages/babel-helpers/src/helpers.js

@rom1504 (Member, Author) commented Apr 19, 2016

Simple example of babel-generator usage:

const generate=require('babel-generator');
const t=require('babel-types');

var ifStatement = t.ifStatement(
  t.stringLiteral("top cond"),
  t.whileStatement(
    t.stringLiteral("while cond"),
    t.ifStatement(
      t.stringLiteral("nested"),
      t.expressionStatement(t.numericLiteral(1))
    )
  ),
  t.expressionStatement(t.stringLiteral("alt"))
);

console.log(generate.default(ifStatement).code);

…marks, tests. Pass test + 10x read perf improvement
@rom1504 (Member, Author) commented Apr 20, 2016

Got a 10x perf improvement in reading.
Next:

  • generate all the code of a type in one function like elixir-protodef to eliminate the context object : should give an additional 10x boost (and get the perf to the same level as manually written code)
  • make sure it works in all cases (add some more tests): in particular for nested types
  • see how to get back the nice errors. It might be possible to put the error context directly in the throw to avoid doing any try/catch
  • write the write and sizeOf generators
  • get rid of the interpreting methods and make sure the api makes sense / update dependents

@rom1504 (Member, Author) commented Apr 20, 2016

To put the code in one function, the variables need unique names. Try to follow something like https://gist.github.com/hansihe/b5b1754e2447c50680fc
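A minimal sketch of such a fresh-name generator (illustrative, not the PR's actual implementation): when all nested reads are inlined into one function body, the generator hands out a unique suffix for every temporary variable so inlined types can't collide.

```javascript
// returns a function that produces unique variable names by appending
// a monotonically increasing counter to a base name
function makeFresh() {
  var counter = 0;
  return function (base) { return base + "_" + counter++; };
}

var fresh = makeFresh();
console.log(fresh("result")); // "result_0"
console.log(fresh("result")); // "result_1"
console.log(fresh("size"));   // "size_2"
```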

@rom1504 (Member, Author) commented Mar 3, 2017

@rom1504 (Member, Author) commented Mar 3, 2017

Idea to make things more concrete:

  1. take the ProtoDef tests (https://github.com/ProtoDef-io/ProtoDef/tree/master/test)
  2. manually write perfect js code to read these types
  3. write a compiler to generate the same thing
  4. possibly add more tests to have more code examples

@rom1504 (Member, Author) commented Aug 24, 2019

I may work on this again someday; closing for now.
