From d18a07a6cc6ecc06ba756c08a929cb0df1710854 Mon Sep 17 00:00:00 2001 From: Stefan Zeiger Date: Tue, 30 Nov 2021 18:27:04 +0100 Subject: [PATCH] Deduplicate strings in the parser (#137) This leads to slower results in ParseBenchmark (~ 5%) but with parsing taking only 10% of the total time in the main benchmark, the potential for performance regressions is severely limited. We see an improvement of overall benchmark times, both single-threaded and multi-threaded with a shared parse cache, in the main benchmark. --- sjsonnet/src/sjsonnet/Parser.scala | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala index c0c3c940..9fc8933d 100644 --- a/sjsonnet/src/sjsonnet/Parser.scala +++ b/sjsonnet/src/sjsonnet/Parser.scala @@ -49,6 +49,8 @@ class Parser(val currentFile: Path) { private val fileScope = new FileScope(currentFile) + private val strings = new mutable.HashMap[String, String] + def Pos[_: P]: P[Position] = Index.map(offset => new Position(fileScope, offset)) def id[_: P] = P( @@ -251,7 +253,12 @@ class Parser(val currentFile: Path) { } ) - def constructString(pos: Position, lines: Seq[String]) = Val.Str(pos, lines.mkString) + def constructString(pos: Position, lines: Seq[String]) = { + val s = lines.mkString + val unique = strings.getOrElseUpdate(s, s) + Val.Str(pos, unique) + } + // Any `expr` that isn't naively left-recursive def expr2[_: P]: P[Expr] = P( Pos.flatMapX{ pos =>