From 874e9efe74ade085372b78ff0b91b24fc2945cae Mon Sep 17 00:00:00 2001 From: Adam Obuchowicz Date: Thu, 29 Feb 2024 15:42:59 +0100 Subject: [PATCH] Add workaround for huge idmap diffs (#9207) Fixes probably #9198 See [this comment](https://github.com/enso-org/enso/issues/9198#issuecomment-1968484269) for justification. TLDR: the diff algorithm is too slow for our huge idmap. The proper fix would be to reduce idmap size. Expect tasks for that soon. --- .vscode/launch.json | 10 ++- .../stores/project/computedValueRegistry.ts | 12 ++- app/gui2/src/stores/project/index.ts | 24 +++--- app/gui2/ydoc-server/__tests__/edits.bench.ts | 60 ++++++++++++++ app/gui2/ydoc-server/edits.ts | 80 ++++++++++++++++++- 5 files changed, 170 insertions(+), 16 deletions(-) create mode 100644 app/gui2/ydoc-server/__tests__/edits.bench.ts diff --git a/.vscode/launch.json b/.vscode/launch.json index f6ae43496c15..1bb51155a541 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -19,6 +19,14 @@ "type": "node", "debugServer": 4711, "request": "attach" + }, + { + "type": "node-terminal", + "name": "Run Script: dev", + "request": "launch", + "command": "npm run dev", + // "env": {"NODE_OPTIONS": "--inspect"}, + "cwd": "${workspaceFolder}/app/gui2" } ] -} +} \ No newline at end of file diff --git a/app/gui2/src/stores/project/computedValueRegistry.ts b/app/gui2/src/stores/project/computedValueRegistry.ts index 5027a0383d98..225192458c73 100644 --- a/app/gui2/src/stores/project/computedValueRegistry.ts +++ b/app/gui2/src/stores/project/computedValueRegistry.ts @@ -44,8 +44,8 @@ export class ComputedValueRegistry { processUpdates(updates: ExpressionUpdate[]) { for (const update of updates) { const info = this.db.get(update.expressionId) - const newInfo = combineInfo(info, update) - this.db.set(update.expressionId, newInfo) + if (info) updateInfo(info, update) + else this.db.set(update.expressionId, combineInfo(undefined, update)) } } @@ -58,6 +58,14 @@ export class ComputedValueRegistry { } } +function updateInfo(info: ExpressionInfo, update: ExpressionUpdate) { + const newInfo = combineInfo(info, update) + if (newInfo.typename !== info.typename) info.typename = newInfo.typename + if (newInfo.methodCall !== info.methodCall) info.methodCall = newInfo.methodCall + if (newInfo.payload !== info.payload) info.payload = newInfo.payload + if (newInfo.profilingInfo !== info.profilingInfo) info.profilingInfo = update.profilingInfo +} + function combineInfo(info: ExpressionInfo | undefined, update: ExpressionUpdate): ExpressionInfo { const isPending = update.payload.type === 'Pending' return { diff --git a/app/gui2/src/stores/project/index.ts b/app/gui2/src/stores/project/index.ts index 65476ae89392..4b15012a948a 100644 --- a/app/gui2/src/stores/project/index.ts +++ b/app/gui2/src/stores/project/index.ts @@ -608,18 +608,20 @@ export const useProjectStore = defineStore('project', () => { } const dataflowErrors = new ReactiveMapping(computedValueRegistry.db, (id, info) => { - if (info.payload.type !== 'DataflowError') return - const data = useVisualizationData( - ref({ - expressionId: id, - visualizationModule: 'Standard.Visualization.Preprocessor', - expression: { - module: 'Standard.Visualization.Preprocessor', - definedOnType: 'Standard.Visualization.Preprocessor', - name: 'error_preprocessor', - }, - }), + const config = computed(() => + info.payload.type === 'DataflowError' + ? { + expressionId: id, + visualizationModule: 'Standard.Visualization.Preprocessor', + expression: { + module: 'Standard.Visualization.Preprocessor', + definedOnType: 'Standard.Visualization.Preprocessor', + name: 'error_preprocessor', + }, + } + : null, ) + const data = useVisualizationData(config) return computed<{ kind: 'Dataflow'; message: string } | undefined>(() => { const visResult = data.value if (!visResult) return diff --git a/app/gui2/ydoc-server/__tests__/edits.bench.ts b/app/gui2/ydoc-server/__tests__/edits.bench.ts new file mode 100644 index 000000000000..bd379e196b21 --- /dev/null +++ b/app/gui2/ydoc-server/__tests__/edits.bench.ts @@ -0,0 +1,60 @@ +import diff from 'fast-diff' +import { uuidv4 } from 'lib0/random.js' +import { bench, describe } from 'vitest' +import { stupidFastDiff } from '../edits' + +describe('Diff algorithm benchmarks', () => { + let oldString = '' + let newString = '' + + function initializeStrings(length: number) { + // simulating metadata list: + oldString = '' + let i = 0 + while (oldString.length < length / 2) { + oldString += `[{"index":{"value":${i}},"size":{"value":4}},"${uuidv4()}"]` + i += 1 + } + i = 1 + newString = '' + while (newString.length < length / 2) { + newString += `[{"index":{"value":${i}},"size":{"value":5}},"${uuidv4()}"]` + } + } + + function diffBenchmark(length: number) { + bench( + `Diffing ${length}`, + () => { + diff(oldString, newString) + }, + { setup: () => initializeStrings(length), warmupIterations: 1, iterations: 1 }, + ) + } + + function stupidFastDiffBenchmark(length: number) { + bench( + `Fast Diffing ${length}`, + () => { + stupidFastDiff(oldString, newString) + }, + { setup: () => initializeStrings(length), warmupIterations: 1, iterations: 1 }, + ) + } + + diffBenchmark(10000) + diffBenchmark(20000) + diffBenchmark(30000) + diffBenchmark(40000) + diffBenchmark(50000) + // These are too slow for every-day benchmark run + // diffBenchmark(60000) + // diffBenchmark(70000) + // diffBenchmark(80000) + // diffBenchmark(90000) + // diffBenchmark(100000) + // diffBenchmark(250000) + + stupidFastDiffBenchmark(250000) + stupidFastDiffBenchmark(2500000) +}) diff --git a/app/gui2/ydoc-server/edits.ts b/app/gui2/ydoc-server/edits.ts index d07f1c8a5cb5..04ca66cdd77f 100644 --- a/app/gui2/ydoc-server/edits.ts +++ b/app/gui2/ydoc-server/edits.ts @@ -12,6 +12,29 @@ import { assert } from '../shared/util/assert' import { IdMap, ModuleDoc, type VisualizationMetadata } from '../shared/yjsModel' import * as fileFormat from './fileFormat' +/** + * The simulated metadata of this size takes c.a. 1 second on my machine. It should be quite + * bearable, even on slower machines. + * + * Full benchmark results (from edits.bench.ts): + * name hz min max mean p75 p99 p995 p999 rme samples + * · Diffing 10000 8.7370 108.66 132.93 114.46 111.73 132.93 132.93 132.93 ±11.28% 5 + * · Diffing 15000 4.0483 239.82 257.99 247.02 257.99 257.99 257.99 257.99 ±9.71% 3 + * · Diffing 20000 2.1577 462.40 464.52 463.46 464.52 464.52 464.52 464.52 ±2.90% 2 + * · Diffing 25000 1.3744 727.61 727.61 727.61 727.61 727.61 727.61 727.61 ±0.00% 1 + * · Diffing 30000 0.9850 1,015.25 1,015.25 1,015.25 1,015.25 1,015.25 1,015.25 1,015.25 ±0.00% 1 + * · Diffing 35000 0.6934 1,442.27 1,442.27 1,442.27 1,442.27 1,442.27 1,442.27 1,442.27 ±0.00% 1 + * · Diffing 40000 0.5141 1,945.24 1,945.24 1,945.24 1,945.24 1,945.24 1,945.24 1,945.24 ±0.00% 1 + * · Diffing 50000 0.3315 3,016.59 3,016.59 3,016.59 3,016.59 3,016.59 3,016.59 3,016.59 ±0.00% 1 + * · Diffing 60000 0.2270 4,405.46 4,405.46 4,405.46 4,405.46 4,405.46 4,405.46 4,405.46 ±0.00% 1 + * · Diffing 70000 0.1602 6,240.52 6,240.52 6,240.52 6,240.52 6,240.52 6,240.52 6,240.52 ±0.00% 1 + * · Diffing 80000 0.1233 8,110.54 8,110.54 8,110.54 8,110.54 8,110.54 8,110.54 8,110.54 ±0.00% 1 + * · Diffing 90000 0.0954 10,481.47 10,481.47 10,481.47 10,481.47 10,481.47 10,481.47 10,481.47 ±0.00% 1 + * · Diffing 100000 0.0788 12,683.46 12,683.46 12,683.46 12,683.46 12,683.46 12,683.46 12,683.46 ±0.00% 1 + * · Diffing 250000 0.0107 93,253.97 93,253.97 93,253.97 93,253.97 93,253.97 93,253.97 93,253.97 ±0.00% 1 + */ +const MAX_SIZE_FOR_NORMAL_DIFF = 30000 + interface AppliedUpdates { newCode: string | undefined newIdMap: IdMap | undefined @@ -116,12 +139,45 @@ export function translateVisualizationFromFile( } } +/** + * A simplified diff algorithm. + * + * The `fast-diff` package uses Myers' https://neil.fraser.name/writing/diff/myers.pdf with some + * optimizations to generate minimal diff. Unfortunately, event this algorithm is still to slow + * for our metadata. Therefore we need to use faster algorithm which will not produce theoretically + * minimal diff. + * + * This is quick implementation making diff which just replaces entire string except common prefix + * and suffix. + */ +export function stupidFastDiff(oldString: string, newString: string): diff.Diff[] { + const minLength = Math.min(oldString.length, newString.length) + let commonPrefixLen, commonSuffixLen + for (commonPrefixLen = 0; commonPrefixLen < minLength; ++commonPrefixLen) + if (oldString[commonPrefixLen] !== newString[commonPrefixLen]) break + if (oldString.length === newString.length && oldString.length === commonPrefixLen) + return [[0, oldString]] + for (commonSuffixLen = 0; commonSuffixLen < minLength - commonPrefixLen; ++commonSuffixLen) + if (oldString.at(-1 - commonSuffixLen) !== newString.at(-1 - commonSuffixLen)) break + const commonPrefix = oldString.substring(0, commonPrefixLen) + const removed = oldString.substring(commonPrefixLen, oldString.length - commonSuffixLen) + const added = newString.substring(commonPrefixLen, newString.length - commonSuffixLen) + const commonSuffix = oldString.substring(oldString.length - commonSuffixLen, oldString.length) + return (commonPrefix ? ([[0, commonPrefix]] as diff.Diff[]) : []) + .concat(removed ? [[-1, removed]] : []) + .concat(added ? [[1, added]] : []) + .concat(commonSuffix ? [[0, commonSuffix]] : []) +} + export function applyDiffAsTextEdits( lineOffset: number, oldString: string, newString: string, ): TextEdit[] { - const changes = diff(oldString, newString) + const changes = + oldString.length + newString.length > MAX_SIZE_FOR_NORMAL_DIFF + ? stupidFastDiff(oldString, newString) + : diff(oldString, newString) let newIndex = 0 let lineNum = lineOffset let lineStartIdx = 0 @@ -171,7 +227,8 @@ export function prettyPrintDiff(from: string, to: string): string { const colRed = '\x1b[31m' const colGreen = '\x1b[32m' - const diffs = diff(from, to) + const diffs = + from.length + to.length > MAX_SIZE_FOR_NORMAL_DIFF ? stupidFastDiff(from, to) : diff(from, to) if (diffs.length === 1 && diffs[0]![0] === 0) return 'No changes' let content = '' for (let i = 0; i < diffs.length; i++) { @@ -201,3 +258,22 @@ export function prettyPrintDiff(from: string, to: string): string { content += colReset return content } + +if (import.meta.vitest) { + const { test, expect, bench, describe } = import.meta.vitest + + test.each` + oldStr | newStr | expected + ${''} | ${'foo'} | ${[[1, 'foo']]} + ${'foo'} | ${''} | ${[[-1, 'foo']]} + ${'foo'} | ${'foo'} | ${[[0, 'foo']]} + ${'foo'} | ${'bar'} | ${[[-1, 'foo'], [1, 'bar']]} + ${'ababx'} | ${'acacx'} | ${[[0, 'a'], [-1, 'bab'], [1, 'cac'], [0, 'x']]} + ${'ax'} | ${'acacx'} | ${[[0, 'a'], [1, 'cac'], [0, 'x']]} + ${'ababx'} | ${'ax'} | ${[[0, 'a'], [-1, 'bab'], [0, 'x']]} + ${'ababx'} | ${'abacax'} | ${[[0, 'aba'], [-1, 'b'], [1, 'ca'], [0, 'x']]} + ${'axxxa'} | ${'a'} | ${[[0, 'a'], [-1, 'xxxa']]} + `('Stupid diff of $oldStr and $newStr', ({ oldStr, newStr, expected }) => { + expect(stupidFastDiff(oldStr, newStr)).toEqual(expected) + }) +}