From 1043b9fea5b7a1311874e8c23e961aaa7be46e54 Mon Sep 17 00:00:00 2001 From: Amit Markel Date: Tue, 19 Dec 2023 18:56:31 +0200 Subject: [PATCH 1/5] Specify model type and mark it private and readonly. --- javascript/src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index cb6a5ea7..38b3e75f 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -19,7 +19,7 @@ */ export class Parser { /** BudouX model data */ - model; + private readonly model: Map<string, Map<string, number>>; /** * Constructs a BudouX parser. From 228e297f2c35c755e3e11103094e1d4788c11621 Mon Sep 17 00:00:00 2001 From: Amit Markel Date: Tue, 19 Dec 2023 18:58:32 +0200 Subject: [PATCH 2/5] Move the baseScore variable that is a constant to be class-level and readonly. --- javascript/src/parser.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index 38b3e75f..6343ece4 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -20,6 +20,7 @@ export class Parser { /** BudouX model data */ private readonly model: Map<string, Map<string, number>>; + private readonly baseScore: number; /** * Constructs a BudouX parser. 
@@ -29,6 +30,12 @@ export class Parser { this.model = new Map( Object.entries(model).map(([k, v]) => [k, new Map(Object.entries(v))]) ); + this.baseScore = + -0.5 * + [...this.model.values()] + .map(group => [...group.values()]) + .flat() + .reduce((prev, curr) => prev + curr, 0); } /** @@ -58,15 +65,9 @@ export class Parser { */ parseBoundaries(sentence: string): number[] { const result = []; - const baseScore = - -0.5 * - [...this.model.values()] - .map(group => [...group.values()]) - .flat() - .reduce((prev, curr) => prev + curr, 0); for (let i = 1; i < sentence.length; i++) { - let score = baseScore; + let score = this.baseScore; /* eslint-disable */ score += this.model.get('UW1')?.get(sentence.substring(i - 3, i - 2)) || 0; score += this.model.get('UW2')?.get(sentence.substring(i - 2, i - 1)) || 0; From 9305a5398fb5bb87c239ee6e371950d5451f58f2 Mon Sep 17 00:00:00 2001 From: Amit Markel Date: Tue, 19 Dec 2023 18:59:56 +0200 Subject: [PATCH 3/5] Bail-out earlier on the score summation loop to improve performance. 
--- javascript/src/parser.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index 6343ece4..ce2ae3bc 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -72,12 +72,15 @@ export class Parser { score += this.model.get('UW1')?.get(sentence.substring(i - 3, i - 2)) || 0; score += this.model.get('UW2')?.get(sentence.substring(i - 2, i - 1)) || 0; score += this.model.get('UW3')?.get(sentence.substring(i - 1, i)) || 0; + if (score > 0) { result.push(i); continue; } score += this.model.get('UW4')?.get(sentence.substring(i, i + 1)) || 0; score += this.model.get('UW5')?.get(sentence.substring(i + 1, i + 2)) || 0; score += this.model.get('UW6')?.get(sentence.substring(i + 2, i + 3)) || 0; + if (score > 0) { result.push(i); continue; } score += this.model.get('BW1')?.get(sentence.substring(i - 2, i)) || 0; score += this.model.get('BW2')?.get(sentence.substring(i - 1, i + 1)) || 0; score += this.model.get('BW3')?.get(sentence.substring(i, i + 2)) || 0; + if (score > 0) { result.push(i); continue; } score += this.model.get('TW1')?.get(sentence.substring(i - 3, i)) || 0; score += this.model.get('TW2')?.get(sentence.substring(i - 2, i + 1)) || 0; score += this.model.get('TW3')?.get(sentence.substring(i - 1, i + 2)) || 0; From ca6501bf70223b159d012d2c9cd673e4122b3762 Mon Sep 17 00:00:00 2001 From: Amit Markel Date: Wed, 20 Dec 2023 13:09:53 +0200 Subject: [PATCH 4/5] Revert "Bail-out earlier on the score summation loop to improve performance." This reverts commit 9305a5398fb5bb87c239ee6e371950d5451f58f2. 
--- javascript/src/parser.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index ce2ae3bc..6343ece4 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -72,15 +72,12 @@ export class Parser { score += this.model.get('UW1')?.get(sentence.substring(i - 3, i - 2)) || 0; score += this.model.get('UW2')?.get(sentence.substring(i - 2, i - 1)) || 0; score += this.model.get('UW3')?.get(sentence.substring(i - 1, i)) || 0; - if (score > 0) { result.push(i); continue; } score += this.model.get('UW4')?.get(sentence.substring(i, i + 1)) || 0; score += this.model.get('UW5')?.get(sentence.substring(i + 1, i + 2)) || 0; score += this.model.get('UW6')?.get(sentence.substring(i + 2, i + 3)) || 0; - if (score > 0) { result.push(i); continue; } score += this.model.get('BW1')?.get(sentence.substring(i - 2, i)) || 0; score += this.model.get('BW2')?.get(sentence.substring(i - 1, i + 1)) || 0; score += this.model.get('BW3')?.get(sentence.substring(i, i + 2)) || 0; - if (score > 0) { result.push(i); continue; } score += this.model.get('TW1')?.get(sentence.substring(i - 3, i)) || 0; score += this.model.get('TW2')?.get(sentence.substring(i - 2, i + 1)) || 0; score += this.model.get('TW3')?.get(sentence.substring(i - 1, i + 2)) || 0; From 6e0f63e9ccef1bb4058135ffe8258523811b927d Mon Sep 17 00:00:00 2001 From: Amit Markel Date: Wed, 20 Dec 2023 13:12:30 +0200 Subject: [PATCH 5/5] Add comment that score items in models may be negative. --- javascript/src/parser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index 6343ece4..93905ecc 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -68,6 +68,7 @@ export class Parser { for (let i = 1; i < sentence.length; i++) { let score = this.baseScore; + // NOTE: Score values in models may be negative. 
/* eslint-disable */ score += this.model.get('UW1')?.get(sentence.substring(i - 3, i - 2)) || 0; score += this.model.get('UW2')?.get(sentence.substring(i - 2, i - 1)) || 0;