From de2b05a912d914b44bc8540f8cd6309b34bc1c08 Mon Sep 17 00:00:00 2001 From: Shuhei Iitsuka Date: Mon, 24 Oct 2022 15:32:23 +0900 Subject: [PATCH 1/2] Remove P features from JS module --- demo/package-lock.json | 2 +- javascript/package-lock.json | 4 ++-- javascript/package.json | 2 +- javascript/src/cli.ts | 2 +- javascript/src/parser.ts | 36 ++--------------------------- javascript/src/tests/test_parser.ts | 26 ++------------------- 6 files changed, 9 insertions(+), 63 deletions(-) diff --git a/demo/package-lock.json b/demo/package-lock.json index 9788551e..3825469d 100644 --- a/demo/package-lock.json +++ b/demo/package-lock.json @@ -21,7 +21,7 @@ }, "../javascript": { "name": "budoux", - "version": "0.2.0", + "version": "0.2.1", "license": "Apache-2.0", "dependencies": { "commander": "^9.3.0", diff --git a/javascript/package-lock.json b/javascript/package-lock.json index ee866089..16014bb0 100644 --- a/javascript/package-lock.json +++ b/javascript/package-lock.json @@ -1,12 +1,12 @@ { "name": "budoux", - "version": "0.2.0", + "version": "0.2.1", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "budoux", - "version": "0.2.0", + "version": "0.2.1", "license": "Apache-2.0", "dependencies": { "commander": "^9.3.0", diff --git a/javascript/package.json b/javascript/package.json index 00960aa5..4691ec8f 100644 --- a/javascript/package.json +++ b/javascript/package.json @@ -1,6 +1,6 @@ { "name": "budoux", - "version": "0.2.0", + "version": "0.2.1", "description": "A small chunk segmenter.", "repository": { "type": "git", diff --git a/javascript/src/cli.ts b/javascript/src/cli.ts index b552d17e..92d57826 100644 --- a/javascript/src/cli.ts +++ b/javascript/src/cli.ts @@ -20,7 +20,7 @@ import * as readline from 'readline'; import {Command} from 'commander'; import {Parser, loadDefaultJapaneseParser} from './parser.js'; -const CLI_VERSION = '0.2.0'; +const CLI_VERSION = '0.2.1'; /** * Run the command line interface program. diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index 130a03bd..5413122f 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -59,9 +59,6 @@ export class Parser { * @param w4 The character right after the break point. * @param w5 The character 2 characters after the break point. * @param w6 The character 3 characters after the break point. - * @param p1 The result 3 steps ago. - * @param p2 The result 2 steps ago. - * @param p3 The last result. * @returns A feature to be consumed by a classifier. */ static getFeature( @@ -70,10 +67,7 @@ export class Parser { w3: string, w4: string, w5: string, - w6: string, - p1: string, - p2: string, - p3: string + w6: string ) { const b1 = Parser.getUnicodeBlockFeature(w1); const b2 = Parser.getUnicodeBlockFeature(w2); @@ -82,11 +76,6 @@ export class Parser { const b5 = Parser.getUnicodeBlockFeature(w5); const b6 = Parser.getUnicodeBlockFeature(w6); const rawFeature = { - UP1: p1, - UP2: p2, - UP3: p3, - BP1: p1 + p2, - BP2: p2 + p3, UW1: w1, UW2: w2, UW3: w3, @@ -113,17 +102,6 @@ export class Parser { TB2: b2 + b3 + b4, TB3: b3 + b4 + b5, TB4: b4 + b5 + b6, - UQ1: p1 + b1, - UQ2: p2 + b2, - UQ3: p3 + b3, - BQ1: p2 + b2 + b3, - BQ2: p2 + b3 + b4, - BQ3: p3 + b2 + b3, - BQ4: p3 + b3 + b4, - TQ1: p2 + b1 + b2 + b3, - TQ2: p2 + b2 + b3 + b4, - TQ3: p3 + b1 + b2 + b3, - TQ4: p3 + b2 + b3 + b4, }; return Object.entries(rawFeature) .filter(entry => !entry[1].includes(INVALID)) @@ -151,9 +129,6 @@ export class Parser { */ parse(sentence: string) { if (sentence === '') return []; - let p1 = 'U'; - let p2 = 'U'; - let p3 = 'U'; const result = [sentence[0]]; const baseScore = -sum([...this.model.values()]); @@ -164,19 +139,12 @@ export class Parser { sentence[i - 1], sentence[i], sentence[i + 1] || INVALID, - sentence[i + 2] || INVALID, - p1, - p2, - p3 + sentence[i + 2] || INVALID ); const score = baseScore + 2 * sum(feature.map(f => this.model.get(f) || 0)); - const p = score > 0 ? 'B' : 'O'; if (score > 0) result.push(''); result[result.length - 1] += sentence[i]; - p1 = p2; - p2 = p3; - p3 = p; } return result; } diff --git a/javascript/src/tests/test_parser.ts b/javascript/src/tests/test_parser.ts index 245ad3de..dd16bf97 100644 --- a/javascript/src/tests/test_parser.ts +++ b/javascript/src/tests/test_parser.ts @@ -45,26 +45,14 @@ describe('Parser.getUnicodeBlockFeature', () => { }); describe('Parser.getFeature', () => { - const feature = Parser.getFeature( - 'a', - 'b', - 'c', - 'd', - 'e', - 'f', - 'x', - 'y', - 'z' - ); + const feature = Parser.getFeature('a', 'b', 'c', 'd', 'e', 'f'); it('should include certain features.', () => { expect(feature).toContain('UW1:a'); expect(feature).toContain('UB1:001'); - expect(feature).toContain('UP1:x'); expect(feature).toContain('BW1:bc'); expect(feature).toContain('BB1:001001'); - expect(feature).toContain('BP1:xy'); expect(feature).toContain('TW1:abc'); expect(feature).toContain('TB1:001001001'); @@ -72,17 +60,7 @@ describe('Parser.getFeature', () => { }); describe('Parser.getFeature with invalid inputs.', () => { - const feature = Parser.getFeature( - 'a', - 'a', - INVALID, - 'a', - 'a', - 'a', - 'a', - 'a', - 'a' - ); + const feature = Parser.getFeature('a', 'a', INVALID, 'a', 'a', 'a'); const findByPrefix = (prefix: string, feature: string[]) => { for (const item of feature) { if (item.startsWith(prefix)) return true; From 37891ac85853dd724c6b277af26390255e3ef385 Mon Sep 17 00:00:00 2001 From: Shuhei Iitsuka Date: Mon, 24 Oct 2022 15:39:24 +0900 Subject: [PATCH 2/2] Fix comment --- javascript/src/parser.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/javascript/src/parser.ts b/javascript/src/parser.ts index 5413122f..0b2f2358 100644 --- a/javascript/src/parser.ts +++ b/javascript/src/parser.ts @@ -50,8 +50,7 @@ export class Parser { } /** - * Generates a feature from characters around (w1-w6) and past - * results (p1-p3). + * Generates a feature from characters around (w1-w6). * * @param w1 The character 3 characters before the break point. * @param w2 The character 2 characters before the break point.