Skip to content

Commit

Permalink
Merge pull request #8779 from ckeditor/i/8078-apostrophes-in-words
Browse files Browse the repository at this point in the history
Fix (word-count): The word count feature should consider a string with a special character as a single word. Closes #8078.
  • Loading branch information
oleq authored Jan 12, 2021
2 parents 28f827d + 38300bd commit c218328
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 54 deletions.
3 changes: 2 additions & 1 deletion packages/ckeditor5-word-count/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
"@ckeditor/ckeditor5-list": "^24.0.0",
"@ckeditor/ckeditor5-paragraph": "^24.0.0",
"@ckeditor/ckeditor5-table": "^24.0.0",
"@ckeditor/ckeditor5-utils": "^24.0.0"
"@ckeditor/ckeditor5-utils": "^24.0.0",
"@ckeditor/ckeditor5-image": "^24.0.0"
},
"engines": {
"node": ">=12.0.0",
Expand Down
9 changes: 3 additions & 6 deletions packages/ckeditor5-word-count/src/wordcount.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import env from '@ckeditor/ckeditor5-utils/src/env';
* // Words: 0, Characters: 5
*
* <paragraph>foo(bar)</paragraph>
* //Words: 2, Characters: 8
* // Words: 1, Characters: 8
*
* <paragraph>12345</paragraph>
* // Words: 1, Characters: 5
Expand Down Expand Up @@ -134,11 +134,8 @@ export default class WordCount extends Plugin {
// Groups:
// {L} - Any kind of letter from any language.
// {N} - Any kind of numeric character in any script.
// {M} - A character intended to be combined with another character (e.g. accents, umlauts, enclosing boxes, etc.).
// {Pd} - Any kind of hyphen or dash.
// {Pc} - A punctuation character such as an underscore that connects words.
new RegExp( '[\\p{L}\\p{N}\\p{M}\\p{Pd}\\p{Pc}]+', 'gu' ) :
/[_\-a-zA-Z0-9À-ž]+/gu;
new RegExp( '([\\p{L}\\p{N}]+\\S?)+', 'gu' ) :
/([a-zA-Z0-9À-ž]+\S?)+/gu;
}

/**
Expand Down
4 changes: 2 additions & 2 deletions packages/ckeditor5-word-count/tests/manual/wordcount.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
1. Try to type in the editor. The container below should be automatically updated with the current number of words and characters.
2. Special characters are treated as separators for words. For example
2. Special characters are not treated as separators for words. For example:
* `Hello world` - 2 words
* `Hello(World)` - 2 words
* `Hello(World)` - 1 word
* `Hello\nWorld` - 2 words
3. Numbers are treated as words.
4. The values of `WordCount:event-update` are logged in the console. They should change in the same way as the container in the HTML.
Expand Down
195 changes: 150 additions & 45 deletions packages/ckeditor5-word-count/tests/wordcount.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ import Position from '@ckeditor/ckeditor5-engine/src/model/position';
import ShiftEnter from '@ckeditor/ckeditor5-enter/src/shiftenter';
import TableEditing from '@ckeditor/ckeditor5-table/src/tableediting';
import env from '@ckeditor/ckeditor5-utils/src/env';
import ListEditing from '@ckeditor/ckeditor5-list/src/listediting';
import LinkEditing from '@ckeditor/ckeditor5-link/src/linkediting';
import ImageCaptionEditing from '@ckeditor/ckeditor5-image/src/imagecaption/imagecaptionediting';
import ImageEditing from '@ckeditor/ckeditor5-image/src/image/imageediting';

// Delay related to word-count throttling.
const DELAY = 255;
Expand All @@ -27,7 +31,7 @@ describe( 'WordCount', () => {

beforeEach( () => {
return VirtualTestEditor.create( {
plugins: [ WordCount, Paragraph, ShiftEnter, TableEditing ]
plugins: [ WordCount, Paragraph, ShiftEnter, TableEditing, ListEditing, LinkEditing, ImageEditing, ImageCaptionEditing ]
} )
.then( _editor => {
editor = _editor;
Expand Down Expand Up @@ -119,17 +123,152 @@ describe( 'WordCount', () => {
} );

describe( 'functionality', () => {
it( 'counts words', () => {
expect( wordCountPlugin.words ).to.equal( 0 );
describe( 'counting words', () => {
beforeEach( () => {
expect( wordCountPlugin.words ).to.equal( 0 );
} );

setModelData( model, '<paragraph>Foo(bar)baz</paragraph>' +
'<paragraph><$text foo="true">Hello</$text> world.</paragraph>' +
'<paragraph>1234</paragraph>' +
'<paragraph>(@#$%^*())</paragraph>' );
it( 'should count a number as a word', () => {
setModelData( model, '<paragraph>1 12 3,5 3/4 1.2 0</paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 6 );
} );

wordCountPlugin._refreshStats();
it( 'should count a single letter as a word', () => {
setModelData( model, '<paragraph>a</paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 1 );
} );

it( 'should count an e-mail as a single word', () => {
setModelData( model, '<paragraph>user@example.com</paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 1 );
} );

it( 'should ignore apostrophes in words', () => {
setModelData( model, '<paragraph>Foo\'bar</paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 1 );
} );

it( 'should ignore dots in words', () => {
setModelData( model, '<paragraph>Foo.bar</paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 1 );
} );

it( 'should count words in links', () => {
setModelData( model, '<paragraph><$text linkHref="http://www.cksource.com">CK Source</$text></paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 2 );
} );

it( 'should not count the string with no letters or numbers as a word', () => {
setModelData( model, '<paragraph>(@#$%^*()) . ??? @ --- ...</paragraph>' );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 0 );
} );

it( 'should not count the list item number/bullet as a word', () => {
setModelData( model, '<listItem listType="numbered" listIndent="0">Foo</listItem>' +
'<listItem listType="bulleted" listIndent="0">bar</listItem>' );

wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 2 );
} );

it( 'should count words in the image caption', () => {
setModelData( model,
'<image>' +
'<caption>Foo Bar</caption>' +
'</image>'
);

wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 2 );
} );

it( 'should count words in the table', () => {
setModelData( model,
'<table>' +
'<tableRow>' +
'<tableCell><paragraph>Foo</paragraph></tableCell>' +
'<tableCell><paragraph>Foo</paragraph></tableCell>' +
'<tableCell><paragraph>Foo</paragraph></tableCell>' +
'</tableRow>' +
'<tableRow>' +
'<tableCell><paragraph>Foo</paragraph></tableCell>' +
'<tableCell><paragraph>Foo</paragraph></tableCell>' +
'<tableCell><paragraph>Foo</paragraph></tableCell>' +
'</tableRow>' +
'</table>'
);

wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 6 );
} );

it( 'should separate words with the end of the paragraph', () => {
setModelData( model, '<paragraph>Foo</paragraph>' +
'<paragraph>Bar</paragraph>' );

wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 2 );
} );

it( 'should separate words with the new line character', () => {
setModelData( model, '<paragraph>Foo\nBar</paragraph>' );

expect( wordCountPlugin.words ).to.equal( 6 );
wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 2 );
} );

it( 'should separate words with the soft break', () => {
setModelData( model, '<paragraph>Foo<softBreak></softBreak>Bar</paragraph>' );

wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 2 );
} );

it( 'should not separate words with the special characters', () => {
setModelData( model, '<paragraph>F!o@o-B#a$r%F^o*B(a)r_F-o+o=B£a§r`F~o,B,a.F/o?o;B:a\'r"F\\o|oB{ar}</paragraph>' );

wordCountPlugin._refreshStats();
expect( wordCountPlugin.words ).to.equal( 1 );
} );

it( 'should count international words', function() {
if ( !env.features.isRegExpUnicodePropertySupported ) {
this.skip();
}

setModelData( model, '<paragraph>שמש 太陽 ดวงอาทิตย์ شمس ਸੂਰਜ słońce</paragraph>' );
wordCountPlugin._refreshStats();

expect( wordCountPlugin.words ).to.equal( 6 );
} );

describe( 'ES2018 RegExp Unicode property fallback', () => {
const originalPropertiesSupport = env.features.isRegExpUnicodePropertySupported;

before( () => {
env.features.isRegExpUnicodePropertySupported = false;
} );

after( () => {
env.features.isRegExpUnicodePropertySupported = originalPropertiesSupport;
} );

it( 'should use different regexp when unicode properties are not supported', () => {
expect( wordCountPlugin.words ).to.equal( 0 );

setModelData( model, '<paragraph>hello world.</paragraph>' );
wordCountPlugin._refreshStats();

expect( wordCountPlugin.words ).to.equal( 2 );
} );
} );
} );

it( 'counts characters', () => {
Expand Down Expand Up @@ -162,40 +301,6 @@ describe( 'WordCount', () => {
expect( wordCountPlugin.characters ).to.equal( 9 );
} );

it( 'should count international words', function() {
if ( !env.features.isRegExpUnicodePropertySupported ) {
this.skip();
}

expect( wordCountPlugin.words ).to.equal( 0 );

setModelData( model, '<paragraph>שמש 太陽 ดวงอาทิตย์ شمس ਸੂਰਜ słońce</paragraph>' );
wordCountPlugin._refreshStats();

expect( wordCountPlugin.words ).to.equal( 6 );
} );

describe( 'ES2018 RegExp Unicode property fallback', () => {
const originalPropertiesSupport = env.features.isRegExpUnicodePropertySupported;

before( () => {
env.features.isRegExpUnicodePropertySupported = false;
} );

after( () => {
env.features.isRegExpUnicodePropertySupported = originalPropertiesSupport;
} );

it( 'should use different regexp when unicode properties are not supported', () => {
expect( wordCountPlugin.words ).to.equal( 0 );

setModelData( model, '<paragraph>hello world.</paragraph>' );
wordCountPlugin._refreshStats();

expect( wordCountPlugin.words ).to.equal( 2 );
} );
} );

describe( '#update event', () => {
it( 'fires with the actual number of characters and words', () => {
const fake = sinon.fake();
Expand Down Expand Up @@ -256,12 +361,12 @@ describe( 'WordCount', () => {
it( 'updates container content', () => {
expect( container.innerText ).to.equal( 'Words: 0Characters: 0' );

setModelData( model, '<paragraph>Foo(bar)baz</paragraph>' +
setModelData( model, '<paragraph>Foo bar</paragraph>' +
'<paragraph><$text foo="true">Hello</$text> world.</paragraph>' );

wordCountPlugin._refreshStats();

expect( container.innerText ).to.equal( 'Words: 5Characters: 23' );
expect( container.innerText ).to.equal( 'Words: 4Characters: 19' );
} );

it( 'subsequent calls provides the same element', () => {
Expand Down

0 comments on commit c218328

Please sign in to comment.