Skip to content

Commit

Permalink
Add WordCounter package (#10)
Browse files Browse the repository at this point in the history
* Adding WordCounter package

* Simplifying test syntax

* export the count as default

* Creating a simpler API

* Refactor into a single function export

*  Modification based on review notes

* Complete refactor based on suggestions from @omarreiss

* Remove conditional check

* Moving each function into a new file. Exporting an object so we can use a single settings property and make the calls a little more sane

* Only importing the lodash method we need

* Using flow to chain the matchWords/matchCharacters inner function calls

* Adding readme

* Addressing some feedback on the PR

* Move to a simpler API

* Updates the README to match API changes

* Updates per review by @youknowriad

* Adds missing docblock param

* Spacing issues

* Adds correct docblocks and fixes some whitespace issues

* Adding some whitespace as per review comment
  • Loading branch information
ryanwelcher authored and Adam Silverstein committed Apr 24, 2018
1 parent 21f3cea commit aac5139
Show file tree
Hide file tree
Showing 16 changed files with 448 additions and 2 deletions.
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
"devDependencies": {
"babel-core": "^6.26.0",
"benchmark": "^2.1.4",
"chalk": "^2.0.1",
"chalk": "^2.3.2",
"check-node-version": "^3.1.1",
"codecov": "^2.2.0",
"codecov": "^2.3.1",
"glob": "^7.1.2",
"lerna": "^2.9.0",
"mkdirp": "^0.5.1",
Expand Down
1 change: 1 addition & 0 deletions packages/wordcount/.npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package-lock=false
26 changes: 26 additions & 0 deletions packages/wordcount/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# @wordpress/wordcount

A utility to count words

## Installation

Install the module

```bash
npm install @wordpress/wordcount --save
```


## Accepted Parameters
```JS
count( text, type, userSettings )
```
count accepts three parameters:
1. text: A string containing the words/characters to be counted.
2. type: A string that represents the type of count. The current implementation accepts the strings 'words', 'characters_excluding_spaces', or 'characters_including_spaces'.
3. userSettings: An object that contains the list of regular expressions that will be used to count. See defaultSettings.js for the defaults.

## Usage
```JS
import { count } from '@wordpress/wordcount';
const numberOfWords = count( 'Words to count', 'words', {} );
```
16 changes: 16 additions & 0 deletions packages/wordcount/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "@wordpress/wordcount",
"version": "0.0.1",
"repository": {
"type": "git",
"url": "https://github.com/WordPress/packages.git"
},
"description": "WordPress Word Count Utility",
"main": "build/index.js",
"module": "build-module/index.js",
"author": "WordPress",
"license": "GPL-2.0+",
"dependencies": {
"lodash": "^4.17.4"
}
}
74 changes: 74 additions & 0 deletions packages/wordcount/src/defaultSettings.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
 * Character ranges that together form the character class used by
 * `removeRegExp`. Each entry is a fragment of a regular expression
 * character class; they are concatenated and wrapped in square brackets.
 */
const removableCharacterRanges = [
	// Basic Latin (extract)
	'\u0021-\u0040\u005B-\u0060\u007B-\u007E',

	// Latin-1 Supplement (extract)
	'\u0080-\u00BF\u00D7\u00F7',

	/*
	 * One contiguous range covering these Unicode blocks:
	 * General Punctuation
	 * Superscripts and Subscripts
	 * Currency Symbols
	 * Combining Diacritical Marks for Symbols
	 * Letterlike Symbols
	 * Number Forms
	 * Arrows
	 * Mathematical Operators
	 * Miscellaneous Technical
	 * Control Pictures
	 * Optical Character Recognition
	 * Enclosed Alphanumerics
	 * Box Drawing
	 * Block Elements
	 * Geometric Shapes
	 * Miscellaneous Symbols
	 * Dingbats
	 * Miscellaneous Mathematical Symbols-A
	 * Supplemental Arrows-A
	 * Braille Patterns
	 * Supplemental Arrows-B
	 * Miscellaneous Mathematical Symbols-B
	 * Supplemental Mathematical Operators
	 * Miscellaneous Symbols and Arrows
	 */
	'\u2000-\u2BFF',

	// Supplemental Punctuation
	'\u2E00-\u2E7F',
];

/*
 * Default regular expressions and localisation values used by the counter.
 * Keys ending in `RegExp` are looked up by name by the strip/match helpers.
 */
export const defaultSettings = {
	// Opening and closing HTML tags.
	HTMLRegExp: /<\/?[a-z][^>]*?>/gi,

	// HTML comments, including multi-line ones.
	HTMLcommentRegExp: /<!--[\s\S]*?-->/g,

	// Non-breaking space entities.
	spaceRegExp: /&nbsp;|&#160;/gi,

	// Any HTML entity.
	HTMLEntityRegExp: /&\S+?;/g,

	// Word connectors: a double hyphen or an em-dash (\u2014).
	connectorRegExp: /--|\u2014/g,

	// Characters to be removed from the input text.
	removeRegExp: new RegExp( '[' + removableCharacterRanges.join( '' ) + ']', 'g' ),

	// UTF-16 surrogate pairs, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
	astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,

	// A non-whitespace character followed by whitespace, i.e. the end of a word.
	wordsRegExp: /\S\s+/g,

	// Every non-whitespace character.
	characters_excluding_spacesRegExp: /\S/g,

	/*
	 * Every character that is not one of these formatting characters:
	 * \f = form feed
	 * \n = new line
	 * \r = carriage return
	 * \t = tab
	 * \v = vertical tab
	 * \u00AD = soft hyphen
	 * \u2028 = line separator
	 * \u2029 = paragraph separator
	 */
	characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,

	// Localisation defaults; `type` is the count type used when none is given.
	l10n: {
		type: 'words',
	},
};
104 changes: 104 additions & 0 deletions packages/wordcount/src/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import { extend, flow } from 'lodash';
import { defaultSettings } from './defaultSettings'
import stripTags from './stripTags';
import transposeAstralsToCountableChar from './transposeAstralsToCountableChar';
import stripHTMLEntities from './stripHTMLEntities';
import stripConnectors from './stripConnectors';
import stripRemovables from './stripRemovables';
import stripHTMLComments from './stripHTMLComments';
import stripShortcodes from './stripShortcodes';
import stripSpaces from './stripSpaces';
import transposeHTMLEntitiesToCountableChars from './transposeHTMLEntitiesToCountableChars';

/**
 * Private function to manage the settings.
 *
 * Builds a new settings object from the defaults and the user's overrides,
 * derives the shortcode regular expression, and normalises the count type.
 *
 * @param {string} type         The type of count to be done.
 * @param {Object} userSettings Custom settings for the count.
 *
 * @return {Object} The combined settings object to be used.
 */
function loadSettings( type, userSettings ) {
	// Merge into a fresh object: extending `defaultSettings` directly would
	// mutate the shared defaults, so overrides (and the derived shortcode
	// regex and type below) would leak into every later call.
	const settings = extend( {}, defaultSettings, userSettings );

	// A user-supplied `l10n` replaces the default one wholesale and may be empty.
	const l10n = settings.l10n || {};

	settings.shortcodes = l10n.shortcodes || {};

	if ( settings.shortcodes && settings.shortcodes.length ) {
		settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + settings.shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' );
	}

	settings.type = type || l10n.type;

	// Anything other than the two character modes falls back to counting words.
	if ( settings.type !== 'characters_excluding_spaces' && settings.type !== 'characters_including_spaces' ) {
		settings.type = 'words';
	}

	return settings;
}

/**
 * Match the regex for the type 'words'.
 *
 * The text is first passed, in order, through stripTags, stripHTMLComments,
 * stripShortcodes, stripSpaces, stripHTMLEntities, stripConnectors and
 * stripRemovables.
 *
 * @param {string} text     The text being processed.
 * @param {RegExp} regex    The regular expression pattern being matched.
 * @param {Object} settings Settings object containing regular expressions for each strip function.
 *
 * @return {Array} The matches, or an empty array when nothing matches.
 */
function matchWords( text, regex, settings ) {
	const stripped = flow(
		stripTags.bind( this, settings ),
		stripHTMLComments.bind( this, settings ),
		stripShortcodes.bind( this, settings ),
		stripSpaces.bind( this, settings ),
		stripHTMLEntities.bind( this, settings ),
		stripConnectors.bind( this, settings ),
		stripRemovables.bind( this, settings )
	)( text );

	// String.prototype.match returns null when there is no match; callers read
	// `.length`, so normalise to an empty array.
	return ( stripped + '\n' ).match( regex ) || [];
}

/**
 * Match the regex for either 'characters_excluding_spaces' or 'characters_including_spaces'.
 *
 * The text is first passed, in order, through stripTags, stripHTMLComments,
 * stripShortcodes, stripSpaces, transposeAstralsToCountableChar and
 * transposeHTMLEntitiesToCountableChars.
 *
 * @param {string} text     The text being processed.
 * @param {RegExp} regex    The regular expression pattern being matched.
 * @param {Object} settings Settings object containing regular expressions for each strip function.
 *
 * @return {Array} The matches, or an empty array when nothing matches.
 */
function matchCharacters( text, regex, settings ) {
	const stripped = flow(
		stripTags.bind( this, settings ),
		stripHTMLComments.bind( this, settings ),
		stripShortcodes.bind( this, settings ),
		stripSpaces.bind( this, settings ),
		transposeAstralsToCountableChar.bind( this, settings ),
		transposeHTMLEntitiesToCountableChars.bind( this, settings )
	)( text );

	// String.prototype.match returns null when there is no match; callers read
	// `.length`, so normalise to an empty array.
	return ( stripped + '\n' ).match( regex ) || [];
}

/**
 * Count some words.
 *
 * @param {String} text         The text being processed.
 * @param {String} type         The type of count. Accepts 'words', 'characters_excluding_spaces', or 'characters_including_spaces'.
 *                              Any other value (including none) is treated as the configured default, falling back to 'words'.
 * @param {Object} userSettings Custom settings object.
 *
 * @return {Number} The word or character count; 0 when the text is empty or nothing matches.
 */
export function count( text, type, userSettings ) {
	// Nothing to count: return a number rather than falling through to undefined.
	if ( ! text ) {
		return 0;
	}

	const settings = loadSettings( type, userSettings );

	// Look the regex up by the normalised type. The raw `type` argument may be
	// omitted or invalid, which would yield an undefined regex and a bogus match.
	const matchRegExp = settings[ settings.type + 'RegExp' ];

	const matches = 'words' === settings.type ?
		matchWords( text, matchRegExp, settings ) :
		matchCharacters( text, matchRegExp, settings );

	// The match helpers may hand back null when nothing matched.
	return matches ? matches.length : 0;
}
14 changes: 14 additions & 0 deletions packages/wordcount/src/stripConnectors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Replaces every match of `settings.connectorRegExp` with a space.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no connector regex is set.
 */
export default function stripConnectors( settings, text ) {
	if ( settings.connectorRegExp ) {
		return text.replace( settings.connectorRegExp, ' ' );
	}
	return text;
}
14 changes: 14 additions & 0 deletions packages/wordcount/src/stripHTMLComments.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Removes every match of `settings.HTMLcommentRegExp` from the text.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no comment regex is set.
 */
export default function stripHTMLComments( settings, text ) {
	if ( settings.HTMLcommentRegExp ) {
		return text.replace( settings.HTMLcommentRegExp, '' );
	}
	return text;
}
14 changes: 14 additions & 0 deletions packages/wordcount/src/stripHTMLEntities.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Removes every match of `settings.HTMLEntityRegExp` from the text.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no entity regex is set.
 */
export default function stripHTMLEntities( settings, text ) {
	if ( settings.HTMLEntityRegExp ) {
		return text.replace( settings.HTMLEntityRegExp, '' );
	}
	return text;
}
14 changes: 14 additions & 0 deletions packages/wordcount/src/stripRemovables.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Removes every match of `settings.removeRegExp` from the text.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no remove regex is set.
 */
export default function stripRemovables( settings, text ) {
	if ( settings.removeRegExp ) {
		return text.replace( settings.removeRegExp, '' );
	}
	return text;
}
14 changes: 14 additions & 0 deletions packages/wordcount/src/stripShortcodes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Replaces every match of `settings.shortcodesRegExp` with a new line.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no shortcode regex is set.
 */
export default function stripShortcodes( settings, text ) {
	if ( settings.shortcodesRegExp ) {
		return text.replace( settings.shortcodesRegExp, '\n' );
	}
	return text;
}
13 changes: 13 additions & 0 deletions packages/wordcount/src/stripSpaces.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Replaces every match of `settings.spaceRegExp` with a space.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no space regex is set.
 */
export default function stripSpaces( settings, text ) {
	if ( settings.spaceRegExp ) {
		return text.replace( settings.spaceRegExp, ' ' );
	}

	// Without this fallback the function returned undefined, which made the
	// next function in the processing chain fail on `text.replace`.
	return text;
}
13 changes: 13 additions & 0 deletions packages/wordcount/src/stripTags.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Replaces every match of `settings.HTMLRegExp` with a new line.
 *
 * @param {Object} settings The main settings object containing regular expressions.
 * @param {String} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no HTML regex is set.
 */
export default function stripTags( settings, text ) {
	if ( settings.HTMLRegExp ) {
		return text.replace( settings.HTMLRegExp, '\n' );
	}

	// Without this fallback the function returned undefined, which made the
	// next function in the processing chain fail on `text.replace`.
	return text;
}
Loading

0 comments on commit aac5139

Please sign in to comment.