-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Adding WordCounter package * Simplifying test syntax * export the count as default * Creating a simpler API * Refactor into a single function export * Modification based on review notes * Complete refactor based on suggestions from @omarreiss * Remove conditional check * Moving each function into a new file. Exporting an object so we can use a single settings property and make the calls a little more sane * Only importing the lodash method we need * Using flow to chain the matchWords/matchCharacters inner function calls * Adding readme * Addressing some feedback on the PR * Move to a simplier API * Updates the README to match API changes * Updates per review by @youknowriad * Adds missing dockblock param * Spacing issues * Adds correct docblocks and fixes some whitespace issues * Adding some whitespace as per review comment
- Loading branch information
1 parent
21f3cea
commit aac5139
Showing
16 changed files
with
448 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
package-lock=false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# @wordpress/wordcount | ||
|
||
A utility to count words | ||
|
||
## Installation | ||
|
||
Install the module | ||
|
||
```bash | ||
npm install @wordpress/wordcount --save | ||
``` | ||
|
||
|
||
## Accepted Parameters | ||
```JS | ||
count( text, type, userSettings ) | ||
``` | ||
count accepts three parameters: | ||
1. text: A string containing the words/characters to be counted. | ||
2. type: A string that represents the type of count. The current implementation accepts the strings 'words', 'characters_excluding_spaces', or 'characters_including_spaces'. | ||
3. userSettings: An object that contains the list of regular expressions that will be used to count. See defaultSettings.js for the defaults. | ||
|
||
## Usage | ||
```JS | ||
import { count } from '@wordpress/wordcount'; | ||
const numberOfWords = count( 'Words to count', 'words', {} ); | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{ | ||
"name": "@wordpress/wordcount", | ||
"version": "0.0.1", | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/WordPress/packages.git" | ||
}, | ||
"description": "WordPress Word Count Utility", | ||
"main": "build/index.js", | ||
"module": "build-module/index.js", | ||
"author": "WordPress", | ||
"license": "GPL-2.0+", | ||
"dependencies": { | ||
"lodash": "^4.17.4" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/**
 * Default settings for the word/character counter.
 *
 * The `*RegExp` entries are read by name by the strip helpers and by
 * `count`; `l10n.type` is the count type used when none is passed in.
 */
export const defaultSettings = {
	HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
	HTMLcommentRegExp: /<!--[\s\S]*?-->/g,

	/*
	 * Non-breaking spaces, either as an HTML entity (named or numeric) or as
	 * the literal U+00A0 character. The entities must be turned into spaces
	 * before HTMLEntityRegExp is applied, otherwise "a&nbsp;b" collapses into
	 * the single word "ab".
	 */
	spaceRegExp: /&nbsp;|&#160;|\u00A0/gi,
	HTMLEntityRegExp: /&\S+?;/g,

	// \u2014 = em-dash
	connectorRegExp: /--|\u2014/g,

	// Characters to be removed from input text.
	removeRegExp: new RegExp( [
		'[',

		// Basic Latin (extract)
		'\u0021-\u0040\u005B-\u0060\u007B-\u007E',

		// Latin-1 Supplement (extract)
		'\u0080-\u00BF\u00D7\u00F7',

		/*
		 * The following range consists of:
		 * General Punctuation
		 * Superscripts and Subscripts
		 * Currency Symbols
		 * Combining Diacritical Marks for Symbols
		 * Letterlike Symbols
		 * Number Forms
		 * Arrows
		 * Mathematical Operators
		 * Miscellaneous Technical
		 * Control Pictures
		 * Optical Character Recognition
		 * Enclosed Alphanumerics
		 * Box Drawing
		 * Block Elements
		 * Geometric Shapes
		 * Miscellaneous Symbols
		 * Dingbats
		 * Miscellaneous Mathematical Symbols-A
		 * Supplemental Arrows-A
		 * Braille Patterns
		 * Supplemental Arrows-B
		 * Miscellaneous Mathematical Symbols-B
		 * Supplemental Mathematical Operators
		 * Miscellaneous Symbols and Arrows
		 */
		'\u2000-\u2BFF',

		// Supplemental Punctuation
		'\u2E00-\u2E7F',
		']',
	].join( '' ), 'g' ),

	// Remove UTF-16 surrogate points, see https://en.wikipedia.org/wiki/UTF-16#U.2BD800_to_U.2BDFFF
	astralRegExp: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,

	// A word is counted as one non-space character followed by whitespace.
	wordsRegExp: /\S\s+/g,
	characters_excluding_spacesRegExp: /\S/g,

	/*
	 * Match anything that is not a formatting character, excluding:
	 * \f = form feed
	 * \n = new line
	 * \r = carriage return
	 * \t = tab
	 * \v = vertical tab
	 * \u00AD = soft hyphen
	 * \u2028 = line separator
	 * \u2029 = paragraph separator
	 */
	characters_including_spacesRegExp: /[^\f\n\r\t\v\u00AD\u2028\u2029]/g,
	l10n: {
		type: 'words',
	},
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import { extend, flow } from 'lodash'; | ||
import { defaultSettings } from './defaultSettings' | ||
import stripTags from './stripTags'; | ||
import transposeAstralsToCountableChar from './transposeAstralsToCountableChar'; | ||
import stripHTMLEntities from './stripHTMLEntities'; | ||
import stripConnectors from './stripConnectors'; | ||
import stripRemovables from './stripRemovables'; | ||
import stripHTMLComments from './stripHTMLComments'; | ||
import stripShortcodes from './stripShortcodes'; | ||
import stripSpaces from './stripSpaces'; | ||
import transposeHTMLEntitiesToCountableChars from './transposeHTMLEntitiesToCountableChars'; | ||
|
||
/**
 * Private function to manage the settings.
 *
 * Merges the user settings over the defaults into a fresh object, so the
 * shared `defaultSettings` object is never modified and nothing set for one
 * call (custom expressions, the type, a shortcode pattern) leaks into the next.
 *
 * @param {string} type         The type of count to be done.
 * @param {Object} userSettings Custom settings for the count.
 *
 * @return {Object} The combined settings object to be used.
 */
function loadSettings( type, userSettings ) {
	// extend() writes into its first argument, so give it a new object.
	const settings = extend( {}, defaultSettings, userSettings );
	const l10n = settings.l10n || {};

	settings.shortcodes = l10n.shortcodes || [];

	if ( settings.shortcodes.length ) {
		// Matches opening and closing tags of any listed shortcode, e.g. [gallery ids="1"] or [/gallery].
		settings.shortcodesRegExp = new RegExp( '\\[\\/?(?:' + settings.shortcodes.join( '|' ) + ')[^\\]]*?\\]', 'g' );
	}

	settings.type = type || l10n.type;

	// Anything other than the two character modes falls back to counting words.
	if ( settings.type !== 'characters_excluding_spaces' && settings.type !== 'characters_including_spaces' ) {
		settings.type = 'words';
	}

	return settings;
}
|
||
/**
 * Match the regex for the type 'words'.
 *
 * The text is first passed through the strip helpers (tags, HTML comments,
 * shortcodes, spaces, HTML entities, connectors, removable characters), each
 * bound to the same settings object.
 *
 * @param {string} text     The text being processed
 * @param {RegExp} regex    The regular expression pattern being matched
 * @param {Object} settings Settings object containing regular expressions for each strip function
 *
 * @return {Array} The matches; an empty array when nothing matched.
 */
function matchWords( text, regex, settings ) {
	const sanitize = flow(
		stripTags.bind( null, settings ),
		stripHTMLComments.bind( null, settings ),
		stripShortcodes.bind( null, settings ),
		stripSpaces.bind( null, settings ),
		stripHTMLEntities.bind( null, settings ),
		stripConnectors.bind( null, settings ),
		stripRemovables.bind( null, settings )
	);

	// Trailing newline so a pattern that requires whitespace after a word also matches the last one.
	const prepared = sanitize( text ) + '\n';

	// String.prototype.match() yields null when there is no match; callers read `.length`.
	return prepared.match( regex ) || [];
}
|
||
/**
 * Match the regex for either 'characters_excluding_spaces' or 'characters_including_spaces'.
 *
 * The text is first passed through the tag, HTML comment, shortcode and space
 * strip helpers and then through the two transpose helpers, each bound to the
 * same settings object.
 *
 * @param {string} text     The text being processed
 * @param {RegExp} regex    The regular expression pattern being matched
 * @param {Object} settings Settings object containing regular expressions for each strip function
 *
 * @return {Array} The matches; an empty array when nothing matched.
 */
function matchCharacters( text, regex, settings ) {
	const sanitize = flow(
		stripTags.bind( null, settings ),
		stripHTMLComments.bind( null, settings ),
		stripShortcodes.bind( null, settings ),
		stripSpaces.bind( null, settings ),
		transposeAstralsToCountableChar.bind( null, settings ),
		transposeHTMLEntitiesToCountableChars.bind( null, settings )
	);

	// The appended newline is not matched by either default character pattern.
	const prepared = sanitize( text ) + '\n';

	// String.prototype.match() yields null when there is no match; callers read `.length`.
	return prepared.match( regex ) || [];
}
|
||
/**
 * Count some words.
 *
 * @param {string} text         The text being processed
 * @param {string} type         The type of count. Accepts 'words', 'characters_excluding_spaces', or 'characters_including_spaces'.
 *                              Any other value is normalised by loadSettings.
 * @param {Object} userSettings Custom settings object.
 *
 * @return {number} The word or character count; 0 for empty text or when nothing matches.
 */
export function count( text, type, userSettings ) {
	if ( ! text ) {
		return 0;
	}

	const settings = loadSettings( type, userSettings );

	// Use the normalised type: the raw argument may be missing or unsupported.
	const matchRegExp = settings[ settings.type + 'RegExp' ];
	const matches = 'words' === settings.type ?
		matchWords( text, matchRegExp, settings ) :
		matchCharacters( text, matchRegExp, settings );

	// The matchers may hand back null when nothing matched.
	return matches ? matches.length : 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/**
 * Replaces every match of `settings.connectorRegExp` with a space.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no connectorRegExp is set.
 */
export default function stripConnectors( settings, text ) {
	const { connectorRegExp } = settings;

	if ( ! connectorRegExp ) {
		return text;
	}
	return text.replace( connectorRegExp, ' ' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/**
 * Removes every match of `settings.HTMLcommentRegExp` from the text.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no HTMLcommentRegExp is set.
 */
export default function stripHTMLComments( settings, text ) {
	const { HTMLcommentRegExp } = settings;

	if ( ! HTMLcommentRegExp ) {
		return text;
	}
	return text.replace( HTMLcommentRegExp, '' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/**
 * Removes every match of `settings.HTMLEntityRegExp` from the text.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no HTMLEntityRegExp is set.
 */
export default function stripHTMLEntities( settings, text ) {
	const { HTMLEntityRegExp } = settings;

	if ( ! HTMLEntityRegExp ) {
		return text;
	}
	return text.replace( HTMLEntityRegExp, '' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/**
 * Removes every match of `settings.removeRegExp` from the text.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no removeRegExp is set.
 */
export default function stripRemovables( settings, text ) {
	const { removeRegExp } = settings;

	if ( ! removeRegExp ) {
		return text;
	}
	return text.replace( removeRegExp, '' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/**
 * Replaces every match of `settings.shortcodesRegExp` with a new line.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no shortcodesRegExp is set.
 */
export default function stripShortcodes( settings, text ) {
	const { shortcodesRegExp } = settings;

	if ( ! shortcodesRegExp ) {
		return text;
	}
	return text.replace( shortcodesRegExp, '\n' );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
/**
 * Replaces every match of `settings.spaceRegExp` with a space.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no spaceRegExp is set.
 */
export default function stripSpaces( settings, text ) {
	if ( settings.spaceRegExp ) {
		return text.replace( settings.spaceRegExp, ' ' );
	}

	// Always hand a string to the next step, even when there is nothing to replace.
	return text;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
/**
 * Replaces every match of `settings.HTMLRegExp` with a new line.
 *
 * @param {Object} settings The main settings object containing regular expressions
 * @param {string} text     The string being counted.
 *
 * @return {string} The manipulated text, or the text unchanged when no HTMLRegExp is set.
 */
export default function stripTags( settings, text ) {
	if ( settings.HTMLRegExp ) {
		return text.replace( settings.HTMLRegExp, '\n' );
	}

	// Always hand a string to the next step, even when there is nothing to replace.
	return text;
}
Oops, something went wrong.