WordPress · getdave · Oct 7, 2021 · Oct 8, 2021 · Oct 8, 2021 · Oct 8, 2021
@@ -6,9 +6,21 @@
  * @return {string} The text content with any html removed.
  */
 export default function stripHTML( html ) {
 	// When parsing a full document the HTML parser discards whitespace that
 	// appears before any real content: the "initial", "before html" and
 	// "before head" insertion modes all ignore TAB, LF, FF, CR and SPACE.
 	// see: https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
 	// As a result any leading whitespace in the provided `html` string is
 	// missing from `document.body.textContent`, so capture it up front and
 	// restore it after parsing.
 	//
 	// Only the five characters the parser actually drops are captured.
 	// Using `\s` here would also match characters such as U+00A0 (no-break
 	// space) which the parser keeps in the body, and those would then be
 	// emitted twice.
 	//
 	// The pattern can match the empty string, so `match()` never returns
 	// `null` for a string input; the suppression below is for the type checker.
 	// @ts-ignore
 	const [ leadingWhitespace ] = html.match( /^[\t\n\f\r ]*/ );

 	const document = new window.DOMParser().parseFromString(
 		html,
 		'text/html'
 	);

 	// `textContent` may be `null`/empty; always hand back a string.
 	const content = document.body.textContent || '';

 	return leadingWhitespace + content;
 }
@@ -0,0 +1,66 @@
+/**
+ * Internal dependencies
+ */
+import stripHTML from '../strip-html';
+
// Unit tests for `stripHTML`: tag removal for well-formed and malformed
// markup, plus preservation of whitespace around and inside the text.
describe( 'stripHTML', () => {
	it( 'should strip valid HTML', () => {
		const input =
			'<strong>Here is some text</strong> that contains <em>HTML markup</em>.';
		const output = 'Here is some text that contains HTML markup.';
		expect( stripHTML( input ) ).toBe( output );
	} );

	it( 'should strip invalid HTML', () => {
		// Mismatched closing tags must not leak into the text output.
		const input =
			'<strong>Here is some text</em> <p></div>that contains HTML markup</p>.';
		const output = 'Here is some text that contains HTML markup.';
		expect( stripHTML( input ) ).toBe( output );
	} );

	describe( 'whitespace preservation', () => {
		it( 'should preserve leading spaces', () => {
			// Whitespace before the first tag is dropped by the HTML parser
			// and has to be restored by `stripHTML` itself.
			const input =
				'       <strong>Here is some text</strong> with <em>leading spaces</em>.';
			const output = '       Here is some text with leading spaces.';
			expect( stripHTML( input ) ).toBe( output );
		} );

		it( 'should preserve leading tabs and new lines', () => {
			const input = '\n\t <strong>Here is some text</strong>.';
			const output = '\n\t Here is some text.';
			expect( stripHTML( input ) ).toBe( output );
		} );

		it( 'should preserve whitespace-only input', () => {
			expect( stripHTML( '   ' ) ).toBe( '   ' );
		} );

		it( 'should preserve leading spaces with HTML', () => {
			const input =
				'<strong>      Here is some text</strong> with <em>leading spaces</em>.';
			const output = '      Here is some text with leading spaces.';
			expect( stripHTML( input ) ).toBe( output );
		} );

		it( 'should preserve trailing spaces with HTML', () => {
			const input =
				'<strong>Here is some text</strong> with <em>trailing spaces</em>.          ';
			const output = 'Here is some text with trailing spaces.          ';
			expect( stripHTML( input ) ).toBe( output );
		} );

		it( 'should preserve consecutive spaces within string', () => {
			const input =
				'<strong>Here is some          text</strong> with                  <em>a lot of spaces inside</em>.';
			const output =
				'Here is some          text with                  a lot of spaces inside.';
			expect( stripHTML( input ) ).toBe( output );
		} );

		it( 'should preserve new lines in multi-line HTML string', () => {
			// The indentation inside the template literals is significant:
			// it is part of the expected text content.
			const input = `<div>
        Here is some
        <em>text</em>
        with new lines
        </div>`;

			const output = `
        Here is some
        text
        with new lines
        `;
			expect( stripHTML( input ) ).toBe( output );
		} );
	} );
} );