From 7e5a2118561db8b258505aac9a40a4a36d1ee06a Mon Sep 17 00:00:00 2001 From: Koji Ishii Date: Tue, 19 Jul 2022 06:08:26 +0900 Subject: [PATCH] Fix when the `display` property is empty (#76) This patch supports when the `display` property is empty. This occurs when the element is not [connected]. In that case, `HTMLProcessor` uses its built-in rules to determine whether the element is inline or block. [connected]: https://dom.spec.whatwg.org/#connected --- javascript/src/html_processor.ts | 71 +++++++++++++++++++++++-- javascript/tests/test_html_processor.ts | 19 ++++++- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/javascript/src/html_processor.ts b/javascript/src/html_processor.ts index 8215aaef..04d0e7ea 100644 --- a/javascript/src/html_processor.ts +++ b/javascript/src/html_processor.ts @@ -90,16 +90,72 @@ const domActions: {[name: string]: DomAction} = { VAR: DomAction.Skip, }; +const defaultBlockElements = new Set([ + // 15.3.2 The page + 'HTML', + 'BODY', + // 15.3.3 Flow content + 'ADDRESS', + 'BLOCKQUOTE', + 'CENTER', + 'DIALOG', + 'DIV', + 'FIGURE', + 'FIGCAPTION', + 'FOOTER', + 'FORM', + 'HEADER', + 'LEGEND', + 'LISTING', + 'MAIN', + 'P', + // 15.3.6 Sections and headings + 'ARTICLE', + 'ASIDE', + 'H1', + 'H2', + 'H3', + 'H4', + 'H5', + 'H6', + 'HGROUP', + 'NAV', + 'SECTION', + // 15.3.7 Lists + 'DIR', + 'DD', + 'DL', + 'DT', + 'MENU', + 'OL', + 'UL', + 'LI', + // 15.3.8 Tables + 'TABLE', + 'CAPTION', + 'COL', + 'TR', + 'TD', + 'TH', + // 15.3.12 The fieldset and legend elements + 'FIELDSET', + // 15.5.4 The details and summary elements + 'DETAILS', + 'SUMMARY', + // 15.5.12 The marquee element + 'MARQUEE', +]); + /** * Determine the action for an element. * @param element An element to determine the action for. * @returns The {@link domActions} for the element. */ function actionForElement(element: Element): DomAction { - const action = domActions[element.nodeName]; + const nodeName = element.nodeName; + const action = domActions[nodeName]; if (action !== undefined) return action; - // jsdom does not have `getComputedStyle`. if (typeof getComputedStyle === 'function') { const style = getComputedStyle(element); switch (style.whiteSpace) { @@ -109,10 +165,15 @@ function actionForElement(element: Element): DomAction { } const display = style.display; - assert(display); - if (display !== 'inline') return DomAction.Block; + if (display) + return display === 'inline' ? DomAction.Inline : DomAction.Block; + // `display` is an empty string if the element is not connected. } - return DomAction.Inline; + // Use the built-in rules if the `display` property is empty, or if + // `getComputedStyle` is missing (e.g., jsdom.) + return defaultBlockElements.has(nodeName) + ? DomAction.Block + : DomAction.Inline; } /** diff --git a/javascript/tests/test_html_processor.ts b/javascript/tests/test_html_processor.ts index 5a78544f..90ccdf7e 100644 --- a/javascript/tests/test_html_processor.ts +++ b/javascript/tests/test_html_processor.ts @@ -19,14 +19,18 @@ import {JSDOM} from 'jsdom'; import {loadDefaultJapaneseParser} from '../src/parser'; import {HTMLProcessor, HTMLProcessorOptions} from '../src/html_processor'; +let emulateNotConnected = false; + // Browser compatibilities. console.assert(!('getComputedStyle' in global)); global.getComputedStyle = (element: Element) => { const window = element.ownerDocument.defaultView; console.assert(window); const style = window!.getComputedStyle(element); - // jsdom does not compute unspecified properties. - if (!style.display) { + if (emulateNotConnected) { + style.display = ''; + } else if (!style.display) { + // jsdom does not compute unspecified properties. const blockify = style.float || style.position; style.display = blockify ? 'block' : 'inline'; } @@ -172,6 +176,17 @@ describe('HTMLProcessor.getBlocks', () => { getBlocks('beforeb1(r1b2r2after') ).toEqual(['beforeb1b2after']); }); + + it('should use the built-in rules if the `display` property is empty', () => { + emulateNotConnected = true; + expect(getBlocks('
123456
')).toEqual(['123456']); + expect(getBlocks('
123
456
')).toEqual(['456', '123']); + expect(getBlocks('

123

  • 456
  • ')).toEqual([ + '123', + '456', + ]); + emulateNotConnected = false; + }); }); describe('HTMLProcessor.splitTextNodes', () => {