From 468f79a1930e0e139a4d5a4184045cd320462cc6 Mon Sep 17 00:00:00 2001 From: madman-bob Date: Fri, 1 Jun 2018 11:10:32 +0100 Subject: [PATCH 1/8] Clarify content_unformatted tests By comparison with textarea, note that is not balanced, as there is actually only one opening tag. The second is raw text inside the text area. --- js/test/generated/beautify-html-tests.js | 12 ++++++------ test/data/html/tests.js | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/js/test/generated/beautify-html-tests.js b/js/test/generated/beautify-html-tests.js index ef9e4ec65..e44e1a8cb 100644 --- a/js/test/generated/beautify-html-tests.js +++ b/js/test/generated/beautify-html-tests.js @@ -2878,25 +2878,25 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be '\n' + ''); test_fragment( - '

Beautify me

But not me

', + '

Beautify me

But not me

', // -- output -- '
\n' + '

Beautify me

\n' + '
\n' + - '

But not me

'); + '

But not me

'); test_fragment( '
Beautify me

Beautify me

But not me

', + '>But not me

', // -- output -- '
\n' + '

Beautify me

\n' + '
\n' + - '

But not me

'); + '>But not me

'); test_fragment('
blabla
something here
'); test_fragment('

'); test_fragment( diff --git a/test/data/html/tests.js b/test/data/html/tests.js index 9e3908a50..622ab422a 100644 --- a/test/data/html/tests.js +++ b/test/data/html/tests.js @@ -1010,23 +1010,23 @@ exports.test_data = { ] }, { fragment: true, - input: '

Beautify me

But not me

', + input: '

Beautify me

But not me

', output: [ '
', '

Beautify me

', '
', - '

But not me

' + '

But not me

' ] }, { fragment: true, - input: '
Beautify me

But not me

', + input: '
Beautify me

But not me

', output: [ '
', '

Beautify me

', '
', - '

But not me

' + '>But not me

' ] }, { fragment: true, From 0e9ad61dba19722f0c4f2b5aabe41b8f10bc884e Mon Sep 17 00:00:00 2001 From: madman-bob Date: Fri, 1 Jun 2018 11:36:25 +0100 Subject: [PATCH 2/8] Add concept of `inline` HTML tags --- README.md | 1 + js/lib/beautify-html.js | 69 +++++++++++++++++++----- js/lib/cli.js | 2 + js/src/html/beautifier.js | 66 ++++++++++++++++++----- js/test/generated/beautify-html-tests.js | 12 ++--- test/data/html/node.mustache | 12 ++--- tools/template/beautify-html.begin.js | 3 +- 7 files changed, 125 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index a86e2dfbb..22ff459ef 100644 --- a/README.md +++ b/README.md @@ -291,6 +291,7 @@ HTML Beautifier Options: -w, --wrap-line-length Maximum characters per line (0 disables) [250] -A, --wrap-attributes Wrap attributes to new lines [auto|force|force-aligned|force-expand-multiline] ["auto"] -i, --wrap-attributes-indent-size Indent wrapped attributes to after N characters [indent-size] (ignored if wrap-attributes is "force-aligned") + -d, --inline List of tags to be considered inline tags -U, --unformatted List of tags (defaults to inline) that should not be reformatted -T, --content_unformatted List of tags (defaults to pre) whose content should not be reformatted -E, --extra_liners List of tags (defaults to [head,body,/html] that should have an extra newline before them. diff --git a/js/lib/beautify-html.js b/js/lib/beautify-html.js index ff66cf7df..eb0dcc1ef 100644 --- a/js/lib/beautify-html.js +++ b/js/lib/beautify-html.js @@ -47,8 +47,9 @@ wrap_line_length (default 250) - maximum amount of characters per line (0 = disable) brace_style (default "collapse") - "collapse" | "expand" | "end-expand" | "none" put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), or just put end braces on own line, or attempt to keep them where they are. + inline (defaults to inline tags) - list of tags to be considered inline tags unformatted (defaults to inline tags) - list of tags, that shouldn't be reformatted - content_unformatted (defaults to pre tag) - list of tags, whose content shouldn't be reformatted + content_unformatted (defaults to ["pre", "textarea"] tags) - list of tags, whose content shouldn't be reformatted indent_scripts (default normal) - "keep"|"separate"|"normal" preserve_newlines (default true) - whether existing line breaks before elements should be preserved Only works before elements, not inside tags or for text. @@ -206,6 +207,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { indent_character, wrap_line_length, brace_style, + inline_tags, unformatted, content_unformatted, preserve_newlines, @@ -237,7 +239,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { indent_character = (options.indent_char === undefined) ? ' ' : options.indent_char; brace_style = (options.brace_style === undefined) ? 'collapse' : options.brace_style; wrap_line_length = parseInt(options.wrap_line_length, 10) === 0 ? 32786 : parseInt(options.wrap_line_length || 250, 10); - unformatted = options.unformatted || [ + inline_tags = options.inline || [ // https://www.w3.org/TR/html5/dom.html#phrasing-content 'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', @@ -248,8 +250,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { // prexisting - not sure of full effect of removing, leaving in 'acronym', 'address', 'big', 'dt', 'ins', 'strike', 'tt', ]; + unformatted = options.unformatted || []; content_unformatted = options.content_unformatted || [ - 'pre', + 'pre', 'textarea' ]; preserve_newlines = (options.preserve_newlines === undefined) ? true : options.preserve_newlines; max_preserve_newlines = preserve_newlines ? @@ -330,6 +333,14 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } } return false; + }, + get_tag_name: function(full_tag) { + var tag_match = (full_tag || "").match(/^\s*(<\/?|\{\{[#\/])([^\s>\}]+)/); + var is_closing_tag = !!((full_tag || "").match(/^\s*(<\/|\{\{\/)/)); + return { + tag_name: tag_match && tag_match[2], + is_closing_tag: is_closing_tag + }; } }; @@ -377,6 +388,17 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } }; + this.get_last_tag = function() { + var last_tag; + for (var i = this.output.length - 1; i >= 0; i --) { + last_tag = this.Utils.get_tag_name(multi_parser.output[i]); + if (last_tag.tag_name) { + break; + } + } + return last_tag; + }; + this.get_content = function() { //function to capture regular content between tags var input_char = '', content = [], @@ -659,6 +681,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { tag_offset = tag_complete.charAt(2) === '#' ? 3 : 2; } var tag_check = tag_complete.substring(tag_offset, tag_index).toLowerCase(); + this.is_inline_tag = this.Utils.in_array(tag_check, inline_tags); if (tag_complete.charAt(tag_complete.length - 2) === '/' || this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /) if (!peek) { @@ -674,6 +697,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } else if (this.is_unformatted(tag_check, unformatted) || this.is_unformatted(tag_check, content_unformatted)) { // do not reformat the "unformatted" or "content_unformatted" tags + if (this.is_unformatted(tag_check, unformatted)) { + content = [this.input.slice(tag_start, this.pos)]; + } comment = this.get_unformatted('', tag_complete); //...delegate to get_unformatted function content.push(comment); tag_end = this.pos - 1; @@ -710,6 +736,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } this.tag_type = 'START'; } + this.is_inline_tag = this.Utils.in_array(tag_check.charAt(0) === '/' ? tag_check.substr(1) : tag_check, inline_tags); // Allow preserving of newlines after a start or end tag if (this.traverse_whitespace()) { @@ -860,6 +887,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { this.get_token = function() { //initial handler for token-retrieval var token; + this.is_inline_tag = true; if (this.last_token === 'TK_TAG_SCRIPT' || this.last_token === 'TK_TAG_STYLE') { //check if we need to format javascript var type = this.last_token.substr(7); @@ -1036,7 +1064,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { switch (multi_parser.token_type) { case 'TK_TAG_START': - multi_parser.print_newline(false, multi_parser.output); + if (!multi_parser.is_last_tag_inline && !multi_parser.is_inline_tag) { + multi_parser.print_newline(false, multi_parser.output); + } multi_parser.print_token(multi_parser.token_text); if (multi_parser.indent_content) { if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(//)) && @@ -1056,15 +1086,18 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'CONTENT'; break; case 'TK_TAG_END': - //Print new line only if the tag has no content and has child - if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') { - var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0]; - var tag_extracted_from_last_output = null; - if (multi_parser.output.length) { - tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length - 1].match(/(?:<|{{#)\s*(\w+)/); - } - if (tag_extracted_from_last_output === null || - (tag_extracted_from_last_output[1] !== tag_name && !multi_parser.Utils.in_array(tag_extracted_from_last_output[1], unformatted))) { + if (!multi_parser.is_inline_tag) { + //Print new line only if the tag has no content and has child + var tag_name = multi_parser.Utils.get_tag_name(multi_parser.token_text).tag_name; + var last_tag = multi_parser.get_last_tag(); + if ( + !( + !last_tag.is_closing_tag && + tag_name === last_tag.tag_name && + !multi_parser.Utils.in_array(tag_name, content_unformatted) + ) && + !multi_parser.is_last_tag_inline + ) { multi_parser.print_newline(false, multi_parser.output); } } @@ -1074,7 +1107,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { case 'TK_TAG_SINGLE': // Don't add a newline before elements that should remain unformatted. var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i); - if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) { + if ( + !tag_check || + !multi_parser.Utils.in_array(tag_check[1], inline_tags) && + !multi_parser.Utils.in_array(tag_check[1], unformatted) + ) { multi_parser.print_newline(false, multi_parser.output); } multi_parser.print_token(multi_parser.token_text); @@ -1110,6 +1147,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { case 'TK_CONTENT': multi_parser.print_token(multi_parser.token_text); multi_parser.current_mode = 'TAG'; + if (!multi_parser.token_text) { + continue; + } break; case 'TK_STYLE': case 'TK_SCRIPT': @@ -1166,6 +1206,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } multi_parser.last_token = multi_parser.token_type; multi_parser.last_text = multi_parser.token_text; + multi_parser.is_last_tag_inline = multi_parser.is_inline_tag; } var sweet_code = multi_parser.output.join('').replace(/[\r\n\t ]+$/, ''); diff --git a/js/lib/cli.js b/js/lib/cli.js index 99b243602..977209605 100755 --- a/js/lib/cli.js +++ b/js/lib/cli.js @@ -95,6 +95,7 @@ var path = require('path'), "space_around_selector_separator": Boolean, // HTML-only "max_char": Number, // obsolete since 1.3.5 + "inline": [String, Array], "unformatted": [String, Array], "content_unformatted": [String, Array], "indent_inner_html": [Boolean], @@ -144,6 +145,7 @@ var path = require('path'), "A": ["--wrap_attributes"], "i": ["--wrap_attributes_indent_size"], "W": ["--max_char"], // obsolete since 1.3.5 + "d": ["--inline"], "U": ["--unformatted"], "T": ["--content_unformatted"], "I": ["--indent_inner_html"], diff --git a/js/src/html/beautifier.js b/js/src/html/beautifier.js index c8732bd53..50e819b44 100644 --- a/js/src/html/beautifier.js +++ b/js/src/html/beautifier.js @@ -58,6 +58,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { indent_character, wrap_line_length, brace_style, + inline_tags, unformatted, content_unformatted, preserve_newlines, @@ -89,7 +90,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { indent_character = (options.indent_char === undefined) ? ' ' : options.indent_char; brace_style = (options.brace_style === undefined) ? 'collapse' : options.brace_style; wrap_line_length = parseInt(options.wrap_line_length, 10) === 0 ? 32786 : parseInt(options.wrap_line_length || 250, 10); - unformatted = options.unformatted || [ + inline_tags = options.inline || [ // https://www.w3.org/TR/html5/dom.html#phrasing-content 'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', @@ -100,8 +101,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { // prexisting - not sure of full effect of removing, leaving in 'acronym', 'address', 'big', 'dt', 'ins', 'strike', 'tt', ]; + unformatted = options.unformatted || []; content_unformatted = options.content_unformatted || [ - 'pre', + 'pre', 'textarea' ]; preserve_newlines = (options.preserve_newlines === undefined) ? true : options.preserve_newlines; max_preserve_newlines = preserve_newlines ? @@ -182,6 +184,14 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } } return false; + }, + get_tag_name: function(full_tag) { + var tag_match = (full_tag || "").match(/^\s*(<\/?|\{\{[#\/])([^\s>\}]+)/); + var is_closing_tag = !!((full_tag || "").match(/^\s*(<\/|\{\{\/)/)); + return { + tag_name: tag_match && tag_match[2], + is_closing_tag: is_closing_tag + }; } }; @@ -229,6 +239,17 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } }; + this.get_last_tag = function() { + var last_tag; + for (var i = this.output.length - 1; i >= 0; i --) { + last_tag = this.Utils.get_tag_name(multi_parser.output[i]); + if (last_tag.tag_name) { + break; + } + } + return last_tag; + }; + this.get_content = function() { //function to capture regular content between tags var input_char = '', content = [], @@ -511,6 +532,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { tag_offset = tag_complete.charAt(2) === '#' ? 3 : 2; } var tag_check = tag_complete.substring(tag_offset, tag_index).toLowerCase(); + this.is_inline_tag = this.Utils.in_array(tag_check, inline_tags); if (tag_complete.charAt(tag_complete.length - 2) === '/' || this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /) if (!peek) { @@ -526,6 +548,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } else if (this.is_unformatted(tag_check, unformatted) || this.is_unformatted(tag_check, content_unformatted)) { // do not reformat the "unformatted" or "content_unformatted" tags + if (this.is_unformatted(tag_check, unformatted)) { + content = [this.input.slice(tag_start, this.pos)]; + } comment = this.get_unformatted('', tag_complete); //...delegate to get_unformatted function content.push(comment); tag_end = this.pos - 1; @@ -562,6 +587,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } this.tag_type = 'START'; } + this.is_inline_tag = this.Utils.in_array(tag_check.charAt(0) === '/' ? tag_check.substr(1) : tag_check, inline_tags); // Allow preserving of newlines after a start or end tag if (this.traverse_whitespace()) { @@ -712,6 +738,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { this.get_token = function() { //initial handler for token-retrieval var token; + this.is_inline_tag = true; if (this.last_token === 'TK_TAG_SCRIPT' || this.last_token === 'TK_TAG_STYLE') { //check if we need to format javascript var type = this.last_token.substr(7); @@ -888,7 +915,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { switch (multi_parser.token_type) { case 'TK_TAG_START': - multi_parser.print_newline(false, multi_parser.output); + if (!multi_parser.is_last_tag_inline && !multi_parser.is_inline_tag) { + multi_parser.print_newline(false, multi_parser.output); + } multi_parser.print_token(multi_parser.token_text); if (multi_parser.indent_content) { if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(//)) && @@ -908,15 +937,18 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'CONTENT'; break; case 'TK_TAG_END': - //Print new line only if the tag has no content and has child - if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') { - var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0]; - var tag_extracted_from_last_output = null; - if (multi_parser.output.length) { - tag_extracted_from_last_output = multi_parser.output[multi_parser.output.length - 1].match(/(?:<|{{#)\s*(\w+)/); - } - if (tag_extracted_from_last_output === null || - (tag_extracted_from_last_output[1] !== tag_name && !multi_parser.Utils.in_array(tag_extracted_from_last_output[1], unformatted))) { + if (!multi_parser.is_inline_tag) { + //Print new line only if the tag has no content and has child + var tag_name = multi_parser.Utils.get_tag_name(multi_parser.token_text).tag_name; + var last_tag = multi_parser.get_last_tag(); + if ( + !( + !last_tag.is_closing_tag && + tag_name === last_tag.tag_name && + !multi_parser.Utils.in_array(tag_name, content_unformatted) + ) && + !multi_parser.is_last_tag_inline + ) { multi_parser.print_newline(false, multi_parser.output); } } @@ -926,7 +958,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { case 'TK_TAG_SINGLE': // Don't add a newline before elements that should remain unformatted. var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i); - if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) { + if ( + !tag_check || + !multi_parser.Utils.in_array(tag_check[1], inline_tags) && + !multi_parser.Utils.in_array(tag_check[1], unformatted) + ) { multi_parser.print_newline(false, multi_parser.output); } multi_parser.print_token(multi_parser.token_text); @@ -962,6 +998,9 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { case 'TK_CONTENT': multi_parser.print_token(multi_parser.token_text); multi_parser.current_mode = 'TAG'; + if (!multi_parser.token_text) { + continue; + } break; case 'TK_STYLE': case 'TK_SCRIPT': @@ -1018,6 +1057,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } multi_parser.last_token = multi_parser.token_type; multi_parser.last_text = multi_parser.token_text; + multi_parser.is_last_tag_inline = multi_parser.is_inline_tag; } var sweet_code = multi_parser.output.join('').replace(/[\r\n\t ]+$/, ''); diff --git a/js/test/generated/beautify-html-tests.js b/js/test/generated/beautify-html-tests.js index e44e1a8cb..e5cc681dd 100644 --- a/js/test/generated/beautify-html-tests.js +++ b/js/test/generated/beautify-html-tests.js @@ -3116,8 +3116,8 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be '
content content
'); bth('Text Link Text'); - var unformatted = opts.unformatted; - opts.unformatted = ['script', 'style']; + var content_unformatted = opts.content_unformatted; + opts.content_unformatted = ['script', 'style']; bth('