Skip to content

Commit

Permalink
connect collapse whitespace func to tag type map
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesKaufmann committed Nov 4, 2024
1 parent f0108bb commit c37ad42
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 60 deletions.
87 changes: 37 additions & 50 deletions collapse/collapse.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,8 @@ import (
"golang.org/x/net/html"
)

// isPreOrCode reports whether dom.NodeName(node) is "pre" or "code".
//
// Note: the original JavaScript implementation only checked for "pre";
// this port additionally returns true for "code".
func isPreOrCode(node *html.Node) bool {
	switch dom.NodeName(node) {
	case "pre", "code":
		return true
	}
	return false
}

func next(prev *html.Node, current *html.Node) *html.Node {
if (prev != nil && prev.Parent == current) || isPreOrCode(current) {
func nextNode(prev *html.Node, current *html.Node, domFuncs *DomFuncs) *html.Node {
if (prev != nil && prev.Parent == current) || domFuncs.IsPreformattedNode(current) {
if current.NextSibling != nil {
return current.NextSibling
}
Expand All @@ -77,37 +69,7 @@ func next(prev *html.Node, current *html.Node) *html.Node {
return current.Parent
}

// blockElements lists the tag names that isBlock reports as block elements.
var blockElements = []string{
"address", "article", "aside", "audio", "blockquote", "body", "canvas", "center", "dd", "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "html", "isindex", "li", "main", "menu", "nav", "noframes", "noscript", "ol", "output", "p", "pre", "section", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "ul",
}

// voidElements lists the tag names that isVoid reports as void elements.
var voidElements = []string{
// Note: Compared to the javascript implementation, I removed "source"
"area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param" /* "source, "*/, "track", "wbr",
}

// isBlock reports whether the node's name (as returned by dom.NodeName)
// appears in blockElements.
func isBlock(node *html.Node) bool {
	tag := dom.NodeName(node)

	for i := 0; i < len(blockElements); i++ {
		if blockElements[i] == tag {
			return true
		}
	}
	return false
}
// isVoid reports whether the node's name (as returned by dom.NodeName)
// appears in voidElements.
func isVoid(node *html.Node) bool {
	tag := dom.NodeName(node)

	for i := 0; i < len(voidElements); i++ {
		if voidElements[i] == tag {
			return true
		}
	}
	return false
}

func remove(node *html.Node) *html.Node {
func removeNode(node *html.Node) *html.Node {
next := node.NextSibling
if next == nil {
next = node.Parent
Expand All @@ -119,16 +81,41 @@ func remove(node *html.Node) *html.Node {

}

func Collapse(element *html.Node) {
if element.FirstChild == nil || isPreOrCode(element) {
// DomFuncs bundles the predicates that Collapse consults to classify nodes
// while collapsing whitespace.
//
// Any field left nil is replaced by the package default before use, and a nil
// *DomFuncs selects the defaults for every field.
type DomFuncs struct {
// IsBlockNode reports whether node is a block element. When Collapse
// reaches such an element it trims one trailing space from the preceding
// text node and resets its whitespace state.
IsBlockNode func(node *html.Node) bool
// IsVoidNode reports whether node is a void element. For a void element
// that is not a block element or "br", Collapse forgets the previous text
// node and sets keepLeadingWs.
IsVoidNode func(node *html.Node) bool
// IsPreformattedNode reports whether node is preformatted. Collapse returns
// immediately when the root element matches, and handles a matching inline
// element like a void element.
// NOTE(review): nextNode appears to step past a matching node instead of
// descending into its children; part of that function is not visible here.
IsPreformattedNode func(node *html.Node) bool
}

// fillDefaultDomFuncs returns a DomFuncs in which every callback is set.
//
// A nil domFuncs yields a struct made up entirely of the package defaults.
// Otherwise the caller's callbacks are kept and only the nil ones are replaced
// by defaultIsBlockNode, defaultIsVoidNode and defaultIsPreformattedNode.
//
// The result is always a fresh copy: the struct passed in is never written to,
// so a caller may reuse or share one partially filled DomFuncs value without
// Collapse silently modifying it.
func fillDefaultDomFuncs(domFuncs *DomFuncs) *DomFuncs {
	var filled DomFuncs
	if domFuncs != nil {
		// Shallow copy: the fields are plain func values.
		filled = *domFuncs
	}

	if filled.IsBlockNode == nil {
		filled.IsBlockNode = defaultIsBlockNode
	}
	if filled.IsVoidNode == nil {
		filled.IsVoidNode = defaultIsVoidNode
	}
	if filled.IsPreformattedNode == nil {
		filled.IsPreformattedNode = defaultIsPreformattedNode
	}
	return &filled
}
func Collapse(element *html.Node, domFuncs *DomFuncs) {
domFuncs = fillDefaultDomFuncs(domFuncs)
// - - - - - - - - - - - - - - - - - - //

if element.FirstChild == nil || domFuncs.IsPreformattedNode(element) {
return
}

var prevText *html.Node = nil
var keepLeadingWs = false

var prev *html.Node = nil
var node = next(prev, element)
var node = nextNode(prev, element, domFuncs)

for node != element {
if node.Type == html.TextNode /* node.nodeType == 4 */ { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
Expand All @@ -141,22 +128,22 @@ func Collapse(element *html.Node) {

// `text` might be empty at this point.
if text == "" {
node = remove(node)
node = removeNode(node)
continue
}

node.Data = text

prevText = node
} else if node.Type == html.ElementNode { // Node.ELEMENT_NODE
if isBlock(node) || dom.NodeName(node) == "br" {
if domFuncs.IsBlockNode(node) || dom.NodeName(node) == "br" {
if prevText != nil {
prevText.Data = strings.TrimSuffix(prevText.Data, " ")
}

prevText = nil
keepLeadingWs = false
} else if isVoid(node) || isPreOrCode(node) || node.Data == "code" {
} else if domFuncs.IsVoidNode(node) || domFuncs.IsPreformattedNode(node) || dom.NodeName(node) == "code" {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = nil
keepLeadingWs = true
Expand All @@ -169,19 +156,19 @@ func Collapse(element *html.Node) {
} else {
// E.g. DoctypeNode

node = remove(node)
node = removeNode(node)
continue
}

var nextNode = next(prev, node)
var nextNode = nextNode(prev, node, domFuncs)
prev = node
node = nextNode
}

if prevText != nil {
prevText.Data = strings.TrimSuffix(prevText.Data, " ")
if prevText.Data == "" {
remove(prevText)
removeNode(prevText)
}
}
}
12 changes: 6 additions & 6 deletions collapse/collapse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestCollapse_DocType(t *testing.T) {
t.Error(err)
}

Collapse(doc)
Collapse(doc, nil)

var buf bytes.Buffer
err = html.Render(&buf, doc)
Expand All @@ -55,7 +55,7 @@ func TestCollapse_NoFirstChild(t *testing.T) {
Data: "strong",
}

Collapse(boldNode)
Collapse(boldNode, nil)

var buf bytes.Buffer
err := html.Render(&buf, boldNode)
Expand All @@ -80,7 +80,7 @@ func TestCollapse_StartWithCode(t *testing.T) {
}
codeNode.AppendChild(textNode)

Collapse(codeNode)
Collapse(codeNode, nil)

var buf bytes.Buffer
err := html.Render(&buf, codeNode)
Expand Down Expand Up @@ -111,7 +111,7 @@ func TestCollapse_TwoTextNodes(t *testing.T) {
node1.AppendChild(node2)
node1.AppendChild(node3)

Collapse(node1)
Collapse(node1, nil)

var buf bytes.Buffer
err := html.Render(&buf, node1)
Expand Down Expand Up @@ -142,7 +142,7 @@ func TestCollapse_LastTextIsEmpty(t *testing.T) {
node1.AppendChild(node2)
node1.AppendChild(node3)

Collapse(node1)
Collapse(node1, nil)

var buf bytes.Buffer
err := html.Render(&buf, node1)
Expand Down Expand Up @@ -273,7 +273,7 @@ func TestCollapse_Table(t *testing.T) {
t.Error(err)
}

Collapse(doc)
Collapse(doc, nil)

var buf bytes.Buffer
err = html.Render(&buf, getBody(doc))
Expand Down
43 changes: 43 additions & 0 deletions collapse/is_node.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package collapse

import (
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)

// blockElements lists the tag names that defaultIsBlockNode reports as block
// elements.
var blockElements = []string{
"address", "article", "aside", "audio", "blockquote", "body", "canvas", "center", "dd", "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "html", "isindex", "li", "main", "menu", "nav", "noframes", "noscript", "ol", "output", "p", "pre", "section", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "ul",
}

// voidElements lists the tag names that defaultIsVoidNode reports as void
// elements.
var voidElements = []string{
// Note: Compared to the javascript implementation, I removed "source"
"area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param" /* "source, "*/, "track", "wbr",
}

// defaultIsBlockNode is the fallback for DomFuncs.IsBlockNode. It reports
// whether the node's name (as returned by dom.NodeName) appears in
// blockElements.
var defaultIsBlockNode = func(node *html.Node) bool {
	tag := dom.NodeName(node)

	for i := 0; i < len(blockElements); i++ {
		if blockElements[i] == tag {
			return true
		}
	}
	return false
}
// defaultIsVoidNode is the fallback for DomFuncs.IsVoidNode. It reports
// whether the node's name (as returned by dom.NodeName) appears in
// voidElements.
var defaultIsVoidNode = func(node *html.Node) bool {
	tag := dom.NodeName(node)

	for i := 0; i < len(voidElements); i++ {
		if voidElements[i] == tag {
			return true
		}
	}
	return false
}
// defaultIsPreformattedNode is the fallback for DomFuncs.IsPreformattedNode.
// It reports whether dom.NodeName(node) is "pre" or "code".
//
// Note: the original JavaScript implementation only checked for "pre";
// this port additionally returns true for "code".
var defaultIsPreformattedNode = func(node *html.Node) bool {
	switch dom.NodeName(node) {
	case "pre", "code":
		return true
	}
	return false
}
2 changes: 1 addition & 1 deletion converter/register_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestTagType_Priority(t *testing.T) {
if err != nil {
t.Fatal(err)
}
expected := "This \n\n<strong>bold</strong>\n\n and *italic* text"
expected := "This\n\n<strong>bold</strong>\n\nand *italic* text"
if output != expected {
t.Errorf("expected %q but got %q", expected, output)
}
Expand Down
12 changes: 11 additions & 1 deletion plugin/base/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,17 @@ func (b *base) preRenderRemove(ctx converter.Context, doc *html.Node) {
}

func (b *base) preRenderCollapse(ctx converter.Context, doc *html.Node) {
collapse.Collapse(doc)
collapse.Collapse(doc, &collapse.DomFuncs{
IsBlockNode: func(node *html.Node) bool {
tagName := dom.NodeName(node)
tagType, ok := ctx.GetTagType(tagName)
if ok {
return tagType == converter.TagTypeBlock
}

return dom.NameIsBlockNode(tagName)
},
})
}

var characterEntityReplacer = strings.NewReplacer(
Expand Down
3 changes: 1 addition & 2 deletions plugin/base/renderers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ func TestRenderAsX(t *testing.T) {
desc: "RenderAsHTML Inline",
isInline: true,
renderFunc: base.RenderAsHTML,
// TODO: Connect the collapse function to the tagType map. Then we can also use "TagTypeInline" for a node that is actually a block node
expected: "# heading\n\n<footer><strong>bold text</strong></footer>",
expected: "# heading\n\n<footer><strong>bold text</strong></footer>",
},

{
Expand Down

0 comments on commit c37ad42

Please sign in to comment.