Skip to content

Commit

Permalink
refactor(renderer): improve HTML escaping (#952)
Browse files Browse the repository at this point in the history
escape special characters in link URLs
escape special characters in table of contents' section titles

also, simplify rendering of listing with language/syntax
coloring

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon authored Feb 26, 2022
1 parent 8018f3f commit 76352c9
Show file tree
Hide file tree
Showing 17 changed files with 86 additions and 119 deletions.
7 changes: 3 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/davecgh/go-spew v1.1.1
github.com/google/go-cmp v0.5.5
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/mna/pigeon v1.1.0
github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
Expand All @@ -18,11 +17,11 @@ require (
github.com/sirupsen/logrus v1.7.0
github.com/sozorogami/gover v0.0.0-20171022184752-b58185e213c5
github.com/spf13/cobra v1.1.1
github.com/stretchr/testify v1.6.1
github.com/stretchr/testify v1.7.0
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v2 v2.4.0
)

// include support for disabling unexported fields
// include support for disabling unexported fields
// TODO: still needed?
replace github.com/davecgh/go-spew => github.com/flw-cn/go-spew v1.1.2-0.20200624141737-10fccbfd0b23
replace github.com/davecgh/go-spew => github.com/flw-cn/go-spew v1.1.2-0.20200624141737-10fccbfd0b23
52 changes: 2 additions & 50 deletions go.sum

Large diffs are not rendered by default.

12 changes: 5 additions & 7 deletions pkg/renderer/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
type Context struct {
Config *configuration.Configuration // TODO: use composition (remove the `Config` field)
WithinDelimitedBlock bool
EncodeSpecialChars bool
WithinList int
counters map[string]int
Attributes types.Attributes
Expand All @@ -23,12 +22,11 @@ type Context struct {
func NewContext(doc *types.Document, config *configuration.Configuration) *Context {
header := doc.Header()
ctx := &Context{
Config: config,
counters: make(map[string]int),
Attributes: config.Attributes,
ElementReferences: doc.ElementReferences,
HasHeader: header != nil,
EncodeSpecialChars: true,
Config: config,
counters: make(map[string]int),
Attributes: config.Attributes,
ElementReferences: doc.ElementReferences,
HasHeader: header != nil,
}
// TODO: add other attributes from https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#builtin-attributes-i18n
ctx.Attributes[types.AttrFigureCaption] = "Figure"
Expand Down
6 changes: 1 addition & 5 deletions pkg/renderer/sgml/delimited_block_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ func (r *sgmlRenderer) renderSourceBlockElements(ctx *renderer.Context, b *types
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("splitted lines:\n%s", spew.Sdump(lines))
}
ctx.EncodeSpecialChars = false
defer func() {
ctx.EncodeSpecialChars = true
}()
// using github.com/alecthomas/chroma to highlight the content
lexer := lexers.Get(language)
if lexer == nil {
Expand Down Expand Up @@ -168,7 +164,7 @@ func (r *sgmlRenderer) renderSourceLine(ctx *renderer.Context, line interface{})
for _, e := range elements {
switch e := e.(type) {
case *types.StringElement, *types.SpecialCharacter:
s, err := r.renderElement(ctx, e)
s, err := r.renderPlainText(ctx, e)
if err != nil {
return "", nil, err
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/renderer/sgml/elements.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (r *sgmlRenderer) renderElement(ctx *renderer.Context, element interface{})
case *types.ThematicBreak:
return r.renderThematicBreak()
case *types.SpecialCharacter:
return r.renderSpecialCharacter(ctx, e)
return r.renderSpecialCharacter(e)
case *types.PredefinedAttribute:
return r.renderPredefinedAttribute(e)
case *types.AttributeDeclaration:
Expand Down Expand Up @@ -141,8 +141,10 @@ func (r *sgmlRenderer) renderPlainText(ctx *renderer.Context, element interface{
return element.Location.Stringify(), nil
case *types.BlankLine, types.ThematicBreak:
return "\n\n", nil
case *types.SpecialCharacter:
return element.Name, nil
case *types.StringElement:
return element.Content, nil
return r.renderStringElement(ctx, element)
case *types.QuotedString:
return r.renderQuotedString(ctx, element)
// case *types.Paragraph:
Expand Down
2 changes: 1 addition & 1 deletion pkg/renderer/sgml/html5/link.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
package html5

const (
linkTmpl = `<a{{ if .ID }} id="{{ .ID }}"{{ end }}{{ if .URL }} href="{{ .URL }}"{{ end }}{{if .Class}} class="{{ .Class }}"{{ end }}{{if .Target}} target="{{ .Target }}"{{ end }}>{{ .Text }}</a>`
linkTmpl = `<a{{ if .ID }} id="{{ .ID }}"{{ end }}{{ if .URL }} href="{{ escape .URL }}"{{ end }}{{if .Class}} class="{{ .Class }}"{{ end }}{{if .Target}} target="{{ .Target }}"{{ end }}>{{ .Text }}</a>`
)
43 changes: 37 additions & 6 deletions pkg/renderer/sgml/html5/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ var _ = Describe("links", func() {

Context("bare URLs", func() {

It("should parse standalone URL with scheme ", func() {
It("standalone URL with scheme", func() {
source := `<https://example.com>`
expected := `<div class="paragraph">
<p><a href="https://example.com" class="bare">https://example.com</a></p>
Expand All @@ -24,7 +24,7 @@ var _ = Describe("links", func() {
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("should parse URL with scheme in sentence", func() {
It("URL with scheme in sentence", func() {
source := `a link to <https://example.com>.`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com" class="bare">https://example.com</a>.</p>
Expand All @@ -33,7 +33,7 @@ var _ = Describe("links", func() {
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("should parse substituted URL with scheme", func() {
It("substituted URL with scheme", func() {
source := `:example: https://example.com
a link to <{example}>.`
Expand All @@ -44,6 +44,37 @@ a link to <{example}>.`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("substituted URL with scheme", func() {
source := `:example: https://example.com
a link to <{example}>.`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com" class="bare">https://example.com</a>.</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("URL with query string", func() {
source := `a link to https://example.com?foo=bar&lang=en[].`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com?foo=bar&amp;lang=en" class="bare">https://example.com?foo=bar&amp;lang=en</a>.</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("substituted bare URL with query string", func() {
source := `:example: https://example.com?foo=bar&lang=en
a link to <{example}>.`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com?foo=bar&amp;lang=en" class="bare">https://example.com?foo=bar&amp;lang=en</a>.</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

Context("malformed", func() {

It("should not parse URL without scheme", func() {
Expand All @@ -55,7 +86,7 @@ a link to <{example}>.`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("should parse with special character in URL", func() {
It("with special character in URL", func() {
source := `a link to https://example.com>[].`
expected := &types.Document{
Elements: []interface{}{
Expand Down Expand Up @@ -87,7 +118,7 @@ a link to <{example}>.`
Expect(ParseDocument(source)).To(MatchDocument(expected))
})

It("should parse with opening angle bracket", func() {
It("with opening angle bracket", func() {
source := `a link to <https://example.com[].`
expected := &types.Document{
Elements: []interface{}{
Expand Down Expand Up @@ -295,7 +326,7 @@ a link to {scheme}://{path} and https://foo.baz`
It("with special characters", func() {
source := `a link to https://example.com?a=1&b=2`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com?a=1&b=2" class="bare">https://example.com?a=1&amp;b=2</a></p>
<p>a link to <a href="https://example.com?a=1&amp;b=2" class="bare">https://example.com?a=1&amp;b=2</a></p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
Expand Down
5 changes: 0 additions & 5 deletions pkg/renderer/sgml/html5/special_character.go

This file was deleted.

9 changes: 9 additions & 0 deletions pkg/renderer/sgml/html5/string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,13 @@ var _ = Describe("strings", func() {
"</div>\n"
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("paragraph with special characters", func() {
source := `...and we're back!`
expected := `<div class="paragraph">
<p>&#8230;&#8203;and we&#8217;re back!</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})
})
2 changes: 1 addition & 1 deletion pkg/renderer/sgml/html5/table_of_contents.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ const (

tocSectionTmpl = "<ul class=\"sectlevel{{ .Level }}\">\n{{ .Content }}</ul>\n"

tocEntryTmpl = "<li><a href=\"#{{ .ID }}\">{{ if .Number }}{{ .Number }}. {{ end }}{{ .Title }}</a>" +
tocEntryTmpl = "<li><a href=\"#{{ .ID }}\">{{ if .Number }}{{ .Number }}. {{ end }}{{ escape .Title }}</a>" +
"{{ if .Content }}\n{{ .Content }}{{ end }}</li>\n"
)
21 changes: 20 additions & 1 deletion pkg/renderer/sgml/html5/table_of_contents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
. "github.com/onsi/gomega" // nolint:golint
)

var _ = Describe("document toc", func() {
var _ = Describe("table of contents", func() {

Context("in document with header", func() {

Expand Down Expand Up @@ -430,6 +430,25 @@ level 1 sections do not exist.`
Expect(RenderHTML(source)).To(MatchHTML(expected))

})

It("section title with special characters", func() { // TODO: move into `section_test.go`
source := `:toc:
== ...and we're back!`
expected := `<div id="toc" class="toc">
<div id="toctitle">Table of Contents</div>
<ul class="sectlevel1">
<li><a href="#_and_were_back">&#8230;&#8203;and we&#8217;re back!</a></li>
</ul>
</div>
<div class="sect1">
<h2 id="_and_were_back">&#8230;&#8203;and we&#8217;re back!</h2>
<div class="sectionbody">
</div>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})
})

Context("within preamble", func() {
Expand Down
1 change: 0 additions & 1 deletion pkg/renderer/sgml/html5/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ var templates = sgml.Templates{
SectionHeader: sectionHeaderTmpl,
SidebarBlock: sidebarBlockTmpl,
SourceBlock: sourceBlockTmpl,
SpecialCharacter: specialCharacterTmpl,
StringElement: stringTmpl,
SubscriptText: subscriptTextTmpl,
SuperscriptText: superscriptTextTmpl,
Expand Down
11 changes: 0 additions & 11 deletions pkg/renderer/sgml/renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ func NewRenderer(t Templates) Renderer {
"trimRight": trimRight,
"trimLeft": trimLeft,
"trim": trimBoth,
"specialCharacter": specialCharacter,
"predefinedAttribute": predefinedAttribute,
"halign": halign,
"valign": valign,
Expand All @@ -62,16 +61,6 @@ func trimBoth(s string) string {
return strings.Trim(s, " ")
}

var specialCharacters = map[string]string{
">": "&gt;",
"<": "&lt;",
"&": "&amp;",
}

func specialCharacter(c string) string {
return specialCharacters[c]
}

var predefinedAttributes = map[string]string{
"sp": " ",
"blank": "",
Expand Down
2 changes: 0 additions & 2 deletions pkg/renderer/sgml/sgml_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ type sgmlRenderer struct {
sectionTitle *textTemplate
sidebarBlock *textTemplate
sourceBlock *textTemplate
specialCharacter *textTemplate
stringElement *textTemplate
subscriptText *textTemplate
superscriptText *textTemplate
Expand Down Expand Up @@ -152,7 +151,6 @@ func (r *sgmlRenderer) prepareTemplates() error {
r.stringElement, err = r.newTemplate("string-element", tmpls.StringElement, err)
r.sidebarBlock, err = r.newTemplate("sidebar-block", tmpls.SidebarBlock, err)
r.sourceBlock, err = r.newTemplate("source-block", tmpls.SourceBlock, err)
r.specialCharacter, err = r.newTemplate("special-character", tmpls.SpecialCharacter, err)
r.subscriptText, err = r.newTemplate("subscript", tmpls.SubscriptText, err)
r.superscriptText, err = r.newTemplate("superscript", tmpls.SuperscriptText, err)
r.table, err = r.newTemplate("table", tmpls.Table, err)
Expand Down
20 changes: 2 additions & 18 deletions pkg/renderer/sgml/special_character.go
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@
package sgml

import (
"strings"

"github.com/bytesparadise/libasciidoc/pkg/types"
"github.com/pkg/errors"
)

func (r *sgmlRenderer) renderSpecialCharacter(ctx *Context, s *types.SpecialCharacter) (string, error) {
func (r *sgmlRenderer) renderSpecialCharacter(s *types.SpecialCharacter) (string, error) {
// log.Debugf("rendering special character '%s'", s.Name)
if !ctx.EncodeSpecialChars {
// just return the special character as-is
return s.Name, nil
}
// TODO: no need for a template here, just a map
result := &strings.Builder{}
if err := r.specialCharacter.Execute(result, struct {
Name string
}{
Name: s.Name,
}); err != nil {
return "", errors.Wrap(err, "error while rendering special character")
}
return result.String(), nil
return EscapeString(s.Name), nil
}
5 changes: 1 addition & 4 deletions pkg/renderer/sgml/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,15 @@ func (r *sgmlRenderer) renderStringElement(ctx *renderer.Context, str *types.Str
func asciiEntify(source string) string {
out := &strings.Builder{}
out.Grow(len(source))

for _, r := range source {

// This will certain characters that should be escaped alone. Run them through
// escape first if that is a concern.
if r < 128 && (unicode.IsPrint(r) || unicode.IsSpace(r)) {
out.WriteRune(r)
continue
}
// take care that the entity is unsigned (should always be)
fmt.Fprintf(out, "&#%d;", uint32(r))
fmt.Fprintf(out, "&#%d;", uint32(r)) // TODO: avoid `fmt.Fprintf`, use `fmt.Fprint` instead?
}

return out.String()
}
1 change: 0 additions & 1 deletion pkg/renderer/sgml/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ type Templates struct {
SectionHeader string
SidebarBlock string
SourceBlock string
SpecialCharacter string
StringElement string
SubscriptText string
SuperscriptText string
Expand Down

0 comments on commit 76352c9

Please sign in to comment.