diff --git a/go.mod b/go.mod
index 02cf2ea..0e9028a 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,8 @@ module github.com/microcosm-cc/bluemonday
go 1.16
require (
+ github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/aymerick/douceur v0.2.0
github.com/gorilla/css v1.0.0 // indirect
- golang.org/x/net v0.0.0-20210610132358-84b48f89b13b
+ golang.org/x/net v0.0.0-20210614182718-04defd469f4e
)
diff --git a/go.sum b/go.sum
index 930d271..049d516 100644
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
+github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
@@ -6,6 +8,8 @@ golang.org/x/net v0.0.0-20210421230115-4e50805a0758 h1:aEpZnXcAmXkd6AvLb2OPt+EN1
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b h1:k+E048sYJHyVnsr1GDrRZWQ32D2C7lWs9JRc0bel53A=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
+golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/sanitize.go b/sanitize.go
index 9bb87a6..5f4b60d 100644
--- a/sanitize.go
+++ b/sanitize.go
@@ -130,7 +130,7 @@ func escapeUrlComponent(w stringWriterWriter, val string) error {
return err
}
-// Query represents a query
+// Query represents a single part of the query string, a query param
type Query struct {
Key string
Value string
@@ -138,6 +138,10 @@ type Query struct {
}
func parseQuery(query string) (values []Query, err error) {
+ // This is essentially a copy of parseQuery from
+ // https://golang.org/src/net/url/url.go but adjusted to build our values
+ // based on our type, which we need to preserve the ordering of the query
+ // string
for query != "" {
key := query
if i := strings.IndexAny(key, "&;"); i >= 0 {
@@ -213,43 +217,6 @@ func sanitizedURL(val string) (string, error) {
return u.String(), nil
}
-func (p *Policy) writeLinkableBuf(buff stringWriterWriter, token *html.Token) (int, error) {
- // do not escape multiple query parameters
- tokenBuff := bytes.NewBuffer(make([]byte, 0, 1024)) // This should stay on the stack unless it gets too big
-
- tokenBuff.WriteByte('<')
- tokenBuff.WriteString(token.Data)
- for _, attr := range token.Attr {
- tokenBuff.WriteByte(' ')
- tokenBuff.WriteString(attr.Key)
- tokenBuff.Write([]byte{'=', '"'})
- switch attr.Key {
- case "href", "src":
- u, ok := p.validURL(attr.Val)
- if !ok {
- tokenBuff.WriteString(html.EscapeString(attr.Val))
- continue
- }
- u, err := sanitizedURL(u)
- if err == nil {
- tokenBuff.WriteString(u)
- } else {
- // fallthrough
- tokenBuff.WriteString(html.EscapeString(attr.Val))
- }
- default:
- // re-apply
- tokenBuff.WriteString(html.EscapeString(attr.Val))
- }
- tokenBuff.WriteByte('"')
- }
- if token.Type == html.SelfClosingTagToken {
- tokenBuff.WriteString("/")
- }
- tokenBuff.WriteString(">")
- return buff.Write(tokenBuff.Bytes())
-}
-
// Performs the actual sanitization process.
func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer {
var buff bytes.Buffer
@@ -344,7 +311,9 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
aps = aa
}
if len(token.Attr) != 0 {
- token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+ token.Attr = escapeAttributes(
+ p.sanitizeAttrs(token.Data, token.Attr, aps),
+ )
}
if len(token.Attr) == 0 {
@@ -361,15 +330,8 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
}
if !skipElementContent {
- // do not escape multiple query parameters
- if linkable(token.Data) {
- if _, err := p.writeLinkableBuf(buff, &token); err != nil {
- return err
- }
- } else {
- if _, err := buff.WriteString(token.String()); err != nil {
- return err
- }
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
}
}
@@ -439,7 +401,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
}
if len(token.Attr) != 0 {
- token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+ token.Attr = escapeAttributes(p.sanitizeAttrs(token.Data, token.Attr, aps))
}
if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
@@ -451,15 +413,8 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
}
}
if !skipElementContent {
- // do not escape multiple query parameters
- if linkable(token.Data) {
- if _, err := p.writeLinkableBuf(buff, &token); err != nil {
- return err
- }
- } else {
- if _, err := buff.WriteString(token.String()); err != nil {
- return err
- }
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
}
}
@@ -569,9 +524,11 @@ attrsLoop:
for _, ap := range apl {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
+ htmlAttr.Val = escapeAttribute(htmlAttr.Val)
cleanAttrs = append(cleanAttrs, htmlAttr)
}
} else {
+ htmlAttr.Val = escapeAttribute(htmlAttr.Val)
cleanAttrs = append(cleanAttrs, htmlAttr)
}
}
@@ -1087,3 +1044,18 @@ func normaliseElementName(str string) string {
`"`,
)
}
+
+func escapeAttributes(attrs []html.Attribute) []html.Attribute {
+ escapedAttrs := []html.Attribute{}
+ for _, attr := range attrs {
+ attr.Val = escapeAttribute(attr.Val)
+ escapedAttrs = append(escapedAttrs, attr)
+ }
+ return escapedAttrs
+}
+
+func escapeAttribute(val string) string {
+ val = strings.Replace(val, string([]rune{'\u00A0'}), ` `, -1)
+ val = strings.Replace(val, `"`, `"`, -1)
+ return val
+}
\ No newline at end of file
diff --git a/sanitize_test.go b/sanitize_test.go
index 1682756..5c12f5a 100644
--- a/sanitize_test.go
+++ b/sanitize_test.go
@@ -129,11 +129,11 @@ func TestLinks(t *testing.T) {
},
{
in: ``,
- expected: ``,
+ expected: ``,
},
{
in: ``,
- expected: ``,
+ expected: ``,
},
{
in: ``,
@@ -141,7 +141,7 @@ func TestLinks(t *testing.T) {
},
{
in: ``,
- expected: ``,
+ expected: ``,
},
{
in: ``,
@@ -152,8 +152,8 @@ func TestLinks(t *testing.T) {
expected: ``,
},
{
- in: ``,
- expected: ``,
+ in: ``,
+ expected: ``,
},
}
@@ -3624,3 +3624,39 @@ func TestAdditivePolicies(t *testing.T) {
})
})
}
+
+func TestHrefSanitization(t *testing.T) {
+ tests := []test{
+ {
+ in: `abcCLICK`,
+ expected: `abcCLICK`,
+ },
+ {
+ in: ``,
+ expected: ``,
+ },
+ }
+
+ p := UGCPolicy()
+
+ // These tests are run concurrently to enable the race detector to pick up
+ // potential issues
+ wg := sync.WaitGroup{}
+ wg.Add(len(tests))
+ for ii, tt := range tests {
+ go func(ii int, tt test) {
+ out := p.Sanitize(tt.in)
+ if out != tt.expected {
+ t.Errorf(
+ "test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
+ ii,
+ tt.in,
+ out,
+ tt.expected,
+ )
+ }
+ wg.Done()
+ }(ii, tt)
+ }
+ wg.Wait()
+}