diff --git a/go.mod b/go.mod index 02cf2ea..0e9028a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,8 @@ module github.com/microcosm-cc/bluemonday go 1.16 require ( + github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d github.com/aymerick/douceur v0.2.0 github.com/gorilla/css v1.0.0 // indirect - golang.org/x/net v0.0.0-20210610132358-84b48f89b13b + golang.org/x/net v0.0.0-20210614182718-04defd469f4e ) diff --git a/go.sum b/go.sum index 930d271..049d516 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ= +github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= @@ -6,6 +8,8 @@ golang.org/x/net v0.0.0-20210421230115-4e50805a0758 h1:aEpZnXcAmXkd6AvLb2OPt+EN1 golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM= golang.org/x/net v0.0.0-20210610132358-84b48f89b13b h1:k+E048sYJHyVnsr1GDrRZWQ32D2C7lWs9JRc0bel53A= golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/sanitize.go b/sanitize.go index 
9bb87a6..5f4b60d 100644 --- a/sanitize.go +++ b/sanitize.go @@ -130,7 +130,7 @@ func escapeUrlComponent(w stringWriterWriter, val string) error { return err } -// Query represents a query +// Query represents a single part of the query string, a query param type Query struct { Key string Value string @@ -138,6 +138,10 @@ type Query struct { } func parseQuery(query string) (values []Query, err error) { + // This is essentially a copy of parseQuery from + // https://golang.org/src/net/url/url.go but adjusted to build our values + // based on our type, which we need to preserve the ordering of the query + // string for query != "" { key := query if i := strings.IndexAny(key, "&;"); i >= 0 { @@ -213,43 +217,6 @@ func sanitizedURL(val string) (string, error) { return u.String(), nil } -func (p *Policy) writeLinkableBuf(buff stringWriterWriter, token *html.Token) (int, error) { - // do not escape multiple query parameters - tokenBuff := bytes.NewBuffer(make([]byte, 0, 1024)) // This should stay on the stack unless it gets too big - - tokenBuff.WriteByte('<') - tokenBuff.WriteString(token.Data) - for _, attr := range token.Attr { - tokenBuff.WriteByte(' ') - tokenBuff.WriteString(attr.Key) - tokenBuff.Write([]byte{'=', '"'}) - switch attr.Key { - case "href", "src": - u, ok := p.validURL(attr.Val) - if !ok { - tokenBuff.WriteString(html.EscapeString(attr.Val)) - continue - } - u, err := sanitizedURL(u) - if err == nil { - tokenBuff.WriteString(u) - } else { - // fallthrough - tokenBuff.WriteString(html.EscapeString(attr.Val)) - } - default: - // re-apply - tokenBuff.WriteString(html.EscapeString(attr.Val)) - } - tokenBuff.WriteByte('"') - } - if token.Type == html.SelfClosingTagToken { - tokenBuff.WriteString("/") - } - tokenBuff.WriteString(">") - return buff.Write(tokenBuff.Bytes()) -} - // Performs the actual sanitization process. 
func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer { var buff bytes.Buffer @@ -344,7 +311,9 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error { aps = aa } if len(token.Attr) != 0 { - token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps) + token.Attr = escapeAttributes( + p.sanitizeAttrs(token.Data, token.Attr, aps), + ) } if len(token.Attr) == 0 { @@ -361,15 +330,8 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error { } if !skipElementContent { - // do not escape multiple query parameters - if linkable(token.Data) { - if _, err := p.writeLinkableBuf(buff, &token); err != nil { - return err - } - } else { - if _, err := buff.WriteString(token.String()); err != nil { - return err - } + if _, err := buff.WriteString(token.String()); err != nil { + return err } } @@ -439,7 +401,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error { } if len(token.Attr) != 0 { - token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps) + token.Attr = escapeAttributes(p.sanitizeAttrs(token.Data, token.Attr, aps)) } if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) { @@ -451,15 +413,8 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error { } } if !skipElementContent { - // do not escape multiple query parameters - if linkable(token.Data) { - if _, err := p.writeLinkableBuf(buff, &token); err != nil { - return err - } - } else { - if _, err := buff.WriteString(token.String()); err != nil { - return err - } + if _, err := buff.WriteString(token.String()); err != nil { + return err } } @@ -569,9 +524,11 @@ attrsLoop: for _, ap := range apl { if ap.regexp != nil { if ap.regexp.MatchString(htmlAttr.Val) { + htmlAttr.Val = escapeAttribute(htmlAttr.Val) cleanAttrs = append(cleanAttrs, htmlAttr) } } else { + htmlAttr.Val = escapeAttribute(htmlAttr.Val) cleanAttrs = append(cleanAttrs, htmlAttr) } } @@ -1087,3 +1044,18 @@ func normaliseElementName(str string) string { `"`, ) } + +func escapeAttributes(attrs []html.Attribute) []html.Attribute { 
+ escapedAttrs := []html.Attribute{} + for _, attr := range attrs { + attr.Val = escapeAttribute(attr.Val) + escapedAttrs = append(escapedAttrs, attr) + } + return escapedAttrs +} + +func escapeAttribute(val string) string { + val = strings.Replace(val, string([]rune{'\u00A0'}), `&nbsp;`, -1) + val = strings.Replace(val, `"`, `&quot;`, -1) + return val +} \ No newline at end of file diff --git a/sanitize_test.go b/sanitize_test.go index 1682756..5c12f5a 100644 --- a/sanitize_test.go +++ b/sanitize_test.go @@ -129,11 +129,11 @@ func TestLinks(t *testing.T) { }, { in: ``, - expected: ``, + expected: ``, }, { in: ``, - expected: ``, + expected: ``, }, { in: ``, @@ -141,7 +141,7 @@ func TestLinks(t *testing.T) { }, { in: ``, - expected: ``, + expected: ``, }, { in: `Red dot`, @@ -152,8 +152,8 @@ func TestLinks(t *testing.T) { expected: ``, }, { - in: ``, - expected: ``, + in: ``, + expected: ``, }, } @@ -3624,3 +3624,39 @@ func TestAdditivePolicies(t *testing.T) { }) }) } + +func TestHrefSanitization(t *testing.T) { + tests := []test{ + { + in: `abcCLICK`, + expected: `abcCLICK`, + }, + { + in: ``, + expected: ``, + }, + } + + p := UGCPolicy() + + // These tests are run concurrently to enable the race detector to pick up + // potential issues + wg := sync.WaitGroup{} + wg.Add(len(tests)) + for ii, tt := range tests { + go func(ii int, tt test) { + out := p.Sanitize(tt.in) + if out != tt.expected { + t.Errorf( + "test %d failed;\ninput : %s\noutput : %s\nexpected: %s", + ii, + tt.in, + out, + tt.expected, + ) + } + wg.Done() + }(ii, tt) + } + wg.Wait() +}