Skip to content

Commit

Permalink
Fix #105: Correct handling of charset encoding for XPath queries
Browse files Browse the repository at this point in the history
  • Loading branch information
sibprogrammer committed Nov 11, 2024
1 parent 6a92c0a commit a60e345
Showing 1 changed file with 14 additions and 11 deletions.
25 changes: 14 additions & 11 deletions internal/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,7 @@ const (
func FormatXml(reader io.Reader, writer io.Writer, indent string, colors int) error {
decoder := xml.NewDecoder(reader)
decoder.Strict = false
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
if strings.ToLower(charset) == "utf-16" {
charset = "utf-8"
}
e, err := ianaindex.MIME.Encoding(charset)
if err != nil {
return nil, err
}
return transform.NewReader(input, e.NewDecoder()), nil
}
decoder.CharsetReader = getCharsetReader

level := 0
hasContent := false
Expand Down Expand Up @@ -216,7 +207,8 @@ func XPathQuery(reader io.Reader, writer io.Writer, query string, singleNode boo

doc, err := xmlquery.ParseWithOptions(reader, xmlquery.ParserOptions{
Decoder: &xmlquery.DecoderOptions{
Strict: false,
Strict: false,
CharsetReader: getCharsetReader,
},
})
if err != nil {
Expand Down Expand Up @@ -614,3 +606,14 @@ func normalizeSpaces(input string, indent string, level int) string {

return input
}

// unsupportedCharsetError reports a charset name that the IANA MIME index
// recognizes but for which no decoder implementation is available.
type unsupportedCharsetError string

// Error implements the error interface.
func (e unsupportedCharsetError) Error() string {
	return "unsupported charset: " + string(e)
}

// getCharsetReader returns a reader that decodes input from the named
// charset. Its signature matches the CharsetReader hook of encoding/xml's
// Decoder, so it can be assigned directly as a charset callback.
//
// The charset name is resolved through the IANA MIME index. An error is
// returned when the name is unknown to the index, or when the name is known
// but no encoding implementation exists for it.
func getCharsetReader(charset string, input io.Reader) (io.Reader, error) {
	// A declared "utf-16" (any letter case) is looked up as "utf-8" instead.
	// NOTE(review): presumably the bytes reaching this hook are already
	// UTF-8 compatible when the declaration says UTF-16 — confirm against
	// the callers' inputs.
	if strings.EqualFold(charset, "utf-16") {
		charset = "utf-8"
	}

	e, err := ianaindex.MIME.Encoding(charset)
	if err != nil {
		return nil, err
	}
	// The index may report a registered-but-unimplemented charset as a nil
	// encoding with a nil error; calling NewDecoder on it would panic.
	if e == nil {
		return nil, unsupportedCharsetError(charset)
	}

	return transform.NewReader(input, e.NewDecoder()), nil
}

0 comments on commit a60e345

Please sign in to comment.