Skip to content

Commit

Permalink
Fixed self-closing script tag issues (fixes #29)
Browse files Browse the repository at this point in the history
  • Loading branch information
pgaskin committed May 21, 2018
1 parent 37d2855 commit 3440efd
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 2 deletions.
14 changes: 12 additions & 2 deletions kepub/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,22 @@ func cleanHTML(doc *goquery.Document) error {
return nil
}

// selfClosingTitleRe matches a self-closing title tag with at most one space before the slash.
var selfClosingTitleRe = regexp.MustCompile("<title ?/>")
// selfClosingScriptRe matches a self-closing script tag. Group 1 is the tag name and group 2 is
// everything between the name and the closing "/>" (whitespace and attributes). Group 2 must
// start with whitespace when non-empty, so tags whose names merely begin with "script"
// (for example <scriptx/>) are not matched and cannot end up with a mismatched closing tag.
var selfClosingScriptRe = regexp.MustCompile(`<(script)((?:\s[^>]*?)?)/>`)

// selfClosingTitleRe matches a self-closing title tag with any amount of whitespace
// (spaces, tabs, or newlines) before the slash.
var selfClosingTitleRe = regexp.MustCompile(`<title\s*/>`)

// fixInvalidSelfClosingTags fixes invalid self-closing tags which cause breakages. It must be run first.
//
// The string pointed to by html is rewritten in place:
//   - a self-closing title tag is replaced with "<title>book</title>";
//   - a self-closing script tag is expanded to an open/close pair containing a single space,
//     keeping whatever appeared between the tag name and "/>" (attributes and whitespace).
//
// html must not be nil. The returned error is currently always nil.
func fixInvalidSelfClosingTags(html *string) error {
	*html = selfClosingTitleRe.ReplaceAllString(*html, "<title>book</title>")
	// $1 is the tag name and $2 the original attribute text, so the opening tag is preserved as written.
	*html = selfClosingScriptRe.ReplaceAllString(*html, "<$1$2> </$1>")
	return nil
}

// process processes the html of a content file in an ordinary epub and converts it into a kobo epub by adding kobo divs, kobo spans, smartening punctuation, and cleaning html.
// It can also optionally run a postprocessor on the goquery.Document, or the html string.
func process(content *string, postDoc *func(doc *goquery.Document) error, postHTML *func(h *string) error) error {
*content = selfClosingTitleRe.ReplaceAllString(*content, "<title>book</title>")
if err := fixInvalidSelfClosingTags(content); err != nil {
return err
}

doc, err := goquery.NewDocumentFromReader(strings.NewReader(*content))
if err != nil {
Expand Down
73 changes: 73 additions & 0 deletions kepub/content_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,79 @@ func TestSpans(t *testing.T) {
}
}

// TestFixInvalidSelfClosingTags runs fixInvalidSelfClosingTags over a table of head fragments and
// checks both the direct string result and the result of parsing that string with goquery and
// rendering it back to HTML.
func TestFixInvalidSelfClosingTags(t *testing.T) {
	// wrap embeds a head fragment in a minimal html document.
	wrap := func(head string) string {
		return fmt.Sprintf("<html><head>%s</head><body></body></html>", head)
	}

	for _, c := range []struct {
		What string // description of the case, also used as the subtest name
		In   string // head fragment passed to fixInvalidSelfClosingTags
		Out  string // expected head fragment right after the replacement
		// Parsed is the expected head fragment after the goquery round trip.
		// When empty, Out is expected unchanged.
		Parsed string
	}{
		{
			What: "should not modify correct title tag",
			In:   "<title>test</title>",
			Out:  "<title>test</title>",
		},
		{
			What: "should fix self-closing title tag",
			In:   "<title/>",
			Out:  "<title>book</title>",
		},
		{
			What: "should fix self-closing title tag with spaces and trim extra spaces",
			In:   "<title />",
			Out:  "<title>book</title>",
		},
		{
			What: "should not modify correct script tag",
			In:   "<script>test</script>",
			Out:  "<script>test</script>",
		},
		{
			What: "should fix self-closing script tag",
			In:   "<script/>",
			Out:  "<script> </script>",
		},
		{
			What: "should fix self-closing script tag with spaces and trim extra spaces",
			In:   "<script />",
			Out:  "<script > </script>",
			// The space inside the opening tag is only expected to disappear after the round trip.
			Parsed: "<script> </script>",
		},
		{
			What: "should fix self-closing script tag with attributes",
			In:   "<script src=\"test\"/>",
			Out:  "<script src=\"test\"> </script>",
		},
		{
			What: "should not interfere with other script tags",
			In:   "<script/><script src=\"whatever\"></script>",
			Out:  "<script> </script><script src=\"whatever\"></script>",
		},
		{
			What: "should work with complex attributes",
			In:   `<script xmlns="http://www.w3.org/1999/xhtml" type="text/javascript" src="../script.js"/>`,
			Out:  `<script xmlns="http://www.w3.org/1999/xhtml" type="text/javascript" src="../script.js"> </script>`,
		},
	} {
		t.Run(c.What, func(t *testing.T) {
			wantReplaced := wrap(c.Out)
			wantParsed := wantReplaced
			if c.Parsed != "" {
				wantParsed = wrap(c.Parsed)
			}

			h := wrap(c.In)
			err := fixInvalidSelfClosingTags(&h)
			assert.NoError(t, err, "should not error")
			assert.Equalf(t, wantReplaced, h, "%s (after replacement)", c.What)

			doc, err := goquery.NewDocumentFromReader(strings.NewReader(h))
			if !assert.NoError(t, err, "should not error when parsing modified document") {
				// Without a parsed document there is nothing left to check; calling doc.Html() could panic.
				return
			}

			h, err = doc.Html()
			assert.NoError(t, err, "should not error when creating new html")
			assert.Equalf(t, wantParsed, h, "%s (after passing through goquery)", c.What)
		})
	}
}

func BenchmarkProcess(b *testing.B) {
for n := 0; n < b.N; n++ {
h := `<!DOCTYPE html>
Expand Down

0 comments on commit 3440efd

Please sign in to comment.