From 3440efd9b2bed7229eb7293fc747e93577217921 Mon Sep 17 00:00:00 2001 From: Patrick G Date: Mon, 21 May 2018 15:23:20 -0400 Subject: [PATCH] Fixed self-closing script tag issues (fixes #29) --- kepub/content.go | 14 +++++++-- kepub/content_test.go | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/kepub/content.go b/kepub/content.go index 3082b22..0a5875a 100644 --- a/kepub/content.go +++ b/kepub/content.go @@ -262,12 +262,22 @@ func cleanHTML(doc *goquery.Document) error { return nil } -var selfClosingTitleRe = regexp.MustCompile("") +var selfClosingScriptRe = regexp.MustCompile(`<(script)([^>]*?)\/>`) +var selfClosingTitleRe = regexp.MustCompile("<title */>") + +// fixInvalidSelfClosingTags fixes invalid self-closing tags which cause breakages. It must be run first. +func fixInvalidSelfClosingTags(html *string) error { + *html = selfClosingTitleRe.ReplaceAllString(*html, "<title>book") + *html = selfClosingScriptRe.ReplaceAllString(*html, "<$1$2> ") + return nil +} // process processes the html of a content file in an ordinary epub and converts it into a kobo epub by adding kobo divs, kobo spans, smartening punctuation, and cleaning html. // It can also optionally run a postprocessor on the goquery.Document, or the html string. func process(content *string, postDoc *func(doc *goquery.Document) error, postHTML *func(h *string) error) error { - *content = selfClosingTitleRe.ReplaceAllString(*content, "book") + if err := fixInvalidSelfClosingTags(content); err != nil { + return err + } doc, err := goquery.NewDocumentFromReader(strings.NewReader(*content)) if err != nil { diff --git a/kepub/content_test.go b/kepub/content_test.go index 84c7a6b..5f61c0e 100644 --- a/kepub/content_test.go +++ b/kepub/content_test.go @@ -246,6 +246,79 @@ func TestSpans(t *testing.T) { } } +func TestFixInvalidSelfClosingTags(t *testing.T) { + for _, c := range []struct { + What string + In string + Out string + }{ + { + "should not modify correct title tag", + "test", + "test", + }, + { + "should fix self-closing title tag", + "", + "<title>book", + }, + { + "should fix self-closing title tag with spaces and trim extra spaces", + "", + "<title>book", + }, + { + "should not modify correct script tag", + "", + "", + }, + { + "should fix self-closing script tag", + "", + }, + { + "should fix self-closing script tag with spaces and trim extra spaces", + "", + }, + { + "should fix self-closing script tag with attributes", + "", + }, + { + "should not intefere with other script tags", + "", + "", + }, + { + "should work with complex attributes", + ``, + }, + } { + c.In = fmt.Sprintf("%s", c.In) + c.Out = fmt.Sprintf("%s", c.Out) + + h := c.In + err := fixInvalidSelfClosingTags(&h) + assert.NoError(t, err, "should not error") + assert.Equalf(t, c.Out, h, "%s (after replacement)", c.What) + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(h)) + assert.NoError(t, err, "should not error when parsing modified document") + + if c.Out == "" { + c.Out = "" + } + + h, err = doc.Html() + assert.NoError(t, err, "should not error when creating new html") + assert.Equalf(t, c.Out, h, "%s (after passing through goquery)", c.What) + } +} + func BenchmarkProcess(b *testing.B) { for n := 0; n < b.N; n++ { h := `