Skip to content

Commit

Permalink
Make conversion more extensible
Browse files Browse the repository at this point in the history
  • Loading branch information
pgaskin committed Mar 5, 2018
1 parent 94be780 commit feb6367
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 7 deletions.
15 changes: 14 additions & 1 deletion kepub/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ func cleanHTML(doc *goquery.Document) error {
}

// process processes the html of a content file in an ordinary epub and converts it into a kobo epub by adding kobo divs, kobo spans, smartening punctuation, and cleaning html.
func process(content *string) error {
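// If postDoc is non-nil, it is run on the goquery.Document just before the document is serialized to html.
// If postHTML is non-nil, it is run on the final html string just before it is written back to content.
// An error returned by either postprocessor is returned from process.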
func process(content *string, postDoc *func(doc *goquery.Document) error, postHTML *func(h *string) error) error {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(*content))
if err != nil {
return err
Expand All @@ -281,6 +282,12 @@ func process(content *string) error {
return err
}

if postDoc != nil {
if err := (*postDoc)(doc); err != nil {
return err
}
}

h, err := doc.Html()
if err != nil {
return err
Expand All @@ -300,6 +307,12 @@ func process(content *string) error {
// Fix nbsps removed
h = strings.Replace(h, "\u00a0", " ", -1)

if postHTML != nil {
if err := (*postHTML)(&h); err != nil {
return err
}
}

*content = h

return nil
Expand Down
17 changes: 15 additions & 2 deletions kepub/content_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func TestProcess(t *testing.T) {
</body>
</html>`

process(&h)
process(&h, nil, nil)

hs := sha256.New()
hs.Write([]byte(h))
Expand All @@ -145,8 +145,21 @@ func TestProcess(t *testing.T) {

ha := `<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><title>Test Book 1</title><meta content="http://www.w3.org/1999/xhtml; charset=utf-8" http-equiv="Content-Type"/></head><body><p>Test&nbsp;&nbsp;Test</p><p>&nbsp;&#160;</p><p>Test</p></body></html>`
hax := `<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><title>Test Book 1</title><meta content="http://www.w3.org/1999/xhtml; charset=utf-8" http-equiv="Content-Type"/><style type="text/css">div#book-inner{margin-top: 0;margin-bottom: 0;}</style></head><body><div class="book-columns"><div class="book-inner"><p><span class="koboSpan" id="kobo.1.1">Test&nbsp;&nbsp;Test</span></p><p><span class="koboSpan" id="kobo.2.1">&nbsp;&nbsp;</span></p><p><span class="koboSpan" id="kobo.3.1">Test</span></p></div></div></body></html>`
process(&ha)
process(&ha, nil, nil)
assert.Equal(t, hax, ha, "should process nbsps correctly")

ha1 := `<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><title>Test Book 1</title><meta content="http://www.w3.org/1999/xhtml; charset=utf-8" http-equiv="Content-Type"/></head><body><p>test</p></body></html>`
hax1 := `<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head><title>Replaced Book 1</title><meta content="http://www.w3.org/1999/xhtml; charset=utf-8" http-equiv="Content-Type"/><style type="text/css">div#book-inner{margin-top: 0;margin-bottom: 0;}</style></head><body><div class="book-columns"><div class="book-inner"><p><span class="koboSpan" id="kobo.1.1">replaced</span></p></div></div></body></html>`
postDoc := func(doc *goquery.Document) error {
doc.Find("title").SetText("Replaced Book 1")
return nil
}
postHTML := func(h *string) error {
*h = strings.Replace(*h, "test", "replaced", -1)
return nil
}
process(&ha1, &postDoc, &postHTML)
assert.Equal(t, hax1, ha1, "should run post-processing correctly")
}

func TestProcessOPF(t *testing.T) {
Expand Down
8 changes: 5 additions & 3 deletions kepub/kepub.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ import (
"sync"
"time"

"github.com/PuerkitoBio/goquery"
"github.com/beevik/etree"
zglob "github.com/mattn/go-zglob"
)

// Kepubify converts a .epub into a .kepub.epub
func Kepubify(src, dest string, verbose bool) error {
// Kepubify converts a .epub into a .kepub.epub.
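// The optional postDoc and postHTML postprocessors are passed on to the processing of each content file:
// postDoc receives the file's goquery.Document and postHTML receives its html string. Either may be nil.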
func Kepubify(src, dest string, verbose bool, postDoc *func(doc *goquery.Document) error, postHTML *func(h *string) error) error {
td, err := ioutil.TempDir("", "kepubify")
if err != nil {
return fmt.Errorf("could not create temp dir: %s", err)
Expand Down Expand Up @@ -60,7 +62,7 @@ func Kepubify(src, dest string, verbose bool) error {
return
}
str := string(buf)
err = process(&str)
err = process(&str, postDoc, postHTML)
if err != nil {
select {
case cerr <- fmt.Errorf("Error processing content file \"%s\": %s", cf, err): // Put err in the channel unless it is full
Expand Down
2 changes: 1 addition & 1 deletion kepub/kepub_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func TestKepubify(t *testing.T) {
kepub := filepath.Join(td, "test1.kepub.epub")
kepubunp := filepath.Join(td, "test1.kepub.epub_unpacked")

err = Kepubify(filepath.Join(wd, "testdata", "books", "test1.epub"), kepub, false)
err = Kepubify(filepath.Join(wd, "testdata", "books", "test1.epub"), kepub, false, nil, nil)
assert.Nil(t, err, "should not error when converting book")
assert.True(t, exists(kepub), "converted kepub should exist")

Expand Down

0 comments on commit feb6367

Please sign in to comment.