Skip to content

Commit

Permalink
kepub/kobotest: Switch diff library, improve tree formatting and generation
Browse files Browse the repository at this point in the history

* Use gotextdiff (i.e. golang.org/x/tools/internal/lsp/diff) instead of go-diff.
  * Better chunking for our input.
  * No external dependencies.
* Dim node type label text.
* Change output colors.
* Show all removals/insertions.
* Prefix TextNode contents.
* Hide quotes for TextNode text when there is no leading or trailing whitespace.
* Improve TextNode indentation.
* Add blank lines around groups of TextNodes.
  • Loading branch information
pgaskin committed Jul 2, 2021
1 parent 40afb6d commit 7848002
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 49 deletions.
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ require (
golang.org/x/sync v0.0.0-20201008141435-b3e1573b7520
)

// tests/utils
require github.com/sergi/go-diff v1.1.0
require github.com/hexops/gotextdiff v1.0.3
20 changes: 2 additions & 18 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@ github.com/bamiaux/rez v0.0.0-20170731184118-29f4463c688b h1:5Ci5wpOL75rYF6RQGRo
github.com/bamiaux/rez v0.0.0-20170731184118-29f4463c688b/go.mod h1:obBQGGIFbbv9KWg92Qu9UHeD94JXmHD1jovY/z6I3O8=
github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs=
github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/kr/smartypants v0.1.0 h1:Sn8hn5XrY+uXrxSWUdcr621Gfpk11mOGGVs4XX06kEw=
github.com/kr/smartypants v0.1.0/go.mod h1:EcTX9ge+SWNaGwbQvHwNICsMGavh98FLUqyOWFr+j9c=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mattn/go-sqlite3 v2.0.3+incompatible h1:gXHsfypPkaMZrKbD5209QV9jbUTJKjyR5WD3HYQSd+U=
github.com/mattn/go-sqlite3 v2.0.3+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/pgaskin/kepubify/_/go116-zip.go117 v0.0.0-20210611152744-2d89b3182523 h1:pYGj3rKTy+TDs5Z707kT+ztjoIDCy76lc2UPkZocAFM=
Expand All @@ -18,21 +14,9 @@ github.com/pgaskin/kepubify/_/html v0.0.0-20210611145339-337924fbbaf0 h1:qYHfG66
github.com/pgaskin/kepubify/_/html v0.0.0-20210611145339-337924fbbaf0/go.mod h1:fxzoIpMFAReNKunZ+ttVbf3hNVrJGtrSZMI4olZizbs=
github.com/pgaskin/koboutils/v2 v2.1.1 h1:Or5y+z8rXlip0Al8tiSj+Fb9NkuLhkcw1UPpzPPvKWY=
github.com/pgaskin/koboutils/v2 v2.1.1/go.mod h1:wTzkDIlsxmUyfwfspGcm0Ap+HOxSUYV0S8kMYrf+0gM=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
golang.org/x/sync v0.0.0-20201008141435-b3e1573b7520 h1:Bx6FllMpG4NWDOfhMBz1VR2QYNp/SAOHPIAsaVmxfPo=
golang.org/x/sync v0.0.0-20201008141435-b3e1573b7520/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
112 changes: 83 additions & 29 deletions kepub/kobotest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
package main

import (
"bytes"
"fmt"
"io"
"os"
"strconv"
"strings"
"unicode"
"unicode/utf8"

"github.com/sergi/go-diff/diffmatchpatch"
"github.com/hexops/gotextdiff/myers"
"github.com/hexops/gotextdiff/span"

"github.com/pgaskin/kepubify/_/html/golang.org/x/net/html"
"github.com/pgaskin/kepubify/_/html/golang.org/x/net/html/atom"
Expand All @@ -31,10 +34,7 @@ func main() {
panic(err)
}

koboTree := bytes.NewBuffer(nil)
if err := mkTree(koboTree, doc); err != nil {
panic(err)
}
koboTree := mkTree(doc)

fmt.Print("\n\n=== ORIGINAL ===\n\n")
if err := html.Render(os.Stdout, doc); err != nil {
Expand All @@ -55,19 +55,40 @@ func main() {
panic(err)
}

kepubifyTree := bytes.NewBuffer(nil)
if err := mkTree(kepubifyTree, doc); err != nil {
panic(err)
}
kepubifyTree := mkTree(doc)

fmt.Print("\n\n=== RESULT (blue=kepubify, yellow=kobo) ===\n\n")

koboTreeStr := koboTree.String()
kepubifyTreeStr := kepubifyTree.String()
if a, b := kepubifyTree, koboTree; a != b {
txt, prev := span.NewContentConverter("", []byte(a)), 0
for _, edit := range myers.ComputeEdits(span.URI(""), a, b) {
span, _ := edit.Span.WithOffset(txt)
start, end := span.Start().Offset(), span.End().Offset()
if start > prev {
io.WriteString(os.Stdout, a[prev:start])
prev = start
}
if end > start {
io.WriteString(os.Stdout, "\x1b[34m") // blue
io.WriteString(os.Stdout, a[start:end])
io.WriteString(os.Stdout, "\x1b[0m")
}
if edit.NewText != "" {
io.WriteString(os.Stdout, "\x1b[33m") // yellow
io.WriteString(os.Stdout, edit.NewText)
io.WriteString(os.Stdout, "\x1b[0m")
}
prev = end
}
if prev < len(a) {
io.WriteString(os.Stdout, a[prev:])
}

fmt.Print("\n\n=== RESULT (red=incorrect green=correct) ===\n\n")
lines := strings.SplitAfter(a, "\n")
if lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}

if kepubifyTreeStr != koboTreeStr {
dmp := diffmatchpatch.New()
fmt.Println(dmp.DiffPrettyText(dmp.DiffMain(kepubifyTreeStr, koboTreeStr, false)))
os.Exit(1)
return
}
Expand Down Expand Up @@ -133,7 +154,9 @@ func removeSpans(node *html.Node) {
}
}

func mkTree(w io.Writer, node *html.Node) error {
func mkTree(node *html.Node) string {
var b strings.Builder

var stack []*html.Node
var cur *html.Node

Expand All @@ -150,37 +173,68 @@ func mkTree(w io.Writer, node *html.Node) error {

switch cur.Type {
case html.TextNode:
if _, err := fmt.Fprintf(w, "%s- TextNode: %#v\n", indent, cur.Data); err != nil {
return err
if cur.PrevSibling != nil && cur.PrevSibling.Type == html.ElementNode {
b.WriteByte('\n')
}
b.WriteString(indent)
b.WriteString("\x1b[2mTextNode: » \x1b[22m")
q := strconv.Quote(cur.Data)
if q[0] == '"' && q[len(q)-1] == '"' {
if t := q[1 : len(q)-1]; t != "" {
var unquoted bool
if r, _ := utf8.DecodeLastRuneInString(t); !unicode.IsSpace(r) {
if r, _ := utf8.DecodeLastRuneInString(t); !unicode.IsSpace(r) {
b.WriteString(t)
unquoted = true
}
}
if !unquoted {
b.WriteString("\x1b[2m\"\x1b[22m")
b.WriteString(t)
b.WriteString("\x1b[2m\"\x1b[22m")
}
}
}
b.WriteByte('\n')
if cur.NextSibling != nil && cur.NextSibling.Type == html.ElementNode {
b.WriteByte('\n')
}
continue
case html.ElementNode:
desc := cur.Data
b.WriteString(indent)
b.WriteString("\x1b[2mElementNode: \x1b[22m")
b.WriteString(cur.Data)
for _, attr := range cur.Attr {
if attr.Key == "class" {
desc += "." + strings.Join(strings.Fields(attr.Val), ".")
b.WriteByte('.')
b.WriteString(strings.Join(strings.Fields(attr.Val), "."))
break
}
}
for _, attr := range cur.Attr {
if attr.Key == "id" {
desc += "#" + strings.TrimSpace(attr.Val)
b.WriteByte('#')
b.WriteString(strings.TrimSpace(attr.Val))
break
}
}
for _, attr := range cur.Attr {
if attr.Key != "class" && attr.Key != "id" && !strings.HasPrefix(attr.Key, "xmlns") {
desc += fmt.Sprintf("[%s=%#v]", attr.Key, attr.Val)
b.WriteByte('[')
b.WriteString(attr.Key)
b.WriteByte('=')
b.WriteString(attr.Val)
b.WriteByte(']')
break
}
}
if _, err := fmt.Fprintf(w, "%s- ElementNode: %s\n", indent, desc); err != nil {
return err
b.WriteByte('\n')
if cur.PrevSibling != nil && (cur.PrevSibling.LastChild != nil && cur.PrevSibling.LastChild.Type == html.TextNode) {
b.WriteByte('\n')
}
case html.DocumentNode:
if _, err := fmt.Fprintf(w, "%sDocumentNode:\n", indent); err != nil {
return err
}
b.WriteString(indent)
b.WriteString("\x1b[2mDocumentNode:\x1b[22m\n")
}

for c := cur.LastChild; c != nil; c = c.PrevSibling {
Expand All @@ -189,5 +243,5 @@ func mkTree(w io.Writer, node *html.Node) error {
}
}

return nil
return b.String()
}

0 comments on commit 7848002

Please sign in to comment.