Skip to content

Commit

Permalink
refactor(parser): simplify grammar (bytesparadise#560)
Browse files Browse the repository at this point in the history
Refactor multiple rules to simplify the grammar and
make the parser more efficient:

- use character groups instead of choices whenever applicable
- refactor `ContinuedListItemElement` rule: ignore BlankLine,
  but take it into account during preprocessing (when building lists)
- move `LabeledListItem` alone before `SimpleParagraph` to simplify the latter
- remove multiple occurrences of `!EOL !LineBreak` and `!EOF !BlankLine` rules
- simplify single quote/double quote bold/monospace/italic text rules
- merge TextOnlyLinkAttributes and TextAndMoreLinkAttributes rules
- simplify FootnoteContent rule
- simplify InlineElement rule
- remove Anychars and Parenthesis rules, simplify AnyChar

also:
- define a makefile goal for the bench, using a separate .go file with a custom build tag so we can have the parse options.

Fixes bytesparadise#557

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon authored May 14, 2020
1 parent e7b429a commit 4662a8b
Show file tree
Hide file tree
Showing 39 changed files with 9,581 additions and 9,720 deletions.
12 changes: 9 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,23 @@ go 1.11
require (
github.com/alecthomas/chroma v0.7.1
github.com/davecgh/go-spew v1.1.1
github.com/golangci/golangci-lint v1.25.1 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/mna/pigeon v1.0.1-0.20190909211542-7ee56e19b15c
github.com/golang/protobuf v1.3.2 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-isatty v0.0.8 // indirect
github.com/mna/pigeon v1.0.1-0.20200224192238-18953b277063
github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/onsi/ginkgo v1.12.0
github.com/onsi/gomega v1.9.0
github.com/pkg/errors v0.8.1
github.com/sergi/go-diff v1.0.0
github.com/sirupsen/logrus v1.4.2
github.com/sozorogami/gover v0.0.0-20171022184752-b58185e213c5
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.5.1 // indirect
golang.org/x/text v0.3.2 // indirect
golang.org/x/tools v0.0.0-20200502202811-ed308ab3e770 // indirect
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v2 v2.2.8
)
305 changes: 9 additions & 296 deletions go.sum

Large diffs are not rendered by default.

8 changes: 3 additions & 5 deletions make/test.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ test-fixtures: generate-optimized

.PHONY: bench-parser
## run the benchmarks on the parser
bench-parser: generate-optimized
$(eval GIT_BRANCH:=$(shell git rev-parse --abbrev-ref HEAD))
go test -run="XXX" -bench=. -benchmem -count=10 \
github.com/bytesparadise/libasciidoc/pkg/parser | \
tee ./tmp/bench-$(GIT_BRANCH).txt
bench-parser: generate
@ginkgo -tags bench -focus "real-world doc-based benchmarks" pkg/parser
@ginkgo -tags bench -focus "basic stats" pkg/parser
135 changes: 61 additions & 74 deletions pkg/parser/bench_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
// +build bench

package parser_test

import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"testing"

"github.com/bytesparadise/libasciidoc/pkg/parser"

. "github.com/onsi/ginkgo" //nolint golint
. "github.com/onsi/ginkgo/extensions/table" //nolint golint
. "github.com/onsi/gomega" //nolint golint
)

const (
doc1line = `=== foo1
bar1`
doc1line = `= Lorem Ipsum
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.`

doc10lines = `=== foo
bar
Expand Down Expand Up @@ -42,82 +60,51 @@ bar
bar`
)

func BenchmarkParser(b *testing.B) {
usecases := []struct {
name string
content []byte
}{
{
name: "1 line",
content: []byte(doc1line),
},
{
name: "10 lines",
content: []byte(doc10lines),
},
{
name: "vert.x doc",
content: load(b, "../../test/bench/vertx-examples.adoc"),
},
}
for _, usecase := range usecases {
name := usecase.name
content := usecase.content
b.Run(name, func(b *testing.B) {
for n := 0; n < b.N; n++ {
_, err := parser.Parse(name, content)
if err != nil {
b.Error(err)
}
}
var _ = DescribeTable("basic stats",
func(title, content string) {
stats := parser.Stats{}
_, err := parser.Parse(title, []byte(content), parser.Statistics(&stats, "no match")) // , parser.Debug(true))
Expect(err).NotTo(HaveOccurred())
fmt.Printf("%s\n", title)
fmt.Printf("ExprCnt: %d\n", stats.ExprCnt)
result, _ := json.MarshalIndent(stats.ChoiceAltCnt, " ", " ")
fmt.Printf("ChoiceAltCnt: \n%s\n", result)
},
Entry("parse a single line file", "1-line doc", doc1line),
Entry("parse a 10-line file", "10-lines doc", doc10lines),
)

var _ = Describe("real-world doc-based benchmarks", func() {

Measure("parse the vert.x examples doc", func(b Benchmarker) {
filename := "../../test/bench/vertx-examples.adoc"
content, err := load(filename)
Expect(err).NotTo(HaveOccurred())
b.Time("runtime", func() {
_, err := parser.Parse(filename, content)
Expect(err).NotTo(HaveOccurred())
})
}
}
}, 10)

Measure("parse the quarkus kafka streams doc", func(b Benchmarker) {
filename := "../../test/bench/kafka-streams.adoc"
content, err := load(filename)
Expect(err).NotTo(HaveOccurred())
b.Time("runtime", func() {
_, err := parser.Parse(filename, content)
Expect(err).NotTo(HaveOccurred())
})
}, 10)

// func TestParserWithStats(t *testing.T) {
// usecases := []struct {
// name string
// content []byte
// }{
// {
// name: "1 line",
// content: []byte(doc1line),
// },
// {
// name: "10 lines",
// content: []byte(doc10lines),
// },
// }
// for _, usecase := range usecases {
// name := usecase.name
// content := usecase.content
// t.Run(name, func(t *testing.T) {
// stats := parser.Stats{}
// _, err := parser.Parse(name, content, parser.Statistics(&stats, "no match"), parser.Debug(true), parser.AllowInvalidUTF8(true))
// if err != nil {
// t.Error(err)
// }
// t.Logf("ExprCnt: %d", stats.ExprCnt)
// result, _ := json.MarshalIndent(stats.ChoiceAltCnt, " ", " ")
// t.Logf("ChoiceAltCnt: \n%s", result)
// })
// }
// }

func load(b *testing.B, filename string) []byte {
})

func load(filename string) ([]byte, error) {
f, err := os.Open(filename)
if err != nil {
b.Error(err)
return nil, err
}
defer func() {
err2 := f.Close()
if err2 != nil {
b.Error(err2)
}
f.Close()
}()
content, err := ioutil.ReadAll(f)
if err != nil {
b.Error(err)
}
return content
return ioutil.ReadAll(f)
}
4 changes: 4 additions & 0 deletions pkg/parser/comment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ with multiple lines

It("comment block with paragraphs around", func() {
source := `a first paragraph
////
a *comment* block
with multiple lines
Expand All @@ -161,6 +162,7 @@ a second paragraph`
},
},
},
types.BlankLine{}, // blankline is required between a paragraph and the next block
types.DelimitedBlock{
Attributes: types.ElementAttributes{},
Kind: types.Comment,
Expand Down Expand Up @@ -314,6 +316,7 @@ with multiple lines

It("comment block with paragraphs around", func() {
source := `a first paragraph
////
a *comment* block
with multiple lines
Expand Down Expand Up @@ -350,6 +353,7 @@ a second paragraph`
source := `== section 1
a first paragraph
////
a *comment* block
with multiple lines
Expand Down
8 changes: 4 additions & 4 deletions pkg/parser/cross_reference_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ with some content linked to <<thetitle,a label to the title>>!`
},
types.ExternalCrossReference{
Location: types.Location{
Elements: []interface{}{
Path: []interface{}{
types.StringElement{
Content: "another-doc.adoc",
},
Expand Down Expand Up @@ -155,7 +155,7 @@ with some content linked to <<thetitle,a label to the title>>!`
},
types.ExternalCrossReference{
Location: types.Location{
Elements: []interface{}{
Path: []interface{}{
types.DocumentAttributeSubstitution{
Name: "foo",
},
Expand Down Expand Up @@ -310,7 +310,7 @@ with some content linked to <<thetitle,a label to the title>>!`
},
types.ExternalCrossReference{
Location: types.Location{
Elements: []interface{}{
Path: []interface{}{
types.StringElement{
Content: "another-doc.adoc",
},
Expand Down Expand Up @@ -359,7 +359,7 @@ some content linked to xref:{foo}[another_doc()]!`
},
types.ExternalCrossReference{
Location: types.Location{
Elements: []interface{}{
Path: []interface{}{
types.StringElement{
Content: "another-doc.adoc",
},
Expand Down
16 changes: 12 additions & 4 deletions pkg/parser/delimited_block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ var _ = Describe("delimited blocks", func() {

It("fenced block after a paragraph", func() {
content := "some fenced code"
source := "a paragraph.\n```\n" + content + "\n" + "```\n"
source := "a paragraph.\n\n```\n" + content + "\n" + "```\n"
expected := types.DraftDocument{
Blocks: []interface{}{
types.Paragraph{
Expand All @@ -122,6 +122,7 @@ var _ = Describe("delimited blocks", func() {
},
},
},
types.BlankLine{},
types.DelimitedBlock{
Attributes: types.ElementAttributes{},
Kind: types.Fenced,
Expand Down Expand Up @@ -376,8 +377,9 @@ then a normal paragraph.`
Expect(ParseDraftDocument(source)).To(MatchDraftDocument(expected))
})

It("listing block just after a paragraph", func() {
It("listing block after a paragraph", func() {
source := `a paragraph.
----
some listing code
----`
Expand All @@ -391,6 +393,7 @@ some listing code
},
},
},
types.BlankLine{}, // blankline is required between paragraph and the next block
types.DelimitedBlock{
Attributes: types.ElementAttributes{},
Kind: types.Listing,
Expand Down Expand Up @@ -1472,6 +1475,7 @@ some *verse* content
source := `.a title
****
some *verse* content
----
foo
bar
Expand Down Expand Up @@ -1506,6 +1510,7 @@ bar
},
},
},
types.BlankLine{}, // blankline is required between paragraph and the next block
types.DelimitedBlock{
Attributes: types.ElementAttributes{},
Kind: types.Listing,
Expand Down Expand Up @@ -1642,7 +1647,7 @@ bar

It("fenced block after a paragraph", func() {
content := "some fenced code"
source := "a paragraph.\n```\n" + content + "\n" + "```\n"
source := "a paragraph.\n\n```\n" + content + "\n" + "```\n"
expected := types.Document{
Attributes: types.DocumentAttributes{},
ElementReferences: types.ElementReferences{},
Expand Down Expand Up @@ -1938,8 +1943,9 @@ then a normal paragraph.`
Expect(ParseDocument(source)).To(MatchDocument(expected))
})

It("listing block just after a paragraph", func() {
It("listing block after a paragraph", func() {
source := `a paragraph.
----
some listing code
----`
Expand Down Expand Up @@ -3145,6 +3151,7 @@ some *verse* content
source := `.a title
****
some *verse* content
----
foo
bar
Expand Down Expand Up @@ -3182,6 +3189,7 @@ bar
},
},
},
types.BlankLine{}, // blankline is required between paragraph and the next block
types.DelimitedBlock{
Attributes: types.ElementAttributes{},
Kind: types.Listing,
Expand Down
5 changes: 3 additions & 2 deletions pkg/parser/document_processing_apply_substitutions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,10 @@ var _ = Describe("document attribute subsititutions", func() {
types.InlineLink{
Attributes: types.ElementAttributes{},
Location: types.Location{
Elements: []interface{}{
Scheme: "https://",
Path: []interface{}{
types.StringElement{
Content: "https://foo.bar",
Content: "foo.bar",
},
},
},
Expand Down
Loading

0 comments on commit 4662a8b

Please sign in to comment.