Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser): preprocess file inclusions #883

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion libasciidoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io"
"os"
"strings"
"time"

"github.com/bytesparadise/libasciidoc/pkg/renderer/sgml/xhtml5"
Expand Down Expand Up @@ -67,8 +68,12 @@ func Convert(r io.Reader, output io.Writer, config *configuration.Configuration)
duration := time.Since(start)
log.Debugf("rendered the output in %v", duration)
}()
p, err := parser.Preprocess(r, config)
if err != nil {
return types.Metadata{}, err
}
// log.Debugf("parsing the asciidoc source...")
doc, err := parser.ParseDocument(r, config)
doc, err := parser.ParseDocument(strings.NewReader(p), config)
if err != nil {
return types.Metadata{}, err
}
Expand Down
15 changes: 1 addition & 14 deletions pkg/parser/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func NewParseContext(config *configuration.Configuration, opts ...Option) *Parse
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("new parser context with attributes: %s", spew.Sdump(config.Attributes))
}
opts = append(opts, Entrypoint("DocumentFragment"))
opts = append(opts, GlobalStore(frontMatterKey, true))
opts = append(opts, GlobalStore(documentHeaderKey, true))
opts = append(opts, GlobalStore(substitutionKey, newNormalSubstitution()))
Expand Down Expand Up @@ -50,20 +51,6 @@ func (c *ParseContext) Clone() *ParseContext {
}
}

// WithinDelimitedBlock returns a clone of this context configured to parse the
// content of the given delimited block: example/quote/sidebar content is parsed
// as regular document fragments, while fenced/listing/literal/verse content
// (and, by default, any other kind such as passthrough/comment) is parsed with
// the "verbatim" entrypoint. The block kind is recorded in the global store
// under `delimitedBlockScopeKey` so nested rules can tell which block they are in.
func (c *ParseContext) WithinDelimitedBlock(b *types.DelimitedBlock) *ParseContext {
	clone := c.Clone()
	switch b.Kind {
	case types.Example, types.Quote, types.Sidebar:
		// content of these blocks may contain arbitrary nested elements
		clone.Opts = append(c.Opts, Entrypoint("DocumentFragment"), GlobalStore(delimitedBlockScopeKey, b.Kind))
	case types.Fenced, types.Listing, types.Literal, types.Verse:
		// content of these blocks is kept as-is (verbatim)
		clone.Opts = append(c.Opts, Entrypoint("DocumentFragmentWithinVerbatimBlock"), GlobalStore(delimitedBlockScopeKey, b.Kind))
	default:
		// TODO: do we need to parse the content of Passthrough and Comments?
		clone.Opts = append(c.Opts, Entrypoint("DocumentFragmentWithinVerbatimBlock"), GlobalStore(delimitedBlockScopeKey, b.Kind))
	}
	return clone
}

type contextAttributes struct {
immutableAttributes types.Attributes
attributes types.Attributes
Expand Down
2 changes: 1 addition & 1 deletion pkg/parser/delimited_block_listing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ and <more text> on the +
},
}
Expect(ParseDocument(s)).To(MatchDocument(expected))
Expect(logs).To(ContainJSONLogWithOffset(log.ErrorLevel, 33, 183, "unsupported kind of substitution: 'unknown'"))
Expect(logs).To(ContainJSONLogWithOffset(log.ErrorLevel, 33, 182, "unsupported kind of substitution: 'unknown'"))
})
})
})
Expand Down
13 changes: 13 additions & 0 deletions pkg/parser/document_preprocessing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package parser

import (
"io"

"github.com/bytesparadise/libasciidoc/pkg/configuration"
)

// Preprocess reads the source line by line to look-up and process file
// inclusions (`include::` directives), returning the flattened document
// content as a single string, ready to be parsed.
//
// `config` supplies the document attributes and filename used while resolving
// inclusions; `opts` are extra parser options added to the parse context.
// Returns an error if an inclusion cannot be resolved.
func Preprocess(source io.Reader, config *configuration.Configuration, opts ...Option) (string, error) {
	ctx := NewParseContext(config, opts...) // each pipeline step will have its own clone of `ctx`
	return processFileInclusions(ctx, source)
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,162 +17,121 @@ import (
log "github.com/sirupsen/logrus"
)

// IncludeFiles is a pipeline stage which resolves file inclusions in each
// fragment received on `fragmentStream` and forwards the (possibly expanded)
// fragments on the returned channel. The returned channel is closed once
// `fragmentStream` is exhausted; closing `done` aborts the stage early.
func IncludeFiles(ctx *ParseContext, done <-chan interface{}, fragmentStream <-chan types.DocumentFragment) <-chan types.DocumentFragment {
	resultStream := make(chan types.DocumentFragment, bufferSize)
	go func() {
		// this stage owns the result channel, so it closes it on exit
		defer close(resultStream)
		for f := range fragmentStream {
			select {
			case resultStream <- includeFiles(ctx, f, done):
			case <-done:
				log.WithField("pipeline_task", "include_files").Debug("received 'done' signal")
				return
			}
		}
		log.WithField("pipeline_task", "include_files").Debug("done")
	}()
	return resultStream
}

func includeFiles(ctx *ParseContext, f types.DocumentFragment, done <-chan interface{}) types.DocumentFragment {
// if the fragment already contains an error, then send it as-is downstream
if f.Error != nil {
log.Debugf("skipping file inclusions because of fragment with error: %v", f.Error)
return f
}
elements, err := doIncludeFiles(ctx, f.Elements, done)
if err != nil {
return types.NewErrorFragment(f.Position, err)
func processFileInclusions(ctx *ParseContext, source io.Reader) (string, error) {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("processing file inclusions in %s with leveloffset=%s", ctx.filename, spew.Sdump(ctx.levelOffsets))
}
return types.NewDocumentFragment(f.Position, elements...)
}

func doIncludeFiles(ctx *ParseContext, elements []interface{}, done <-chan interface{}) ([]interface{}, error) {
result := make([]interface{}, 0, len(elements))
for _, element := range elements {
switch e := element.(type) {
case *types.AttributeDeclaration:
ctx.attributes.set(e.Name, e.Value)
result = append(result, element)
case *types.AttributeReset:
ctx.attributes.unset(e.Name)
result = append(result, element)
case *types.FileInclusion:
// use an Entrypoint based on the Delimited block kind
elmts, err := doIncludeFile(ctx.Clone(), e, done)
if err != nil {
return nil, err
}
result = append(result, elmts...)
case *types.DelimitedBlock:
elmts, err := doIncludeFiles(ctx.WithinDelimitedBlock(e), e.Elements, done)
if err != nil {
return nil, err
content := &builder{}
scanner := bufio.NewScanner(source)
t := newBlockDelimiterTracker()
for scanner.Scan() {
line := scanner.Bytes()
element, err := Parse("", line, append(ctx.Opts, Entrypoint("DocumentRawLine"))...)
if err != nil {
// content of line was not relevant in the context of preparsing (ie, it's a regular line), so let's keep it as-is
content.Write(line)
} else {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("checking element of type '%T'", element)
}
e.Elements = elmts
switch e.Kind {
case types.Example, types.Quote, types.Sidebar:
if e.Elements, err = parseDelimitedBlockElements(ctx, e); err != nil {
// log the error, but keep the delimited block empty so we can carry on with the whole processing
log.WithError(err).Error("unable to parse content of delimited block")
e.Elements = nil
break
switch e := element.(type) {
case *types.AttributeDeclaration:
ctx.attributes.set(e.Name, e.Value)
t, _ := e.RawText()
content.WriteString(t)
case *types.AttributeReset:
ctx.attributes.unset(e.Name)
t, _ := e.RawText()
content.WriteString(t)
case *types.RawSection:
content.WriteString(ctx.levelOffsets.apply(e))
case *types.FileInclusion:
f, err := includeFile(ctx.Clone(), e)
if err != nil {
return "", err
}
content.WriteString(f)
case *types.BlockDelimiter:
t.push(types.BlockDelimiterKind(e.Kind))
ctx.Opts = append(ctx.Opts, GlobalStore(delimitedBlockScopeKey, t.withinDelimitedBlock()))
t, _ := e.RawText()
content.WriteString(t)
default:
return "", fmt.Errorf("unexpected type of element while preprocessing document: '%T'", e)
}
result = append(result, e)
default:
result = append(result, element)
}
}
return result, nil
return content.String(), nil
}

// replace the content of this FileInclusion element the content of the target file
// note: there is a trade-off here: we include the whole content of the file in the current
// fragment, making it potentially big, but at the same time we ensure that the context
// of the inclusion (for example, within a delimited block) is not lost.
func doIncludeFile(ctx *ParseContext, e *types.FileInclusion, done <-chan interface{}) ([]interface{}, error) {
// ctx.Opts = append(ctx.Opts, GlobalStore(documentHeaderKey, true))
fileContent, err := contentOf(ctx, e)
if err != nil {
return nil, err
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("including content of '%s' with offsets %v", e.Location.Stringify(), ctx.levelOffsets)
func includeFile(ctx *ParseContext, incl *types.FileInclusion) (string, error) {
if err := applySubstitutionsOnBlockWithLocation(ctx, incl); err != nil {
return "", errors.Errorf("Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
elements := []interface{}{}
for f := range ParseFragments(ctx, fileContent, done) {
if err := f.Error; err != nil {
return nil, err
}
// apply level offset on sections
for i, e := range f.Elements {
switch e := e.(type) {
case *types.DocumentHeader:
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applying offsets to header section: %v", ctx.levelOffsets)
}
// header becomes a "regular" section
s := &types.Section{
Title: e.Title,
Elements: e.Elements,
}
ctx.levelOffsets.apply(s)
if s.Level == 0 { // no level change: keep as the header
f.Elements[i] = e
} else { // level changed: becomes a section with some elements
f.Elements[i] = s
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applied offsets to header/section: level is now %d", s.Level)
}
case *types.Section:
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applying offsets to section of level %d: %v", e.Level, ctx.levelOffsets)
}
ctx.levelOffsets.apply(e)
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applied offsets to section: level is now %d", e.Level)
}
}
}
elements = append(elements, f.Elements...)
content, err := contentOf(ctx, incl)
if err != nil {
return "", err
}
// and recursively...
return doIncludeFiles(ctx, elements, done)
ctx.Opts = append(ctx.Opts, sectionEnabled())
return processFileInclusions(ctx, bytes.NewReader(content))
}

func contentOf(ctx *ParseContext, incl *types.FileInclusion) (io.Reader, error) {
if err := applySubstitutionsOnBlockWithLocation(ctx, incl); err != nil {
log.Error(err)
return nil, errors.Errorf("Unresolved directive in %s - %s", ctx.filename, incl.RawText)
// builder accumulates the preprocessed document content. It separates
// consecutive writes with a single line feed, so the final content has no
// trailing `\n`.
type builder struct {
	strings.Builder
	insertLF bool
}

// WriteString appends `s` to the content, preceded by a line feed on every
// call but the first.
func (b *builder) WriteString(s string) {
	b.doInsertLF()
	b.Builder.WriteString(s)
}

// Write appends `p` to the content, preceded by a line feed on every call
// but the first.
func (b *builder) Write(p []byte) {
	b.doInsertLF()
	b.Builder.Write(p)
}

// doInsertLF emits the separating line feed, except before the very first
// write.
func (b *builder) doInsertLF() {
	if !b.insertLF {
		// nothing precedes the first write, but every subsequent one
		// will be prefixed with a `\n`
		b.insertLF = true
		return
	}
	b.Builder.WriteString("\n")
}

func contentOf(ctx *ParseContext, incl *types.FileInclusion) ([]byte, error) {
path := incl.Location.Stringify()
currentDir := filepath.Dir(ctx.filename)
f, absPath, closeFile, err := open(filepath.Join(currentDir, path))
filename := filepath.Join(currentDir, path)

f, absPath, closeFile, err := open(filename)
defer closeFile()
if err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
defer closeFile()
content := bytes.NewBuffer(nil)
result := bytes.NewBuffer(nil)
scanner := bufio.NewScanner(bufio.NewReader(f))
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("parsing file to %s", incl.RawText)
log.Debugf("reading %s", filename)
}
if lr, ok, err := lineRanges(incl); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
} else if ok {
if err := readWithinLines(scanner, content, lr); err != nil {
if err := readWithinLines(scanner, result, lr); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
} else if tr, ok, err := tagRanges(incl); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
} else if ok {
if err := readWithinTags(path, scanner, content, tr); err != nil {
if err := readWithinTags(path, scanner, result, tr); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
} else {
if err := readAll(scanner, content); err != nil {
if err := readAll(scanner, result); err != nil {
log.Error(err)
return nil, errors.Errorf("Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
Expand Down Expand Up @@ -208,18 +167,22 @@ func contentOf(ctx *ParseContext, incl *types.FileInclusion) (io.Reader, error)
ctx.levelOffsets = []*levelOffset{absoluteOffset(offset)}
}
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("content of '%s':\n%s", absPath, content.String())
}
return content, nil
// if log.IsLevelEnabled(log.DebugLevel) {
// log.Debugf("content of '%s':\n%s", absPath, result.String())
// }
return result.Bytes(), nil
}

type levelOffsets []*levelOffset

func (l levelOffsets) apply(s *types.Section) {
func (l levelOffsets) apply(s *types.RawSection) string {
for _, offset := range l {
offset.apply(s)
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applied offsets to section: level is now %d", s.Level)
}
return s.Stringify()
}

func (l levelOffsets) clone() levelOffsets {
Expand All @@ -234,14 +197,14 @@ type levelOffset struct {
value int
}

func (l *levelOffset) apply(s *types.Section) {
func (l *levelOffset) apply(s *types.RawSection) {
// also, absolute offset becomes relative offset after processing the first section,
// so that the hierarchy of subsequent sections of the doc to include is preserved
if l.absolute {
l.absolute = false
l.value = l.value - s.Level
}
s.Level += l.value
s.OffsetLevel(l.value)
}

func relativeOffset(offset int) *levelOffset {
Expand Down Expand Up @@ -426,16 +389,18 @@ func open(path string) (*os.File, string, func(), error) {
absPath, err := filepath.Abs(path)
if err != nil {
return nil, "", func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
}, err
}
dir := filepath.Dir(absPath)
// TODO: we could skip the Chdir part if we retain the absPath in the context,
// and use `filepath.Join` to compute the abspath of the file to include
if err = os.Chdir(dir); err != nil {
return nil, "", func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
Expand All @@ -446,14 +411,14 @@ func open(path string) (*os.File, string, func(), error) {
f, err := os.Open(absPath)
if err != nil {
return nil, absPath, func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
}, err
}
return f, absPath, func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
Expand Down
Loading