Skip to content

Commit

Permalink
refactor(parser): preprocess file inclusions
Browse files Browse the repository at this point in the history
start with a first pass of parsing the document to detect
file inclusions and process accordingly (including applying
the level offsets on sections when needed)

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon committed Dec 24, 2021
1 parent ccbc117 commit b8c7318
Show file tree
Hide file tree
Showing 23 changed files with 50,094 additions and 41,569 deletions.
7 changes: 6 additions & 1 deletion libasciidoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io"
"os"
"strings"
"time"

"github.com/bytesparadise/libasciidoc/pkg/renderer/sgml/xhtml5"
Expand Down Expand Up @@ -67,8 +68,12 @@ func Convert(r io.Reader, output io.Writer, config *configuration.Configuration)
duration := time.Since(start)
log.Debugf("rendered the output in %v", duration)
}()
p, err := parser.Preprocess(r, config)
if err != nil {
return types.Metadata{}, err
}
// log.Debugf("parsing the asciidoc source...")
doc, err := parser.ParseDocument(r, config)
doc, err := parser.ParseDocument(strings.NewReader(p), config)
if err != nil {
return types.Metadata{}, err
}
Expand Down
15 changes: 1 addition & 14 deletions pkg/parser/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func NewParseContext(config *configuration.Configuration, opts ...Option) *Parse
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("new parser context with attributes: %s", spew.Sdump(config.Attributes))
}
opts = append(opts, Entrypoint("DocumentFragment"))
opts = append(opts, GlobalStore(frontMatterKey, true))
opts = append(opts, GlobalStore(documentHeaderKey, true))
opts = append(opts, GlobalStore(substitutionKey, newNormalSubstitution()))
Expand Down Expand Up @@ -50,20 +51,6 @@ func (c *ParseContext) Clone() *ParseContext {
}
}

func (c *ParseContext) WithinDelimitedBlock(b *types.DelimitedBlock) *ParseContext {
clone := c.Clone()
switch b.Kind {
case types.Example, types.Quote, types.Sidebar:
clone.Opts = append(c.Opts, Entrypoint("DocumentFragment"), GlobalStore(delimitedBlockScopeKey, b.Kind))
case types.Fenced, types.Listing, types.Literal, types.Verse:
clone.Opts = append(c.Opts, Entrypoint("DocumentFragmentWithinVerbatimBlock"), GlobalStore(delimitedBlockScopeKey, b.Kind))
default:
// TODO: do we need to parse the content of Passthrough and Comments?
clone.Opts = append(c.Opts, Entrypoint("DocumentFragmentWithinVerbatimBlock"), GlobalStore(delimitedBlockScopeKey, b.Kind))
}
return clone
}

type contextAttributes struct {
immutableAttributes types.Attributes
attributes types.Attributes
Expand Down
2 changes: 1 addition & 1 deletion pkg/parser/delimited_block_listing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ and <more text> on the +
},
}
Expect(ParseDocument(s)).To(MatchDocument(expected))
Expect(logs).To(ContainJSONLogWithOffset(log.ErrorLevel, 33, 183, "unsupported kind of substitution: 'unknown'"))
Expect(logs).To(ContainJSONLogWithOffset(log.ErrorLevel, 33, 182, "unsupported kind of substitution: 'unknown'"))
})
})
})
Expand Down
13 changes: 13 additions & 0 deletions pkg/parser/document_preprocessing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package parser

import (
"io"

"github.com/bytesparadise/libasciidoc/pkg/configuration"
)

// Preprocess reads the source line by line to look-up and process file
// inclusions, returning the preprocessed document content as a string,
// ready to be parsed as a whole document.
func Preprocess(source io.Reader, config *configuration.Configuration, opts ...Option) (string, error) {
	// each pipeline step will have its own clone of `ctx`
	return processFileInclusions(NewParseContext(config, opts...), source)
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,162 +17,121 @@ import (
log "github.com/sirupsen/logrus"
)

func IncludeFiles(ctx *ParseContext, done <-chan interface{}, fragmentStream <-chan types.DocumentFragment) <-chan types.DocumentFragment {
resultStream := make(chan types.DocumentFragment, bufferSize)
go func() {
defer close(resultStream)
for f := range fragmentStream {
select {
case resultStream <- includeFiles(ctx, f, done):
case <-done:
log.WithField("pipeline_task", "include_files").Debug("received 'done' signal")
return
}
}
log.WithField("pipeline_task", "include_files").Debug("done")
}()
return resultStream
}

func includeFiles(ctx *ParseContext, f types.DocumentFragment, done <-chan interface{}) types.DocumentFragment {
// if the fragment already contains an error, then send it as-is downstream
if f.Error != nil {
log.Debugf("skipping file inclusions because of fragment with error: %v", f.Error)
return f
}
elements, err := doIncludeFiles(ctx, f.Elements, done)
if err != nil {
return types.NewErrorFragment(f.Position, err)
func processFileInclusions(ctx *ParseContext, source io.Reader) (string, error) {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("processing file inclusions in %s with leveloffset=%s", ctx.filename, spew.Sdump(ctx.levelOffsets))
}
return types.NewDocumentFragment(f.Position, elements...)
}

func doIncludeFiles(ctx *ParseContext, elements []interface{}, done <-chan interface{}) ([]interface{}, error) {
result := make([]interface{}, 0, len(elements))
for _, element := range elements {
switch e := element.(type) {
case *types.AttributeDeclaration:
ctx.attributes.set(e.Name, e.Value)
result = append(result, element)
case *types.AttributeReset:
ctx.attributes.unset(e.Name)
result = append(result, element)
case *types.FileInclusion:
// use an Entrypoint based on the Delimited block kind
elmts, err := doIncludeFile(ctx.Clone(), e, done)
if err != nil {
return nil, err
}
result = append(result, elmts...)
case *types.DelimitedBlock:
elmts, err := doIncludeFiles(ctx.WithinDelimitedBlock(e), e.Elements, done)
if err != nil {
return nil, err
content := &builder{}
scanner := bufio.NewScanner(source)
t := newBlockDelimiterTracker()
for scanner.Scan() {
line := scanner.Bytes()
element, err := Parse("", line, append(ctx.Opts, Entrypoint("DocumentRawLine"))...)
if err != nil {
// content of line was not relevant in the context of preparsing (ie, it's a regular line), so let's keep it as-is
content.Write(line)
} else {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("checking element of type '%T'", element)
}
e.Elements = elmts
switch e.Kind {
case types.Example, types.Quote, types.Sidebar:
if e.Elements, err = parseDelimitedBlockElements(ctx, e); err != nil {
// log the error, but keep the delimited block empty so we can carry on with the whole processing
log.WithError(err).Error("unable to parse content of delimited block")
e.Elements = nil
break
switch e := element.(type) {
case *types.AttributeDeclaration:
ctx.attributes.set(e.Name, e.Value)
t, _ := e.RawText()
content.WriteString(t)
case *types.AttributeReset:
ctx.attributes.unset(e.Name)
t, _ := e.RawText()
content.WriteString(t)
case *types.RawSection:
content.WriteString(ctx.levelOffsets.apply(e))
case *types.FileInclusion:
f, err := includeFile(ctx.Clone(), e)
if err != nil {
return "", err
}
content.WriteString(f)
case *types.BlockDelimiter:
t.push(types.BlockDelimiterKind(e.Kind))
ctx.Opts = append(ctx.Opts, GlobalStore(delimitedBlockScopeKey, t.withinDelimitedBlock()))
t, _ := e.RawText()
content.WriteString(t)
default:
return "", fmt.Errorf("unexpected type of element while preprocessinh document: '%T'", e)
}
result = append(result, e)
default:
result = append(result, element)
}
}
return result, nil
return content.String(), nil
}

// replace the content of this FileInclusion element the content of the target file
// note: there is a trade-off here: we include the whole content of the file in the current
// fragment, making it potentially big, but at the same time we ensure that the context
// of the inclusion (for example, within a delimited block) is not lost.
func doIncludeFile(ctx *ParseContext, e *types.FileInclusion, done <-chan interface{}) ([]interface{}, error) {
// ctx.Opts = append(ctx.Opts, GlobalStore(documentHeaderKey, true))
fileContent, err := contentOf(ctx, e)
if err != nil {
return nil, err
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("including content of '%s' with offsets %v", e.Location.Stringify(), ctx.levelOffsets)
func includeFile(ctx *ParseContext, incl *types.FileInclusion) (string, error) {
if err := applySubstitutionsOnBlockWithLocation(ctx, incl); err != nil {
return "", errors.Errorf("Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
elements := []interface{}{}
for f := range ParseFragments(ctx, fileContent, done) {
if err := f.Error; err != nil {
return nil, err
}
// apply level offset on sections
for i, e := range f.Elements {
switch e := e.(type) {
case *types.DocumentHeader:
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applying offsets to header section: %v", ctx.levelOffsets)
}
// header becomes a "regular" section
s := &types.Section{
Title: e.Title,
Elements: e.Elements,
}
ctx.levelOffsets.apply(s)
if s.Level == 0 { // no level change: keep as the header
f.Elements[i] = e
} else { // level changed: becomes a section with some elements
f.Elements[i] = s
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applied offsets to header/section: level is now %d", s.Level)
}
case *types.Section:
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applying offsets to section of level %d: %v", e.Level, ctx.levelOffsets)
}
ctx.levelOffsets.apply(e)
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applied offsets to section: level is now %d", e.Level)
}
}
}
elements = append(elements, f.Elements...)
content, err := contentOf(ctx, incl)
if err != nil {
return "", err
}
// and recursively...
return doIncludeFiles(ctx, elements, done)
ctx.Opts = append(ctx.Opts, sectionEnabled())
return processFileInclusions(ctx, bytes.NewReader(content))
}

func contentOf(ctx *ParseContext, incl *types.FileInclusion) (io.Reader, error) {
if err := applySubstitutionsOnBlockWithLocation(ctx, incl); err != nil {
log.Error(err)
return nil, errors.Errorf("Unresolved directive in %s - %s", ctx.filename, incl.RawText)
// builder assembles the preprocessed document content, inserting a single
// '\n' between consecutive writes so the result carries no trailing newline.
type builder struct {
	strings.Builder
	insertLF bool
}

// WriteString appends s to the content, preceded by a line feed separator
// on every call but the first.
func (b *builder) WriteString(s string) {
	b.doInsertLF()
	b.Builder.WriteString(s)
}

// Write appends p to the content, preceded by a line feed separator
// on every call but the first.
func (b *builder) Write(p []byte) {
	b.doInsertLF()
	b.Builder.Write(p)
}

// doInsertLF emits the '\n' separator before every write except the first.
func (b *builder) doInsertLF() {
	if !b.insertLF {
		// first write: no separator, but every subsequent write gets one
		b.insertLF = true
		return
	}
	b.Builder.WriteByte('\n')
}

func contentOf(ctx *ParseContext, incl *types.FileInclusion) ([]byte, error) {
path := incl.Location.Stringify()
currentDir := filepath.Dir(ctx.filename)
f, absPath, closeFile, err := open(filepath.Join(currentDir, path))
filename := filepath.Join(currentDir, path)

f, absPath, closeFile, err := open(filename)
defer closeFile()
if err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
defer closeFile()
content := bytes.NewBuffer(nil)
result := bytes.NewBuffer(nil)
scanner := bufio.NewScanner(bufio.NewReader(f))
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("parsing file to %s", incl.RawText)
log.Debugf("reading %s", filename)
}
if lr, ok, err := lineRanges(incl); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
} else if ok {
if err := readWithinLines(scanner, content, lr); err != nil {
if err := readWithinLines(scanner, result, lr); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
} else if tr, ok, err := tagRanges(incl); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
} else if ok {
if err := readWithinTags(path, scanner, content, tr); err != nil {
if err := readWithinTags(path, scanner, result, tr); err != nil {
return nil, errors.Wrapf(err, "Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
} else {
if err := readAll(scanner, content); err != nil {
if err := readAll(scanner, result); err != nil {
log.Error(err)
return nil, errors.Errorf("Unresolved directive in %s - %s", ctx.filename, incl.RawText)
}
Expand Down Expand Up @@ -208,18 +167,22 @@ func contentOf(ctx *ParseContext, incl *types.FileInclusion) (io.Reader, error)
ctx.levelOffsets = []*levelOffset{absoluteOffset(offset)}
}
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("content of '%s':\n%s", absPath, content.String())
}
return content, nil
// if log.IsLevelEnabled(log.DebugLevel) {
// log.Debugf("content of '%s':\n%s", absPath, result.String())
// }
return result.Bytes(), nil
}

type levelOffsets []*levelOffset

func (l levelOffsets) apply(s *types.Section) {
func (l levelOffsets) apply(s *types.RawSection) string {
for _, offset := range l {
offset.apply(s)
}
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("applied offsets to section: level is now %d", s.Level)
}
return s.Stringify()
}

func (l levelOffsets) clone() levelOffsets {
Expand All @@ -234,14 +197,14 @@ type levelOffset struct {
value int
}

func (l *levelOffset) apply(s *types.Section) {
func (l *levelOffset) apply(s *types.RawSection) {
// also, absolute offset becomes relative offset after processing the first section,
// so that the hierarchy of subsequent sections of the doc to include is preserved
if l.absolute {
l.absolute = false
l.value = l.value - s.Level
}
s.Level += l.value
s.OffsetLevel(l.value)
}

func relativeOffset(offset int) *levelOffset {
Expand Down Expand Up @@ -426,16 +389,18 @@ func open(path string) (*os.File, string, func(), error) {
absPath, err := filepath.Abs(path)
if err != nil {
return nil, "", func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
}, err
}
dir := filepath.Dir(absPath)
// TODO: we could skip the Chdir part if we retain the absPath in the context,
// and use `filepath.Join` to compute the abspath of the file to include
if err = os.Chdir(dir); err != nil {
return nil, "", func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
Expand All @@ -446,14 +411,14 @@ func open(path string) (*os.File, string, func(), error) {
f, err := os.Open(absPath)
if err != nil {
return nil, absPath, func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
}, err
}
return f, absPath, func() {
// log.Debugf("restoring current working dir to: %s", wd)
log.Debugf("restoring current working dir to: %s", wd)
if err := os.Chdir(wd); err != nil { // restore the previous working directory
log.WithError(err).Error("failed to restore previous working directory")
}
Expand Down
Loading

0 comments on commit b8c7318

Please sign in to comment.