Skip to content

Commit

Permalink
fix parsing of document
Browse files: browse the repository at this point in the history
  • Loading branch information
goccy committed Nov 8, 2024
1 parent e3a88b0 commit 26c4215
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 17 deletions.
210 changes: 210 additions & 0 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1877,6 +1877,216 @@ a: !!binary |
},
{
YAML: `
a:
b
c
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b\nc",
Origin: "\n b\n\n c",
},
},
},
{
YAML: `
a:
b
c
d
e: f
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b\nc d",
Origin: "\n b\n\n\n c\n d\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "e",
Origin: "e",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "f",
Origin: " f",
},
},
},
{
YAML: `
a: |
b
c
d
e: f
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.LiteralType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockScalarIndicator,
Value: "|",
Origin: " |\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b \n\n \nc\nd \n",
Origin: " b \n\n \n c\n d \n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "e",
Origin: "e",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "f",
Origin: " f",
},
},
},
{
YAML: `
a: >
b
c
d
e: f
`,
Tokens: token.Tokens{
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "a",
Origin: "\na",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.FoldedType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockScalarIndicator,
Value: ">",
Origin: " >\n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "b \n\n \nc d \n",
Origin: " b \n\n \n c\n d \n",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "e",
Origin: "e",
},
{
Type: token.MappingValueType,
CharacterType: token.CharacterTypeIndicator,
Indicator: token.BlockStructureIndicator,
Value: ":",
Origin: ":",
},
{
Type: token.StringType,
CharacterType: token.CharacterTypeMiscellaneous,
Indicator: token.NotIndicator,
Value: "f",
Origin: " f",
},
},
},
{
YAML: `
a: >
Text`,
Tokens: token.Tokens{
Expand Down
16 changes: 11 additions & 5 deletions scanner/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ func (c *Context) addDocumentIndent(column int) {
// new-line-char is used as is instead of space.
// Therefore, it is necessary to replace the space already added to buf.
// `c.docFoldedNewLine` is a variable that is set to true for every newline.
if c.isFolded && c.docFoldedNewLine {
c.buf[len(c.buf)-1] = '\n'
if (c.isFolded || c.isRawFolded) && c.docFoldedNewLine {
c.docFoldedNewLine = false
}
// Since addBuf ignores space characters, add to the buffer directly.
Expand All @@ -160,18 +159,22 @@ func (c *Context) addDocumentIndent(column int) {
}

func (c *Context) addDocumentNewLineInFolded(column int) {
if !c.isFolded {
if c.isLiteral {
return
}
if !c.docFoldedNewLine {
return
}
if c.docLineIndentColumn == c.docPrevLineIndentColumn {
if c.buf[len(c.buf)-1] == '\n' {
c.buf[len(c.buf)-1] = ' '
}
}
if c.docFirstLineIndentColumn == c.docLineIndentColumn &&
c.docLineIndentColumn == c.docPrevLineIndentColumn {
// use space as a new line delimiter.
return
}
c.buf[len(c.buf)-1] = '\n'
c.docFoldedNewLine = false
}

Expand Down Expand Up @@ -276,7 +279,7 @@ func (c *Context) bufferedSrc() []rune {
if c.isDocument() {
// remove end '\n' character and trailing empty lines.
// https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
if c.hasTrimAllEndNewlineOpt() {
if c.hasTrimAllEndNewlineOpt() || c.isRawFolded {
// If the '-' flag is specified, all trailing newline characters will be removed.
src = []rune(strings.TrimRight(string(src), "\n"))
} else {
Expand All @@ -298,6 +301,9 @@ func (c *Context) bufferedSrc() []rune {

// If the text ends with space characters, remove all of them.
src = []rune(strings.TrimRight(string(src), " "))
if string(src) == "\n" {
src = []rune{}
}
}
return src
}
Expand Down
25 changes: 13 additions & 12 deletions scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func (s *Scanner) indentStateFromIndentNumDifference() IndentState {
}

func (s *Scanner) updateIndent(ctx *Context, c rune) {
if s.isFirstCharAtLine && s.isNewLineChar(c) && ctx.isDocument() {
if s.isFirstCharAtLine && s.isNewLineChar(c) {
return
}
if s.isFirstCharAtLine && c == ' ' {
Expand Down Expand Up @@ -557,21 +557,13 @@ func (s *Scanner) scanDocument(ctx *Context, c rune) error {
s.progressColumn(ctx, 1)
return ErrInvalidToken(err.Error(), invalidTk)
}
if ctx.isLiteral {
ctx.addBuf(c)
} else if ctx.isFolded {
ctx.addBuf(c)
}
ctx.addBuf(c)
value := ctx.bufferedSrc()
ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos()))
ctx.resetBuffer()
s.progressColumn(ctx, 1)
} else if s.isNewLineChar(c) {
if ctx.isLiteral {
ctx.addBuf(c)
} else {
ctx.addBuf(' ')
}
ctx.addBuf(c)
ctx.updateDocumentNewLineState()
s.progressLine(ctx)
} else if s.isFirstCharAtLine && c == ' ' {
Expand Down Expand Up @@ -626,7 +618,15 @@ func (s *Scanner) scanNewLine(ctx *Context, c rune) {
} else if s.isAnchor {
s.addBufferedTokenIfExists(ctx)
}
ctx.addBuf(' ')
if ctx.existsBuffer() && s.isFirstCharAtLine {
if ctx.buf[len(ctx.buf)-1] == ' ' {
ctx.buf[len(ctx.buf)-1] = '\n'
} else {
ctx.buf = append(ctx.buf, '\n')
}
} else {
ctx.addBuf(' ')
}
ctx.addOriginBuf(c)
s.progressLine(ctx)
}
Expand Down Expand Up @@ -789,6 +789,7 @@ func (s *Scanner) scanRawFoldedChar(ctx *Context) bool {
return false
}

ctx.updateDocumentLineIndentColumn(s.column)
ctx.isRawFolded = true
ctx.addBuf('-')
ctx.addOriginBuf('-')
Expand Down

0 comments on commit 26c4215

Please sign in to comment.