Skip to content

Commit

Permalink
fix lexing nested sequences/mappings
Browse files Browse the repository at this point in the history
  • Loading branch information
xabbuh committed Nov 18, 2020
1 parent 443c622 commit b9ce672
Show file tree
Hide file tree
Showing 2 changed files with 303 additions and 68 deletions.
187 changes: 119 additions & 68 deletions Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ private function doParse(string $value, int $flags)
}

try {
return Inline::parse($this->parseQuotedString($this->currentLine), $flags, $this->refs);
return Inline::parse($this->lexInlineQuotedString(), $flags, $this->refs);
} catch (ParseException $e) {
$e->setParsedLine($this->getRealCurrentLineNb() + 1);
$e->setSnippet($this->currentLine);
Expand All @@ -368,7 +368,7 @@ private function doParse(string $value, int $flags)
}

try {
$parsedMapping = Inline::parse($this->lexInlineMapping($this->currentLine), $flags, $this->refs);
$parsedMapping = Inline::parse($this->lexInlineMapping(), $flags, $this->refs);

while ($this->moveToNextLine()) {
if (!$this->isCurrentLineEmpty()) {
Expand All @@ -389,7 +389,7 @@ private function doParse(string $value, int $flags)
}

try {
$parsedSequence = Inline::parse($this->lexInlineSequence($this->currentLine), $flags, $this->refs);
$parsedSequence = Inline::parse($this->lexInlineSequence(), $flags, $this->refs);

while ($this->moveToNextLine()) {
if (!$this->isCurrentLineEmpty()) {
Expand Down Expand Up @@ -659,6 +659,11 @@ private function getNextEmbedBlock(int $indentation = null, bool $inSequence = f
return implode("\n", $data);
}

/**
 * Tells whether at least one line follows the line the parser is currently on.
 *
 * @return bool true when the current line number is lower than the index of the last line
 */
private function hasMoreLines(): bool
{
    $lastLineNb = \count($this->lines) - 1;

    return $this->currentLineNb < $lastLineNb;
}

/**
* Moves the parser to the next line.
*/
Expand Down Expand Up @@ -736,9 +741,13 @@ private function parseValue(string $value, int $flags, string $context)

try {
if ('' !== $value && '{' === $value[0]) {
return Inline::parse($this->lexInlineMapping($value), $flags, $this->refs);
$cursor = \strlen($this->currentLine) - \strlen($value);

return Inline::parse($this->lexInlineMapping($cursor), $flags, $this->refs);
} elseif ('' !== $value && '[' === $value[0]) {
return Inline::parse($this->lexInlineSequence($value), $flags, $this->refs);
$cursor = \strlen($this->currentLine) - \strlen($value);

return Inline::parse($this->lexInlineSequence($cursor), $flags, $this->refs);
}

$quotation = '' !== $value && ('"' === $value[0] || "'" === $value[0]) ? $value[0] : null;
Expand Down Expand Up @@ -1137,106 +1146,148 @@ private function getLineTag(string $value, int $flags, bool $nextLineCheck = tru
throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
}

private function parseQuotedString(string $yaml): ?string
/**
 * Lexes a quoted string that starts at $cursor on the current line and may
 * continue over the following lines.
 *
 * The character found at $cursor is used as the quote delimiter. The returned
 * string includes the opening and the closing delimiter. Line folding is
 * applied while scanning: a blank line contributes "\n", other line breaks
 * contribute a single space unless the previous line was blank or ended with
 * a backslash.
 *
 * NOTE(review): this span comes from a diff view rendered without +/- markers.
 * Statements that refer to $yaml, $lines or $i appear to belong to the removed
 * parseQuotedString() implementation - confirm against the commit.
 *
 * @param int $cursor offset of the opening quote on the current line; updated
 *                    in place to the offset right after the closing quote
 *
 * @return string the quoted string, delimiters included
 *
 * @throws ParseException when the lines run out before a closing quote is found
 */
private function lexInlineQuotedString(int &$cursor = 0): string
{
if ('' === $yaml || ('"' !== $yaml[0] && "'" !== $yaml[0])) {
throw new \InvalidArgumentException(sprintf('"%s" is not a quoted string.', $yaml));
}
// The character at $cursor is the delimiter; it is kept as the first character of the result.
$quotation = $this->currentLine[$cursor];
$value = $quotation;
++$cursor;

$lines = [$yaml];

while ($this->moveToNextLine()) {
$lines[] = $this->currentLine;
// Initial folding state: no separator is emitted before the content of the first line.
$previousLineWasNewline = true;
$previousLineWasTerminatedWithBackslash = false;

if (!$this->isCurrentLineEmpty() && $yaml[0] === $this->currentLine[-1]) {
break;
}
}

$value = '';

for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) {
if ('' === trim($lines[$i])) {
// One iteration per physical line, until the closing quote is reached.
do {
// Folding: blank line => "\n"; otherwise join with one space unless the
// previous line was blank or ended with a backslash.
if ($this->isCurrentLineBlank()) {
$value .= "\n";
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
$value .= ' ';
}

if ('' !== trim($lines[$i]) && '\\' === substr($lines[$i], -1)) {
$value .= ltrim(substr($lines[$i], 0, -1));
} elseif ('' !== trim($lines[$i])) {
$value .= trim($lines[$i]);
// Scan the rest of the current line one character at a time.
for (; \strlen($this->currentLine) > $cursor; ++$cursor) {
switch ($this->currentLine[$cursor]) {
case '\\':
// Copy the backslash together with the following character, so that the
// following character is never inspected as a delimiter. A backslash that
// is the last character of the line is not copied.
if (isset($this->currentLine[++$cursor])) {
$value .= '\\'.$this->currentLine[$cursor];
}

break;
case $quotation:
++$cursor;

// Inside a single-quoted string two consecutive single quotes are kept
// as '' and scanning goes on; any other delimiter ends the string.
if ("'" === $quotation && isset($this->currentLine[$cursor]) && "'" === $this->currentLine[$cursor]) {
$value .= "''";
break;
}

return $value.$quotation;
default:
$value .= $this->currentLine[$cursor];
}
}

// Remember how the line that was just scanned ended, for the folding of the next one.
if ('' === trim($lines[$i])) {
if ($this->isCurrentLineBlank()) {
$previousLineWasNewline = true;
$previousLineWasTerminatedWithBackslash = false;
} elseif ('\\' === substr($lines[$i], -1)) {
} elseif ('\\' === $this->currentLine[-1]) {
$previousLineWasNewline = false;
$previousLineWasTerminatedWithBackslash = true;
} else {
$previousLineWasNewline = false;
$previousLineWasTerminatedWithBackslash = false;
}
}

return $value;
// Scanning of the next line starts at its first column; the cursor is left
// untouched when there is no further line.
if ($this->hasMoreLines()) {
$cursor = 0;
}
} while ($this->moveToNextLine());

// No closing delimiter was found before moveToNextLine() reported failure.
throw new ParseException('Malformed inline YAML string');
}

private function lexInlineMapping(string $yaml): string
/**
 * Lexes an unquoted token of the current line, starting at $cursor.
 *
 * The token ends before the first of the characters "[", "]", "{", "}", ",",
 * ":" or a space, or at the end of the line.
 *
 * NOTE(review): when the character at $cursor is itself one of those
 * delimiters, strcspn() returns 0, the cursor does not move and an empty
 * string is returned. lexInlineStructure() calls this method from its default
 * branch, which is also reached for a closing bracket that does not match the
 * expected one (e.g. "{ foo: bar ]"); its loop would then make no progress.
 * Confirm that a guard (for instance throwing a ParseException when
 * $cursor === $offset) is added.
 *
 * NOTE(review): this span comes from a diff view rendered without +/- markers.
 * Statements that refer to $yaml, $lines or $i appear to belong to the removed
 * lexInlineMapping(string $yaml) implementation - confirm against the commit.
 *
 * @param int $cursor offset of the first character of the token; advanced in
 *                    place to the offset of the delimiter that ended it
 *
 * @return string the characters between the initial and the final cursor position
 */
private function lexUnquotedString(int &$cursor): string
{
if ('' === $yaml || '{' !== $yaml[0]) {
throw new \InvalidArgumentException(sprintf('"%s" is not a sequence.', $yaml));
}

for ($i = 1; isset($yaml[$i]) && '}' !== $yaml[$i]; ++$i) {
}
// Remember where the token starts, then jump over every non-delimiter character.
$offset = $cursor;
$cursor += strcspn($this->currentLine, '[]{},: ', $cursor);

if (isset($yaml[$i]) && '}' === $yaml[$i]) {
return $yaml;
}

$lines = [$yaml];

while ($this->moveToNextLine()) {
$lines[] = $this->currentLine;
}
return substr($this->currentLine, $offset, $cursor - $offset);
}

return implode("\n", $lines);
/**
 * Lexes an inline mapping that starts at $cursor on the current line.
 *
 * Delegates to lexInlineStructure() with "}" as the closing character.
 *
 * @param int $cursor offset of the first character of the mapping; passed by
 *                    reference to lexInlineStructure(), which advances it
 *
 * @return string whatever lexInlineStructure() returns for this mapping
 */
private function lexInlineMapping(int &$cursor = 0): string
{
    $closingTag = '}';

    return $this->lexInlineStructure($cursor, $closingTag);
}

private function lexInlineSequence(string $yaml): string
/**
 * Lexes an inline sequence that starts at $cursor on the current line.
 *
 * Delegates to lexInlineStructure() with "]" as the closing character.
 *
 * NOTE(review): this span comes from a diff view rendered without +/- markers.
 * The check that refers to $yaml appears to belong to the removed
 * lexInlineSequence(string $yaml) implementation - confirm against the commit.
 *
 * @param int $cursor offset of the first character of the sequence; passed by
 *                    reference to lexInlineStructure(), which advances it
 *
 * @return string whatever lexInlineStructure() returns for this sequence
 */
private function lexInlineSequence(int &$cursor = 0): string
{
if ('' === $yaml || '[' !== $yaml[0]) {
throw new \InvalidArgumentException(sprintf('"%s" is not a sequence.', $yaml));
}
return $this->lexInlineStructure($cursor, ']');
}

for ($i = 1; isset($yaml[$i]) && ']' !== $yaml[$i]; ++$i) {
}
/**
 * Lexes an inline mapping or sequence that starts at $cursor and may span
 * several lines, and returns it as a single string.
 *
 * The character at $cursor is copied as the first character of the result.
 * The following tokens are appended until $closingTag is met; nested
 * structures and quoted strings are lexed by the dedicated methods. Runs of
 * spaces between tokens are replaced with a single space.
 *
 * NOTE(review): this span comes from a diff view rendered without +/- markers.
 * Statements that refer to $yaml, $i or $trimmedValue appear to belong to the
 * removed lexInlineSequence(string $yaml) implementation - confirm against the
 * commit.
 *
 * @param int    $cursor     offset of the opening character on the current
 *                           line; updated in place to the offset right after
 *                           the closing character
 * @param string $closingTag the character that ends the structure
 *
 * @return string the lexed structure, opening and closing character included
 *
 * @throws ParseException when the lines run out before $closingTag is found
 */
private function lexInlineStructure(int &$cursor, string $closingTag): string
{
// Copy the opening character without inspecting it.
$value = $this->currentLine[$cursor];
++$cursor;

if (isset($yaml[$i]) && ']' === $yaml[$i]) {
return $yaml;
}
// One iteration per physical line, until the closing character is reached.
do {
// Leading spaces are skipped and do not produce a separator.
$this->consumeWhitespaces($cursor);

while (isset($this->currentLine[$cursor])) {
switch ($this->currentLine[$cursor]) {
case '"':
case "'":
$value .= $this->lexInlineQuotedString($cursor);
break;
case ':':
case ',':
// Separators are copied verbatim.
$value .= $this->currentLine[$cursor];
++$cursor;
break;
case '{':
$value .= $this->lexInlineMapping($cursor);
break;
case '[':
$value .= $this->lexInlineSequence($cursor);
break;
case $closingTag:
$value .= $this->currentLine[$cursor];
++$cursor;

return $value;
case '#':
// A "#" met where a token would start: leave the inner while loop,
// so the remainder of the current line is not lexed.
break 2;
default:
// NOTE(review): this branch is also reached for a "]" or "}" that is not
// $closingTag. lexUnquotedString() stops immediately on such a character,
// so the cursor would not advance and this loop would not make progress -
// confirm that a guard exists.
$value .= $this->lexUnquotedString($cursor);
}

$value = $yaml;
// Any amount of spaces after a token is represented by a single space.
if ($this->consumeWhitespaces($cursor)) {
$value .= ' ';
}
}

while ($this->moveToNextLine()) {
for ($i = 1; isset($this->currentLine[$i]) && ']' !== $this->currentLine[$i]; ++$i) {
// Lexing of the next line starts at its first column; the cursor is left
// untouched when there is no further line.
if ($this->hasMoreLines()) {
$cursor = 0;
}
} while ($this->moveToNextLine());

$trimmedValue = trim($this->currentLine);
// The closing character was not found before moveToNextLine() reported failure.
throw new ParseException('Malformed inline YAML string');
}

if ('' !== $trimmedValue && '#' === $trimmedValue[0]) {
continue;
}
/**
 * Skips space characters starting at $cursor, moving on to the following
 * lines while the end of the current line is reached.
 *
 * Only the space character is skipped. A line break on its own is not
 * counted: the return value reflects the number of space characters that
 * were skipped.
 *
 * NOTE(review): this span comes from a diff view rendered without +/- markers.
 * Statements that refer to $value, $trimmedValue or $i appear to belong to the
 * removed lexInlineSequence(string $yaml) implementation - confirm against the
 * commit.
 *
 * @param int $cursor offset to start from; updated in place to the offset of
 *                    the first character that is not a space
 *
 * @return bool true when at least one space character was skipped
 */
private function consumeWhitespaces(int &$cursor): bool
{
$whitespacesConsumed = 0;

$value .= $trimmedValue;
do {
// Length of the run of spaces that starts at $cursor (0 when there is none).
$whitespaceOnlyTokenLength = strspn($this->currentLine, ' ', $cursor);
$whitespacesConsumed += $whitespaceOnlyTokenLength;
$cursor += $whitespaceOnlyTokenLength;

if (isset($this->currentLine[$i]) && ']' === $this->currentLine[$i]) {
break;
// A character that is not a space remains on this line: stop here.
if (isset($this->currentLine[$cursor])) {
return 0 < $whitespacesConsumed;
}
}

return $value;
// End of line reached: continue at the first column of the next line; the
// cursor is left untouched when there is no further line.
if ($this->hasMoreLines()) {
$cursor = 0;
}
} while ($this->moveToNextLine());

return 0 < $whitespacesConsumed;
}
}
Loading

0 comments on commit b9ce672

Please sign in to comment.