Skip to content

Commit

Permalink
Merge pull request #348 from commonmark/source-position-input-index
Browse files — browse the repository at this point in the history
Also include "input index" in SourceSpan
  • Loading branch information
robinst authored Oct 21, 2024
2 parents 5c5c2a7 + a6b3daa commit 159249c
Show file tree
Hide file tree
Showing 19 changed files with 702 additions and 262 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ private static Text createTextNode(String literal, Span span, SourceSpan sourceS
String text = literal.substring(beginIndex, endIndex);
Text textNode = new Text(text);
if (sourceSpan != null) {
int length = endIndex - beginIndex;
textNode.addSourceSpan(SourceSpan.of(sourceSpan.getLineIndex(), beginIndex, length));
textNode.addSourceSpan(sourceSpan.subSpan(beginIndex, endIndex));
}
return textNode;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,43 +71,43 @@ public void sourceSpans() {

Paragraph paragraph = (Paragraph) document.getFirstChild();
Text abc = (Text) paragraph.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 3)),
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)),
abc.getSourceSpans());

assertTrue(abc.getNext() instanceof SoftLineBreak);

Link one = (Link) abc.getNext().getNext();
assertEquals("http://example.com/one", one.getDestination());
assertEquals(List.of(SourceSpan.of(1, 0, 22)),
assertEquals(List.of(SourceSpan.of(1, 0, 4, 22)),
one.getSourceSpans());

assertTrue(one.getNext() instanceof SoftLineBreak);

Text def = (Text) one.getNext().getNext();
assertEquals("def ", def.getLiteral());
assertEquals(List.of(SourceSpan.of(2, 0, 4)),
assertEquals(List.of(SourceSpan.of(2, 0, 27, 4)),
def.getSourceSpans());

Link two = (Link) def.getNext();
assertEquals("http://example.com/two", two.getDestination());
assertEquals(List.of(SourceSpan.of(2, 4, 22)),
assertEquals(List.of(SourceSpan.of(2, 4, 31, 22)),
two.getSourceSpans());

assertTrue(two.getNext() instanceof SoftLineBreak);

Text ghi = (Text) two.getNext().getNext();
assertEquals("ghi ", ghi.getLiteral());
assertEquals(List.of(SourceSpan.of(3, 0, 4)),
assertEquals(List.of(SourceSpan.of(3, 0, 54, 4)),
ghi.getSourceSpans());

Link three = (Link) ghi.getNext();
assertEquals("http://example.com/three", three.getDestination());
assertEquals(List.of(SourceSpan.of(3, 4, 24)),
assertEquals(List.of(SourceSpan.of(3, 4, 58, 24)),
three.getSourceSpans());

Text jkl = (Text) three.getNext();
assertEquals(" jkl", jkl.getLiteral());
assertEquals(List.of(SourceSpan.of(3, 28, 4)),
assertEquals(List.of(SourceSpan.of(3, 28, 82, 4)),
jkl.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,10 @@ public void testSourcePositions() {

var doc = parser.parse("Test [^foo]\n\n[^foo]: /url\n");
var ref = find(doc, FootnoteReference.class);
assertEquals(ref.getSourceSpans(), List.of(SourceSpan.of(0, 5, 6)));
assertEquals(ref.getSourceSpans(), List.of(SourceSpan.of(0, 5, 5, 6)));

var def = find(doc, FootnoteDefinition.class);
assertEquals(def.getSourceSpans(), List.of(SourceSpan.of(2, 0, 12)));
assertEquals(def.getSourceSpans(), List.of(SourceSpan.of(2, 0, 13, 12)));
}

private static <T> T find(Node parent, Class<T> nodeClass) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public void sourceSpans() {
Node document = parser.parse("hey ~~there~~\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node strikethrough = block.getLastChild();
assertEquals(List.of(SourceSpan.of(0, 4, 9)),
assertEquals(List.of(SourceSpan.of(0, 4, 4, 9)),
strikethrough.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -791,45 +791,45 @@ public void sourceSpans() {
Node document = parser.parse("Abc|Def\n---|---\n|1|2\n 3|four|\n|||\n");

TableBlock block = (TableBlock) document.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 7), SourceSpan.of(1, 0, 7),
SourceSpan.of(2, 0, 4), SourceSpan.of(3, 0, 8), SourceSpan.of(4, 0, 3)),
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7), SourceSpan.of(1, 0, 8, 7),
SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3)),
block.getSourceSpans());

TableHead head = (TableHead) block.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 7)), head.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7)), head.getSourceSpans());

TableRow headRow = (TableRow) head.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 7)), headRow.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 7)), headRow.getSourceSpans());
TableCell headRowCell1 = (TableCell) headRow.getFirstChild();
TableCell headRowCell2 = (TableCell) headRow.getLastChild();
assertEquals(List.of(SourceSpan.of(0, 0, 3)), headRowCell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 3)), headRowCell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 3)), headRowCell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 3)), headRowCell2.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)), headRowCell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 0, 0, 3)), headRowCell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 4, 3)), headRowCell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(0, 4, 4, 3)), headRowCell2.getFirstChild().getSourceSpans());

TableBody body = (TableBody) block.getLastChild();
assertEquals(List.of(SourceSpan.of(2, 0, 4), SourceSpan.of(3, 0, 8), SourceSpan.of(4, 0, 3)), body.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 0, 16, 4), SourceSpan.of(3, 0, 21, 8), SourceSpan.of(4, 0, 30, 3)), body.getSourceSpans());

TableRow bodyRow1 = (TableRow) body.getFirstChild();
assertEquals(List.of(SourceSpan.of(2, 0, 4)), bodyRow1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 0, 16, 4)), bodyRow1.getSourceSpans());
TableCell bodyRow1Cell1 = (TableCell) bodyRow1.getFirstChild();
TableCell bodyRow1Cell2 = (TableCell) bodyRow1.getLastChild();
assertEquals(List.of(SourceSpan.of(2, 1, 1)), bodyRow1Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 1, 1)), bodyRow1Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 1)), bodyRow1Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 1)), bodyRow1Cell2.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 1, 17, 1)), bodyRow1Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 1, 17, 1)), bodyRow1Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 19, 1)), bodyRow1Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(2, 3, 19, 1)), bodyRow1Cell2.getFirstChild().getSourceSpans());

TableRow bodyRow2 = (TableRow) body.getFirstChild().getNext();
assertEquals(List.of(SourceSpan.of(3, 0, 8)), bodyRow2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 0, 21, 8)), bodyRow2.getSourceSpans());
TableCell bodyRow2Cell1 = (TableCell) bodyRow2.getFirstChild();
TableCell bodyRow2Cell2 = (TableCell) bodyRow2.getLastChild();
assertEquals(List.of(SourceSpan.of(3, 1, 1)), bodyRow2Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 1, 1)), bodyRow2Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 4)), bodyRow2Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 4)), bodyRow2Cell2.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 1, 22, 1)), bodyRow2Cell1.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 1, 22, 1)), bodyRow2Cell1.getFirstChild().getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 24, 4)), bodyRow2Cell2.getSourceSpans());
assertEquals(List.of(SourceSpan.of(3, 3, 24, 4)), bodyRow2Cell2.getFirstChild().getSourceSpans());

TableRow bodyRow3 = (TableRow) body.getLastChild();
assertEquals(List.of(SourceSpan.of(4, 0, 3)), bodyRow3.getSourceSpans());
assertEquals(List.of(SourceSpan.of(4, 0, 30, 3)), bodyRow3.getSourceSpans());
TableCell bodyRow3Cell1 = (TableCell) bodyRow3.getFirstChild();
TableCell bodyRow3Cell2 = (TableCell) bodyRow3.getLastChild();
assertEquals(List.of(), bodyRow3Cell1.getSourceSpans());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public void sourceSpans() {
Node document = parser.parse("x{height=3 width=4}\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node text = block.getFirstChild();
assertEquals(List.of(SourceSpan.of(0, 0, 19)),
assertEquals(List.of(SourceSpan.of(0, 0, 0, 19)),
text.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public void sourceSpans() {
Node document = parser.parse("hey ++there++\n");
Paragraph block = (Paragraph) document.getFirstChild();
Node ins = block.getLastChild();
assertEquals(List.of(SourceSpan.of(0, 4, 9)),
assertEquals(List.of(SourceSpan.of(0, 4, 4, 9)),
ins.getSourceSpans());
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.commonmark.internal;

import org.commonmark.internal.util.LineReader;
import org.commonmark.internal.util.Parsing;
import org.commonmark.node.*;
import org.commonmark.parser.IncludeSourceSpans;
Expand Down Expand Up @@ -127,7 +128,7 @@ public Document parse(String input) {
int lineBreak;
while ((lineBreak = Characters.findLineBreak(input, lineStart)) != -1) {
String line = input.substring(lineStart, lineBreak);
parseLine(line);
parseLine(line, lineStart);
if (lineBreak + 1 < input.length() && input.charAt(lineBreak) == '\r' && input.charAt(lineBreak + 1) == '\n') {
lineStart = lineBreak + 2;
} else {
Expand All @@ -136,23 +137,23 @@ public Document parse(String input) {
}
if (!input.isEmpty() && (lineStart == 0 || lineStart < input.length())) {
String line = input.substring(lineStart);
parseLine(line);
parseLine(line, lineStart);
}

return finalizeAndProcess();
}

public Document parse(Reader input) throws IOException {
BufferedReader bufferedReader;
if (input instanceof BufferedReader) {
bufferedReader = (BufferedReader) input;
} else {
bufferedReader = new BufferedReader(input);
}

var lineReader = new LineReader(input);
int inputIndex = 0;
String line;
while ((line = bufferedReader.readLine()) != null) {
parseLine(line);
while ((line = lineReader.readLine()) != null) {
parseLine(line, inputIndex);
inputIndex += line.length();
var eol = lineReader.getLineTerminator();
if (eol != null) {
inputIndex += eol.length();
}
}

return finalizeAndProcess();
Expand Down Expand Up @@ -197,8 +198,8 @@ public BlockParser getActiveBlockParser() {
* Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
* line of input, then finalizing the document.
*/
private void parseLine(String ln) {
setLine(ln);
private void parseLine(String ln, int inputIndex) {
setLine(ln, inputIndex);

// For each containing block, try to parse the associated line start.
// The document always matches, so we can skip the first block parser and start with a match count of 1.
Expand Down Expand Up @@ -322,7 +323,7 @@ private void parseLine(String ln) {
}
}

private void setLine(String ln) {
private void setLine(String ln, int inputIndex) {
lineIndex++;
index = 0;
column = 0;
Expand All @@ -331,7 +332,7 @@ private void setLine(String ln) {
String lineContent = prepareLine(ln);
SourceSpan sourceSpan = null;
if (includeSourceSpans != IncludeSourceSpans.NONE) {
sourceSpan = SourceSpan.of(lineIndex, 0, lineContent.length());
sourceSpan = SourceSpan.of(lineIndex, 0, inputIndex, lineContent.length());
}
this.line = SourceLine.of(lineContent, sourceSpan);
}
Expand Down Expand Up @@ -430,10 +431,9 @@ private void addLine() {
content = line.getContent().subSequence(index, line.getContent().length());
}
SourceSpan sourceSpan = null;
if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES) {
// Note that if we're in a partially-consumed tab, the length here corresponds to the content but not to the
// actual source length. That sounds like a problem, but I haven't found a test case where it matters (yet).
sourceSpan = SourceSpan.of(lineIndex, index, content.length());
if (includeSourceSpans == IncludeSourceSpans.BLOCKS_AND_INLINES && index < line.getSourceSpan().getLength()) {
// Note that if we're in a partially-consumed tab, the length of the source span and the length of the content don't match.
sourceSpan = line.getSourceSpan().subSpan(index);
}
getActiveBlockParser().addLine(SourceLine.of(content, sourceSpan));
addSourceSpans();
Expand All @@ -449,7 +449,7 @@ private void addSourceSpans() {
int blockIndex = Math.min(openBlockParser.sourceIndex, index);
int length = line.getContent().length() - blockIndex;
if (length != 0) {
openBlockParser.blockParser.addSourceSpan(SourceSpan.of(lineIndex, blockIndex, length));
openBlockParser.blockParser.addSourceSpan(line.getSourceSpan().subSpan(blockIndex));
}
}
}
Expand Down
Loading

0 comments on commit 159249c

Please sign in to comment.