-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add caret and spans like Trifecta (#234) #238
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,8 @@ | |
|
||
package cats.parse | ||
|
||
import cats.kernel.Eq | ||
|
||
import java.util.Arrays | ||
|
||
/** This is a class to convert linear offset in | ||
|
@@ -91,3 +93,13 @@ class LocationMap(val input: String) { | |
object LocationMap { | ||
def apply(str: String): LocationMap = new LocationMap(str) | ||
} | ||
|
||
case class Caret(offset: Int, row: Int, col: Int) | ||
object Caret { | ||
implicit val eqCatsCaret: Eq[Caret] = Eq.fromUniversalEquals | ||
} | ||
|
||
case class Span(from: Caret, to: Caret) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add a comment if from and to are inclusive/exclusive? I assume |
||
object Span { | ||
implicit val eqCatsCaret: Eq[Span] = Eq.fromUniversalEquals | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also we could make an |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -360,6 +360,24 @@ sealed abstract class Parser0[+A] { self: Product => | |
def withContext(str: String): Parser0[A] = | ||
Parser.withContext0(this, str) | ||
|
||
/** return the result of the parser together | ||
* with the position of the starting of the consumption | ||
*/ | ||
def withCaret: Parser0[(A, Caret)] = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about call this I'm really nervous |
||
Parser.withCaret0(this) | ||
|
||
/** replace the result of the parser with with the | ||
* span of position of the string it consumed | ||
*/ | ||
def span: Parser0[Span] = | ||
Parser.span0(this) | ||
|
||
/** return the result of the parser together | ||
* with the span of the string it consumed | ||
*/ | ||
def withSpan: Parser0[(A, Span)] = | ||
Parser.withSpan0(this) | ||
|
||
/** Internal (mutable) parsing method. | ||
* | ||
* This method should only be called internally by parser instances. | ||
|
@@ -680,6 +698,21 @@ sealed abstract class Parser[+A] extends Parser0[A] { self: Product => | |
*/ | ||
override def withContext(str: String): Parser[A] = | ||
Parser.withContext(this, str) | ||
|
||
/** This method overrides `Parser0#withCaret` to refine the return type. | ||
*/ | ||
override def withCaret: Parser[(A, Caret)] = | ||
Parser.withCaret(this) | ||
|
||
/** This method overrides `Parser0#span`` to refine the return type. | ||
*/ | ||
override def span: Parser[Span] = | ||
Parser.span(this) | ||
|
||
/** This method overrides `Parser0#withSpan` to refine the return type. | ||
*/ | ||
override def withSpan: Parser[(A, Span)] = | ||
Parser.withSpan(this) | ||
} | ||
|
||
object Parser { | ||
|
@@ -1666,7 +1699,7 @@ object Parser { | |
case str if Impl.matchesString(str) => str.asInstanceOf[Parser0[String]] | ||
case _ => | ||
Impl.unmap0(pa) match { | ||
case Impl.Pure(_) | Impl.Index => emptyStringParser0 | ||
case Impl.Pure(_) | Impl.Index | Impl.WithCaret => emptyStringParser0 | ||
case notEmpty => Impl.StringP0(notEmpty) | ||
} | ||
} | ||
|
@@ -1731,6 +1764,46 @@ object Parser { | |
*/ | ||
def end: Parser0[Unit] = Impl.EndParser | ||
|
||
/** return the position the parser is at | ||
*/ | ||
def caret: Parser0[Caret] = Impl.WithCaret | ||
|
||
/** return the result of the parser together | ||
* with the position of the starting of the consumption | ||
*/ | ||
def withCaret0[A](pa: Parser0[A]): Parser0[(A, Caret)] = | ||
(caret ~ pa).map { case (l, x) => (x, l) } | ||
|
||
/** return the result of the parser together | ||
* with the position of the starting of the consumption | ||
*/ | ||
def withCaret[A](pa: Parser[A]): Parser[(A, Caret)] = | ||
(caret.with1 ~ pa).map { case (l, x) => (x, l) } | ||
|
||
/** replace the result of the parser with with the | ||
* span of position of the string it consumed | ||
*/ | ||
def span0[A](pa: Parser0[A]): Parser0[Span] = | ||
(caret ~ (pa *> caret)).map { case (l, r) => Span(l, r) } | ||
|
||
/** replace the result of the parser with with the | ||
* span of position of the string it consumed | ||
*/ | ||
def span[A](pa: Parser[A]): Parser[Span] = | ||
(caret.with1 ~ (pa *> caret)).map { case (l, r) => Span(l, r) } | ||
|
||
/** return the result of the parser together | ||
* with the span of the string it consumed | ||
*/ | ||
def withSpan0[A](pa: Parser0[A]): Parser0[(A, Span)] = | ||
(caret, pa, caret).mapN { case (l, x, r) => (x, Span(l, r)) } | ||
|
||
/** return the result of the parser together | ||
* with the span of the string it consumed | ||
*/ | ||
def withSpan[A](pa: Parser[A]): Parser[(A, Span)] = | ||
(caret.with1 ~ pa ~ caret).map { case ((l, x), r) => (x, Span(l, r)) } | ||
|
||
/** If we fail, rewind the offset back so that | ||
* we can try other branches. This tends | ||
* to harm debuggability and ideally should be | ||
|
@@ -1761,7 +1834,7 @@ object Parser { | |
case p1: Parser[_] => as(p1, b) | ||
case _ => | ||
Impl.unmap0(pa) match { | ||
case Impl.Pure(_) | Impl.Index => pure(b) | ||
case Impl.Pure(_) | Impl.Index | Impl.WithCaret => pure(b) | ||
case notPure => | ||
Impl.Void0(notPure).map(Impl.ConstFn(b)) | ||
} | ||
|
@@ -1878,6 +1951,7 @@ object Parser { | |
* This is protected rather than private to avoid a warning on 2.12 | ||
*/ | ||
protected[parse] final class State(val str: String) { | ||
lazy val locMap: LocationMap = LocationMap(str) | ||
var offset: Int = 0 | ||
var error: Chain[Expectation] = null | ||
var capture: Boolean = true | ||
|
@@ -1927,8 +2001,9 @@ object Parser { | |
final def doesBacktrack(p: Parser0[Any]): Boolean = | ||
p match { | ||
case Backtrack0(_) | Backtrack(_) | AnyChar | CharIn(_, _, _) | Str(_) | IgnoreCase(_) | | ||
Length(_) | StartParser | EndParser | Index | Pure(_) | Fail() | FailWith(_) | Not(_) | | ||
StringIn(_) => | ||
Length(_) | StartParser | EndParser | Index | WithCaret | Pure(_) | Fail() | FailWith( | ||
_ | ||
) | Not(_) | StringIn(_) => | ||
true | ||
case Map0(p, _) => doesBacktrack(p) | ||
case Map(p, _) => doesBacktrack(p) | ||
|
@@ -1958,7 +2033,7 @@ object Parser { | |
// and by construction, a oneOf0 never always succeeds | ||
final def alwaysSucceeds(p: Parser0[Any]): Boolean = | ||
p match { | ||
case Index | Pure(_) => true | ||
case Index | WithCaret | Pure(_) => true | ||
case Map0(p, _) => alwaysSucceeds(p) | ||
case SoftProd0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) | ||
case Prod0(a, b) => alwaysSucceeds(a) && alwaysSucceeds(b) | ||
|
@@ -1978,7 +2053,7 @@ object Parser { | |
def unmap0(pa: Parser0[Any]): Parser0[Any] = | ||
pa match { | ||
case p1: Parser[Any] => unmap(p1) | ||
case Pure(_) | Index => Parser.unit | ||
case Pure(_) | Index | WithCaret => Parser.unit | ||
case s if alwaysSucceeds(s) => Parser.unit | ||
case Map0(p, _) => | ||
// we discard any allocations done by fn | ||
|
@@ -2209,6 +2284,15 @@ object Parser { | |
override def parseMut(state: State): Int = state.offset | ||
} | ||
|
||
case object WithCaret extends Parser0[Caret] { | ||
override def parseMut(state: State): Caret = { | ||
val (row, col) = state.locMap | ||
.toLineCol(state.offset) | ||
.getOrElse(throw new IllegalStateException("the offset is larger than the input size")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will throw when you are at the EOF right? I think we need to fix that somehow. I guess we can say that EOF should return What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. I'll take a look at how I can solve that first. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #286 fixes this issue. |
||
Caret(state.offset, row, col) | ||
} | ||
} | ||
|
||
final def backtrack[A](pa: Parser0[A], state: State): A = { | ||
val offset = state.offset | ||
val a = pa.parseMut(state) | ||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -155,6 +155,26 @@ object ParserGen { | |||||||
def string(g: GenT[Parser]): GenT[Parser] = | ||||||||
GenT(Parser.string(g.fa)) | ||||||||
|
||||||||
private implicit val cogenCaret: Cogen[Caret] = | ||||||||
Cogen | ||||||||
.tuple3(Cogen.cogenInt, Cogen.cogenInt, Cogen.cogenInt) | ||||||||
.contramap { x => (x.offset, x.row, x.col) } | ||||||||
|
||||||||
private implicit val cogenSpan: Cogen[Span] = | ||||||||
Cogen.tuple2(cogenCaret, cogenCaret).contramap { x => (x.from, x.to) } | ||||||||
|
||||||||
def span0(g: GenT[Parser0]): GenT[Parser0] = | ||||||||
GenT(Parser.span0(g.fa)) | ||||||||
|
||||||||
def span(g: GenT[Parser]): GenT[Parser] = | ||||||||
GenT(Parser.span(g.fa)) | ||||||||
|
||||||||
def withSpan0(g: GenT[Parser0]): GenT[Parser0] = | ||||||||
GenT(Parser.withSpan0(g.fa))(Cogen.tuple2(g.cogen, cogenSpan)) | ||||||||
|
||||||||
def withSpan(g: GenT[Parser]): GenT[Parser] = | ||||||||
GenT(Parser.withSpan(g.fa))(Cogen.tuple2(g.cogen, cogenSpan)) | ||||||||
|
||||||||
def backtrack0(g: GenT[Parser0]): GenT[Parser0] = | ||||||||
GenT(g.fa.backtrack)(g.cogen) | ||||||||
|
||||||||
|
@@ -668,6 +688,18 @@ class ParserTest extends munit.ScalaCheckSuite { | |||||||
parseTest(Parser.stringIn(List("foo", "foobar", "foofoo", "foobat")).string, "foobal", "foo") | ||||||||
} | ||||||||
|
||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add generators that create any new elements to the AST (in this PR that would be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still want to address this to make sure all the previous laws pass. see for instance:
as an example of how we want to transform some generators to make
and:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see that you added the generators, but I don't see that they were added to the root generator so we don't exercise all the existing laws in the presence of these operations. |
||||||||
test("Parser.careted and spanned works") { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also, I'd like to see a law like something like that. |
||||||||
val str = "foo bar\n\nbaz\n" | ||||||||
val pa = ( | ||||||||
Parser.string("foo bar"), | ||||||||
Rfc5234.lf.rep.string, | ||||||||
Parser.peek(Parser.charWhere(_.isLetter)) | ||||||||
).tupled map { x => "foo bar" + x._2 } | ||||||||
parseTest(pa, str, "foo bar\n\n") | ||||||||
parseTest(pa.withCaret, str, ("foo bar\n\n", Caret(0, 0, 0))) | ||||||||
parseTest(pa.span, str, Span(Caret(0, 0, 0), Caret(9, 2, 0))) | ||||||||
} | ||||||||
|
||||||||
property("biasSmall works") { | ||||||||
val genPair = | ||||||||
for { | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what about making an
Order
instead and order by theoffset
? We might want to sort by carets.