Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simpler capts #154

Merged
merged 3 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/regex/nfafindall2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func add(ms: var RegexMatches2, m: MatchItem) {.inline.} =
if max(msm[i].bounds.b, msm[i].bounds.a) < m.bounds.a:
size = i+1
break
#for i in size .. msm.len-1:
for i in size .. msm.len-1:
if msm[i].capt != -1:
capts.recyclable msm[i].capt
msm.setLen size
Expand Down Expand Up @@ -159,8 +159,6 @@ func nextState(
var eoeFound = false
var smi = 0
while smi < smA.len:
if capt != -1:
capts.keepAlive capt
let L = nfa[n].next.len
var nti = 0
while nti < L:
Expand Down Expand Up @@ -189,7 +187,11 @@ func nextState(
smB.add initPstate(nt0, captx, bounds.a .. i-1)
inc smi
swap smA, smB
capts.recycle()
if mfNoCaptures notin flags:
for pstate in items smA:
if pstate.ci != -1:
capts.keepAlive pstate.ci
capts.recycle()

func findSomeImpl*(
text: string,
Expand Down
8 changes: 5 additions & 3 deletions src/regex/nfamatch2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,6 @@ func nextState(
var matched = true
smB.clear()
for pstate in items smA:
if capt != -1:
capts.keepAlive capt
if anchored and nfa[n].kind == reEoe:
if n notin smB:
smB.add initPstate(n, capt, bounds)
Expand All @@ -165,7 +163,11 @@ func nextState(
if matched:
smB.add initPstate(nt0, captx, bounds2)
swap smA, smB
capts.recycle()
if mfNoCaptures notin flags:
for pstate in items smA:
if capt != -1:
capts.keepAlive capt
capts.recycle()

func matchImpl(
smA, smB: var Pstates,
Expand Down
83 changes: 21 additions & 62 deletions src/regex/nfatype.nim
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@ const nonCapture* = -1 .. -2
# XXX limit lookarounds to int8.high per regex
type CaptState* = uint8
const
stsInitial = 0.CaptState
stsKeepAlive = 1.CaptState
stsRecyclable = 2.CaptState
stsRecycled = 3.CaptState
stsNotRecyclable = 4.CaptState
stsFrozen = 5.CaptState .. CaptState.high
stsKeepAlive = 0.CaptState
stsRecycle = 1.CaptState
stsNotRecyclable = 2.CaptState
stsFrozen = 3.CaptState .. CaptState.high

type
# XXX int16 same as max parallel states or max regex len
Expand Down Expand Up @@ -81,72 +79,31 @@ func reset*(capts: var Capts3, groupsLen: int) =
func initCapts3*(groupsLen: int): Capts3 =
reset(result, groupsLen)

func check(curr, next: CaptState): bool =
## Check if transition from state curr to next is allowed
result = case next:
of stsInitial:
curr == stsInitial or
curr == stsRecycled or
curr == stsNotRecyclable or
curr in stsFrozen
of stsKeepAlive:
curr == stsInitial or
curr == stsRecyclable
of stsRecyclable:
curr == stsInitial or
curr == stsKeepAlive
of stsRecycled:
curr == stsRecyclable or
curr == stsRecycled
of stsNotRecyclable:
curr == stsInitial or
curr == stsKeepAlive or
curr in stsFrozen
else:
doAssert next in stsFrozen
curr == stsInitial or
curr == stsKeepAlive or
curr == stsRecyclable

proc to(a: var CaptState, b: CaptState) {.inline.} =
doAssert check(a, b), $a.int & " " & $b.int
a = b

func keepAlive*(capts: var Capts3, captIdx: CaptIdx) {.inline.} =
template state: untyped = capts.states[captIdx]
doAssert state != stsRecycled
if state == stsInitial or
state == stsRecyclable:
state.to stsKeepAlive

func freeze*(capts: var Capts3): CaptState =
## Freeze all in use capts.
## Return freezeId
doAssert capts.freezeId < stsFrozen.b
inc capts.freezeId
result = capts.freezeId
for state in mitems capts.states:
if state == stsInitial or
state == stsKeepAlive or
state == stsRecyclable:
state.to result
if state == stsRecycle:
state = result

func unfreeze*(capts: var Capts3, freezeId: CaptState) =
doAssert freezeId in stsFrozen
doAssert freezeId == capts.freezeId, "Unordered freeze/unfreeze call"
for state in mitems capts.states:
if state == freezeId:
state.to stsInitial
state = stsRecycle
dec capts.freezeId

func diverge*(capts: var Capts3, captIdx: CaptIdx): CaptIdx =
if capts.free.len > 0:
result = capts.free.pop
capts.states[result].to stsInitial
else:
result = capts.len.CaptIdx
capts.s.setLen(capts.s.len+capts.blockSize)
capts.states.add stsInitial
capts.states.add stsRecycle
doAssert result == capts.states.len-1
let idx = capts.blockIdx(result)
if captIdx != -1:
Expand All @@ -162,19 +119,20 @@ func recycle*(capts: var Capts3) =
## Set initial/keepAlive entries to recyclable
capts.free.setLen 0
for i, state in mpairs capts.states:
if state == stsRecyclable or
state == stsRecycled:
if state == stsRecycle:
capts.free.add i.int16
state.to stsRecycled
if state == stsInitial or
state == stsKeepAlive:
state.to stsRecyclable
if state == stsKeepAlive:
state = stsRecycle

func notRecyclable*(capts: var Capts3, captIdx: CaptIdx) =
capts.states[captIdx].to stsNotRecyclable
func keepAlive*(capts: var Capts3, captIdx: CaptIdx) {.inline.} =
if capts.states[captIdx] == stsRecycle:
capts.states[captIdx] = stsKeepAlive

func notRecyclable*(capts: var Capts3, captIdx: CaptIdx) {.inline.} =
capts.states[captIdx] = stsNotRecyclable

func recyclable*(capts: var Capts3, captIdx: CaptIdx) {.inline.} =
capts.states[captIdx].to stsInitial
capts.states[captIdx] = stsRecycle

func clear*(capts: var Capts3) =
capts.s.setLen 0
Expand Down Expand Up @@ -446,6 +404,7 @@ when isMainModule:
var captx1 = capts.diverge -1
capts[captx1, 0] = 1..1
capts[captx1, 1] = 2..2
capts.keepAlive captx1
capts.recycle()
var captx2 = capts.diverge -1
capts[captx2, 0] = 3..3
Expand All @@ -464,7 +423,6 @@ when isMainModule:
doAssert capts[captx1, 0] == 1..1
doAssert capts[captx1, 1] == 2..2
capts.recycle()
capts.recycle()
var captx2 = capts.diverge -1
doAssert captx1 == captx2
doAssert capts[captx1, 0] == nonCapture
Expand All @@ -476,8 +434,8 @@ when isMainModule:
capts[captx1, 1] = 2..2
doAssert capts[captx1, 0] == 1..1
doAssert capts[captx1, 1] == 2..2
capts.recycle()
capts.keepAlive captx1
capts.recycle()
var captx2 = capts.diverge -1
doAssert captx1 != captx2
doAssert capts[captx1, 0] == 1..1
Expand Down Expand Up @@ -561,3 +519,4 @@ when isMainModule:
capts.recycle()
capts.recycle()
doAssert capts.free.len == 1
echo "ok"