-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathre.stanza
110 lines (91 loc) · 4.35 KB
/
re.stanza
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
;This file defines the 're' (short for regex) package for Stanza. It provides functions
;for regex search and split (no replace function is defined in this file). Details on
;each function are given in the comment above its definition.
;------------------------------------DEFINITIONS--------------------------------------
;Define our package name
defpackage re :
import core
import collections
;Result object produced by a regex search. It has two fields:
;  m - a vector holding all the matches and matched groups
;  g - a table mapping each named capture group's name to the vector of strings
;      captured under that name
defstruct regex :
  m: Vector<String>
  g: HashTable<String,Vector<String>>
;Prints a regex object to the given stream as two lines: the vector of matches
;followed by the table of named capture groups.
defmethod print (o: OutputStream, r: regex) :
  val matched = m(r)
  val named = g(r)
  println(o, "Matches: %_" % [matched])
  println(o, "Named capture groups: %_" % [named])
;Global accumulators. They are filled by the append_* functions that C calls and
;are replaced with fresh, empty containers by set-defaults.

;Matches and matched groups found so far
var matches_found: Vector<String> = Vector<String>()
;Named capture group name -> strings matched under that name
var named_matches_found: HashTable<String,Vector<String>> = HashTable<String,Vector<String>>()
;For regex split operations: the positions at which the regex was found
var positions_found: Vector<Int> = Vector<Int>()
;The size of the match found at each of those positions
var sizes: Vector<Int> = Vector<Int>()
;------------------------------FUNCTIONS CALLED FROM C--------------------------------
;Called from C to record a match. The C string is converted to a Stanza String
;and appended to the global matches_found vector. Always returns 0.
extern defn append_matches_found (matched: ptr<byte>) -> int :
  val text = String(matched)
  add(matches_found, text)
  return 0
;Called from C to record a match for a named capture group. The matched text is
;appended to the vector stored under the group's name in the global
;named_matches_found table; a new vector is created the first time a name is seen.
;Always returns 0.
extern defn append_named_matches_found (name: ptr<byte>, matched: ptr<byte>) -> int :
  ;Convert each C string exactly once rather than re-allocating a String for
  ;every table operation.
  val group = String(name)
  val text = String(matched)
  if key?(named_matches_found, group) == true :
    ;Known group: append to its existing vector
    add(get(named_matches_found, group), text)
  else :
    ;First match for this group: build its vector, then register it
    val captured = Vector<String>()
    add(captured, text)
    set(named_matches_found, group, captured)
  return 0
;Called from C to record where a match was found and how long it is. The position
;is appended to the global positions_found vector and the size to the global sizes
;vector, so entry i of each vector describes the same match. Always returns 0.
extern defn append_positions (p: int, s: int) -> int :
  ;Box the primitive ints as Stanza Int objects so they can be stored in the vectors
  val boxed-position = new Int{p}
  val boxed-size = new Int{s}
  add(positions_found, boxed-position)
  add(sizes, boxed-size)
  return 0
;-------------------------------FUNCTIONS DEFINED IN C--------------------------------
;External C function that starts a regex search. In this file it is always called
;with the pattern's characters first and the subject's characters second. The int
;it returns is not inspected by any caller in this file.
extern pcre_search : (ptr<byte>, ptr<byte>) -> int
;-------------------------------PRIVATE HELPERS---------------------------------------
;LoStanza wrapper that lets regex-split start the C search: it passes the
;pattern's and subject's characters to pcre_search, ignores the returned int and
;returns false. The original author noted that this could instead be exposed to
;users directly, like regex-search, but kept it as a private helper because of
;unresolved questions about LoStanza functions.
lostanza defn regex-split-lostanza (pattern: ref<String>, subject: ref<String>) -> ref<False> :
  call-c pcre_search(addr!(pattern.chars), addr!(subject.chars))
  return false
;Resets every global accumulator by rebinding it to a fresh, empty container.
;Containers handed out earlier (for example inside a regex object) are not touched.
defn set-defaults () -> False :
  matches_found = Vector<String>()
  named_matches_found = HashTable<String,Vector<String>>()
  positions_found = Vector<Int>()
  sizes = Vector<Int>()
  false
;-----------------------------EXPOSED PUBLIC FUNCTIONS--------------------------------
;Public accessor for the matches of a regex object (its 'm' field). It is needed
;because the struct itself isn't public.
public defn matches (r: regex) -> Vector<String> : m(r)
;Public accessor for the named capture groups of a regex object (its 'g' field).
public defn groups (r: regex) -> HashTable<String,Vector<String>> : g(r)
;Regex search. Arguments: 1) the regex pattern 2) the string to search.
;Returns a regex object holding whatever the search accumulated in the global
;matches_found and named_matches_found containers.
public lostanza defn regex-search (pattern: ref<String>, subject: ref<String>) -> ref<regex> :
  ;Run the C search; its int result is not inspected
  call-c pcre_search(addr!(pattern.chars), addr!(subject.chars))
  ;Capture the current containers in the result before the globals are rebound
  val result = regex(matches_found, named_matches_found)
  set-defaults()
  return result
;Regex split. Arguments: 1) the regex pattern 2) the string to split.
;Returns a vector with the pieces of 'subject' that lie between the recorded
;matches, in order. With n matches the result has n + 1 pieces; a piece is the
;empty string when a match sits at the very start or end of 'subject' or when two
;matches are adjacent.
public defn regex-split (pattern: String, subject: String) -> Vector<String> :
  ;Run the search. positions_found and sizes are expected to be filled through
  ;append_positions while it runs.
  regex-split-lostanza(pattern, subject)
  val pieces = Vector<String>()
  ;Index of the first character of 'subject' not yet consumed
  var cursor:Int = 0
  ;Walk each match's position and size together. Half-open ranges express an
  ;empty piece directly, with no 'p - 1' arithmetic.
  for (p in positions_found, s in sizes) do :
    add(pieces, subject[cursor to p])
    ;Skip over the matched text itself
    cursor = p + s
  ;Whatever follows the last match (the whole subject if there were no matches)
  add(pieces, subject[cursor to length(subject)])
  set-defaults()
  pieces