Skip to content

Commit

Permalink
part of Issue #940, simplify regex substring search methods to only use Regex.search (#1030)
Browse files Browse the repository at this point in the history

Co-authored-by: Pierce Hayes <[email protected]>
  • Loading branch information
stress-tess and Pierce Hayes authored Jan 19, 2022
1 parent f68e9e2 commit 9912eb2
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 33 deletions.
2 changes: 2 additions & 0 deletions arkouda/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,8 @@ def startswith(self, substr: Union[bytes, str_scalars], regex: bool = False) ->
matcher = self._get_matcher(substr, create=False)
if matcher is not None:
return matcher.get_match(MatchType.MATCH, self).matched()
else:
return self.contains('^' + substr, regex=True)
cmd = "segmentedEfunc"
args = "{} {} {} {} {} {} {}".format("startswith",
self.objtype,
Expand Down
34 changes: 6 additions & 28 deletions src/SegmentedArray.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -716,46 +716,24 @@ module SegmentedArray {
:returns: [domain] bool where index i indicates whether the regular expression, pattern, matched string i of the SegString
*/
// DEPRECATED - All substringSearchRegex calls now handled by Match objects on Client
// TODO: Remove substringSearchRegex
proc substringSearchRegex(const pattern: string, mode: SearchMode) throws {
proc substringSearchRegex(const pattern: string) throws {
var hits: [offsets.aD] bool = false; // the answer
checkCompile(pattern);

// should we do len check here? re2.compile('') is valid regex and matches everything
ref oa = offsets.a;
ref va = values.a;
var lengths = getLengths();

select mode {
when SearchMode.contains {
forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
// regexp.search searches the receiving string for matches at any offset
h = myRegex.search(interpretAsString(va, o..#l, borrow=true)).matched;
}
}
when SearchMode.startsWith {
forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
// regexp.match only returns a match if the start of the string matches the pattern
h = myRegex.match(interpretAsString(va, o..#l, borrow=true)).matched;
}
}
when SearchMode.endsWith {
forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
var matches = myRegex.matches(interpretAsString(va, o..#l, borrow=true));
var lastMatch = matches[matches.size-1][0]; // v1.24.x reMatch, 1.25.x regexMatch
// h = true iff start(lastMatch) + len(lastMatch) == len(string) (-1 to account for null byte)
h = lastMatch.offset + lastMatch.size == l-1;
}
}
forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
// regexp.search searches the receiving string for matches at any offset
h = myRegex.search(interpretAsString(va, o..#l, borrow=true)).matched;
}
return hits;
}

proc substringSearch(const substr: string, mode: SearchMode, regex: bool = false) throws {
if regex || mode == SearchMode.match {
// match always uses substringSearchRegex
return substringSearchRegex(substr, mode);
if regex {
return substringSearchRegex(substr);
}
var hits: [offsets.aD] bool; // the answer
if (size == 0) || (substr.size == 0) {
Expand Down
5 changes: 0 additions & 5 deletions src/SegmentedMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,6 @@ module SegmentedMsg {
truth.a = strings.substringSearch(val, SearchMode.endsWith, regex);
repMsg = "created "+st.attrib(rname);
}
when "match" {
var truth = st.addEntry(rname, strings.size, bool);
truth.a = strings.substringSearch(val, SearchMode.match, regex);
repMsg = "created "+st.attrib(rname);
}
otherwise {
var errorMsg = notImplementedError(pn, "subcmd: %s, (%s, %s)".format(
subcmd, objtype, valtype));
Expand Down

0 comments on commit 9912eb2

Please sign in to comment.