Skip to content
This repository has been archived by the owner on May 7, 2021. It is now read-only.
/ CVC4-archived Public archive

Commit

Permalink
Add support for str.replace_re/str.replace_re_all (#4594)
Browse files Browse the repository at this point in the history
This commit adds support for the last remaining string operators from
the new SMT-LIB standard for the theory of strings. The commit adds the
kinds, type checking, reductions, and evaluation rewrites for
`str.replace_re` and `str.replace_re_all`.
  • Loading branch information
4tXJ7f authored Jun 10, 2020
1 parent 05c0998 commit 2da2646
Show file tree
Hide file tree
Showing 19 changed files with 693 additions and 6 deletions.
8 changes: 8 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ This file contains a summary of important user-visible changes.
Changes since 1.7
=================

New Features:
* Strings: Full support of the new SMT-LIB standard for the theory of strings,
including:
* Support for `str.replace_re`, `str.replace_re_all`, `str.is_digit`,
`str.from_code`, `re.diff`, and `re.comp`
* Support for new operator names (e.g. `str.in_re` instead of `str.in.re`),
new escape sequences. The new syntax is enabled by default for smt2 files.

Improvements:
* API: Function definitions can now be requested to be global. If the `global`
parameter is set to true, they persist after popping the user context.
Expand Down
4 changes: 4 additions & 0 deletions src/api/cvc4cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ const static std::unordered_map<Kind, CVC4::Kind, KindHashFunction> s_kinds{
{STRING_INDEXOF, CVC4::Kind::STRING_STRIDOF},
{STRING_REPLACE, CVC4::Kind::STRING_STRREPL},
{STRING_REPLACE_ALL, CVC4::Kind::STRING_STRREPLALL},
{STRING_REPLACE_RE, CVC4::Kind::STRING_REPLACE_RE},
{STRING_REPLACE_RE_ALL, CVC4::Kind::STRING_REPLACE_RE_ALL},
{STRING_TOLOWER, CVC4::Kind::STRING_TOLOWER},
{STRING_TOUPPER, CVC4::Kind::STRING_TOUPPER},
{STRING_REV, CVC4::Kind::STRING_REV},
Expand Down Expand Up @@ -526,6 +528,8 @@ const static std::unordered_map<CVC4::Kind, Kind, CVC4::kind::KindHashFunction>
{CVC4::Kind::STRING_STRIDOF, STRING_INDEXOF},
{CVC4::Kind::STRING_STRREPL, STRING_REPLACE},
{CVC4::Kind::STRING_STRREPLALL, STRING_REPLACE_ALL},
{CVC4::Kind::STRING_REPLACE_RE, STRING_REPLACE_RE},
{CVC4::Kind::STRING_REPLACE_RE_ALL, STRING_REPLACE_RE_ALL},
{CVC4::Kind::STRING_TOLOWER, STRING_TOLOWER},
{CVC4::Kind::STRING_TOUPPER, STRING_TOUPPER},
{CVC4::Kind::STRING_REV, STRING_REV},
Expand Down
27 changes: 27 additions & 0 deletions src/api/cvc4cppkind.h
Original file line number Diff line number Diff line change
Expand Up @@ -2002,6 +2002,33 @@ enum CVC4_PUBLIC Kind : int32_t
* mkTerm(Kind kind, const std::vector<Term>& children)
*/
STRING_REPLACE_ALL,
/**
* String replace regular expression match.
* Replaces the first match of a regular expression r in string s1 with
* string s2. If r does not match a substring of s1, s1 is returned
* unmodified.
* Parameters: 3
* -[1]: Term of sort String (string s1)
* -[2]: Term of sort Regexp (regexp r)
* -[3]: Term of sort String (string s2)
* Create with:
* mkTerm(Kind kind, Term child1, Term child2, Term child3)
* mkTerm(Kind kind, const std::vector<Term>& children)
*/
STRING_REPLACE_RE,
/**
* String replace all regular expression matches.
* Replaces all matches of a regular expression r in string s1 with string
* s2. If r does not match a substring of s1, s1 is returned unmodified.
* Parameters: 3
* -[1]: Term of sort String (string s1)
* -[2]: Term of sort Regexp (regexp r)
* -[3]: Term of sort String (string s2)
* Create with:
* mkTerm(Kind kind, Term child1, Term child2, Term child3)
* mkTerm(Kind kind, const std::vector<Term>& children)
*/
STRING_REPLACE_RE_ALL,
/**
* String to lower case.
* Parameters: 1
Expand Down
2 changes: 2 additions & 0 deletions src/parser/smt2/smt2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ void Smt2::addStringOperators() {
addOperator(api::STRING_CHARAT, "str.at");
addOperator(api::STRING_INDEXOF, "str.indexof");
addOperator(api::STRING_REPLACE, "str.replace");
addOperator(api::STRING_REPLACE_RE, "str.replace_re");
addOperator(api::STRING_REPLACE_RE_ALL, "str.replace_re_all");
if (!strictModeEnabled())
{
addOperator(api::STRING_TOLOWER, "str.tolower");
Expand Down
4 changes: 4 additions & 0 deletions src/printer/smt2/smt2_printer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,8 @@ void Smt2Printer::toStream(std::ostream& out,
case kind::STRING_STRIDOF:
case kind::STRING_STRREPL:
case kind::STRING_STRREPLALL:
case kind::STRING_REPLACE_RE:
case kind::STRING_REPLACE_RE_ALL:
case kind::STRING_TOLOWER:
case kind::STRING_TOUPPER:
case kind::STRING_REV:
Expand Down Expand Up @@ -1191,6 +1193,8 @@ static string smtKindString(Kind k, Variant v)
case kind::STRING_STRIDOF: return "str.indexof" ;
case kind::STRING_STRREPL: return "str.replace" ;
case kind::STRING_STRREPLALL: return "str.replace_all";
case kind::STRING_REPLACE_RE: return "str.replace_re";
case kind::STRING_REPLACE_RE_ALL: return "str.replace_re_all";
case kind::STRING_TOLOWER: return "str.tolower";
case kind::STRING_TOUPPER: return "str.toupper";
case kind::STRING_REV: return "str.rev";
Expand Down
7 changes: 5 additions & 2 deletions src/theory/strings/extf_solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ ExtfSolver::ExtfSolver(context::Context* c,
d_extt.addFunctionKind(kind::STRING_STOI);
d_extt.addFunctionKind(kind::STRING_STRREPL);
d_extt.addFunctionKind(kind::STRING_STRREPLALL);
d_extt.addFunctionKind(kind::STRING_REPLACE_RE);
d_extt.addFunctionKind(kind::STRING_REPLACE_RE_ALL);
d_extt.addFunctionKind(kind::STRING_STRCTN);
d_extt.addFunctionKind(kind::STRING_IN_REGEXP);
d_extt.addFunctionKind(kind::STRING_LEQ);
Expand Down Expand Up @@ -180,8 +182,9 @@ bool ExtfSolver::doReduction(int effort, Node n)
NodeManager* nm = NodeManager::currentNM();
Assert(k == STRING_SUBSTR || k == STRING_STRCTN || k == STRING_STRIDOF
|| k == STRING_ITOS || k == STRING_STOI || k == STRING_STRREPL
|| k == STRING_STRREPLALL || k == STRING_LEQ || k == STRING_TOLOWER
|| k == STRING_TOUPPER || k == STRING_REV);
|| k == STRING_STRREPLALL || k == STRING_REPLACE_RE
|| k == STRING_REPLACE_RE_ALL || k == STRING_LEQ
|| k == STRING_TOLOWER || k == STRING_TOUPPER || k == STRING_REV);
std::vector<Node> new_nodes;
Node res = d_preproc.simplify(n, new_nodes);
Assert(res != n);
Expand Down
4 changes: 4 additions & 0 deletions src/theory/strings/kinds
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ operator STRING_LEQ 2 "string less than or equal"
operator STRING_STRIDOF 3 "string indexof"
operator STRING_STRREPL 3 "string replace"
operator STRING_STRREPLALL 3 "string replace all"
operator STRING_REPLACE_RE 3 "string replace regular expression match"
operator STRING_REPLACE_RE_ALL 3 "string replace all regular expression matches"
typerule STRING_REPLACE_RE "SimpleTypeRule<RString, AString, ARegExp, AString>"
typerule STRING_REPLACE_RE_ALL "SimpleTypeRule<RString, AString, ARegExp, AString>"
operator STRING_PREFIX 2 "string prefixof"
operator STRING_SUFFIX 2 "string suffixof"
operator STRING_IS_DIGIT 1 "string isdigit, returns true if argument is a string of length one that represents a digit"
Expand Down
2 changes: 2 additions & 0 deletions src/theory/strings/rewrites.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ const char* toString(Rewrite r)
case Rewrite::RPL_RPL_EMPTY: return "RPL_RPL_EMPTY";
case Rewrite::RPL_RPL_LEN_ID: return "RPL_RPL_LEN_ID";
case Rewrite::RPL_X_Y_X_SIMP: return "RPL_X_Y_X_SIMP";
case Rewrite::REPLACE_RE_EVAL: return "REPLACE_RE_EVAL";
case Rewrite::REPLACE_RE_ALL_EVAL: return "REPLACE_RE_ALL_EVAL";
case Rewrite::SPLIT_EQ: return "SPLIT_EQ";
case Rewrite::SPLIT_EQ_STRIP_L: return "SPLIT_EQ_STRIP_L";
case Rewrite::SPLIT_EQ_STRIP_R: return "SPLIT_EQ_STRIP_R";
Expand Down
2 changes: 2 additions & 0 deletions src/theory/strings/rewrites.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ enum class Rewrite : uint32_t
RPL_RPL_EMPTY,
RPL_RPL_LEN_ID,
RPL_X_Y_X_SIMP,
REPLACE_RE_EVAL,
REPLACE_RE_ALL_EVAL,
SPLIT_EQ,
SPLIT_EQ_STRIP_L,
SPLIT_EQ_STRIP_R,
Expand Down
116 changes: 116 additions & 0 deletions src/theory/strings/sequences_rewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1413,6 +1413,14 @@ RewriteResponse SequencesRewriter::postRewrite(TNode node)
{
retNode = rewriteReplaceAll(node);
}
else if (nk == kind::STRING_REPLACE_RE)
{
retNode = rewriteReplaceRe(node);
}
else if (nk == kind::STRING_REPLACE_RE_ALL)
{
retNode = rewriteReplaceReAll(node);
}
else if (nk == STRING_REV)
{
retNode = rewriteStrReverse(node);
Expand Down Expand Up @@ -2914,6 +2922,114 @@ Node SequencesRewriter::rewriteReplaceInternal(Node node)
return Node::null();
}

Node SequencesRewriter::rewriteReplaceRe(Node node)
{
Assert(node.getKind() == STRING_REPLACE_RE);
NodeManager* nm = NodeManager::currentNM();
Node x = node[0];
Node y = node[1];
Node z = node[2];

if (x.isConst())
{
// str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
std::pair<size_t, size_t> match = firstMatch(x, y);
if (match.first != string::npos)
{
String s = x.getConst<String>();
Node ret = nm->mkNode(STRING_CONCAT,
nm->mkConst(s.substr(0, match.first)),
z,
nm->mkConst(s.substr(match.second)));
return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
}
else
{
return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
}
}
return node;
}

Node SequencesRewriter::rewriteReplaceReAll(Node node)
{
Assert(node.getKind() == STRING_REPLACE_RE_ALL);
NodeManager* nm = NodeManager::currentNM();
Node x = node[0];
Node y = node[1];
Node z = node[2];

if (x.isConst())
{
// str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
// "Z" ++ y ++ "Z" ++ y
TypeNode t = x.getType();
Node emp = Word::mkEmptyWord(t);
Node yp = Rewriter::rewrite(
nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
std::vector<Node> res;
String rem = x.getConst<String>();
std::pair<size_t, size_t> match(0, 0);
while (rem.size() >= 0)
{
match = firstMatch(nm->mkConst(rem), yp);
if (match.first == string::npos)
{
break;
}
res.push_back(nm->mkConst(rem.substr(0, match.first)));
res.push_back(z);
rem = rem.substr(match.second);
}
res.push_back(nm->mkConst(rem));
Node ret = utils::mkConcat(res, t);
return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
}

return node;
}

std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
{
Assert(n.isConst() && n.getType().isStringLike());
Assert(r.getType().isRegExp());
NodeManager* nm = NodeManager::currentNM();

std::vector<Node> emptyVec;
Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
String s = n.getConst<String>();

if (s.size() == 0)
{
if (RegExpEntail::testConstStringInRegExp(s, 0, r))
{
return std::make_pair(0, 0);
}
else
{
return std::make_pair(string::npos, string::npos);
}
}

for (size_t i = 0, size = s.size(); i < size; i++)
{
if (RegExpEntail::testConstStringInRegExp(s, i, re))
{
for (size_t j = i; j <= size; j++)
{
String substr = s.substr(i, j - i);
if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
{
return std::make_pair(i, j);
}
}
}
}

return std::make_pair(string::npos, string::npos);
}

Node SequencesRewriter::rewriteStrReverse(Node node)
{
Assert(node.getKind() == STRING_REV);
Expand Down
27 changes: 27 additions & 0 deletions src/theory/strings/sequences_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,33 @@ class SequencesRewriter : public TheoryRewriter
* str.replaceall. If it returns a non-null ret, then node rewrites to ret.
*/
Node rewriteReplaceInternal(Node node);
/** rewrite regular expression replace
*
* This method implements rewrite rules that apply to terms of the form
* str.replace_re(s, r, t).
*
* @param node The node to rewrite
* @return The rewritten node
*/
Node rewriteReplaceRe(Node node);
/** rewrite regular expression replace
*
* This method implements rewrite rules that apply to terms of the form
* str.replace_re_all(s, r, t).
*
* @param node The node to rewrite
* @return The rewritten node
*/
Node rewriteReplaceReAll(Node node);
/**
* Returns the first, shortest sequence in n that matches r.
*
* @param n The constant string or sequence to search in.
* @param r The regular expression to search for.
* @return A pair holding the start position and the end position of the
* match or a pair of string::npos if r does not appear in n.
*/
std::pair<size_t, size_t> firstMatch(Node n, Node r);
/** rewrite string reverse
*
* This is the entry point for post-rewriting terms n of the form
Expand Down
18 changes: 18 additions & 0 deletions src/theory/strings/skolem_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ class SkolemCache
// of b in a as the witness for contains( a, b ).
SK_FIRST_CTN_PRE,
SK_FIRST_CTN_POST,
// For sequence a and regular expression b,
// in_re(a, re.++(_*, b, _*)) =>
// exists k_pre, k_match, k_post.
// a = k_pre ++ k_match ++ k_post ^
// ~in_re(k_pre ++ substr(k_match, 0, str.len(k_match) - 1),
// re.++(_*, b, _*)) ^
// in_re(k2, y)
SK_FIRST_MATCH_PRE,
SK_FIRST_MATCH,
SK_FIRST_MATCH_POST,
// For integer b,
// len( a ) > b =>
// exists k. a = k ++ a' ^ len( k ) = b
Expand All @@ -120,6 +130,14 @@ class SkolemCache
// k(x) is the end index of the x^th occurrence of b in a
// where n is the number of occurrences of b in a, and k(0)=0.
SK_OCCUR_INDEX,
// For function k: Int -> Int
// exists k.
// forall 0 <= x < n,
// k(x) is the length of the x^th occurrence of b in a (excluding
// matches of empty strings)
// where b is a regular expression, n is the number of occurrences of b
// in a, and k(0)=0.
SK_OCCUR_LEN,
};
/**
* Returns a skolem of type string that is cached for (a,b,id) and has
Expand Down
7 changes: 4 additions & 3 deletions src/theory/strings/term_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,10 @@ void TermRegistry::preRegisterTerm(TNode n)
if (!options::stringExp())
{
if (k == STRING_STRIDOF || k == STRING_ITOS || k == STRING_STOI
|| k == STRING_STRREPL || k == STRING_STRREPLALL || k == STRING_STRCTN
|| k == STRING_LEQ || k == STRING_TOLOWER || k == STRING_TOUPPER
|| k == STRING_REV)
|| k == STRING_STRREPL || k == STRING_STRREPLALL
|| k == STRING_REPLACE_RE || k == STRING_REPLACE_RE_ALL
|| k == STRING_STRCTN || k == STRING_LEQ || k == STRING_TOLOWER
|| k == STRING_TOUPPER || k == STRING_REV)
{
std::stringstream ss;
ss << "Term of kind " << k
Expand Down
3 changes: 3 additions & 0 deletions src/theory/strings/theory_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ TheoryStrings::TheoryStrings(context::Context* c,
d_equalityEngine.addFunctionKind(kind::STRING_STRIDOF);
d_equalityEngine.addFunctionKind(kind::STRING_STRREPL);
d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL);
d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE);
d_equalityEngine.addFunctionKind(kind::STRING_REPLACE_RE_ALL);
d_equalityEngine.addFunctionKind(kind::STRING_STRREPLALL);
d_equalityEngine.addFunctionKind(kind::STRING_TOLOWER);
d_equalityEngine.addFunctionKind(kind::STRING_TOUPPER);
d_equalityEngine.addFunctionKind(kind::STRING_REV);
Expand Down
Loading

0 comments on commit 2da2646

Please sign in to comment.