Skip to content

Commit

Permalink
updates related to issue #5140 (#5463)
Browse files Browse the repository at this point in the history
* updates related to issue #5140

* updated/simplified some cases

* fixing feedback comments

* fixed comments and added missing case for get_re_head_tail_reversed

* two bug fixes and some other code improvements
  • Loading branch information
veanes authored Aug 9, 2021
1 parent af5fd10 commit 225204e
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 38 deletions.
199 changes: 167 additions & 32 deletions src/ast/rewriter/seq_rewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Module Name:
Nikolaj Bjorner (nbjorner) 2015-12-5
Murphy Berzish 2017-02-21
Caleb Stanford 2020-07-07
Margus Veanes 2021
--*/

Expand Down Expand Up @@ -872,6 +873,65 @@ br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) {
return BR_FAILED;
}

/*
* In general constructs nth(t,0) but if t = substring(s,j,..) then simplifies to nth(s,j)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_first(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
if (str().is_extract(t, s, j, k))
result = str().mk_nth_i(s, j);
else
result = str().mk_nth_i(t, m_autil.mk_int(0));
return result;
}

/*
* In general constructs substring(t,1,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j+1,k-1)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_rest(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_substr(s, m_autil.mk_add(j, one), m_autil.mk_sub(k, one));
else
result = str().mk_substr(t, one, m_autil.mk_sub(str().mk_length(t), one));
return result;
}

/*
* In general constructs nth(t,|t|-1) but if t = substring(s,j,k) then simplifies to nth(s,j+k-1)
* This method assumes that |t| > 0.
*/
expr_ref seq_rewriter::mk_seq_last(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_nth_i(s, m_autil.mk_sub(m_autil.mk_add(j, k), one));
else
result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one));
return result;
}

/*
* In general constructs substring(t,0,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j,k-1)
* This method assumes that |t| > 0 holds.
*/
expr_ref seq_rewriter::mk_seq_butlast(expr* t) {
expr_ref result(m());
expr* s, * j, * k;
expr_ref one(m_autil.mk_int(1), m());
if (str().is_extract(t, s, j, k))
result = str().mk_substr(s, j, m_autil.mk_sub(k, one));
else
result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), one));
return result;
}

/*
Lift all ite expressions to the top level, safely
throttled to not blowup the size of the expression.
Expand Down Expand Up @@ -1893,13 +1953,13 @@ br_status seq_rewriter::mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref&
return BR_REWRITE1;
}
zstring s1, s2;
expr_ref_vector strs(m());
if (str().is_string(a, s1) && str().is_string(b, s2)) {
SASSERT(s2.length() > 0);
if (s1.length() < s2.length()) {
result = a;
return BR_DONE;
}
expr_ref_vector strs(m());
for (unsigned i = 0; i < s1.length(); ++i) {
if (s1.length() >= s2.length() + i &&
s2 == s1.extract(i, s2.length())) {
Expand All @@ -1912,11 +1972,62 @@ br_status seq_rewriter::mk_seq_replace_all(expr* a, expr* b, expr* c, expr_ref&
result = str().mk_concat(strs, a->get_sort());
return BR_REWRITE_FULL;
}
// TBD: add case when a, b are concatenation of units that are values.
// in this case we can use a similar loop as for strings.
expr_ref_vector a_vals(m());
expr_ref_vector b_vals(m());
if (try_get_unit_values(a, a_vals) && try_get_unit_values(b, b_vals)) {
replace_all_subvectors(a_vals, b_vals, c, strs);
result = str().mk_concat(strs, a->get_sort());
return BR_REWRITE_FULL;
}

//TODO: the case when a is a unit or concatenation of units while b is a string
//or the other way around -- if that situation is possible at all -- is similar to the above
return BR_FAILED;
}

/*
* Returns false if s is not a single unit value or concatenation of unit values.
* Else extracts the units from s into vals and returns true.
*/
bool seq_rewriter::try_get_unit_values(expr* s, expr_ref_vector& vals) {
expr* h, * t, * v;
t = s;
//collect all unit values from s, if not all elements are unit-values then fail
while (str().is_concat(t, h, t))
if (str().is_unit(h, v) && m().is_value(v))
vals.push_back(h);
else
return false;
//add the last element
if (str().is_unit(t, v) && m().is_value(v))
vals.push_back(t);
else
return false;
return true;
}

/*
* Replace all subvectors of b in a by c
*/
void seq_rewriter::replace_all_subvectors(expr_ref_vector const& a, expr_ref_vector const& b, expr* c, expr_ref_vector& result) {
unsigned int i = 0;
unsigned int k = b.size();
while (i + k <= a.size()) {
//if a[i..i+k-1] equals b then replace it by c and inceremnt i by k
int j = 0;
while (j < k && b[j] == a[i + j]) j += 1;
if (j < k) //the equality failed
result.push_back(a[i++]);
else { //the equality succeeded
result.push_back(c);
i += k;
}
}
//add the trailing elements from a
while (i < a.size())
result.push_back(a[i++]);
}

br_status seq_rewriter::mk_seq_replace_re_all(expr* a, expr* b, expr* c, expr_ref& result) {
return BR_FAILED;
}
Expand Down Expand Up @@ -2521,12 +2632,18 @@ bool seq_rewriter::get_re_head_tail_reversed(expr* r, expr_ref& head, expr_ref&
if (re().is_concat(r, r1, r2)) {
unsigned len = re().min_length(r2);
if (len != UINT_MAX && re().max_length(r2) == len) {
head = r1;
tail = r2;
if (get_re_head_tail_reversed(r1, head, tail))
// left associative binding of concat
tail = mk_re_append(tail, r2);
else {
// right associative binding of concat
head = r1;
tail = r2;
}
return true;
}
if (get_re_head_tail_reversed(r2, head, tail)) {
head = re().mk_concat(r1, head);
head = mk_re_append(r1, head);
return true;
}
}
Expand Down Expand Up @@ -3059,7 +3176,8 @@ expr_ref seq_rewriter::mk_der_op_rec(decl_kind k, expr* a, expr* b) {
return result;
}
// Order with higher IDs on the outside
if (get_id(ca) < get_id(cb)) {
bool is_symmetric = k == OP_RE_UNION || k == OP_RE_INTERSECT;
if (is_symmetric && get_id(ca) < get_id(cb)) {
std::swap(a, b);
std::swap(ca, cb);
std::swap(notca, notcb);
Expand Down Expand Up @@ -3348,35 +3466,53 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
//while mk_empty() = [], because deriv(epsilon) = [] = nothing
return mk_empty();
}
else if (str().is_itos(r1, r2)) {
else if (str().is_itos(r1)) {
//
// here r1 = (str.from_int r2) and r2 is non-ground
// or else the expression would have been simplified earlier
// so r1 must be nonempty and must consists of decimal digits
// '0' <= elem <= '9'
// if ((isdigit ele) and (ele = (hd r1))) then (to_re (tl r1)) else []
//
hd = str().mk_nth_i(r1, m_autil.mk_int(0));
hd = mk_seq_first(r1);
m_br.mk_and(u().mk_le(m_util.mk_char('0'), ele), u().mk_le(ele, m_util.mk_char('9')), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(str().mk_substr(r1, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(r1), m_autil.mk_int(1))));
tl = re().mk_to_re(mk_seq_rest(r1));
return re_and(result, tl);
}
}
else if (re().is_reverse(r, r1) && re().is_to_re(r1, r2)) {
// Reverses are rewritten so that the only derivative case is
// derivative of a reverse of a string. (All other cases stuck)
// This is analagous to the previous is_to_re case.
expr_ref hd(m()), tl(m());
if (get_head_tail_reversed(r2, hd, tl)) {
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
result = m().mk_eq(ele, tl);
result = mk_der_cond(result, ele, seq_sort);
result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
return result;
else {
// recall: [] denotes the empty language (nothing) regex, () denotes epsilon or empty sequence
// construct the term (if (r1 != () and (ele = (first r1)) then (to_re (rest r1)) else []))
hd = mk_seq_first(r1);
m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(mk_seq_rest(r1));
return re_and(result, tl);
}
else if (str().is_empty(r2)) {
return mk_empty();
}
else if (re().is_reverse(r, r1)) {
if (re().is_to_re(r1, r2)) {
// First try to exctract hd and tl such that r = hd ++ tl and |tl|=1
expr_ref hd(m()), tl(m());
if (get_head_tail_reversed(r2, hd, tl)) {
// Use mk_der_cond to normalize
STRACE("seq_verbose", tout << "deriv reverse to_re" << std::endl;);
result = m().mk_eq(ele, tl);
result = mk_der_cond(result, ele, seq_sort);
result = mk_der_concat(result, re().mk_reverse(re().mk_to_re(hd)));
return result;
}
else if (str().is_empty(r2)) {
return mk_empty();
}
else {
// construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else []))
// hd = first of reverse(r2) i.e. last of r2
// tl = rest of reverse(r2) i.e. butlast of r2
//hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), m_autil.mk_int(1)));
hd = mk_seq_last(r2);
m_br.mk_and(m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result);
tl = re().mk_to_re(mk_seq_butlast(r2));
return re_and(result, re().mk_reverse(tl));
}
}
}
else if (re().is_range(r, r1, r2)) {
Expand Down Expand Up @@ -3423,10 +3559,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) {
STRACE("seq_verbose", tout << "deriv of_pred" << std::endl;);
return mk_der_cond(result, ele, seq_sort);
}
// stuck cases: re.derivative, variable,
// str.to_re if the head of the string can't be obtained,
// and re.reverse if not applied to a string or if the tail char
// of the string can't be obtained
// stuck cases: re.derivative, re variable,
return expr_ref(re().mk_derivative(ele, r), m());
}

Expand Down Expand Up @@ -3662,12 +3795,14 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
}
if (get_re_head_tail_reversed(b, hd, tl)) {
SASSERT(re().min_length(tl) == re().max_length(tl));
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
expr_ref len_tl(m_autil.mk_int(re().min_length(tl)), m());
expr_ref len_a(str().mk_length(a), m());
expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m());
expr* s = nullptr;
result = m().mk_and(m_autil.mk_ge(len_a, len_tl),
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl));
re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd),
(false && re().is_to_re(tl, s) ? m().mk_eq(s, str().mk_substr(a, len_hd, len_tl)) :
re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)));
return BR_REWRITE_FULL;
}

Expand Down
20 changes: 20 additions & 0 deletions src/ast/rewriter/seq_rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,16 @@ class seq_rewriter {

expr_ref mk_seq_concat(expr* a, expr* b);

// Construct the expressions for taking the first element, the last element, the rest, and the butlast element
expr_ref mk_seq_first(expr* s);
expr_ref mk_seq_rest(expr* s);
expr_ref mk_seq_last(expr* s);
expr_ref mk_seq_butlast(expr* s);

bool try_get_unit_values(expr* s, expr_ref_vector& result);
//replace b in a by c into result
void replace_all_subvectors(expr_ref_vector const& as, expr_ref_vector const& bs, expr* c, expr_ref_vector& result);

// Calculate derivative, memoized and enforcing a normal form
expr_ref is_nullable_rec(expr* r);
expr_ref mk_derivative_rec(expr* ele, expr* r);
Expand Down Expand Up @@ -344,6 +354,16 @@ class seq_rewriter {
return result;
}

/*
* makes concat and simplifies
*/
expr_ref mk_re_append(expr* r1, expr* r2) {
expr_ref result(m());
if (mk_re_concat(r1, r2, result) == BR_FAILED)
result = re().mk_concat(r1, r2);
return result;
}

/**
* check if regular expression is of the form all ++ s ++ all ++ t + u ++ all, where, s, t, u are sequences
*/
Expand Down
19 changes: 14 additions & 5 deletions src/ast/seq_decl_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ unsigned seq_util::str::max_length(expr* s) const {
return UINT_MAX;
};
while (is_concat(s, s1, s2)) {
result = u.max_plus(get_length(s), result);
result = u.max_plus(get_length(s1), result);
s = s2;
}
result = u.max_plus(get_length(s), result);
Expand Down Expand Up @@ -1058,6 +1058,7 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) {
*/
std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const {
SASSERT(re.u.is_seq(s));
zstring z;
if (re.u.str.is_empty(s))
out << "()";
else if (re.u.str.is_unit(s))
Expand All @@ -1068,6 +1069,10 @@ std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s)
for (expr* e : es)
compact_helper_seq(out, e);
}
else if (re.u.str.is_string(s, z)) {
for (int i = 0; i < z.length(); i++)
out << (char)z[i];
}
//using braces to indicate 'full' output
//for example an uninterpreted constant X will be printed as {X}
//while a unit sequence "X" will be printed as X
Expand Down Expand Up @@ -1100,7 +1105,7 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
expr* e;
unsigned n = 0;
if (re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) {
if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
char c = (char)n;
if (c == '\n')
out << "\\n";
Expand Down Expand Up @@ -1148,7 +1153,11 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
unsigned lo = 0, hi = 0;
if (re.is_full_char(e))
if (re.u.is_char(e))
return seq_unit(out, e);
else if (re.u.is_seq(e))
return compact_helper_seq(out, e);
else if (re.is_full_char(e))
return out << ".";
else if (re.is_full_seq(e))
return out << ".*";
Expand All @@ -1163,9 +1172,9 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
else if (re.is_concat(e, r1, r2))
return out << pp(re, r1) << pp(re, r2);
else if (re.is_union(e, r1, r2))
return out << pp(re, r1) << "|" << pp(re, r2);
return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")";
else if (re.is_intersection(e, r1, r2))
return out << "(" << pp(re, r1) << (html_encode ? ")&amp;(": ")&(" ) << pp(re, r2) << ")";
return out << "(" << pp(re, r1) << "&amp;" /*(html_encode ? ")&amp;(" : ")&(")*/ << pp(re, r2) << ")";
else if (re.is_complement(e, r1)) {
if (can_skip_parenth(r1))
return out << "~" << pp(re, r1);
Expand Down
2 changes: 1 addition & 1 deletion src/util/state_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Module Name:
Author:
Caleb Stanford (calebstanford-msr / cdstanford) 2020-7
Margus Veanes 2020-8
--*/

#include "state_graph.h"
Expand Down

0 comments on commit 225204e

Please sign in to comment.