From 890a66cc66c8dbd716ae70e6842d8842061d563a Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 18 Aug 2021 11:14:09 -0700 Subject: [PATCH 01/16] updated derivative engine --- src/ast/rewriter/seq_rewriter.cpp | 452 +++++++++++++++++++++++++++--- src/ast/rewriter/seq_rewriter.h | 32 ++- src/ast/seq_decl_plugin.cpp | 142 ++++++++-- src/ast/seq_decl_plugin.h | 22 +- src/smt/seq_regex.cpp | 1 + 5 files changed, 569 insertions(+), 80 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 63a5044e587..54170d24558 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -893,12 +893,17 @@ expr_ref seq_rewriter::mk_seq_first(expr* t) { */ expr_ref seq_rewriter::mk_seq_rest(expr* t) { expr_ref result(m()); - expr* s, * j, * k; - expr_ref one(m_autil.mk_int(1), m()); - if (str().is_extract(t, s, j, k)) - result = str().mk_substr(s, m_autil.mk_add(j, one), m_autil.mk_sub(k, one)); + expr* s, * j, * k, * s_, * l; + rational jv; + if (str().is_extract(t, s, j, k) && m_autil.is_numeral(j, jv) && jv.is_nonneg()) { + unsigned i; + expr_ref k_min_1(str().is_len_sub(k, l, s_, i) && s == s_ ? + m_autil.mk_sub(l, m_autil.mk_int(i + 1)) : + m_autil.mk_sub(k, m_autil.mk_int(1)), m()); + result = str().mk_substr(s, m_autil.mk_int(jv.get_int32() + 1), k_min_1); + } else - result = str().mk_substr(t, one, m_autil.mk_sub(str().mk_length(t), one)); + result = str().mk_substr(t, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); return result; } @@ -908,12 +913,18 @@ expr_ref seq_rewriter::mk_seq_rest(expr* t) { */ expr_ref seq_rewriter::mk_seq_last(expr* t) { expr_ref result(m()); - expr* s, * j, * k; - expr_ref one(m_autil.mk_int(1), m()); - if (str().is_extract(t, s, j, k)) - result = str().mk_nth_i(s, m_autil.mk_sub(m_autil.mk_add(j, k), one)); + expr* s, * j, * k, * s_, * l; + rational jv; + if (str().is_extract(t, s, j, k) && m_autil.is_numeral(j, jv) && jv.is_nonneg()) { + unsigned i; + // k = |s_| - i, l = |s_| implies that lastpos = j + k - 1 = k + j - 1 = j + l - i - 1 = l + j - i - 1 + expr_ref lastpos(str().is_len_sub(k, l, s_, i) && s == s_ ? + m_autil.mk_add(l, m_autil.mk_int(jv.get_int32() - 1 - i)) : + m_autil.mk_add(k, m_autil.mk_int(jv.get_int32() - 1)), m()); + result = str().mk_nth_i(s, lastpos); + } else - result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one)); + result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); return result; } @@ -924,11 +935,14 @@ expr_ref seq_rewriter::mk_seq_last(expr* t) { expr_ref seq_rewriter::mk_seq_butlast(expr* t) { expr_ref result(m()); expr* s, * j, * k; - expr_ref one(m_autil.mk_int(1), m()); - if (str().is_extract(t, s, j, k)) - result = str().mk_substr(s, j, m_autil.mk_sub(k, one)); + if (str().is_extract(t, s, j, k)) { + expr_ref_vector k_min_1(m()); + k_min_1.push_back(k); + k_min_1.push_back(m_autil.mk_int(-1)); + result = str().mk_substr(s, j, m_autil.mk_add_simplify(k_min_1)); + } else - result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), one)); + result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); return result; } @@ -2573,7 +2587,7 @@ bool seq_rewriter::is_sequence(expr* e, expr_ref_vector& seq) { } /* - s = head + tail where |head| = 1 + s = [head] + tail where head is the first element of s */ bool seq_rewriter::get_head_tail(expr* s, expr_ref& head, expr_ref& tail) { expr* h = nullptr, *t = nullptr; @@ -2670,10 +2684,10 @@ expr_ref seq_rewriter::re_predicate(expr* cond, sort* seq_sort) { expr_ref seq_rewriter::is_nullable(expr* r) { STRACE("seq_verbose", tout << "is_nullable: " << mk_pp(r, m()) << std::endl;); - expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr), m()); + expr_ref result(m_op_cache.find(_OP_RE_IS_NULLABLE, r, nullptr, nullptr), m()); if (!result) { result = is_nullable_rec(r); - m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, result); + m_op_cache.insert(_OP_RE_IS_NULLABLE, r, nullptr, nullptr, result); } STRACE("seq_verbose", tout << "is_nullable result: " << result << std::endl;); @@ -2691,7 +2705,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { re().is_intersection(r, r1, r2)) { m_br.mk_and(is_nullable(r1), is_nullable(r2), result); } - else if (re().is_union(r, r1, r2)) { + else if (re().is_union(r, r1, r2) || re().is_antimorov_union(r, r1, r2)) { m_br.mk_or(is_nullable(r1), is_nullable(r2), result); } else if (re().is_diff(r, r1, r2)) { @@ -2701,6 +2715,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { else if (re().is_star(r) || re().is_opt(r) || re().is_full_seq(r) || + re().is_epsilon(r) || (re().is_loop(r, r1, lo) && lo == 0) || (re().is_loop(r, r1, lo, hi) && lo == 0)) { result = m().mk_true(); @@ -2724,7 +2739,7 @@ expr_ref seq_rewriter::is_nullable_rec(expr* r) { result = is_nullable(r1); } else if (m().is_ite(r, cond, r1, r2)) { - result = m().mk_ite(cond, is_nullable(r1), is_nullable(r2)); + m_br.mk_ite(cond, is_nullable(r1), is_nullable(r2), result); } else if (m_util.is_re(r, seq_sort)) { result = is_nullable_symbolic_regex(r, seq_sort); @@ -2977,25 +2992,364 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { #endif /* - Memoized, recursive implementation of the symbolic derivative such that - the result is in normal form. - - Functions without _rec are memoized wrappers, which call the _rec - version if lookup fails. - - The main logic is in mk_der_op_rec for combining normal forms. + Memoized, recursive implementation of the symbolic derivative of r as a transition regex with . +*/ +expr_ref seq_rewriter::mk_derivative(expr* r) { + return mk_antimirov_deriv(ele, r, m().mk_true()); +} +/* + Memoized, recursive implementation of the symbolic derivative of r. */ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m()) + /*STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m()) << "," << mk_pp(r, m()) << std::endl;); - expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r), m()); + expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r, nullptr), m()); if (!result) { result = mk_derivative_rec(ele, r); - m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, result); + m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, nullptr, result); } STRACE("seq_verbose", tout << "derivative result: " - << mk_pp(result, m()) << std::endl;); + << re().to_str(result) << std::endl;); CASSERT("seq_regex", check_deriv_normal_form(r)); + return result;*/ + return mk_antimirov_deriv(ele, r, m().mk_true()); +} + +expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) { + expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, e, r, path), m()); + if (!result) { + mk_antimirov_deriv_rec(e, r, path, result); + m_op_cache.insert(OP_RE_DERIVATIVE, e, r, path, result); + STRACE("seq_regex", tout << "D(" << mk_pp(e, m()) << "," << mk_pp(r, m()) << "," << mk_pp(path, m()) << ")" << std::endl;); + STRACE("seq_regex", tout << "= " << mk_pp(result, m()) << std::endl;); + } + return result; +} + +void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result) { + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(r, seq_sort)); + VERIFY(m_util.is_seq(seq_sort, ele_sort)); + SASSERT(ele_sort == e->get_sort()); + expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; + expr_ref c1(m()); + expr_ref c2(m()); + auto nothing = [&]() { return expr_ref(re().mk_empty(r->get_sort()), m()); }; + auto epsilon = [&]() { return expr_ref(re().mk_epsilon(seq_sort), m()); }; + auto dotstar = [&]() { return expr_ref(re().mk_full_seq(r->get_sort()), m()); }; + auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(r->get_sort())), m()); }; + unsigned lo = 0, hi = 0; + if (re().is_empty(r) || re().is_epsilon(r)) + // D(e,[]) = D(e,()) = [] + result = nothing(); + else if (re().is_full_seq(r) || re().is_dot_plus(r)) + // D(e,.*) = D(e,.+) = .* + result = dotstar(); + else if (re().is_full_char(r)) + // D(e,.) = () + result = epsilon(); + else if (re().is_to_re(r, r1)) { + expr_ref h(m()); + expr_ref t(m()); + // here r1 is a sequence + if (get_head_tail(r1, h, t)) { + if (eq_char(e, h)) + result = re().mk_to_re(t); + else if (neq_char(e, h)) + result = nothing(); + else + result = mk_ite_simplify(m().mk_eq(e, h), re().mk_to_re(t), nothing()); + } + else + { + // observe that the precondition |r1|>0 is is implied by c1 for use of mk_seq_first + m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(mk_seq_first(r1), e), c1); + m_br.mk_and(path, c1, c2); + if (m().is_false(c2)) + result = nothing(); + else + // observe that the precondition |r1|>0 is implied by c1 for use of mk_seq_rest + result = m().mk_ite(c1, re().mk_to_re(mk_seq_rest(r1)), nothing()); + } + } + else if (re().is_reverse(r, r2)) + if (re().is_to_re(r2, r1)) { + // here r1 is a sequence + // observe that the precondition |r1|>0 of mk_seq_last is implied by c1 + m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(mk_seq_last(r1), e), c1); + m_br.mk_and(path, c1, c2); + if (m().is_false(c2)) + result = nothing(); + else + // observe that the precondition |r1|>0 of mk_seq_rest is implied by c1 + result = mk_ite_simplify(c1, re().mk_reverse(mk_seq_butlast(r1)), nothing()); + } + else { + result = mk_regex_reverse(r2); + if (result.get() == r) + //r2 is an uninterpreted regex that is stuck + //for example if r = (re.reverse R) where R is a regex variable then + //here result.get() == r + result = re().mk_derivative(e, result); + else + result = mk_antimirov_deriv(e, result, path); + } + else if (re().is_concat(r, r1, r2)) { + expr_ref r1nullable(is_nullable(r1), m()); + c1 = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), r2); + expr_ref r1nullable_and_path(m()); + m_br.mk_and(r1nullable, path, r1nullable_and_path); + if (m().is_false(r1nullable_and_path)) + // D(e,r1)r2 + result = c1; + else + // D(e,r1)r2|(ite (r1nullable) (D(e,r2)) []) + // observe that (mk_ite_simplify(true, D(e,r2), []) = D(e,r2) + result = mk_antimirov_deriv_union(c1, mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); + } + else if (m().is_ite(r, c, r1, r2)) { + m_br.mk_and(path, c, c1); + m_br.mk_and(path, m_br.mk_not(c), c2); + // TODO: try to simplify c1 and c2 further to false when possible + // by checking cases when path is inconsistent with character constraint c + if (m().is_false(c1)) + result = mk_antimirov_deriv(e, r2, c2); + else if (m().is_false(c2)) + result = mk_antimirov_deriv(e, r1, c1); + else + result = mk_ite_simplify(c, mk_antimirov_deriv(e, r1, c1), mk_antimirov_deriv(e, r2, c2)); + } + else if (re().is_range(r, r1, r2)) { + expr_ref range(m()); + expr_ref psi(m()); + if (str().is_unit_string(r1, c1) && str().is_unit_string(r2, c2)) { + SASSERT(u().is_char(c1)); + SASSERT(u().is_char(c2)); + // range represents c1 <= e <= c2 + m_br.mk_and(u().mk_le(c1, e), u().mk_le(e, c2), range); + m_br.mk_and(path, range, psi); + if (m().is_false(psi)) + result = nothing(); + else + // D(e,c1..c2) = if (c1<=e<=c2) then () else [] + result = mk_ite_simplify(range, epsilon(), nothing()); + } + else + result = nothing(); + } + else if (re().is_union(r, r1, r2)) + result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path)); + else if (re().is_intersection(r, r1, r2)) + result = mk_antimirov_deriv_intersection( + mk_antimirov_deriv(e, r1, path), + mk_antimirov_deriv(e, r2, path), m().mk_true()); + else if (re().is_star(r, r1) || re().is_plus(r, r1) || re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1) + result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_star(r1)); + else if (re().is_loop(r, r1, lo)) + result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_loop(r1, lo - 1)); + else if (re().is_loop(r, r1, lo, hi)) { + if (lo == 0 && hi == 0 || hi < lo) + result = nothing(); + else + result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_loop(r1, (lo == 0 ? 0 : lo - 1), hi - 1)); + } + else if (re().is_opt(r, r1)) + result = mk_antimirov_deriv(e, r1, path); + else if (re().is_complement(r, r1)) + // D(e,~r1) = ~D(e,r1) + result = mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r1, path)); + else if (re().is_diff(r, r1, r2)) + result = mk_antimirov_deriv_intersection( + mk_antimirov_deriv(e, r1, path), + mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r2, path)), m().mk_true()); + else + // stuck cases + result = re().mk_derivative(e, r); +} + +expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path) { + auto nothing = [&]() { return expr_ref(re().mk_empty(d1->get_sort()), m()); }; + expr_ref result(m()); + if (d1 == d2) { + result = d1; + return result; + } + expr* c1, * t1, * e1; + if (m().is_ite(d1, c1, t1, e1)) { + expr_ref path_and_c1(m()); + expr_ref path_and_notc1(m()); + m_br.mk_and(path, c1, path_and_c1); + m_br.mk_and(path, m_br.mk_not(c1), path_and_notc1); + + if (m().is_false(path_and_c1)) + result = mk_antimirov_deriv_intersection(e1, d2, path); + else if (m().is_false(path_and_notc1)) + result = mk_antimirov_deriv_intersection(t1, d2, path); + else + result = m().mk_ite(c1, mk_antimirov_deriv_intersection(t1, d2, path_and_c1), + mk_antimirov_deriv_intersection(e1, d2, path_and_notc1)); + } + else if (m().is_ite(d2)) + // swap d1 and d2 + result = mk_antimirov_deriv_intersection(d2, d1, path); + else + // in all other cases create the intersection regex + result = re().mk_inter(d1, d2); + return result; +} + +expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { + expr_ref result(m()); + expr* c, * t, * e; + if (m().is_ite(d, c, t, e)) + result = m().mk_ite(c, mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); + else if (re().is_union(d, t, e)) + result = re().mk_union(mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); + else + result = mk_re_append(d, r); + return result; +} + +expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) { + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(d, seq_sort)); + auto nothing = [&]() { return expr_ref(re().mk_empty(d->get_sort()), m()); }; + auto epsilon = [&]() { return expr_ref(re().mk_epsilon(seq_sort), m()); }; + auto dotstar = [&]() { return expr_ref(re().mk_full_seq(d->get_sort()), m()); }; + auto dotplus = [&]() { return expr_ref(re().mk_plus(re().mk_full_char(d->get_sort())), m()); }; + expr_ref result(m()); + expr* c, * t, * e; + if (re().is_empty(d)) + result = dotstar(); + else if (re().is_epsilon(d)) + result = dotplus(); + else if (re().is_full_seq(d)) + result = nothing(); + else if (re().is_dot_plus(d)) + result = epsilon(); + else if (m().is_ite(d, c, t, e)) + result = m().mk_ite(c, mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + else if (re().is_union(d, t, e)) + result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + else if (re().is_intersection(d, t, e)) + result = re().mk_union(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + else if (re().is_complement(d, t)) + result = t; + else + result = re().mk_complement(d); + return result; +} + +expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) +{ + expr_ref result(m()); + if (re().is_empty(d1) || re().is_full_seq(d2)) + result = d2; + else if (re().is_empty(d2) || re().is_full_seq(d1)) + result = d1; + else if (re().is_dot_plus(d1) && re().get_info(d2).min_length > 0) + result = d1; + else if (re().is_dot_plus(d2) && re().get_info(d1).min_length > 0) + result = d2; + else + //TODO: flatten and order the union + result = re().mk_union(d1, d2); + return result; +} + +expr_ref seq_rewriter::mk_ite_simplify(expr* c, expr* t, expr* e) +{ + expr_ref result(m()); + if (m().is_true(c) || t == e) + result = t; + else if (m().is_false(c)) + result = e; + else + result = m().mk_ite(c, t, e); + return result; +} + +expr_ref seq_rewriter::mk_regex_reverse(expr* r) { + expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; + unsigned lo = 0, hi = 0; + expr_ref result(m()); + if (re().is_empty(r) || re().is_range(r) || re().is_epsilon(r) || re().is_full_seq(r) || + re().is_full_char(r) || re().is_dot_plus(r) || re().is_of_pred(r)) + result = r; + else if (re().is_to_re(r)) + result = re().mk_reverse(r); + else if (re().is_reverse(r, r1)) + result = r1; + else if (re().is_concat(r, r1, r2)) + result = mk_regex_concat(mk_regex_reverse(r2), mk_regex_reverse(r1)); + else if (m().is_ite(r, c, r1, r2)) + result = m().mk_ite(c, mk_regex_reverse(r1), mk_regex_reverse(r2)); + else if (re().is_union(r, r1, r2)) + result = re().mk_union(mk_regex_reverse(r1), mk_regex_reverse(r2)); + else if (re().is_intersection(r, r1, r2)) + result = re().mk_inter(mk_regex_reverse(r1), mk_regex_reverse(r2)); + else if (re().is_diff(r, r1, r2)) + result = re().mk_diff(mk_regex_reverse(r1), mk_regex_reverse(r2)); + else if (re().is_star(r, r1)) + result = re().mk_star(mk_regex_reverse(r1)); + else if (re().is_plus(r, r1)) + result = re().mk_plus(mk_regex_reverse(r1)); + else if (re().is_loop(r, r1, lo)) + result = re().mk_loop(mk_regex_reverse(r1), lo); + else if (re().is_loop(r, r1, lo, hi)) + result = re().mk_loop(mk_regex_reverse(r1), lo, hi); + else if (re().is_opt(r, r1)) + result = re().mk_opt(mk_regex_reverse(r1)); + else if (re().is_complement(r, r1)) + result = re().mk_complement(mk_regex_reverse(r1)); + else + //stuck cases: such as r being a regex variable + //observe that re().mk_reverse(to_re(s)) is not a stuck case + result = re().mk_reverse(r); + return result; +} + +expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) { + sort* seq_sort = nullptr; + VERIFY(m_util.is_re(r, seq_sort)); + SASSERT(r->get_sort() == s->get_sort()); + expr_ref result(m()); + expr* r1, * r2; + if (re().is_epsilon(r) || re().is_empty(s)) + result = s; + else if (re().is_epsilon(s) || re().is_empty(r)) + result = r; + else if (re().is_full_seq(r) && re().is_full_seq(s)) + result = r; + else if (re().is_concat(r, r1, r2)) + //create the resulting concatenation in right-associative form + result = mk_regex_concat(r1, mk_regex_concat(r2, s)); + else { + //TODO: perhaps simplifiy some further cases such as .*. = ..* = .*.+ = .+.* = .+ + result = re().mk_concat(r, s); + } +} + +expr_ref seq_rewriter::mk_in_antimirov(expr* s, expr* d){ + expr_ref result(mk_in_antimirov_rec(s, d), m()); + return result; +} + +expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) { + expr* c, * d1, * d2; + expr_ref result(m()); + if (re().is_full_seq(d) || (str().min_length(s) > 0 && re().is_dot_plus(d))) + // s in .* <==> true, also: s in .+ <==> true when |s|>0 + result = m().mk_true(); + else if (re().is_empty(d) || (str().min_length(s) > 0 && re().is_epsilon(d))) + // s in [] <==> false, also: s in () <==> false when |s|>0 + result = m().mk_false(); + else if (m().is_ite(d, c, d1, d2)) + m_br.mk_ite(c, mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2), result); + else if (re().is_union(d, d1, d2)) + m_br.mk_or(mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2), result); + else + result = re().mk_in_re(s, d); return result; } @@ -3016,7 +3370,7 @@ expr_ref seq_rewriter::mk_der_concat(expr* r1, expr* r2) { } /* - Utility functions to decide char <, ==, and <=. + Utility functions to decide char <, ==, !=, and <=. Return true if deduced, false if unknown. */ bool seq_rewriter::lt_char(expr* ch1, expr* ch2) { @@ -3027,6 +3381,11 @@ bool seq_rewriter::lt_char(expr* ch1, expr* ch2) { bool seq_rewriter::eq_char(expr* ch1, expr* ch2) { return ch1 == ch2; } +bool seq_rewriter::neq_char(expr* ch1, expr* ch2) { + unsigned u1, u2; + return u().is_const_char(ch1, u1) && + u().is_const_char(ch2, u2) && (u1 != u2); +} bool seq_rewriter::le_char(expr* ch1, expr* ch2) { return eq_char(ch1, ch2) || lt_char(ch1, ch2); } @@ -3257,10 +3616,10 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { default: break; } - result = m_op_cache.find(k, a, b); + result = m_op_cache.find(k, a, b, nullptr); if (!result) { result = mk_der_op_rec(k, a, b); - m_op_cache.insert(k, a, b, result); + m_op_cache.insert(k, a, b, nullptr, result); } CASSERT("seq_regex", check_deriv_normal_form(result)); return result; @@ -3269,7 +3628,7 @@ expr_ref seq_rewriter::mk_der_op(decl_kind k, expr* a, expr* b) { expr_ref seq_rewriter::mk_der_compl(expr* r) { STRACE("seq_verbose", tout << "mk_der_compl: " << mk_pp(r, m()) << std::endl;); - expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr), m()); + expr_ref result(m_op_cache.find(OP_RE_COMPLEMENT, r, nullptr, nullptr), m()); if (!result) { expr* c = nullptr, * r1 = nullptr, * r2 = nullptr; if (re().is_antimorov_union(r, r1, r2)) { @@ -3285,7 +3644,7 @@ expr_ref seq_rewriter::mk_der_compl(expr* r) { } else if (BR_FAILED == mk_re_complement(r, result)) result = re().mk_complement(r); - m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, result); + m_op_cache.insert(OP_RE_COMPLEMENT, r, nullptr, nullptr, result); } CASSERT("seq_regex", check_deriv_normal_form(result)); return result; @@ -3537,9 +3896,9 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { return mk_empty(); } } - expr* e1 = nullptr, *e2 = nullptr; + expr* e1 = nullptr, * e2 = nullptr; if (str().is_unit(r1, e1) && str().is_unit(r2, e2)) { - SASSERT(u().is_char(e1)); + SASSERT(u().is_char(e1)); // Use mk_der_cond to normalize STRACE("seq_verbose", tout << "deriv range str" << std::endl;); expr_ref p1(u().mk_le(e1, ele), m()); @@ -3775,8 +4134,10 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { expr_ref hd(m()), tl(m()); if (get_head_tail(a, hd, tl)) { - result = re().mk_in_re(tl, re().mk_derivative(hd, b)); - return BR_REWRITE2; + //result = re().mk_in_re(tl, re().mk_derivative(hd, b)); + //result = re().mk_in_re(tl, mk_derivative(hd, b)); + result = mk_in_antimirov(tl, mk_antimirov_deriv(hd, b, m().mk_true())); + return BR_DONE; } if (get_head_tail_reversed(a, hd, tl)) { @@ -3912,6 +4273,10 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) { return BR_REWRITE2; } expr* a1 = nullptr, *b1 = nullptr; + if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) { + result = re().mk_to_re(str().mk_concat(a1, b1)); + return BR_DONE; + } if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1) { result = a; return BR_DONE; @@ -5275,19 +5640,20 @@ seq_rewriter::op_cache::op_cache(ast_manager& m): m_trail(m) {} -expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b) { - op_entry e(op, a, b, nullptr); +expr* seq_rewriter::op_cache::find(decl_kind op, expr* a, expr* b, expr* c) { + op_entry e(op, a, b, c, nullptr); m_table.find(e, e); return e.r; } -void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* r) { +void seq_rewriter::op_cache::insert(decl_kind op, expr* a, expr* b, expr* c, expr* r) { cleanup(); if (a) m_trail.push_back(a); if (b) m_trail.push_back(b); + if (c) m_trail.push_back(c); if (r) m_trail.push_back(r); - m_table.insert(op_entry(op, a, b, r)); + m_table.insert(op_entry(op, a, b, c, r)); } void seq_rewriter::op_cache::cleanup() { diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index f726a467309..d6d8b43917f 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -117,20 +117,20 @@ class seq_rewriter { class op_cache { struct op_entry { decl_kind k; - expr* a, *b, *r; - op_entry(decl_kind k, expr* a, expr* b, expr* r): k(k), a(a), b(b), r(r) {} - op_entry():k(0), a(nullptr), b(nullptr), r(nullptr) {} + expr* a, *b, *c, *r; + op_entry(decl_kind k, expr* a, expr* b, expr* c, expr* r): k(k), a(a), b(b), c(c), r(r) {} + op_entry():k(0), a(nullptr), b(nullptr), c(nullptr), r(nullptr) {} }; struct hash_entry { unsigned operator()(op_entry const& e) const { - return mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0); + return combine_hash(mk_mix(e.k, e.a ? e.a->get_id() : 0, e.b ? e.b->get_id() : 0), e.c ? e.c->get_id() : 0); } }; struct eq_entry { - bool operator()(op_entry const& a, op_entry const& b) const { - return a.k == b.k && a.a == b.a && a.b == b.b; + bool operator()(op_entry const& a, op_entry const& b) const { + return a.k == b.k && a.a == b.a && a.b == b.b && a.c == b.c; } }; @@ -143,8 +143,8 @@ class seq_rewriter { public: op_cache(ast_manager& m); - expr* find(decl_kind op, expr* a, expr* b); - void insert(decl_kind op, expr* a, expr* b, expr* r); + expr* find(decl_kind op, expr* a, expr* b, expr* c); + void insert(decl_kind op, expr* a, expr* b, expr* c, expr* r); }; seq_util m_util; @@ -208,8 +208,23 @@ class seq_rewriter { bool check_deriv_normal_form(expr* r, int level = 3); #endif + void mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref& result); + + expr_ref mk_antimirov_deriv(expr* e, expr* r, expr* path); + expr_ref mk_in_antimirov_rec(expr* s, expr* d); + expr_ref mk_in_antimirov(expr* s, expr* d); + + expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path); + expr_ref mk_antimirov_deriv_concat(expr* d, expr* r); + expr_ref mk_antimirov_deriv_negate(expr* d); + expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2); + expr_ref mk_ite_simplify(expr* c, expr* t, expr* e); + expr_ref mk_regex_reverse(expr* r); + expr_ref mk_regex_concat(expr* r1, expr* r2); + bool lt_char(expr* ch1, expr* ch2); bool eq_char(expr* ch1, expr* ch2); + bool neq_char(expr* ch1, expr* ch2); bool le_char(expr* ch1, expr* ch2); bool pred_implies(expr* a, expr* b); bool are_complements(expr* r1, expr* r2) const; @@ -382,6 +397,7 @@ class seq_rewriter { // Expose derivative and nullability check expr_ref is_nullable(expr* r); expr_ref mk_derivative(expr* ele, expr* r); + expr_ref mk_derivative(expr* r); // heuristic elimination of element from condition that comes form a derivative. // special case optimization for conjunctions of equalities, disequalities and ranges. diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 69ec244d54a..6cfc6e1179a 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -854,6 +854,48 @@ void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const { } } +/* +Returns true if s is an expression of the form (l = |u|) |u|-k or (-k)+|u| or |u|+(-k). +Also returns true and assigns k=0 and l=s if s is |u|. +*/ +bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, unsigned& k) const +{ + expr* x; + rational v; + if (is_length(s, l)) { + k = 0; + return true; + } + else if (arith_util(m).is_sub(s, l, x) && is_length(l, u) && arith_util(m).is_numeral(x, v) && v.is_nonneg()) { + k = v.get_unsigned(); + return true; + } + else if (arith_util(m).is_add(s, l, x) && is_length(l, u) && arith_util(m).is_numeral(x, v) && v.is_nonpos()) { + k = (0 - v.get_int32()); + return true; + } + else if (arith_util(m).is_add(s, x, l) && is_length(l, u) && arith_util(m).is_numeral(x, v) && v.is_nonpos()) { + k = (0 - v.get_int32()); + return true; + } + else + return false; +} + +bool seq_util::str::is_unit_string(expr const* s, expr_ref& c) const { + zstring z; + expr* ch = nullptr; + if (is_string(s, z) && z.length() == 1) { + c = mk_char(z[0]); + return true; + } + else if (is_unit(s, ch)) { + c = ch; + return true; + } + return false; +} + void seq_util::str::get_concat_units(expr* e, expr_ref_vector& es) const { expr* e1, *e2; while (is_concat(e, e1, e2)) { @@ -876,8 +918,6 @@ app* seq_util::str::mk_is_empty(expr* s) const { return m.mk_eq(s, mk_empty(s->get_sort())); } - - unsigned seq_util::str::min_length(expr* s) const { SASSERT(u.is_seq(s)); unsigned result = 0; @@ -1068,6 +1108,7 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) { std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const { SASSERT(re.u.is_seq(s)); zstring z; + expr* x, * j, * k, * l, * i, * x_; if (re.u.str.is_empty(s)) out << "()"; else if (re.u.str.is_unit(s)) @@ -1082,11 +1123,28 @@ std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) for (unsigned i = 0; i < z.length(); i++) out << (char)z[i]; } - //using braces to indicate 'full' output - //for example an uninterpreted constant X will be printed as {X} - //while a unit sequence "X" will be printed as X - //thus for example (concat "X" "Y" Z "W") where Z is uninterpreted is printed as XY{Z}W - else out << "{" << mk_pp(s, re.m) << "}"; + else if (re.u.str.is_extract(s, x, j, k)) { + rational jv, iv; + if (arith_util(re.m).is_numeral(j, jv)) { + if (arith_util(re.m).is_numeral(k, iv)) + // output X[j,k] + out << pp(re, x) << "[" << jv.get_int32() << "," << jv.get_int32() << "]"; + else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ && + arith_util(re.m).is_numeral(i, iv) && iv == jv) + // case X[j,|X|-j] is denoted by X[j..] + out << pp(re, x) << "[" << jv.get_int32() << "..]"; + else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) || + (arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ && + arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) + // case X[j,|X|-j] is denoted by X[j..] + out << pp(re, x) << "[" << jv.get_int32() << "..]"; + else + out << pp(re, x) << "[" << jv.get_int32() << "," << pp(re, k) << "]"; + } + else + out << pp(re, x) << "[" << pp(re, j) << "," << pp(re, k) << "]"; + } + else out << mk_pp(s, re.m); return out; } @@ -1112,7 +1170,7 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const { Specialize output for a unit sequence converting to visible ASCII characters if possible. */ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { - expr* e; + expr* e, * i; unsigned n = 0; if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) { char c = (char)n; @@ -1151,17 +1209,21 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { else out << "\\u" << std::hex << n; } + else if (re.u.str.is_nth_i(s, e, i)) { + out << mk_pp(e, re.m) << "[" << mk_pp(i, re.m) << "]"; + } else - out << "{" << mk_pp(s, re.m) << "}"; + out << mk_pp(s, re.m); return out; } /* - Pretty prints the regex r into the out stream + Pretty prints the regex r into the ostream out */ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; unsigned lo = 0, hi = 0; + rational v; if (re.u.is_char(e)) return seq_unit(out, e); else if (re.u.is_seq(e)) @@ -1172,28 +1234,28 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { return out << ".*"; else if (re.is_to_re(e, s)) return compact_helper_seq(out, s); - else if (re.is_range(e, s, s2)) + else if (re.is_range(e, s, s2)) return compact_helper_range(out, s, s2); else if (re.is_epsilon(e)) return out << "()"; else if (re.is_empty(e)) return out << "[]"; - else if (re.is_concat(e, r1, r2)) + else if (re.is_concat(e, r1, r2)) return out << pp(re, r1) << pp(re, r2); - else if (re.is_union(e, r1, r2)) + else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")"; - else if (re.is_intersection(e, r1, r2)) + else if (re.is_intersection(e, r1, r2)) return out << "(" << pp(re, r1) << "&" /*(html_encode ? ")&(" : ")&(")*/ << pp(re, r2) << ")"; else if (re.is_complement(e, r1)) { if (can_skip_parenth(r1)) return out << "~" << pp(re, r1); - else + else return out << "~(" << pp(re, r1) << ")"; } else if (re.is_plus(e, r1)) { - if (can_skip_parenth(r1)) + if (can_skip_parenth(r1)) return out << pp(re, r1) << "+"; - else + else return out << "(" << pp(re, r1) << ")+"; } else if (re.is_star(e, r1)) { @@ -1205,14 +1267,14 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { else if (re.is_loop(e, r1, lo)) { if (can_skip_parenth(r1)) return out << pp(re, r1) << "{" << lo << ",}"; - else + else return out << "(" << pp(re, r1) << "){" << lo << ",}"; } else if (re.is_loop(e, r1, lo, hi)) { if (can_skip_parenth(r1)) { if (lo == hi) return out << pp(re, r1) << "{" << lo << "}"; - else + else return out << pp(re, r1) << "{" << lo << "," << hi << "}"; } else { @@ -1222,21 +1284,45 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}"; } } - else if (re.is_diff(e, r1, r2)) + else if (re.is_diff(e, r1, r2)) return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")"; - else if (re.m.is_ite(e, s, r1, r2)) - return out << "if(" << mk_pp(s, re.m) << "," << pp(re, r1) << "," << pp(re, r2) << ")"; + else if (re.m.is_ite(e, s, r1, r2)) + return out << "(if " << pp(re, s) << " then " << pp(re, r1) << " else " << pp(re, r2) << ")"; else if (re.is_opt(e, r1)) { - if (can_skip_parenth(r1)) + if (can_skip_parenth(r1)) return out << pp(re, r1) << "?"; - else + else return out << "(" << pp(re, r1) << ")?"; } - else if (re.is_reverse(e, r1)) - return out << "reverse(" << pp(re, r1) << ")"; + else if (re.is_reverse(e, r1)) + return out << "rev(" << pp(re, r1) << ")"; + else if (re.m.is_eq(e, r1, r2)) + return out << "(" << pp(re, r1) << "=" << pp(re, r2) << ")"; + else if (re.m.is_not(e, r1)) + return out << "!" << pp(re, r1); + else if (re.m.is_and(e)) { + out << "("; + for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) + out << (i > 0 ? " and " : "") << pp(re, to_app(e)->get_arg(i)); + out << ")"; + } + else if (re.m.is_or(e)) { + out << "("; + for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) + out << (i > 0 ? " or " : "") << pp(re, to_app(e)->get_arg(i)); + out << ")"; + } + else if (re.u.str.is_in_re(e, s, r1)) + return out << "(" << pp(re, s) << " in " << pp(re, r1) << ")"; + //else if (arith_util(re.m).is_add(e, s, r1) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_neg()) + // return out << "(|" << pp(re, r2) << "|" << "-" << (0 - v.get_int32()) << ")"; + //else if (arith_util(re.m).is_add(e, r1, s) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_neg()) + // return out << "(|" << pp(re, r2) << "|" << "-" << (0 - v.get_int32()) << ")"; + //else if (arith_util(re.m).is_sub(e, r1, s) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_pos()) + // return out << "(|" << pp(re, r2) << "|" << "-" << v.get_int32() << ")"; else - // Else: derivative or is_of_pred - return out << "{" << mk_pp(e, re.m) << "}"; + // for all remaining cases use the default pretty printer + return out << mk_pp(e, re.m); } /* diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index f9589fee695..a985c7520c0 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -350,6 +350,13 @@ class seq_util { bool is_from_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_FROM_CODE); } bool is_to_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_TO_CODE); } + bool is_len_sub(expr const* n, expr*& l, expr*& u, unsigned& k) const; + + /* + tests if s is a single character string(c) or a unit (c) + */ + bool is_unit_string(expr const* s, expr_ref& c) const; + bool is_string_term(expr const * n) const { return u.is_string(n->get_sort()); } @@ -530,7 +537,20 @@ class seq_util { bool is_loop(expr const* n) const { return is_app_of(n, m_fid, OP_RE_LOOP); } bool is_empty(expr const* n) const { return is_app_of(n, m_fid, OP_RE_EMPTY_SET); } bool is_full_char(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_CHAR_SET); } - bool is_full_seq(expr const* n) const { return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET); } + bool is_full_seq(expr const* n) const { + expr* s; + return is_app_of(n, m_fid, OP_RE_FULL_SEQ_SET) || (is_star(n, s) && is_full_char(s)); + } + bool is_dot_plus(expr const* n) const { + expr* s, * t; + if (is_plus(n, s) && is_full_char(s)) + return true; + if (is_concat(n, s, t)) { + if ((is_full_char(s) && is_full_seq(t)) || (is_full_char(t) && is_full_seq(s))) + return true; + } + return false; + } bool is_of_pred(expr const* n) const { return is_app_of(n, m_fid, OP_RE_OF_PRED); } bool is_reverse(expr const* n) const { return is_app_of(n, m_fid, OP_RE_REVERSE); } bool is_derivative(expr const* n) const { return is_app_of(n, m_fid, OP_RE_DERIVATIVE); } diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 80282c3f895..160d0e33372 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -12,6 +12,7 @@ Module Name: Author: Nikolaj Bjorner (nbjorner) 2020-5-22 + Margus Veanes 2021 --*/ From 1fdb655323bbb6e8e656be8e6ee5165665f6625d Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 18 Aug 2021 16:22:08 -0700 Subject: [PATCH 02/16] some edit --- src/ast/rewriter/seq_rewriter.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 54170d24558..78583db1ac0 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2992,10 +2992,15 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { #endif /* - Memoized, recursive implementation of the symbolic derivative of r as a transition regex with . + Memoized, recursive implementation of the symbolic derivative of r as a transition regex wrt (:var 0). */ expr_ref seq_rewriter::mk_derivative(expr* r) { - return mk_antimirov_deriv(ele, r, m().mk_true()); + sort* seq_sort = nullptr, * elem_sort = nullptr; + VERIFY(u().is_re(r, seq_sort)); + u().is_seq(seq_sort, elem_sort); + // Use the canonical variable (:var 0) for derivation + // essentially representing the transition regex \lambda x.deriv(x,r) + return mk_antimirov_deriv(m().mk_var(0, elem_sort), r, m().mk_true()); } /* Memoized, recursive implementation of the symbolic derivative of r. From 60f39b09c6ef4df5ba094a0753732313608105b8 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 20 Aug 2021 19:14:35 -0700 Subject: [PATCH 03/16] further improvements in derivative code --- src/ast/rewriter/seq_rewriter.cpp | 88 ++++++++++++++++++------------- src/ast/rewriter/seq_rewriter.h | 13 ++++- src/ast/seq_decl_plugin.cpp | 68 ++++++++++++------------ src/ast/seq_decl_plugin.h | 2 +- src/smt/seq_regex.cpp | 37 ++++++------- src/smt/seq_regex.h | 2 +- 6 files changed, 118 insertions(+), 92 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 78583db1ac0..fc7a0d24a8d 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2896,7 +2896,8 @@ br_status seq_rewriter::mk_re_reverse(expr* r, expr_ref& result) { br_status seq_rewriter::mk_re_derivative(expr* ele, expr* r, expr_ref& result) { result = mk_derivative(ele, r); // TBD: we may even declare BR_DONE here and potentially miss some simplifications - return re().is_derivative(result) ? BR_DONE : BR_REWRITE_FULL; + // return re().is_derivative(result) ? BR_DONE : BR_REWRITE_FULL; + return BR_DONE; } /* @@ -2991,20 +2992,13 @@ bool seq_rewriter::check_deriv_normal_form(expr* r, int level) { } #endif -/* - Memoized, recursive implementation of the symbolic derivative of r as a transition regex wrt (:var 0). -*/ expr_ref seq_rewriter::mk_derivative(expr* r) { - sort* seq_sort = nullptr, * elem_sort = nullptr; - VERIFY(u().is_re(r, seq_sort)); - u().is_seq(seq_sort, elem_sort); - // Use the canonical variable (:var 0) for derivation - // essentially representing the transition regex \lambda x.deriv(x,r) - return mk_antimirov_deriv(m().mk_var(0, elem_sort), r, m().mk_true()); + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(r, seq_sort)); + VERIFY(m_util.is_seq(seq_sort, ele_sort)); + return mk_antimirov_deriv(m().mk_var(0, ele_sort), r, m().mk_true()); } -/* - Memoized, recursive implementation of the symbolic derivative of r. -*/ + expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { /*STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m()) << "," << mk_pp(r, m()) << std::endl;); @@ -3113,10 +3107,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = mk_antimirov_deriv_union(c1, mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); } else if (m().is_ite(r, c, r1, r2)) { - m_br.mk_and(path, c, c1); - m_br.mk_and(path, m_br.mk_not(c), c2); - // TODO: try to simplify c1 and c2 further to false when possible - // by checking cases when path is inconsistent with character constraint c + c1 = simplify_path(m().mk_and(c, path)); + c2 = simplify_path(m().mk_and(m().mk_not(c), path)); if (m().is_false(c1)) result = mk_antimirov_deriv(e, r2, c2); else if (m().is_false(c2)) @@ -3131,8 +3123,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref SASSERT(u().is_char(c1)); SASSERT(u().is_char(c2)); // range represents c1 <= e <= c2 - m_br.mk_and(u().mk_le(c1, e), u().mk_le(e, c2), range); - m_br.mk_and(path, range, psi); + range = m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2)); + psi = simplify_path(m().mk_and(path, range)); if (m().is_false(psi)) result = nothing(); else @@ -3173,30 +3165,35 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref } expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path) { - auto nothing = [&]() { return expr_ref(re().mk_empty(d1->get_sort()), m()); }; + sort* seq_sort = nullptr, * ele_sort = nullptr; + VERIFY(m_util.is_re(d1, seq_sort)); + VERIFY(m_util.is_seq(seq_sort, ele_sort)); expr_ref result(m()); - if (d1 == d2) { + expr* c, * a, * b; + if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1)) result = d1; - return result; - } - expr* c1, * t1, * e1; - if (m().is_ite(d1, c1, t1, e1)) { - expr_ref path_and_c1(m()); - expr_ref path_and_notc1(m()); - m_br.mk_and(path, c1, path_and_c1); - m_br.mk_and(path, m_br.mk_not(c1), path_and_notc1); - - if (m().is_false(path_and_c1)) - result = mk_antimirov_deriv_intersection(e1, d2, path); - else if (m().is_false(path_and_notc1)) - result = mk_antimirov_deriv_intersection(t1, d2, path); + else if (re().is_full_seq(d1) || re().is_empty(d2)) + result = d2; + else if (m().is_ite(d1, c, a, b)) { + expr_ref path_and_c(simplify_path(m().mk_and(path, c)), m()); + expr_ref path_and_notc(simplify_path(m().mk_and(path, m().mk_not(c))), m()); + if (m().is_false(path_and_c)) + result = mk_antimirov_deriv_intersection(b, d2, path); + else if (m().is_false(path_and_notc)) + result = mk_antimirov_deriv_intersection(a, d2, path); else - result = m().mk_ite(c1, mk_antimirov_deriv_intersection(t1, d2, path_and_c1), - mk_antimirov_deriv_intersection(e1, d2, path_and_notc1)); + result = m().mk_ite(c, mk_antimirov_deriv_intersection(a, d2, path_and_c), + mk_antimirov_deriv_intersection(b, d2, path_and_notc)); } else if (m().is_ite(d2)) // swap d1 and d2 result = mk_antimirov_deriv_intersection(d2, d1, path); + else if (re().is_union(d1, a, b)) + // distribute intersection over the union in d1 + result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(a, d2, path), mk_antimirov_deriv_intersection(b, d2, path)); + else if (re().is_union(d2, a, b)) + // distribute intersection over the union in d2 + result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(d1, a, path), mk_antimirov_deriv_intersection(d1, b, path)); else // in all other cases create the intersection regex result = re().mk_inter(d1, d2); @@ -3358,6 +3355,25 @@ expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) { return result; } +/* +path is typically a conjunction of (negated) character equations or constraints that can potentially be simplified +the first element of each equation is assumed to be the element parameter, for example x = (:var 0), +for example a constraint x='a' & x='b' is simplified to false +*/ +expr_ref seq_rewriter::simplify_path(expr* path) { + //TODO: more systematic simplifications + expr_ref result(path, m()); + expr* h = nullptr, * t = nullptr, * lhs = nullptr, * rhs = nullptr, * h1 = nullptr; + if (m().is_and(path, h, t)) { + if (m().is_true(h)) + result = t; + else if (m().is_eq(h, lhs, rhs) || m().is_not(h, h1) && m().is_eq(h1, lhs, rhs)) + elim_condition(lhs, result); + } + return result; +} + + expr_ref seq_rewriter::mk_der_antimorov_union(expr* r1, expr* r2) { return mk_der_op(_OP_RE_ANTIMOROV_UNION, r1, r2); } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index d6d8b43917f..45746a01296 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -222,6 +222,8 @@ class seq_rewriter { expr_ref mk_regex_reverse(expr* r); expr_ref mk_regex_concat(expr* r1, expr* r2); + expr_ref simplify_path(expr* path); + bool lt_char(expr* ch1, expr* ch2); bool eq_char(expr* ch1, expr* ch2); bool neq_char(expr* ch1, expr* ch2); @@ -394,9 +396,18 @@ class seq_rewriter { void add_seqs(expr_ref_vector const& ls, expr_ref_vector const& rs, expr_ref_pair_vector& new_eqs); - // Expose derivative and nullability check + /* + create the nullability check for r + */ expr_ref is_nullable(expr* r); + /* + make the derivative of r wrt the given element ele + */ expr_ref mk_derivative(expr* ele, expr* r); + /* + make the derivative of r wrt the canonical variable v0 = (:var 0), + for example mk_derivative(a+) = (if (v0 = 'a') then a* else []) + */ expr_ref mk_derivative(expr* r); // heuristic elimination of element from condition that comes form a derivative. diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 6cfc6e1179a..f74b9954071 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1128,21 +1128,21 @@ std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) if (arith_util(re.m).is_numeral(j, jv)) { if (arith_util(re.m).is_numeral(k, iv)) // output X[j,k] - out << pp(re, x) << "[" << jv.get_int32() << "," << jv.get_int32() << "]"; + out << pp(re, x, html_encode) << "[" << jv.get_int32() << "," << jv.get_int32() << "]"; else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ && arith_util(re.m).is_numeral(i, iv) && iv == jv) // case X[j,|X|-j] is denoted by X[j..] - out << pp(re, x) << "[" << jv.get_int32() << "..]"; + out << pp(re, x, html_encode) << "[" << jv.get_int32() << "..]"; else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) || (arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ && arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) // case X[j,|X|-j] is denoted by X[j..] - out << pp(re, x) << "[" << jv.get_int32() << "..]"; + out << pp(re, x, html_encode) << "[" << jv.get_int32() << "..]"; else - out << pp(re, x) << "[" << jv.get_int32() << "," << pp(re, k) << "]"; + out << pp(re, x, html_encode) << "[" << jv.get_int32() << "," << pp(re, k, html_encode) << "]"; } else - out << pp(re, x) << "[" << pp(re, j) << "," << pp(re, k) << "]"; + out << pp(re, x, html_encode) << "[" << pp(re, j, html_encode) << "," << pp(re, k, html_encode) << "]"; } else out << mk_pp(s, re.m); return out; @@ -1181,9 +1181,11 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { else if (c == '\f') out << "\\f"; else if (c == ' ') - out << "\\s"; + out << " "; else if (c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']' || c == '.' || c == '\\') out << "\\" << c; + else if ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c == '@' || c == ';' || c == ':' || c == '/' || c == '#' || c == '$') + out << c; else if (32 < n && n < 127) { if (html_encode) { if (c == '<') @@ -1241,79 +1243,79 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { else if (re.is_empty(e)) return out << "[]"; else if (re.is_concat(e, r1, r2)) - return out << pp(re, r1) << pp(re, r2); + return out << pp(re, r1, html_encode) << pp(re, r2, html_encode); else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) - return out << "(" << pp(re, r1) << "|" << pp(re, r2) << ")"; + return out << "(" << pp(re, r1, html_encode) << "|" << pp(re, r2, html_encode) << ")"; else if (re.is_intersection(e, r1, r2)) - return out << "(" << pp(re, r1) << "&" /*(html_encode ? ")&(" : ")&(")*/ << pp(re, r2) << ")"; + return out << "(" << pp(re, r1, html_encode) << (html_encode ? "&" : "&") << pp(re, r2, html_encode) << ")"; else if (re.is_complement(e, r1)) { if (can_skip_parenth(r1)) - return out << "~" << pp(re, r1); + return out << "~" << pp(re, r1, html_encode); else - return out << "~(" << pp(re, r1) << ")"; + return out << "~(" << pp(re, r1, html_encode) << ")"; } else if (re.is_plus(e, r1)) { if (can_skip_parenth(r1)) - return out << pp(re, r1) << "+"; + return out << pp(re, r1, html_encode) << "+"; else - return out << "(" << pp(re, r1) << ")+"; + return out << "(" << pp(re, r1, html_encode) << ")+"; } else if (re.is_star(e, r1)) { if (can_skip_parenth(r1)) - return out << pp(re, r1) << "*"; + return out << pp(re, r1, html_encode) << "*"; else - return out << "(" << pp(re, r1) << ")*"; + return out << "(" << pp(re, r1, html_encode) << ")*"; } else if (re.is_loop(e, r1, lo)) { if (can_skip_parenth(r1)) - return out << pp(re, r1) << "{" << lo << ",}"; + return out << pp(re, r1, html_encode) << "{" << lo << ",}"; else - return out << "(" << pp(re, r1) << "){" << lo << ",}"; + return out << "(" << pp(re, r1, html_encode) << "){" << lo << ",}"; } else if (re.is_loop(e, r1, lo, hi)) { if (can_skip_parenth(r1)) { if (lo == hi) - return out << pp(re, r1) << "{" << lo << "}"; + return out << pp(re, r1, html_encode) << "{" << lo << "}"; else - return out << pp(re, r1) << "{" << lo << "," << hi << "}"; + return out << pp(re, r1, html_encode) << "{" << lo << "," << hi << "}"; } else { if (lo == hi) - return out << "(" << pp(re, r1) << "){" << lo << "}"; + return out << "(" << pp(re, r1, html_encode) << "){" << lo << "}"; else - return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}"; + return out << "(" << pp(re, r1, html_encode) << "){" << lo << "," << hi << "}"; } } else if (re.is_diff(e, r1, r2)) - return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")"; + return out << "(" << pp(re, r1, html_encode) << ")\\(" << pp(re, r2, html_encode) << ")"; else if (re.m.is_ite(e, s, r1, r2)) - return out << "(if " << pp(re, s) << " then " << pp(re, r1) << " else " << pp(re, r2) << ")"; + return out << "(if " << pp(re, s, html_encode) << " then " << pp(re, r1, html_encode) << " else " << pp(re, r2, html_encode) << ")"; else if (re.is_opt(e, r1)) { if (can_skip_parenth(r1)) - return out << pp(re, r1) << "?"; + return out << pp(re, r1, html_encode) << "?"; else - return out << "(" << pp(re, r1) << ")?"; + return out << "(" << pp(re, r1, html_encode) << ")?"; } else if (re.is_reverse(e, r1)) - return out << "rev(" << pp(re, r1) << ")"; + return out << "rev(" << pp(re, r1, html_encode) << ")"; else if (re.m.is_eq(e, r1, r2)) - return out << "(" << pp(re, r1) << "=" << pp(re, r2) << ")"; + return out << "(" << pp(re, r1, html_encode) << "=" << pp(re, r2, html_encode) << ")"; else if (re.m.is_not(e, r1)) - return out << "!" << pp(re, r1); + return out << "!" << pp(re, r1, html_encode); else if (re.m.is_and(e)) { out << "("; for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) - out << (i > 0 ? " and " : "") << pp(re, to_app(e)->get_arg(i)); + out << (i > 0 ? " and " : "") << pp(re, to_app(e)->get_arg(i), html_encode); out << ")"; } else if (re.m.is_or(e)) { out << "("; for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) - out << (i > 0 ? " or " : "") << pp(re, to_app(e)->get_arg(i)); + out << (i > 0 ? " or " : "") << pp(re, to_app(e)->get_arg(i), html_encode); out << ")"; } else if (re.u.str.is_in_re(e, s, r1)) - return out << "(" << pp(re, s) << " in " << pp(re, r1) << ")"; + return out << "(" << pp(re, s, html_encode) << " in " << pp(re, r1, html_encode) << ")"; //else if (arith_util(re.m).is_add(e, s, r1) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_neg()) // return out << "(|" << pp(re, r2) << "|" << "-" << (0 - v.get_int32()) << ")"; //else if (arith_util(re.m).is_add(e, r1, s) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_neg()) @@ -1330,7 +1332,7 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { */ std::string seq_util::rex::to_str(expr* r) const { std::ostringstream out; - out << pp(u.re, r); + out << pp(u.re, r, false); return out.str(); } @@ -1376,7 +1378,7 @@ seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const { else result = mk_info_rec(to_app(e)); m_infos.setx(e->get_id(), result, invalid_info); - STRACE("re_info", tout << "compute_info(" << pp(u.re, e) << ")=" << result << std::endl;); + STRACE("re_info", tout << "compute_info(" << pp(u.re, e, false) << ")=" << result << std::endl;); return result; } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index a985c7520c0..6cb3fe9f20f 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -590,7 +590,7 @@ class seq_util { std::ostream& compact_helper_range(std::ostream& out, expr* s1, expr* s2) const; public: - pp(seq_util::rex& r, expr* e, bool html = false) : re(r), e(e), html_encode(html) {} + pp(seq_util::rex& r, expr* e, bool html) : re(r), e(e), html_encode(html) {} std::ostream& display(std::ostream&) const; }; }; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 160d0e33372..5657f109d30 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -460,12 +460,10 @@ namespace smt { expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;); - // Use canonical variable for head - expr_ref hd_canon(m.mk_var(0, hd->get_sort()), m); - expr_ref result(re().mk_derivative(hd_canon, r), m); - rewrite(result); + // Use canonical variable (:var 0) for the derivative element + expr_ref result(seq_rw().mk_derivative(r), m); - // Substitute with real head + // Substitute (:var 0) with the actual element var_subst subst(m); expr_ref_vector sub(m); sub.push_back(hd); @@ -688,27 +686,26 @@ namespace smt { } /* - Return a list of all leaves in the derivative of a regex r, + Return a list of all target regexes in the derivative of a regex r, ignoring the conditions along each path. - Warning: Although the derivative - normal form tries to eliminate unsat condition paths, one cannot - assume that the path to each leaf is satisfiable in general - (e.g. when regexes are created using re.pred). - So not all results may correspond to satisfiable predicates. - It is OK to rely on the results being satisfiable for completeness, - but not soundness. + The derivative + construction tries to eliminate unsat condition paths, but one cannot + assume that the path to each leaf is satisfiable in general. + + In general, some of the target regexes in results may be unreachable from r. */ - void seq_regex::get_all_derivatives(expr* r, expr_ref_vector& results) { + void seq_regex::get_targets(expr* r, expr_ref_vector& results) { // Get derivative - sort* seq_sort = nullptr; + /*sort* seq_sort = nullptr; VERIFY(u().is_re(r, seq_sort)); expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); expr_ref hd = mk_first(r, n); expr_ref d(m); - d = derivative_wrapper(hd, r); + d = derivative_wrapper(hd, r);*/ + expr_ref d(seq_rw().mk_derivative(r), m); - // DFS + // use DFS to collect all the targets (leaf regexes) in d. vector to_visit; to_visit.push_back(d); obj_map visited; // set (bool is used as a unit type) @@ -885,7 +882,7 @@ namespace smt { STRACE("state_graph", if (!m_state_graph.is_seen(r_id)) - tout << std::endl << "state(" << r_id << ") = " << seq_util::rex::pp(re(), r) << std::endl << "info(" << r_id << ") = " << re().get_info(r) << std::endl;); + tout << std::endl << "state(" << r_id << ") = " << re().to_str(r) << std::endl << "info(" << r_id << ") = " << re().get_info(r) << std::endl;); // Add state m_state_graph.add_state(r_id); STRACE("seq_regex", tout << "Updating state graph for regex " @@ -901,14 +898,14 @@ namespace smt { expr_ref_vector derivatives(m); STRACE("seq_regex_verbose", tout << "getting all derivs: " << r_id << " " << std::endl;); - get_all_derivatives(r, derivatives); + get_targets(r, derivatives); for (auto const& dr: derivatives) { unsigned dr_id = get_state_id(dr); STRACE("seq_regex_verbose", tout << std::endl << " traversing deriv: " << dr_id << " ";); STRACE("state_graph", if (!m_state_graph.is_seen(dr_id)) - tout << "state(" << dr_id << ") = " << seq_util::rex::pp(re(), dr) << std::endl << "info(" << dr_id << ") = " << re().get_info(dr) << std::endl;); + tout << "state(" << dr_id << ") = " << re().to_str(dr) << std::endl << "info(" << dr_id << ") = " << re().get_info(dr) << std::endl;); // Add state m_state_graph.add_state(dr_id); bool maybecycle = can_be_in_cycle(r, dr); diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 8a2cf68f1bc..eee57375d9e 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -163,7 +163,7 @@ namespace smt { // Various support for unfolding derivative expressions that are // returned by derivative_wrapper expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r); - void get_all_derivatives(expr* r, expr_ref_vector& results); + void get_targets(expr* r, expr_ref_vector& results); void get_cofactors(expr* r, expr_ref_pair_vector& result); void get_cofactors_rec(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); From c4275df322da3950a3d4e7ff238e9d12d3f36efb Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Mon, 23 Aug 2021 02:07:09 -0700 Subject: [PATCH 04/16] more deriv code edits and re::to_str update --- src/ast/rewriter/seq_rewriter.cpp | 31 ++-- src/ast/rewriter/seq_rewriter.h | 1 - src/ast/seq_decl_plugin.cpp | 287 +++++++++++++++++++----------- src/ast/seq_decl_plugin.h | 40 ++++- src/smt/seq_regex.cpp | 154 ++++++++-------- src/smt/seq_regex.h | 5 +- 6 files changed, 317 insertions(+), 201 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index fc7a0d24a8d..25d4ac035ee 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3057,7 +3057,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref else if (neq_char(e, h)) result = nothing(); else - result = mk_ite_simplify(m().mk_eq(e, h), re().mk_to_re(t), nothing()); + result = re().mk_ite_simplify(m().mk_eq(e, h), re().mk_to_re(t), nothing()); } else { @@ -3081,7 +3081,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = nothing(); else // observe that the precondition |r1|>0 of mk_seq_rest is implied by c1 - result = mk_ite_simplify(c1, re().mk_reverse(mk_seq_butlast(r1)), nothing()); + result = re().mk_ite_simplify(c1, re().mk_reverse(mk_seq_butlast(r1)), nothing()); } else { result = mk_regex_reverse(r2); @@ -3104,7 +3104,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref else // D(e,r1)r2|(ite (r1nullable) (D(e,r2)) []) // observe that (mk_ite_simplify(true, D(e,r2), []) = D(e,r2) - result = mk_antimirov_deriv_union(c1, mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); + result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); } else if (m().is_ite(r, c, r1, r2)) { c1 = simplify_path(m().mk_and(c, path)); @@ -3114,7 +3114,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref else if (m().is_false(c2)) result = mk_antimirov_deriv(e, r1, c1); else - result = mk_ite_simplify(c, mk_antimirov_deriv(e, r1, c1), mk_antimirov_deriv(e, r2, c2)); + result = re().mk_ite_simplify(c, mk_antimirov_deriv(e, r1, c1), mk_antimirov_deriv(e, r2, c2)); } else if (re().is_range(r, r1, r2)) { expr_ref range(m()); @@ -3129,7 +3129,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = nothing(); else // D(e,c1..c2) = if (c1<=e<=c2) then () else [] - result = mk_ite_simplify(range, epsilon(), nothing()); + result = re().mk_ite_simplify(range, epsilon(), nothing()); } else result = nothing(); @@ -3196,7 +3196,8 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(d1, a, path), mk_antimirov_deriv_intersection(d1, b, path)); else // in all other cases create the intersection regex - result = re().mk_inter(d1, d2); + // TODO: flatten, order and merge d1 and d2 to maintain equality under similarity + result = (d1->get_id() <= d2->get_id() ? re().mk_inter(d1, d2) : re().mk_inter(d2, d1)); return result; } @@ -3254,20 +3255,8 @@ expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) else if (re().is_dot_plus(d2) && re().get_info(d1).min_length > 0) result = d2; else - //TODO: flatten and order the union - result = re().mk_union(d1, d2); - return result; -} - -expr_ref seq_rewriter::mk_ite_simplify(expr* c, expr* t, expr* e) -{ - expr_ref result(m()); - if (m().is_true(c) || t == e) - result = t; - else if (m().is_false(c)) - result = e; - else - result = m().mk_ite(c, t, e); + // TODO: flatten, order and merge d1 and d2 to maintain equality under similarity + result = (d1->get_id() <= d2->get_id() ? re().mk_union(d1, d2) : re().mk_union(d2, d1)); return result; } @@ -3347,7 +3336,7 @@ expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) { // s in [] <==> false, also: s in () <==> false when |s|>0 result = m().mk_false(); else if (m().is_ite(d, c, d1, d2)) - m_br.mk_ite(c, mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2), result); + result = re().mk_ite_simplify(c, mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2)); else if (re().is_union(d, d1, d2)) m_br.mk_or(mk_in_antimirov_rec(s, d1), mk_in_antimirov_rec(s, d2), result); else diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 45746a01296..59aac3ffda8 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -218,7 +218,6 @@ class seq_rewriter { expr_ref mk_antimirov_deriv_concat(expr* d, expr* r); expr_ref mk_antimirov_deriv_negate(expr* d); expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2); - expr_ref mk_ite_simplify(expr* c, expr* t, expr* e); expr_ref mk_regex_reverse(expr* r); expr_ref mk_regex_concat(expr* r1, expr* r2); diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index f74b9954071..bd0365599fe 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1105,19 +1105,19 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) { /* Produces compact view of concrete concatenations such as (abcd). */ -std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const { +void seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const { SASSERT(re.u.is_seq(s)); zstring z; expr* x, * j, * k, * l, * i, * x_; if (re.u.str.is_empty(s)) out << "()"; else if (re.u.str.is_unit(s)) - seq_unit(out, s); + print_unit(out, s); else if (re.u.str.is_concat(s)) { expr_ref_vector es(re.m); re.u.str.get_concat(s, es); for (expr* e : es) - compact_helper_seq(out, e); + print_seq(out, e); } else if (re.u.str.is_string(s, z)) { for (unsigned i = 0; i < z.length(); i++) @@ -1125,37 +1125,50 @@ std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) } else if (re.u.str.is_extract(s, x, j, k)) { rational jv, iv; + print(out, x); if (arith_util(re.m).is_numeral(j, jv)) { - if (arith_util(re.m).is_numeral(k, iv)) + if (arith_util(re.m).is_numeral(k, iv)) { // output X[j,k] - out << pp(re, x, html_encode) << "[" << jv.get_int32() << "," << jv.get_int32() << "]"; + out << "[" << jv.get_int32() << "," << jv.get_int32() << "]"; + } else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ && - arith_util(re.m).is_numeral(i, iv) && iv == jv) + arith_util(re.m).is_numeral(i, iv) && iv == jv) { // case X[j,|X|-j] is denoted by X[j..] - out << pp(re, x, html_encode) << "[" << jv.get_int32() << "..]"; + out << "[" << jv.get_int32() << "..]"; + } else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) || - (arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ && - arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) + (arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ && + arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) { // case X[j,|X|-j] is denoted by X[j..] - out << pp(re, x, html_encode) << "[" << jv.get_int32() << "..]"; - else - out << pp(re, x, html_encode) << "[" << jv.get_int32() << "," << pp(re, k, html_encode) << "]"; + out << "[" << jv.get_int32() << "..]"; + } + else { + out << "[" << jv.get_int32() << ","; + print(out, k); + out << "]"; + } + } + else { + out << "["; + print(out, j); + out << ","; + print(out, k); + out << "]"; } - else - out << pp(re, x, html_encode) << "[" << pp(re, j, html_encode) << "," << pp(re, k, html_encode) << "]"; } - else out << mk_pp(s, re.m); - return out; + else + out << mk_pp(s, re.m); } /* Produces output such as [a-z] for a range. */ -std::ostream& seq_util::rex::pp::compact_helper_range(std::ostream& out, expr* s1, expr* s2) const { +void seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const { out << "["; - seq_unit(out, s1) << "-"; - seq_unit(out, s2) << "]"; - return out; + print_unit(out, s1); + out << "-"; + print_unit(out, s2); + out << "]"; } /* @@ -1169,7 +1182,7 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const { /* Specialize output for a unit sequence converting to visible ASCII characters if possible. */ -std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { +void seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const { expr* e, * i; unsigned n = 0; if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) { @@ -1180,24 +1193,21 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { out << "\\r"; else if (c == '\f') out << "\\f"; - else if (c == ' ') - out << " "; - else if (c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']' || c == '.' || c == '\\') - out << "\\" << c; - else if ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c == '@' || c == ';' || c == ':' || c == '/' || c == '#' || c == '$') - out << c; - else if (32 < n && n < 127) { + else if (32 <= n && n < 127 && n != '\"' && n != ' ' + && n != '\\' && n != '\'' && n != '?' && n != '.' && n != '(' && n != ')' && n != '[' && n != ']' + && n != '{' && n != '}' && n != '&') { if (html_encode) { if (c == '<') out << "<"; else if (c == '>') out << ">"; - else if (c == '&') - out << "&"; - else if (c == '\"') - out << """; + //else if (c == '&') + // out << "&"; + //else if (c == '\"') + // out << """; else - out << "\\x" << std::hex << n; + //out << "\\x" << std::hex << n; + out << c; } else out << c; @@ -1212,119 +1222,189 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const { out << "\\u" << std::hex << n; } else if (re.u.str.is_nth_i(s, e, i)) { - out << mk_pp(e, re.m) << "[" << mk_pp(i, re.m) << "]"; + print(out, e); + out << "[" << mk_pp(i, re.m) << "]"; + } + else if (re.m.is_value(e)) + out << mk_pp(e, re.m); + else if (is_app(e)) { + out << "(" << to_app(e)->get_decl()->get_name().str(); + for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) { + out << " "; + print(out, to_app(e)->get_arg(i)); + } + out << ")"; } else out << mk_pp(s, re.m); - return out; } /* Pretty prints the regex r into the ostream out */ -std::ostream& seq_util::rex::pp::display(std::ostream& out) const { +void seq_util::rex::pp::print(std::ostream& out, expr* e) const { expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; unsigned lo = 0, hi = 0; rational v; if (re.u.is_char(e)) - return seq_unit(out, e); + print_unit(out, e); else if (re.u.is_seq(e)) - return compact_helper_seq(out, e); + print_seq(out, e); else if (re.is_full_char(e)) - return out << "."; + out << "."; else if (re.is_full_seq(e)) - return out << ".*"; + out << ".*"; else if (re.is_to_re(e, s)) - return compact_helper_seq(out, s); + print_seq(out, s); else if (re.is_range(e, s, s2)) - return compact_helper_range(out, s, s2); + print_range(out, s, s2); else if (re.is_epsilon(e)) - return out << "()"; + // ε = epsilon + out << (html_encode ? "ε" : "()"); else if (re.is_empty(e)) - return out << "[]"; - else if (re.is_concat(e, r1, r2)) - return out << pp(re, r1, html_encode) << pp(re, r2, html_encode); - else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) - return out << "(" << pp(re, r1, html_encode) << "|" << pp(re, r2, html_encode) << ")"; + // ∅ = emptyset + out << (html_encode ? "∅" : "[]"); + else if (re.is_concat(e, r1, r2)) { + print(out, r1); + print(out, r2); + } + else if (re.is_antimorov_union(e, r1, r2) || re.is_union(e, r1, r2)) { + out << "("; + print(out, r1); + out << (html_encode ? "⋃" : "|"); + print(out, r2); + out << ")"; + } else if (re.is_intersection(e, r1, r2)) - return out << "(" << pp(re, r1, html_encode) << (html_encode ? "&" : "&") << pp(re, r2, html_encode) << ")"; + { + out << "("; + print(out, r1); + out << (html_encode ? "⋂" : "&"); + print(out, r2); + out << ")"; + } else if (re.is_complement(e, r1)) { + out << "~"; if (can_skip_parenth(r1)) - return out << "~" << pp(re, r1, html_encode); - else - return out << "~(" << pp(re, r1, html_encode) << ")"; + print(out, r1); + else { + out << "("; + print(out, r1); + out << ")"; + } } else if (re.is_plus(e, r1)) { - if (can_skip_parenth(r1)) - return out << pp(re, r1, html_encode) << "+"; - else - return out << "(" << pp(re, r1, html_encode) << ")+"; + if (can_skip_parenth(r1)) { + print(out, r1); + out << "+"; + } + else { + out << "("; + print(out, r1); + out << ")+"; + } } else if (re.is_star(e, r1)) { - if (can_skip_parenth(r1)) - return out << pp(re, r1, html_encode) << "*"; - else - return out << "(" << pp(re, r1, html_encode) << ")*"; + if (can_skip_parenth(r1)) { + print(out, r1); + out << "*"; + } + else { + out << "("; + print(out, r1); + out << ")*"; + } } else if (re.is_loop(e, r1, lo)) { - if (can_skip_parenth(r1)) - return out << pp(re, r1, html_encode) << "{" << lo << ",}"; + if (can_skip_parenth(r1)) { + print(out, r1); + out << "{" << lo << ",}"; + } else - return out << "(" << pp(re, r1, html_encode) << "){" << lo << ",}"; + { + out << "("; + print(out, r1); + out << "){" << lo << ",}"; + } } else if (re.is_loop(e, r1, lo, hi)) { if (can_skip_parenth(r1)) { + print(out, r1); if (lo == hi) - return out << pp(re, r1, html_encode) << "{" << lo << "}"; + out << "{" << lo << "}"; else - return out << pp(re, r1, html_encode) << "{" << lo << "," << hi << "}"; + out << "{" << lo << "," << hi << "}"; } else { + out << "("; + print(out, r1); if (lo == hi) - return out << "(" << pp(re, r1, html_encode) << "){" << lo << "}"; + out << "){" << lo << "}"; else - return out << "(" << pp(re, r1, html_encode) << "){" << lo << "," << hi << "}"; + out << "){" << lo << "," << hi << "}"; } } - else if (re.is_diff(e, r1, r2)) - return out << "(" << pp(re, r1, html_encode) << ")\\(" << pp(re, r2, html_encode) << ")"; - else if (re.m.is_ite(e, s, r1, r2)) - return out << "(if " << pp(re, s, html_encode) << " then " << pp(re, r1, html_encode) << " else " << pp(re, r2, html_encode) << ")"; - else if (re.is_opt(e, r1)) { - if (can_skip_parenth(r1)) - return out << pp(re, r1, html_encode) << "?"; - else - return out << "(" << pp(re, r1, html_encode) << ")?"; - } - else if (re.is_reverse(e, r1)) - return out << "rev(" << pp(re, r1, html_encode) << ")"; - else if (re.m.is_eq(e, r1, r2)) - return out << "(" << pp(re, r1, html_encode) << "=" << pp(re, r2, html_encode) << ")"; - else if (re.m.is_not(e, r1)) - return out << "!" << pp(re, r1, html_encode); - else if (re.m.is_and(e)) { + else if (re.is_diff(e, r1, r2)) { out << "("; - for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) - out << (i > 0 ? " and " : "") << pp(re, to_app(e)->get_arg(i), html_encode); + print(out, r1); + out << ")\\("; + print(out, r2); + out << ")"; + } + else if (re.m.is_ite(e, s, r1, r2)) { + out << (html_encode ? "(𝐢𝐟 " : "(if "); + print(out, s); + out << (html_encode ? " 𝐭𝗵𝐞𝐧 " : " then "); + print(out, r1); + out << (html_encode ? " 𝐞𝐥𝘀𝐞 " : " else "); + print(out, r2); out << ")"; } - else if (re.m.is_or(e)) { + else if (re.is_opt(e, r1)) { + if (can_skip_parenth(r1)) { + print(out, r1); + out << "?"; + } + else { + out << "("; + print(out, r1); + out << ")?"; + } + } + else if (re.is_reverse(e, r1)) { + out << "(reverse "; + print(out, r1); + out << ")"; + } + else if (re.m.is_eq(e, r1, r2)) { out << "("; - for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) - out << (i > 0 ? " or " : "") << pp(re, to_app(e)->get_arg(i), html_encode); + print(out, r1); + out << "="; + print(out, r2); + out << ")"; + } + else if (re.m.is_not(e, r1)) { + out << "!"; + print(out, r1); + } + else if (re.m.is_value(e)) + out << mk_pp(e, re.m); + else if (is_app(e)) { + out << "(" << to_app(e)->get_decl()->get_name().str(); + for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) { + out << " "; + print(out, to_app(e)->get_arg(i)); + } out << ")"; } - else if (re.u.str.is_in_re(e, s, r1)) - return out << "(" << pp(re, s, html_encode) << " in " << pp(re, r1, html_encode) << ")"; - //else if (arith_util(re.m).is_add(e, s, r1) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_neg()) - // return out << "(|" << pp(re, r2) << "|" << "-" << (0 - v.get_int32()) << ")"; - //else if (arith_util(re.m).is_add(e, r1, s) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_neg()) - // return out << "(|" << pp(re, r2) << "|" << "-" << (0 - v.get_int32()) << ")"; - //else if (arith_util(re.m).is_sub(e, r1, s) && re.u.str.is_length(r1, r2) && arith_util(re.m).is_numeral(s, v) && v.is_pos()) - // return out << "(|" << pp(re, r2) << "|" << "-" << v.get_int32() << ")"; else // for all remaining cases use the default pretty printer - return out << mk_pp(e, re.m); + out << mk_pp(e, re.m); +} + +std::ostream& seq_util::rex::pp::display(std::ostream& out) const { + print(out, ex); + return out; } /* @@ -1332,7 +1412,16 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const { */ std::string seq_util::rex::to_str(expr* r) const { std::ostringstream out; - out << pp(u.re, r, false); + pp(u.re, r, false).display(out); + return out.str(); +} + +/* + Pretty prints the regex r into the output string that is htmlencoded +*/ +std::string seq_util::rex::to_strh(expr* r) const { + std::ostringstream out; + pp(u.re, r, true).display(out); return out.str(); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 6cb3fe9f20f..11ed269c632 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -579,18 +579,48 @@ class seq_util { app* mk_epsilon(sort* seq_sort); info get_info(expr* r) const; std::string to_str(expr* r) const; + std::string to_strh(expr* r) const; + + expr_ref mk_ite_simplify(expr* c, expr* t, expr* e) + { + expr_ref result(m); + if (m.is_true(c) || t == e) + result = t; + else if (m.is_false(c)) + result = e; + else + result = m.mk_ite(c, t, e); + return result; + } + + expr_ref mk_or_simplify(expr* a, expr* b) + { + expr_ref result(m); + if (m.is_true(a) || a == b) + result = a; + else if (m.is_true(b)) + result = b; + else if (m.is_false(a)) + result = b; + else if (m.is_false(b)) + result = a; + else + result = m.mk_or(a, b); + return result; + } class pp { seq_util::rex& re; - expr* e; + expr* ex; bool html_encode; bool can_skip_parenth(expr* r) const; - std::ostream& seq_unit(std::ostream& out, expr* s) const; - std::ostream& compact_helper_seq(std::ostream& out, expr* s) const; - std::ostream& compact_helper_range(std::ostream& out, expr* s1, expr* s2) const; + void print_unit(std::ostream& out, expr* s) const; + void print_seq(std::ostream& out, expr* s) const; + void print_range(std::ostream& out, expr* s1, expr* s2) const; + void print(std::ostream& out, expr* e) const; public: - pp(seq_util::rex& r, expr* e, bool html) : re(r), e(e), html_encode(html) {} + pp(seq_util::rex& re, expr* ex, bool html) : re(re), ex(ex), html_encode(html) {} std::ostream& display(std::ostream&) const; }; }; diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 5657f109d30..35e10433a86 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -254,10 +254,8 @@ namespace smt { * (accept s (i + 1) (derivative s[i] r) * * Acceptance of a derivative is unfolded into a disjunction over - * all derivatives. Effectively, this implements the following rule, - * but all in one step: - * (accept s i (ite c r1 r2)) => - * c & (accept s i r1) \/ ~c & (accept s i r2) + * all derivatives. Effectively, this implements the following rule: + * (accept s i (ite c r1 r2)) => (ite c (accept s i r1) (accept s i r2)) */ void seq_regex::propagate_accept(literal lit) { SASSERT(!lit.sign()); @@ -303,6 +301,7 @@ namespace smt { unsigned min_len = re().min_length(r); unsigned min_len_plus_i = u().max_plus(min_len, idx); literal len_s_ge_min = th.m_ax.mk_ge(th.mk_len(s), min_len_plus_i); + // Acc(s,i,r) ==> |s| >= i + minlength(r) th.propagate_lit(nullptr, 1, &lit, len_s_ge_min); // Axiom equivalent to the above: th.add_axiom(~lit, len_s_ge_min); @@ -317,6 +316,8 @@ namespace smt { STRACE("seq_regex_brief", tout << " (Warning: min_length returned 0 for" << " non-nullable regex)";); + // since nullable(r) = false: + // Acc(s,i,r) ==> |s|>i th.propagate_lit(nullptr, 1, &lit, ~len_s_le_i); } else if (!m.is_true(is_nullable)) { @@ -328,7 +329,9 @@ namespace smt { << " (Warning: is_nullable did not simplify)";); literal is_nullable_lit = th.mk_literal(is_nullable); ctx.mark_as_relevant(is_nullable_lit); + // Acc(s,i,r) & |s|<=i ==> nullable(r) th.add_axiom(~lit, ~len_s_le_i, is_nullable_lit); + //TODO: what if is_nullable contains an in_re if (str().is_in_re(is_nullable)) th.add_unhandled_expr(is_nullable); } @@ -336,14 +339,21 @@ namespace smt { // Rule 3: derivative unfolding literal_vector accept_next; - expr_ref hd = th.mk_nth(s, i); + expr_ref s_i = th.mk_nth(s, i); expr_ref deriv(m); - deriv = derivative_wrapper(hd, r); + deriv = mk_derivative_wrapper(s_i, r); + STRACE("here", tout + << "mk_pp: " << mk_pp(deriv, m) << std::endl;); + STRACE("here", tout + << "re().to_str: " << re().to_str(deriv) << std::endl;); expr_ref accept_deriv(m); accept_deriv = mk_deriv_accept(s, idx + 1, deriv); accept_next.push_back(~lit); accept_next.push_back(len_s_le_i); accept_next.push_back(th.mk_literal(accept_deriv)); + // Acc(s, i, r) => (|s|<=i or Acc(s, i+1, D(s_i,r))) + // where Acc(s, i+1, ite(c, t, f)) = ite(c, Acc(s, i+1, t), Acc(s, i+1, t)) + // and Acc(s, i+1, r U s) = Acc(s, i+1, r) or Acc(s, i+1, s) th.add_axiom(accept_next); } @@ -419,20 +429,21 @@ namespace smt { /* Wrapper around calls to is_nullable from the seq rewriter. - Note: the nullable wrapper and derivative wrapper actually use + TODO: clean up the following: + Note: the is_nullable_wrapper and mk_derivative_wrapper actually use different sequence rewriters; these are at: m_seq_rewrite (returned by seq_rw()) th.m_rewrite.m_imp->m_cfg.m_seq_rw (private, can't be accessed directly) As a result operations are cached separately for the nullable - and derivative calls. TBD if caching them using the same rewriter - makes any difference. + and derivative calls. */ expr_ref seq_regex::is_nullable_wrapper(expr* r) { STRACE("seq_regex", tout << "nullable: " << mk_pp(r, m) << std::endl;); expr_ref result = seq_rw().is_nullable(r); + //TODO: rewrite seems unnecessary here rewrite(result); STRACE("seq_regex", tout << "nullable result: " << mk_pp(result, m) << std::endl;); @@ -443,37 +454,28 @@ namespace smt { } /* - Wrapper around the regex symbolic derivative from the seq rewriter. - Ensures that the derivative is written in a normalized BDD form - with optimizations for if-then-else expressions involving the head. - - Note: the nullable wrapper and derivative wrapper actually use - different sequence rewriters; these are at: - m_seq_rewrite - (returned by seq_rw()) - th.m_rewrite.m_imp->m_cfg.m_seq_rw - (private, can't be accessed directly) - As a result operations are cached separately for the nullable - and derivative calls. TBD if caching them using the same rewriter - makes any difference. + First creates a derivatrive of r wrt x=(:var 0) and then replaces x by ele. + This will create a cached entry for the generic derivative of r that is independent of ele. */ - expr_ref seq_regex::derivative_wrapper(expr* hd, expr* r) { - STRACE("seq_regex", tout << "derivative(" << mk_pp(hd, m) << "): " << mk_pp(r, m) << std::endl;); + expr_ref seq_regex::mk_derivative_wrapper(expr* ele, expr* r) { + STRACE("seq_regex", tout << "derivative(" << mk_pp(ele, m) << "): " << mk_pp(r, m) << std::endl;); - // Use canonical variable (:var 0) for the derivative element - expr_ref result(seq_rw().mk_derivative(r), m); + // Uses canonical variable (:var 0) for the derivative element + expr_ref der(seq_rw().mk_derivative(r), m); // Substitute (:var 0) with the actual element var_subst subst(m); expr_ref_vector sub(m); - sub.push_back(hd); - result = subst(result, sub); + sub.push_back(ele); + der = subst(der, sub); - STRACE("seq_regex", tout << "derivative result: " << mk_pp(result, m) << std::endl;); + STRACE("seq_regex", tout << "derivative result: " << mk_pp(der, m) << std::endl;); STRACE("seq_regex_brief", tout << "d(" << state_str(r) << ")=" - << state_str(result) << " ";); + << state_str(der) << " ";); - return result; + //TODO: simplify der further, if ele implies further simplifications + //e.g. if ele='b' then de(ite (x='a') t f) simplifies to t + return der; } void seq_regex::propagate_eq(expr* r1, expr* r2) { @@ -556,7 +558,7 @@ namespace smt { literal null_lit = th.mk_literal(is_nullable); expr_ref hd = mk_first(r, n); expr_ref d(m); - d = derivative_wrapper(hd, r); + d = mk_derivative_wrapper(hd, r); literal_vector lits; lits.push_back(~lit); @@ -583,11 +585,10 @@ namespace smt { } /* - Given a string s, index i, and a derivative regex d, return an + Given a string s, index i, and a derivative r, return an expression that is equivalent to accept s i r - but which pushes accept s i r into the leaves (next derivatives to - explore). + but which pushes accept s i r into the leaves Input r is of type regex; output is of type bool. @@ -597,6 +598,8 @@ namespace smt { (ite b (accept s i r3) (accept s i r4))) */ expr_ref seq_regex::mk_deriv_accept(expr* s, unsigned i, expr* r) { + return mk_deriv_accept_rec(s, i, r); + vector to_visit; to_visit.push_back(r); obj_map re_to_bool; @@ -685,52 +688,57 @@ namespace smt { return result; } + + expr_ref seq_regex::mk_deriv_accept_rec(expr* s, unsigned i, expr* d) { + expr* c, * d1, * d2; + expr_ref result(m); + if (re().is_full_seq(d) || str().min_length(s) > i && re().is_dot_plus(d)) + // s[i..] in .* <==> true, also: s[i..] in .+ <==> true when |s|>i + result = m.mk_true(); + else if (re().is_empty(d) || (str().min_length(s) > i && re().is_epsilon(d))) + // s[i..] in [] <==> false, also: s[i..] in () <==> false when |s|>i + result = m.mk_false(); + else if (m.is_ite(d, c, d1, d2)) + result = re().mk_ite_simplify(c, mk_deriv_accept_rec(s, i, d1), mk_deriv_accept_rec(s, i, d2)); + else if (re().is_union(d, d1, d2)) + result = re().mk_or_simplify(mk_deriv_accept_rec(s, i, d1), mk_deriv_accept_rec(s, i, d2)); + else + result = sk().mk_accept(s, a().mk_int(i + 1), d); + return result; + } + /* Return a list of all target regexes in the derivative of a regex r, ignoring the conditions along each path. - The derivative - construction tries to eliminate unsat condition paths, but one cannot - assume that the path to each leaf is satisfiable in general. - - In general, some of the target regexes in results may be unreachable from r. + The derivative construction uses (:var 0) and tries + to eliminate unsat condition paths but it does not perform + full satisfiability checks and it is not guaranteed + that all targets are actually reachable */ - void seq_regex::get_targets(expr* r, expr_ref_vector& results) { - // Get derivative - /*sort* seq_sort = nullptr; - VERIFY(u().is_re(r, seq_sort)); - expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); - expr_ref hd = mk_first(r, n); - expr_ref d(m); - d = derivative_wrapper(hd, r);*/ + void seq_regex::get_derivative_targets(expr* r, expr_ref_vector& targets) { + // constructs the derivative wrt (:var 0) expr_ref d(seq_rw().mk_derivative(r), m); // use DFS to collect all the targets (leaf regexes) in d. - vector to_visit; - to_visit.push_back(d); - obj_map visited; // set (bool is used as a unit type) - while (to_visit.size() > 0) { - expr* e = to_visit.back(); - to_visit.pop_back(); - if (visited.contains(e)) continue; - visited.insert(e, true); - expr* econd = nullptr, *e1 = nullptr, *e2 = nullptr; - if (m.is_ite(e, econd, e1, e2) || - re().is_union(e, e1, e2)) { - to_visit.push_back(e1); - to_visit.push_back(e2); - } - else if (!re().is_empty(e)) { - results.push_back(e); - STRACE("seq_regex_verbose", tout - << "get_all_derivatives: added deriv: " - << mk_pp(e, m) << std::endl;); + expr* _1 = nullptr, * e1 = nullptr, * e2 = nullptr; + obj_hashtable::entry* _2 = nullptr; + vector workset; + workset.push_back(d); + obj_hashtable done; + done.insert(d); + while (workset.size() > 0) { + expr* e = workset.back(); + workset.pop_back(); + if (m.is_ite(e, _1, e1, e2) || re().is_union(e, e1, e2)) { + if (done.insert_if_not_there_core(e1, _2)) + workset.push_back(e1); + if (done.insert_if_not_there_core(e2, _2)) + workset.push_back(e2); } + else if (!re().is_empty(e)) + targets.push_back(e); } - - STRACE("seq_regex", tout << "Number of derivatives: " - << results.size() << std::endl;); - STRACE("seq_regex_brief", tout << "#derivs=" << results.size() << " ";); } /* @@ -798,7 +806,7 @@ namespace smt { th.add_axiom(~lit, ~th.mk_literal(is_nullable)); expr_ref hd = mk_first(r, n); expr_ref d(m); - d = derivative_wrapper(hd, r); + d = mk_derivative_wrapper(hd, r); literal_vector lits; expr_ref_pair_vector cofactors(m); get_cofactors(d, cofactors); @@ -898,7 +906,7 @@ namespace smt { expr_ref_vector derivatives(m); STRACE("seq_regex_verbose", tout << "getting all derivs: " << r_id << " " << std::endl;); - get_targets(r, derivatives); + get_derivative_targets(r, derivatives); for (auto const& dr: derivatives) { unsigned dr_id = get_state_id(dr); STRACE("seq_regex_verbose", tout diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index eee57375d9e..5d8dc86567f 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -158,12 +158,13 @@ namespace smt { expr_ref symmetric_diff(expr* r1, expr* r2); expr_ref is_nullable_wrapper(expr* r); - expr_ref derivative_wrapper(expr* hd, expr* r); + expr_ref mk_derivative_wrapper(expr* hd, expr* r); // Various support for unfolding derivative expressions that are // returned by derivative_wrapper expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r); - void get_targets(expr* r, expr_ref_vector& results); + expr_ref mk_deriv_accept_rec(expr* s, unsigned i, expr* r); + void get_derivative_targets(expr* r, expr_ref_vector& targets); void get_cofactors(expr* r, expr_ref_pair_vector& result); void get_cofactors_rec(expr* r, expr_ref_vector& conds, expr_ref_pair_vector& result); From 0a4cb0359de13361dcbfe5f2c4768d43b8d79a6d Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 24 Sep 2021 05:07:50 -0700 Subject: [PATCH 05/16] optimized mk_deriv_accept --- src/smt/seq_regex.cpp | 87 +++++++++++++++++-------------------------- src/smt/seq_regex.h | 1 - 2 files changed, 35 insertions(+), 53 deletions(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 35e10433a86..f6560aa9c85 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -342,10 +342,8 @@ namespace smt { expr_ref s_i = th.mk_nth(s, i); expr_ref deriv(m); deriv = mk_derivative_wrapper(s_i, r); - STRACE("here", tout - << "mk_pp: " << mk_pp(deriv, m) << std::endl;); - STRACE("here", tout - << "re().to_str: " << re().to_str(deriv) << std::endl;); + STRACE("seq_regex", tout + << "mk_derivative_wrapper: " << re().to_str(deriv) << std::endl;); expr_ref accept_deriv(m); accept_deriv = mk_deriv_accept(s, idx + 1, deriv); accept_next.push_back(~lit); @@ -598,18 +596,20 @@ namespace smt { (ite b (accept s i r3) (accept s i r4))) */ expr_ref seq_regex::mk_deriv_accept(expr* s, unsigned i, expr* r) { - return mk_deriv_accept_rec(s, i, r); - vector to_visit; to_visit.push_back(r); - obj_map re_to_bool; + obj_map re_to_accept; expr_ref_vector _temp_bool_owner(m); // temp owner for bools we create - // DFS + bool s_is_longer_than_i = str().min_length(s) > i; + expr* i_int = a().mk_int(i); + _temp_bool_owner.push_back(i_int); + + // DFS, avoids duplicating derivative construction that has already been done while (to_visit.size() > 0) { expr* e = to_visit.back(); expr* econd = nullptr, *e1 = nullptr, *e2 = nullptr; - if (!re_to_bool.contains(e)) { + if (!re_to_accept.contains(e)) { // First visit: add children STRACE("seq_regex_verbose", tout << "1";); if (m.is_ite(e, econd, e1, e2) || @@ -618,36 +618,40 @@ namespace smt { to_visit.push_back(e2); } // Mark first visit by adding nullptr to the map - re_to_bool.insert(e, nullptr); + re_to_accept.insert(e, nullptr); } - else if (re_to_bool.find(e) == nullptr) { + else if (re_to_accept.find(e) == nullptr) { // Second visit: set value STRACE("seq_regex_verbose", tout << "2";); to_visit.pop_back(); if (m.is_ite(e, econd, e1, e2)) { - expr* b1 = re_to_bool.find(e1); - expr* b2 = re_to_bool.find(e2); - expr* b = m.mk_ite(econd, b1, b2); + expr* b1 = re_to_accept.find(e1); + expr* b2 = re_to_accept.find(e2); + expr* b = m.is_true(econd) || b1 == b2 ? b1 : m.is_false(econd) ? b2 : m.mk_ite(econd, b1, b2); _temp_bool_owner.push_back(b); - re_to_bool.find(e) = b; + re_to_accept.find(e) = b; } - /* - else if (re().is_empty(e)) + else if (re().is_empty(e) || (s_is_longer_than_i && re().is_epsilon(e))) { - re_to_bool.find(e) = m.mk_false(); + // s[i..] in [] <==> false, also: s[i..] in () <==> false when |s|>i + re_to_accept.find(e) = m.mk_false(); } + else if (re().is_full_seq(e) || s_is_longer_than_i && re().is_dot_plus(e)) + { + // s[i..] in .* <==> true, also: s[i..] in .+ <==> true when |s|>i + re_to_accept.find(e) = m.mk_true(); + } + /* else if (re().is_epsilon(e)) { - expr* iplus1 = a().mk_int(i); expr* one = a().mk_int(1); - _temp_bool_owner.push_back(iplus1); _temp_bool_owner.push_back(one); - //the substring starting after position iplus1 must be empty - expr* s_end = str().mk_substr(s, iplus1, one); + //the substring starting after position i must be empty + expr* s_end = str().mk_substr(s, i_int, one); expr* s_end_is_epsilon = m.mk_eq(s_end, str().mk_empty(m.get_sort(s))); _temp_bool_owner.push_back(s_end_is_epsilon); - re_to_bool.find(e) = s_end_is_epsilon; + re_to_accept.find(e) = s_end_is_epsilon; STRACE("seq_regex_verbose", tout << "added empty sequence leaf: " @@ -655,18 +659,16 @@ namespace smt { } */ else if (re().is_union(e, e1, e2)) { - expr* b1 = re_to_bool.find(e1); - expr* b2 = re_to_bool.find(e2); - expr* b = m.mk_or(b1, b2); + expr* b1 = re_to_accept.find(e1); + expr* b2 = re_to_accept.find(e2); + expr* b = m.is_false(b1) || b1 == b2 ? b2 : m.is_false(b2) ? b1 : m.mk_or(b1, b2); _temp_bool_owner.push_back(b); - re_to_bool.find(e) = b; + re_to_accept.find(e) = b; } else { - expr* iplus1 = a().mk_int(i); - _temp_bool_owner.push_back(iplus1); - expr_ref acc_leaf = sk().mk_accept(s, iplus1, e); + expr_ref acc_leaf = sk().mk_accept(s, i_int, e); _temp_bool_owner.push_back(acc_leaf); - re_to_bool.find(e) = acc_leaf; + re_to_accept.find(e) = acc_leaf; STRACE("seq_regex_verbose", tout << "mk_deriv_accept: added accept leaf: " @@ -682,31 +684,12 @@ namespace smt { // Finalize expr_ref result(m); - result = re_to_bool.find(r); // Assigns ownership of all exprs in - // re_to_bool for after this completes + result = re_to_accept.find(r); // Assigns ownership of all exprs in + // re_to_accept for after this completes rewrite(result); return result; } - - expr_ref seq_regex::mk_deriv_accept_rec(expr* s, unsigned i, expr* d) { - expr* c, * d1, * d2; - expr_ref result(m); - if (re().is_full_seq(d) || str().min_length(s) > i && re().is_dot_plus(d)) - // s[i..] in .* <==> true, also: s[i..] in .+ <==> true when |s|>i - result = m.mk_true(); - else if (re().is_empty(d) || (str().min_length(s) > i && re().is_epsilon(d))) - // s[i..] in [] <==> false, also: s[i..] in () <==> false when |s|>i - result = m.mk_false(); - else if (m.is_ite(d, c, d1, d2)) - result = re().mk_ite_simplify(c, mk_deriv_accept_rec(s, i, d1), mk_deriv_accept_rec(s, i, d2)); - else if (re().is_union(d, d1, d2)) - result = re().mk_or_simplify(mk_deriv_accept_rec(s, i, d1), mk_deriv_accept_rec(s, i, d2)); - else - result = sk().mk_accept(s, a().mk_int(i + 1), d); - return result; - } - /* Return a list of all target regexes in the derivative of a regex r, ignoring the conditions along each path. diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 5d8dc86567f..dcf81dbc360 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -163,7 +163,6 @@ namespace smt { // Various support for unfolding derivative expressions that are // returned by derivative_wrapper expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r); - expr_ref mk_deriv_accept_rec(expr* s, unsigned i, expr* r); void get_derivative_targets(expr* r, expr_ref_vector& targets); void get_cofactors(expr* r, expr_ref_pair_vector& result); void get_cofactors_rec(expr* r, expr_ref_vector& conds, From ab920308ab4953d1c0fe99fe007fefb8715e3722 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Mon, 27 Sep 2021 16:57:19 -0700 Subject: [PATCH 06/16] fixed PR comments --- src/ast/rewriter/seq_rewriter.cpp | 5 +++-- src/ast/seq_decl_plugin.cpp | 12 ++++++++---- src/ast/seq_decl_plugin.h | 8 ++++---- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 25d4ac035ee..cb173d6bb22 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -921,7 +921,7 @@ expr_ref seq_rewriter::mk_seq_last(expr* t) { expr_ref lastpos(str().is_len_sub(k, l, s_, i) && s == s_ ? m_autil.mk_add(l, m_autil.mk_int(jv.get_int32() - 1 - i)) : m_autil.mk_add(k, m_autil.mk_int(jv.get_int32() - 1)), m()); - result = str().mk_nth_i(s, lastpos); + result = str().mk_nth_i(s, lastpos.get()); } else result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); @@ -3140,7 +3140,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = mk_antimirov_deriv_intersection( mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path), m().mk_true()); - else if (re().is_star(r, r1) || re().is_plus(r, r1) || re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1) + else if (re().is_star(r, r1) || re().is_plus(r, r1) || (re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1)) result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_star(r1)); else if (re().is_loop(r, r1, lo)) result = mk_antimirov_deriv_concat(mk_antimirov_deriv(e, r1, path), re().mk_loop(r1, lo - 1)); @@ -3319,6 +3319,7 @@ expr_ref seq_rewriter::mk_regex_concat(expr* r, expr* s) { //TODO: perhaps simplifiy some further cases such as .*. = ..* = .*.+ = .+.* = .+ result = re().mk_concat(r, s); } + return result; } expr_ref seq_rewriter::mk_in_antimirov(expr* s, expr* d){ diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index bd0365599fe..c71ddfbf5fb 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1105,7 +1105,7 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) { /* Produces compact view of concrete concatenations such as (abcd). */ -void seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const { +std::ostream& seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const { SASSERT(re.u.is_seq(s)); zstring z; expr* x, * j, * k, * l, * i, * x_; @@ -1158,17 +1158,19 @@ void seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const { } else out << mk_pp(s, re.m); + return out; } /* Produces output such as [a-z] for a range. */ -void seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const { +std::ostream& seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const { out << "["; print_unit(out, s1); out << "-"; print_unit(out, s2); out << "]"; + return out; } /* @@ -1182,7 +1184,7 @@ bool seq_util::rex::pp::can_skip_parenth(expr* r) const { /* Specialize output for a unit sequence converting to visible ASCII characters if possible. */ -void seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const { +std::ostream& seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const { expr* e, * i; unsigned n = 0; if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) { @@ -1237,12 +1239,13 @@ void seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const { } else out << mk_pp(s, re.m); + return out; } /* Pretty prints the regex r into the ostream out */ -void seq_util::rex::pp::print(std::ostream& out, expr* e) const { +std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const { expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr; unsigned lo = 0, hi = 0; rational v; @@ -1400,6 +1403,7 @@ void seq_util::rex::pp::print(std::ostream& out, expr* e) const { else // for all remaining cases use the default pretty printer out << mk_pp(e, re.m); + return out; } std::ostream& seq_util::rex::pp::display(std::ostream& out) const { diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 11ed269c632..77d20a65cff 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -614,10 +614,10 @@ class seq_util { expr* ex; bool html_encode; bool can_skip_parenth(expr* r) const; - void print_unit(std::ostream& out, expr* s) const; - void print_seq(std::ostream& out, expr* s) const; - void print_range(std::ostream& out, expr* s1, expr* s2) const; - void print(std::ostream& out, expr* e) const; + std::ostream& print_unit(std::ostream& out, expr* s) const; + std::ostream& print_seq(std::ostream& out, expr* s) const; + std::ostream& print_range(std::ostream& out, expr* s1, expr* s2) const; + std::ostream& print(std::ostream& out, expr* e) const; public: pp(seq_util::rex& re, expr* ex, bool html) : re(re), ex(ex), html_encode(html) {} From 11479a221046d7d8a541a2d104d6147a970c344d Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Mon, 27 Sep 2021 17:52:12 -0700 Subject: [PATCH 07/16] small syntax fix --- src/smt/seq_regex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index f6560aa9c85..aa1faa13174 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -636,7 +636,7 @@ namespace smt { // s[i..] in [] <==> false, also: s[i..] in () <==> false when |s|>i re_to_accept.find(e) = m.mk_false(); } - else if (re().is_full_seq(e) || s_is_longer_than_i && re().is_dot_plus(e)) + else if (re().is_full_seq(e) || (s_is_longer_than_i && re().is_dot_plus(e))) { // s[i..] in .* <==> true, also: s[i..] in .+ <==> true when |s|>i re_to_accept.find(e) = m.mk_true(); From dec03d1fa99d4ed4aabc084e2e7e0fa8cc737d3d Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 1 Oct 2021 15:40:48 -0700 Subject: [PATCH 08/16] updated some simplifications --- src/ast/rewriter/seq_rewriter.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index cb173d6bb22..bb0090789ea 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -2996,7 +2996,8 @@ expr_ref seq_rewriter::mk_derivative(expr* r) { sort* seq_sort = nullptr, * ele_sort = nullptr; VERIFY(m_util.is_re(r, seq_sort)); VERIFY(m_util.is_seq(seq_sort, ele_sort)); - return mk_antimirov_deriv(m().mk_var(0, ele_sort), r, m().mk_true()); + expr_ref v(m().mk_var(0, ele_sort), m()); + return mk_antimirov_deriv(v, r, m().mk_true()); } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { @@ -3123,7 +3124,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref SASSERT(u().is_char(c1)); SASSERT(u().is_char(c2)); // range represents c1 <= e <= c2 - range = m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2)); + range = simplify_path(m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2))); psi = simplify_path(m().mk_and(path, range)); if (m().is_false(psi)) result = nothing(); @@ -3356,7 +3357,9 @@ expr_ref seq_rewriter::simplify_path(expr* path) { expr* h = nullptr, * t = nullptr, * lhs = nullptr, * rhs = nullptr, * h1 = nullptr; if (m().is_and(path, h, t)) { if (m().is_true(h)) - result = t; + result = simplify_path(t); + else if (m().is_true(t)) + result = simplify_path(h); else if (m().is_eq(h, lhs, rhs) || m().is_not(h, h1) && m().is_eq(h1, lhs, rhs)) elim_condition(lhs, result); } From 0eea58383b2417a58126727b18851c790ad44dcf Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Fri, 1 Oct 2021 18:01:20 -0700 Subject: [PATCH 09/16] bugfix:forgot to_re before reverse --- src/ast/rewriter/seq_rewriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index bb0090789ea..a995a86e399 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3082,7 +3082,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = nothing(); else // observe that the precondition |r1|>0 of mk_seq_rest is implied by c1 - result = re().mk_ite_simplify(c1, re().mk_reverse(mk_seq_butlast(r1)), nothing()); + result = re().mk_ite_simplify(c1, re().mk_reverse(re().mk_to_re(mk_seq_butlast(r1))), nothing()); } else { result = mk_regex_reverse(r2); From f948a636761f8f7c8dc08a2d16a786fea8af487d Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 6 Oct 2021 18:39:02 -0700 Subject: [PATCH 10/16] fixed PR comments --- src/ast/rewriter/seq_rewriter.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index a995a86e399..bf40710ff91 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3016,6 +3016,8 @@ expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { } expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) { + // Take reference count of path and r + expr_ref _path(path, m()), _r(r, m()); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, e, r, path), m()); if (!result) { mk_antimirov_deriv_rec(e, r, path, result); @@ -3204,6 +3206,8 @@ expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { expr_ref result(m()); + // Take reference count of r and d + expr_ref _r(r, m()), _d(d, m()); expr* c, * t, * e; if (m().is_ite(d, c, t, e)) result = m().mk_ite(c, mk_antimirov_deriv_concat(t, r), mk_antimirov_deriv_concat(e, r)); @@ -4151,7 +4155,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { //result = re().mk_in_re(tl, re().mk_derivative(hd, b)); //result = re().mk_in_re(tl, mk_derivative(hd, b)); result = mk_in_antimirov(tl, mk_antimirov_deriv(hd, b, m().mk_true())); - return BR_DONE; + return BR_REWRITE1; } if (get_head_tail_reversed(a, hd, tl)) { From 612588f5a1195e9acaade2f0b6f3328b6d9bd951 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 6 Oct 2021 19:59:59 -0700 Subject: [PATCH 11/16] more PR comment fixes --- src/ast/rewriter/seq_rewriter.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index bf40710ff91..1cb6853241e 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3162,6 +3162,13 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = mk_antimirov_deriv_intersection( mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r2, path)), m().mk_true()); + else if (re().is_of_pred(r, r1)) { + array_util array(m()); + expr* args[2] = { r1, e }; + result = array.mk_select(2, args); + // Use mk_der_cond to normalize + result = mk_der_cond(result, e, seq_sort); + } else // stuck cases result = re().mk_derivative(e, r); @@ -4155,7 +4162,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { //result = re().mk_in_re(tl, re().mk_derivative(hd, b)); //result = re().mk_in_re(tl, mk_derivative(hd, b)); result = mk_in_antimirov(tl, mk_antimirov_deriv(hd, b, m().mk_true())); - return BR_REWRITE1; + return BR_REWRITE_FULL; } if (get_head_tail_reversed(a, hd, tl)) { From 7ecbc62b634dd3c32f79574c453624a1a4b5b689 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 6 Oct 2021 22:26:37 -0700 Subject: [PATCH 12/16] more PR comment fixes --- src/ast/seq_decl_plugin.cpp | 13 +++++-------- src/ast/seq_decl_plugin.h | 4 ++-- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index c71ddfbf5fb..96411f0bffe 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1231,10 +1231,9 @@ std::ostream& seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const { out << mk_pp(e, re.m); else if (is_app(e)) { out << "(" << to_app(e)->get_decl()->get_name().str(); - for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) { - out << " "; - print(out, to_app(e)->get_arg(i)); - } + //for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) { + for (expr * arg : *to_app(e)) + print(out << " ", arg); out << ")"; } else @@ -1394,10 +1393,8 @@ std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const { out << mk_pp(e, re.m); else if (is_app(e)) { out << "(" << to_app(e)->get_decl()->get_name().str(); - for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) { - out << " "; - print(out, to_app(e)->get_arg(i)); - } + for (expr* arg : *to_app(e)) + print(out << " ", arg); out << ")"; } else diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 77d20a65cff..1bfff85f668 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -593,7 +593,7 @@ class seq_util { return result; } - expr_ref mk_or_simplify(expr* a, expr* b) + /* expr_ref mk_or_simplify(expr* a, expr* b) { expr_ref result(m); if (m.is_true(a) || a == b) @@ -607,7 +607,7 @@ class seq_util { else result = m.mk_or(a, b); return result; - } + }*/ class pp { seq_util::rex& re; From b62fdca7b9a4eeb4fc91c5b8da0de0396e5d68e0 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 6 Oct 2021 22:28:33 -0700 Subject: [PATCH 13/16] forgot to delete --- src/ast/seq_decl_plugin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 96411f0bffe..ba46611441e 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1231,7 +1231,6 @@ std::ostream& seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const { out << mk_pp(e, re.m); else if (is_app(e)) { out << "(" << to_app(e)->get_decl()->get_name().str(); - //for (unsigned i = 0; i < to_app(e)->get_num_args(); i++) { for (expr * arg : *to_app(e)) print(out << " ", arg); out << ")"; From 3d5b27efac8f24c3fb3a5427c2faa5f4fa42fde4 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 6 Oct 2021 22:37:11 -0700 Subject: [PATCH 14/16] deleting unused definition --- src/ast/seq_decl_plugin.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 1bfff85f668..ae20af87d24 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -593,22 +593,6 @@ class seq_util { return result; } - /* expr_ref mk_or_simplify(expr* a, expr* b) - { - expr_ref result(m); - if (m.is_true(a) || a == b) - result = a; - else if (m.is_true(b)) - result = b; - else if (m.is_false(a)) - result = b; - else if (m.is_false(b)) - result = a; - else - result = m.mk_or(a, b); - return result; - }*/ - class pp { seq_util::rex& re; expr* ex; From 90e8a3c3f980c09d3768d3fd3d572bd91084c934 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 8 Oct 2021 12:07:28 -0700 Subject: [PATCH 15/16] fixes Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_rewriter.cpp | 114 ++++++++++++++---------------- src/ast/rewriter/seq_rewriter.h | 2 + src/ast/seq_decl_plugin.cpp | 16 ++--- src/ast/seq_decl_plugin.h | 2 +- 4 files changed, 65 insertions(+), 69 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 1cb6853241e..e7918a4183a 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -859,13 +859,12 @@ br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) { // elif offset >= len(s) then 0 // elif offset + length > len(s) then len(s) - offset // else length - expr_ref zero(m_autil.mk_int(0), m()); result = length; result = m().mk_ite(m_autil.mk_gt(m_autil.mk_add(offset, length), len_s), m_autil.mk_sub(len_s, offset), result); - result = m().mk_ite(m().mk_or(m_autil.mk_le(len_s, offset), m_autil.mk_le(length, zero), m_autil.mk_lt(offset, zero)), - zero, + result = m().mk_ite(m().mk_or(m_autil.mk_le(len_s, offset), m_autil.mk_le(length, zero()), m_autil.mk_lt(offset, zero())), + zero(), result); return BR_REWRITE_FULL; } @@ -883,48 +882,56 @@ expr_ref seq_rewriter::mk_seq_first(expr* t) { if (str().is_extract(t, s, j, k)) result = str().mk_nth_i(s, j); else - result = str().mk_nth_i(t, m_autil.mk_int(0)); + result = str().mk_nth_i(t, zero()); return result; } +expr_ref seq_rewriter::mk_sub(expr* a, rational const& n) { + expr* a1, *a2; + SASSERT(n.is_int()); + rational k; + if (m_autil.is_sub(a, a1, a2) && m_autil.is_numeral(a2, k)) + return expr_ref(m_autil.mk_sub(a1, m_autil.mk_int(k + n)), m()); + if (m_autil.is_add(a, a1, a2) && m_autil.is_numeral(a2, k)) + return expr_ref(m_autil.mk_add(a1, m_autil.mk_int(k - n)), m()); + if (m_autil.is_add(a, a1, a2) && m_autil.is_numeral(a1, k)) + return expr_ref(m_autil.mk_add(a2, m_autil.mk_int(k - n)), m()); + return expr_ref(m_autil.mk_sub(a, m_autil.mk_int(n)), m()); +} + + /* * In general constructs substring(t,1,|t|-1) but if t = substring(s,j,k) then simplifies to substring(s,j+1,k-1) * This method assumes that |t| > 0. */ expr_ref seq_rewriter::mk_seq_rest(expr* t) { expr_ref result(m()); - expr* s, * j, * k, * s_, * l; + expr* s, * j, * k; rational jv; - if (str().is_extract(t, s, j, k) && m_autil.is_numeral(j, jv) && jv.is_nonneg()) { - unsigned i; - expr_ref k_min_1(str().is_len_sub(k, l, s_, i) && s == s_ ? - m_autil.mk_sub(l, m_autil.mk_int(i + 1)) : - m_autil.mk_sub(k, m_autil.mk_int(1)), m()); - result = str().mk_substr(s, m_autil.mk_int(jv.get_int32() + 1), k_min_1); - } - else - result = str().mk_substr(t, m_autil.mk_int(1), m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); + if (str().is_extract(t, s, j, k) && m_autil.is_numeral(j, jv) && jv >= 0) + result = str().mk_substr(s, m_autil.mk_int(jv + 1), mk_sub(k, 1)); + else + result = str().mk_substr(t, one(), mk_sub(str().mk_length(t), 1)); return result; } /* -* In general constructs nth(t,|t|-1) but if t = substring(s,j,k) then simplifies to nth(s,j+k-1) +* In general constructs nth(t,|t|-1) but if t = substring(s,j,|s|-j) j >= 0, then simplifies to nth(s,|s|-1) * This method assumes that |t| > 0. */ expr_ref seq_rewriter::mk_seq_last(expr* t) { expr_ref result(m()); - expr* s, * j, * k, * s_, * l; - rational jv; - if (str().is_extract(t, s, j, k) && m_autil.is_numeral(j, jv) && jv.is_nonneg()) { - unsigned i; - // k = |s_| - i, l = |s_| implies that lastpos = j + k - 1 = k + j - 1 = j + l - i - 1 = l + j - i - 1 - expr_ref lastpos(str().is_len_sub(k, l, s_, i) && s == s_ ? - m_autil.mk_add(l, m_autil.mk_int(jv.get_int32() - 1 - i)) : - m_autil.mk_add(k, m_autil.mk_int(jv.get_int32() - 1)), m()); + expr* s, * j, * k, * s_, * len_s; + rational jv, i; + if (str().is_extract(t, s, j, k) && + m_autil.is_numeral(j, jv) && jv >= 0 && + str().is_len_sub(k, len_s, s_, i) && + s == s_ && jv == i) { + expr_ref lastpos = mk_sub(len_s, 1); result = str().mk_nth_i(s, lastpos.get()); } else - result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); + result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one())); return result; } @@ -938,11 +945,11 @@ expr_ref seq_rewriter::mk_seq_butlast(expr* t) { if (str().is_extract(t, s, j, k)) { expr_ref_vector k_min_1(m()); k_min_1.push_back(k); - k_min_1.push_back(m_autil.mk_int(-1)); + k_min_1.push_back(minus_one()); result = str().mk_substr(s, j, m_autil.mk_add_simplify(k_min_1)); } else - result = str().mk_substr(t, m_autil.mk_int(0), m_autil.mk_sub(str().mk_length(t), m_autil.mk_int(1))); + result = str().mk_substr(t, zero(), m_autil.mk_sub(str().mk_length(t), one())); return result; } @@ -1692,7 +1699,7 @@ br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result return BR_DONE; } if (m_autil.is_numeral(c, r) && r.is_neg()) { - result = m_autil.mk_int(-1); + result = minus_one(); return BR_DONE; } @@ -1702,10 +1709,10 @@ br_status seq_rewriter::mk_seq_index(expr* a, expr* b, expr* c, expr_ref& result } if (str().is_empty(b)) { - result = m().mk_ite(m().mk_and(m_autil.mk_le(m_autil.mk_int(0), c), + result = m().mk_ite(m().mk_and(m_autil.mk_le(zero(), c), m_autil.mk_le(c, str().mk_length(a))), c, - m_autil.mk_int(-1)); + minus_one()); return BR_REWRITE2; } @@ -2321,7 +2328,7 @@ br_status seq_rewriter::mk_str_to_code(expr* a, expr_ref& result) { if (s.length() == 1) result = m_autil.mk_int(s[0]); else - result = m_autil.mk_int(-1); + result = minus_one(); return BR_DONE; } return BR_FAILED; @@ -2462,7 +2469,7 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { result = m_autil.mk_int(ch - '0'); } else { - result = m_autil.mk_int(-1); + result = minus_one(); } return BR_DONE; } @@ -2470,7 +2477,7 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { expr_ref_vector as(m()); str().get_concat_units(a, as); if (as.empty()) { - result = m_autil.mk_int(-1); + result = minus_one(); return BR_DONE; } if (str().is_unit(as.back())) { @@ -2480,11 +2487,11 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { expr_ref tail(str().mk_stoi(as.back()), m()); expr_ref head(str().mk_concat(as.size() - 1, as.data(), a->get_sort()), m()); expr_ref stoi_head(str().mk_stoi(head), m()); - result = m().mk_ite(m_autil.mk_ge(stoi_head, m_autil.mk_int(0)), + result = m().mk_ite(m_autil.mk_ge(stoi_head, zero()), m_autil.mk_add(m_autil.mk_mul(m_autil.mk_int(10), stoi_head), tail), - m_autil.mk_int(-1)); + minus_one()); - result = m().mk_ite(m_autil.mk_ge(tail, m_autil.mk_int(0)), + result = m().mk_ite(m_autil.mk_ge(tail, zero()), result, tail); result = m().mk_ite(str().mk_is_empty(head), @@ -2495,7 +2502,7 @@ br_status seq_rewriter::mk_str_stoi(expr* a, expr_ref& result) { if (str().is_unit(as.get(0), u) && m_util.is_const_char(u, ch) && '0' == ch) { result = str().mk_concat(as.size() - 1, as.data() + 1, as[0]->get_sort()); result = m().mk_ite(str().mk_is_empty(result), - m_autil.mk_int(0), + zero(), str().mk_stoi(result)); return BR_REWRITE_FULL; } @@ -3001,23 +3008,12 @@ expr_ref seq_rewriter::mk_derivative(expr* r) { } expr_ref seq_rewriter::mk_derivative(expr* ele, expr* r) { - /*STRACE("seq_verbose", tout << "derivative: " << mk_pp(ele, m()) - << "," << mk_pp(r, m()) << std::endl;); - expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, ele, r, nullptr), m()); - if (!result) { - result = mk_derivative_rec(ele, r); - m_op_cache.insert(OP_RE_DERIVATIVE, ele, r, nullptr, result); - } - STRACE("seq_verbose", tout << "derivative result: " - << re().to_str(result) << std::endl;); - CASSERT("seq_regex", check_deriv_normal_form(r)); - return result;*/ return mk_antimirov_deriv(ele, r, m().mk_true()); } expr_ref seq_rewriter::mk_antimirov_deriv(expr* e, expr* r, expr* path) { - // Take reference count of path and r - expr_ref _path(path, m()), _r(r, m()); + // Ensure references are owned + expr_ref _e(e, m()), _path(path, m()), _r(r, m()); expr_ref result(m_op_cache.find(OP_RE_DERIVATIVE, e, r, path), m()); if (!result) { mk_antimirov_deriv_rec(e, r, path, result); @@ -3062,8 +3058,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref else result = re().mk_ite_simplify(m().mk_eq(e, h), re().mk_to_re(t), nothing()); } - else - { + else { // observe that the precondition |r1|>0 is is implied by c1 for use of mk_seq_first m_br.mk_and(m().mk_not(m().mk_eq(r1, str().mk_empty(seq_sort))), m().mk_eq(mk_seq_first(r1), e), c1); m_br.mk_and(path, c1, c2); @@ -3255,8 +3250,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) { return result; } -expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) -{ +expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) { expr_ref result(m()); if (re().is_empty(d1) || re().is_full_seq(d2)) result = d2; @@ -3893,7 +3887,7 @@ expr_ref seq_rewriter::mk_derivative_rec(expr* ele, expr* r) { // construct the term (if (r2 != () and (ele = (last r2)) then reverse(to_re (butlast r2)) else [])) // hd = first of reverse(r2) i.e. last of r2 // tl = rest of reverse(r2) i.e. butlast of r2 - //hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), m_autil.mk_int(1))); + //hd = str().mk_nth_i(r2, m_autil.mk_sub(str().mk_length(r2), one())); hd = mk_seq_last(r2); m_br.mk_and(m().mk_not(m().mk_eq(r2, str().mk_empty(seq_sort))), m().mk_eq(hd, ele), result); tl = re().mk_to_re(mk_seq_butlast(r2)); @@ -4144,7 +4138,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { (re().is_union(b, b1, eps) && re().is_epsilon(eps)) || (re().is_union(b, eps, b1) && re().is_epsilon(eps))) { - result = m().mk_ite(m().mk_eq(str().mk_length(a), m_autil.mk_int(0)), + result = m().mk_ite(m().mk_eq(str().mk_length(a), zero()), m().mk_true(), re().mk_in_re(a, b1)); return BR_REWRITE_FULL; @@ -4177,7 +4171,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { expr_ref len_a(str().mk_length(a), m()); expr_ref len_tl(m_autil.mk_sub(len_a, len_hd), m()); result = m().mk_and(m_autil.mk_ge(len_a, len_hd), - re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd), + re().mk_in_re(str().mk_substr(a, zero(), len_hd), hd), re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl)); return BR_REWRITE_FULL; } @@ -4188,7 +4182,7 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) { expr_ref len_hd(m_autil.mk_sub(len_a, len_tl), m()); expr* s = nullptr; result = m().mk_and(m_autil.mk_ge(len_a, len_tl), - re().mk_in_re(str().mk_substr(a, m_autil.mk_int(0), len_hd), hd), + re().mk_in_re(str().mk_substr(a, zero(), len_hd), hd), (re().is_to_re(tl, s) ? m().mk_eq(s, str().mk_substr(a, len_hd, len_tl)) : re().mk_in_re(str().mk_substr(a, len_hd, len_tl), tl))); return BR_REWRITE_FULL; @@ -5541,15 +5535,15 @@ bool seq_rewriter::reduce_eq_empty(expr* l, expr* r, expr_ref& result) { if (str().is_extract(r, s, offset, len)) { expr_ref len_s(str().mk_length(s), m()); expr_ref_vector fmls(m()); - fmls.push_back(m_autil.mk_lt(offset, m_autil.mk_int(0))); + fmls.push_back(m_autil.mk_lt(offset, zero())); fmls.push_back(m().mk_eq(s, l)); - fmls.push_back(m_autil.mk_le(len, m_autil.mk_int(0))); + fmls.push_back(m_autil.mk_le(len, zero())); fmls.push_back(m_autil.mk_le(len_s, offset)); result = m().mk_or(fmls); return true; } if (str().is_itos(r, s)) { - result = m_autil.mk_lt(s, m_autil.mk_int(0)); + result = m_autil.mk_lt(s, zero()); return true; } return false; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 59aac3ffda8..20978c2796f 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -302,6 +302,8 @@ class seq_rewriter { expr_ref zero() { return expr_ref(m_autil.mk_int(0), m()); } expr_ref one() { return expr_ref(m_autil.mk_int(1), m()); } expr_ref minus_one() { return expr_ref(m_autil.mk_int(-1), m()); } + expr_ref mk_sub(expr* a, rational const& n); + expr_ref mk_sub(expr* a, unsigned n) { return mk_sub(a, rational(n)); } bool is_suffix(expr* s, expr* offset, expr* len); bool is_prefix(expr* s, expr* offset, expr* len); diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index ba46611441e..36ac5561ff8 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -858,24 +858,24 @@ void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const { Returns true if s is an expression of the form (l = |u|) |u|-k or (-k)+|u| or |u|+(-k). Also returns true and assigns k=0 and l=s if s is |u|. */ -bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, unsigned& k) const -{ +bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, rational& k) const { expr* x; rational v; + arith_util a(m); if (is_length(s, l)) { k = 0; return true; } - else if (arith_util(m).is_sub(s, l, x) && is_length(l, u) && arith_util(m).is_numeral(x, v) && v.is_nonneg()) { - k = v.get_unsigned(); + else if (a.is_sub(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonneg()) { + k = v; return true; } - else if (arith_util(m).is_add(s, l, x) && is_length(l, u) && arith_util(m).is_numeral(x, v) && v.is_nonpos()) { - k = (0 - v.get_int32()); + else if (a.is_add(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) { + k = - v; return true; } - else if (arith_util(m).is_add(s, x, l) && is_length(l, u) && arith_util(m).is_numeral(x, v) && v.is_nonpos()) { - k = (0 - v.get_int32()); + else if (a.is_add(s, x, l) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) { + k = - v; return true; } else diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index ae20af87d24..0bdf8a68b8e 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -350,7 +350,7 @@ class seq_util { bool is_from_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_FROM_CODE); } bool is_to_code(expr const* n) const { return is_app_of(n, m_fid, OP_STRING_TO_CODE); } - bool is_len_sub(expr const* n, expr*& l, expr*& u, unsigned& k) const; + bool is_len_sub(expr const* n, expr*& l, expr*& u, rational& k) const; /* tests if s is a single character string(c) or a unit (c) From 691fdf0a85f8b21f83ee35a9e87beb01abc1a11c Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 8 Oct 2021 12:38:26 -0700 Subject: [PATCH 16/16] fixes Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_axioms.cpp | 2 +- src/ast/rewriter/seq_rewriter.cpp | 4 ++-- src/ast/seq_decl_plugin.cpp | 2 +- src/ast/seq_decl_plugin.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ast/rewriter/seq_axioms.cpp b/src/ast/rewriter/seq_axioms.cpp index bcda71e5448..14e6290c6d6 100644 --- a/src/ast/rewriter/seq_axioms.cpp +++ b/src/ast/rewriter/seq_axioms.cpp @@ -850,7 +850,7 @@ namespace seq { add_clause(~eq, ge10k); for (unsigned i = 0; i < k; ++i) { - expr* ch = seq.str.mk_nth_i(ubvs, i); + expr* ch = seq.str.mk_nth_c(ubvs, i); is_digit = seq.mk_char_is_digit(ch); add_clause(~ge_len, is_digit); } diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index e7918a4183a..a956718dc27 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -882,7 +882,7 @@ expr_ref seq_rewriter::mk_seq_first(expr* t) { if (str().is_extract(t, s, j, k)) result = str().mk_nth_i(s, j); else - result = str().mk_nth_i(t, zero()); + result = str().mk_nth_c(t, 0); return result; } @@ -928,7 +928,7 @@ expr_ref seq_rewriter::mk_seq_last(expr* t) { str().is_len_sub(k, len_s, s_, i) && s == s_ && jv == i) { expr_ref lastpos = mk_sub(len_s, 1); - result = str().mk_nth_i(s, lastpos.get()); + result = str().mk_nth_i(s, lastpos); } else result = str().mk_nth_i(t, m_autil.mk_sub(str().mk_length(t), one())); diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 36ac5561ff8..cc78da8f668 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -839,7 +839,7 @@ bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const { return arith_util(m).is_unsigned(i, idx); } -app* seq_util::str::mk_nth_i(expr* s, unsigned i) const { +app* seq_util::str::mk_nth_c(expr* s, unsigned i) const { return mk_nth_i(s, arith_util(m).mk_int(i)); } diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 0bdf8a68b8e..6bffbf62e09 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -286,7 +286,7 @@ class seq_util { app* mk_at(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_AT, 2, es); } app* mk_nth(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH, 2, es); } app* mk_nth_i(expr* s, expr* i) const { expr* es[2] = { s, i }; return m.mk_app(m_fid, OP_SEQ_NTH_I, 2, es); } - app* mk_nth_i(expr* s, unsigned i) const; + app* mk_nth_c(expr* s, unsigned i) const; app* mk_substr(expr* a, expr* b, expr* c) const { expr* es[3] = { a, b, c }; return m.mk_app(m_fid, OP_SEQ_EXTRACT, 3, es); } app* mk_contains(expr* a, expr* b) const { expr* es[2] = { a, b }; return m.mk_app(m_fid, OP_SEQ_CONTAINS, 2, es); }