From 2be93870c86761dd5bd3df93a8b92f1ddf94bf24 Mon Sep 17 00:00:00 2001 From: Margus Veanes Date: Wed, 15 Dec 2021 10:59:34 -0800 Subject: [PATCH] Cleanup regex info and some fixes in Derivative code (#5709) * removed unused regex info fields * cleanup of info and fixes in antimirov derivatives * removed extra qualification on operator --- src/ast/rewriter/seq_rewriter.cpp | 128 ++++++++++++++++++++---------- src/ast/rewriter/seq_rewriter.h | 7 +- src/ast/seq_decl_plugin.cpp | 105 ++++++++---------------- src/ast/seq_decl_plugin.h | 35 ++------ 4 files changed, 132 insertions(+), 143 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index cb7b778abde..d399df1e607 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -3115,8 +3115,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = mk_antimirov_deriv_union(c1, re().mk_ite_simplify(r1nullable, mk_antimirov_deriv(e, r2, path), nothing())); } else if (m().is_ite(r, c, r1, r2)) { - c1 = simplify_path(m().mk_and(c, path)); - c2 = simplify_path(m().mk_and(m().mk_not(c), path)); + c1 = simplify_path(e, m().mk_and(c, path)); + c2 = simplify_path(e, m().mk_and(m().mk_not(c), path)); if (m().is_false(c1)) result = mk_antimirov_deriv(e, r2, c2); else if (m().is_false(c2)) @@ -3131,8 +3131,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref SASSERT(u().is_char(c1)); SASSERT(u().is_char(c2)); // case: c1 <= e <= c2 - range = simplify_path(m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2))); - psi = simplify_path(m().mk_and(path, range)); + range = simplify_path(e, m().mk_and(u().mk_le(c1, e), u().mk_le(e, c2))); + psi = simplify_path(e, m().mk_and(path, range)); } else if (!str().is_string(r1) && str().is_unit_string(r2, c2)) { SASSERT(u().is_char(c2)); @@ -3141,8 +3141,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref expr_ref zero(m_autil.mk_int(0), m()); expr_ref r1_length_eq_one(m().mk_eq(str().mk_length(r1), one), m()); expr_ref r1_0(str().mk_nth_i(r1, zero), m()); - range = simplify_path(m().mk_and(r1_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, c2)))); - psi = simplify_path(m().mk_and(path, range)); + range = simplify_path(e, m().mk_and(r1_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, c2)))); + psi = simplify_path(e, m().mk_and(path, range)); } else if (!str().is_string(r2) && str().is_unit_string(r1, c1)) { SASSERT(u().is_char(c1)); @@ -3151,8 +3151,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref expr_ref zero(m_autil.mk_int(0), m()); expr_ref r2_length_eq_one(m().mk_eq(str().mk_length(r2), one), m()); expr_ref r2_0(str().mk_nth_i(r2, zero), m()); - range = simplify_path(m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(c1, e), u().mk_le(e, r2_0)))); - psi = simplify_path(m().mk_and(path, range)); + range = simplify_path(e, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(c1, e), u().mk_le(e, r2_0)))); + psi = simplify_path(e, m().mk_and(path, range)); } else if (!str().is_string(r1) && !str().is_string(r2)) { // both r1 and r2 nonground: |r1|=1 & |r2|=1 & r1[0] <= e <= r2[0] @@ -3162,8 +3162,8 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref expr_ref r1_0(str().mk_nth_i(r1, zero), m()); expr_ref r2_length_eq_one(m().mk_eq(str().mk_length(r2), one), m()); expr_ref r2_0(str().mk_nth_i(r2, zero), m()); - range = simplify_path(m().mk_and(r1_length_eq_one, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, r2_0))))); - psi = simplify_path(m().mk_and(path, range)); + range = simplify_path(e, m().mk_and(r1_length_eq_one, m().mk_and(r2_length_eq_one, m().mk_and(u().mk_le(r1_0, e), u().mk_le(e, r2_0))))); + psi = simplify_path(e, m().mk_and(path, range)); } if (m().is_false(psi)) result = nothing(); @@ -3173,7 +3173,7 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref else if (re().is_union(r, r1, r2)) result = mk_antimirov_deriv_union(mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path)); else if (re().is_intersection(r, r1, r2)) - result = mk_antimirov_deriv_intersection( + result = mk_antimirov_deriv_intersection(e, mk_antimirov_deriv(e, r1, path), mk_antimirov_deriv(e, r2, path), m().mk_true()); else if (re().is_star(r, r1) || re().is_plus(r, r1) || (re().is_loop(r, r1, lo) && 0 <= lo && lo <= 1)) @@ -3190,11 +3190,11 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = mk_antimirov_deriv(e, r1, path); else if (re().is_complement(r, r1)) // D(e,~r1) = ~D(e,r1) - result = mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r1, path)); + result = mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r1, path)); else if (re().is_diff(r, r1, r2)) - result = mk_antimirov_deriv_intersection( + result = mk_antimirov_deriv_intersection(e, mk_antimirov_deriv(e, r1, path), - mk_antimirov_deriv_negate(mk_antimirov_deriv(e, r2, path)), m().mk_true()); + mk_antimirov_deriv_negate(e, mk_antimirov_deriv(e, r2, path)), m().mk_true()); else if (re().is_of_pred(r, r1)) { array_util array(m()); expr* args[2] = { r1, e }; @@ -3207,36 +3207,47 @@ void seq_rewriter::mk_antimirov_deriv_rec(expr* e, expr* r, expr* path, expr_ref result = re().mk_derivative(e, r); } -expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path) { +expr_ref seq_rewriter::mk_antimirov_deriv_intersection(expr* e, expr* d1, expr* d2, expr* path) { sort* seq_sort = nullptr, * ele_sort = nullptr; VERIFY(m_util.is_re(d1, seq_sort)); VERIFY(m_util.is_seq(seq_sort, ele_sort)); expr_ref result(m()); expr* c, * a, * b; - if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1)) + //if (d1 == d2 || re().is_full_seq(d2) || re().is_empty(d1)) + // result = d1; + //else if (re().is_full_seq(d1) || re().is_empty(d2)) + // result = d2; + //else + if (re().is_empty(d1)) result = d1; - else if (re().is_full_seq(d1) || re().is_empty(d2)) + else if (re().is_empty(d2)) result = d2; else if (m().is_ite(d1, c, a, b)) { - expr_ref path_and_c(simplify_path(m().mk_and(path, c)), m()); - expr_ref path_and_notc(simplify_path(m().mk_and(path, m().mk_not(c))), m()); + expr_ref path_and_c(simplify_path(e, m().mk_and(path, c)), m()); + expr_ref path_and_notc(simplify_path(e, m().mk_and(path, m().mk_not(c))), m()); if (m().is_false(path_and_c)) - result = mk_antimirov_deriv_intersection(b, d2, path); + result = mk_antimirov_deriv_intersection(e, b, d2, path); else if (m().is_false(path_and_notc)) - result = mk_antimirov_deriv_intersection(a, d2, path); + result = mk_antimirov_deriv_intersection(e, a, d2, path); else - result = m().mk_ite(c, mk_antimirov_deriv_intersection(a, d2, path_and_c), - mk_antimirov_deriv_intersection(b, d2, path_and_notc)); + result = m().mk_ite(c, mk_antimirov_deriv_intersection(e, a, d2, path_and_c), + mk_antimirov_deriv_intersection(e, b, d2, path_and_notc)); } else if (m().is_ite(d2)) // swap d1 and d2 - result = mk_antimirov_deriv_intersection(d2, d1, path); + result = mk_antimirov_deriv_intersection(e, d2, d1, path); + else if (d1 == d2 || re().is_full_seq(d2)) + result = mk_antimirov_deriv_restrict(e, d1, path); + else if (re().is_full_seq(d1)) + result = mk_antimirov_deriv_restrict(e, d2, path); else if (re().is_union(d1, a, b)) // distribute intersection over the union in d1 - result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(a, d2, path), mk_antimirov_deriv_intersection(b, d2, path)); + result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, a, d2, path), + mk_antimirov_deriv_intersection(e, b, d2, path)); else if (re().is_union(d2, a, b)) // distribute intersection over the union in d2 - result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(d1, a, path), mk_antimirov_deriv_intersection(d1, b, path)); + result = mk_antimirov_deriv_union(mk_antimirov_deriv_intersection(e, d1, a, path), + mk_antimirov_deriv_intersection(e, d1, b, path)); else // in all other cases create the intersection regex // TODO: flatten, order and merge d1 and d2 to maintain equality under similarity @@ -3258,7 +3269,7 @@ expr_ref seq_rewriter::mk_antimirov_deriv_concat(expr* d, expr* r) { return result; } -expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) { +expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* elem, expr* d) { sort* seq_sort = nullptr; VERIFY(m_util.is_re(d, seq_sort)); auto nothing = [&]() { return expr_ref(re().mk_empty(d->get_sort()), m()); }; @@ -3276,11 +3287,12 @@ expr_ref seq_rewriter::mk_antimirov_deriv_negate(expr* d) { else if (re().is_dot_plus(d)) result = epsilon(); else if (m().is_ite(d, c, t, e)) - result = m().mk_ite(c, mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + result = m().mk_ite(c, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); else if (re().is_union(d, t, e)) - result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + //result = re().mk_inter(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + result = mk_antimirov_deriv_intersection(elem, mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e), m().mk_true()); else if (re().is_intersection(d, t, e)) - result = re().mk_union(mk_antimirov_deriv_negate(t), mk_antimirov_deriv_negate(e)); + result = re().mk_union(mk_antimirov_deriv_negate(elem, t), mk_antimirov_deriv_negate(elem, e)); else if (re().is_complement(d, t)) result = t; else @@ -3304,6 +3316,45 @@ expr_ref seq_rewriter::mk_antimirov_deriv_union(expr* d1, expr* d2) { return result; } +// restrict the guards of all conditionals id d and simplify the resulting derivative +// restrict(if(c, a, b), cond) = if(c, restrict(a, cond & c), restrict(b, cond & ~c)) +// restrict(a U b, cond) = restrict(a, cond) U restrict(b, cond) +// where {} U X = X, X U X = X +// restrict(R, cond) = R +// +// restrict(d, false) = [] +// +// it is already assumed that the restriction takes place witin a branch +// so the condition is not added explicitly but propagated down in order to eliminate +// infeasible cases +expr_ref seq_rewriter::mk_antimirov_deriv_restrict(expr* e, expr* d, expr* cond) { + expr_ref result(d, m()); + expr_ref _cond(cond, m()); + expr* c, * a, * b; + if (m().is_false(cond)) + result = re().mk_empty(d->get_sort()); + else if (re().is_empty(d) || m().is_true(cond)) + result = d; + else if (m().is_ite(d, c, a, b)) { + expr_ref path_and_c(simplify_path(e, m().mk_and(cond, c)), m()); + expr_ref path_and_notc(simplify_path(e, m().mk_and(cond, m().mk_not(c))), m()); + result = re().mk_ite_simplify(c, mk_antimirov_deriv_restrict(e, a, path_and_c), + mk_antimirov_deriv_restrict(e, b, path_and_notc)); + } + else if (re().is_union(d, a, b)) { + expr_ref a1(mk_antimirov_deriv_restrict(e, a, cond), m()); + expr_ref b1(mk_antimirov_deriv_restrict(e, b, cond), m()); + if (a1 == b1 || re().is_empty(b1) || re().is_full_seq(a1)) + result = a1; + else if (re().is_empty(a1) || re().is_full_seq(b1)) + result = b1; + else + //TODO: merge + result = (a1->get_id() <= b1->get_id() ? re().mk_union(a1, b1) : re().mk_union(b1, a1)); + } + return result; +} + expr_ref seq_rewriter::mk_regex_reverse(expr* r) { expr* r1 = nullptr, * r2 = nullptr, * c = nullptr; unsigned lo = 0, hi = 0; @@ -3390,21 +3441,18 @@ expr_ref seq_rewriter::mk_in_antimirov_rec(expr* s, expr* d) { } /* -path is typically a conjunction of (negated) character equations or constraints that can potentially be simplified -the first element of each equation is assumed to be the element parameter, for example x = (:var 0), -for example a constraint x='a' & x='b' is simplified to false +* calls elim_condition */ -expr_ref seq_rewriter::simplify_path(expr* path) { - //TODO: more systematic simplifications +expr_ref seq_rewriter::simplify_path(expr* elem, expr* path) { expr_ref result(path, m()); - expr* h = nullptr, * t = nullptr, * lhs = nullptr, * rhs = nullptr, * h1 = nullptr; + expr* h = nullptr, * t = nullptr; if (m().is_and(path, h, t)) { if (m().is_true(h)) - result = simplify_path(t); + result = simplify_path(elem, t); else if (m().is_true(t)) - result = simplify_path(h); - else if (m().is_eq(h, lhs, rhs) || (m().is_not(h, h1) && m().is_eq(h1, lhs, rhs))) - elim_condition(lhs, result); + result = simplify_path(elem, h); + else + elim_condition(elem, result); } return result; } diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 5cba15fc856..9687797a0f6 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -214,14 +214,15 @@ class seq_rewriter { expr_ref mk_in_antimirov_rec(expr* s, expr* d); expr_ref mk_in_antimirov(expr* s, expr* d); - expr_ref mk_antimirov_deriv_intersection(expr* d1, expr* d2, expr* path); + expr_ref mk_antimirov_deriv_intersection(expr* elem, expr* d1, expr* d2, expr* path); expr_ref mk_antimirov_deriv_concat(expr* d, expr* r); - expr_ref mk_antimirov_deriv_negate(expr* d); + expr_ref mk_antimirov_deriv_negate(expr* elem, expr* d); expr_ref mk_antimirov_deriv_union(expr* d1, expr* d2); + expr_ref mk_antimirov_deriv_restrict(expr* elem, expr* d1, expr* cond); expr_ref mk_regex_reverse(expr* r); expr_ref mk_regex_concat(expr* r1, expr* r2); - expr_ref simplify_path(expr* path); + expr_ref simplify_path(expr* elem, expr* path); bool lt_char(expr* ch1, expr* ch2); bool eq_char(expr* ch1, expr* ch2); diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp index 1e87e1402e7..2044590aa12 100644 --- a/src/ast/seq_decl_plugin.cpp +++ b/src/ast/seq_decl_plugin.cpp @@ -1503,9 +1503,9 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { if (e->get_family_id() == u.get_family_id()) { switch (e->get_decl()->get_decl_kind()) { case OP_RE_EMPTY_SET: - return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0); + return info(true, l_false, UINT_MAX); case OP_RE_FULL_SEQ_SET: - return info(true, true, true, true, true, true, false, l_true, 0, 1); + return info(true, l_true, 0); case OP_RE_STAR: i1 = get_info_rec(e->get_arg(0)); return i1.star(); @@ -1517,7 +1517,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { case OP_RE_OF_PRED: //TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat //TBD: check if the range is unsat - return info(true, true, true, true, true, true, true, l_false, 1, 0); + return info(true, l_false, 1); case OP_RE_CONCAT: i1 = get_info_rec(e->get_arg(0)); i2 = get_info_rec(e->get_arg(1)); @@ -1534,7 +1534,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const { min_length = u.str.min_length(e->get_arg(0)); is_value = m.is_value(e->get_arg(0)); nullable = (is_value && min_length == 0 ? l_true : (min_length > 0 ? l_false : l_undef)); - return info(true, true, is_value, true, true, true, (min_length == 1 && u.str.max_length(e->get_arg(0)) == 1), nullable, min_length, 0); + return info(is_value, nullable, min_length); case OP_RE_REVERSE: return get_info_rec(e->get_arg(0)); case OP_RE_PLUS: @@ -1570,14 +1570,7 @@ std::ostream& seq_util::rex::info::display(std::ostream& out) const { if (is_known()) { out << "info(" << "nullable=" << (nullable == l_true ? "T" : (nullable == l_false ? "F" : "U")) << ", " - << "classical=" << (classical ? "T" : "F") << ", " - << "standard=" << (standard ? "T" : "F") << ", " - << "nonbranching=" << (nonbranching ? "T" : "F") << ", " - << "normalized=" << (normalized ? "T" : "F") << ", " - << "monadic=" << (monadic ? "T" : "F") << ", " - << "singleton=" << (singleton ? "T" : "F") << ", " - << "min_length=" << min_length << ", " - << "star_height=" << star_height << ")"; + << "min_length=" << min_length << ")"; } else if (is_valid()) out << "UNKNOWN"; @@ -1597,13 +1590,13 @@ std::string seq_util::rex::info::str() const { seq_util::rex::info seq_util::rex::info::star() const { //if is_known() is false then all mentioned properties will remain false - return seq_util::rex::info(classical, classical, interpreted, nonbranching, normalized, monadic, false, l_true, 0, star_height + 1); + return seq_util::rex::info(interpreted, l_true, 0); } seq_util::rex::info seq_util::rex::info::plus() const { if (is_known()) { //plus never occurs in a normalized regex - return info(classical, classical, interpreted, nonbranching, false, monadic, false, nullable, min_length, star_height + 1); + return info(interpreted, nullable, min_length); } else return *this; @@ -1612,14 +1605,14 @@ seq_util::rex::info seq_util::rex::info::plus() const { seq_util::rex::info seq_util::rex::info::opt() const { // if is_known() is false then all mentioned properties will remain false // optional construct never occurs in a normalized regex - return seq_util::rex::info(classical, classical, interpreted, nonbranching, false, monadic, false, l_true, 0, star_height); + return seq_util::rex::info(interpreted, l_true, 0); } seq_util::rex::info seq_util::rex::info::complement() const { if (is_known()) { lbool compl_nullable = (nullable == l_true ? l_false : (nullable == l_false ? l_true : l_undef)); unsigned compl_min_length = (compl_nullable == l_false ? 1 : 0); - return info(false, standard, interpreted, nonbranching, normalized, monadic, false, compl_nullable, compl_min_length, star_height); + return info(interpreted, compl_nullable, compl_min_length); } else return *this; @@ -1631,16 +1624,9 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, unsigned m = min_length + rhs.min_length; if (m < min_length || m < rhs.min_length) m = UINT_MAX; - return info(classical & rhs.classical, - classical && rhs.classical, //both args of concat must be classical for it to be standard - interpreted && rhs.interpreted, - nonbranching && rhs.nonbranching, - (normalized && !lhs_is_concat && rhs.normalized), - monadic && rhs.monadic, - false, + return info(interpreted && rhs.interpreted, ((nullable == l_false || rhs.nullable == l_false) ? l_false : ((nullable == l_true && rhs.nullable == l_true) ? l_true : l_undef)), - m, - std::max(star_height, rhs.star_height)); + m); } else return rhs; @@ -1652,16 +1638,9 @@ seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const { if (is_known() || rhs.is_known()) { //works correctly if one of the arguments is unknown - return info(classical & rhs.classical, - standard && rhs.standard, - interpreted && rhs.interpreted, - nonbranching && rhs.nonbranching, - normalized && rhs.normalized, - monadic && rhs.monadic, - singleton && rhs.singleton, + return info(interpreted && rhs.interpreted, ((nullable == l_true || rhs.nullable == l_true) ? l_true : ((nullable == l_false && rhs.nullable == l_false) ? l_false : l_undef)), - std::min(min_length, rhs.min_length), - std::max(star_height, rhs.star_height)); + std::min(min_length, rhs.min_length)); } else return rhs; @@ -1670,16 +1649,9 @@ seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) co seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const { if (is_known()) { if (rhs.is_known()) { - return info(false, - standard && rhs.standard, - interpreted && rhs.interpreted, - nonbranching && rhs.nonbranching, - normalized && rhs.normalized, - monadic && rhs.monadic, - singleton && rhs.singleton, + return info(interpreted && rhs.interpreted, ((nullable == l_true && rhs.nullable == l_true) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)), - std::max(min_length, rhs.min_length), - std::max(star_height, rhs.star_height)); + std::max(min_length, rhs.min_length)); } else return rhs; @@ -1691,16 +1663,9 @@ seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) co seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const { if (is_known()) { if (rhs.is_known()) { - return info(false, - standard & rhs.standard, - interpreted & rhs.interpreted, - nonbranching & rhs.nonbranching, - normalized & rhs.normalized, - monadic & rhs.monadic, - false, + return info(interpreted & rhs.interpreted, ((nullable == l_true && rhs.nullable == l_false) ? l_true : ((nullable == l_false || rhs.nullable == l_false) ? l_false : l_undef)), - std::max(min_length, rhs.min_length), - std::max(star_height, rhs.star_height)); + std::max(min_length, rhs.min_length)); } else return rhs; @@ -1715,13 +1680,9 @@ seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) co // unsigned ite_min_length = std::min(min_length, i.min_length); // lbool ite_nullable = (nullable == i.nullable ? nullable : l_undef); // TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted - return info(false, false, false, false, - normalized && i.normalized, - monadic && i.monadic, - singleton && i.singleton, + return info(false, ((nullable == l_true && i.nullable == l_true) ? l_true : ((nullable == l_false && i.nullable == l_false) ? l_false : l_undef)), - std::min(min_length, i.min_length), - std::max(star_height, i.star_height)); + std::min(min_length, i.min_length)); } else return i; @@ -1737,24 +1698,22 @@ seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) co if (m > 0 && (m < min_length || m < lower)) m = UINT_MAX; lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable); - if (upper == UINT_MAX) { - // this means the loop is r{lower,*} and is therefore not normalized - // normalized regex would be r{lower,lower}r* and would in particular not use r{0,} for r* - return info(classical, classical, interpreted, nonbranching, false, singleton, false, loop_nullable, m, star_height + 1); - } - else { - bool loop_normalized = normalized; - // r{lower,upper} is not normalized if r is nullable but lower > 0 - // r{0,1} is not normalized: it should be ()|r - // r{1,1} is not normalized: it should be r - // r{lower,upper} is not normalized if lower > upper it should then be [] (empty) - if ((nullable == l_true && lower > 0) || upper == 1 || lower > upper) - loop_normalized = false; - return info(classical, classical, interpreted, nonbranching, loop_normalized, singleton, false, loop_nullable, m, star_height); - } + return info(interpreted, loop_nullable, m); } else return *this; } +seq_util::rex::info& seq_util::rex::info::operator=(info const& other) { + if (this == &other) { + return *this; + } + + known = other.known; + interpreted = other.interpreted; + nullable = other.nullable; + min_length = other.min_length; + return *this; +} + diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h index 1db05b7221f..d6a1a0f2966 100644 --- a/src/ast/seq_decl_plugin.h +++ b/src/ast/seq_decl_plugin.h @@ -411,26 +411,11 @@ class seq_util { struct info { /* Value is either undefined (known=l_undef) or defined and known (l_true) or defined but unknown (l_false)*/ lbool known { l_undef }; - /* No complement, no intersection, no difference, and no if-then-else is used. Reverse is allowed. */ - bool classical { false }; - /* Boolean-reverse combination of classical regexes (using reverse, union, complement, intersection or difference). */ - bool standard { false }; - /* There are no uninterpreted symbols. */ bool interpreted { false }; - /* No if-then-else is used. */ - bool nonbranching { false }; - /* Concatenations are right associative and if a loop body is nullable then the lower bound is zero. */ - bool normalized { false }; - /* All bounded loops have a body that is a singleton. */ - bool monadic { false }; - /* Positive Boolean combination of ranges or predicates or singleton sequences. */ - bool singleton { false }; /* If l_true then empty word is accepted, if l_false then empty word is not accepted. */ lbool nullable { l_undef }; /* Lower bound on the length of all accepted words. */ unsigned min_length { 0 }; - /* Maximum nesting depth of Kleene stars. */ - unsigned star_height { 0 }; /* Default constructor of invalid info. @@ -445,19 +430,13 @@ class seq_util { /* General info constructor. */ - info(bool is_classical, - bool is_standard, - bool is_interpreted, - bool is_nonbranching, - bool is_normalized, - bool is_monadic, - bool is_singleton, + info(bool is_interpreted, lbool is_nullable, - unsigned min_l, - unsigned star_h) : - known(l_true), classical(is_classical), standard(is_standard), interpreted(is_interpreted), nonbranching(is_nonbranching), - normalized(is_normalized), monadic(is_monadic), singleton(is_singleton), nullable(is_nullable), - min_length(min_l), star_height(star_h) {} + unsigned min_l) : + known(l_true), + interpreted(is_interpreted), + nullable(is_nullable), + min_length(min_l) {} /* Appends a string representation of the info into the stream. @@ -483,6 +462,8 @@ class seq_util { info diff(info const& rhs) const; info orelse(info const& rhs) const; info loop(unsigned lower, unsigned upper) const; + + info& operator=(info const& other); }; private: seq_util& u;