From af54a79acc9aba4a09fedcfce0488b1bdb2f3e3f Mon Sep 17 00:00:00 2001
From: Margus Veanes <veanes@users.noreply.github.com>
Date: Tue, 8 Sep 2020 04:13:18 -0700
Subject: [PATCH] fixing issue #4651 (#4666)

* fixing issue #4651

* regression fix

* fix #4662

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* reenabled lift_ites_throttled with an additional filter, without the filter finding the model in report #4651 goes from .02s to 20s, also updated pretty printing of regexes to be more accurate

* removing temp testing variable

* Allow to skip System.loadLibrary() calls from Java Native class (#4667)

* using intended utility methods for sort detection

* adding ack/model

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* add smt params dependency

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* missing file

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* deps

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* order

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* persist fields

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* dbg build

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* reset caches

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* sr

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fix cmake build

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* shuffle dependencies

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* warnings /errors

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* update include

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* missing cmakelists

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* update cmake

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* add depend

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* add depend

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* virtual method

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* path

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* move parameters from ast/rewriter to params

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* move fpa

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fix warnings

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* remove pragma

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* dbg

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* updated sat_smt

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fix #4651

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* encoding options #4665

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* expose name inclusion as optional

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fix misc issues around #4661 introduced when adding lazy push/pop to selected theories

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* remove lazy push from theory_lra

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* fix dotnet build

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* release nodes

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* free memory in egraph

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* avoid duplicate class names frame in sat_scc and sat_smt

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* adding euf

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* elaborate on smt/drat format outline, expose euf mode as config

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* mk-var during copy

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* move theory_var_list into id_var_list and utilities from smt-enode into it, prepare for theory variables in egraph

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* with bounded

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* Remove duplicate binary condition. Fixes #4668.

* butterfly effect on fp?

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* prepare for theory plugins

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* file

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* build fix

* remove SMTFD

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* na

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>

* SMTFD is back (#4676)

* fixing issue #4651

* regression fix

* reenabled lift_ites_throttled with an additional filter, without the filter finding the model in report #4651 goes from .02s to 20s, also updated pretty printing of regexes to be more accurate

* removing temp testing variable

* using intended utility methods for sort detection

* misc edits related to nonground regexes

* bug fix of state id off by 1 calculation error and improved pretty printing with regex tooltip generated in dgml state graph

* removed unused method declaration

* improved id to regex value map info in generated dgml

* reorgd callback function for state pretty printer

* updated some comments

Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
Co-authored-by: Sergey Vladimirov <vlsergey@gmail.com>
Co-authored-by: Christoph M. Wintersteiger <cwinter@microsoft.com>
Co-authored-by: Arie Gurfinkel <arie.gurfinkel@gmail.com>
---
 src/ast/rewriter/seq_rewriter.cpp |  73 +++++++++++++++++----
 src/ast/rewriter/seq_rewriter.h   |   5 ++
 src/ast/seq_decl_plugin.cpp       |  91 ++++++++++++++++++--------
 src/ast/seq_decl_plugin.h         |   3 +-
 src/smt/seq_regex.cpp             | 104 ++++++++++++++++++++++++++----
 src/smt/seq_regex.h               |  18 ++++++
 src/util/state_graph.cpp          |  13 +++-
 src/util/state_graph.h            |  29 ++++++++-
 8 files changed, 281 insertions(+), 55 deletions(-)

diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp
index caa2188cf15..666736cfa9a 100644
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@@ -860,11 +860,12 @@ br_status seq_rewriter::mk_seq_length(expr* a, expr_ref& result) {
     used in the normal form for derivatives in mk_re_derivative.
 */
 br_status seq_rewriter::lift_ites_throttled(func_decl* f, unsigned n, expr* const* args, expr_ref& result) {
-    expr* c = nullptr, *t = nullptr, *e = nullptr;
-    for (unsigned i = 0; i < n; ++i) {        
-        if (m().is_ite(args[i], c, t, e) && 
+    expr* c = nullptr, * t = nullptr, * e = nullptr;
+    for (unsigned i = 0; i < n; ++i)
+        if (m().is_ite(args[i], c, t, e) &&
+            lift_ites_filter(f, args[i]) &&
             (get_depth(t) <= 2 || t->get_ref_count() == 1 ||
-             get_depth(e) <= 2 || e->get_ref_count() == 1)) {
+                get_depth(e) <= 2 || e->get_ref_count() == 1)) {
             ptr_buffer<expr> new_args;
             for (unsigned j = 0; j < n; ++j) new_args.push_back(args[j]);
             new_args[i] = t;
@@ -872,12 +873,26 @@ br_status seq_rewriter::lift_ites_throttled(func_decl* f, unsigned n, expr* cons
             new_args[i] = e;
             expr_ref arg2(m().mk_app(f, new_args), m());
             result = m().mk_ite(c, arg1, arg2);
+            TRACE("seq_verbose", tout << "lifting ite: " << mk_pp(result, m()) << std::endl;);
             return BR_REWRITE2;
         }
-    }
     return BR_FAILED;
 }
 
+/* returns false iff the ite must not be lifted */
+bool seq_rewriter::lift_ites_filter(func_decl* f, expr* ite) {
+    // do not lift ites from sequences over regexes
+    // for example DO NOT lift to_re(ite(c, s, t)) to ite(c, to_re(s), to_re(t))
+    if (u().is_re(f->get_range()) && u().is_seq(m().get_sort(ite)))
+        return false;
+    // The following check is intended to avoid lifting cases such as 
+    // substring(s,0,ite(c,e1,e2)) ==> ite(c, substring(s,0,e1), substring(s,0,e2))
+    // TBD: not sure if this is too restrictive though and may block cases when such lifting is desired
+    // if (m_autil.is_int(m().get_sort(ite)) && u().is_seq(f->get_range()))
+    //    return false;
+    return true;
+}
+
 
 bool seq_rewriter::is_suffix(expr* s, expr* offset, expr* len) {
     expr_ref_vector lens(m());
@@ -3246,11 +3261,12 @@ br_status seq_rewriter::mk_str_in_regexp(expr* a, expr* b, expr_ref& result) {
         result = m().mk_true();
         return BR_DONE;
     }
-    expr* b1 = nullptr;
-    if (re().is_to_re(b, b1)) {
-        result = m_br.mk_eq_rw(a, b1);
-        return BR_REWRITE1;
+    expr_ref b_s(m());
+    if (lift_str_from_to_re(b, b_s)) {
+       result = m_br.mk_eq_rw(a, b_s);
+       return BR_REWRITE_FULL;
     }
+    expr* b1 = nullptr;
     expr* eps = nullptr;
     if (re().is_opt(b, b1) ||
         (re().is_union(b, b1, eps) && re().is_epsilon(eps)) ||
@@ -3337,6 +3353,30 @@ bool seq_rewriter::has_fixed_length_constraint(expr* a, unsigned& len) {
     return minl == maxl;
 }
 
+bool seq_rewriter::lift_str_from_to_re_ite(expr* r, expr_ref& result)
+{
+    expr* cond = nullptr, * then_r = nullptr, * else_r = nullptr;
+    expr_ref then_s(m());
+    expr_ref else_s(m());
+    if (m().is_ite(r, cond, then_r, else_r) &&
+        lift_str_from_to_re(then_r, then_s) &&
+        lift_str_from_to_re(else_r, else_s)) {
+        result = m().mk_ite(cond, then_s, else_s);
+        return true;
+    }
+    return false;
+}
+
+bool seq_rewriter::lift_str_from_to_re(expr* r, expr_ref& result)
+{
+    expr* s = nullptr;
+    if (re().is_to_re(r, s)) {
+        result = s;
+        return true;
+    }
+    return lift_str_from_to_re_ite(r, result);
+}
+
 br_status seq_rewriter::mk_str_to_regexp(expr* a, expr_ref& result) {
     return BR_FAILED;
 }
@@ -3375,11 +3415,13 @@ br_status seq_rewriter::mk_re_concat(expr* a, expr* b, expr_ref& result) {
         result = a;
         return BR_DONE;
     }
-    expr* a1 = nullptr, *b1 = nullptr;
-    if (re().is_to_re(a, a1) && re().is_to_re(b, b1)) {
-        result = re().mk_to_re(str().mk_concat(a1, b1));
+    expr_ref a_str(m());
+    expr_ref b_str(m());
+    if (lift_str_from_to_re(a, a_str) && lift_str_from_to_re(b, b_str)) {
+        result = re().mk_to_re(str().mk_concat(a_str, b_str));
         return BR_REWRITE2;
     }
+    expr* a1 = nullptr, *b1 = nullptr;
     if (re().is_star(a, a1) && re().is_star(b, b1) && a1 == b1) {
         result = a;
         return BR_DONE;
@@ -3811,7 +3853,14 @@ br_status seq_rewriter::mk_re_star(expr* a, expr_ref& result) {
         result = re().mk_star(re().mk_union(b1, c1));
         return BR_REWRITE2;
     }
+    if (m().is_ite(a, c, b1, c1)) {
+        if ((re().is_full_char(b1) || re().is_full_seq(b1)) &&
+            (re().is_full_char(c1) || re().is_full_seq(c1))) {
+            result = re().mk_full_seq(m().get_sort(b1));
+            return BR_REWRITE2;
+        }
 
+    }
     return BR_FAILED;
 }
 
diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h
index 03e0ce7c105..fe40c0d5ac3 100644
--- a/src/ast/rewriter/seq_rewriter.h
+++ b/src/ast/rewriter/seq_rewriter.h
@@ -249,6 +249,7 @@ class seq_rewriter {
     br_status mk_re_derivative(expr* ele, expr* r, expr_ref& result);
 
     br_status lift_ites_throttled(func_decl* f, unsigned n, expr* const* args, expr_ref& result);
+    bool lift_ites_filter(func_decl* f, expr* ite);
 
     br_status reduce_re_eq(expr* a, expr* b, expr_ref& result);
     br_status reduce_re_is_empty(expr* r, expr_ref& result);
@@ -257,6 +258,10 @@ class seq_rewriter {
     bool non_overlap(zstring const& p1, zstring const& p2) const;
     bool rewrite_contains_pattern(expr* a, expr* b, expr_ref& result);
     bool has_fixed_length_constraint(expr* a, unsigned& len);
+    /* r = ite(c1,ite(c2,to_re(s),to_re(t)),to_re(u)) ==> returns true, result = ite(c1,ite(c2,s,t),u)*/
+    bool lift_str_from_to_re_ite(expr * r, expr_ref & result);
+    /* same as lift_to_re_from_ite and also: r = to_re(u) ==> returns true, result = u */
+    bool lift_str_from_to_re(expr * r, expr_ref & result);
 
     br_status mk_bool_app_helper(bool is_and, unsigned n, expr* const* args, expr_ref& result);
     br_status mk_eq_helper(expr* a, expr* b, expr_ref& result);
diff --git a/src/ast/seq_decl_plugin.cpp b/src/ast/seq_decl_plugin.cpp
index e9d02611e54..27e5b3db960 100644
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@@ -1452,20 +1452,21 @@ app* seq_util::rex::mk_epsilon(sort* seq_sort) {
 */
 std::ostream& seq_util::rex::pp::compact_helper_seq(std::ostream& out, expr* s) const {
     SASSERT(re.u.is_seq(s));
-    if (re.u.str.is_concat(s)) {
-        expr_ref_vector es(re.m);
-        re.u.str.get_concat(s, es);
-        for (expr* e : es) {
-            if (re.u.str.is_unit(e))
-                seq_unit(out, e);
-            else
-                out << mk_pp(e, re.m);
-        }
-    }
-    else if (re.u.str.is_empty(s))
+    if (re.u.str.is_empty(s))
         out << "()";
-    else
+    else if (re.u.str.is_unit(s))
         seq_unit(out, s);
+    else if (re.u.str.is_concat(s)) {
+        expr_ref_vector es(re.m);
+        re.u.str.get_concat(s, es);
+        for (expr* e : es)
+            compact_helper_seq(out, e);
+    }
+    //using braces to indicate 'full' output
+    //for example an uninterpreted constant X will be printed as {X}
+    //while a unit sequence "X" will be printed as X
+    //thus for example (concat "X" "Y" Z "W") where Z is uninterpreted is printed as XY{Z}W
+    else out << "{" << mk_pp(s, re.m) << "}";
     return out;
 }
 
@@ -1484,7 +1485,7 @@ std::ostream& seq_util::rex::pp::compact_helper_range(std::ostream& out, expr* s
 */
 bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
     expr* s;
-    return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r));
+    return ((re.is_to_re(r, s) && re.u.str.is_unit(s)) || re.is_range(r) || re.is_empty(r) || re.is_epsilon(r) || re.is_full_char(r));
 }
 
 /*
@@ -1494,15 +1495,44 @@ std::ostream& seq_util::rex::pp::seq_unit(std::ostream& out, expr* s) const {
     expr* e;
     unsigned n = 0;
     if (re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) {
-        if (32 < n && n < 127)
-            out << (char)n;
-        else if (n < 16)
+        char c = (char)n;
+        if (c == '\n')
+            out << "\\n";
+        else if (c == '\r')
+            out << "\\r";
+        else if (c == '\f')
+            out << "\\f";
+        else if (c == ' ')
+            out << "\\s";
+        else if (c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']' || c == '.' || c == '\\')
+            out << "\\" << c;
+        else if (32 < n && n < 127) {
+            if (html_encode) {
+                if (c == '<')
+                    out << "&lt;";
+                else if (c == '>')
+                    out << "&gt;";
+                else if (c == '&')
+                    out << "&amp;";
+                else if (c == '\"')
+                    out << "&quot;";
+                else
+                    out << "\\x" << std::hex << n;
+            }
+            else
+                out << c;
+        }
+        else if (n <= 0xF)
             out << "\\x0" << std::hex << n;
-        else
+        else if (n <= 0xFF)
             out << "\\x" << std::hex << n;
+        else if (n <= 0xFFF)
+            out << "\\u0" << std::hex << n;
+        else 
+            out << "\\u" << std::hex << n;
     }
     else
-        out << mk_pp(s, re.m);
+        out << "{" << mk_pp(s, re.m) << "}";
     return out;
 }
 
@@ -1516,7 +1546,7 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
         return out << ".";
     else if (re.is_full_seq(e))
         return out << ".*";
-    else if (re.is_to_re(e, s)) 
+    else if (re.is_to_re(e, s))
         return compact_helper_seq(out, s);
     else if (re.is_range(e, s, s2)) 
         return compact_helper_range(out, s, s2);
@@ -1529,7 +1559,7 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
     else if (re.is_union(e, r1, r2)) 
         return out << pp(re, r1) << "|" << pp(re, r2);
     else if (re.is_intersection(e, r1, r2)) 
-        return out << "(" << pp(re, r1) << ")&(" << pp(re, r2) << ")";
+        return out << "(" << pp(re, r1) << (html_encode ? ")&amp;(": ")&(" ) << pp(re, r2) << ")";
     else if (re.is_complement(e, r1)) {
         if (can_skip_parenth(r1))
             return out << "~" << pp(re, r1);
@@ -1555,10 +1585,18 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
             return out << "(" << pp(re, r1) << "){" << lo << ",}";
     }
     else if (re.is_loop(e, r1, lo, hi)) {
-        if (can_skip_parenth(r1))
-            return out << pp(re, r1) << "{" << lo << "," << hi << "}";
-        else
-            return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}";
+        if (can_skip_parenth(r1)) {
+            if (lo == hi)
+                return out << pp(re, r1) << "{" << lo << "}";
+            else 
+                return out << pp(re, r1) << "{" << lo << "," << hi << "}";
+        }
+        else {
+            if (lo == hi)
+                return out << "(" << pp(re, r1) << "){" << lo << "}";
+            else
+                return out << "(" << pp(re, r1) << "){" << lo << "," << hi << "}";
+        }
     }
     else if (re.is_diff(e, r1, r2)) 
         return out << "(" << pp(re, r1) << ")\\(" << pp(re, r2) << ")";
@@ -1574,7 +1612,7 @@ std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
         return out << "reverse(" << pp(re, r1) << ")";
     else
         // Else: derivative or is_of_pred
-        return out << mk_pp(e, re.m);
+        return out << "{" << mk_pp(e, re.m) << "}";
 }
 
 /*
@@ -1643,8 +1681,7 @@ seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
     bool is_value(false);
     bool normalized(false);
     if (e->get_family_id() == u.get_family_id()) {
-        switch (e->get_decl()->get_decl_kind())
-        {
+        switch (e->get_decl()->get_decl_kind()) {
         case OP_RE_EMPTY_SET:
             return info(true, true, true, true, true, true, false, l_false, UINT_MAX, 0);
         case OP_RE_FULL_SEQ_SET:
diff --git a/src/ast/seq_decl_plugin.h b/src/ast/seq_decl_plugin.h
index 0894d12f6ce..10b3014d26b 100644
--- a/src/ast/seq_decl_plugin.h
+++ b/src/ast/seq_decl_plugin.h
@@ -576,13 +576,14 @@ class seq_util {
         class pp {
             seq_util::rex& re;
             expr* e;
+            bool html_encode;
             bool can_skip_parenth(expr* r) const;
             std::ostream& seq_unit(std::ostream& out, expr* s) const;
             std::ostream& compact_helper_seq(std::ostream& out, expr* s) const;
             std::ostream& compact_helper_range(std::ostream& out, expr* s1, expr* s2) const;
 
         public:
-            pp(seq_util::rex& r, expr* e) : re(r), e(e) {}
+            pp(seq_util::rex& r, expr* e, bool html = false) : re(r), e(e), html_encode(html) {}
             std::ostream& display(std::ostream&) const;
         };
     };
diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp
index 784a7efd308..3dc4f31d0c1 100644
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@@ -25,8 +25,8 @@ namespace smt {
         th(th),
         ctx(th.get_context()),
         m(th.get_manager()),
-        m_state_to_expr(m)
-    {}
+        m_state_to_expr(m),
+        m_state_graph(state_graph::state_pp(this, pp_state)) { }
 
     seq_util& seq_regex::u() { return th.m_util; }
     class seq_util::rex& seq_regex::re() { return th.m_util.re; }
@@ -143,13 +143,46 @@ namespace smt {
             }
         }
 
+        /*
+        //if r is uninterpreted then taking a derivative may diverge try to obtain the 
+        //value from equations providing r a definition
+        if (is_uninterp(r)) {
+            if (m_const_to_expr.contains(r)) {
+                proof* _not_used = nullptr;
+                m_const_to_expr.get(r, r, _not_used);
+                if (is_uninterp(r)) {
+                    if (m_const_to_expr.contains(r)) {
+                        m_const_to_expr.get(r, r, _not_used);
+                    }
+                }
+            }
+            else {
+                //add the literal back
+                expr_ref r_alias(m.mk_fresh_const(symbol(r->get_id()), m.get_sort(r), false), m);
+                expr_ref s_in_r_alias(re().mk_in_re(s, r_alias), m);
+                literal s_in_r_alias_lit = th.mk_literal(s_in_r_alias);
+                m_const_to_expr.insert(r_alias, r, nullptr);
+                th.add_axiom(s_in_r_alias_lit);
+                return;
+            }
+        }
+        */
+
+        /*
+        if (is_uninterp(r)) {
+            th.add_unhandled_expr(e);
+            return;
+        }
+        */
+
         expr_ref zero(a().mk_int(0), m);
-        expr_ref acc = sk().mk_accept(s, zero, r);
+        expr_ref acc(sk().mk_accept(s, zero, r), m);
         literal acc_lit = th.mk_literal(acc);
 
         TRACE("seq", tout << "propagate " << acc << "\n";);
 
-        th.propagate_lit(nullptr, 1, &lit, acc_lit);
+        //th.propagate_lit(nullptr, 1, &lit, acc_lit);
+        th.add_axiom(~lit, acc_lit);
     }
 
     /**
@@ -363,9 +396,11 @@ namespace smt {
 
     expr_ref seq_regex::symmetric_diff(expr* r1, expr* r2) {
         expr_ref r(m);
-        if (re().is_empty(r1)) 
-            std::swap(r1, r2);
-        if (re().is_empty(r2))
+        if (r1 == r2)
+            r = re().mk_empty(m.get_sort(r1));
+        else if (re().is_empty(r1)) 
+            r = r2;
+        else if (re().is_empty(r2))
             r = r1;
         else 
             r = re().mk_union(re().mk_diff(r1, r2), re().mk_diff(r2, r1));
@@ -439,19 +474,40 @@ namespace smt {
         TRACE("seq_regex", tout << "propagate EQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "PEQ ";);
 
+        /*
+        if (is_uninterp(r1) || is_uninterp(r2)) {
+            th.add_axiom(th.mk_eq(r1, r2, false));
+           if (is_uninterp(r1))
+                m_const_to_expr.insert(r1, r2, nullptr);
+            else 
+                m_const_to_expr.insert(r2, r1, nullptr);
+           
+        }
+        */
+
         sort* seq_sort = nullptr;
         VERIFY(u().is_re(r1, seq_sort));
-        expr_ref r = symmetric_diff(r1, r2);       
+        expr_ref r = symmetric_diff(r1, r2);
+        if (re().is_empty(r))
+            //trivially true
+            return;
         expr_ref emp(re().mk_empty(m.get_sort(r)), m);
-        expr_ref n(m.mk_fresh_const("re.char", seq_sort), m); 
-        expr_ref is_empty = sk().mk_is_empty(r, r, n);
+        expr_ref f(m.mk_fresh_const("re.char", seq_sort), m); 
+        expr_ref is_empty = sk().mk_is_empty(r, r, f);
+        // is_empty : (re,re,seq) -> Bool is a Skolem function 
+        // f is a fresh internal Skolem constant of sort seq
+        // the literal is satisfiable when emptiness check succeeds
+        // meaning that r is not nullable and 
+        // that all derivatives of r (if any) are also empty
+        // TBD: rewrite to use state_graph
         th.add_axiom(~th.mk_eq(r1, r2, false), th.mk_literal(is_empty));
     }
     
     void seq_regex::propagate_ne(expr* r1, expr* r2) {
         TRACE("seq_regex", tout << "propagate NEQ: " << mk_pp(r1, m) << ", " << mk_pp(r2, m) << std::endl;);
         STRACE("seq_regex_brief", tout << "PNEQ ";);
-
+        // TBD: rewrite to use state_graph
+        // why is is_non_empty even needed, why not just not(in_empty)
         sort* seq_sort = nullptr;
         VERIFY(u().is_re(r1, seq_sort));
         expr_ref r = symmetric_diff(r1, r2);
@@ -566,6 +622,29 @@ namespace smt {
                     _temp_bool_owner.push_back(b);
                     re_to_bool.find(e) = b;
                 }
+                /*
+                else if (re().is_empty(e))
+                {
+                    re_to_bool.find(e) = m.mk_false();
+                }
+                else if (re().is_epsilon(e))
+                {
+                    expr* iplus1 = a().mk_int(i);
+                    expr* one = a().mk_int(1);
+                    _temp_bool_owner.push_back(iplus1);
+                    _temp_bool_owner.push_back(one);
+                    //the substring starting after position iplus1 must be empty
+                    expr* s_end = str().mk_substr(s, iplus1, one);
+                    expr* s_end_is_epsilon = m.mk_eq(s_end, str().mk_empty(m.get_sort(s)));
+
+                    _temp_bool_owner.push_back(s_end_is_epsilon);
+                    re_to_bool.find(e) = s_end_is_epsilon;
+
+                    STRACE("seq_regex_verbose", tout
+                        << "added empty sequence leaf: "
+                        << mk_pp(s_end_is_epsilon, m) << std::endl;);
+                }
+                */
                 else if (re().is_union(e, e1, e2)) {
                     expr* b1 = re_to_bool.find(e1);
                     expr* b2 = re_to_bool.find(e2);
@@ -760,13 +839,14 @@ namespace smt {
             STRACE("seq_regex", tout
                 << "New state ID: " << new_id
                 << " = " << mk_pp(e, m) << std::endl;);
+            SASSERT(get_expr_from_id(new_id) == e);
         }
         return m_expr_to_state.find(e);
     }
     expr* seq_regex::get_expr_from_id(unsigned id) {
         SASSERT(id >= 1);
         SASSERT(id <= m_state_to_expr.size());
-        return m_state_to_expr.get(id);
+        return m_state_to_expr.get(id - 1);
     }
 
 
diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h
index f29d1837bf3..3bbca7260c6 100644
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@@ -114,6 +114,8 @@ namespace smt {
         state_graph                    m_state_graph;
         ptr_addr_map<expr, unsigned>   m_expr_to_state;
         expr_ref_vector                m_state_to_expr;
+        /* map from uninterpreted regex constants to assigned regex expressions by EQ */
+        // expr_map                       m_const_to_expr;
         unsigned                       m_max_state_graph_size { 10000 };
         // Convert between expressions and states (IDs)
         unsigned get_state_id(expr* e);
@@ -166,6 +168,22 @@ namespace smt {
         void get_cofactors_rec(expr* r, expr_ref_vector& conds,
                                expr_ref_pair_vector& result);
 
+        /* 
+           Pretty print the regex of the state id to the out stream, 
+           seq_regex_ptr must be a pointer to seq_regex and the 
+           id must be a valid state id or else nothing is printed. 
+        */
+        static void pp_state(void* seq_regex_ptr, std::ostream& out, unsigned id, bool html_encode) {
+            seq_regex* sr = (seq_regex*)seq_regex_ptr;
+            if (sr) {
+                seq_util::rex re_util(sr->re());
+                if (1 <= id && id <= sr->m_state_to_expr.size()) {
+                    expr* r = sr->get_expr_from_id(id);
+                    seq_util::rex::pp(re_util, r, html_encode).display(out);
+                }
+            }
+        }
+
     public:
 
         seq_regex(theory_seq& th);
diff --git a/src/util/state_graph.cpp b/src/util/state_graph.cpp
index 2b870331e31..400d1f58623 100644
--- a/src/util/state_graph.cpp
+++ b/src/util/state_graph.cpp
@@ -17,6 +17,7 @@ Module Name:
 --*/
 
 #include "state_graph.h"
+#include <sstream>
 
 void state_graph::add_state_core(state s) {
     STRACE("state_graph", tout << "add(" << s << ") ";);
@@ -434,7 +435,17 @@ bool state_graph::write_dgml() {
                     dgml << "," << r;
                 r = m_state_ufind.next(r);
             } while (r != s);
-            dgml << "\" Category=\"State\">" << std::endl;
+            r = s;
+            dgml << "\" Value_of_" << r << "=\"";
+            m_state_pp.pp_state_label(dgml, r) << "\"";
+            do {
+                if (r != s) {
+                    dgml << " Value_of_" << r << "=\"";
+                    m_state_pp.pp_state_label(dgml, r) << "\"";
+                }
+                r = m_state_ufind.next(r);
+            } while (r != s);
+            dgml << " Category=\"State\">" << std::endl;
             if (m_dead.contains(s))
                 dgml << "<Category Ref=\"Dead\"/>" << std::endl;
             if (m_live.contains(s))
diff --git a/src/util/state_graph.h b/src/util/state_graph.h
index 61e19946a76..b0420c1680a 100644
--- a/src/util/state_graph.h
+++ b/src/util/state_graph.h
@@ -47,6 +47,26 @@ Module Name:
 */
 class state_graph {
 public:
+    /* Wrapper for custom callback function for pretty printing states */
+    struct state_pp {
+        /* Pointer to object that owns m_pp_state, it must be passed as the first argument to m_pp_state */
+        void* m_printer;
+        /* Pointer to function that pretty prints a state label into the stream (html-encoded if the last argument is true)  */
+        void (*m_pp_state)(void*, std::ostream&, unsigned, bool);
+        state_pp(
+            /* Pointer to object that owns f  */
+            void* p, 
+            /* Pointer to function that pretty prints a state label into the stream (html-encoded if the last argument is true) */
+            void (*f)(void*, std::ostream&, unsigned, bool)) : m_printer(p), m_pp_state(f) {}
+
+        /* call back to m_printer using m_pp_state to pretty print state_id to the given stream (html-encoded by default) */
+        std::ostream& pp_state_label(std::ostream& out, unsigned state_id, bool html_encode = true) const {
+            if (m_printer && m_pp_state)
+                (*m_pp_state)(m_printer, out, state_id, html_encode);
+            return out;
+        }
+    };
+
     typedef unsigned           state;
     typedef uint_set           state_set;
     typedef u_map<state_set>   edge_rel;
@@ -83,6 +103,11 @@ class state_graph {
     edge_rel   m_targets;
     edge_rel   m_sources_maybecycle;
 
+    /* 
+        Pretty printer for states 
+    */
+    state_pp m_state_pp;
+
     /*
         CLASS INVARIANTS
 
@@ -152,9 +177,9 @@ class state_graph {
     state merge_all_cycles(state s);
 
 public:
-    state_graph():
+    state_graph(state_pp p):
         m_live(), m_dead(), m_unknown(), m_unexplored(), m_seen(),
-        m_state_ufind(), m_sources(), m_targets(), m_sources_maybecycle()
+        m_state_ufind(), m_sources(), m_targets(), m_sources_maybecycle(), m_state_pp(p)
     {
         CASSERT("state_graph", check_invariant());
     }