oracle-samples · WilliamParker · Mar 11, 2019 · Mar 18, 2019 · Mar 18, 2019 · Jul 5, 2019
diff --git a/src/main/clojure/clara/rules.cljc b/src/main/clojure/clara/rules.cljc
@@ -35,6 +35,12 @@
 
    This take an additional map of options as a second argument.  Current options:
 
+   :max-cycles (positive integer): Each time Clara determines that there are no more insertions process in a given activation group (rules with the same salience)
+    it records that one cycle of processing rules has been performed.  When the max-cycles limit is reached Clara throws an exception indicating that the rules
+    are in an infinite loop.  This exception can be regarded as analogous to a StackOverflowError.  The default is 600000, which is a level at which it is unlikely that
+    most users will encounter this error in cases where the rules actually would eventually terminate.  See issue 275 for details on how this level was chosen.
+    Nevertheless, it is possible that such cases exist, and so an option is provided to increase the limit.  Furthermore, some infinite loop scenarios will cause memory
+    errors before this ceiling is reached, and so users who do not expect to need a large number of rule cycles may wish to lower this limit.
    :cancelling true (EXPERIMENTAL, subject to change/removal.  Not supported in ClojureScript.):  
     Simultaneously propagate insertions and retractions through the rules network, at every step using the insertion and retractions of equals facts to cancel each
     other out and avoid operations deeper in the rules network.  The behavior of unconditional insertions and RHS (right-hand side) retractions
@@ -419,4 +425,4 @@ See the [query authoring documentation](http://www.clara-rules.org/docs/queries/
                                   (map first)               ; Take the symbols for the rule/query vars
                                   )]
          (doseq [psym production-syms]
-           (ns-unmap *ns* psym))))))
+           (ns-unmap *ns* psym))))))
diff --git a/src/main/clojure/clara/rules/engine.cljc b/src/main/clojure/clara/rules/engine.cljc
@@ -1719,7 +1719,7 @@
 
 (defn fire-rules*
   "Fire rules for the given nodes."
-  [rulebase nodes transient-memory transport listener get-alphas-fn update-cache]
+  [rulebase nodes transient-memory transport listener get-alphas-fn update-cache max-cycles]
   (binding [*current-session* {:rulebase rulebase
                                :transient-memory transient-memory
                                :transport transport
@@ -1729,7 +1729,25 @@
                                :listener listener}]
 
     (loop [next-group (mem/next-activation-group transient-memory)
-           last-group nil]
+           last-group nil
+           group-change-count 0]
+
+      (when (> group-change-count max-cycles)
+        (throw (ex-info
+                (str "It appears that the rules are in an infinite loop."
+                     " Incorrect use of truth maintenance is a frequent source of such loops; see the website."
+                     " If the rules are not in fact in a loop it is possible to increase the ceiling by use of the :max-cycles setting."
+                     " See the clara.rules/fire-rules docstring for details.")
+                {:clara-rules/infinite-loop-suspected true
+                 :listeners (try
+                              (let [p-listener (l/to-persistent! listener)]
+                                (if (l/null-listener? p-listener)
+                                  []
+                                  (l/get-children p-listener)))
+                              (catch #?(:clj Exception :cljs :default)
+                                  listener-exception
+                                listener-exception))})))
+
 
       (if next-group
 
@@ -1739,7 +1757,7 @@
           ;; group before continuing.
           (do
             (flush-updates *current-session*)
-            (recur (mem/next-activation-group transient-memory) next-group))
+            (recur (mem/next-activation-group transient-memory) next-group (inc group-change-count)))
 
           (do
 
@@ -1817,13 +1835,20 @@
                   (when (some-> node :production :props :no-loop)
                     (flush-updates *current-session*)))))
 
-            (recur (mem/next-activation-group transient-memory) next-group)))
+            ;; If the activation group changed, then the updates will be flushed after the loop statement
+            ;; and we will recur again, this time with the group-change-count incremented.  It is expected
+            ;; that changes between activation groups will be less numerous than each single activation
+            ;; of a rule, and the former can remain constant even for large numbers of facts.  That is,
+            ;; it is more dependent on the logical structure of fact flow than the simple volume.
+            ;; Therefore counting only upon activation groups changes will reduce the likelihood of a false
+            ;; positive in infinite cycle detection.
+            (recur (mem/next-activation-group transient-memory) next-group group-change-count)))
 
         ;; There were no items to be activated, so flush any pending
         ;; updates and recur with a potential new activation group
         ;; since a flushed item may have triggered one.
         (when (flush-updates *current-session*)
-          (recur (mem/next-activation-group transient-memory) next-group))))))
+          (recur (mem/next-activation-group transient-memory) next-group (inc group-change-count)))))))
 
 (deftype LocalSession [rulebase memory transport listener get-alphas-fn pending-operations]
   ISession
@@ -1867,7 +1892,8 @@
   (fire-rules [session opts]
 
     (let [transient-memory (mem/to-transient memory)
-          transient-listener (l/to-transient listener)]
+          transient-listener (l/to-transient listener)
+          max-cycles (get opts :max-cycles 600000)]
 
       (if-not (:cancelling opts)
         ;; We originally performed insertions and retractions immediately after the insert and retract calls,
@@ -1912,7 +1938,8 @@
                        transport
                        transient-listener
                        get-alphas-fn
-                       (uc/get-ordered-update-cache)))
+                       (uc/get-ordered-update-cache)
+                       max-cycles))
 
         #?(:cljs (throw (ex-info "The :cancelling option is not supported in ClojureScript"
                                  {:session session :opts opts}))
@@ -1961,7 +1988,8 @@
                                  get-alphas-fn
                                  ;; This continues to use the cancelling cache after the first batch of insertions and retractions.
                                  ;; If this is suboptimal for some workflows we can revisit this.
-                                 update-cache)))))
+                                 update-cache
+                                 max-cycles)))))
 
       (LocalSession. rulebase
                      (mem/to-persistent! transient-memory)

diff --git a/src/main/clojure/clara/tools/testing_utils.cljc b/src/main/clojure/clara/tools/testing_utils.cljc
@@ -152,3 +152,20 @@
         (str "Actual mean value: " mean))
     {:mean mean
      :std std}))
+
+#?(:clj
+   (defn ex-data-maps
+     "Given a Throwable, return in order the ExceptionInfo data maps for all items in the
+      chain that implement IExceptionInfo and have nonempty data maps."
+     [t]
+     (let [throwables  ((fn append-self
+                          [prior t1]
+                          (if t1
+                            (append-self (conj prior t1) (.getCause ^Throwable t1))
+                            prior))
+                        []
+                        t)]
+       (into []
+             (comp (map ex-data)
+                   (filter not-empty))
+             throwables))))
diff --git a/src/test/clojure/clara/test_infinite_loops.clj b/src/test/clojure/clara/test_infinite_loops.clj
@@ -0,0 +1,173 @@
+(ns clara.test-infinite-loops
+  (:require [clojure.test :refer :all]
+            [clara.rules :refer :all]
+            [clara.rules.testfacts :refer [->Cold  ->Hot ->First ->Second]]
+            [clara.tools.testing-utils :refer [def-rules-test
+                                               ex-data-maps
+                                               side-effect-holder-fixture
+                                               side-effect-holder]]
+            [clara.test-rules :refer [assert-ex-data]]
+            [clara.tools.tracing :as tr]
+            [clara.rules.accumulators :as acc])
+  (:import [clara.rules.testfacts Cold Hot First Second]
+           [clara.tools.tracing
+            PersistentTracingListener]))
+
+(use-fixtures :each side-effect-holder-fixture)
+
+(def-rules-test test-truth-maintenance-loop
+
+  ;; Test of an infinite loop to an endless cycle caused by truth maintenance,
+  ;; that is an insertion that causes its support to be retracted.
+
+  {:rules [hot-rule [[[:not [Hot]]]
+                     (insert! (->Cold nil))]
+
+           cold-rule [[[Cold]]
+                      (insert! (->Hot nil))]]
+
+   :sessions [empty-session [hot-rule cold-rule] {}]}
+
+  (assert-ex-data  {:clara-rules/infinite-loop-suspected true}  (fire-rules empty-session {:max-cycles 3000})))
+
+(def-rules-test test-truth-maintenance-loop-with-salience
+
+  ;; Test of an infinite loop to an endless cycle caused by truth maintenance,
+  ;; that is an insertion that causes its support to be retracted, when the
+  ;; two rules that form the loop have salience and are thus parts of different
+  ;; activation groups.
+
+
+  {:rules [hot-rule [[[:not [Hot]]]
+                     (insert! (->Cold nil))
+                     {:salience 1}]
+
+           cold-rule [[[Cold]]
+                      (insert! (->Hot nil))
+                      {:salience 2}]]
+
+   :sessions [empty-session [hot-rule cold-rule] {}
+              empty-session-negated-salience [hot-rule cold-rule] {:activation-group-fn (comp - :salience :props)}]}
+
+  (doseq [session [empty-session empty-session-negated-salience]]
+    ;; Validate that the results are the same in either rule ordering.
+    (assert-ex-data  {:clara-rules/infinite-loop-suspected true}  (fire-rules session {:max-cycles 3000}))))
+
+
+(def-rules-test test-recursive-insertion
+
+  ;; Test of an infinite loop due to runaway insertions without retractions.
+
+  {:rules [cold-rule [[[Cold]]
+                      (insert! (->Cold "ORD"))]]
+
+   :sessions [empty-session [cold-rule] {}]}
+
+  (assert-ex-data {:clara-rules/infinite-loop-suspected true}
+                  (-> empty-session
+                      (insert (->Cold "ARN"))
+                      ;; Use a small value here to ensure that we don't run out of memory before throwing.
+                      ;; There is unfortunately a tradeoff where making the default number of cycles allowed
+                      ;; high enough for some use cases will allow others to create OutOfMemory errors in cases
+                      ;; like these but we can at least throw when there is enough memory available to hold the facts
+                      ;; created in the loop.
+                      (fire-rules {:max-cycles 10}))))
+
+(def-rules-test test-recursive-insertion-loop-no-salience
+
+  {:rules [first-rule [[[First]]
+                       (insert! (->Second))]
+
+           second-rule [[[Second]]
+                        (insert! (->First))]]
+
+   :sessions [empty-session [first-rule second-rule] {}]}
+
+  (assert-ex-data {:clara-rules/infinite-loop-suspected true}
+                  (-> empty-session
+                      (insert (->First))
+                      (fire-rules {:max-cycles 10}))))
+
+(def-rules-test test-recursive-insertion-loop-with-salience
+
+  {:rules [first-rule [[[First]]
+                       (insert! (->Second))
+                       {:salience 1}]
+
+           second-rule [[[Second]]
+                        (insert! (->First))
+                        {:salience 2}]]
+
+   :sessions [empty-session [first-rule second-rule] {}
+              empty-session-negated-salience [first-rule second-rule] {:activation-group-fn (comp - :salience :props)}]}
+
+  (doseq [session [empty-session empty-session-negated-salience]]
+    (assert-ex-data {:clara-rules/infinite-loop-suspected true}
+                    (-> session
+                        (insert (->First))
+                        (fire-rules {:max-cycles 10})))))
+
+(def-rules-test test-tracing-infinite-loop
+
+  {:rules [cold-rule [[[Cold]]
+                      (insert! (->Cold "ORD"))]]
+
+   :sessions [empty-session [cold-rule] {}]}
+
+  (try
+    (do (-> empty-session
+            tr/with-tracing
+            (insert (->Cold "ARN"))
+            ;; Use a small value here to ensure that we don't run out of memory before throwing.
+            ;; There is unfortunately a tradeoff where making the default number of cycles allowed
+            ;; high enough for some use cases will allow others to create OutOfMemory errors in cases
+            ;; like these but we can at least throw when there is enough memory available to hold the facts
+            ;; created in the loop.
+            (fire-rules {:max-cycles 10}))
+        (is false "The infinite loop did not throw an exception."))
+    (catch Exception e
+      (let [data-maps (ex-data-maps e)
+            loop-data (filter :clara-rules/infinite-loop-suspected data-maps)]
+        (is (= (count loop-data)
+               1)
+            "There should only be one exception in the chain from infinite rules loops.")
+        (is (= (-> loop-data first  :listeners count) 1)
+            "There should only be one listener.")
+        (is (-> loop-data first  :listeners ^PersistentTracingListener first .-trace not-empty)
+            "There should be tracing data available when a traced session throws an exception on an infinite loop.")))))
+
+(def-rules-test test-max-cycles-respected
+
+  ;; As the name suggests, this is a test to validate that setting the max-cycles to different
+  ;; values actually works.  The others tests are focused on validating that infinite loops
+  ;; throw exceptions, and the max-cycles values there are simply set to restrict resource usage,
+  ;; whether CPU or memory, by the test.  Here, on the other hand, we construct a rule where we control
+  ;; how many rule cycles will be executed, and then validate that an exception is thrown when that number is
+  ;; greater than the max-cycles and is not when it isn't.  For good measure, we validate that the non-exception-throwing
+  ;; case has queryable output to make sure that the logic was actually run, not ommitted because of a typo or similar.
+
+  {:rules [recursive-rule-with-end [[[First]]
+                                    (when (< @side-effect-holder 20)
+                                      (do
+                                        (insert-unconditional! (->First))
+                                        (swap! side-effect-holder inc)))]]
+
+   :queries [first-query [[] [[?f <- First]]]]
+
+   :sessions [empty-session [recursive-rule-with-end first-query] {}]}
+
+
+  (reset! side-effect-holder 0)
+
+  (assert-ex-data {:clara-rules/infinite-loop-suspected true}
+                  (-> empty-session
+                      (insert (->First))
+                      (fire-rules {:max-cycles 10})))
+
+  (reset! side-effect-holder 0)
+
+  (is (= (count (-> empty-session
+                    (insert (->First))
+                    (fire-rules {:max-cycles 30})
+                    (query first-query)))
+         21)))
diff --git a/src/test/common/clara/test_testing_utils.cljc b/src/test/common/clara/test_testing_utils.cljc
@@ -1,7 +1,8 @@
 #?(:clj
    (ns clara.test-testing-utils
      (:require [clara.tools.testing-utils :refer [def-rules-test
-                                                  run-performance-test]]
+                                                  run-performance-test
+                                                  ex-data-maps]]
                [clara.rules :as r]
 
                [clara.rules.testfacts :refer [->Temperature ->Cold]]
@@ -71,3 +72,9 @@
                          :iterations 5
                          :mean-assertion (partial > 500)})
   (is (= @fire-rules-counter 5)))
+
+#?(:clj
+   (deftest test-ex-data-maps
+     (let [exception (ex-info  "Top" {:level :top} (IllegalArgumentException. "" (ex-info "Bottom" {:level :bottom})))]
+       (is  (= (ex-data-maps exception)
+               [{:level :top} {:level :bottom}])))))