From 4368c040cff05fc13c6367e7f5b67f5b51178c65 Mon Sep 17 00:00:00 2001
From: "petro.zarytskyi" <petro.zarytskyi@gmail.com>
Date: Wed, 14 Aug 2024 01:07:45 +0300
Subject: [PATCH] Differentiate the RHS in multiplication instead of cloning by
 introducing placeholders

Currently, when differentiating multiplication in the reverse mode, we need to pass the differentiated LHS when visiting the RHS and vice versa (i.e. ``Visit(R, dR)`` and ``Visit(L, dL)`` where ``dR = LDiff * dfdx`` and ``dL = dfdx * RDiff``). This creates a loop that we break in one of two ways:
1) Create a variable to represent the result of the RHS visitation, use it to visit the LHS, visit the RHS, and then set the value of the variable introduced before to the result of the RHS visitation.
2) Clone the RHS, use it to visit the LHS, then visit the RHS.

The 1st approach is bad because it introduces a new variable that is usually unnecessary.

The 2nd approach is used more frequently, but its downside is that cloning is not the same as visiting, so some expressions cannot simply be cloned, e.g.
a) ``x > 0 ? a : b`` is cloned as ``x > 0 ? a : b`` but differentiated as ``_cond ? a : b``, where ``_cond`` is a variable to save the condition ``x > 0``.
b) References are often turned into pointers in the reverse mode. Because of that, a reference ``x`` should be differentiated as ``*x`` and not as just ``x``, which is what ``Clone`` produces. We already have an exception to handle ref-type decl refs as the RHS of multiplication; it is removed in this PR.

This commit solves this problem in a third way: by passing a placeholder literal expression when visiting the LHS and replacing it once the differentiated RHS is known. All of the new logic is sunk into the existing functions ``DelayedGlobalStoreAndRef`` and ``Finalize``.
---
 .../clad/Differentiator/ReverseModeVisitor.h  |  14 +-
 lib/Differentiator/ReverseModeVisitor.cpp     | 177 +++++++++++-------
 test/ErrorEstimation/LoopsAndArrays.C         |   8 +-
 test/ErrorEstimation/LoopsAndArraysExec.C     |   3 +-
 test/Gradient/Gradients.C                     |  84 ++++++++-
 test/Gradient/Loops.C                         |   4 +-
 test/Gradient/NonDifferentiable.C             |   2 +-
 test/Gradient/Pointers.C                      |  10 +-
 8 files changed, 215 insertions(+), 87 deletions(-)

diff --git a/include/clad/Differentiator/ReverseModeVisitor.h b/include/clad/Differentiator/ReverseModeVisitor.h
index 427859dc2..5eeb60286 100644
--- a/include/clad/Differentiator/ReverseModeVisitor.h
+++ b/include/clad/Differentiator/ReverseModeVisitor.h
@@ -278,13 +278,14 @@ namespace clad {
       bool isInsideLoop;
       bool isFnScope;
       bool needsUpdate;
+      clang::Expr* Placeholder;
       DelayedStoreResult(ReverseModeVisitor& pV, StmtDiff pResult,
-                         clang::VarDecl* pDeclaration, bool pIsConstant,
-                         bool pIsInsideLoop, bool pIsFnScope,
-                         bool pNeedsUpdate = false)
+                         clang::VarDecl* pDeclaration, bool pIsInsideLoop,
+                         bool pIsFnScope, bool pNeedsUpdate = false,
+                         clang::Expr* pPlaceholder = nullptr)
           : V(pV), Result(pResult), Declaration(pDeclaration),
-            isConstant(pIsConstant), isInsideLoop(pIsInsideLoop),
-            isFnScope(pIsFnScope), needsUpdate(pNeedsUpdate) {}
+            isInsideLoop(pIsInsideLoop), isFnScope(pIsFnScope),
+            needsUpdate(pNeedsUpdate), Placeholder(pPlaceholder) {}
       void Finalize(clang::Expr* New);
     };
 
@@ -297,7 +298,8 @@ namespace clad {
     /// This is what DelayedGlobalStoreAndRef does. E is expected to be the
     /// original (uncloned) expression.
     DelayedStoreResult DelayedGlobalStoreAndRef(clang::Expr* E,
-                                                llvm::StringRef prefix = "_t");
+                                                llvm::StringRef prefix = "_t",
+                                                bool forceStore = false);
 
     struct CladTapeResult {
       ReverseModeVisitor& V;
diff --git a/lib/Differentiator/ReverseModeVisitor.cpp b/lib/Differentiator/ReverseModeVisitor.cpp
index 6306399ea..b8baae4f9 100644
--- a/lib/Differentiator/ReverseModeVisitor.cpp
+++ b/lib/Differentiator/ReverseModeVisitor.cpp
@@ -2319,25 +2319,8 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       // to reduce cloning complexity and only clones once. Storing it in a
       // global variable allows to save current result and make it accessible
       // in the reverse pass.
-      std::unique_ptr<DelayedStoreResult> RDelayed;
-      StmtDiff RResult;
-      // If R has no side effects, it can be just cloned
-      // (no need to store it).
-
-      // Check if the local variable declaration is reference type, since it is
-      // moved to the global scope and the right side should be recomputed
-      bool promoteToFnScope = false;
-      if (auto* RDeclRef = dyn_cast<DeclRefExpr>(R->IgnoreImplicit()))
-        promoteToFnScope = RDeclRef->getDecl()->getType()->isReferenceType() &&
-                           !getCurrentScope()->isFunctionScope();
-
-      if (!ShouldRecompute(R) || promoteToFnScope) {
-        RDelayed = std::unique_ptr<DelayedStoreResult>(
-            new DelayedStoreResult(DelayedGlobalStoreAndRef(R)));
-        RResult = RDelayed->Result;
-      } else {
-        RResult = StmtDiff(Clone(R));
-      }
+      DelayedStoreResult RDelayed = DelayedGlobalStoreAndRef(R);
+      StmtDiff& RResult = RDelayed.Result;
 
       Expr* dl = nullptr;
       if (dfdx())
@@ -2358,31 +2341,24 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         LStored = GlobalStoreAndRef(LStored.getExpr(), /*prefix=*/"_t",
                                     /*force=*/true);
       Stmt* LPop = endBlock(direction::reverse);
-      Expr::EvalResult dummy;
-      if (RDelayed ||
-          !clad_compat::Expr_EvaluateAsConstantExpr(R, dummy, m_Context)) {
-        Expr* dr = nullptr;
-        if (dfdx())
-          dr = BuildOp(BO_Mul, LStored.getRevSweepAsExpr(), dfdx());
-        Rdiff = Visit(R, dr);
-        // Assign right multiplier's variable with R.
-        if (RDelayed)
-          RDelayed->Finalize(Rdiff.getExpr());
-      }
+      Expr* dr = nullptr;
+      if (dfdx())
+        dr = BuildOp(BO_Mul, LStored.getRevSweepAsExpr(), dfdx());
+      Rdiff = Visit(R, dr);
+      // Assign right multiplier's variable with R.
+      RDelayed.Finalize(Rdiff.getExpr());
       addToCurrentBlock(utils::unwrapIfSingleStmt(LPop), direction::reverse);
-      std::tie(Ldiff, Rdiff) =
-          std::make_pair(LStored.getExpr(), RResult.getExpr());
+      std::tie(Ldiff, Rdiff) = std::make_pair(LStored, RResult);
     } else if (opCode == BO_Div) {
       // xi = xl / xr
       // dxi/xl = 1 / xr
       // df/dxl += df/dxi * dxi/xl = df/dxi * (1/xr)
-      auto RDelayed = DelayedGlobalStoreAndRef(R);
-      StmtDiff RResult = RDelayed.Result;
-      Expr* RStored =
-          StoreAndRef(RResult.getRevSweepAsExpr(), direction::reverse);
+      auto RDelayed = DelayedGlobalStoreAndRef(R, /*prefix=*/"_t",
+                                               /*forceStore=*/true);
+      StmtDiff& RResult = RDelayed.Result;
       Expr* dl = nullptr;
       if (dfdx())
-        dl = BuildOp(BO_Div, dfdx(), RStored);
+        dl = BuildOp(BO_Div, dfdx(), RResult.getExpr());
       Ldiff = Visit(L, dl);
       StmtDiff LStored = Ldiff;
       // Catch the pop statement and emit it after
@@ -2396,14 +2372,17 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         LStored = GlobalStoreAndRef(LStored.getExpr(), /*prefix=*/"_t",
                                     /*force=*/true);
       Stmt* LPop = endBlock(direction::reverse);
-      // dxi/xr = -xl / (xr * xr)
-      // df/dxl += df/dxi * dxi/xr = df/dxi * (-xl /(xr * xr))
-      // Wrap R * R in parentheses: (R * R). otherwise code like 1 / R * R is
-      // produced instead of 1 / (R * R).
-      if (!RDelayed.isConstant) {
+      Expr::EvalResult dummy;
+      if (!clad_compat::Expr_EvaluateAsConstantExpr(R, dummy, m_Context) ||
+          RDelayed.needsUpdate) {
+        // dxi/xr = -xl / (xr * xr)
+        // df/dxl += df/dxi * dxi/xr = df/dxi * (-xl /(xr * xr))
+        // Wrap R * R in parentheses: (R * R). otherwise code like 1 / R * R is
+        // produced instead of 1 / (R * R).
         Expr* dr = nullptr;
         if (dfdx()) {
-          Expr* RxR = BuildParens(BuildOp(BO_Mul, RStored, RStored));
+          Expr* RxR = BuildParens(
+              BuildOp(BO_Mul, RResult.getExpr(), RResult.getExpr()));
           dr = BuildOp(BO_Mul, dfdx(),
                        BuildOp(UO_Minus,
                                BuildParens(BuildOp(
@@ -2414,8 +2393,7 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         RDelayed.Finalize(Rdiff.getExpr());
       }
       addToCurrentBlock(utils::unwrapIfSingleStmt(LPop), direction::reverse);
-      std::tie(Ldiff, Rdiff) =
-          std::make_pair(LStored.getExpr(), RResult.getExpr());
+      std::tie(Ldiff, Rdiff) = std::make_pair(LStored, RResult);
     } else if (BinOp->isAssignmentOp()) {
       if (L->isModifiableLvalue(m_Context) != Expr::MLV_Valid) {
         diag(DiagnosticsEngine::Warning,
@@ -2611,26 +2589,25 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
         Expr* zero = getZeroInit(ResultRef->getType());
         addToCurrentBlock(BuildOp(BO_Assign, ResultRef, zero),
                           direction::reverse);
-        auto RDelayed = DelayedGlobalStoreAndRef(R);
-        StmtDiff RResult = RDelayed.Result;
+        auto RDelayed = DelayedGlobalStoreAndRef(R, /*prefix=*/"_t",
+                                                 /*forceStore=*/true);
+        StmtDiff& RResult = RDelayed.Result;
         Expr* RStored =
             StoreAndRef(RResult.getRevSweepAsExpr(), direction::reverse);
         addToCurrentBlock(BuildOp(BO_AddAssign, ResultRef,
                                   BuildOp(BO_Div, oldValue, RStored)),
                           direction::reverse);
-        if (!RDelayed.isConstant) {
-          if (isInsideLoop)
-            addToCurrentBlock(LCloned, direction::forward);
-          Expr* RxR = BuildParens(BuildOp(BO_Mul, RStored, RStored));
-          Expr* dr = BuildOp(BO_Mul, oldValue,
-                             BuildOp(UO_Minus, BuildOp(BO_Div, LCloned, RxR)));
-          dr = StoreAndRef(dr, direction::reverse);
-          Rdiff = Visit(R, dr);
-          RDelayed.Finalize(Rdiff.getExpr());
-        }
+        if (isInsideLoop)
+          addToCurrentBlock(LCloned, direction::forward);
+        Expr* RxR = BuildParens(BuildOp(BO_Mul, RStored, RStored));
+        Expr* dr = BuildOp(BO_Mul, oldValue,
+                           BuildOp(UO_Minus, BuildOp(BO_Div, LCloned, RxR)));
+        dr = StoreAndRef(dr, direction::reverse);
+        Rdiff = Visit(R, dr);
+        RDelayed.Finalize(Rdiff.getExpr());
         valueForRevPass = BuildOp(BO_Div, Rdiff.getRevSweepAsExpr(),
                                   Ldiff.getRevSweepAsExpr());
-        std::tie(Ldiff, Rdiff) = std::make_pair(LCloned, RResult.getExpr());
+        std::tie(Ldiff, Rdiff) = std::make_pair(LCloned, RResult);
       } else
         llvm_unreachable("unknown assignment opCode");
       if (m_ExternalSource)
@@ -3424,8 +3401,61 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   }
 
   void ReverseModeVisitor::DelayedStoreResult::Finalize(Expr* New) {
-    if (isConstant || !needsUpdate)
+    // Placeholders are used when we have to use an expr before it is available.
+    // For instance, this is necessary for multiplication and division when the
+    // RHS and LHS need the derivatives of each other to be differentiated. We
+    // need placeholders to break this loop.
+    class PlaceholderReplacer
+        : public RecursiveASTVisitor<PlaceholderReplacer> {
+    public:
+      const Expr* placeholder;
+      Sema& m_Sema;
+      ASTContext& m_Context;
+      Expr* newExpr{nullptr};
+      PlaceholderReplacer(const Expr* Placeholder, Sema& S)
+          : placeholder(Placeholder), m_Sema(S), m_Context(S.getASTContext()) {}
+
+      void Replace(ReverseModeVisitor& RMV, Expr* New, StmtDiff& Result) {
+        newExpr = New;
+        for (Stmt* S : RMV.getCurrentBlock(direction::forward))
+          TraverseStmt(S);
+        for (Stmt* S : RMV.getCurrentBlock(direction::reverse))
+          TraverseStmt(S);
+        Result = New;
+      }
+
+      // We chose iteration rather than visiting because we only do this for
+      // simple Expression subtrees and it is not worth it to implement an
+      // entire visitor infrastructure for simple replacements.
+      bool VisitExpr(Expr* E) const {
+        for (Stmt*& S : E->children())
+          if (S == placeholder) {
+            // Since we are manually replacing the statement, implicit casts are
+            // not generated automatically.
+            ExprResult newExprRes{newExpr};
+            QualType targetTy = cast<Expr>(S)->getType();
+            CastKind kind = m_Sema.PrepareScalarCast(newExprRes, targetTy);
+            // CK_NoOp casts trigger an assertion on debug Clang
+            if (kind == CK_NoOp)
+              S = newExpr;
+            else
+              S = m_Sema.ImpCastExprToType(newExpr, targetTy, kind).get();
+          }
+        return true;
+      }
+      PlaceholderReplacer(const PlaceholderReplacer&) = delete;
+      PlaceholderReplacer(PlaceholderReplacer&&) = delete;
+    };
+
+    if (!needsUpdate)
+      return;
+
+    if (Placeholder) {
+      PlaceholderReplacer repl(Placeholder, V.m_Sema);
+      repl.Replace(V, New, Result);
       return;
+    }
+
     if (isInsideLoop) {
       auto* Push = cast<CallExpr>(Result.getExpr());
       unsigned lastArg = Push->getNumArgs() - 1;
@@ -3441,21 +3471,30 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
   }
 
   ReverseModeVisitor::DelayedStoreResult
-  ReverseModeVisitor::DelayedGlobalStoreAndRef(Expr* E,
-                                               llvm::StringRef prefix) {
+  ReverseModeVisitor::DelayedGlobalStoreAndRef(Expr* E, llvm::StringRef prefix,
+                                               bool forceStore) {
     assert(E && "must be provided");
     if (!UsefulToStore(E)) {
       StmtDiff Ediff = Visit(E);
       Expr::EvalResult evalRes;
-      bool isConst =
-          clad_compat::Expr_EvaluateAsConstantExpr(E, evalRes, m_Context);
-      return DelayedStoreResult{*this,
-                                Ediff,
+      return DelayedStoreResult{*this, Ediff,
                                 /*Declaration=*/nullptr,
-                                /*isConstant=*/isConst,
                                 /*isInsideLoop=*/false,
-                                /*isFnScope=*/false,
-                                /*pNeedsUpdate=*/false};
+                                /*isFnScope=*/false};
+    }
+    if (!forceStore && ShouldRecompute(E)) {
+      // The value of the literal has no meaning. It's given a very particular
+      // value for easier debugging.
+      Expr* PH = ConstantFolder::synthesizeLiteral(E->getType(), m_Context,
+                                                   /*val=*/~0U);
+      return DelayedStoreResult{
+          *this,
+          StmtDiff{PH, /*diff=*/nullptr, /*forwSweepDiff=*/nullptr, PH},
+          /*Declaration=*/nullptr,
+          /*isInsideLoop=*/false,
+          /*isFnScope=*/false,
+          /*pNeedsUpdate=*/true,
+          /*pPlaceholder=*/PH};
     }
     if (isInsideLoop) {
       Expr* dummy = E;
@@ -3465,7 +3504,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
       return DelayedStoreResult{*this,
                                 StmtDiff{Push, nullptr, nullptr, Pop},
                                 /*Declaration=*/nullptr,
-                                /*isConstant=*/false,
                                 /*isInsideLoop=*/true,
                                 /*isFnScope=*/false,
                                 /*pNeedsUpdate=*/true};
@@ -3481,7 +3519,6 @@ Expr* getArraySizeExpr(const ArrayType* AT, ASTContext& context,
     return DelayedStoreResult{*this,
                               StmtDiff{Ref, nullptr, nullptr, Ref},
                               /*Declaration=*/VD,
-                              /*isConstant=*/false,
                               /*isInsideLoop=*/false,
                               /*isFnScope=*/isFnScope,
                               /*pNeedsUpdate=*/true};
diff --git a/test/ErrorEstimation/LoopsAndArrays.C b/test/ErrorEstimation/LoopsAndArrays.C
index 360051f71..c46b36d25 100644
--- a/test/ErrorEstimation/LoopsAndArrays.C
+++ b/test/ErrorEstimation/LoopsAndArrays.C
@@ -226,8 +226,8 @@ double func5(double* x, double* y, double* output) {
 
 //CHECK: void func5_grad(double *x, double *y, double *output, double *_d_x, double *_d_y, double *_d_output, double &_final_error) {
 //CHECK-NEXT:     unsigned {{int|long|long long}} output_size = 0;
-//CHECK-NEXT:     unsigned {{int|long|long long}} x_size = 0;
 //CHECK-NEXT:     unsigned {{int|long|long long}} y_size = 0;
+//CHECK-NEXT:     unsigned {{int|long|long long}} x_size = 0;
 //CHECK-NEXT:     double _ret_value0 = 0;
 //CHECK-NEXT:     double _t0 = output[0];
 //CHECK-NEXT:     output[0] = x[1] * y[2] - x[2] * y[1];
@@ -249,10 +249,12 @@ double func5(double* x, double* y, double* output) {
 //CHECK-NEXT:         output[2] = _t2;
 //CHECK-NEXT:         double _r_d2 = _d_output[2];
 //CHECK-NEXT:         _d_output[2] = 0;
+//CHECK-NEXT:         y_size = std::max(y_size, 1);
 //CHECK-NEXT:         _d_x[0] += _r_d2 * y[1];
 //CHECK-NEXT:         x_size = std::max(x_size, 0);
 //CHECK-NEXT:         _d_y[1] += x[0] * _r_d2;
 //CHECK-NEXT:         y_size = std::max(y_size, 1);
+//CHECK-NEXT:         x_size = std::max(x_size, 1);
 //CHECK-NEXT:         _d_y[0] += -_r_d2 * x[1];
 //CHECK-NEXT:         y_size = std::max(y_size, 0);
 //CHECK-NEXT:         _d_x[1] += y[0] * -_r_d2;
@@ -264,10 +266,12 @@ double func5(double* x, double* y, double* output) {
 //CHECK-NEXT:         output[1] = _t1;
 //CHECK-NEXT:         double _r_d1 = _d_output[1];
 //CHECK-NEXT:         _d_output[1] = 0;
+//CHECK-NEXT:         y_size = std::max(y_size, 0);
 //CHECK-NEXT:         _d_x[2] += _r_d1 * y[0];
 //CHECK-NEXT:         x_size = std::max(x_size, 2);
 //CHECK-NEXT:         _d_y[0] += x[2] * _r_d1;
 //CHECK-NEXT:         y_size = std::max(y_size, 0);
+//CHECK-NEXT:         y_size = std::max(y_size, 2);
 //CHECK-NEXT:         _d_x[0] += -_r_d1 * y[2];
 //CHECK-NEXT:         x_size = std::max(x_size, 0);
 //CHECK-NEXT:         _d_y[2] += x[0] * -_r_d1;
@@ -279,10 +283,12 @@ double func5(double* x, double* y, double* output) {
 //CHECK-NEXT:         output[0] = _t0;
 //CHECK-NEXT:         double _r_d0 = _d_output[0];
 //CHECK-NEXT:         _d_output[0] = 0;
+//CHECK-NEXT:         y_size = std::max(y_size, 2);
 //CHECK-NEXT:         _d_x[1] += _r_d0 * y[2];
 //CHECK-NEXT:         x_size = std::max(x_size, 1);
 //CHECK-NEXT:         _d_y[2] += x[1] * _r_d0;
 //CHECK-NEXT:         y_size = std::max(y_size, 2);
+//CHECK-NEXT:         y_size = std::max(y_size, 1);
 //CHECK-NEXT:         _d_x[2] += -_r_d0 * y[1];
 //CHECK-NEXT:         x_size = std::max(x_size, 2);
 //CHECK-NEXT:         _d_y[1] += x[2] * -_r_d0;
diff --git a/test/ErrorEstimation/LoopsAndArraysExec.C b/test/ErrorEstimation/LoopsAndArraysExec.C
index a5ac15f2c..b6efac4f5 100644
--- a/test/ErrorEstimation/LoopsAndArraysExec.C
+++ b/test/ErrorEstimation/LoopsAndArraysExec.C
@@ -72,8 +72,8 @@ double mulSum(float* a, float* b, int n) {
 //CHECK-NEXT:     int _d_j = 0;
 //CHECK-NEXT:     int j = 0;
 //CHECK-NEXT:     clad::tape<double> _t3 = {};
-//CHECK-NEXT:     unsigned {{int|long|long long}} a_size = 0;
 //CHECK-NEXT:     unsigned {{int|long|long long}} b_size = 0;
+//CHECK-NEXT:     unsigned {{int|long|long long}} a_size = 0;
 //CHECK-NEXT:     double _d_sum = 0;
 //CHECK-NEXT:     double sum = 0;
 //CHECK-NEXT:     unsigned {{int|long|long long}} _t0 = {{0U|0UL|0ULL}};
@@ -111,6 +111,7 @@ double mulSum(float* a, float* b, int n) {
 //CHECK-NEXT:                 _final_error += std::abs(_d_sum * sum * {{.+}});
 //CHECK-NEXT:                 sum = clad::pop(_t3);
 //CHECK-NEXT:                 double _r_d0 = _d_sum;
+//CHECK-NEXT:                 b_size = std::max(b_size, j);
 //CHECK-NEXT:                 _d_a[i] += _r_d0 * b[j];
 //CHECK-NEXT:                 a_size = std::max(a_size, i);
 //CHECK-NEXT:                 _d_b[j] += a[i] * _r_d0;
diff --git a/test/Gradient/Gradients.C b/test/Gradient/Gradients.C
index 5a49af619..db2154348 100644
--- a/test/Gradient/Gradients.C
+++ b/test/Gradient/Gradients.C
@@ -714,7 +714,10 @@ double fn_template_non_type(double x) {
 // CHECK-NEXT:     bool _cond0 = maxN < {{15U|15UL|15ULL}};
 // CHECK-NEXT:     size_t _d_m = 0;
 // CHECK-NEXT:     const size_t m = _cond0 ? maxN : {{15U|15UL|15ULL}};
-// CHECK-NEXT:     *_d_x += 1 * m;
+// CHECK-NEXT:     {
+// CHECK-NEXT:       *_d_x += 1 * m;
+// CHECK-NEXT:       _d_m += x * 1;
+// CHECK-NEXT:     }
 // CHECK-NEXT:     if (_cond0)
 // CHECK-NEXT:         _d_maxN += _d_m;
 // CHECK-NEXT: }
@@ -1070,6 +1073,78 @@ double fn_cond_add_assign(double i, double j) {
 // CHECK-NEXT:    }
 // CHECK-NEXT:}
 
+double f_mult3(double i, double j) {
+  i = (i + j) * (i < 10 ? i : j);
+  return i;
+}
+
+//CHECK: void f_mult3_grad(double i, double j, double *_d_i, double *_d_j) {
+//CHECK-NEXT:     double _t0 = i;
+//CHECK-NEXT:     bool _cond0 = i < 10;
+//CHECK-NEXT:     i = (i + j) * (_cond0 ? i : j);
+//CHECK-NEXT:     *_d_i += 1;
+//CHECK-NEXT:     {
+//CHECK-NEXT:         i = _t0;
+//CHECK-NEXT:         double _r_d0 = *_d_i;
+//CHECK-NEXT:         *_d_i = 0;
+//CHECK-NEXT:         *_d_i += _r_d0 * (_cond0 ? i : j);
+//CHECK-NEXT:         *_d_j += _r_d0 * (_cond0 ? i : j);
+//CHECK-NEXT:         if (_cond0)
+//CHECK-NEXT:             *_d_i += (i + j) * _r_d0;
+//CHECK-NEXT:         else
+//CHECK-NEXT:             *_d_j += (i + j) * _r_d0;
+//CHECK-NEXT:     }
+//CHECK-NEXT: }
+
+double f_const_denom(double x, double y) {
+  const double m = 0.5;
+  return x * y / m;
+}
+
+//CHECK: void f_const_denom_grad(double x, double y, double *_d_x, double *_d_y) {
+//CHECK-NEXT:     double _d_m = 0;
+//CHECK-NEXT:     const double m = 0.5;
+//CHECK-NEXT:     {
+//CHECK-NEXT:         *_d_x += 1 / m * y;
+//CHECK-NEXT:         *_d_y += x * 1 / m;
+//CHECK-NEXT:     }
+//CHECK-NEXT: }
+
+double f_ref_in_rhs(double x, double y) {
+  if (x != 55) {
+    double& ref_x = x;
+    double& ref_y = y;
+    return ref_y * (ref_x + y);
+  }
+  return 1;
+}
+
+//CHECK: void f_ref_in_rhs_grad(double x, double y, double *_d_x, double *_d_y) {
+//CHECK-NEXT:     bool _cond0;
+//CHECK-NEXT:     double *_d_ref_x = 0;
+//CHECK-NEXT:     double *ref_x = {};
+//CHECK-NEXT:     double *_d_ref_y = 0;
+//CHECK-NEXT:     double *ref_y = {};
+//CHECK-NEXT:     {
+//CHECK-NEXT:         _cond0 = x != 55;
+//CHECK-NEXT:         if (_cond0) {
+//CHECK-NEXT:             _d_ref_x = &*_d_x;
+//CHECK-NEXT:             ref_x = &x;
+//CHECK-NEXT:             _d_ref_y = &*_d_y;
+//CHECK-NEXT:             ref_y = &y;
+//CHECK-NEXT:             goto _label0;
+//CHECK-NEXT:         }
+//CHECK-NEXT:     }
+//CHECK-NEXT:     if (_cond0) {
+//CHECK-NEXT:       _label0:
+//CHECK-NEXT:         {
+//CHECK-NEXT:             *_d_ref_y += 1 * (*ref_x + y);
+//CHECK-NEXT:             *_d_ref_x += *ref_y * 1;
+//CHECK-NEXT:             *_d_y += *ref_y * 1;
+//CHECK-NEXT:         }
+//CHECK-NEXT:     }
+//CHECK-NEXT: }
+
 #define TEST(F, x, y)                                                          \
   {                                                                            \
     result[0] = 0;                                                             \
@@ -1158,5 +1233,12 @@ int main() {
   INIT_GRADIENT(fn_cond_add_assign);
   TEST_GRADIENT(fn_cond_add_assign, /*numOfDerivativeArgs=*/2, 3, 5, &d_i, &d_j);  // CHECK-EXEC: {80.00, 48.00}
 
+  INIT_GRADIENT(f_mult3);
+  TEST_GRADIENT(f_mult3, /*numOfDerivativeArgs=*/2, 3, 5, &d_i, &d_j);  // CHECK-EXEC: {11.00, 3.00}
+
+  INIT_GRADIENT(f_const_denom);
+  TEST_GRADIENT(f_const_denom, /*numOfDerivativeArgs=*/2, 3, 5, &d_i, &d_j);  // CHECK-EXEC: {10.00, 6.00}
 
+  INIT_GRADIENT(f_ref_in_rhs);
+  TEST_GRADIENT(f_ref_in_rhs, /*numOfDerivativeArgs=*/2, 3, 5, &d_i, &d_j);  // CHECK-EXEC: {5.00, 13.00}
 }
diff --git a/test/Gradient/Loops.C b/test/Gradient/Loops.C
index c97cd9da6..9999e480c 100644
--- a/test/Gradient/Loops.C
+++ b/test/Gradient/Loops.C
@@ -3113,7 +3113,7 @@ double fn39(double x) {
 //CHECK-NEXT:         }
 //CHECK-NEXT:         _t0++;
 //CHECK-NEXT:         clad::push(_t1, res);
-//CHECK-NEXT:         res += x * (*i);
+//CHECK-NEXT:         res += x * *i;
 //CHECK-NEXT:     }
 //CHECK-NEXT:     _d_res += 1;
 //CHECK-NEXT:     for (;; _t0--) {
@@ -3128,7 +3128,7 @@ double fn39(double x) {
 //CHECK-NEXT:         {
 //CHECK-NEXT:             res = clad::pop(_t1);
 //CHECK-NEXT:             double _r_d0 = _d_res;
-//CHECK-NEXT:             *_d_x += _r_d0 * (*i);
+//CHECK-NEXT:             *_d_x += _r_d0 * *i;
 //CHECK-NEXT:             *_d_i += x * _r_d0;
 //CHECK-NEXT:         }
 //CHECK-NEXT:     }
diff --git a/test/Gradient/NonDifferentiable.C b/test/Gradient/NonDifferentiable.C
index 8ad0ea115..230c9a2b1 100644
--- a/test/Gradient/NonDifferentiable.C
+++ b/test/Gradient/NonDifferentiable.C
@@ -154,7 +154,7 @@ int main() {
     // CHECK-NEXT:     SimpleFunctions1 _d_obj({});
     // CHECK-NEXT:     SimpleFunctions1 obj(2, 3);
     // CHECK-NEXT:     {
-    // CHECK-NEXT:         *_d_obj.x_pointer += 1 * (*obj.y_pointer);
+    // CHECK-NEXT:         *_d_obj.x_pointer += 1 * *obj.y_pointer;
     // CHECK-NEXT:         *_d_i += 1 * j;
     // CHECK-NEXT:         *_d_j += i * 1;
     // CHECK-NEXT:     }
diff --git a/test/Gradient/Pointers.C b/test/Gradient/Pointers.C
index 8a57d88cd..450da889d 100644
--- a/test/Gradient/Pointers.C
+++ b/test/Gradient/Pointers.C
@@ -28,13 +28,13 @@ double minimalPointer(double x) {
 // CHECK-NEXT:     double *_d_p = &*_d_x;
 // CHECK-NEXT:     double *const p = &x;
 // CHECK-NEXT:     double _t0 = *p;
-// CHECK-NEXT:     *p = *p * (*p);
+// CHECK-NEXT:     *p = *p * *p;
 // CHECK-NEXT:     *_d_p += 1;
 // CHECK-NEXT:     {
 // CHECK-NEXT:         *p = _t0;
 // CHECK-NEXT:         double _r_d0 = *_d_p;
 // CHECK-NEXT:         *_d_p = 0;
-// CHECK-NEXT:         *_d_p += _r_d0 * (*p);
+// CHECK-NEXT:         *_d_p += _r_d0 * *p;
 // CHECK-NEXT:         *_d_p += *p * _r_d0;
 // CHECK-NEXT:     }
 // CHECK-NEXT: }
@@ -87,7 +87,7 @@ double arrayPointer(const double* arr) {
 // CHECK-NEXT:     _d_p = _d_p - 2;
 // CHECK-NEXT:     p = p - 2;
 // CHECK-NEXT:     double _t11 = sum;
-// CHECK-NEXT:     sum += 5 * (*p);
+// CHECK-NEXT:     sum += 5 * *p;
 // CHECK-NEXT:     _d_sum += 1;
 // CHECK-NEXT:     {
 // CHECK-NEXT:         sum = _t11;
@@ -170,7 +170,7 @@ double pointerParam(const double* arr, size_t n) {
 // CHECK-NEXT:         clad::push(_t1, _d_j);
 // CHECK-NEXT:         clad::push(_t3, j) , j = &i;
 // CHECK-NEXT:         clad::push(_t4, sum);
-// CHECK-NEXT:         sum += arr[0] * (*j);
+// CHECK-NEXT:         sum += arr[0] * *j;
 // CHECK-NEXT:         clad::push(_t5, arr);
 // CHECK-NEXT:         clad::push(_t6, _d_arr);
 // CHECK-NEXT:         _d_arr = _d_arr + 1;
@@ -191,7 +191,7 @@ double pointerParam(const double* arr, size_t n) {
 // CHECK-NEXT:         {
 // CHECK-NEXT:             sum = clad::pop(_t4);
 // CHECK-NEXT:             double _r_d0 = _d_sum;
-// CHECK-NEXT:             _d_arr[0] += _r_d0 * (*j);
+// CHECK-NEXT:             _d_arr[0] += _r_d0 * *j;
 // CHECK-NEXT:             *_t2 += arr[0] * _r_d0;
 // CHECK-NEXT:         }
 // CHECK-NEXT:         j = clad::pop(_t3);