From 554b041cf8b686f939c0edbc596ff3bb5aec467e Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sat, 30 Oct 2021 23:12:21 -0400
Subject: [PATCH 01/21] start with an LLVM analysis file

---
 .../loop_unroller/BUILD                       |  21 +
 .../loop_unroller/loop_unroller.cc            | 381 ++++++++++++++++++
 2 files changed, 402 insertions(+)
 create mode 100644 examples/example_unrolling_service/loop_unroller/BUILD
 create mode 100644 examples/example_unrolling_service/loop_unroller/loop_unroller.cc
diff --git a/examples/example_unrolling_service/loop_unroller/BUILD b/examples/example_unrolling_service/loop_unroller/BUILD
new file mode 100644
index 000000000..ef9122ff9
--- /dev/null
+++ b/examples/example_unrolling_service/loop_unroller/BUILD
@@ -0,0 +1,21 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the LICENSE file
+# in the root directory of this source tree.
+#
+# This package builds the loop_unroller example binary, which links against LLVM.
+load("@rules_cc//cc:defs.bzl", "cc_binary")
+
+cc_binary(
+    name = "loop_unroller",
+    srcs = ["loop_unroller.cc"],
+    copts = [
+        "-Wall",
+        "-fdiagnostics-color=always",
+        "-fno-rtti",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "@llvm//10.0.0",
+    ],
+)
diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
new file mode 100644
index 000000000..5d4ae7eae
--- /dev/null
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -0,0 +1,381 @@
+//==============================================================================
+// Estimate best and worst case execution time of LLVM code.
+//
+// Hugh Leather hughleat@gmail.com 2020-06-30
+//==============================================================================
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <queue>
+#include <unordered_map>
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace std;
+
+//------------------------------------------------------------------------------
+// Command line options
+//------------------------------------------------------------------------------
+cl::OptionCategory bwcetCategory{"bwcet options"};
+
+cl::list<std::string> inputFiles(cl::Positional, cl::desc{"<Modules to analyse>"},
+                                 cl::value_desc{"bitcode filename"}, cl::OneOrMore,
+                                 cl::cat{bwcetCategory});
+
+cl::opt<string> outputFilename("output", cl::desc("Specify output filename (default to std out)"),
+                               cl::value_desc("output filename"), cl::init("-"),
+                               cl::cat{bwcetCategory});
+cl::alias outputFilenameA("o", cl::desc("Alias for --output"), cl::aliasopt(outputFilename),
+                          cl::cat{bwcetCategory});
+
+enum OutputFormat { TXT, JSON, CSV };
+cl::opt<OutputFormat> outputFormat("format", cl::desc("Choose output format"),
+                                   cl::values(clEnumVal(TXT, "Human readable format (default)"),
+                                              clEnumVal(JSON, "JSON format"),
+                                              clEnumVal(CSV, "CSV format")),
+                                   cl::init(TXT), cl::cat{bwcetCategory});
+cl::alias outputFormatA("f", cl::desc("Alias for --format"), cl::aliasopt(outputFormat),
+                        cl::cat{bwcetCategory});
+
+cl::opt<TargetTransformInfo::TargetCostKind> costKind(
+    "cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput),
+    cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput",
+                          "Reciprocal throughput (default)"),
+               clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"),
+               clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size")),
+    cl::cat{bwcetCategory});
+cl::alias costKindA("k", cl::desc("Alias for --cost-kind"), cl::aliasopt(costKind),
+                    cl::cat{bwcetCategory});
+
+//------------------------------------------------------------------------------
+// Determine if CFG is a DAG.
+//------------------------------------------------------------------------------
+// Colour for DFS
+enum Colour { WHITE, GREY, BLACK };
+// DFS
+bool isDAG(const BasicBlock* bb, unordered_map<const BasicBlock*, Colour>& colour) {
+  switch (colour[bb]) {
+    case BLACK:
+      return true;
+    case GREY:
+      return false;
+    case WHITE: {
+      colour[bb] = GREY;
+      for (const auto* succ : successors(bb)) {
+        if (!isDAG(succ, colour))
+          return false;
+      }
+      colour[bb] = BLACK;
+      return true;
+    }
+  }
+}
+bool isDAG(const Function& f) {
+  unordered_map<const BasicBlock*, Colour> colour;
+  return isDAG(&f.getEntryBlock(), colour);
+}
+
+//------------------------------------------------------------------------------
+// Get min and max cost of functions and basic blocks
+//------------------------------------------------------------------------------
+TargetIRAnalysis tira;
+unique_ptr<TargetTransformInfoWrapperPass> ttiwp((TargetTransformInfoWrapperPass*)
+                                                     createTargetTransformInfoWrapperPass(tira));
+using CostT = double;
+
+CostT getCost(const BasicBlock& bb, const TargetTransformInfo& tti) {
+  CostT cost = 0;
+  for (const auto& insn : bb) {
+    cost += tti.getInstructionCost(&insn, costKind);
+  }
+  return cost;
+}
+CostT minCost(const Function& f, unordered_map<const BasicBlock*, CostT> bbCost) {
+  // Cost of best path (path with minimum cost)
+  CostT best = numeric_limits<CostT>::infinity();
+  // The exit block with the best path cost
+  const BasicBlock* bestBB = nullptr;
+  // Predecessors
+  unordered_map<const BasicBlock*, const BasicBlock*> pred;
+  // Map of costs into each vertex
+  unordered_map<const BasicBlock*, CostT> costIn;
+  // Priority queue
+  set<pair<CostT, const BasicBlock*>> q;
+  // Pointers into q - so we can change priority
+  unordered_map<const BasicBlock*, decltype(q.begin())> iter;
+  // Initialise cost
+  for (const BasicBlock& v : f.getBasicBlockList()) costIn[&v] = numeric_limits<CostT>::infinity();
+  auto start = &f.getEntryBlock();
+  costIn[start] = 0;
+  // Push into q (and remember iterator)
+  auto iti = q.insert({costIn[start], start});
+  iter[start] = iti.first;
+  // Do the search
+  while (!q.empty()) {
+    // Pop from the q
+    auto top = q.begin();
+    const BasicBlock* v = top->second;
+    CostT cIn = top->first;
+    q.erase(top);
+    iter.erase(v);
+    assert(cIn == costIn[v]);
+
+    // Get the cost out of this node
+    int cOut = cIn + bbCost[v];
+    // Count the successors as we process them
+    int numSuccs = 0;
+    // Process each successor
+    for (const auto* succ : successors(v)) {
+      numSuccs++;
+      // Update if the cost is better
+      if (cOut < costIn[succ]) {
+        // Set the new cost
+        costIn[succ] = cOut;
+        // Delete from the queue if already in there
+        if (iter.count(succ)) {
+          auto it = iter[succ];
+          q.erase(it);
+        }
+        // Insert into the queue (and remember iterator)
+        auto iti = q.insert({cOut, succ});
+        iter[succ] = iti.first;
+        // Remember predecessor
+        pred[succ] = v;
+      }
+    }
+    // Update best if this is an exit block (no successors) and we have a better cost
+    if (numSuccs == 0 && best > cOut) {
+      best = cOut;
+      bestBB = v;
+    }
+  }
+  return best;
+}
+CostT maxCost(const Function& f, unordered_map<const BasicBlock*, CostT> bbCost) {
+  // Cost of best path (path with minimum cost)
+  CostT best = 0;
+  // The exit block with the best path cost
+  const BasicBlock* bestBB = nullptr;
+  // Predecessors
+  unordered_map<const BasicBlock*, const BasicBlock*> pred;
+  // Map of costs into each vertex
+  unordered_map<const BasicBlock*, CostT> costIn;
+  // Priority queue
+  struct RCmp {
+    bool operator()(const pair<CostT, const BasicBlock*>& a,
+                    const pair<CostT, const BasicBlock*>& b) const {
+      if (a.first == b.first)
+        return a.second < b.second;
+      return a.first > b.first;
+    }
+  };
+  set<pair<CostT, const BasicBlock*>, RCmp> q;
+  // Pointers into q - so we can change priority
+  unordered_map<const BasicBlock*, decltype(q.begin())> iter;
+  // Initialise cost
+  for (const BasicBlock& v : f.getBasicBlockList()) costIn[&v] = 0;
+  auto start = &f.getEntryBlock();
+  costIn[start] = 0;
+  // Push into q (and remember iterator)
+  auto iti = q.insert({costIn[start], start});
+  iter[start] = iti.first;
+  // Do the search
+  while (!q.empty()) {
+    // Pop from the q
+    auto top = q.begin();
+    const BasicBlock* v = top->second;
+    CostT cIn = top->first;
+    q.erase(top);
+    iter.erase(v);
+    assert(cIn == costIn[v]);
+
+    // Get the cost out of this node
+    int cOut = cIn + bbCost[v];
+    // Count the successors as we process them
+    int numSuccs = 0;
+    // Process each successor
+    for (const auto* succ : successors(v)) {
+      numSuccs++;
+      // Update if the cost is better
+      if (cOut > costIn[succ]) {
+        // Set the new cost
+        costIn[succ] = cOut;
+        // Delete from the queue if already in there
+        if (iter.count(succ)) {
+          auto it = iter[succ];
+          q.erase(it);
+        }
+        // Insert into the queue (and remember iterator)
+        auto iti = q.insert({cOut, succ});
+        iter[succ] = iti.first;
+        // Remember predecessor
+        pred[succ] = v;
+      }
+    }
+    // Update best if this is an exit block (no successors) and we have a better cost
+    if (numSuccs == 0 && best < cOut) {
+      best = cOut;
+      bestBB = v;
+    }
+  }
+  return best;
+}
+
+pair<CostT, CostT> getCost(const Function& f) {
+  auto& tti = ttiwp->getTTI(f);
+
+  // Precompute BB costs.
+  unordered_map<const BasicBlock*, CostT> bbCost;
+  for (const auto& bb : f.getBasicBlockList()) bbCost[&bb] = getCost(bb, tti);
+
+  if (isDAG(f)) {
+    return {minCost(f, bbCost), maxCost(f, bbCost)};
+  } else {
+    return {minCost(f, bbCost), numeric_limits<CostT>::infinity()};
+  }
+}
+
+//------------------------------------------------------------------------------
+// Visitor functions, called to process the module
+//------------------------------------------------------------------------------
+void visit(const Function& f, ostream& os) {
+  auto costs = getCost(f);
+  switch (outputFormat) {
+    case TXT: {
+      os << "  Function: " << f.getName().str() << " ";
+      os << "min=" << costs.first << " ";
+      os << "max=" << costs.second << endl;
+      break;
+    }
+    case JSON: {
+      os << "{";
+      os << "\"function\":\"" << f.getName().str() << "\",";
+      os << "\"min\":" << costs.first;
+      if (costs.second != numeric_limits<CostT>::infinity()) {
+        os << ",\"max\":" << costs.second;
+      }
+      os << "}";
+      break;
+    }
+    case CSV: {
+      os << f.getParent()->getName().str() << ",";
+      os << f.getName().str() << ",";
+      os << costs.first << ",";
+      if (costs.second != numeric_limits<CostT>::infinity()) {
+        os << costs.second;
+      }
+      os << "\n";
+      break;
+    }
+  }
+}
+void visit(const Module& m, ostream& os) {
+  switch (outputFormat) {
+    case TXT: {
+      os << "Module: " << m.getName().str() << "\n";
+      for (const auto& f : m.functions()) visit(f, os);
+      break;
+    }
+    case JSON: {
+      os << "{";
+      os << "\"module\":\"" << m.getName().str() << "\",";
+      os << "\"functions\":[";
+      bool isFirst = true;
+      for (const auto& f : m.functions()) {
+        if (!isFirst)
+          os << ",";
+        else
+          isFirst = false;
+        visit(f, os);
+      }
+      os << "]}";
+      break;
+    }
+    case CSV: {
+      for (const auto& f : m.functions()) visit(f, os);
+      break;
+    }
+  }
+}
+void visit(const string& filename, ostream& os) {
+  // Parse the IR file passed on the command line.
+  SMDiagnostic err;
+  LLVMContext ctx;
+  unique_ptr<Module> m = parseIRFile(filename, err, ctx);
+
+  if (!m)
+    throw err;
+
+  // Run the analysis and print the results
+  visit(*m, os);
+}
+void visit(const vector<string>& filenames, ostream& os) {
+  switch (outputFormat) {
+    case TXT: {
+      for (const auto& fn : filenames) visit(fn, os);
+      break;
+    }
+    case JSON: {
+      os << "[";
+      bool isFirst = true;
+      for (const auto& fn : filenames) {
+        if (!isFirst)
+          os << ",";
+        else
+          isFirst = false;
+        visit(fn, os);
+      }
+      os << "]\n";
+      break;
+    }
+    case CSV: {
+      os << "Module, Function, DAG, Min, Max\n";
+      for (const auto& fn : filenames) visit(fn, os);
+      break;
+    }
+  }
+}
+//------------------------------------------------------------------------------
+// Driver
+//------------------------------------------------------------------------------
+int main(int argc, char** argv) {
+  // Hide all options apart from the ones specific to this tool
+  cl::HideUnrelatedOptions(bwcetCategory);
+
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "Estimates the best and worst case runtime for each function the input IR file\n");
+
+  try {
+    // Get the output file
+    unique_ptr<ostream> ofs(outputFilename == "-" ? nullptr : new ofstream(outputFilename.c_str()));
+    if (ofs && !ofs->good()) {
+      throw "Error opening output file: " + outputFilename;
+    }
+    ostream& os = ofs ? *ofs : cout;
+
+    // Makes sure llvm_shutdown() is called (which cleans up LLVM objects)
+    // http://llvm.org/docs/ProgrammersManual.html#ending-execution-with-llvm-shutdown
+    llvm_shutdown_obj shutdown_obj;
+
+    // Do the work
+    visit(inputFiles, os);
+
+  } catch (string e) {
+    errs() << e;
+    return -1;
+  } catch (SMDiagnostic e) {
+    e.print(argv[0], errs(), false);
+    return -1;
+  }
+  return 0;
+}

From 1e7850f3a099d2370e8e628d05850492c607117e Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sun, 31 Oct 2021 11:12:59 -0400
Subject: [PATCH 02/21] use an IR pass example

---
 .../loop_unroller/BUILD                       |   5 +-
 .../loop_unroller/IRCanonicalizer.h           |  77 ++
 .../loop_unroller/loop_unroller.cc            | 914 +++++++++++-------
 3 files changed, 664 insertions(+), 332 deletions(-)
 create mode 100644 examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h

diff --git a/examples/example_unrolling_service/loop_unroller/BUILD b/examples/example_unrolling_service/loop_unroller/BUILD
index ef9122ff9..52c8aa580 100644
--- a/examples/example_unrolling_service/loop_unroller/BUILD
+++ b/examples/example_unrolling_service/loop_unroller/BUILD
@@ -8,7 +8,10 @@ load("@rules_cc//cc:defs.bzl", "cc_binary")
 
 cc_binary(
     name = "loop_unroller",
-    srcs = ["loop_unroller.cc"],
+    srcs = [
+        "IRCanonicalizer.h",
+        "loop_unroller.cc",
+    ],
     copts = [
         "-Wall",
         "-fdiagnostics-color=always",
diff --git a/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h b/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
new file mode 100644
index 000000000..a4e14b517
--- /dev/null
+++ b/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+/// IRCanonicalizer aims to transform LLVM IR into canonical form.
+class IRCanonicalizer : public llvm::FunctionPass {
+ public:
+  static char ID;
+
+  /// Constructor for the IRCanonicalizer.
+  ///
+  /// \param PreserveOrder Preserves original order of instructions.
+  /// \param RenameAll Renames all instructions (including user-named).
+  /// \param FoldPreoutputs Folds all regular instructions (including pre-outputs).
+  /// \param ReorderOperands Sorts and reorders operands in commutative instructions.
+  IRCanonicalizer(bool PreserveOrder, bool RenameAll, bool FoldPreoutputs, bool ReorderOperands)
+      : FunctionPass(ID),
+        PreserveOrder(PreserveOrder),
+        RenameAll(RenameAll),
+        FoldPreoutputs(FoldPreoutputs),
+        ReorderOperands(ReorderOperands) {}
+
+  bool runOnFunction(llvm::Function& F) override;
+
+ private:
+  // Random constant for hashing, so the state isn't zero.
+  const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL;
+
+  /// \name Canonicalizer flags.
+  /// @{
+  /// Preserves original order of instructions.
+  bool PreserveOrder;
+  /// Renames all instructions (including user-named).
+  bool RenameAll;
+  /// Folds all regular instructions (including pre-outputs).
+  bool FoldPreoutputs;
+  /// Sorts and reorders operands in commutative instructions.
+  bool ReorderOperands;
+  /// @}
+
+  /// \name Naming.
+  /// @{
+  void nameFunctionArguments(llvm::Function& F);
+  void nameBasicBlocks(llvm::Function& F);
+  void nameInstructions(llvm::SmallVector<llvm::Instruction*, 16>& Outputs);
+  void nameInstruction(llvm::Instruction* I,
+                       llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
+  void nameAsInitialInstruction(llvm::Instruction* I);
+  void nameAsRegularInstruction(llvm::Instruction* I,
+                                llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
+  void foldInstructionName(llvm::Instruction* I);
+  /// @}
+
+  /// \name Reordering.
+  /// @{
+  void reorderInstructions(llvm::SmallVector<llvm::Instruction*, 16>& Outputs);
+  void reorderInstruction(llvm::Instruction* Used, llvm::Instruction* User,
+                          llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
+  void reorderInstructionOperandsByNames(llvm::Instruction* I);
+  void reorderPHIIncomingValues(llvm::PHINode* PN);
+  /// @}
+
+  /// \name Utility methods.
+  /// @{
+  llvm::SmallVector<llvm::Instruction*, 16> collectOutputInstructions(llvm::Function& F);
+  bool isOutput(const llvm::Instruction* I);
+  bool isInitialInstruction(const llvm::Instruction* I);
+  bool hasOnlyImmediateOperands(const llvm::Instruction* I);
+  llvm::SetVector<int> getOutputFootprint(llvm::Instruction* I,
+                                          llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
+  /// @}
+};
diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 5d4ae7eae..0014da795 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -1,381 +1,633 @@
-//==============================================================================
-// Estimate best and worst case execution time of LLVM code.
-//
-// Hugh Leather hughleat@gmail.com 2020-06-30
-//==============================================================================
-
-#include <cassert>
-#include <fstream>
-#include <iostream>
-#include <limits>
-#include <queue>
-#include <unordered_map>
-
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/CFG.h"
+#include <algorithm>
+#include <vector>
+
+#include "IRCanonicalizer.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Pass.h"
-#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
-using namespace std;
-
-//------------------------------------------------------------------------------
-// Command line options
-//------------------------------------------------------------------------------
-cl::OptionCategory bwcetCategory{"bwcet options"};
-
-cl::list<std::string> inputFiles(cl::Positional, cl::desc{"<Modules to analyse>"},
-                                 cl::value_desc{"bitcode filename"}, cl::OneOrMore,
-                                 cl::cat{bwcetCategory});
-
-cl::opt<string> outputFilename("output", cl::desc("Specify output filename (default to std out)"),
-                               cl::value_desc("output filename"), cl::init("-"),
-                               cl::cat{bwcetCategory});
-cl::alias outputFilenameA("o", cl::desc("Alias for --output"), cl::aliasopt(outputFilename),
-                          cl::cat{bwcetCategory});
-
-enum OutputFormat { TXT, JSON, CSV };
-cl::opt<OutputFormat> outputFormat("format", cl::desc("Choose output format"),
-                                   cl::values(clEnumVal(TXT, "Human readable format (default)"),
-                                              clEnumVal(JSON, "JSON format"),
-                                              clEnumVal(CSV, "CSV format")),
-                                   cl::init(TXT), cl::cat{bwcetCategory});
-cl::alias outputFormatA("f", cl::desc("Alias for --format"), cl::aliasopt(outputFormat),
-                        cl::cat{bwcetCategory});
-
-cl::opt<TargetTransformInfo::TargetCostKind> costKind(
-    "cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput),
-    cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput",
-                          "Reciprocal throughput (default)"),
-               clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"),
-               clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size")),
-    cl::cat{bwcetCategory});
-cl::alias costKindA("k", cl::desc("Alias for --cost-kind"), cl::aliasopt(costKind),
-                    cl::cat{bwcetCategory});
-
-//------------------------------------------------------------------------------
-// Determine if CFG is a DAG.
-//------------------------------------------------------------------------------
-// Colour for DFS
-enum Colour { WHITE, GREY, BLACK };
-// DFS
-bool isDAG(const BasicBlock* bb, unordered_map<const BasicBlock*, Colour>& colour) {
-  switch (colour[bb]) {
-    case BLACK:
-      return true;
-    case GREY:
-      return false;
-    case WHITE: {
-      colour[bb] = GREY;
-      for (const auto* succ : successors(bb)) {
-        if (!isDAG(succ, colour))
-          return false;
-      }
-      colour[bb] = BLACK;
-      return true;
+
+char IRCanonicalizer::ID = 0;
+
+/// Entry method to the IRCanonicalizer.
+///
+/// \param F Function to canonicalize.
+bool IRCanonicalizer::runOnFunction(Function& F) {
+  nameFunctionArguments(F);
+  nameBasicBlocks(F);
+
+  SmallVector<Instruction*, 16> Outputs = collectOutputInstructions(F);
+
+  if (!PreserveOrder)
+    reorderInstructions(Outputs);
+
+  nameInstructions(Outputs);
+
+  for (auto& I : instructions(F)) {
+    if (!PreserveOrder) {
+      if (ReorderOperands && I.isCommutative())
+        reorderInstructionOperandsByNames(&I);
+
+      if (auto* PN = dyn_cast<PHINode>(&I))
+        reorderPHIIncomingValues(PN);
     }
+
+    foldInstructionName(&I);
   }
-}
-bool isDAG(const Function& f) {
-  unordered_map<const BasicBlock*, Colour> colour;
-  return isDAG(&f.getEntryBlock(), colour);
+
+  return true;
 }
 
-//------------------------------------------------------------------------------
-// Get min and max cost of functions and basic blocks
-//------------------------------------------------------------------------------
-TargetIRAnalysis tira;
-unique_ptr<TargetTransformInfoWrapperPass> ttiwp((TargetTransformInfoWrapperPass*)
-                                                     createTargetTransformInfoWrapperPass(tira));
-using CostT = double;
-
-CostT getCost(const BasicBlock& bb, const TargetTransformInfo& tti) {
-  CostT cost = 0;
-  for (const auto& insn : bb) {
-    cost += tti.getInstructionCost(&insn, costKind);
+/// Numbers arguments.
+///
+/// \param F Function whose arguments will be renamed.
+void IRCanonicalizer::nameFunctionArguments(Function& F) {
+  int ArgumentCounter = 0;
+  for (auto& A : F.args()) {
+    if (RenameAll || A.getName().empty()) {
+      A.setName("a" + Twine(ArgumentCounter));
+      ++ArgumentCounter;
+    }
   }
-  return cost;
 }
-CostT minCost(const Function& f, unordered_map<const BasicBlock*, CostT> bbCost) {
-  // Cost of best path (path with minimum cost)
-  CostT best = numeric_limits<CostT>::infinity();
-  // The exit block with the best path cost
-  const BasicBlock* bestBB = nullptr;
-  // Predecessors
-  unordered_map<const BasicBlock*, const BasicBlock*> pred;
-  // Map of costs into each vertex
-  unordered_map<const BasicBlock*, CostT> costIn;
-  // Priority queue
-  set<pair<CostT, const BasicBlock*>> q;
-  // Pointers into q - so we can change priority
-  unordered_map<const BasicBlock*, decltype(q.begin())> iter;
-  // Initialise cost
-  for (const BasicBlock& v : f.getBasicBlockList()) costIn[&v] = numeric_limits<CostT>::infinity();
-  auto start = &f.getEntryBlock();
-  costIn[start] = 0;
-  // Push into q (and remember iterator)
-  auto iti = q.insert({costIn[start], start});
-  iter[start] = iti.first;
-  // Do the search
-  while (!q.empty()) {
-    // Pop from the q
-    auto top = q.begin();
-    const BasicBlock* v = top->second;
-    CostT cIn = top->first;
-    q.erase(top);
-    iter.erase(v);
-    assert(cIn == costIn[v]);
-
-    // Get the cost out of this node
-    int cOut = cIn + bbCost[v];
-    // Count the successors as we process them
-    int numSuccs = 0;
-    // Process each successor
-    for (const auto* succ : successors(v)) {
-      numSuccs++;
-      // Update if the cost is better
-      if (cOut < costIn[succ]) {
-        // Set the new cost
-        costIn[succ] = cOut;
-        // Delete from the queue if already in there
-        if (iter.count(succ)) {
-          auto it = iter[succ];
-          q.erase(it);
-        }
-        // Insert into the queue (and remember iterator)
-        auto iti = q.insert({cOut, succ});
-        iter[succ] = iti.first;
-        // Remember predecessor
-        pred[succ] = v;
-      }
-    }
-    // Update best if this is an exit block (no successors) and we have a better cost
-    if (numSuccs == 0 && best > cOut) {
-      best = cOut;
-      bestBB = v;
+
+/// Names basic blocks using a generated hash for each basic block in
+/// a function considering the opcode and the order of output instructions.
+///
+/// \param F Function containing basic blocks to rename.
+void IRCanonicalizer::nameBasicBlocks(Function& F) {
+  for (auto& B : F) {
+    // Initialize to a magic constant, so the state isn't zero.
+    uint64_t Hash = MagicHashConstant;
+
+    // Hash considering output instruction opcodes.
+    for (auto& I : B)
+      if (isOutput(&I))
+        Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode());
+
+    if (RenameAll || B.getName().empty()) {
+      // Name basic block. Substring hash to make diffs more readable.
+      B.setName("bb" + std::to_string(Hash).substr(0, 5));
     }
   }
-  return best;
 }
-CostT maxCost(const Function& f, unordered_map<const BasicBlock*, CostT> bbCost) {
-  // Cost of best path (path with minimum cost)
-  CostT best = 0;
-  // The exit block with the best path cost
-  const BasicBlock* bestBB = nullptr;
-  // Predecessors
-  unordered_map<const BasicBlock*, const BasicBlock*> pred;
-  // Map of costs into each vertex
-  unordered_map<const BasicBlock*, CostT> costIn;
-  // Priority queue
-  struct RCmp {
-    bool operator()(const pair<CostT, const BasicBlock*>& a,
-                    const pair<CostT, const BasicBlock*>& b) const {
-      if (a.first == b.first)
-        return a.second < b.second;
-      return a.first > b.first;
+
+/// Names instructions graphically.
+/// This method is a wrapper for recursive nameInstruction().
+///
+/// \see nameInstruction()
+/// \param Outputs Vector of pointers to output instructions collected top-down.
+void IRCanonicalizer::nameInstructions(SmallVector<Instruction*, 16>& Outputs) {
+  // Keeping track of visited instructions while naming (even depth first) is
+  // necessary only to avoid infinite loops on PHI nodes.
+  SmallPtrSet<const Instruction*, 32> Visited;
+
+  for (auto& I : Outputs) nameInstruction(I, Visited);
+}
+
+/// Names instructions graphically (recursive) in accordance with the
+/// def-use tree, starting from the initial instructions (defs), finishing at
+/// the output (top-most user) instructions (depth-first).
+///
+/// \param I Instruction to be renamed.
+void IRCanonicalizer::nameInstruction(Instruction* I,
+                                      SmallPtrSet<const Instruction*, 32>& Visited) {
+  // Keeping track of visited instructions while naming (even depth first) is
+  // necessary only to avoid infinite loops on PHI nodes.
+  if (!Visited.count(I)) {
+    Visited.insert(I);
+
+    // Determine the type of instruction to name.
+    if (isInitialInstruction(I)) {
+      // This is an initial instruction.
+      nameAsInitialInstruction(I);
+    } else {
+      // This must be a regular instruction.
+      nameAsRegularInstruction(I, Visited);
     }
-  };
-  set<pair<CostT, const BasicBlock*>, RCmp> q;
-  // Pointers into q - so we can change priority
-  unordered_map<const BasicBlock*, decltype(q.begin())> iter;
-  // Initialise cost
-  for (const BasicBlock& v : f.getBasicBlockList()) costIn[&v] = 0;
-  auto start = &f.getEntryBlock();
-  costIn[start] = 0;
-  // Push into q (and remember iterator)
-  auto iti = q.insert({costIn[start], start});
-  iter[start] = iti.first;
-  // Do the search
-  while (!q.empty()) {
-    // Pop from the q
-    auto top = q.begin();
-    const BasicBlock* v = top->second;
-    CostT cIn = top->first;
-    q.erase(top);
-    iter.erase(v);
-    assert(cIn == costIn[v]);
-
-    // Get the cost out of this node
-    int cOut = cIn + bbCost[v];
-    // Count the successors as we process them
-    int numSuccs = 0;
-    // Process each successor
-    for (const auto* succ : successors(v)) {
-      numSuccs++;
-      // Update if the cost is better
-      if (cOut > costIn[succ]) {
-        // Set the new cost
-        costIn[succ] = cOut;
-        // Delete from the queue if already in there
-        if (iter.count(succ)) {
-          auto it = iter[succ];
-          q.erase(it);
-        }
-        // Insert into the queue (and remember iterator)
-        auto iti = q.insert({cOut, succ});
-        iter[succ] = iti.first;
-        // Remember predecessor
-        pred[succ] = v;
-      }
+  }
+}
+
+/// Names instruction following the scheme:
+/// vl00000Callee(Operands)
+///
+/// Where 00000 is a hash calculated considering instruction's opcode and output
+/// footprint. Callee's name is only included when instruction's type is
+/// CallInst. In cases where instruction is commutative, operands list is also
+/// sorted.
+///
+/// Renames instruction only when RenameAll flag is raised or instruction is
+/// unnamed.
+///
+/// \see getOutputFootprint()
+/// \param I Instruction to be renamed.
+void IRCanonicalizer::nameAsInitialInstruction(Instruction* I) {
+  if (I->getType()->isVoidTy() || (!I->getName().empty() && !RenameAll))
+    return;
+
+  // Instruction operands for further sorting.
+  SmallVector<SmallString<64>, 4> Operands;
+
+  // Collect operands.
+  for (auto& OP : I->operands()) {
+    if (!isa<Function>(OP)) {
+      std::string TextRepresentation;
+      raw_string_ostream Stream(TextRepresentation);
+      OP->printAsOperand(Stream, false);
+      Operands.push_back(StringRef(Stream.str()));
     }
-    // Update best if this is an exit block (no successors) and we have a better cost
-    if (numSuccs == 0 && best < cOut) {
-      best = cOut;
-      bestBB = v;
+  }
+
+  if (I->isCommutative())
+    llvm::sort(Operands);
+
+  // Initialize to a magic constant, so the state isn't zero.
+  uint64_t Hash = MagicHashConstant;
+
+  // Consider instruction's opcode in the hash.
+  Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode());
+
+  SmallPtrSet<const Instruction*, 32> Visited;
+  // Get output footprint for I.
+  SetVector<int> OutputFootprint = getOutputFootprint(I, Visited);
+
+  // Consider output footprint in the hash.
+  for (const int& Output : OutputFootprint) Hash = hashing::detail::hash_16_bytes(Hash, Output);
+
+  // Base instruction name.
+  SmallString<256> Name;
+  Name.append("vl" + std::to_string(Hash).substr(0, 5));
+
+  // In case of CallInst, consider callee in the instruction name.
+  if (const auto* CI = dyn_cast<CallInst>(I)) {
+    Function* F = CI->getCalledFunction();
+
+    if (F != nullptr) {
+      Name.append(F->getName());
     }
   }
-  return best;
+
+  Name.append("(");
+  for (unsigned long i = 0; i < Operands.size(); ++i) {
+    Name.append(Operands[i]);
+
+    if (i < Operands.size() - 1)
+      Name.append(", ");
+  }
+  Name.append(")");
+
+  I->setName(Name);
 }
 
-pair<CostT, CostT> getCost(const Function& f) {
-  auto& tti = ttiwp->getTTI(f);
+/// Names instruction following the scheme:
+/// op00000Callee(Operands)
+///
+/// Where 00000 is a hash calculated considering instruction's opcode, its
+/// operands' opcodes and order. Callee's name is only included when
+/// instruction's type is CallInst. In cases where instruction is commutative,
+/// operand list is also sorted.
+///
+/// Names instructions recursively in accordance with the def-use tree,
+/// starting from the initial instructions (defs), finishing at
+/// the output (top-most user) instructions (depth-first).
+///
+/// Renames instruction only when RenameAll flag is raised or instruction is
+/// unnamed.
+///
+/// \see getOutputFootprint()
+/// \param I Instruction to be renamed.
+void IRCanonicalizer::nameAsRegularInstruction(Instruction* I,
+                                               SmallPtrSet<const Instruction*, 32>& Visited) {
+  // Instruction operands for further sorting.
+  SmallVector<SmallString<128>, 4> Operands;
 
-  // Precompute BB costs.
-  unordered_map<const BasicBlock*, CostT> bbCost;
-  for (const auto& bb : f.getBasicBlockList()) bbCost[&bb] = getCost(bb, tti);
+  // The name of a regular instruction depends
+  // on the names of its operands. Hence, all
+  // operands must be named first in the use-def
+  // walk.
 
-  if (isDAG(f)) {
-    return {minCost(f, bbCost), maxCost(f, bbCost)};
-  } else {
-    return {minCost(f, bbCost), numeric_limits<CostT>::infinity()};
+  // Collect operands.
+  for (auto& OP : I->operands()) {
+    if (auto* IOP = dyn_cast<Instruction>(OP)) {
+      // Walk down the use-def chain.
+      nameInstruction(IOP, Visited);
+      Operands.push_back(IOP->getName());
+    } else if (isa<Value>(OP) && !isa<Function>(OP)) {
+      // This must be an immediate value.
+      std::string TextRepresentation;
+      raw_string_ostream Stream(TextRepresentation);
+      OP->printAsOperand(Stream, false);
+      Operands.push_back(StringRef(Stream.str()));
+    }
   }
+
+  if (I->isCommutative())
+    llvm::sort(Operands.begin(), Operands.end());
+
+  // Initialize to a magic constant, so the state isn't zero.
+  uint64_t Hash = MagicHashConstant;
+
+  // Consider instruction opcode in the hash.
+  Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode());
+
+  // Operand opcodes for further sorting (commutative).
+  SmallVector<int, 4> OperandsOpcodes;
+
+  // Collect operand opcodes for hashing.
+  for (auto& OP : I->operands())
+    if (auto* IOP = dyn_cast<Instruction>(OP))
+      OperandsOpcodes.push_back(IOP->getOpcode());
+
+  if (I->isCommutative())
+    llvm::sort(OperandsOpcodes.begin(), OperandsOpcodes.end());
+
+  // Consider operand opcodes in the hash.
+  for (const int Code : OperandsOpcodes) Hash = hashing::detail::hash_16_bytes(Hash, Code);
+
+  // Base instruction name.
+  SmallString<512> Name;
+  Name.append("op" + std::to_string(Hash).substr(0, 5));
+
+  // In case of CallInst, consider callee in the instruction name.
+  if (const auto* CI = dyn_cast<CallInst>(I))
+    if (const Function* F = CI->getCalledFunction())
+      Name.append(F->getName());
+
+  Name.append("(");
+  for (unsigned long i = 0; i < Operands.size(); ++i) {
+    Name.append(Operands[i]);
+
+    if (i < Operands.size() - 1)
+      Name.append(", ");
+  }
+  Name.append(")");
+
+  if ((I->getName().empty() || RenameAll) && !I->getType()->isVoidTy())
+    I->setName(Name);
 }
 
-//------------------------------------------------------------------------------
-// Visitor functions, called to process the module
-//------------------------------------------------------------------------------
-void visit(const Function& f, ostream& os) {
-  auto costs = getCost(f);
-  switch (outputFormat) {
-    case TXT: {
-      os << "  Function: " << f.getName().str() << " ";
-      os << "min=" << costs.first << " ";
-      os << "max=" << costs.second << endl;
-      break;
+/// Shortens instruction's name. This method removes called function name from
+/// the instruction name and substitutes the call chain with a corresponding
+/// list of operands.
+///
+/// Examples:
+/// op00000Callee(op00001Callee(...), vl00000Callee(1, 2), ...)  ->
+/// op00000(op00001, vl00000, ...) vl00000Callee(1, 2)  ->  vl00000(1, 2)
+///
+/// This method omits output instructions and pre-output (instructions directly
+/// used by an output instruction) instructions (by default). By default it also
+/// does not affect user named instructions.
+///
+/// \param I Instruction whose name will be folded.
+void IRCanonicalizer::foldInstructionName(Instruction* I) {
+  // If this flag is raised, fold all regular
+  // instructions (including pre-outputs).
+  if (!FoldPreoutputs) {
+    // Don't fold if one of the users is an output instruction.
+    for (auto* U : I->users())
+      if (auto* IU = dyn_cast<Instruction>(U))
+        if (isOutput(IU))
+          return;
+  }
+
+  // Don't fold if it is an output instruction or has no op prefix.
+  if (isOutput(I) || I->getName().substr(0, 2) != "op")
+    return;
+
+  // Instruction operands.
+  SmallVector<SmallString<64>, 4> Operands;
+
+  for (auto& OP : I->operands()) {
+    if (const Instruction* IOP = dyn_cast<Instruction>(OP)) {
+      bool HasCanonicalName =
+          I->getName().substr(0, 2) == "op" || I->getName().substr(0, 2) == "vl";
+
+      Operands.push_back(HasCanonicalName ? IOP->getName().substr(0, 7) : IOP->getName());
     }
-    case JSON: {
-      os << "{";
-      os << "\"function\":\"" << f.getName().str() << "\",";
-      os << "\"min\":" << costs.first;
-      if (costs.second != numeric_limits<CostT>::infinity()) {
-        os << ",\"max\":" << costs.second;
+  }
+
+  if (I->isCommutative())
+    llvm::sort(Operands.begin(), Operands.end());
+
+  SmallString<256> Name;
+  Name.append(I->getName().substr(0, 7));
+
+  Name.append("(");
+  for (unsigned long i = 0; i < Operands.size(); ++i) {
+    Name.append(Operands[i]);
+
+    if (i < Operands.size() - 1)
+      Name.append(", ");
+  }
+  Name.append(")");
+
+  I->setName(Name);
+}
+
+/// Reorders instructions by walking up the tree from each operand of an output
+/// instruction and reducing the def-use distance.
+/// This method assumes that output instructions were collected top-down,
+/// otherwise the def-use chain may be broken.
+/// This method is a wrapper for recursive reorderInstruction().
+///
+/// \see reorderInstruction()
+/// \param Outputs Vector of pointers to output instructions collected top-down.
+void IRCanonicalizer::reorderInstructions(SmallVector<Instruction*, 16>& Outputs) {
+  // This method assumes output instructions were collected top-down,
+  // otherwise the def-use chain may be broken.
+
+  SmallPtrSet<const Instruction*, 32> Visited;
+
+  // Walk up the tree.
+  for (auto& I : Outputs)
+    for (auto& OP : I->operands())
+      if (auto* IOP = dyn_cast<Instruction>(OP))
+        reorderInstruction(IOP, I, Visited);
+}
+
+/// Reduces def-use distance or places instruction at the end of the basic
+/// block. Continues to walk up the def-use tree recursively. Used by
+/// reorderInstructions().
+///
+/// \see reorderInstructions()
+/// \param Used Pointer to the instruction whose value is used by the \p User.
+/// \param User Pointer to the instruction which uses the \p Used.
+/// \param Visited Set of visited instructions.
+void IRCanonicalizer::reorderInstruction(Instruction* Used, Instruction* User,
+                                         SmallPtrSet<const Instruction*, 32>& Visited) {
+  if (!Visited.count(Used)) {
+    Visited.insert(Used);
+
+    if (!isa<PHINode>(Used) && !Used->isEHPad()) {
+      // Do not move PHI nodes and 'pad' instructions to ensure they are first
+      // in a basic block. Also do not move their operands before them.
+
+      if (Used->getParent() == User->getParent()) {
+        // If Used and User share the same basic block move Used just before
+        // User.
+        Used->moveBefore(User);
+      } else {
+        // Otherwise move Used to the end of the basic block before the
+        // terminator.
+        Used->moveBefore(&Used->getParent()->back());
       }
-      os << "}";
-      break;
-    }
-    case CSV: {
-      os << f.getParent()->getName().str() << ",";
-      os << f.getName().str() << ",";
-      os << costs.first << ",";
-      if (costs.second != numeric_limits<CostT>::infinity()) {
-        os << costs.second;
+
+      for (auto& OP : Used->operands()) {
+        if (auto* IOP = dyn_cast<Instruction>(OP)) {
+          // Walk up the def-use tree.
+          reorderInstruction(IOP, Used, Visited);
+        }
       }
-      os << "\n";
-      break;
     }
   }
 }
-void visit(const Module& m, ostream& os) {
-  switch (outputFormat) {
-    case TXT: {
-      os << "Module: " << m.getName().str() << "\n";
-      for (const auto& f : m.functions()) visit(f, os);
-      break;
-    }
-    case JSON: {
-      os << "{";
-      os << "\"module\":\"" << m.getName().str() << "\",";
-      os << "\"functions\":[";
-      bool isFirst = true;
-      for (const auto& f : m.functions()) {
-        if (!isFirst)
-          os << ",";
-        else
-          isFirst = false;
-        visit(f, os);
+
+/// Reorders instruction's operands alphabetically. This method assumes
+/// that passed instruction is commutative. Changing the operand order
+/// in other instructions may change the semantics.
+///
+/// \param I Instruction whose operands will be reordered.
+void IRCanonicalizer::reorderInstructionOperandsByNames(Instruction* I) {
+  // This method assumes that passed I is commutative,
+  // changing the order of operands in other instructions
+  // may change the semantics.
+
+  // Instruction operands for further sorting.
+  SmallVector<std::pair<std::string, Value*>, 4> Operands;
+
+  // Collect operands.
+  for (auto& OP : I->operands()) {
+    if (auto* VOP = dyn_cast<Value>(OP)) {
+      if (isa<Instruction>(VOP)) {
+        // This is an an instruction.
+        Operands.push_back(std::pair<std::string, Value*>(VOP->getName(), VOP));
+      } else {
+        std::string TextRepresentation;
+        raw_string_ostream Stream(TextRepresentation);
+        OP->printAsOperand(Stream, false);
+        Operands.push_back(std::pair<std::string, Value*>(Stream.str(), VOP));
       }
-      os << "]}";
-      break;
-    }
-    case CSV: {
-      for (const auto& f : m.functions()) visit(f, os);
-      break;
     }
   }
+
+  // Sort operands.
+  llvm::sort(Operands.begin(), Operands.end(), llvm::less_first());
+
+  // Reorder operands.
+  unsigned Position = 0;
+  for (auto& OP : I->operands()) {
+    OP.set(Operands[Position].second);
+    Position++;
+  }
+}
+
+/// Reorders PHI node's values according to the names of corresponding basic
+/// blocks.
+///
+/// \param PN PHI node to canonicalize.
+void IRCanonicalizer::reorderPHIIncomingValues(PHINode* PN) {
+  // Values for further sorting.
+  SmallVector<std::pair<Value*, BasicBlock*>, 2> Values;
+
+  // Collect blocks and corresponding values.
+  for (auto& BB : PN->blocks()) {
+    Value* V = PN->getIncomingValueForBlock(BB);
+    Values.push_back(std::pair<Value*, BasicBlock*>(V, BB));
+  }
+
+  // Sort values according to the name of a basic block.
+  llvm::sort(Values, [](const std::pair<Value*, BasicBlock*>& LHS,
+                        const std::pair<Value*, BasicBlock*>& RHS) {
+    return LHS.second->getName() < RHS.second->getName();
+  });
+
+  // Swap.
+  for (unsigned i = 0; i < Values.size(); ++i) {
+    PN->setIncomingBlock(i, Values[i].second);
+    PN->setIncomingValue(i, Values[i].first);
+  }
 }
-void visit(const string& filename, ostream& os) {
-  // Parse the IR file passed on the command line.
-  SMDiagnostic err;
-  LLVMContext ctx;
-  unique_ptr<Module> m = parseIRFile(filename, err, ctx);
 
-  if (!m)
-    throw err;
+/// Returns a vector of output instructions. An output is an instruction which
+/// has side-effects or is a terminator instruction. Uses isOutput().
+///
+/// \see isOutput()
+/// \param F Function to collect outputs from.
+SmallVector<Instruction*, 16> IRCanonicalizer::collectOutputInstructions(Function& F) {
+  // Output instructions are collected top-down in each function,
+  // any change may break the def-use chain in reordering methods.
+  SmallVector<Instruction*, 16> Outputs;
+
+  for (auto& I : instructions(F))
+    if (isOutput(&I))
+      Outputs.push_back(&I);
 
-  // Run the analysis and print the results
-  visit(*m, os);
+  return Outputs;
 }
-void visit(const vector<string>& filenames, ostream& os) {
-  switch (outputFormat) {
-    case TXT: {
-      for (const auto& fn : filenames) visit(fn, os);
-      break;
-    }
-    case JSON: {
-      os << "[";
-      bool isFirst = true;
-      for (const auto& fn : filenames) {
-        if (!isFirst)
-          os << ",";
-        else
-          isFirst = false;
-        visit(fn, os);
+
+/// Helper method checking whether the instruction may have side effects or is
+/// a terminator instruction.
+///
+/// \param I Considered instruction.
+bool IRCanonicalizer::isOutput(const Instruction* I) {
+  // Outputs are such instructions which may have side effects or are a
+  // terminator.
+  if (I->mayHaveSideEffects() || I->isTerminator())
+    return true;
+
+  return false;
+}
+
+/// Helper method checking whether the instruction has users and only
+/// immediate operands.
+///
+/// \param I Considered instruction.
+bool IRCanonicalizer::isInitialInstruction(const Instruction* I) {
+  // Initial instructions are such instructions whose values are used by
+  // other instructions, yet they only depend on immediate values.
+  return !I->user_empty() && hasOnlyImmediateOperands(I);
+}
+
+/// Helper method checking whether the instruction has only immediate operands.
+///
+/// \param I Considered instruction.
+bool IRCanonicalizer::hasOnlyImmediateOperands(const Instruction* I) {
+  for (const auto& OP : I->operands())
+    if (isa<Instruction>(OP))
+      return false;  // Found non-immediate operand (instruction).
+
+  return true;
+}
+
+/// Helper method returning indices (distance from the beginning of the basic
+/// block) of outputs using the \p I (eliminates repetitions). Walks down the
+/// def-use tree recursively.
+///
+/// \param I Considered instruction.
+/// \param Visited Set of visited instructions.
+SetVector<int> IRCanonicalizer::getOutputFootprint(Instruction* I,
+                                                   SmallPtrSet<const Instruction*, 32>& Visited) {
+  // Vector containing indexes of outputs (no repetitions),
+  // which use I in the order of walking down the def-use tree.
+  SetVector<int> Outputs;
+
+  if (!Visited.count(I)) {
+    Visited.insert(I);
+
+    if (isOutput(I)) {
+      // Gets output instruction's parent function.
+      Function* Func = I->getParent()->getParent();
+
+      // Finds and inserts the index of the output to the vector.
+      unsigned Count = 0;
+      for (const auto& B : *Func) {
+        for (const auto& E : B) {
+          if (&E == I)
+            Outputs.insert(Count);
+          Count++;
+        }
       }
-      os << "]\n";
-      break;
+
+      // Returns to the used instruction.
+      return Outputs;
     }
-    case CSV: {
-      os << "Module, Function, DAG, Min, Max\n";
-      for (const auto& fn : filenames) visit(fn, os);
-      break;
+
+    for (auto* U : I->users()) {
+      if (auto* UI = dyn_cast<Instruction>(U)) {
+        // Vector for outputs which use UI.
+        SetVector<int> OutputsUsingUI = getOutputFootprint(UI, Visited);
+
+        // Insert the indexes of outputs using UI.
+        Outputs.insert(OutputsUsingUI.begin(), OutputsUsingUI.end());
+      }
     }
   }
+
+  // Return to the used instruction.
+  return Outputs;
+}
+
+/// Reads a module from a file.
+/// On error, messages are written to stderr and null is returned.
+///
+/// \param Context LLVM Context for the module.
+/// \param Name Input file name.
+static std::unique_ptr<Module> readModule(LLVMContext& Context, StringRef Name) {
+  SMDiagnostic Diag;
+  std::unique_ptr<Module> Module = parseIRFile(Name, Diag, Context);
+
+  if (!Module)
+    Diag.print("llvm-canon", errs());
+
+  return Module;
 }
-//------------------------------------------------------------------------------
-// Driver
-//------------------------------------------------------------------------------
+
+/// Input LLVM module file name.
+cl::opt<std::string> InputFilename("f", cl::desc("Specify input filename"),
+                                   cl::value_desc("filename"), cl::Required);
+/// Output LLVM module file name.
+cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
+                                    cl::value_desc("filename"), cl::Required);
+
+/// \name Canonicalizer flags.
+/// @{
+/// Preserves original order of instructions.
+cl::opt<bool> PreserveOrder("preserve-order", cl::desc("Preserves original instruction order"));
+/// Renames all instructions (including user-named).
+cl::opt<bool> RenameAll("rename-all", cl::desc("Renames all instructions (including user-named)"));
+/// Folds all regular instructions (including pre-outputs).
+cl::opt<bool> FoldPreoutputs("fold-all",
+                             cl::desc("Folds all regular instructions (including pre-outputs)"));
+/// Sorts and reorders operands in commutative instructions.
+cl::opt<bool> ReorderOperands("reorder-operands",
+                              cl::desc("Sorts and reorders operands in commutative instructions"));
+/// @}
+
 int main(int argc, char** argv) {
-  // Hide all options apart from the ones specific to this tool
-  cl::HideUnrelatedOptions(bwcetCategory);
-
-  cl::ParseCommandLineOptions(
-      argc, argv,
-      "Estimates the best and worst case runtime for each function the input IR file\n");
-
-  try {
-    // Get the output file
-    unique_ptr<ostream> ofs(outputFilename == "-" ? nullptr : new ofstream(outputFilename.c_str()));
-    if (ofs && !ofs->good()) {
-      throw "Error opening output file: " + outputFilename;
-    }
-    ostream& os = ofs ? *ofs : cout;
+  cl::ParseCommandLineOptions(argc, argv,
+                              " LLVM-Canon\n\n"
+                              " This tool aims to transform LLVM Modules into canonical form by"
+                              " reordering and renaming instructions while preserving the same"
+                              " semantics. Making it easier to spot semantic differences while"
+                              " diffing two modules which have undergone different passes.\n");
+
+  LLVMContext Context;
+
+  std::unique_ptr<Module> Module = readModule(Context, InputFilename);
+
+  if (!Module)
+    return 1;
 
-    // Makes sure llvm_shutdown() is called (which cleans up LLVM objects)
-    // http://llvm.org/docs/ProgrammersManual.html#ending-execution-with-llvm-shutdown
-    llvm_shutdown_obj shutdown_obj;
+  IRCanonicalizer Canonicalizer(PreserveOrder, RenameAll, FoldPreoutputs, ReorderOperands);
 
-    // Do the work
-    visit(inputFiles, os);
+  for (auto& Function : *Module) {
+    Canonicalizer.runOnFunction(Function);
+  }
+
+  if (verifyModule(*Module, &errs()))
+    return 1;
 
-  } catch (string e) {
-    errs() << e;
-    return -1;
-  } catch (SMDiagnostic e) {
-    e.print(argv[0], errs(), false);
-    return -1;
+  std::error_code EC;
+  raw_fd_ostream OutputStream(OutputFilename, EC, sys::fs::OF_None);
+
+  if (EC) {
+    errs() << EC.message();
+    return 1;
   }
+
+  Module->print(OutputStream, nullptr, false);
+  OutputStream.close();
   return 0;
 }

From 90dacf4db9ac4d7e5c1ec520ad3cea62f6d861d9 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 09:48:23 -0400
Subject: [PATCH 03/21] clean up code

---
 .../loop_unroller/IRCanonicalizer.h           |  67 ---
 .../loop_unroller/loop_unroller.cc            | 568 +-----------------
 2 files changed, 14 insertions(+), 621 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h b/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
index a4e14b517..a3c0aba09 100644
--- a/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
+++ b/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
@@ -8,70 +8,3 @@
 #include "llvm/Pass.h"
 
 /// IRCanonicalizer aims to transform LLVM IR into canonical form.
-class IRCanonicalizer : public llvm::FunctionPass {
- public:
-  static char ID;
-
-  /// Constructor for the IRCanonicalizer.
-  ///
-  /// \param PreserveOrder Preserves original order of instructions.
-  /// \param RenameAll Renames all instructions (including user-named).
-  /// \param FoldPreoutputs Folds all regular instructions (including pre-outputs).
-  /// \param ReorderOperands Sorts and reorders operands in commutative instructions.
-  IRCanonicalizer(bool PreserveOrder, bool RenameAll, bool FoldPreoutputs, bool ReorderOperands)
-      : FunctionPass(ID),
-        PreserveOrder(PreserveOrder),
-        RenameAll(RenameAll),
-        FoldPreoutputs(FoldPreoutputs),
-        ReorderOperands(ReorderOperands) {}
-
-  bool runOnFunction(llvm::Function& F) override;
-
- private:
-  // Random constant for hashing, so the state isn't zero.
-  const uint64_t MagicHashConstant = 0x6acaa36bef8325c5ULL;
-
-  /// \name Canonicalizer flags.
-  /// @{
-  /// Preserves original order of instructions.
-  bool PreserveOrder;
-  /// Renames all instructions (including user-named).
-  bool RenameAll;
-  /// Folds all regular instructions (including pre-outputs).
-  bool FoldPreoutputs;
-  /// Sorts and reorders operands in commutative instructions.
-  bool ReorderOperands;
-  /// @}
-
-  /// \name Naming.
-  /// @{
-  void nameFunctionArguments(llvm::Function& F);
-  void nameBasicBlocks(llvm::Function& F);
-  void nameInstructions(llvm::SmallVector<llvm::Instruction*, 16>& Outputs);
-  void nameInstruction(llvm::Instruction* I,
-                       llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
-  void nameAsInitialInstruction(llvm::Instruction* I);
-  void nameAsRegularInstruction(llvm::Instruction* I,
-                                llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
-  void foldInstructionName(llvm::Instruction* I);
-  /// @}
-
-  /// \name Reordering.
-  /// @{
-  void reorderInstructions(llvm::SmallVector<llvm::Instruction*, 16>& Outputs);
-  void reorderInstruction(llvm::Instruction* Used, llvm::Instruction* User,
-                          llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
-  void reorderInstructionOperandsByNames(llvm::Instruction* I);
-  void reorderPHIIncomingValues(llvm::PHINode* PN);
-  /// @}
-
-  /// \name Utility methods.
-  /// @{
-  llvm::SmallVector<llvm::Instruction*, 16> collectOutputInstructions(llvm::Function& F);
-  bool isOutput(const llvm::Instruction* I);
-  bool isInitialInstruction(const llvm::Instruction* I);
-  bool hasOnlyImmediateOperands(const llvm::Instruction* I);
-  llvm::SetVector<int> getOutputFootprint(llvm::Instruction* I,
-                                          llvm::SmallPtrSet<const llvm::Instruction*, 32>& Visited);
-  /// @}
-};
diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 0014da795..ef3879897 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -19,545 +19,21 @@
 
 using namespace llvm;
 
-char IRCanonicalizer::ID = 0;
-
-/// Entry method to the IRCanonicalizer.
-///
-/// \param M Module to canonicalize.
-bool IRCanonicalizer::runOnFunction(Function& F) {
-  nameFunctionArguments(F);
-  nameBasicBlocks(F);
-
-  SmallVector<Instruction*, 16> Outputs = collectOutputInstructions(F);
-
-  if (!PreserveOrder)
-    reorderInstructions(Outputs);
-
-  nameInstructions(Outputs);
-
-  for (auto& I : instructions(F)) {
-    if (!PreserveOrder) {
-      if (ReorderOperands && I.isCommutative())
-        reorderInstructionOperandsByNames(&I);
-
-      if (auto* PN = dyn_cast<PHINode>(&I))
-        reorderPHIIncomingValues(PN);
-    }
-
-    foldInstructionName(&I);
-  }
-
-  return true;
-}
-
-/// Numbers arguments.
-///
-/// \param F Function whose arguments will be renamed.
-void IRCanonicalizer::nameFunctionArguments(Function& F) {
-  int ArgumentCounter = 0;
-  for (auto& A : F.args()) {
-    if (RenameAll || A.getName().empty()) {
-      A.setName("a" + Twine(ArgumentCounter));
-      ++ArgumentCounter;
-    }
-  }
-}
-
-/// Names basic blocks using a generated hash for each basic block in
-/// a function considering the opcode and the order of output instructions.
-///
-/// \param F Function containing basic blocks to rename.
-void IRCanonicalizer::nameBasicBlocks(Function& F) {
-  for (auto& B : F) {
-    // Initialize to a magic constant, so the state isn't zero.
-    uint64_t Hash = MagicHashConstant;
-
-    // Hash considering output instruction opcodes.
-    for (auto& I : B)
-      if (isOutput(&I))
-        Hash = hashing::detail::hash_16_bytes(Hash, I.getOpcode());
-
-    if (RenameAll || B.getName().empty()) {
-      // Name basic block. Substring hash to make diffs more readable.
-      B.setName("bb" + std::to_string(Hash).substr(0, 5));
-    }
-  }
-}
-
-/// Names instructions graphically.
-/// This method is a wrapper for recursive nameInstruction().
-///
-/// \see nameInstruction()
-/// \param Outputs Vector of pointers to output instructions collected top-down.
-void IRCanonicalizer::nameInstructions(SmallVector<Instruction*, 16>& Outputs) {
-  // Keeping track of visited instructions while naming (even depth first) is
-  // necessary only to avoid infinite loops on PHI nodes.
-  SmallPtrSet<const Instruction*, 32> Visited;
-
-  for (auto& I : Outputs) nameInstruction(I, Visited);
-}
-
-/// Names instructions graphically (recursive) in accordance with the
-/// def-use tree, starting from the initial instructions (defs), finishing at
-/// the output (top-most user) instructions (depth-first).
-///
-/// \param I Instruction to be renamed.
-void IRCanonicalizer::nameInstruction(Instruction* I,
-                                      SmallPtrSet<const Instruction*, 32>& Visited) {
-  // Keeping track of visited instructions while naming (even depth first) is
-  // necessary only to avoid infinite loops on PHI nodes.
-  if (!Visited.count(I)) {
-    Visited.insert(I);
-
-    // Determine the type of instruction to name.
-    if (isInitialInstruction(I)) {
-      // This is an initial instruction.
-      nameAsInitialInstruction(I);
-    } else {
-      // This must be a regular instruction.
-      nameAsRegularInstruction(I, Visited);
-    }
-  }
-}
-
-/// Names instruction following the scheme:
-/// vl00000Callee(Operands)
-///
-/// Where 00000 is a hash calculated considering instruction's opcode and output
-/// footprint. Callee's name is only included when instruction's type is
-/// CallInst. In cases where instruction is commutative, operands list is also
-/// sorted.
-///
-/// Renames instruction only when RenameAll flag is raised or instruction is
-/// unnamed.
-///
-/// \see getOutputFootprint()
-/// \param I Instruction to be renamed.
-void IRCanonicalizer::nameAsInitialInstruction(Instruction* I) {
-  if (I->getType()->isVoidTy() || (!I->getName().empty() && !RenameAll))
-    return;
-
-  // Instruction operands for further sorting.
-  SmallVector<SmallString<64>, 4> Operands;
-
-  // Collect operands.
-  for (auto& OP : I->operands()) {
-    if (!isa<Function>(OP)) {
-      std::string TextRepresentation;
-      raw_string_ostream Stream(TextRepresentation);
-      OP->printAsOperand(Stream, false);
-      Operands.push_back(StringRef(Stream.str()));
-    }
-  }
-
-  if (I->isCommutative())
-    llvm::sort(Operands);
-
-  // Initialize to a magic constant, so the state isn't zero.
-  uint64_t Hash = MagicHashConstant;
-
-  // Consider instruction's opcode in the hash.
-  Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode());
-
-  SmallPtrSet<const Instruction*, 32> Visited;
-  // Get output footprint for I.
-  SetVector<int> OutputFootprint = getOutputFootprint(I, Visited);
-
-  // Consider output footprint in the hash.
-  for (const int& Output : OutputFootprint) Hash = hashing::detail::hash_16_bytes(Hash, Output);
-
-  // Base instruction name.
-  SmallString<256> Name;
-  Name.append("vl" + std::to_string(Hash).substr(0, 5));
-
-  // In case of CallInst, consider callee in the instruction name.
-  if (const auto* CI = dyn_cast<CallInst>(I)) {
-    Function* F = CI->getCalledFunction();
-
-    if (F != nullptr) {
-      Name.append(F->getName());
-    }
-  }
-
-  Name.append("(");
-  for (unsigned long i = 0; i < Operands.size(); ++i) {
-    Name.append(Operands[i]);
-
-    if (i < Operands.size() - 1)
-      Name.append(", ");
-  }
-  Name.append(")");
-
-  I->setName(Name);
-}
-
-/// Names instruction following the scheme:
-/// op00000Callee(Operands)
-///
-/// Where 00000 is a hash calculated considering instruction's opcode, its
-/// operands' opcodes and order. Callee's name is only included when
-/// instruction's type is CallInst. In cases where instruction is commutative,
-/// operand list is also sorted.
-///
-/// Names instructions recursively in accordance with the def-use tree,
-/// starting from the initial instructions (defs), finishing at
-/// the output (top-most user) instructions (depth-first).
-///
-/// Renames instruction only when RenameAll flag is raised or instruction is
-/// unnamed.
-///
-/// \see getOutputFootprint()
-/// \param I Instruction to be renamed.
-void IRCanonicalizer::nameAsRegularInstruction(Instruction* I,
-                                               SmallPtrSet<const Instruction*, 32>& Visited) {
-  // Instruction operands for further sorting.
-  SmallVector<SmallString<128>, 4> Operands;
-
-  // The name of a regular instruction depends
-  // on the names of its operands. Hence, all
-  // operands must be named first in the use-def
-  // walk.
-
-  // Collect operands.
-  for (auto& OP : I->operands()) {
-    if (auto* IOP = dyn_cast<Instruction>(OP)) {
-      // Walk down the use-def chain.
-      nameInstruction(IOP, Visited);
-      Operands.push_back(IOP->getName());
-    } else if (isa<Value>(OP) && !isa<Function>(OP)) {
-      // This must be an immediate value.
-      std::string TextRepresentation;
-      raw_string_ostream Stream(TextRepresentation);
-      OP->printAsOperand(Stream, false);
-      Operands.push_back(StringRef(Stream.str()));
-    }
-  }
-
-  if (I->isCommutative())
-    llvm::sort(Operands.begin(), Operands.end());
-
-  // Initialize to a magic constant, so the state isn't zero.
-  uint64_t Hash = MagicHashConstant;
-
-  // Consider instruction opcode in the hash.
-  Hash = hashing::detail::hash_16_bytes(Hash, I->getOpcode());
-
-  // Operand opcodes for further sorting (commutative).
-  SmallVector<int, 4> OperandsOpcodes;
-
-  // Collect operand opcodes for hashing.
-  for (auto& OP : I->operands())
-    if (auto* IOP = dyn_cast<Instruction>(OP))
-      OperandsOpcodes.push_back(IOP->getOpcode());
-
-  if (I->isCommutative())
-    llvm::sort(OperandsOpcodes.begin(), OperandsOpcodes.end());
-
-  // Consider operand opcodes in the hash.
-  for (const int Code : OperandsOpcodes) Hash = hashing::detail::hash_16_bytes(Hash, Code);
-
-  // Base instruction name.
-  SmallString<512> Name;
-  Name.append("op" + std::to_string(Hash).substr(0, 5));
-
-  // In case of CallInst, consider callee in the instruction name.
-  if (const auto* CI = dyn_cast<CallInst>(I))
-    if (const Function* F = CI->getCalledFunction())
-      Name.append(F->getName());
-
-  Name.append("(");
-  for (unsigned long i = 0; i < Operands.size(); ++i) {
-    Name.append(Operands[i]);
-
-    if (i < Operands.size() - 1)
-      Name.append(", ");
-  }
-  Name.append(")");
+class IRCanonicalizer : public llvm::FunctionPass {
+ public:
+  static char ID;
 
-  if ((I->getName().empty() || RenameAll) && !I->getType()->isVoidTy())
-    I->setName(Name);
-}
-
-/// Shortens instruction's name. This method removes called function name from
-/// the instruction name and substitutes the call chain with a corresponding
-/// list of operands.
-///
-/// Examples:
-/// op00000Callee(op00001Callee(...), vl00000Callee(1, 2), ...)  ->
-/// op00000(op00001, vl00000, ...) vl00000Callee(1, 2)  ->  vl00000(1, 2)
-///
-/// This method omits output instructions and pre-output (instructions directly
-/// used by an output instruction) instructions (by default). By default it also
-/// does not affect user named instructions.
-///
-/// \param I Instruction whose name will be folded.
-void IRCanonicalizer::foldInstructionName(Instruction* I) {
-  // If this flag is raised, fold all regular
-  // instructions (including pre-outputs).
-  if (!FoldPreoutputs) {
-    // Don't fold if one of the users is an output instruction.
-    for (auto* U : I->users())
-      if (auto* IU = dyn_cast<Instruction>(U))
-        if (isOutput(IU))
-          return;
-  }
-
-  // Don't fold if it is an output instruction or has no op prefix.
-  if (isOutput(I) || I->getName().substr(0, 2) != "op")
-    return;
-
-  // Instruction operands.
-  SmallVector<SmallString<64>, 4> Operands;
-
-  for (auto& OP : I->operands()) {
-    if (const Instruction* IOP = dyn_cast<Instruction>(OP)) {
-      bool HasCanonicalName =
-          I->getName().substr(0, 2) == "op" || I->getName().substr(0, 2) == "vl";
-
-      Operands.push_back(HasCanonicalName ? IOP->getName().substr(0, 7) : IOP->getName());
-    }
-  }
-
-  if (I->isCommutative())
-    llvm::sort(Operands.begin(), Operands.end());
-
-  SmallString<256> Name;
-  Name.append(I->getName().substr(0, 7));
-
-  Name.append("(");
-  for (unsigned long i = 0; i < Operands.size(); ++i) {
-    Name.append(Operands[i]);
-
-    if (i < Operands.size() - 1)
-      Name.append(", ");
-  }
-  Name.append(")");
-
-  I->setName(Name);
-}
-
-/// Reorders instructions by walking up the tree from each operand of an output
-/// instruction and reducing the def-use distance.
-/// This method assumes that output instructions were collected top-down,
-/// otherwise the def-use chain may be broken.
-/// This method is a wrapper for recursive reorderInstruction().
-///
-/// \see reorderInstruction()
-/// \param Outputs Vector of pointers to output instructions collected top-down.
-void IRCanonicalizer::reorderInstructions(SmallVector<Instruction*, 16>& Outputs) {
-  // This method assumes output instructions were collected top-down,
-  // otherwise the def-use chain may be broken.
-
-  SmallPtrSet<const Instruction*, 32> Visited;
-
-  // Walk up the tree.
-  for (auto& I : Outputs)
-    for (auto& OP : I->operands())
-      if (auto* IOP = dyn_cast<Instruction>(OP))
-        reorderInstruction(IOP, I, Visited);
-}
-
-/// Reduces def-use distance or places instruction at the end of the basic
-/// block. Continues to walk up the def-use tree recursively. Used by
-/// reorderInstructions().
-///
-/// \see reorderInstructions()
-/// \param Used Pointer to the instruction whose value is used by the \p User.
-/// \param User Pointer to the instruction which uses the \p Used.
-/// \param Visited Set of visited instructions.
-void IRCanonicalizer::reorderInstruction(Instruction* Used, Instruction* User,
-                                         SmallPtrSet<const Instruction*, 32>& Visited) {
-  if (!Visited.count(Used)) {
-    Visited.insert(Used);
-
-    if (!isa<PHINode>(Used) && !Used->isEHPad()) {
-      // Do not move PHI nodes and 'pad' instructions to ensure they are first
-      // in a basic block. Also do not move their operands before them.
-
-      if (Used->getParent() == User->getParent()) {
-        // If Used and User share the same basic block move Used just before
-        // User.
-        Used->moveBefore(User);
-      } else {
-        // Otherwise move Used to the end of the basic block before the
-        // terminator.
-        Used->moveBefore(&Used->getParent()->back());
-      }
-
-      for (auto& OP : Used->operands()) {
-        if (auto* IOP = dyn_cast<Instruction>(OP)) {
-          // Walk up the def-use tree.
-          reorderInstruction(IOP, Used, Visited);
-        }
-      }
-    }
-  }
-}
-
-/// Reorders instruction's operands alphabetically. This method assumes
-/// that passed instruction is commutative. Changing the operand order
-/// in other instructions may change the semantics.
-///
-/// \param I Instruction whose operands will be reordered.
-void IRCanonicalizer::reorderInstructionOperandsByNames(Instruction* I) {
-  // This method assumes that passed I is commutative,
-  // changing the order of operands in other instructions
-  // may change the semantics.
-
-  // Instruction operands for further sorting.
-  SmallVector<std::pair<std::string, Value*>, 4> Operands;
-
-  // Collect operands.
-  for (auto& OP : I->operands()) {
-    if (auto* VOP = dyn_cast<Value>(OP)) {
-      if (isa<Instruction>(VOP)) {
-        // This is an an instruction.
-        Operands.push_back(std::pair<std::string, Value*>(VOP->getName(), VOP));
-      } else {
-        std::string TextRepresentation;
-        raw_string_ostream Stream(TextRepresentation);
-        OP->printAsOperand(Stream, false);
-        Operands.push_back(std::pair<std::string, Value*>(Stream.str(), VOP));
-      }
-    }
-  }
-
-  // Sort operands.
-  llvm::sort(Operands.begin(), Operands.end(), llvm::less_first());
-
-  // Reorder operands.
-  unsigned Position = 0;
-  for (auto& OP : I->operands()) {
-    OP.set(Operands[Position].second);
-    Position++;
-  }
-}
-
-/// Reorders PHI node's values according to the names of corresponding basic
-/// blocks.
-///
-/// \param PN PHI node to canonicalize.
-void IRCanonicalizer::reorderPHIIncomingValues(PHINode* PN) {
-  // Values for further sorting.
-  SmallVector<std::pair<Value*, BasicBlock*>, 2> Values;
-
-  // Collect blocks and corresponding values.
-  for (auto& BB : PN->blocks()) {
-    Value* V = PN->getIncomingValueForBlock(BB);
-    Values.push_back(std::pair<Value*, BasicBlock*>(V, BB));
-  }
+  IRCanonicalizer() : FunctionPass(ID) {}
 
-  // Sort values according to the name of a basic block.
-  llvm::sort(Values, [](const std::pair<Value*, BasicBlock*>& LHS,
-                        const std::pair<Value*, BasicBlock*>& RHS) {
-    return LHS.second->getName() < RHS.second->getName();
-  });
+  bool runOnFunction(llvm::Function& F) override;
+};
 
-  // Swap.
-  for (unsigned i = 0; i < Values.size(); ++i) {
-    PN->setIncomingBlock(i, Values[i].second);
-    PN->setIncomingValue(i, Values[i].first);
-  }
-}
-
-/// Returns a vector of output instructions. An output is an instruction which
-/// has side-effects or is a terminator instruction. Uses isOutput().
-///
-/// \see isOutput()
-/// \param F Function to collect outputs from.
-SmallVector<Instruction*, 16> IRCanonicalizer::collectOutputInstructions(Function& F) {
-  // Output instructions are collected top-down in each function,
-  // any change may break the def-use chain in reordering methods.
-  SmallVector<Instruction*, 16> Outputs;
-
-  for (auto& I : instructions(F))
-    if (isOutput(&I))
-      Outputs.push_back(&I);
-
-  return Outputs;
-}
-
-/// Helper method checking whether the instruction may have side effects or is
-/// a terminator instruction.
-///
-/// \param I Considered instruction.
-bool IRCanonicalizer::isOutput(const Instruction* I) {
-  // Outputs are such instructions which may have side effects or are a
-  // terminator.
-  if (I->mayHaveSideEffects() || I->isTerminator())
-    return true;
-
-  return false;
-}
-
-/// Helper method checking whether the instruction has users and only
-/// immediate operands.
-///
-/// \param I Considered instruction.
-bool IRCanonicalizer::isInitialInstruction(const Instruction* I) {
-  // Initial instructions are such instructions whose values are used by
-  // other instructions, yet they only depend on immediate values.
-  return !I->user_empty() && hasOnlyImmediateOperands(I);
-}
-
-/// Helper method checking whether the instruction has only immediate operands.
-///
-/// \param I Considered instruction.
-bool IRCanonicalizer::hasOnlyImmediateOperands(const Instruction* I) {
-  for (const auto& OP : I->operands())
-    if (isa<Instruction>(OP))
-      return false;  // Found non-immediate operand (instruction).
-
-  return true;
-}
+char IRCanonicalizer::ID = 0;
 
-/// Helper method returning indices (distance from the beginning of the basic
-/// block) of outputs using the \p I (eliminates repetitions). Walks down the
-/// def-use tree recursively.
+/// Entry method to the IRCanonicalizer.
 ///
-/// \param I Considered instruction.
-/// \param Visited Set of visited instructions.
-SetVector<int> IRCanonicalizer::getOutputFootprint(Instruction* I,
-                                                   SmallPtrSet<const Instruction*, 32>& Visited) {
-  // Vector containing indexes of outputs (no repetitions),
-  // which use I in the order of walking down the def-use tree.
-  SetVector<int> Outputs;
-
-  if (!Visited.count(I)) {
-    Visited.insert(I);
-
-    if (isOutput(I)) {
-      // Gets output instruction's parent function.
-      Function* Func = I->getParent()->getParent();
-
-      // Finds and inserts the index of the output to the vector.
-      unsigned Count = 0;
-      for (const auto& B : *Func) {
-        for (const auto& E : B) {
-          if (&E == I)
-            Outputs.insert(Count);
-          Count++;
-        }
-      }
-
-      // Returns to the used instruction.
-      return Outputs;
-    }
-
-    for (auto* U : I->users()) {
-      if (auto* UI = dyn_cast<Instruction>(U)) {
-        // Vector for outputs which use UI.
-        SetVector<int> OutputsUsingUI = getOutputFootprint(UI, Visited);
-
-        // Insert the indexes of outputs using UI.
-        Outputs.insert(OutputsUsingUI.begin(), OutputsUsingUI.end());
-      }
-    }
-  }
-
-  // Return to the used instruction.
-  return Outputs;
-}
+/// \param M Module to canonicalize.
+bool IRCanonicalizer::runOnFunction(Function& F) { return true; }
 
 /// Reads a module from a file.
 /// On error, messages are written to stderr and null is returned.
@@ -581,27 +57,11 @@ cl::opt<std::string> InputFilename("f", cl::desc("Specify input filename"),
 cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
                                     cl::value_desc("filename"), cl::Required);
 
-/// \name Canonicalizer flags.
-/// @{
-/// Preserves original order of instructions.
-cl::opt<bool> PreserveOrder("preserve-order", cl::desc("Preserves original instruction order"));
-/// Renames all instructions (including user-named).
-cl::opt<bool> RenameAll("rename-all", cl::desc("Renames all instructions (including user-named)"));
-/// Folds all regular instructions (including pre-outputs).
-cl::opt<bool> FoldPreoutputs("fold-all",
-                             cl::desc("Folds all regular instructions (including pre-outputs)"));
-/// Sorts and reorders operands in commutative instructions.
-cl::opt<bool> ReorderOperands("reorder-operands",
-                              cl::desc("Sorts and reorders operands in commutative instructions"));
-/// @}
-
 int main(int argc, char** argv) {
   cl::ParseCommandLineOptions(argc, argv,
-                              " LLVM-Canon\n\n"
-                              " This tool aims to transform LLVM Modules into canonical form by"
-                              " reordering and renaming instructions while preserving the same"
-                              " semantics. Making it easier to spot semantic differences while"
-                              " diffing two modules which have undergone different passes.\n");
+                              " LLVM-Unroller\n\n"
+                              " This tool aims to give users fine grain control on which loops to "
+                              "unroll and by which factor.\n");
 
   LLVMContext Context;
 
@@ -610,7 +70,7 @@ int main(int argc, char** argv) {
   if (!Module)
     return 1;
 
-  IRCanonicalizer Canonicalizer(PreserveOrder, RenameAll, FoldPreoutputs, ReorderOperands);
+  IRCanonicalizer Canonicalizer;
 
   for (auto& Function : *Module) {
     Canonicalizer.runOnFunction(Function);

From 0ed07866d93a4bee30efb2490bca4015f4488ad2 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 09:51:03 -0400
Subject: [PATCH 04/21] remove header file

---
 examples/example_unrolling_service/loop_unroller/BUILD |  1 -
 .../loop_unroller/IRCanonicalizer.h                    | 10 ----------
 .../loop_unroller/loop_unroller.cc                     |  1 -
 3 files changed, 12 deletions(-)
 delete mode 100644 examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h

diff --git a/examples/example_unrolling_service/loop_unroller/BUILD b/examples/example_unrolling_service/loop_unroller/BUILD
index 52c8aa580..3bec18c35 100644
--- a/examples/example_unrolling_service/loop_unroller/BUILD
+++ b/examples/example_unrolling_service/loop_unroller/BUILD
@@ -9,7 +9,6 @@ load("@rules_cc//cc:defs.bzl", "cc_binary")
 cc_binary(
     name = "loop_unroller",
     srcs = [
-        "IRCanonicalizer.h",
         "loop_unroller.cc",
     ],
     copts = [
diff --git a/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h b/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
deleted file mode 100644
index a3c0aba09..000000000
--- a/examples/example_unrolling_service/loop_unroller/IRCanonicalizer.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-
-/// IRCanonicalizer aims to transform LLVM IR into canonical form.
diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index ef3879897..bffa35f51 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -1,7 +1,6 @@
 #include <algorithm>
 #include <vector>
 
-#include "IRCanonicalizer.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"

From 2098b5e30267e57bd1640f018d56720b8328a83d Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 09:53:25 -0400
Subject: [PATCH 05/21] change name

---
 .../loop_unroller/loop_unroller.cc                   | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index bffa35f51..649cf1ddb 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -18,21 +18,21 @@
 
 using namespace llvm;
 
-class IRCanonicalizer : public llvm::FunctionPass {
+class LoopUnroller : public llvm::FunctionPass {
  public:
   static char ID;
 
-  IRCanonicalizer() : FunctionPass(ID) {}
+  LoopUnroller() : FunctionPass(ID) {}
 
   bool runOnFunction(llvm::Function& F) override;
 };
 
-char IRCanonicalizer::ID = 0;
+char LoopUnroller::ID = 0;
 
-/// Entry method to the IRCanonicalizer.
+/// Entry method to the LoopUnroller.
 ///
 /// \param M Module to canonicalize.
-bool IRCanonicalizer::runOnFunction(Function& F) { return true; }
+bool LoopUnroller::runOnFunction(Function& F) { return true; }
 
 /// Reads a module from a file.
 /// On error, messages are written to stderr and null is returned.
@@ -69,7 +69,7 @@ int main(int argc, char** argv) {
   if (!Module)
     return 1;
 
-  IRCanonicalizer Canonicalizer;
+  LoopUnroller Canonicalizer;
 
   for (auto& Function : *Module) {
     Canonicalizer.runOnFunction(Function);

From 3e184ce0864b78158af623e2195a7c31b76bc2c6 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 10:14:33 -0400
Subject: [PATCH 06/21] everything in class declaration

---
 .../loop_unroller/loop_unroller.cc                         | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 649cf1ddb..ce9e42929 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -24,16 +24,11 @@ class LoopUnroller : public llvm::FunctionPass {
 
   LoopUnroller() : FunctionPass(ID) {}
 
-  bool runOnFunction(llvm::Function& F) override;
+  bool runOnFunction(llvm::Function& F) override { return true; }
 };
 
 char LoopUnroller::ID = 0;
 
-/// Entry method to the LoopUnroller.
-///
-/// \param M Module to canonicalize.
-bool LoopUnroller::runOnFunction(Function& F) { return true; }
-
 /// Reads a module from a file.
 /// On error, messages are written to stderr and null is returned.
 ///

From 295a5ab936eb3c639d9280afa13800f835b601cb Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 10:30:41 -0400
Subject: [PATCH 07/21] start counting number of loops

---
 .../loop_unroller/loop_unroller.cc            | 35 +++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index ce9e42929..279235e1e 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -4,6 +4,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -13,18 +14,39 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
 
 using namespace llvm;
+#define DEBUG_TYPE "LoopUnroller"
 
-class LoopUnroller : public llvm::FunctionPass {
+class LoopUnroller : public llvm::ModulePass {
  public:
   static char ID;
 
-  LoopUnroller() : FunctionPass(ID) {}
+  LoopUnroller() : ModulePass(ID) {}
+
+  bool runOnModule(llvm::Module& M) override {
+    loopcounter = 0;
+    for (auto IT = M.begin(), END = M.end(); IT != END; ++IT) {
+      LoopInfo& LI = getAnalysis<LoopInfo>(*IT);
+      for (LoopInfo::iterator LIT = LI.begin(), LEND = LI.end(); LIT != LEND; ++LIT) {
+        handleLoop(*LIT);
+      }
+    }
+    LLVM_DEBUG(dbgs() << "Found " << loopcounter << " loops.\n");
+    return false;
+  }
 
-  bool runOnFunction(llvm::Function& F) override { return true; }
+ private:
+  int loopcounter = 0;
+  void handleLoop(Loop* L) {
+    ++loopcounter;
+    for (Loop* SL : L->getSubLoops()) {
+      handleLoop(SL);
+    }
+  }
 };
 
 char LoopUnroller::ID = 0;
@@ -66,9 +88,10 @@ int main(int argc, char** argv) {
 
   LoopUnroller Canonicalizer;
 
-  for (auto& Function : *Module) {
-    Canonicalizer.runOnFunction(Function);
-  }
+  Canonicalizer.runOnModule(*Module);
+  // for (auto& Function : *Module) {
+  //   Canonicalizer.runOnFunction(Function);
+  // }
 
   if (verifyModule(*Module, &errs()))
     return 1;

From c6d27748a835b8000c9e5111e8d43782354c559d Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 11:04:18 -0400
Subject: [PATCH 08/21] show 2 methods to count number of loops

---
 .../loop_unroller/loop_unroller.cc            | 43 +++++++++++++++++--
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 279235e1e..3bdb811f1 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -21,6 +21,11 @@
 using namespace llvm;
 #define DEBUG_TYPE "LoopUnroller"
 
+#define METHOD 1
+
+#if (METHOD == 1)
+// obtained from https://stackoverflow.com/a/33565910/3880948
+// Error: error: no member named 'ID' in 'llvm::LoopInfo'
 class LoopUnroller : public llvm::ModulePass {
  public:
   static char ID;
@@ -48,6 +53,35 @@ class LoopUnroller : public llvm::ModulePass {
     }
   }
 };
+#elif (METHOD == 2)
+// based on advice from https://stackoverflow.com/a/30353625/3880948
+// Error message: Assertion failed: (Resolver && "Pass has not been inserted into a PassManager
+// object!"), function getAnalysis, file
+// external/clang-llvm-10.0.0-x86_64-apple-darwin/include/llvm/PassAnalysisSupport.h, line 221.
+class LoopUnroller : public llvm::FunctionPass {
+ public:
+  static char ID;
+
+  LoopUnroller() : FunctionPass(ID) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage& AU) const { AU.addRequired<LoopInfoWrapperPass>(); }
+
+  bool runOnFunction(llvm::Function& F) override {
+    loopcounter = 0;
+    LoopInfo& LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    for (BasicBlock& BB : F) {
+      Loop* L = LI.getLoopFor(&BB);
+      if (L)  // if not null
+        loopcounter++;
+    }
+    LLVM_DEBUG(dbgs() << "Found " << loopcounter << " loops.\n");
+    return false;
+  }
+
+ private:
+  int loopcounter = 0;
+};
+#endif  // METHOD
 
 char LoopUnroller::ID = 0;
 
@@ -88,10 +122,13 @@ int main(int argc, char** argv) {
 
   LoopUnroller Canonicalizer;
 
+#if (METHOD == 1)
   Canonicalizer.runOnModule(*Module);
-  // for (auto& Function : *Module) {
-  //   Canonicalizer.runOnFunction(Function);
-  // }
+#elif (METHOD == 2)
+  for (auto& Function : *Module) {
+    Canonicalizer.runOnFunction(Function);
+  }
+#endif
 
   if (verifyModule(*Module, &errs()))
     return 1;

From d1d0bd8db2ee8c98c463303aa8e4ef711f4a4718 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Tue, 2 Nov 2021 11:17:07 -0400
Subject: [PATCH 09/21] fix name

---
 .../loop_unroller/loop_unroller.cc                          | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 3bdb811f1..abf32ea25 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -120,13 +120,13 @@ int main(int argc, char** argv) {
   if (!Module)
     return 1;
 
-  LoopUnroller Canonicalizer;
+  LoopUnroller Unroller;
 
 #if (METHOD == 1)
-  Canonicalizer.runOnModule(*Module);
+  Unroller.runOnModule(*Module);
 #elif (METHOD == 2)
   for (auto& Function : *Module) {
-    Canonicalizer.runOnFunction(Function);
+    Unroller.runOnFunction(Function);
   }
 #endif
 

From 6713793622542fc9e96e6aee5c8e58b5cfe0e19e Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Wed, 3 Nov 2021 10:51:57 -0400
Subject: [PATCH 10/21] fix by Hugh Leather

---
 .../loop_unroller/loop_unroller.cc            | 136 +++++++-----------
 1 file changed, 54 insertions(+), 82 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index abf32ea25..b1004abee 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -1,4 +1,7 @@
 #include <algorithm>
+#include <iostream>
+#include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "llvm/ADT/SetVector.h"
@@ -9,81 +12,58 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
 
 using namespace llvm;
-#define DEBUG_TYPE "LoopUnroller"
 
-#define METHOD 1
-
-#if (METHOD == 1)
-// obtained from https://stackoverflow.com/a/33565910/3880948
-// Error: error: no member named 'ID' in 'llvm::LoopInfo'
-class LoopUnroller : public llvm::ModulePass {
- public:
-  static char ID;
+/// Input LLVM module file name.
+cl::opt<std::string> InputFilename(cl::Positional, cl::desc("Specify input filename"),
+                                   cl::value_desc("filename"), cl::init("-"));
+/// Output LLVM module file name.
+cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
+                                    cl::value_desc("filename"), cl::init("-"));
 
-  LoopUnroller() : ModulePass(ID) {}
-
-  bool runOnModule(llvm::Module& M) override {
-    loopcounter = 0;
-    for (auto IT = M.begin(), END = M.end(); IT != END; ++IT) {
-      LoopInfo& LI = getAnalysis<LoopInfo>(*IT);
-      for (LoopInfo::iterator LIT = LI.begin(), LEND = LI.end(); LIT != LEND; ++LIT) {
-        handleLoop(*LIT);
-      }
-    }
-    LLVM_DEBUG(dbgs() << "Found " << loopcounter << " loops.\n");
-    return false;
-  }
+namespace llvm {
+// The INITIALIZE_PASS_XXX macros put the initialiser in the llvm namespace.
+void initializeLoopCounterPass(PassRegistry& Registry);
+}  // namespace llvm
 
- private:
-  int loopcounter = 0;
-  void handleLoop(Loop* L) {
-    ++loopcounter;
-    for (Loop* SL : L->getSubLoops()) {
-      handleLoop(SL);
-    }
-  }
-};
-#elif (METHOD == 2)
-// based on advice from https://stackoverflow.com/a/30353625/3880948
-// Error message: Assertion failed: (Resolver && "Pass has not been inserted into a PassManager
-// object!"), function getAnalysis, file
-// external/clang-llvm-10.0.0-x86_64-apple-darwin/include/llvm/PassAnalysisSupport.h, line 221.
-class LoopUnroller : public llvm::FunctionPass {
+class LoopCounter : public llvm::FunctionPass {
  public:
   static char ID;
+  std::unordered_map<std::string, int> counts;
 
-  LoopUnroller() : FunctionPass(ID) {}
+  LoopCounter() : FunctionPass(ID) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage& AU) const { AU.addRequired<LoopInfoWrapperPass>(); }
+  virtual void getAnalysisUsage(AnalysisUsage& AU) const override {
+    AU.addRequired<LoopInfoWrapperPass>();
+  }
 
   bool runOnFunction(llvm::Function& F) override {
-    loopcounter = 0;
     LoopInfo& LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    for (BasicBlock& BB : F) {
-      Loop* L = LI.getLoopFor(&BB);
-      if (L)  // if not null
-        loopcounter++;
-    }
-    LLVM_DEBUG(dbgs() << "Found " << loopcounter << " loops.\n");
+    auto Loops = LI.getLoopsInPreorder();
+
+    // Should reall account for module, too.
+    counts[F.getName().str()] = Loops.size();
     return false;
   }
-
- private:
-  int loopcounter = 0;
 };
-#endif  // METHOD
 
-char LoopUnroller::ID = 0;
+// Initialise the pass. We have to declare the dependencies we use.
+char LoopCounter::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopCounter, "count-loops", "Count loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopCounter, "count-loops", "Count loops", false, false)
 
 /// Reads a module from a file.
 /// On error, messages are written to stderr and null is returned.
@@ -95,53 +75,45 @@ static std::unique_ptr<Module> readModule(LLVMContext& Context, StringRef Name)
   std::unique_ptr<Module> Module = parseIRFile(Name, Diag, Context);
 
   if (!Module)
-    Diag.print("llvm-canon", errs());
+    Diag.print("llvm-counter", errs());
 
   return Module;
 }
 
-/// Input LLVM module file name.
-cl::opt<std::string> InputFilename("f", cl::desc("Specify input filename"),
-                                   cl::value_desc("filename"), cl::Required);
-/// Output LLVM module file name.
-cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
-                                    cl::value_desc("filename"), cl::Required);
-
 int main(int argc, char** argv) {
   cl::ParseCommandLineOptions(argc, argv,
-                              " LLVM-Unroller\n\n"
-                              " This tool aims to give users fine grain control on which loops to "
-                              "unroll and by which factor.\n");
+                              " LLVM-Counter\n\n"
+                              " Count the loops in a bitcode file.\n");
 
   LLVMContext Context;
+  SMDiagnostic Err;
+  SourceMgr SM;
+  std::error_code EC;
 
-  std::unique_ptr<Module> Module = readModule(Context, InputFilename);
-
-  if (!Module)
+  ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
+  if (EC) {
+    Err = SMDiagnostic(OutputFilename, SourceMgr::DK_Error,
+                       "Could not open output file: " + EC.message());
+    Err.print(argv[0], errs());
     return 1;
-
-  LoopUnroller Unroller;
-
-#if (METHOD == 1)
-  Unroller.runOnModule(*Module);
-#elif (METHOD == 2)
-  for (auto& Function : *Module) {
-    Unroller.runOnFunction(Function);
   }
-#endif
 
-  if (verifyModule(*Module, &errs()))
+  std::unique_ptr<Module> Module = readModule(Context, InputFilename);
+
+  if (!Module)
     return 1;
 
-  std::error_code EC;
-  raw_fd_ostream OutputStream(OutputFilename, EC, sys::fs::OF_None);
+  // Run the pass
+  initializeLoopCounterPass(*PassRegistry::getPassRegistry());
+  legacy::PassManager PM;
+  LoopCounter* Counter = new LoopCounter();
+  PM.add(Counter);
+  PM.run(*Module);
 
-  if (EC) {
-    errs() << EC.message();
-    return 1;
+  for (auto& x : Counter->counts) {
+    Out.os() << x.first << ' ' << x.second << '\n';
   }
 
-  Module->print(OutputStream, nullptr, false);
-  OutputStream.close();
+  Out.keep();
   return 0;
 }

From fdb7461180c4624b3660648f47c38df873620dca Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sat, 6 Nov 2021 09:09:35 -0400
Subject: [PATCH 11/21] move file contents into namespace llvm and hack metadata change

---
 .../loop_unroller/loop_unroller.cc            | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index b1004abee..1d3ba7267 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -23,9 +23,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 
 using namespace llvm;
 
+namespace llvm {
 /// Input LLVM module file name.
 cl::opt<std::string> InputFilename(cl::Positional, cl::desc("Specify input filename"),
                                    cl::value_desc("filename"), cl::init("-"));
@@ -33,10 +35,8 @@ cl::opt<std::string> InputFilename(cl::Positional, cl::desc("Specify input filen
 cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
                                     cl::value_desc("filename"), cl::init("-"));
 
-namespace llvm {
 // The INITIALIZE_PASS_XXX macros put the initialiser in the llvm namespace.
 void initializeLoopCounterPass(PassRegistry& Registry);
-}  // namespace llvm
 
 class LoopCounter : public llvm::FunctionPass {
  public:
@@ -53,8 +53,13 @@ class LoopCounter : public llvm::FunctionPass {
     LoopInfo& LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     auto Loops = LI.getLoopsInPreorder();
 
-    // Should reall account for module, too.
+    // Should really account for module, too.
     counts[F.getName().str()] = Loops.size();
+
+    for (auto ALoop : Loops) {
+      addStringMetadataToLoop(ALoop, "llvm.loop.unroll.enable");
+    }
+
     return false;
   }
 };
@@ -79,6 +84,7 @@ static std::unique_ptr<Module> readModule(LLVMContext& Context, StringRef Name)
 
   return Module;
 }
+}  // namespace llvm
 
 int main(int argc, char** argv) {
   cl::ParseCommandLineOptions(argc, argv,
@@ -103,17 +109,23 @@ int main(int argc, char** argv) {
   if (!Module)
     return 1;
 
-  // Run the pass
+  // Run the passes
   initializeLoopCounterPass(*PassRegistry::getPassRegistry());
   legacy::PassManager PM;
   LoopCounter* Counter = new LoopCounter();
+  // LoopUnrollConfigurator* UnrollConfigurator = new LoopUnrollConfigurator();
   PM.add(Counter);
+  // PM.add(UnrollConfigurator);
+  // PM.add(createLoopUnrollPass());
   PM.run(*Module);
 
   for (auto& x : Counter->counts) {
     Out.os() << x.first << ' ' << x.second << '\n';
   }
 
+  Module->print(Out.os(), nullptr, false);
+
   Out.keep();
+
   return 0;
 }

From 1d0d77f03c81975445da30659e120fcba190a24c Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sat, 6 Nov 2021 12:11:52 -0400
Subject: [PATCH 12/21] add separate loop passes to count and to configure

---
 .../loop_unroller/loop_unroller.cc            | 42 ++++++++++++++++---
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 1d3ba7267..d84216c80 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -56,6 +56,34 @@ class LoopCounter : public llvm::FunctionPass {
     // Should really account for module, too.
     counts[F.getName().str()] = Loops.size();
 
+    return false;
+  }
+};
+
+// Initialise the pass. We have to declare the dependencies we use.
+char LoopCounter::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopCounter, "count-loops", "Count loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopCounter, "count-loops", "Count loops", false, false)
+
+// The INITIALIZE_PASS_XXX macros put the initialiser in the llvm namespace.
+void initializeLoopUnrollConfiguratorPass(PassRegistry& Registry);
+
+class LoopUnrollConfigurator : public llvm::FunctionPass {
+ public:
+  static char ID;
+
+  LoopUnrollConfigurator() : FunctionPass(ID) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage& AU) const override {
+    AU.addRequired<LoopInfoWrapperPass>();
+  }
+
+  bool runOnFunction(llvm::Function& F) override {
+    LoopInfo& LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    auto Loops = LI.getLoopsInPreorder();
+
+    // Should really account for module, too.
     for (auto ALoop : Loops) {
       addStringMetadataToLoop(ALoop, "llvm.loop.unroll.enable");
     }
@@ -65,10 +93,12 @@ class LoopCounter : public llvm::FunctionPass {
 };
 
 // Initialise the pass. We have to declare the dependencies we use.
-char LoopCounter::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopCounter, "count-loops", "Count loops", false, false)
+char LoopUnrollConfigurator::ID = 1;
+INITIALIZE_PASS_BEGIN(LoopUnrollConfigurator, "unroll-loops-configurator",
+                      "Configurates loop unrolling", false, false)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LoopCounter, "count-loops", "Count loops", false, false)
+INITIALIZE_PASS_END(LoopUnrollConfigurator, "unroll-loops-configurator",
+                    "Configurates loop unrolling", false, false)
 
 /// Reads a module from a file.
 /// On error, messages are written to stderr and null is returned.
@@ -113,14 +143,14 @@ int main(int argc, char** argv) {
   initializeLoopCounterPass(*PassRegistry::getPassRegistry());
   legacy::PassManager PM;
   LoopCounter* Counter = new LoopCounter();
-  // LoopUnrollConfigurator* UnrollConfigurator = new LoopUnrollConfigurator();
+  LoopUnrollConfigurator* UnrollConfigurator = new LoopUnrollConfigurator();
   PM.add(Counter);
-  // PM.add(UnrollConfigurator);
+  PM.add(UnrollConfigurator);
   // PM.add(createLoopUnrollPass());
   PM.run(*Module);
 
   for (auto& x : Counter->counts) {
-    Out.os() << x.first << ' ' << x.second << '\n';
+    llvm::dbgs() << x.first << ": " << x.second << " loops" << '\n';
   }
 
   Module->print(Out.os(), nullptr, false);

From 5f3d2b7c3c7b83f2ff8c20288e1fa40ec3a1f57f Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sun, 7 Nov 2021 00:40:20 -0400
Subject: [PATCH 13/21] add meta data and call loop unroll

---
 .../loop_unroller/loop_unroller.cc                         | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index d84216c80..6c59e5745 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -23,6 +23,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 
 using namespace llvm;
@@ -85,7 +86,8 @@ class LoopUnrollConfigurator : public llvm::FunctionPass {
 
     // Should really account for module, too.
     for (auto ALoop : Loops) {
-      addStringMetadataToLoop(ALoop, "llvm.loop.unroll.enable");
+      addStringMetadataToLoop(ALoop, "llvm.loop.unroll.enable", true);
+      addStringMetadataToLoop(ALoop, "llvm.loop.unroll.count", 4);
     }
 
     return false;
@@ -114,6 +116,7 @@ static std::unique_ptr<Module> readModule(LLVMContext& Context, StringRef Name)
 
   return Module;
 }
+
 }  // namespace llvm
 
 int main(int argc, char** argv) {
@@ -146,7 +149,7 @@ int main(int argc, char** argv) {
   LoopUnrollConfigurator* UnrollConfigurator = new LoopUnrollConfigurator();
   PM.add(Counter);
   PM.add(UnrollConfigurator);
-  // PM.add(createLoopUnrollPass());
+  PM.add(createLoopUnrollPass());
   PM.run(*Module);
 
   for (auto& x : Counter->counts) {

From 3c4774dd5bc7f3ed5d8b20ad3102eb82e9ae28d8 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sun, 7 Nov 2021 01:51:42 -0400
Subject: [PATCH 14/21] pass unroll enable and count as cli

---
 .../loop_unroller/loop_unroller.cc                  | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 6c59e5745..d98fce440 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -36,6 +36,13 @@ cl::opt<std::string> InputFilename(cl::Positional, cl::desc("Specify input filen
 cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
                                     cl::value_desc("filename"), cl::init("-"));
 
+static cl::opt<bool> UnrollEnable("funroll-loops", cl::desc("Enable loop unrolling"),
+                                  cl::init(true));
+
+extern cl::opt<unsigned> UnrollCount(
+    "funroll-count", cl::desc("Use this unroll count for all loops including those with "
+                              "unroll_count pragma values, for testing purposes"));
+
 // The INITIALIZE_PASS_XXX macros put the initialiser in the llvm namespace.
 void initializeLoopCounterPass(PassRegistry& Registry);
 
@@ -86,8 +93,10 @@ class LoopUnrollConfigurator : public llvm::FunctionPass {
 
     // Should really account for module, too.
     for (auto ALoop : Loops) {
-      addStringMetadataToLoop(ALoop, "llvm.loop.unroll.enable", true);
-      addStringMetadataToLoop(ALoop, "llvm.loop.unroll.count", 4);
+      if (UnrollEnable)
+        addStringMetadataToLoop(ALoop, "llvm.loop.unroll.enable", UnrollEnable);
+      if (UnrollCount)
+        addStringMetadataToLoop(ALoop, "llvm.loop.unroll.count", UnrollCount);
     }
 
     return false;

From de96b1426f14d6b5d1b53576b891737505232862 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sun, 7 Nov 2021 20:38:41 -0500
Subject: [PATCH 15/21] call loop unroller in env service

---
 .../loop_unroller/loop_unroller.cc            |  2 +-
 .../service_py/BUILD                          |  8 +++
 .../service_py/example_service.py             | 54 ++++++++++++++-----
 3 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index d98fce440..99b4df41b 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -36,7 +36,7 @@ cl::opt<std::string> InputFilename(cl::Positional, cl::desc("Specify input filen
 cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
                                     cl::value_desc("filename"), cl::init("-"));
 
-static cl::opt<bool> UnrollEnable("funroll-loops", cl::desc("Enable loop unrolling"),
+static cl::opt<bool> UnrollEnable("floop-unroll", cl::desc("Enable loop unrolling"),
                                   cl::init(true));
 
 extern cl::opt<unsigned> UnrollCount(
diff --git a/examples/example_unrolling_service/service_py/BUILD b/examples/example_unrolling_service/service_py/BUILD
index b8ff53f0a..8da6a5c31 100644
--- a/examples/example_unrolling_service/service_py/BUILD
+++ b/examples/example_unrolling_service/service_py/BUILD
@@ -4,9 +4,17 @@
 # LICENSE file in the root directory of this source tree.
 load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
 
+alias(
+    name = "loop_unroller",
+    actual = "//examples/example_unrolling_service/loop_unroller:loop_unroller",
+)
+
 py_binary(
     name = "example-unrolling-service-py",
     srcs = ["example_service.py"],
+    data = [
+        ":loop_unroller",
+    ],
     main = "example_service.py",
     visibility = ["//visibility:public"],
     deps = [
diff --git a/examples/example_unrolling_service/service_py/example_service.py b/examples/example_unrolling_service/service_py/example_service.py
index 8a4173827..4e87eaa0b 100755
--- a/examples/example_unrolling_service/service_py/example_service.py
+++ b/examples/example_unrolling_service/service_py/example_service.py
@@ -98,7 +98,11 @@ class UnrollingCompilationSession(CompilationSession):
     ]
 
     def __init__(
-        self, working_directory: Path, action_space: ActionSpace, benchmark: Benchmark
+        self,
+        working_directory: Path,
+        action_space: ActionSpace,
+        benchmark: Benchmark,
+        use_custom_opt: bool = True,
     ):
         super().__init__(working_directory, action_space, benchmark)
         logging.info("Started a compilation session for %s", benchmark.uri)
@@ -110,6 +114,9 @@ def __init__(
         self._llc = str(llvm.llc_path())
         self._llvm_diff = str(llvm.llvm_diff_path())
         self._opt = str(llvm.opt_path())
+        # LLVM's opt does not always enforce the unrolling options passed as CLI arguments. Hence, we created our own executable with a custom unrolling pass in examples/example_unrolling_service/loop_unroller that enforces the unrolling factors passed on its command line.
+        # If self._use_custom_opt is true, use our custom executable; otherwise use LLVM's opt.
+        self._use_custom_opt = use_custom_opt
 
         # Dump the benchmark source to disk.
         self._src_path = str(self.working_dir / "benchmark.c")
@@ -147,28 +154,47 @@ def apply_action(self, action: Action) -> Tuple[bool, Optional[ActionSpace], boo
         if choice_index < 0 or choice_index >= num_choices:
             raise ValueError("Out-of-range")
 
-        cmd = self._action_space.choice[0].named_discrete_space.value[choice_index]
+        args = self._action_space.choice[0].named_discrete_space.value[choice_index]
         logging.info(
             "Applying action %d, equivalent command-line arguments: '%s'",
             choice_index,
-            cmd,
+            args,
         )
+        args = args.split()
 
         # make a copy of the LLVM file to compare its contents after applying the action
         shutil.copyfile(self._llvm_path, self._llvm_before_path)
 
         # apply action
-        run_command(
-            [
-                self._opt,
-                *cmd.split(),
-                self._llvm_path,
-                "-S",
-                "-o",
-                self._llvm_path,
-            ],
-            timeout=30,
-        )
+        if self._use_custom_opt:
+            # our custom unroller has an additional `f` at the beginning of each argument
+            for i, arg in enumerate(args):
+                # convert -<argument> to -f<argument>
+                arg = arg[0] + "f" + arg[1:]
+                args[i] = arg
+            print("args: ", args)
+            run_command(
+                [
+                    "loop_unroller",
+                    *args,
+                    self._llvm_path,
+                    "-o",
+                    self._llvm_path,
+                ],
+                timeout=30,
+            )
+        else:
+            run_command(
+                [
+                    self._opt,
+                    *args,
+                    self._llvm_path,
+                    "-S",
+                    "-o",
+                    self._llvm_path,
+                ],
+                timeout=30,
+            )
 
         # compare the IR files to check if the action had an effect
         try:

From cef2149bf0dc3a87f2a8591edfeab4419e0411fd Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Sun, 7 Nov 2021 20:39:17 -0500
Subject: [PATCH 16/21] start a README file

---
 examples/example_unrolling_service/loop_unroller/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 examples/example_unrolling_service/loop_unroller/README.md

diff --git a/examples/example_unrolling_service/loop_unroller/README.md b/examples/example_unrolling_service/loop_unroller/README.md
new file mode 100644
index 000000000..a0990367e
--- /dev/null
+++ b/examples/example_unrolling_service/loop_unroller/README.md
@@ -0,0 +1 @@
+TBD

From 305d71ed3c28817bec2750aa99a5d6d185e20cf4 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Thu, 11 Nov 2021 22:21:07 -0500
Subject: [PATCH 17/21] invoke loop_unroller in our unrolling env example

---
 .../loop_unroller/loop_unroller.cc             | 18 +++++++++---------
 .../example_unrolling_service/service_py/BUILD |  7 +------
 .../service_py/example_service.py              |  5 ++---
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 99b4df41b..9dbed31b3 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -138,14 +138,6 @@ int main(int argc, char** argv) {
   SourceMgr SM;
   std::error_code EC;
 
-  ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
-  if (EC) {
-    Err = SMDiagnostic(OutputFilename, SourceMgr::DK_Error,
-                       "Could not open output file: " + EC.message());
-    Err.print(argv[0], errs());
-    return 1;
-  }
-
   std::unique_ptr<Module> Module = readModule(Context, InputFilename);
 
   if (!Module)
@@ -161,12 +153,20 @@ int main(int argc, char** argv) {
   PM.add(createLoopUnrollPass());
   PM.run(*Module);
 
+  // Log loop stats
   for (auto& x : Counter->counts) {
     llvm::dbgs() << x.first << ": " << x.second << " loops" << '\n';
   }
 
+  // Output modified IR
+  ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
+  if (EC) {
+    Err = SMDiagnostic(OutputFilename, SourceMgr::DK_Error,
+                       "Could not open output file: " + EC.message());
+    Err.print(argv[0], errs());
+    return 1;
+  }
   Module->print(Out.os(), nullptr, false);
-
   Out.keep();
 
   return 0;
diff --git a/examples/example_unrolling_service/service_py/BUILD b/examples/example_unrolling_service/service_py/BUILD
index 8da6a5c31..ec30ddf0f 100644
--- a/examples/example_unrolling_service/service_py/BUILD
+++ b/examples/example_unrolling_service/service_py/BUILD
@@ -4,16 +4,11 @@
 # LICENSE file in the root directory of this source tree.
 load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
 
-alias(
-    name = "loop_unroller",
-    actual = "//examples/example_unrolling_service/loop_unroller:loop_unroller",
-)
-
 py_binary(
     name = "example-unrolling-service-py",
     srcs = ["example_service.py"],
     data = [
-        ":loop_unroller",
+        "//examples/example_unrolling_service/loop_unroller",
     ],
     main = "example_service.py",
     visibility = ["//visibility:public"],
diff --git a/examples/example_unrolling_service/service_py/example_service.py b/examples/example_unrolling_service/service_py/example_service.py
index 4e87eaa0b..861b20849 100755
--- a/examples/example_unrolling_service/service_py/example_service.py
+++ b/examples/example_unrolling_service/service_py/example_service.py
@@ -172,12 +172,11 @@ def apply_action(self, action: Action) -> Tuple[bool, Optional[ActionSpace], boo
                 # convert -<argument> to -f<argument>
                 arg = arg[0] + "f" + arg[1:]
                 args[i] = arg
-            print("args: ", args)
             run_command(
                 [
-                    "loop_unroller",
-                    *args,
+                    "../loop_unroller/loop_unroller",
                     self._llvm_path,
+                    *args,
                     "-o",
                     self._llvm_path,
                 ],

From 60916042c00d0537cbb7c5a1a0341cc5f52b6902 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Thu, 11 Nov 2021 22:24:44 -0500
Subject: [PATCH 18/21] add header

---
 .../example_unrolling_service/loop_unroller/loop_unroller.cc  | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index 9dbed31b3..fd58fff04 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -1,3 +1,7 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
 #include <algorithm>
 #include <iostream>
 #include <string>

From bef3c4b4db77e2ae26b150e8c899f87c8b4f4ac2 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Thu, 11 Nov 2021 22:39:29 -0500
Subject: [PATCH 19/21] fill README file

---
 examples/example_unrolling_service/loop_unroller/README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/example_unrolling_service/loop_unroller/README.md b/examples/example_unrolling_service/loop_unroller/README.md
index a0990367e..e4e80980c 100644
--- a/examples/example_unrolling_service/loop_unroller/README.md
+++ b/examples/example_unrolling_service/loop_unroller/README.md
@@ -1 +1,6 @@
-TBD
+LLVM's opt does not always enforce the unrolling options passed as CLI arguments. Hence, we created our own executable with a custom unrolling pass in examples/example_unrolling_service/loop_unroller that enforces the unrolling factors passed on its command line.
+
+To run the custom unroller:
+```
+bazel run //examples/example_unrolling_service/loop_unroller:loop_unroller -- <input>.ll -o <output>.ll  --funroll-count=<num>
+```

From 2a0be19ba27ccc09d32af0b7609b508c51449161 Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Fri, 12 Nov 2021 17:26:02 -0500
Subject: [PATCH 20/21] add option to output to bitcode file or IR assembly

---
 .../loop_unroller/README.md                   |  2 +-
 .../loop_unroller/loop_unroller.cc            | 44 ++++++++++++++-----
 .../service_py/example_service.py             |  1 +
 3 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/examples/example_unrolling_service/loop_unroller/README.md b/examples/example_unrolling_service/loop_unroller/README.md
index e4e80980c..5684e6580 100644
--- a/examples/example_unrolling_service/loop_unroller/README.md
+++ b/examples/example_unrolling_service/loop_unroller/README.md
@@ -2,5 +2,5 @@ LLVM's opt does not always enforce the unrolling options passed as cli arguments
 
 To run the custom unroller:
 ```
-bazel run //examples/example_unrolling_service/loop_unroller:loop_unroller -- <input>.ll -o <output>.ll  --funroll-count=<num>
+bazel run //examples/example_unrolling_service/loop_unroller:loop_unroller -- <input>.ll --funroll-count=<num> -S -o <output>.ll
 ```
diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index fd58fff04..d92a8f64f 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -12,9 +12,11 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
@@ -26,6 +28,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
@@ -43,10 +46,25 @@ cl::opt<std::string> OutputFilename("o", cl::desc("Specify output filename"),
 static cl::opt<bool> UnrollEnable("floop-unroll", cl::desc("Enable loop unrolling"),
                                   cl::init(true));
 
-extern cl::opt<unsigned> UnrollCount(
+static cl::opt<unsigned> UnrollCount(
     "funroll-count", cl::desc("Use this unroll count for all loops including those with "
                               "unroll_count pragma values, for testing purposes"));
 
+// Force binary on terminals
+static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"));
+
+// Output assembly
+static cl::opt<bool> OutputAssembly("S", cl::desc("Write output as LLVM assembly"), cl::Hidden);
+
+// Preserve use list order
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+    "preserve-bc-uselistorder", cl::desc("Preserve use-list order when writing LLVM bitcode."),
+    cl::init(true), cl::Hidden);
+
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+    "preserve-ll-uselistorder", cl::desc("Preserve use-list order when writing LLVM assembly."),
+    cl::init(false), cl::Hidden);
+
 // The INITIALIZE_PASS_XXX macros put the initialiser in the llvm namespace.
 void initializeLoopCounterPass(PassRegistry& Registry);
 
@@ -147,6 +165,15 @@ int main(int argc, char** argv) {
   if (!Module)
     return 1;
 
+  // Prepare output
+  ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
+  if (EC) {
+    Err = SMDiagnostic(OutputFilename, SourceMgr::DK_Error,
+                       "Could not open output file: " + EC.message());
+    Err.print(argv[0], errs());
+    return 1;
+  }
+
   // Run the passes
   initializeLoopCounterPass(*PassRegistry::getPassRegistry());
   legacy::PassManager PM;
@@ -155,6 +182,12 @@ int main(int argc, char** argv) {
   PM.add(Counter);
   PM.add(UnrollConfigurator);
   PM.add(createLoopUnrollPass());
+  // Passes to output the module
+  if (OutputAssembly) {
+    PM.add(createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder));
+  } else if (Force || !CheckBitcodeOutputToConsole(Out.os())) {
+    PM.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder));
+  }
   PM.run(*Module);
 
   // Log loop stats
@@ -162,15 +195,6 @@ int main(int argc, char** argv) {
     llvm::dbgs() << x.first << ": " << x.second << " loops" << '\n';
   }
 
-  // Output modified IR
-  ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);
-  if (EC) {
-    Err = SMDiagnostic(OutputFilename, SourceMgr::DK_Error,
-                       "Could not open output file: " + EC.message());
-    Err.print(argv[0], errs());
-    return 1;
-  }
-  Module->print(Out.os(), nullptr, false);
   Out.keep();
 
   return 0;
diff --git a/examples/example_unrolling_service/service_py/example_service.py b/examples/example_unrolling_service/service_py/example_service.py
index 861b20849..e115c5de5 100755
--- a/examples/example_unrolling_service/service_py/example_service.py
+++ b/examples/example_unrolling_service/service_py/example_service.py
@@ -177,6 +177,7 @@ def apply_action(self, action: Action) -> Tuple[bool, Optional[ActionSpace], boo
                     "../loop_unroller/loop_unroller",
                     self._llvm_path,
                     *args,
+                    "-S",
                     "-o",
                     self._llvm_path,
                 ],

From 724f23c985c613c3f9613396134f4c0458b7becd Mon Sep 17 00:00:00 2001
From: Mostafa Elhoushi <melhoushi@fb.com>
Date: Fri, 12 Nov 2021 17:42:42 -0500
Subject: [PATCH 21/21] make -S option appear when calling --help

---
 .../example_unrolling_service/loop_unroller/loop_unroller.cc    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
index d92a8f64f..29c87559a 100644
--- a/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
+++ b/examples/example_unrolling_service/loop_unroller/loop_unroller.cc
@@ -54,7 +54,7 @@ static cl::opt<unsigned> UnrollCount(
 static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"));
 
 // Output assembly
-static cl::opt<bool> OutputAssembly("S", cl::desc("Write output as LLVM assembly"), cl::Hidden);
+static cl::opt<bool> OutputAssembly("S", cl::desc("Write output as LLVM assembly"));
 
 // Preserve use list order
 static cl::opt<bool> PreserveBitcodeUseListOrder(