From f98b40a2f4de285e309f88f3de7464be45ff05b8 Mon Sep 17 00:00:00 2001
From: "Celina G. Val" <celinval@amazon.com>
Date: Tue, 2 May 2023 10:14:21 -0700
Subject: [PATCH] Print compilation stats about the MIR that was codegened in
 verbose mode  (#2420)

Adding a bit of information about the MIR that was used as an input to codegen to help us assess differences in performance that are due to changes in the MIR as opposed to changes in the kani-compiler. The information will only be collected and printed if either `--verbose` is passed to Kani or if the user enables kani_compiler logs via `KANI_LOG`.

Co-authored-by: Michael Tautschnig <tautschn@amazon.com>
---
 CHANGELOG.md                                  |   1 +
 .../compiler_interface.rs                     |  18 +-
 kani-compiler/src/kani_middle/analysis.rs     | 190 ++++++++++++++++++
 kani-compiler/src/kani_middle/mod.rs          |   1 +
 tests/ui/compiler-stats/expected              |  10 +
 tests/ui/compiler-stats/stats.rs              |  17 ++
 6 files changed, 234 insertions(+), 3 deletions(-)
 create mode 100644 kani-compiler/src/kani_middle/analysis.rs
 create mode 100644 tests/ui/compiler-stats/expected
 create mode 100644 tests/ui/compiler-stats/stats.rs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 171598a70f86..3b9860837808 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ This file was introduced starting Kani 0.23.0, so it only contains changes from
 * Add size_of annotation to help CBMC's allocator by @tautschnig in https://github.com/model-checking/kani/pull/2395
 * Implement `kani::Arbitrary` for `Box<T>` by @adpaco-aws in https://github.com/model-checking/kani/pull/2404
 * Use optimized overflow operation everywhere by @celinval in https://github.com/model-checking/kani/pull/2405
+* Print compilation stats in verbose mode by @celinval in https://github.com/model-checking/kani/pull/2420
 * Bump CBMC version to 5.82.0 by @adpaco-aws in https://github.com/model-checking/kani/pull/2417
 
 **Full Changelog**: https://github.com/model-checking/kani/compare/kani-0.26.0...kani-0.27.0
diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs
index 100fba0ef281..2b6c4551f510 100644
--- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs
+++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs
@@ -5,6 +5,7 @@
 
 use crate::codegen_cprover_gotoc::archive::ArchiveBuilder;
 use crate::codegen_cprover_gotoc::GotocCtx;
+use crate::kani_middle::analysis;
 use crate::kani_middle::attributes::is_proof_harness;
 use crate::kani_middle::attributes::is_test_harness_description;
 use crate::kani_middle::check_crate_items;
@@ -135,8 +136,8 @@ impl CodegenBackend for GotocCodegenBackend {
                 }
 
                 // then we move on to codegen
-                for item in items {
-                    match item {
+                for item in &items {
+                    match *item {
                         MonoItem::Fn(instance) => {
                             gcx.call_with_panic_debug_info(
                                 |ctx| ctx.codegen_function(instance),
@@ -150,7 +151,7 @@ impl CodegenBackend for GotocCodegenBackend {
                         }
                         MonoItem::Static(def_id) => {
                             gcx.call_with_panic_debug_info(
-                                |ctx| ctx.codegen_static(def_id, item),
+                                |ctx| ctx.codegen_static(def_id, *item),
                                 format!("codegen_static: {def_id:?}"),
                                 def_id,
                             );
@@ -165,6 +166,9 @@ impl CodegenBackend for GotocCodegenBackend {
         // Print compilation report.
         print_report(&gcx, tcx);
 
+        // Print some compilation stats.
+        print_stats(&gcx, tcx, &items);
+
         // Map from name to prettyName for all symbols
         let pretty_name_map: BTreeMap<InternedString, Option<InternedString>> =
             BTreeMap::from_iter(gcx.symbol_table.iter().map(|(k, s)| (*k, s.pretty_name)));
@@ -493,6 +497,14 @@ fn dump_mir_items(tcx: TyCtxt, items: &[MonoItem]) {
     }
 }
 
+/// Print statistics about the MIR used as input to code generation as well as the emitted goto.
+/// TODO: Print stats for the goto.
+fn print_stats<'tcx>(_ctx: &GotocCtx, tcx: TyCtxt<'tcx>, items: &[MonoItem<'tcx>]) {
+    if tracing::enabled!(tracing::Level::INFO) {
+        analysis::print_stats(tcx, items);
+    }
+}
+
 /// Method that generates `KaniMetadata` from the given compilation context.
 /// This is a temporary method used until we generate a model per-harness.
 /// See <https://github.com/model-checking/kani/issues/1855> for more details.
diff --git a/kani-compiler/src/kani_middle/analysis.rs b/kani-compiler/src/kani_middle/analysis.rs
new file mode 100644
index 000000000000..e618d8ec2163
--- /dev/null
+++ b/kani-compiler/src/kani_middle/analysis.rs
@@ -0,0 +1,190 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//! MIR analysis passes that extracts information about the MIR model given as input to codegen.
+//!
+//! # Performance Impact
+//!
+//! This module will perform all the analyses requested. Callers are responsible for selecting
+//! when the cost of these analyses are worth it.
+
+use rustc_middle::mir::mono::MonoItem;
+use rustc_middle::mir::visit::Visitor as MirVisitor;
+use rustc_middle::mir::{Location, Rvalue, Statement, StatementKind, Terminator, TerminatorKind};
+use rustc_middle::ty::TyCtxt;
+use std::collections::HashMap;
+use std::fmt::Display;
+
+/// This function will collect and print some information about the given set of mono items.
+///
+/// This function will print information like:
+///  - Number of items per type (Function / Constant / Shims)
+///  - Number of instructions per type.
+///  - Total number of MIR instructions.
+pub fn print_stats<'tcx>(tcx: TyCtxt<'tcx>, items: &[MonoItem<'tcx>]) {
+    let item_types = items.iter().collect::<Counter>();
+    let visitor = items
+        .iter()
+        .filter_map(|&mono| {
+            if let MonoItem::Fn(instance) = mono {
+                Some(tcx.instance_mir(instance.def))
+            } else {
+                None
+            }
+        })
+        .fold(StatsVisitor::default(), |mut visitor, body| {
+            visitor.visit_body(body);
+            visitor
+        });
+    eprintln!("====== Reachability Analysis Result =======");
+    eprintln!("Total # items: {}", item_types.total());
+    eprintln!("Total # statements: {}", visitor.stmts.total());
+    eprintln!("Total # expressions: {}", visitor.exprs.total());
+    eprintln!("\nReachable Items:\n{item_types}");
+    eprintln!("Statements:\n{}", visitor.stmts);
+    eprintln!("Expressions:\n{}", visitor.exprs);
+    eprintln!("-------------------------------------------")
+}
+
+#[derive(Default)]
+/// MIR Visitor that collects information about the body of an item.
+struct StatsVisitor {
+    /// The types of each statement / terminator visited.
+    stmts: Counter,
+    /// The kind of each expressions found.
+    exprs: Counter,
+}
+
+impl<'tcx> MirVisitor<'tcx> for StatsVisitor {
+    fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
+        self.stmts.add(statement);
+        // Also visit the type of expression.
+        self.super_statement(statement, location);
+    }
+
+    fn visit_terminator(&mut self, terminator: &Terminator<'tcx>, _location: Location) {
+        self.stmts.add(terminator);
+        // Stop here since we don't care today about the information inside the terminator.
+        // self.super_terminator(terminator, location);
+    }
+
+    fn visit_rvalue(&mut self, rvalue: &Rvalue<'tcx>, _location: Location) {
+        self.exprs.add(rvalue);
+        // Stop here since we don't care today about the information inside the rvalue.
+        // self.super_rvalue(rvalue, location);
+    }
+}
+
+#[derive(Default)]
+struct Counter {
+    data: HashMap<Key, usize>,
+}
+
+impl Counter {
+    fn add<T: Into<Key>>(&mut self, item: T) {
+        *self.data.entry(item.into()).or_default() += 1;
+    }
+
+    fn total(&self) -> usize {
+        self.data.iter().fold(0, |acc, item| acc + item.1)
+    }
+}
+
+impl Display for Counter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for (name, freq) in &self.data {
+            writeln!(f, "  - {}: {freq}", name.0)?;
+        }
+        std::fmt::Result::Ok(())
+    }
+}
+
+impl<T: Into<Key>> FromIterator<T> for Counter {
+    // Required method
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let mut counter = Counter::default();
+        for item in iter {
+            counter.add(item.into())
+        }
+        counter
+    }
+}
+
+#[derive(Debug, Eq, Hash, PartialEq)]
+struct Key(pub &'static str);
+
+impl<'tcx> From<&MonoItem<'tcx>> for Key {
+    fn from(value: &MonoItem) -> Self {
+        match value {
+            MonoItem::Fn(_) => Key("function"),
+            MonoItem::GlobalAsm(_) => Key("global assembly"),
+            MonoItem::Static(_) => Key("static item"),
+        }
+    }
+}
+
+impl<'tcx> From<&Statement<'tcx>> for Key {
+    fn from(value: &Statement<'tcx>) -> Self {
+        match value.kind {
+            StatementKind::Assign(_) => Key("Assign"),
+            StatementKind::Deinit(_) => Key("Deinit"),
+            StatementKind::Intrinsic(_) => Key("Intrinsic"),
+            StatementKind::SetDiscriminant { .. } => Key("SetDiscriminant"),
+            // For now, we don't care about the ones below.
+            StatementKind::AscribeUserType(_, _)
+            | StatementKind::Coverage(_)
+            | StatementKind::ConstEvalCounter
+            | StatementKind::FakeRead(_)
+            | StatementKind::Nop
+            | StatementKind::Retag(_, _)
+            | StatementKind::StorageLive(_)
+            | StatementKind::StorageDead(_) => Key("Ignored"),
+        }
+    }
+}
+
+impl<'tcx> From<&Terminator<'tcx>> for Key {
+    fn from(value: &Terminator<'tcx>) -> Self {
+        match value.kind {
+            TerminatorKind::Abort => Key("Abort"),
+            TerminatorKind::Assert { .. } => Key("Assert"),
+            TerminatorKind::Call { .. } => Key("Call"),
+            TerminatorKind::Drop { .. } => Key("Drop"),
+            TerminatorKind::DropAndReplace { .. } => Key("DropAndReplace"),
+            TerminatorKind::GeneratorDrop => Key("GeneratorDrop"),
+            TerminatorKind::Goto { .. } => Key("Goto"),
+            TerminatorKind::FalseEdge { .. } => Key("FalseEdge"),
+            TerminatorKind::FalseUnwind { .. } => Key("FalseUnwind"),
+            TerminatorKind::InlineAsm { .. } => Key("InlineAsm"),
+            TerminatorKind::Resume => Key("Resume"),
+            TerminatorKind::Return => Key("Return"),
+            TerminatorKind::SwitchInt { .. } => Key("SwitchInt"),
+            TerminatorKind::Unreachable => Key("Unreachable"),
+            TerminatorKind::Yield { .. } => Key("Yield"),
+        }
+    }
+}
+
+impl<'tcx> From<&Rvalue<'tcx>> for Key {
+    fn from(value: &Rvalue<'tcx>) -> Self {
+        match value {
+            Rvalue::Use(_) => Key("Use"),
+            Rvalue::Repeat(_, _) => Key("Repeat"),
+            Rvalue::Ref(_, _, _) => Key("Ref"),
+            Rvalue::ThreadLocalRef(_) => Key("ThreadLocalRef"),
+            Rvalue::AddressOf(_, _) => Key("AddressOf"),
+            Rvalue::Len(_) => Key("Len"),
+            Rvalue::Cast(_, _, _) => Key("Cast"),
+            Rvalue::BinaryOp(_, _) => Key("BinaryOp"),
+            Rvalue::CheckedBinaryOp(_, _) => Key("CheckedBinaryOp"),
+            Rvalue::NullaryOp(_, _) => Key("NullaryOp"),
+            Rvalue::UnaryOp(_, _) => Key("UnaryOp"),
+            Rvalue::Discriminant(_) => Key("Discriminant"),
+            Rvalue::Aggregate(_, _) => Key("Aggregate"),
+            Rvalue::ShallowInitBox(_, _) => Key("ShallowInitBox"),
+            Rvalue::CopyForDeref(_) => Key("CopyForDeref"),
+        }
+    }
+}
diff --git a/kani-compiler/src/kani_middle/mod.rs b/kani-compiler/src/kani_middle/mod.rs
index ccef03a5bced..a990f85c63ed 100644
--- a/kani-compiler/src/kani_middle/mod.rs
+++ b/kani-compiler/src/kani_middle/mod.rs
@@ -17,6 +17,7 @@ use rustc_target::abi::{HasDataLayout, TargetDataLayout};
 
 use self::attributes::check_attributes;
 
+pub mod analysis;
 pub mod attributes;
 pub mod coercion;
 pub mod provide;
diff --git a/tests/ui/compiler-stats/expected b/tests/ui/compiler-stats/expected
new file mode 100644
index 000000000000..d384e1bc8752
--- /dev/null
+++ b/tests/ui/compiler-stats/expected
@@ -0,0 +1,10 @@
+Reachability Analysis Result
+Total # items:
+Total # statements:
+Total # expressions:
+
+Reachable Items:
+- function:
+Statements:
+- Call:
+Expressions:
diff --git a/tests/ui/compiler-stats/stats.rs b/tests/ui/compiler-stats/stats.rs
new file mode 100644
index 000000000000..84bab8ee4c9c
--- /dev/null
+++ b/tests/ui/compiler-stats/stats.rs
@@ -0,0 +1,17 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+//
+// kani-flags: --verbose --only-codegen
+//
+//! Checks that we print compilation stats when we pass `--verbose`
+
+use std::num::NonZeroU8;
+
+fn non_zero(x: u8) {
+    assert!(x != 0);
+}
+
+#[kani::proof]
+fn check_variable() {
+    non_zero(kani::any::<NonZeroU8>().into());
+}