Budget fiddling (#1098)

Explores a number of minor issues discovered during budget calibration. Ostensibly about #1020, but it also adds some Tracy markup, does some internal cleanup, adds machinery to allow excluding the overwhelming VM instantiation cost center, and makes some attempts at setting budget costs more from first principles.
stellar · Oct 11, 2023 · 6e0e084 · 6e0e084
1 parent 7e90b1b
commit 6e0e084
Show file tree

Hide file tree

Showing 18 changed files with 292 additions and 82 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/soroban-bench-utils/src/tracker.rs b/soroban-bench-utils/src/tracker.rs
@@ -63,7 +63,15 @@ mod cpu {
         }
         pub fn end_and_count(&mut self) -> u64 {
             self.0.disable().expect("perf_event::Counter::disable");
-            self.0.read().expect("perf_event::Counter::read")
+            let tandc = self
+                .0
+                .read_count_and_time()
+                .expect("perf_event::Counter::read_count_and_time");
+            if tandc.time_enabled == tandc.time_running {
+                tandc.count
+            } else {
+                panic!("time enabled != time running")
+            }
         }
     }
 }
@@ -120,34 +128,45 @@ pub struct HostTracker<'a> {
     mem_tracker: MemTracker,
     start_time: Instant,
     alloc_guard: Option<AllocationGuard<'a>>,
+    #[cfg(feature = "tracy")]
+    tracy_span: Option<tracy_client::Span>,
 }
 
 impl<'a> HostTracker<'a> {
-    pub fn start(token: Option<&'a mut AllocationGroupToken>) -> Self {
+    pub fn new() -> Self {
         // Setup the instrumentation
-        let mut cpu_insn_counter = cpu::InstructionCounter::new();
+        let cpu_insn_counter = cpu::InstructionCounter::new();
         let mem_tracker = MemTracker(Arc::new(AtomicU64::new(0)));
         AllocationRegistry::set_global_tracker(mem_tracker.clone())
             .expect("no other global tracker should be set yet");
         AllocationRegistry::enable_tracking();
 
-        // start the cpu and mem measurement
-        mem_tracker.0.store(0, Ordering::SeqCst);
-        let alloc_guard: Option<AllocationGuard> = if let Some(t) = token {
+        HostTracker {
+            cpu_insn_counter,
+            mem_tracker,
+            start_time: Instant::now(),
+            alloc_guard: None,
+            #[cfg(feature = "tracy")]
+            tracy_span: None,
+        }
+    }
+
+    pub fn start(&mut self, token: Option<&'a mut AllocationGroupToken>) {
+        // start the mem measurement
+        #[cfg(feature = "tracy")]
+        {
+            self.tracy_span = Some(tracy_span!("tracker active"));
+        }
+        self.mem_tracker.0.store(0, Ordering::SeqCst);
+        self.alloc_guard = if let Some(t) = token {
             Some(t.enter())
         } else {
             None
         };
 
-        let start_time = Instant::now();
-        cpu_insn_counter.begin();
-
-        HostTracker {
-            cpu_insn_counter,
-            mem_tracker,
-            start_time,
-            alloc_guard,
-        }
+        // start the cpu measurement
+        self.start_time = Instant::now();
+        self.cpu_insn_counter.begin();
     }
 
     pub fn stop(mut self) -> (u64, u64, u64) {
@@ -157,10 +176,13 @@ impl<'a> HostTracker<'a> {
         if let Some(g) = self.alloc_guard {
             drop(g)
         }
-
         let mem_bytes = self.mem_tracker.0.load(Ordering::SeqCst);
         let time_nsecs = stop_time.duration_since(self.start_time).as_nanos() as u64;
-
+        self.alloc_guard = None;
+        #[cfg(feature = "tracy")]
+        {
+            self.tracy_span = None
+        }
         AllocationRegistry::disable_tracking();
         unsafe {
             AllocationRegistry::clear_global_tracker();

diff --git a/soroban-env-common/Cargo.toml b/soroban-env-common/Cargo.toml
@@ -24,6 +24,9 @@ arbitrary = { version = "1.3.0", features = ["derive"], optional = true }
 num-traits = {version = "0.2.15", default-features = false}
 num-derive = "0.4.0"
 
+[target.'cfg(not(target_family = "wasm"))'.dependencies]
+tracy-client = { version = "=0.15.2", features = ["enable", "timer-fallback"], default-features = false, optional = true }
+
 [dev-dependencies]
 num_enum = "0.7.0"
 num-traits = "0.2.15"
@@ -34,6 +37,7 @@ serde = ["dep:serde", "stellar-xdr/serde"]
 wasmi = ["dep:wasmi"]
 testutils = ["dep:arbitrary", "stellar-xdr/arbitrary"]
 next = ["stellar-xdr/next", "soroban-env-macros/next"]
+tracy = ["dep:tracy-client"]
 
 [package.metadata.docs.rs]
 all-features = true
diff --git a/soroban-env-common/src/lib.rs b/soroban-env-common/src/lib.rs
@@ -18,6 +18,28 @@
 //! [Val] type and XDR types, and re-exports the XDR definitions from
 //! [stellar_xdr] under the module [xdr].
 
+#[allow(unused_macros)]
+#[cfg(all(not(target_family = "wasm"), feature = "tracy"))]
+macro_rules! tracy_span {
+    () => {
+        tracy_client::span!()
+    };
+    ($name:expr) => {
+        tracy_client::span!($name)
+    };
+}
+
+#[allow(unused_macros)]
+#[cfg(any(target_family = "wasm", not(feature = "tracy")))]
+macro_rules! tracy_span {
+    () => {
+        ()
+    };
+    ($name:expr) => {
+        ()
+    };
+}
+
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Version<'a> {

diff --git a/soroban-env-common/src/vmcaller_env.rs b/soroban-env-common/src/vmcaller_env.rs
@@ -177,6 +177,8 @@ macro_rules! vmcaller_none_function_helper {
         // that didn't have an Env on hand when creating the error. This will at
         // least localize the error to a given Env call.
         fn $fn_id(&self, $($arg:$type),*) -> Result<$ret, Self::Error> {
+            #[cfg(all(not(target_family = "wasm"), feature = "tracy"))]
+            let _span = tracy_span!(core::stringify!($fn_id));
             self.augment_err_result(<Self as VmCallerEnv>::$fn_id(self, &mut VmCaller::none(), $($arg),*))
         }
     };

diff --git a/soroban-env-host/Cargo.toml b/soroban-env-host/Cargo.toml
@@ -55,7 +55,7 @@ pretty_assertions = "1.4.0"
 [features]
 testutils = ["soroban-env-common/testutils"]
 next = ["soroban-env-common/next", "soroban-test-wasms/next", "soroban-synth-wasm/next", "soroban-bench-utils/next"]
-tracy = ["dep:tracy-client"]
+tracy = ["dep:tracy-client", "soroban-env-common/tracy"]
 
 [[bench]]
 required-features = ["testutils"]

diff --git a/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs b/soroban-env-host/benches/common/cost_types/host_mem_cpy.rs
@@ -1,6 +1,6 @@
 use crate::common::HostCostMeasurement;
 use rand::{rngs::StdRng, RngCore};
-use soroban_env_host::{cost_runner::HostMemCpyRun, Host};
+use soroban_env_host::{budget::COST_MODEL_LIN_TERM_SCALE_BITS, cost_runner::HostMemCpyRun, Host};
 
 // Measures the cost of copying a chunk of memory in the host (no allocation).
 // The input value is the number of bytes copied.
@@ -9,7 +9,14 @@ pub(crate) struct HostMemCpyMeasure;
 impl HostCostMeasurement for HostMemCpyMeasure {
     type Runner = HostMemCpyRun;
 
-    const STEP_SIZE: u64 = 4096;
+    // Rust and LLVM will conspire to optimize the heck out of a large memcpy.
+    // This will cause us to gather completely wrong numbers for the cost of a
+    // small memcpy, which almost all our memcpys are (they're not even likely
+    // to be calls to memcpy, they're just "byte moving in the abstract sense",
+    // usually only a few dozen or hundred at a time). So we use the smallest
+    // number here we're allowed to use: the linear scale factor, which
+    // STEP_SIZE literally isn't allowed to be smaller than.
+    const STEP_SIZE: u64 = 1 << COST_MODEL_LIN_TERM_SCALE_BITS;
 
     fn new_random_case(_host: &Host, rng: &mut StdRng, input: u64) -> (Vec<u8>, Vec<u8>) {
         let len = 1 + input * Self::STEP_SIZE;

diff --git a/soroban-env-host/benches/common/measure.rs b/soroban-env-host/benches/common/measure.rs
@@ -1,11 +1,12 @@
 use rand::{rngs::StdRng, Rng, SeedableRng};
 use soroban_bench_utils::{tracking_allocator::AllocationGroupToken, HostTracker};
+use soroban_env_common::xdr::ContractCostType;
 use soroban_env_host::{
     budget::{AsBudget, COST_MODEL_LIN_TERM_SCALE_BITS},
     cost_runner::CostRunner,
     Host,
 };
-use std::ops::Range;
+use std::{io, ops::Range};
 use tabwriter::{Alignment, TabWriter};
 
 use super::{fit_model, FPCostModel};
@@ -27,6 +28,36 @@ pub struct Measurements {
 }
 
 impl Measurements {
+    // Basic spot check: the baseline must not be a significant fraction of the
+    // max measurement, i.e. the max must be at least 10x the baseline.
+    fn check_one_baseline_range(
+        &self,
+        cost: ContractCostType,
+        meas: &str,
+        f: impl Fn(&Measurement) -> u64,
+    ) -> Result<(), io::Error> {
+        let max = self.measurements.iter().map(&f).max().unwrap_or_default();
+        let base = f(&self.baseline);
+        if max < base * 10 {
+            println!("max {meas} measurement for {cost} is {max} which is less than 10x baseline {base}, try higher iteration or step size");
+            Err(io::ErrorKind::InvalidData.into())
+        } else {
+            Ok(())
+        }
+    }
+
+    // Confirms that there's a reasonable range of values above the baseline.
+    // Only relevant for certain measurements, and there is currently no systematic
+    // way to tell which, so it only runs when CHECK_RANGE_AGAINST_BASELINE is set.
+    pub fn check_range_against_baseline(&self, cost: ContractCostType) -> Result<(), io::Error> {
+        if std::env::var("CHECK_RANGE_AGAINST_BASELINE").is_ok() {
+            self.check_one_baseline_range(cost, "cpu", |m| m.cpu_insns)?;
+            self.check_one_baseline_range(cost, "mem", |m| m.mem_bytes)?;
+            self.check_one_baseline_range(cost, "time", |m| m.time_nsecs)?;
+        }
+        Ok(())
+    }
+
     // This is the preprocess step to convert raw measurements into `averaged_net_measurements`,
     // ready to be fitted by the linear model.
     // We start from `N_r * ( N_x * (a + b * Option<x>) + Overhead_b)`, first subtracts baseline
@@ -318,10 +349,12 @@ where
         &mut Vec<<<HCM as HostCostMeasurement>::Runner as CostRunner>::RecycledType>,
     ),
 {
+    assert!(HCM::STEP_SIZE >= (1 << COST_MODEL_LIN_TERM_SCALE_BITS));
     let mut recycled_samples = Vec::with_capacity(samples.len());
     host.as_budget().reset_unlimited().unwrap();
 
-    let ht = HostTracker::start(alloc_group_token);
+    let mut ht = HostTracker::new();
+    ht.start(alloc_group_token);
 
     runner(host, samples, &mut recycled_samples);
 

diff --git a/soroban-env-host/benches/variation_histograms.rs b/soroban-env-host/benches/variation_histograms.rs
@@ -7,6 +7,7 @@ struct LinearModelTables;
 impl Benchmark for LinearModelTables {
     fn bench<HCM: HostCostMeasurement>() -> std::io::Result<(FPCostModel, FPCostModel)> {
         let mut measurements = measure_cost_variation::<HCM>(100)?;
+        measurements.check_range_against_baseline(HCM::Runner::COST_TYPE)?;
         measurements.preprocess();
         measurements.report_histogram("cpu", |m| m.cpu_insns);
         measurements.report_histogram("mem", |m| m.mem_bytes);

diff --git a/soroban-env-host/benches/worst_case_linear_models.rs b/soroban-env-host/benches/worst_case_linear_models.rs
@@ -4,15 +4,18 @@
 // $ cargo bench --features wasmi,testutils --bench worst_case_linear_models -- VecNew I64Rotr --nocapture
 mod common;
 use common::*;
-use soroban_env_host::{cost_runner::WasmInsnType, xdr::ContractCostType};
+use soroban_env_host::{
+    cost_runner::{CostRunner, WasmInsnType},
+    xdr::ContractCostType,
+};
 use std::{collections::BTreeMap, fmt::Debug, io::Write};
 use tabwriter::{Alignment, TabWriter};
 
 struct WorstCaseLinearModels;
 impl Benchmark for WorstCaseLinearModels {
     fn bench<HCM: HostCostMeasurement>() -> std::io::Result<(FPCostModel, FPCostModel)> {
         let mut measurements = measure_worst_case_costs::<HCM>(1..20)?;
-
+        measurements.check_range_against_baseline(HCM::Runner::COST_TYPE)?;
         measurements.preprocess();
         measurements.report_table();
         let cpu_model = measurements.fit_model_to_cpu();