Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve taint #58

Merged
merged 4 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ graphviz-rust = "0.6.2"
cairo-felt = "0.8.7"
thiserror = "1.0.47"
rayon = "1.8"
fxhash = "0.2.1"

cairo-lang-compiler = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }
cairo-lang-defs = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }
Expand All @@ -33,6 +34,7 @@ cairo-lang-utils = { git = "https://github.com/starkware-libs/cairo.git", tag =
cairo-lang-sierra-generator = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }
cairo-lang-sierra = { git = "https://github.com/starkware-libs/cairo.git", tag = "v2.4.3" }


[dev-dependencies]
insta = { version = "1.30", features = ["glob"] }

Expand Down
41 changes: 19 additions & 22 deletions src/analysis/taint.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use cairo_lang_sierra::{
ids::VarId,
program::{GenStatement, Statement as SierraStatement},
};
use std::collections::{HashMap, HashSet};
use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement};
use fxhash::{FxHashMap, FxHashSet};

/// Wrapper around a VarId
/// it's used to univocally identify variables
Expand All @@ -11,11 +8,11 @@ pub struct WrapperVariable {
/// The function where the variable appear
function: String,
/// The variable's id
variable: VarId,
variable: u64,
}

impl WrapperVariable {
pub fn new(function: String, variable: VarId) -> Self {
pub fn new(function: String, variable: u64) -> Self {
WrapperVariable { function, variable }
}

Expand All @@ -25,8 +22,8 @@ impl WrapperVariable {
}

/// Return the variable
pub fn variable(&self) -> &VarId {
&self.variable
pub fn variable(&self) -> u64 {
self.variable
}
}

Expand All @@ -35,26 +32,26 @@ pub struct Taint {
/// Source WrapperVariable to set of sink WrapperVariable
/// e.g. instruction reads variables 3, 4 and has 5 as output
/// we will add (3, (5)) and (4, (5)); variable 5 is tainted by 3 and 4
map: HashMap<WrapperVariable, HashSet<WrapperVariable>>,
map: FxHashMap<WrapperVariable, FxHashSet<WrapperVariable>>,
}

impl Taint {
pub fn new(instructions: &[SierraStatement], function: String) -> Self {
let mut map = HashMap::new();
let mut map = FxHashMap::default();
analyze(&mut map, instructions, function);

Taint { map }
}

/// Returns variables tainted in a single step by source
pub fn single_step_taint(&self, source: &WrapperVariable) -> HashSet<WrapperVariable> {
pub fn single_step_taint(&self, source: &WrapperVariable) -> FxHashSet<WrapperVariable> {
self.map.get(source).cloned().unwrap_or_default()
}

/// Returns variables tainted in zero or more steps by source
pub fn multi_step_taint(&self, source: &WrapperVariable) -> HashSet<WrapperVariable> {
let mut result = HashSet::new();
let mut update = HashSet::from([source.clone()]);
pub fn multi_step_taint(&self, source: &WrapperVariable) -> FxHashSet<WrapperVariable> {
let mut result = FxHashSet::default();
let mut update = FxHashSet::from_iter([source.clone()]);
while !update.is_subset(&result) {
result.extend(update.iter().cloned());
update = update
Expand All @@ -69,7 +66,7 @@ impl Taint {
pub fn taints_any_sinks_variable(
&self,
source: &WrapperVariable,
sinks: &HashSet<WrapperVariable>,
sinks: &FxHashSet<WrapperVariable>,
) -> Vec<WrapperVariable> {
self.multi_step_taint(source)
.into_iter()
Expand All @@ -81,7 +78,7 @@ impl Taint {
pub fn taints_any_sinks(
&self,
source: &WrapperVariable,
sinks: &HashSet<WrapperVariable>,
sinks: &FxHashSet<WrapperVariable>,
) -> bool {
self.multi_step_taint(source)
.iter()
Expand All @@ -91,7 +88,7 @@ impl Taint {
/// Returns the sources that taint the sink
pub fn taints_any_sources_variable(
&self,
sources: &HashSet<WrapperVariable>,
sources: &FxHashSet<WrapperVariable>,
sink: &WrapperVariable,
) -> Vec<WrapperVariable> {
sources
Expand All @@ -104,7 +101,7 @@ impl Taint {
/// Returns true if the sink is tainted by any source
pub fn taints_any_sources(
&self,
sources: &HashSet<WrapperVariable>,
sources: &FxHashSet<WrapperVariable>,
sink: &WrapperVariable,
) -> bool {
sources
Expand All @@ -121,7 +118,7 @@ impl Taint {

/// Analyze each instruction in the current function and populate the taint map
fn analyze(
map: &mut HashMap<WrapperVariable, HashSet<WrapperVariable>>,
map: &mut FxHashMap<WrapperVariable, FxHashSet<WrapperVariable>>,
instructions: &[SierraStatement],
function: String,
) {
Expand All @@ -140,12 +137,12 @@ fn analyze(
let sinks = map
.entry(WrapperVariable {
function: function.clone(),
variable: source.clone(),
variable: source.id,
})
.or_default();
sinks.insert(WrapperVariable {
function: function.clone(),
variable: sink.clone(),
variable: sink.id,
});
}
}
Expand Down
32 changes: 12 additions & 20 deletions src/core/compilation_unit.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::{HashMap, HashSet};

use super::function::{Function, Type};
use crate::analysis::taint::Taint;
use crate::analysis::taint::WrapperVariable;
Expand All @@ -13,6 +11,8 @@ use cairo_lang_starknet::abi::{
Contract, Item::Function as AbiFunction, Item::Interface as AbiInterface,
Item::L1Handler as AbiL1Handler,
};
use fxhash::FxHashSet;
use std::collections::{HashMap, HashSet};

pub struct CompilationUnit {
/// The compiled sierra program
Expand Down Expand Up @@ -85,18 +85,15 @@ impl CompilationUnit {

/// Return true if the variable is tainted i.e. user inputs can control it in some way
pub fn is_tainted(&self, function_name: String, variable: VarId) -> bool {
let wrapped_variable = WrapperVariable::new(function_name, variable);
let mut parameters = HashSet::new();
let wrapped_variable = WrapperVariable::new(function_name, variable.id);
let mut parameters = FxHashSet::default();
for external_function in self
.functions
.iter()
.filter(|f| matches!(f.ty(), Type::External | Type::L1Handler | Type::View))
{
for param in external_function.params().skip(1) {
parameters.insert(WrapperVariable::new(
external_function.name(),
param.id.clone(),
));
parameters.insert(WrapperVariable::new(external_function.name(), param.id.id));
}
}
// Get the taint for the function where the variable appear
Expand Down Expand Up @@ -330,15 +327,15 @@ impl CompilationUnit {
/// Propagate the taints from external/l1_handler functions to private functions
fn propagate_taints(&mut self) {
// Collect the arguments of all the external/l1_handler functions
let mut arguments_external_functions: HashSet<WrapperVariable> = HashSet::new();
let mut arguments_external_functions: FxHashSet<WrapperVariable> = FxHashSet::default();
for function in self
.functions
.iter()
.filter(|f| matches!(f.ty(), Type::External | Type::L1Handler))
{
for param in function.params() {
arguments_external_functions
.insert(WrapperVariable::new(function.name(), param.id.clone()));
.insert(WrapperVariable::new(function.name(), param.id.id));
}
}

Expand Down Expand Up @@ -395,12 +392,10 @@ impl CompilationUnit {
let external_taint = taint_copy.get(&calling_function.name()).unwrap();

// Variables used as arguments in the call to the private function
let function_called_args: HashSet<WrapperVariable> = invoc
let function_called_args: FxHashSet<WrapperVariable> = invoc
.args
.iter()
.map(|arg| {
WrapperVariable::new(calling_function.name(), arg.clone())
})
.map(|arg| WrapperVariable::new(calling_function.name(), arg.id))
.collect();

// Calling function's parameters
Expand All @@ -419,10 +414,7 @@ impl CompilationUnit {
}
// Check if the arguments used to call the private function are tainted by the calling function's parameters
for sink in external_taint.taints_any_sinks_variable(
&WrapperVariable::new(
calling_function.name(),
param.id.clone(),
),
&WrapperVariable::new(calling_function.name(), param.id.id),
&function_called_args,
) {
// If the sink is tainted by some parameters of external functions
Expand All @@ -446,11 +438,11 @@ impl CompilationUnit {
// so to convert the ID we have to iterate the arguments and use the index where we find
// our sink VarId
for (i, var) in invoc.args.iter().enumerate() {
if var.id == sink.variable().id {
if var.id == sink.variable() {
// We convert the id to be the private function's formal parameter id and not the actual parameter id
let sink_converted = WrapperVariable::new(
function_called_name.clone(),
VarId::new(i.try_into().unwrap()),
i.try_into().unwrap(),
);

// Add the source i.e. the variable of the external function
Expand Down
1 change: 0 additions & 1 deletion src/core/core_unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ impl CoreUnit {
compilation_unit
})
.collect();

Ok(CoreUnit { compilation_units })
}

Expand Down
27 changes: 12 additions & 15 deletions src/detectors/array_use_after_pop_front.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::HashSet;

use super::detector::{Confidence, Detector, Impact, Result};
use crate::analysis::taint::WrapperVariable;
use crate::core::compilation_unit::CompilationUnit;
Expand All @@ -8,6 +6,8 @@ use crate::core::function::{Function, Type};
use cairo_lang_sierra::extensions::array::ArrayConcreteLibfunc;
use cairo_lang_sierra::extensions::core::{CoreConcreteLibfunc, CoreTypeConcrete};
use cairo_lang_sierra::program::{GenStatement, Statement as SierraStatement};
use fxhash::FxHashSet;
use std::collections::HashSet;

#[derive(Default)]
pub struct ArrayUseAfterPopFront {}
Expand Down Expand Up @@ -50,10 +50,7 @@ impl Detector for ArrayUseAfterPopFront {
CoreConcreteLibfunc::Array(ArrayConcreteLibfunc::PopFront(_)) => {
Some((
index,
WrapperVariable::new(
function.name(),
invoc.args[0].clone(),
),
WrapperVariable::new(function.name(), invoc.args[0].id),
))
}
_ => None,
Expand All @@ -78,7 +75,7 @@ impl Detector for ArrayUseAfterPopFront {
if !bad_array_used.is_empty() {
let array_ids = bad_array_used
.iter()
.map(|f| f.1.variable().id)
.map(|f| f.1.variable())
.collect::<Vec<u64>>();
let message = match array_ids.len() {
1 => format!(
Expand Down Expand Up @@ -163,8 +160,8 @@ impl ArrayUseAfterPopFront {

match libfunc {
CoreConcreteLibfunc::Array(ArrayConcreteLibfunc::Append(_)) => {
let mut sinks = HashSet::new();
sinks.insert(WrapperVariable::new(function.name(), invoc.args[0].clone()));
let mut sinks = FxHashSet::default();
sinks.insert(WrapperVariable::new(function.name(), invoc.args[0].id));

taint.taints_any_sinks(bad_array, &sinks)
}
Expand Down Expand Up @@ -192,10 +189,10 @@ impl ArrayUseAfterPopFront {
.expect("Library function not found in the registry");

if let CoreConcreteLibfunc::FunctionCall(_) = lib_func {
let sinks: HashSet<WrapperVariable> = invoc
let sinks: FxHashSet<WrapperVariable> = invoc
.args
.iter()
.map(|v| WrapperVariable::new(function.name(), v.clone()))
.map(|v| WrapperVariable::new(function.name(), v.id))
.collect();

return taint.taints_any_sinks(bad_array, &sinks);
Expand Down Expand Up @@ -271,7 +268,7 @@ impl ArrayUseAfterPopFront {
.map(|i| {
WrapperVariable::new(
maybe_caller.name(),
invoc.branches[0].results[*i].clone(),
invoc.branches[0].results[*i].id,
)
})
.collect();
Expand Down Expand Up @@ -325,16 +322,16 @@ impl ArrayUseAfterPopFront {
return false;
}

// Not sure if it is required because taint analysis adds all the arugments as
// Not sure if it is required because taint analysis adds all the arguments as
// tainters of the all the return values.
let returned_bad_arrays: Vec<WrapperVariable> = function
.get_statements()
.iter()
.flat_map(|s| {
if let GenStatement::Return(return_vars) = s {
let sinks: HashSet<WrapperVariable> = return_vars
let sinks: FxHashSet<WrapperVariable> = return_vars
.iter()
.map(|v| WrapperVariable::new(function.name(), v.clone()))
.map(|v| WrapperVariable::new(function.name(), v.id))
.collect();

return taint.taints_any_sinks_variable(bad_array, &sinks);
Expand Down
Loading