Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add suggestions when misspelling a record field #1710

Merged
merged 12 commits into from
Nov 13, 2023
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions cli/tests/snapshot/inputs/errors/record_access_suggestion.ncl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# capture = 'stderr'
# command = ['eval']
({
some_field_name = "some value",
another_field_name = "another value",
}).some_fild_nam
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
source: cli/tests/snapshot/main.rs
expression: err
---
error: missing field `some_fild_nam`
┌─ [INPUTS_PATH]/errors/record_access_suggestion.ncl:3:1
3 │ ╭ ╭ ({
4 │ │ │ some_field_name = "some value",
5 │ │ │ another_field_name = "another value",
6 │ │ │ }).some_fild_nam
│ ╰─│────────────────^ this requires the field `some_fild_nam` to exist
│ ╰──' this record lacks the field `some_fild_nam`
= Did you mean `some_field_name`?


1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ topiary-queries = { workspace = true, optional = true }
tree-sitter-nickel = { workspace = true, optional = true }

metrics = { workspace = true, optional = true }
strsim = "0.10.0"

[dev-dependencies]
pretty_assertions.workspace = true
Expand Down
47 changes: 34 additions & 13 deletions core/src/error.rs → core/src/error/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ use crate::{
typ::{Type, TypeF, VarKindDiscriminant},
};

pub mod suggest;

/// A general error occurring during either parsing or evaluation.
#[derive(Debug, Clone, PartialEq)]
pub enum Error {
Expand Down Expand Up @@ -96,12 +98,18 @@ pub enum EvalError {
),
/// A field access, or another record operation requiring the existence of a specific field,
/// has been performed on a record missing that field.
FieldMissing(
/* field identifier */ String,
/* operator */ String,
RichTerm,
TermPos,
),
FieldMissing {
// The name of the missing field.
id: LocIdent,
// The actual fields of the record used to suggest similar fields.
field_names: Vec<LocIdent>,
// The primitive operation that required the field to exist.
operator: String,
// The position of the record value which is missing the field.
pos_record: TermPos,
// The position of the primitive operation application.
pos_op: TermPos,
},
/// Too few arguments were provided to a builtin function.
NotEnoughArgs(
/* required arg count */ usize,
Expand Down Expand Up @@ -1054,31 +1062,44 @@ impl IntoDiagnostics<FileId> for EvalError {
secondary_alt(pos_opt, format!("({}) ({})", t, arg), files)
.with_message("applied here"),
])],
EvalError::FieldMissing(field, op, t, span_opt) => {
EvalError::FieldMissing {
id: name,
field_names,
operator,
pos_record,
pos_op,
} => {
let mut labels = Vec::new();
let mut notes = Vec::new();
let field = escape(&field);
let suggestion = suggest::find_best_match(&field_names, &name);
let field = escape(name.as_ref());

if let Some(span) = span_opt.into_opt() {
if let Some(span) = pos_op.into_opt() {
labels.push(
Label::primary(span.src_id, span.start.to_usize()..span.end.to_usize())
.with_message(format!("this requires the field `{field}` to exist")),
);
} else {
notes.push(format!(
"The field `{field}` was required by the operator {op}"
"The field `{field}` was required by the operator {operator}"
));
}

if let Some(span) = t.pos.as_opt_ref() {
if let Some(span) = pos_record.as_opt_ref() {
labels.push(
secondary(span).with_message(format!("field `{field}` is missing here")),
secondary(span)
.with_message(format!("this record lacks the field `{field}`")),
);
}

if let Some(suggestion) = suggestion {
notes.push(format!("Did you mean `{}`?", suggestion));
}

vec![Diagnostic::error()
.with_message(format!("missing field `{field}`"))
.with_labels(labels)]
.with_labels(labels)
.with_notes(notes)]
}
EvalError::NotEnoughArgs(count, op, span_opt) => {
let mut labels = Vec::new();
Expand Down
49 changes: 49 additions & 0 deletions core/src/error/suggest.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//! Suggest existing symbols that are similar to what the user typed in case of various "symbol not
//! found" errors, such as [super::EvalError::UnboundIdentifier] or
//! [super::EvalError::FieldMissing].
//!
//! The current implementation uses the normalized Damereau-Levenshtein edit distance to find the
//! closest match.

use strsim::normalized_damerau_levenshtein;

/// The minimum similarity between the user's input and an existing symbol for the symbol to be
/// considered a serious candidate. The current threshold is rather low, because short words with
/// edit distance 1 such as `bar` and `bare` might have a similarity which isn't that high.
pub const MIN_SIMILARITY: f64 = 0.60;
yannham marked this conversation as resolved.
Show resolved Hide resolved

/// Find the closest match to the user input in the given list of symbols, to be used as a
/// suggestion. If there is an exact match (modulo casing), it is returned immediately. Otherwise,
/// the symbol with the highest normalized Damerau-Levenshtein similarity is returned, as long as
/// the similarity is above the fixed threshold [MIN_SIMILARITY].
pub fn find_best_match<'syms, 'input, S, I>(
yannham marked this conversation as resolved.
Show resolved Hide resolved
symbols: &'syms [S],
user_input: &'input I,
) -> Option<&'syms str>
where
S: AsRef<str>,
I: AsRef<str>,
{
let mut max = std::f64::MIN;
let mut arg_max: Option<&'syms str> = None;

if symbols.is_empty() {
return None;
}

for sym in symbols {
// If there is an exact match (modulo case), return it immediately.
if sym.as_ref().to_lowercase() == user_input.as_ref().to_lowercase() {
return Some(sym.as_ref());
}

let similarity = normalized_damerau_levenshtein(sym.as_ref(), user_input.as_ref());
yannham marked this conversation as resolved.
Show resolved Hide resolved

if similarity > max && similarity >= MIN_SIMILARITY {
arg_max = Some(sym.as_ref());
max = similarity;
}
}

arg_max
}
13 changes: 7 additions & 6 deletions core/src/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,12 +299,13 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
pending_contracts: Default::default(),
};
} else {
return Err(EvalError::FieldMissing(
id.to_string(),
String::from("query"),
RichTerm::new(Term::Record(record_data.clone()), prev_pos),
id.pos,
));
return Err(EvalError::FieldMissing {
id,
field_names: record_data.field_names(RecordOpKind::ConsiderAllFields),
operator: String::from("query"),
pos_record: prev_pos,
pos_op: id.pos,
});
}
}
Some(_) => {
Expand Down
61 changes: 30 additions & 31 deletions core/src/eval/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,13 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
call_stack: std::mem::take(&mut self.call_stack),
})
}
_ => Err(EvalError::FieldMissing(
id.into_label(),
String::from("(.)"),
RichTerm { term: t, pos },
_ => Err(EvalError::FieldMissing {
id,
field_names: record.field_names(RecordOpKind::IgnoreEmptyOpt),
operator: String::from("(.)"),
pos_record: pos,
pos_op,
)),
}),
}, //TODO include the position of operators on the stack
}
} else {
Expand All @@ -471,19 +472,14 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
}
UnaryOp::FieldsOf() => match_sharedterm!(match (t) {
Term::Record(record) => {
let mut fields: Vec<String> = record
.fields
let fields_as_terms: Array = record
.field_names(RecordOpKind::IgnoreEmptyOpt)
.into_iter()
// Ignore optional fields without definitions.
.filter_map(|(id, field)| {
(!field.is_empty_optional()).then(|| id.to_string())
})
.map(mk_term::string)
.collect();
fields.sort();
let terms = fields.into_iter().map(mk_term::string).collect();

Ok(Closure::atomic_closure(RichTerm::new(
Term::Array(terms, ArrayAttrs::new().closurized()),
Term::Array(fields_as_terms, ArrayAttrs::new().closurized()),
pos_op_inh,
)))
}
Expand Down Expand Up @@ -1571,15 +1567,14 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
call_stack: std::mem::take(&mut self.call_stack),
})
}
_ => Err(EvalError::FieldMissing(
id.into_inner(),
String::from("(.$)"),
RichTerm {
term: t2,
pos: pos2,
},
_ => Err(EvalError::FieldMissing {
id: ident,
field_names: record
.field_names(RecordOpKind::IgnoreEmptyOpt),
operator: String::from("(.$)"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to this PR, but where is that .$ coming from?

Copy link
Member Author

@yannham yannham Nov 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, good catch. In fact the operator wasn't shown before this PR (basically the notes were created but never used - it's unrelated to this PR, it's just that I needed to add the suggestion to the notes, so I realized, and it was in fact surprising that the rust compiler haven't yelled at me for unused value until now). So we might have let old names slipped through.

I believe this was the original syntax for dynamic access field, like foo.$x was the current foo."%{x}", because we used to have $ as the interpolation character and we didn't have quoted fields yet, such as foo."bar^baz". When we added the later syntax, it felt better to avoid keeping the strange .$ which could just be subsumed by quoted field access + interpolation.

pos_record: pos2,
pos_op,
)),
}),
},
}
} else {
Expand Down Expand Up @@ -1690,15 +1685,19 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
call_stack: std::mem::take(&mut self.call_stack),
})
}
_ => Err(EvalError::FieldMissing(
id.into_inner(),
String::from("record_remove"),
RichTerm::new(
Term::Record(RecordData { fields, ..record }),
pos2,
),
pos_op,
)),
_ => {
// We reconstruct the record's data to have access to
// `data.field_names()`
let record = RecordData { fields, ..record };

Err(EvalError::FieldMissing {
id: id.into(),
field_names: record.field_names(op_kind),
operator: String::from("record_remove"),
pos_record: pos2,
pos_op,
})
}
}
} else {
Ok(Closure {
Expand Down
20 changes: 20 additions & 0 deletions core/src/term/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,26 @@ impl RecordData {
_ => Ok(None),
}
}

/// Return a vector of all the fields' names of this record sorted alphabetically.
///
/// # Parameters
///
/// - `op_kind` controls if we should ignore or include empty optional fields
pub fn field_names(&self, op_kind: RecordOpKind) -> Vec<LocIdent> {
let mut fields: Vec<LocIdent> = self
.fields
.iter()
// Ignore optional fields without definitions.
.filter(|(_, field)| {
matches!(op_kind, RecordOpKind::ConsiderAllFields) || !field.is_empty_optional()
})
.map(|(id, _)| *id)
.collect();

fields.sort_by(|id1, id2| id1.label().cmp(id2.label()));
fields
}
}

/// The sealed tail of a Nickel record under a polymorphic contract.
Expand Down
13 changes: 13 additions & 0 deletions core/src/term/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize};
use unicode_segmentation::UnicodeSegmentation;

use super::{array::Array, CompiledRegex, Number, Term};
use crate::identifier::{Ident, LocIdent};

/// A Nickel string is really just a Rust `String`, overlayed with some
/// methods implementing custom logic (in particular, functions which
Expand All @@ -29,6 +30,18 @@ impl From<&NickelString> for String {
}
}

impl From<NickelString> for Ident {
fn from(s: NickelString) -> Self {
Ident::from(s.0)
}
}

impl From<NickelString> for LocIdent {
fn from(s: NickelString) -> Self {
LocIdent::from(s.0)
}
}

// The below impls broadly allow `NclString`s to be treated just like
// Rust `String`s.

Expand Down
7 changes: 4 additions & 3 deletions core/tests/integration/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,10 @@ impl PartialEq<Error> for ErrorExpectation {
_ => false,
}
}
(EvalFieldMissing { field }, Error::EvalError(EvalError::FieldMissing(ident, ..))) => {
field == ident
}
(
EvalFieldMissing { field },
Error::EvalError(EvalError::FieldMissing { id: name, .. }),
) => field == name.label(),
(
EvalMissingFieldDef { field },
Error::EvalError(EvalError::MissingFieldDef { id, .. }),
Expand Down