Skip to content

Commit

Permalink
feat: Custom dictionary support
Browse files Browse the repository at this point in the history
Switching `valid-*` to just `*` where you map typo to correction, with
support for always-valid and never-valid.

Fixes #9
  • Loading branch information
Ed Page committed Oct 28, 2020
1 parent 79d10d6 commit 527b983
Show file tree
Hide file tree
Showing 11 changed files with 352 additions and 225 deletions.
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ schedules:
include:
- master
variables:
minrust: 1.40.0
minrust: 1.42.0
codecov_token: $(CODECOV_TOKEN_SECRET)
windows_vm: vs2017-win2016
mac_vm: macos-10.14
Expand Down
6 changes: 4 additions & 2 deletions benches/corrections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ fn correct_word_hit(b: &mut test::Bencher) {
let input = typos::tokens::Word::new("successs", 0).unwrap();
assert_eq!(
corrections.correct_word(input),
vec![std::borrow::Cow::Borrowed("successes")]
Some(typos::Status::Corrections(vec![
std::borrow::Cow::Borrowed("successes")
]))
);
b.iter(|| corrections.correct_word(input));
}
Expand All @@ -22,6 +24,6 @@ fn correct_word_hit(b: &mut test::Bencher) {
/// Benchmarks `correct_word` for a word the built-in dictionary does not flag ("success").
///
/// The assertion before the timed loop pins the "miss" precondition so the benchmark
/// cannot silently start measuring the hit path.
// NOTE(review): this listing shows both forms of the precondition assert — the
// `is_empty()` check on the former `Vec` return and the `is_none()` check on the
// `Option<Status>` return; only the latter matches the updated `correct_word` signature.
fn correct_word_miss(b: &mut test::Bencher) {
let corrections = typos_cli::dict::BuiltIn::new(Default::default());
let input = typos::tokens::Word::new("success", 0).unwrap();
assert!(corrections.correct_word(input).is_empty());
assert!(corrections.correct_word(input).is_none());
b.iter(|| corrections.correct_word(input));
}
103 changes: 59 additions & 44 deletions crates/typos/src/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use bstr::ByteSlice;
use crate::report;
use crate::tokens;
use crate::Dictionary;
use crate::Status;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TyposSettings {
Expand Down Expand Up @@ -233,16 +234,20 @@ impl Checks {
dictionary: &dyn Dictionary,
reporter: &dyn report::Report,
) -> Result<bool, crate::Error> {
let mut typos_found = false;

if !self.check_filenames {
return Ok(typos_found);
return Ok(false);
}

if let Some(part) = path.file_name().and_then(|s| s.to_str()) {
for ident in parser.parse(part) {
let corrections = dictionary.correct_ident(ident);
if !corrections.is_empty() {
let mut typos_found = false;
for ident in path
.file_name()
.and_then(|s| s.to_str())
.iter()
.flat_map(|part| parser.parse(part))
{
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset();
let msg = report::PathCorrection {
path,
Expand All @@ -251,18 +256,22 @@ impl Checks {
corrections,
};
typos_found |= reporter.report(msg.into());
} else {
}
None => {
for word in ident.split() {
let corrections = dictionary.correct_word(word);
if !corrections.is_empty() {
let byte_offset = word.offset();
let msg = report::PathCorrection {
path,
byte_offset,
typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
match dictionary.correct_word(word) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = word.offset();
let msg = report::PathCorrection {
path,
byte_offset,
typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {}
}
}
}
Expand Down Expand Up @@ -305,32 +314,38 @@ impl Checks {
for (line_idx, line) in buffer.lines().enumerate() {
let line_num = line_idx + 1;
for ident in parser.parse_bytes(line) {
let corrections = dictionary.correct_ident(ident);
if !corrections.is_empty() {
let byte_offset = ident.offset();
let msg = report::Correction {
path,
line,
line_num,
byte_offset,
typo: ident.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
} else {
for word in ident.split() {
let corrections = dictionary.correct_word(word);
if !corrections.is_empty() {
let byte_offset = word.offset();
let msg = report::Correction {
path,
line,
line_num,
byte_offset,
typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
match dictionary.correct_ident(ident) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = ident.offset();
let msg = report::Correction {
path,
line,
line_num,
byte_offset,
typo: ident.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {
for word in ident.split() {
match dictionary.correct_word(word) {
Some(Status::Valid) => {}
Some(corrections) => {
let byte_offset = word.offset();
let msg = report::Correction {
path,
line,
line_num,
byte_offset,
typo: word.token(),
corrections,
};
typos_found |= reporter.report(msg.into());
}
None => {}
}
}
}
}
Expand Down
46 changes: 44 additions & 2 deletions crates/typos/src/dict.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,49 @@
use std::borrow::Cow;

/// Verdict a dictionary gives for a single token (identifier or word).
///
/// The lifetime `'c` is that of any borrowed correction strings.
// NOTE(review): with `#[serde(untagged)]` the two unit variants presumably serialize to
// the same value, and `rename_all` would then have no visible effect — confirm the
// intended serialized form against the serde enum-representation docs.
#[derive(Clone, PartialEq, Eq, Debug, serde::Serialize, derive_more::From)]
#[serde(rename_all = "snake_case")]
#[serde(untagged)]
pub enum Status<'c> {
/// The token is accepted as-is; reporters print nothing for it.
Valid,
/// The token is rejected with no replacement offered; reporters print it as "disallowed".
Invalid,
/// The token is a typo; the payload lists the suggested replacements.
Corrections(Vec<Cow<'c, str>>),
}

impl<'c> Status<'c> {
    /// Returns `true` if the token is rejected outright, with no replacement suggested.
    pub fn is_invalid(&self) -> bool {
        matches!(self, Status::Invalid)
    }

    /// Returns `true` if the token is accepted as-is.
    pub fn is_valid(&self) -> bool {
        matches!(self, Status::Valid)
    }

    /// Returns `true` if the status carries a list of suggested replacements.
    pub fn is_correction(&self) -> bool {
        matches!(self, Status::Corrections(_))
    }

    /// Iterates mutably over the suggested replacements.
    ///
    /// Yields nothing for [`Status::Valid`] and [`Status::Invalid`].
    pub fn corrections_mut(&mut self) -> impl Iterator<Item = &mut Cow<'c, str>> {
        // Both cases reduce to a mutable slice, so a single `slice::IterMut` covers them;
        // no either-style wrapper is needed. The match is exhaustive on purpose: a new
        // variant must decide explicitly whether it exposes corrections.
        let corrections: &mut [Cow<'c, str>] = match self {
            Status::Corrections(corrections) => corrections.as_mut_slice(),
            Status::Valid | Status::Invalid => &mut [],
        };
        corrections.iter_mut()
    }

    /// Returns a copy of this status whose corrections borrow from `self`.
    ///
    /// A new `Vec` is allocated for the list, but each string inside it is a
    /// `Cow::Borrowed` view of the original rather than a cloned `String`.
    pub fn borrow(&self) -> Status<'_> {
        match self {
            Status::Valid => Status::Valid,
            Status::Invalid => Status::Invalid,
            Status::Corrections(corrections) => Status::Corrections(
                corrections
                    .iter()
                    .map(|c| Cow::Borrowed(c.as_ref()))
                    .collect(),
            ),
        }
    }
}

/// Lookup interface the checker queries for a verdict on identifiers and on the words
/// they split into. Implementors must be `Send + Sync`.
///
/// With the `Option<Status>` return type, the visible callers treat the result as:
/// `None` — the dictionary has no entry (for an identifier, the checker then falls back
/// to checking each of its words); `Some(Status::Valid)` — accepted, nothing reported;
/// any other `Some(..)` — reported to the user.
// NOTE(review): this listing shows each method twice — first with the former
// `Vec<Cow<'s, str>>` return type, then with the `Option<Status<'s>>` return type that
// supersedes it.
pub trait Dictionary: Send + Sync {
// Verdict for a whole identifier, consulted before the identifier is split into words.
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>;
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
-> Option<Status<'s>>;

// Verdict for a single word obtained from splitting an identifier.
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>;
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
}
116 changes: 75 additions & 41 deletions crates/typos/src/report.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#![allow(clippy::needless_update)]

use std::borrow::Cow;
use std::io::{self, Write};

#[derive(Clone, Debug, serde::Serialize, derive_more::From)]
Expand All @@ -21,8 +20,8 @@ impl<'m> Message<'m> {
pub fn is_correction(&self) -> bool {
match self {
Message::BinaryFile(_) => false,
Message::Correction(_) => true,
Message::PathCorrection(_) => true,
Message::Correction(c) => c.corrections.is_correction(),
Message::PathCorrection(c) => c.corrections.is_correction(),
Message::File(_) => false,
Message::Parse(_) => false,
Message::PathError(_) => false,
Expand Down Expand Up @@ -59,7 +58,7 @@ pub struct Correction<'m> {
pub line_num: usize,
pub byte_offset: usize,
pub typo: &'m str,
pub corrections: Vec<Cow<'m, str>>,
pub corrections: crate::Status<'m>,
}

impl<'m> Default for Correction<'m> {
Expand All @@ -70,7 +69,7 @@ impl<'m> Default for Correction<'m> {
line_num: 0,
byte_offset: 0,
typo: "",
corrections: Vec::new(),
corrections: crate::Status::Invalid,
}
}
}
Expand All @@ -81,7 +80,7 @@ pub struct PathCorrection<'m> {
pub path: &'m std::path::Path,
pub byte_offset: usize,
pub typo: &'m str,
pub corrections: Vec<Cow<'m, str>>,
pub corrections: crate::Status<'m>,
}

impl<'m> Default for PathCorrection<'m> {
Expand All @@ -90,7 +89,7 @@ impl<'m> Default for PathCorrection<'m> {
path: std::path::Path::new("-"),
byte_offset: 0,
typo: "",
corrections: Vec::new(),
corrections: crate::Status::Invalid,
}
}
}
Expand Down Expand Up @@ -196,24 +195,42 @@ impl Report for PrintBrief {
Message::BinaryFile(msg) => {
log::info!("{}", msg);
}
Message::Correction(msg) => {
println!(
"{}:{}:{}: {} -> {}",
msg.path.display(),
msg.line_num,
msg.byte_offset,
msg.typo,
itertools::join(msg.corrections.iter(), ", ")
);
}
Message::PathCorrection(msg) => {
println!(
"{}: {} -> {}",
msg.path.display(),
msg.typo,
itertools::join(msg.corrections.iter(), ", ")
);
}
Message::Correction(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!(
"{}:{}:{}: {} is disallowed",
msg.path.display(),
msg.line_num,
msg.byte_offset,
msg.typo,
);
}
crate::Status::Corrections(corrections) => {
println!(
"{}:{}:{}: {} -> {}",
msg.path.display(),
msg.line_num,
msg.byte_offset,
msg.typo,
itertools::join(corrections.iter(), ", ")
);
}
},
Message::PathCorrection(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!("{}: {} is disallowed", msg.path.display(), msg.typo,);
}
crate::Status::Corrections(corrections) => {
println!(
"{}: {} -> {}",
msg.path.display(),
msg.typo,
itertools::join(corrections.iter(), ", ")
);
}
},
Message::File(msg) => {
println!("{}", msg.path.display());
}
Expand Down Expand Up @@ -241,14 +258,24 @@ impl Report for PrintLong {
log::info!("{}", msg);
}
Message::Correction(msg) => print_long_correction(msg),
Message::PathCorrection(msg) => {
println!(
"{}: error: `{}` should be {}",
msg.path.display(),
msg.typo,
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
);
}
Message::PathCorrection(msg) => match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
println!(
"{}: error: `{}` is disallowed",
msg.path.display(),
msg.typo,
);
}
crate::Status::Corrections(corrections) => {
println!(
"{}: error: `{}` should be {}",
msg.path.display(),
msg.typo,
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
);
}
},
Message::File(msg) => {
println!("{}", msg.path.display());
}
Expand Down Expand Up @@ -278,14 +305,21 @@ fn print_long_correction(msg: &Correction) {

let stdout = io::stdout();
let mut handle = stdout.lock();

writeln!(
handle,
"error: `{}` should be {}",
msg.typo,
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
)
.unwrap();
match &msg.corrections {
crate::Status::Valid => {}
crate::Status::Invalid => {
writeln!(handle, "error: `{}` is disallowed", msg.typo,).unwrap();
}
crate::Status::Corrections(corrections) => {
writeln!(
handle,
"error: `{}` should be {}",
msg.typo,
itertools::join(corrections.iter().map(|c| format!("`{}`", c)), ", ")
)
.unwrap();
}
}
writeln!(
handle,
" --> {}:{}:{}",
Expand Down
Loading

0 comments on commit 527b983

Please sign in to comment.