From 4be9b785135b287ce7605db9772fd082cddbe5c6 Mon Sep 17 00:00:00 2001 From: Hesam Pakdaman Date: Sat, 25 May 2024 22:16:29 +0200 Subject: [PATCH] Fix 337: Look ahead for b' ' if skip_initial_space is enabled --- csv-core/src/reader.rs | 44 ++++++++++++++++++++++++++++++++++++++++++ src/reader.rs | 9 +++++++++ 2 files changed, 53 insertions(+) diff --git a/csv-core/src/reader.rs b/csv-core/src/reader.rs index dbd6dc3..563abd6 100644 --- a/csv-core/src/reader.rs +++ b/csv-core/src/reader.rs @@ -126,6 +126,8 @@ pub struct Reader { has_read: bool, /// The current position in the output buffer when reading a record. output_pos: usize, + /// Skip any space following the delimiter + skip_initial_space: bool, } impl Default for Reader { @@ -145,6 +147,7 @@ impl Default for Reader { line: 1, has_read: false, output_pos: 0, + skip_initial_space: false, } } } @@ -256,6 +259,15 @@ impl ReaderBuilder { self.rdr.use_nfa = yes; self } + + /// Enable or disable skipping of an initial space. + /// + /// When enabled, skip a single space that immediately follows the field + /// delimiter when parsing CSV. + pub fn skip_initial_space(&mut self, yes: bool) -> &mut ReaderBuilder { + self.rdr.skip_initial_space = yes; + self + } } /// The result of parsing at most one field from CSV data. 
@@ -671,6 +683,12 @@ impl Reader { if has_out { output[nout] = input[nin]; nout += 1; + } else if self.skip_initial_space + && nin < input.len() - 1 + && input[nin] == self.delimiter + && input[nin + 1] == b' ' + { + nin += 1; } nin += 1; if state >= self.dfa.final_field { @@ -731,6 +749,12 @@ impl Reader { if has_out { output[nout] = b; nout += 1; + } else if self.skip_initial_space + && nin < input.len() - 1 + && input[nin] == self.delimiter + && input[nin + 1] == b' ' + { + nin += 1; } nin += 1; if state >= self.dfa.final_field { @@ -893,6 +917,12 @@ impl Reader { nin += 1; } NfaInputAction::Discard => { + if self.skip_initial_space + && nin < input.len() - 1 + && input[nin + 1] == b' ' + { + nin += 1; + } nin += 1; } NfaInputAction::Epsilon => {} @@ -944,6 +974,12 @@ impl Reader { nin += 1; } NfaInputAction::Discard => { + if self.skip_initial_space + && nin < input.len() - 1 + && input[nin + 1] == b' ' + { + nin += 1; + } nin += 1; } NfaInputAction::Epsilon => (), @@ -1725,6 +1761,14 @@ mod tests { b.comment(Some(b'#')); } ); + parses_to!( + issue_337, + "a, \"b\", c", + csv![["a", "b", "c"]], + |b: &mut ReaderBuilder| { + b.skip_initial_space(true); + } + ); macro_rules! assert_read { ( diff --git a/src/reader.rs b/src/reader.rs index 82f5d2c..c6509dd 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -630,6 +630,15 @@ impl ReaderBuilder { self.builder.nfa(yes); self } + + /// Enable or disable skipping of an initial space. + /// + /// When enabled, skip a single space that immediately follows the field + /// delimiter when parsing CSV. + pub fn skip_initial_space(&mut self, yes: bool) -> &mut ReaderBuilder { + self.builder.skip_initial_space(yes); + self + } } /// A already configured CSV reader.