Skip to content

Commit

Permalink
search: add -b/--byte-offset flag
Browse files Browse the repository at this point in the history
This commit adds support for printing the 0-based byte offset before each
line of output. We handle corner cases such as `-o/--only-matching` and
`-C/--context` as well.

Closes #812
  • Loading branch information
balajisivaraman authored and BurntSushi committed Mar 10, 2018
1 parent 91d0756 commit b006943
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 17 deletions.
1 change: 1 addition & 0 deletions complete/_rg
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ _rg() {
'*--colors=[specify color settings and styles]: :->colorspec'
'--column[show column numbers]'
'(-A -B -C --after-context --before-context --context)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines'
'(-b --byte-offset)'{-b,--byte-offset}'[print the 0-based byte offset for each matching line]'
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
'(-c --count --passthrough --passthru)'{-c,--count}'[only show count of matches for each file]'
'--debug[show debug messages]'
Expand Down
13 changes: 13 additions & 0 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
// Flags can be defined in any order, but we do it alphabetically.
flag_after_context(&mut args);
flag_before_context(&mut args);
flag_byte_offset(&mut args);
flag_case_sensitive(&mut args);
flag_color(&mut args);
flag_colors(&mut args);
Expand Down Expand Up @@ -634,6 +635,18 @@ This overrides the --context flag.
args.push(arg);
}

/// Registers the `-b/--byte-offset` switch in the list of flags.
///
/// The flag takes no value; it is a plain on/off switch whose short and
/// long help texts are the constants defined below.
fn flag_byte_offset(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Print the 0-based byte offset for each matching line.";
    const LONG: &str = long!("\
Print the 0-based byte offset within the input file
before each line of output. If -o (--only-matching) is
specified, print the offset of the matching part itself.
");
    // Build the switch and hand it straight to the flag list.
    args.push(
        RGArg::switch("byte-offset")
            .short("b")
            .help(SHORT)
            .long_help(LONG),
    );
}

fn flag_case_sensitive(args: &mut Vec<RGArg>) {
const SHORT: &str = "Search case sensitively (default).";
const LONG: &str = long!("\
Expand Down
3 changes: 3 additions & 0 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub struct Args {
paths: Vec<PathBuf>,
after_context: usize,
before_context: usize,
byte_offset: bool,
color_choice: termcolor::ColorChoice,
colors: ColorSpecs,
column: bool,
Expand Down Expand Up @@ -259,6 +260,7 @@ impl Args {
WorkerBuilder::new(self.grep())
.after_context(self.after_context)
.before_context(self.before_context)
.byte_offset(self.byte_offset)
.count(self.count)
.encoding(self.encoding)
.files_with_matches(self.files_with_matches)
Expand Down Expand Up @@ -361,6 +363,7 @@ impl<'a> ArgMatches<'a> {
paths: paths,
after_context: after_context,
before_context: before_context,
byte_offset: self.is_present("byte-offset"),
color_choice: self.color_choice(),
colors: self.color_specs()?,
column: self.column(),
Expand Down
26 changes: 23 additions & 3 deletions src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,19 +280,21 @@ impl<W: WriteColor> Printer<W> {
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>
) {
if !self.line_per_match && !self.only_matching {
let mat = re
.find(&buf[start..end])
.map(|m| (m.start(), m.end()))
.unwrap_or((0, 0));
return self.write_match(
re, path, buf, start, end, line_number, mat.0, mat.1);
re, path, buf, start, end, line_number,
byte_offset, mat.0, mat.1);
}
for m in re.find_iter(&buf[start..end]) {
self.write_match(
re, path.as_ref(), buf, start, end,
line_number, m.start(), m.end());
re, path.as_ref(), buf, start, end, line_number,
byte_offset, m.start(), m.end());
}
}

Expand All @@ -304,6 +306,7 @@ impl<W: WriteColor> Printer<W> {
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>,
match_start: usize,
match_end: usize,
) {
Expand All @@ -321,6 +324,14 @@ impl<W: WriteColor> Printer<W> {
if self.column {
self.column_number(match_start as u64 + 1, b':');
}
if let Some(byte_offset) = byte_offset {
if self.only_matching {
self.write_byte_offset(
byte_offset + ((start + match_start) as u64), b':');
} else {
self.write_byte_offset(byte_offset + (start as u64), b':');
}
}
if self.replace.is_some() {
let mut count = 0;
let mut offsets = Vec::new();
Expand Down Expand Up @@ -395,6 +406,7 @@ impl<W: WriteColor> Printer<W> {
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
Expand All @@ -407,6 +419,9 @@ impl<W: WriteColor> Printer<W> {
if let Some(line_number) = line_number {
self.line_number(line_number, b'-');
}
if let Some(byte_offset) = byte_offset {
self.write_byte_offset(byte_offset + (start as u64), b'-');
}
if self.max_columns.map_or(false, |m| end - start > m) {
self.write(b"[Omitted long context line]");
self.write_eol();
Expand Down Expand Up @@ -481,6 +496,11 @@ impl<W: WriteColor> Printer<W> {
self.separator(&[sep]);
}

/// Writes the byte offset `o` as decimal text followed by the single
/// separator byte `sep`.
///
/// The number is emitted through `write_colored` using the color spec
/// returned by `colors.column()`.
fn write_byte_offset(&mut self, o: u64, sep: u8) {
    let rendered = o.to_string();
    self.write_colored(rendered.as_bytes(), |colors| colors.column());
    self.separator(&[sep]);
}

fn write(&mut self, buf: &[u8]) {
self.has_printed = true;
let _ = self.wtr.write_all(buf);
Expand Down
40 changes: 39 additions & 1 deletion src/search_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub struct BufferSearcher<'a, W: 'a> {
buf: &'a [u8],
match_count: u64,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_line: usize,
}

Expand All @@ -41,10 +42,21 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
buf: buf,
match_count: 0,
line_count: None,
byte_offset: None,
last_line: 0,
}
}

/// Sets whether a byte offset is printed before each line of output.
///
/// If enabled, searching will print a 0-based byte offset of the
/// matching line (or of the actual match if -o is specified) before
/// printing the line itself.
///
/// Disabled by default.
///
/// The searcher is taken by value and returned, so this call can be
/// chained with other option setters.
pub fn byte_offset(mut self, yes: bool) -> Self {
// Only the option flag is recorded here.
self.opts.byte_offset = yes;
self
}

/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
Expand Down Expand Up @@ -120,6 +132,9 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {

self.match_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
// The memory map searcher uses one contiguous block of bytes, so the
// offsets given to the printer are sufficient to compute the byte offset.
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
let mut last_end = 0;
for m in self.grep.iter(self.buf) {
if self.opts.invert_match {
Expand Down Expand Up @@ -158,7 +173,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
self.add_line(end);
self.printer.matched(
self.grep.regex(), self.path, self.buf,
start, end, self.line_count);
start, end, self.line_count, self.byte_offset);
}

#[inline(always)]
Expand Down Expand Up @@ -271,6 +286,29 @@ and exhibited clearly, with a label attached.\
");
}

/// With byte offsets enabled, every matching line is prefixed by the
/// path and then the offset at which that line starts.
#[test]
fn byte_offset() {
    let expected = "\
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
";
    let (_, actual) = search("Sherlock", SHERLOCK, |s| s.byte_offset(true));
    assert_eq!(actual, expected);
}

/// With inverted matching, the offsets reported are those of the lines
/// that do NOT contain the pattern.
#[test]
fn byte_offset_inverted() {
    let expected = "\
/baz.rs:65:Holmeses, success in the province of detective work must always
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:321:and exhibited clearly, with a label attached.
";
    let (_, actual) = search(
        "Sherlock",
        SHERLOCK,
        |s| s.invert_match(true).byte_offset(true),
    );
    assert_eq!(actual, expected);
}

#[test]
fn count() {
let (count, out) = search(
Expand Down
86 changes: 73 additions & 13 deletions src/search_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ pub struct Searcher<'a, R, W: 'a> {
haystack: R,
match_count: u64,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_match: Match,
last_printed: usize,
last_line: usize,
Expand All @@ -80,6 +81,7 @@ pub struct Searcher<'a, R, W: 'a> {
pub struct Options {
pub after_context: usize,
pub before_context: usize,
pub byte_offset: bool,
pub count: bool,
pub files_with_matches: bool,
pub files_without_matches: bool,
Expand All @@ -96,6 +98,7 @@ impl Default for Options {
Options {
after_context: 0,
before_context: 0,
byte_offset: false,
count: false,
files_with_matches: false,
files_without_matches: false,
Expand Down Expand Up @@ -165,6 +168,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
haystack: haystack,
match_count: 0,
line_count: None,
byte_offset: None,
last_match: Match::default(),
last_printed: 0,
last_line: 0,
Expand All @@ -186,6 +190,16 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self
}

/// Sets whether a byte offset is printed before each line of output.
///
/// If enabled, searching will print a 0-based byte offset of the
/// matching line (or of the actual match if -o is specified) before
/// printing the line itself.
///
/// Disabled by default.
///
/// The searcher is taken by value and returned, so this call can be
/// chained with other option setters.
pub fn byte_offset(mut self, yes: bool) -> Self {
// Only the option flag is recorded here.
self.opts.byte_offset = yes;
self
}

/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
Expand Down Expand Up @@ -259,6 +273,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self.inp.reset();
self.match_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
self.last_match = Match::default();
self.after_context_remaining = 0;
while !self.terminate() {
Expand Down Expand Up @@ -327,17 +342,18 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {

#[inline(always)]
fn fill(&mut self) -> Result<bool, Error> {
let keep = if self.opts.before_context > 0 || self.opts.after_context > 0 {
let lines = 1 + cmp::max(
self.opts.before_context, self.opts.after_context);
start_of_previous_lines(
self.opts.eol,
&self.inp.buf,
self.inp.lastnl.saturating_sub(1),
lines)
} else {
self.inp.lastnl
};
let keep =
if self.opts.before_context > 0 || self.opts.after_context > 0 {
let lines = 1 + cmp::max(
self.opts.before_context, self.opts.after_context);
start_of_previous_lines(
self.opts.eol,
&self.inp.buf,
self.inp.lastnl.saturating_sub(1),
lines)
} else {
self.inp.lastnl
};
if keep < self.last_printed {
self.last_printed -= keep;
} else {
Expand All @@ -349,6 +365,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self.count_lines(keep);
self.last_line = 0;
}
self.count_byte_offset(keep);
let ok = self.inp.fill(&mut self.haystack, keep).map_err(|err| {
Error::from_io(err, &self.path)
})?;
Expand Down Expand Up @@ -419,7 +436,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self.add_line(end);
self.printer.matched(
self.grep.regex(), self.path,
&self.inp.buf, start, end, self.line_count);
&self.inp.buf, start, end, self.line_count, self.byte_offset);
self.last_printed = end;
self.after_context_remaining = self.opts.after_context;
}
Expand All @@ -429,7 +446,8 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self.count_lines(start);
self.add_line(end);
self.printer.context(
&self.path, &self.inp.buf, start, end, self.line_count);
&self.path, &self.inp.buf, start, end,
self.line_count, self.byte_offset);
self.last_printed = end;
}

Expand All @@ -447,6 +465,13 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
}
}

/// Advances the running byte offset by `buf_last_end` bytes.
///
/// Does nothing when no offset is being tracked, i.e. when
/// `self.byte_offset` is `None`.
#[inline(always)]
fn count_byte_offset(&mut self, buf_last_end: usize) {
    self.byte_offset = self
        .byte_offset
        .map(|offset| offset + buf_last_end as u64);
}

#[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
Expand Down Expand Up @@ -1006,6 +1031,41 @@ fn main() {
assert_eq!(out, "/baz.rs:2\n");
}

/// With byte offsets enabled, every matching line is prefixed by the
/// path and then the offset at which that line starts.
// NOTE(review): `search_smallcap` presumably searches with a small buffer
// capacity so the buffer is refilled mid-search; confirm against the helper.
#[test]
fn byte_offset() {
    let expected = "\
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
";
    let (_, actual) = search_smallcap(
        "Sherlock",
        SHERLOCK,
        |s| s.byte_offset(true),
    );
    assert_eq!(actual, expected);
}

/// Context lines also carry a byte offset: they use `-` as the separator
/// after both the line number and the offset, while the matching line
/// uses `:`.
#[test]
fn byte_offset_with_before_context() {
    let expected = "\
/baz.rs-3-129-be, to a very large extent, the result of luck. Sherlock Holmes
/baz.rs-4-193-can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:258:but Doctor Watson has to have it taken out for him and dusted,
";
    let (_, actual) = search_smallcap(
        "dusted",
        SHERLOCK,
        |s| s.line_number(true).byte_offset(true).before_context(2),
    );
    assert_eq!(actual, expected);
}

/// With inverted matching, the offsets reported are those of the lines
/// that do NOT contain the pattern.
#[test]
fn byte_offset_inverted() {
    let expected = "\
/baz.rs:65:Holmeses, success in the province of detective work must always
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:321:and exhibited clearly, with a label attached.
";
    let (_, actual) = search_smallcap(
        "Sherlock",
        SHERLOCK,
        |s| s.invert_match(true).byte_offset(true),
    );
    assert_eq!(actual, expected);
}

#[test]
fn files_with_matches() {
let (count, out) = search_smallcap(
Expand Down
Loading

0 comments on commit b006943

Please sign in to comment.