Skip to content

Commit

Permalink
significantly improve performance of :reload
Browse files Browse the repository at this point in the history
  • Loading branch information
pascalkuthe committed Oct 25, 2022
1 parent 801984c commit ca81764
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 57 deletions.
32 changes: 24 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion helix-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.5"

similar = "2.2"
imara-diff = "0.1.0"

encoding_rs = "0.8"

Expand Down
241 changes: 196 additions & 45 deletions helix-core/src/diff.rs
Original file line number Diff line number Diff line change
@@ -1,58 +1,188 @@
use crate::{Rope, Transaction};
use std::ops::Range;

/// Compares `old` and `new` to generate a [`Transaction`] describing
/// the steps required to get from `old` to `new`.
pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction {
// `similar` only works on contiguous data, so a `Rope` has
// to be temporarily converted into a `String`.
let old_converted = old.to_string();
let new_converted = new.to_string();

// A timeout is set so after 1 seconds, the algorithm will start
// approximating. This is especially important for big `Rope`s or
// `Rope`s that are extremely dissimilar to each other.
let mut config = similar::TextDiff::configure();
config.timeout(std::time::Duration::from_secs(1));

let diff = config.diff_chars(&old_converted, &new_converted);

// The current position of the change needs to be tracked to
// construct the `Change`s.
let mut pos = 0;
Transaction::change(
old,
diff.ops()
use imara_diff::intern::InternedInput;
use imara_diff::Algorithm;
use ropey::RopeSlice;

use crate::{ChangeSet, Rope, Tendril, Transaction};

/// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
struct CharChangeSetBuilder<'a> {
res: &'a mut ChangeSet,
hunk: &'a InternedInput<char>,
pos: u32,
}

impl imara_diff::Sink for CharChangeSetBuilder<'_> {
type Out = ();
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
self.res.retain((before.start - self.pos) as usize);
self.res.delete(before.len());
self.pos = before.end;

let res = self.hunk.after[after.start as usize..after.end as usize]
.iter()
.map(|&token| self.hunk.interner[token])
.collect();

self.res.insert(res);
}

fn finish(self) -> Self::Out {
self.res.retain(self.hunk.before.len() - self.pos as usize);
}
}

struct LineChangeSetBuilder<'a> {
res: ChangeSet,
after: RopeSlice<'a>,
file: &'a InternedInput<RopeSlice<'a>>,
current_hunk: InternedInput<char>,
pos: u32,
}

impl imara_diff::Sink for LineChangeSetBuilder<'_> {
type Out = ChangeSet;

fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
let len = self.file.before[self.pos as usize..before.start as usize]
.iter()
.map(|op| op.as_tag_tuple())
.filter_map(|(tag, old_range, new_range)| {
// `old_pos..pos` is equivalent to `start..end` for where
// the change should be applied.
let old_pos = pos;
pos += old_range.end - old_range.start;

match tag {
// Semantically, inserts and replacements are the same thing.
similar::DiffTag::Insert | similar::DiffTag::Replace => {
// This is the text from the `new` rope that should be
// inserted into `old`.
let text: &str = {
let start = new.char_to_byte(new_range.start);
let end = new.char_to_byte(new_range.end);
&new_converted[start..end]
};
Some((old_pos, pos, Some(text.into())))
.map(|&it| self.file.interner[it].len_chars())
.sum();
self.res.retain(len);
self.pos = before.end;

// do not perform diffs on large hunks
let len_before = before.end - before.start;
let len_after = after.end - after.start;

// Pure insertions/removals do not require a character diff.
// Very large changes are ignored because their character diff is expensive to compute
// TODO adjust heuristic to detect large changes?
if len_before == 0
|| len_after == 0
|| len_after > 5 * len_before
|| 5 * len_after < len_before && len_before > 10
|| len_before + len_after > 200
{
let remove = self.file.before[before.start as usize..before.end as usize]
.iter()
.map(|&it| self.file.interner[it].len_chars())
.sum();
self.res.delete(remove);
let mut fragment = Tendril::new();
if len_after > 500 {
// copying a rope line by line is slower then copying the entire
// rope. Use to_string for very large changes instead..
if self.file.after.len() == after.end as usize {
if after.start == 0 {
fragment = self.after.to_string().into();
} else {
let start = self.after.line_to_char(after.start as usize);
fragment = self.after.slice(start..).to_string().into();
}
similar::DiffTag::Delete => Some((old_pos, pos, None)),
similar::DiffTag::Equal => None,
} else if after.start == 0 {
let end = self.after.line_to_char(after.end as usize);
fragment = self.after.slice(..end).to_string().into();
} else {
let start = self.after.line_to_char(after.start as usize);
let end = self.after.line_to_char(after.end as usize);
fragment = self.after.slice(start..end).to_string().into();
}
}),
)
} else {
for &line in &self.file.after[after.start as usize..after.end as usize] {
for chunk in self.file.interner[line].chunks() {
fragment.push_str(chunk)
}
}
};
self.res.insert(fragment);
} else {
// for reasonably small hunks, generating a ChangeSet from char diff can save memory
// TODO use a tokenizer (word diff?) for improved performance
let hunk_before = self.file.before[before.start as usize..before.end as usize]
.iter()
.flat_map(|&it| self.file.interner[it].chars());
let hunk_after = self.file.after[after.start as usize..after.end as usize]
.iter()
.flat_map(|&it| self.file.interner[it].chars());
self.current_hunk.update_before(hunk_before);
self.current_hunk.update_after(hunk_after);

// the histogram heuristic does not work as well
// for characters because the same characters often reoccur
// use myer diff instead
imara_diff::diff(
Algorithm::Myers,
&self.current_hunk,
CharChangeSetBuilder {
res: &mut self.res,
hunk: &self.current_hunk,
pos: 0,
},
);

self.current_hunk.clear();
}
}

fn finish(mut self) -> Self::Out {
let len = self.file.before[self.pos as usize..]
.iter()
.map(|&it| self.file.interner[it].len_chars())
.sum();

self.res.retain(len);
self.res
}
}

struct RopeLines<'a>(RopeSlice<'a>);

impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
type Token = RopeSlice<'a>;
// TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
type Tokenizer = ropey::iter::Lines<'a>;

fn tokenize(&self) -> Self::Tokenizer {
self.0.lines()
}

fn estimate_tokens(&self) -> u32 {
// we can provide a perfect estimate which is very nice for performance
self.0.len_lines() as u32
}
}

/// Compares `old` and `new` to generate a [`Transaction`] describing
/// the steps required to get from `old` to `new`.
pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
log::error!("diff");
let res = ChangeSet::with_capacity(32);
let after = after.slice(..);
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
let builder = LineChangeSetBuilder {
res,
file: &file,
after,
pos: 0,
current_hunk: InternedInput::default(),
};

imara_diff::diff(Algorithm::Histogram, &file, builder).into()
}

#[cfg(test)]
mod tests {
use super::*;

fn test_identity(a: &str, b: &str) {
let mut old = Rope::from(a);
let new = Rope::from(b);
compare_ropes(&old, &new).apply(&mut old);
assert_eq!(old, new)
}

quickcheck::quickcheck! {
fn test_compare_ropes(a: String, b: String) -> bool {
let mut old = Rope::from(a);
Expand All @@ -61,4 +191,25 @@ mod tests {
old == new
}
}

#[test]
fn equal_files() {
test_identity("foo", "foo")
}

#[test]
fn trailing_newline() {
test_identity("foo\n", "foo");
test_identity("foo", "foo\n")
}

#[test]
fn new_file() {
test_identity("", "foo")
}

#[test]
fn deleted_file() {
test_identity("foo", "")
}
}
6 changes: 3 additions & 3 deletions helix-core/src/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl ChangeSet {
}

// Changeset builder operations: delete/insert/retain
fn delete(&mut self, n: usize) {
pub(crate) fn delete(&mut self, n: usize) {
use Operation::*;
if n == 0 {
return;
Expand All @@ -71,7 +71,7 @@ impl ChangeSet {
}
}

fn insert(&mut self, fragment: Tendril) {
pub(crate) fn insert(&mut self, fragment: Tendril) {
use Operation::*;

if fragment.is_empty() {
Expand All @@ -93,7 +93,7 @@ impl ChangeSet {
self.changes.push(new_last);
}

fn retain(&mut self, n: usize) {
pub(crate) fn retain(&mut self, n: usize) {
use Operation::*;
if n == 0 {
return;
Expand Down

0 comments on commit ca81764

Please sign in to comment.