Skip to content

Commit

Permalink
Improve loop-through startup speed by 80%-90%
Browse files Browse the repository at this point in the history
HighlightingAssets::get_syntax_set() is never called when e.g. piping the bat
output to a file (see Config::loop_through), so by loading the SyntaxSet only
when needed, we radically improve startup time when it is not needed.
  • Loading branch information
Enselic committed Jul 22, 2021
1 parent 8ebadc3 commit 6797854
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 9 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

## Other

- Improve loop-through (e.g. when piping output to a file) startup speed by
80%-90%, see #1747 (@Enselic)


## Syntaxes

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ ansi_term = "^0.12.1"
ansi_colours = "^1.0"
console = "0.14.1"
lazy_static = { version = "1.4", optional = true }
lazycell = "1.0"
wild = { version = "2.0", optional = true }
content_inspector = "0.2.4"
encoding = "0.2"
Expand Down
76 changes: 67 additions & 9 deletions src/assets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::BufReader;
use std::path::Path;
use std::path::{Path, PathBuf};

use lazycell::LazyCell;

use syntect::dumps::{dump_to_file, from_binary, from_reader};
use syntect::highlighting::{Theme, ThemeSet};
Expand All @@ -18,7 +20,8 @@ use crate::syntax_mapping::{MappingTarget, SyntaxMapping};

/// Syntax and theme data used for highlighting.
///
/// The syntax set is kept in a `LazyCell` so it can be filled on first access
/// (see `get_syntax_set`) instead of at construction time.
#[derive(Debug)]
pub struct HighlightingAssets {
// NOTE(review): this listing is a diff view without +/- markers. `syntax_set`
// looks like the pre-change field that the two fields right below replace;
// confirm against the real file before relying on it.
syntax_set: SyntaxSet,
/// Holds the deserialized syntax set once it is available.
syntax_set_cell: LazyCell<SyntaxSet>,
/// Serialized source of the syntax set; `None` when an already-built set was
/// handed to `new`.
serialized_syntax_set: Option<SerializedSyntaxSet>,
/// Theme set supplied at construction time.
pub(crate) theme_set: ThemeSet,
/// Initialized to `None` by `new`; where it gets set is not visible in this view.
fallback_theme: Option<&'static str>,
}
Expand All @@ -41,9 +44,21 @@ const IGNORED_SUFFIXES: [&str; 10] = [
];

impl HighlightingAssets {
fn new(syntax_set: SyntaxSet, theme_set: ThemeSet) -> Self {
fn new(
syntax_set: Option<SyntaxSet>,
serialized_syntax_set: Option<SerializedSyntaxSet>,
theme_set: ThemeSet,
) -> Self {
assert!(syntax_set.is_some() || serialized_syntax_set.is_some());

let syntax_set_cell = LazyCell::new();
if let Some(syntax_set) = syntax_set {
syntax_set_cell.fill(syntax_set).expect("can never fail");
}

HighlightingAssets {
syntax_set,
syntax_set_cell,
serialized_syntax_set,
theme_set,
fallback_theme: None,
}
Expand Down Expand Up @@ -98,20 +113,30 @@ impl HighlightingAssets {
}

Ok(HighlightingAssets::new(
syntax_set_builder.build(),
Some(syntax_set_builder.build()),
None,
theme_set,
))
}

/// Creates the assets from the cache directory `cache_path`.
///
/// The theme set is read right away from `themes.bin`. For the syntax set only
/// the path to `syntaxes.bin` is recorded, wrapped in
/// `SerializedSyntaxSet::FromFile`.
///
/// # Errors
///
/// Propagates (via `?`) any error returned by `asset_from_cache`.
pub fn from_cache(cache_path: &Path) -> Result<Self> {
Ok(HighlightingAssets::new(
// NOTE(review): diff view without +/- markers. The next line looks like the
// pre-change first argument (eager read of `syntaxes.bin`) that the
// `None, Some(..)` pair below replaces; confirm against the real file.
asset_from_cache(&cache_path.join("syntaxes.bin"), "syntax set")?,
None,
Some(SerializedSyntaxSet::FromFile(
cache_path.join("syntaxes.bin"),
)),
asset_from_cache(&cache_path.join("themes.bin"), "theme set")?,
))
}

/// Creates the assets from the data embedded in the binary.
///
/// The theme set comes from `get_integrated_themeset()`. The syntax set is
/// passed on in serialized form as `SerializedSyntaxSet::FromBinary`.
pub fn from_binary() -> Self {
// NOTE(review): diff view without +/- markers. The next line looks like the
// pre-change body (two-argument `new` with an eagerly built syntax set) that
// the call below replaces; confirm against the real file.
HighlightingAssets::new(get_integrated_syntaxset(), get_integrated_themeset())
HighlightingAssets::new(
None,
Some(SerializedSyntaxSet::FromBinary(
get_serialized_integrated_syntaxset(),
)),
get_integrated_themeset(),
)
}

pub fn save_to_cache(&self, target_dir: &Path, current_version: &str) -> Result<()> {
Expand All @@ -138,7 +163,12 @@ impl HighlightingAssets {
}

/// Returns a reference to the syntax set stored in `syntax_set_cell`.
///
/// The closure given to `borrow_with` produces the value by deserializing
/// `serialized_syntax_set`.
///
/// # Panics
///
/// Panics with the message below if the closure runs while
/// `serialized_syntax_set` is `None`.
pub(crate) fn get_syntax_set(&self) -> &SyntaxSet {
// NOTE(review): diff view without +/- markers. The next line looks like the
// pre-change body that the `borrow_with` call below replaces; confirm against
// the real file.
&self.syntax_set
// NOTE(review): presumably `borrow_with` only invokes the closure when the
// cell is still empty; verify against the lazycell documentation.
self.syntax_set_cell.borrow_with(|| {
self.serialized_syntax_set
.as_ref()
.expect("a dev forgot to setup serialized_syntax_set, please report to https://github.com/sharkdp/bat/issues")
.deserialize()
})
}

pub fn syntaxes(&self) -> &[SyntaxReference] {
Expand Down Expand Up @@ -273,8 +303,36 @@ impl HighlightingAssets {
}
}

/// A `SyntaxSet` in serialized form, i.e. bincoded and flate2 compressed.
///
/// The data is kept in this form so that it can be loaded lazily; call
/// `deserialize` to turn it into a `SyntaxSet`.
#[derive(Debug)]
enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file. Only the path is stored;
/// the file is read when `deserialize` is called.
FromFile(PathBuf),

/// The data to use is embedded into the bat binary.
FromBinary(&'static [u8]),
}

impl SerializedSyntaxSet {
    /// Turns the serialized data into a usable `SyntaxSet`.
    ///
    /// Embedded bytes are decoded with `from_binary`; a cache file is loaded
    /// through `asset_from_cache`.
    ///
    /// # Panics
    ///
    /// Panics if loading the cache file returns an error.
    fn deserialize(&self) -> SyntaxSet {
        match self {
            Self::FromFile(cache_file) => asset_from_cache(cache_file, "syntax set")
                .expect("cache corrupt, consider rebuilding or clearing, see https://github.com/sharkdp/bat#adding-new-syntaxes--language-definitions on how"),
            Self::FromBinary(bytes) => from_binary(bytes),
        }
    }
}

/// Returns the serialized syntax set that is compiled into the binary from
/// `../assets/syntaxes.bin`.
fn get_serialized_integrated_syntaxset() -> &'static [u8] {
    const EMBEDDED_SYNTAXES: &[u8] = include_bytes!("../assets/syntaxes.bin");
    EMBEDDED_SYNTAXES
}

/// Builds the `SyntaxSet` from the data embedded in the binary by passing it to
/// `from_binary`.
fn get_integrated_syntaxset() -> SyntaxSet {
// NOTE(review): diff view without +/- markers. The next line looks like the
// pre-change body (inline `include_bytes!`) that the delegating call below
// replaces; confirm against the real file.
from_binary(include_bytes!("../assets/syntaxes.bin"))
from_binary(get_serialized_integrated_syntaxset())
}

fn get_integrated_themeset() -> ThemeSet {
Expand Down

0 comments on commit 6797854

Please sign in to comment.