Skip to content

Commit

Permalink
Add NotebookIndex to the cache (#6863)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the `FileCache` to include an optional `NotebookIndex`
to support caching for Jupyter Notebooks.

Only the index is required to compute the diagnostics, so there is no
need to store the entire `Notebook` on the `Diagnostics` struct.
This means that storing just the index in the cache is enough to
reconstruct the `Diagnostics`.

## Test Plan

Update an existing test case to also run over the fixtures under the
`ruff_notebook` crate, which contains multiple Jupyter Notebooks.

Locally, the following commands were run in order:
1. Remove the cache: `rm -rf .ruff_cache`
2. Run without cache: `cargo run --bin ruff -- check --isolated
crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb
--no-cache`
3. Run with cache: `cargo run --bin ruff -- check --isolated
crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb`
4. Verify that the `.ruff_cache` directory was created
5. Run with cache again and verify: `cargo run --bin ruff -- check
--isolated
crates/ruff_notebook/resources/test/fixtures/jupyter/unused_variable.ipynb`

## Benchmarks

#6863 (comment)

fixes: #6671
  • Loading branch information
dhruvmanila authored Sep 12, 2023
1 parent e7b7e4a commit ee0f127
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 58 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions crates/ruff/src/message/grouped.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::num::NonZeroUsize;

use colored::Colorize;

use ruff_notebook::{Notebook, NotebookIndex};
use ruff_notebook::NotebookIndex;
use ruff_source_file::OneIndexed;

use crate::fs::relativize_path;
Expand Down Expand Up @@ -65,7 +65,7 @@ impl Emitter for GroupedEmitter {
writer,
"{}",
DisplayGroupedMessage {
jupyter_index: context.notebook(message.filename()).map(Notebook::index),
notebook_index: context.notebook_index(message.filename()),
message,
show_fix_status: self.show_fix_status,
show_source: self.show_source,
Expand All @@ -92,7 +92,7 @@ struct DisplayGroupedMessage<'a> {
show_source: bool,
row_length: NonZeroUsize,
column_length: NonZeroUsize,
jupyter_index: Option<&'a NotebookIndex>,
notebook_index: Option<&'a NotebookIndex>,
}

impl Display for DisplayGroupedMessage<'_> {
Expand All @@ -110,7 +110,7 @@ impl Display for DisplayGroupedMessage<'_> {
)?;

// Check if we're working on a jupyter notebook and translate positions with cell accordingly
let (row, col) = if let Some(jupyter_index) = self.jupyter_index {
let (row, col) = if let Some(jupyter_index) = self.notebook_index {
write!(
f,
"cell {cell}{sep}",
Expand Down Expand Up @@ -150,7 +150,7 @@ impl Display for DisplayGroupedMessage<'_> {
"{}",
MessageCodeFrame {
message,
jupyter_index: self.jupyter_index
notebook_index: self.notebook_index
}
)?;
}
Expand Down
18 changes: 9 additions & 9 deletions crates/ruff/src/message/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use json_lines::JsonLinesEmitter;
pub use junit::JunitEmitter;
pub use pylint::PylintEmitter;
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_notebook::Notebook;
use ruff_notebook::NotebookIndex;
use ruff_source_file::{SourceFile, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize};
pub use text::TextEmitter;
Expand Down Expand Up @@ -127,21 +127,21 @@ pub trait Emitter {

/// Context passed to [`Emitter`].
pub struct EmitterContext<'a> {
notebooks: &'a FxHashMap<String, Notebook>,
notebook_indexes: &'a FxHashMap<String, NotebookIndex>,
}

impl<'a> EmitterContext<'a> {
pub fn new(notebooks: &'a FxHashMap<String, Notebook>) -> Self {
Self { notebooks }
pub fn new(notebook_indexes: &'a FxHashMap<String, NotebookIndex>) -> Self {
Self { notebook_indexes }
}

/// Tests if the file with `name` is a jupyter notebook.
pub fn is_notebook(&self, name: &str) -> bool {
self.notebooks.contains_key(name)
self.notebook_indexes.contains_key(name)
}

pub fn notebook(&self, name: &str) -> Option<&Notebook> {
self.notebooks.get(name)
pub fn notebook_index(&self, name: &str) -> Option<&NotebookIndex> {
self.notebook_indexes.get(name)
}
}

Expand Down Expand Up @@ -225,8 +225,8 @@ def fibonacci(n):
emitter: &mut dyn Emitter,
messages: &[Message],
) -> String {
let source_kinds = FxHashMap::default();
let context = EmitterContext::new(&source_kinds);
let notebook_indexes = FxHashMap::default();
let context = EmitterContext::new(&notebook_indexes);
let mut output: Vec<u8> = Vec::new();
emitter.emit(&mut output, messages, &context).unwrap();

Expand Down
40 changes: 18 additions & 22 deletions crates/ruff/src/message/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, Sou
use bitflags::bitflags;
use colored::Colorize;

use ruff_notebook::{Notebook, NotebookIndex};
use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize};

Expand Down Expand Up @@ -71,22 +71,22 @@ impl Emitter for TextEmitter {
)?;

let start_location = message.compute_start_location();
let jupyter_index = context.notebook(message.filename()).map(Notebook::index);
let notebook_index = context.notebook_index(message.filename());

// Check if we're working on a jupyter notebook and translate positions with cell accordingly
let diagnostic_location = if let Some(jupyter_index) = jupyter_index {
let diagnostic_location = if let Some(notebook_index) = notebook_index {
write!(
writer,
"cell {cell}{sep}",
cell = jupyter_index
cell = notebook_index
.cell(start_location.row.get())
.unwrap_or_default(),
sep = ":".cyan(),
)?;

SourceLocation {
row: OneIndexed::new(
jupyter_index
notebook_index
.cell_row(start_location.row.get())
.unwrap_or(1) as usize,
)
Expand Down Expand Up @@ -115,7 +115,7 @@ impl Emitter for TextEmitter {
"{}",
MessageCodeFrame {
message,
jupyter_index
notebook_index
}
)?;
}
Expand Down Expand Up @@ -161,7 +161,7 @@ impl Display for RuleCodeAndBody<'_> {

pub(super) struct MessageCodeFrame<'a> {
pub(crate) message: &'a Message,
pub(crate) jupyter_index: Option<&'a NotebookIndex>,
pub(crate) notebook_index: Option<&'a NotebookIndex>,
}

impl Display for MessageCodeFrame<'_> {
Expand All @@ -186,14 +186,12 @@ impl Display for MessageCodeFrame<'_> {
let content_start_index = source_code.line_index(range.start());
let mut start_index = content_start_index.saturating_sub(2);

// If we're working on a jupyter notebook, skip the lines which are
// If we're working with a Jupyter Notebook, skip the lines which are
// outside of the cell containing the diagnostic.
if let Some(jupyter_index) = self.jupyter_index {
let content_start_cell = jupyter_index
.cell(content_start_index.get())
.unwrap_or_default();
if let Some(index) = self.notebook_index {
let content_start_cell = index.cell(content_start_index.get()).unwrap_or_default();
while start_index < content_start_index {
if jupyter_index.cell(start_index.get()).unwrap_or_default() == content_start_cell {
if index.cell(start_index.get()).unwrap_or_default() == content_start_cell {
break;
}
start_index = start_index.saturating_add(1);
Expand All @@ -213,14 +211,12 @@ impl Display for MessageCodeFrame<'_> {
.saturating_add(2)
.min(OneIndexed::from_zero_indexed(source_code.line_count()));

// If we're working on a jupyter notebook, skip the lines which are
// If we're working with a Jupyter Notebook, skip the lines which are
// outside of the cell containing the diagnostic.
if let Some(jupyter_index) = self.jupyter_index {
let content_end_cell = jupyter_index
.cell(content_end_index.get())
.unwrap_or_default();
if let Some(index) = self.notebook_index {
let content_end_cell = index.cell(content_end_index.get()).unwrap_or_default();
while end_index > content_end_index {
if jupyter_index.cell(end_index.get()).unwrap_or_default() == content_end_cell {
if index.cell(end_index.get()).unwrap_or_default() == content_end_cell {
break;
}
end_index = end_index.saturating_sub(1);
Expand Down Expand Up @@ -256,10 +252,10 @@ impl Display for MessageCodeFrame<'_> {
title: None,
slices: vec![Slice {
source: &source.text,
line_start: self.jupyter_index.map_or_else(
line_start: self.notebook_index.map_or_else(
|| start_index.get(),
|jupyter_index| {
jupyter_index
|notebook_index| {
notebook_index
.cell_row(start_index.get())
.unwrap_or_default() as usize
},
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ pub(crate) fn print_jupyter_messages(
messages,
&EmitterContext::new(&FxHashMap::from_iter([(
path.file_name().unwrap().to_string_lossy().to_string(),
notebook.clone(),
notebook.index().clone(),
)])),
)
.unwrap();
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ colored = { workspace = true, features = ["no-color"]}
insta = { workspace = true, features = ["filters"] }
insta-cmd = { version = "0.4.0" }
tempfile = "3.6.0"
test-case = { workspace = true }
ureq = { version = "2.6.2", features = [] }

[target.'cfg(target_os = "windows")'.dependencies]
Expand Down
28 changes: 20 additions & 8 deletions crates/ruff_cli/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ use std::sync::Mutex;
use std::time::{Duration, SystemTime};

use anyhow::{Context, Result};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};

use ruff::message::Message;
use ruff::settings::Settings;
use ruff::warn_user;
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_diagnostics::{DiagnosticKind, Fix};
use ruff_notebook::NotebookIndex;
use ruff_python_ast::imports::ImportMap;
use ruff_source_file::SourceFileBuilder;
use ruff_text_size::{TextRange, TextSize};
Expand Down Expand Up @@ -193,6 +195,7 @@ impl Cache {
key: T,
messages: &[Message],
imports: &ImportMap,
notebook_index: Option<&NotebookIndex>,
) {
let source = if let Some(msg) = messages.first() {
msg.file.source_text().to_owned()
Expand Down Expand Up @@ -226,6 +229,7 @@ impl Cache {
imports: imports.clone(),
messages,
source,
notebook_index: notebook_index.cloned(),
};
self.new_files.lock().unwrap().insert(path, file);
}
Expand Down Expand Up @@ -263,6 +267,8 @@ pub(crate) struct FileCache {
///
/// This will be empty if `messages` is empty.
source: String,
/// Notebook index if this file is a Jupyter Notebook.
notebook_index: Option<NotebookIndex>,
}

impl FileCache {
Expand All @@ -283,7 +289,12 @@ impl FileCache {
})
.collect()
};
Diagnostics::new(messages, self.imports.clone())
let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() {
FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())])
} else {
FxHashMap::default()
};
Diagnostics::new(messages, self.imports.clone(), notebook_indexes)
}
}

Expand Down Expand Up @@ -350,16 +361,19 @@ mod tests {
use anyhow::Result;
use ruff_python_ast::imports::ImportMap;

#[test]
fn same_results() {
use test_case::test_case;

#[test_case("../ruff/resources/test/fixtures", "ruff_tests/cache_same_results_ruff"; "ruff_fixtures")]
#[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")]
fn same_results(package_root: &str, cache_dir_path: &str) {
let mut cache_dir = temp_dir();
cache_dir.push("ruff_tests/cache_same_results");
cache_dir.push(cache_dir_path);
let _ = fs::remove_dir_all(&cache_dir);
cache::init(&cache_dir).unwrap();

let settings = AllSettings::default();

let package_root = fs::canonicalize("../ruff/resources/test/fixtures").unwrap();
let package_root = fs::canonicalize(package_root).unwrap();
let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib);
assert_eq!(cache.new_files.lock().unwrap().len(), 0);

Expand Down Expand Up @@ -444,9 +458,6 @@ mod tests {
.unwrap();
}

// Not stored in the cache.
expected_diagnostics.notebooks.clear();
got_diagnostics.notebooks.clear();
assert_eq!(expected_diagnostics, got_diagnostics);
}

Expand Down Expand Up @@ -614,6 +625,7 @@ mod tests {
imports: ImportMap::new(),
messages: Vec::new(),
source: String::new(),
notebook_index: None,
},
);

Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_cli/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use itertools::Itertools;
use log::{debug, error, warn};
#[cfg(not(target_family = "wasm"))]
use rayon::prelude::*;
use rustc_hash::FxHashMap;

use ruff::message::Message;
use ruff::registry::Rule;
Expand Down Expand Up @@ -156,6 +157,7 @@ pub(crate) fn check(
TextSize::default(),
)],
ImportMap::default(),
FxHashMap::default(),
)
} else {
warn!(
Expand Down
Loading

0 comments on commit ee0f127

Please sign in to comment.