Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

fix(rome_service): disallow large files from being parsed and analyzed #3339

Merged
merged 2 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions crates/rome_cli/src/traversal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use rome_diagnostics::{
v2::{
self,
adapters::{IoError, StdError},
category, Advices, Category, Diagnostic, DiagnosticExt, Error, FilePath, LogCategory,
PrintDescription, PrintDiagnostic, Severity, Visit,
category, Advices, Category, Diagnostic, DiagnosticExt, Error, FilePath, PrintDescription,
PrintDiagnostic, Severity, Visit,
},
MAXIMUM_DISPLAYABLE_DIAGNOSTICS,
};
Expand Down Expand Up @@ -238,14 +238,8 @@ struct FormatDiffAdvice<'a> {

impl Advices for FormatDiffAdvice<'_> {
fn record(&self, visitor: &mut dyn Visit) -> io::Result<()> {
// Skip printing the diff for files over 1Mb (probably a minified file)
let max_len = self.old.len().max(self.new.len());
if max_len >= 1_000_000 {
visitor.record_log(LogCategory::Info, &"[Diff not printed for file over 1Mb]")
} else {
let diff = TextEdit::from_unicode_words(self.old, self.new);
visitor.record_diff(&diff)
}
let diff = TextEdit::from_unicode_words(self.old, self.new);
visitor.record_diff(&diff)
}
}

Expand Down
88 changes: 88 additions & 0 deletions crates/rome_cli/tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,34 @@ mod check {
result,
));
}

#[test]
fn file_too_large() {
let mut fs = MemoryFileSystem::default();
let mut console = BufferConsole::default();

let file_path = Path::new("check.js");
fs.insert(file_path.into(), "statement();\n".repeat(80660).as_bytes());

let result = run_cli(
DynRef::Borrowed(&mut fs),
DynRef::Borrowed(&mut console),
Arguments::from_vec(vec![OsString::from("check"), file_path.as_os_str().into()]),
);

assert!(result.is_ok(), "run_cli returned {result:?}");

// Do not store the content of the file in the snapshot
fs.remove(file_path);

assert_cli_snapshot(SnapshotPayload::new(
module_path!(),
"file_too_large",
fs,
console,
result,
));
}
}

mod ci {
Expand Down Expand Up @@ -934,6 +962,34 @@ mod ci {
result,
));
}

#[test]
fn file_too_large() {
let mut fs = MemoryFileSystem::default();
let mut console = BufferConsole::default();

let file_path = Path::new("ci.js");
fs.insert(file_path.into(), "statement();\n".repeat(80660).as_bytes());

let result = run_cli(
DynRef::Borrowed(&mut fs),
DynRef::Borrowed(&mut console),
Arguments::from_vec(vec![OsString::from("ci"), file_path.as_os_str().into()]),
);

assert!(result.is_ok(), "run_cli returned {result:?}");

// Do not store the content of the file in the snapshot
fs.remove(file_path);

assert_cli_snapshot(SnapshotPayload::new(
module_path!(),
"file_too_large",
fs,
console,
result,
));
}
}

mod format {
Expand Down Expand Up @@ -1740,6 +1796,38 @@ mod format {
result,
));
}

#[test]
fn file_too_large() {
let mut fs = MemoryFileSystem::default();
let mut console = BufferConsole::default();

let file_path = Path::new("format.js");
fs.insert(file_path.into(), "statement();\n".repeat(80660).as_bytes());

let result = run_cli(
DynRef::Borrowed(&mut fs),
DynRef::Borrowed(&mut console),
Arguments::from_vec(vec![
OsString::from("format"),
file_path.as_os_str().into(),
OsString::from("--write"),
]),
);

assert!(result.is_ok(), "run_cli returned {result:?}");

// Do not store the content of the file in the snapshot
fs.remove(file_path);

assert_cli_snapshot(SnapshotPayload::new(
module_path!(),
"file_too_large",
fs,
console,
result,
));
}
}

mod help {
Expand Down
19 changes: 19 additions & 0 deletions crates/rome_cli/tests/snapshots/main_check/file_too_large.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: crates/rome_cli/tests/snap_test.rs
expression: content
---
# Emitted Messages

```block
check.js lint ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

× The file check.js could not be parsed because it's too large (the file is 1.0 MiB long, but the size limit is 1.0 MiB)


```

```block
Skipped 1 files
```


19 changes: 19 additions & 0 deletions crates/rome_cli/tests/snapshots/main_ci/file_too_large.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: crates/rome_cli/tests/snap_test.rs
expression: content
---
# Emitted Messages

```block
ci.js lint ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

× The file ci.js could not be parsed because it's too large (the file is 1.0 MiB long, but the size limit is 1.0 MiB)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a coincidence that the input file is 1.0 Megabyte like the limit? If not, probably that's because we truncated some digits. I am not sure what to do... surely an inexperienced user might think that's a bug.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit of both, in this case the test file is a few bytes larger than 1 MiB but that doesn't show since the values higher than 1 KiB are not printed at full precision. However the file size limit is checked using >= so even a file that's precisely 1 MiB in size would be rejected and print the error anyway.



```

```block
Skipped 1 files
```


19 changes: 19 additions & 0 deletions crates/rome_cli/tests/snapshots/main_format/file_too_large.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: crates/rome_cli/tests/snap_test.rs
expression: content
---
# Emitted Messages

```block
format.js lint ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

× The file format.js could not be parsed because it's too large (the file is 1.0 MiB long, but the size limit is 1.0 MiB)


```

```block
Skipped 1 files
```


64 changes: 64 additions & 0 deletions crates/rome_console/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,67 @@ impl Display for Duration {
})
}
}

#[repr(transparent)]
#[derive(Clone, Copy, Debug)]
pub struct Bytes(pub usize);

impl std::fmt::Display for Bytes {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
let Self(mut value) = *self;

if value < 1024 {
return write!(fmt, "{value} B");
}

const PREFIX: [char; 4] = ['K', 'M', 'G', 'T'];
let prefix = PREFIX
.into_iter()
.find(|_| {
let next_value = value / 1024;
if next_value < 1024 {
return true;
}

value = next_value;
false
})
.unwrap_or('T');

write!(fmt, "{:.1} {prefix}iB", value as f32 / 1024.0)
}
}

impl Display for Bytes {
fn fmt(&self, fmt: &mut Formatter) -> io::Result<()> {
write!(fmt, "{self}")
}
}

#[cfg(test)]
mod tests {
use crate::fmt::Bytes;

#[test]
fn display_bytes() {
// Examples taken from https://stackoverflow.com/a/3758880
assert_eq!(Bytes(0).to_string(), "0 B");
assert_eq!(Bytes(27).to_string(), "27 B");
assert_eq!(Bytes(999).to_string(), "999 B");
assert_eq!(Bytes(1_000).to_string(), "1000 B");
assert_eq!(Bytes(1_023).to_string(), "1023 B");
assert_eq!(Bytes(1_024).to_string(), "1.0 KiB");
assert_eq!(Bytes(1_728).to_string(), "1.7 KiB");
assert_eq!(Bytes(110_592).to_string(), "108.0 KiB");
assert_eq!(Bytes(999_999).to_string(), "976.6 KiB");
assert_eq!(Bytes(7_077_888).to_string(), "6.8 MiB");
assert_eq!(Bytes(452_984_832).to_string(), "432.0 MiB");
assert_eq!(Bytes(28_991_029_248).to_string(), "27.0 GiB");
assert_eq!(Bytes(1_855_425_871_872).to_string(), "1.7 TiB");

#[cfg(target_pointer_width = "32")]
assert_eq!(Bytes(usize::MAX).to_string(), "4.0 GiB");
#[cfg(target_pointer_width = "64")]
assert_eq!(Bytes(usize::MAX).to_string(), "16384.0 TiB");
}
}
5 changes: 5 additions & 0 deletions crates/rome_fs/src/fs/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ impl MemoryFileSystem {
self.errors.insert(path, kind);
}

/// Remove a file from the filesystem
pub fn remove(&mut self, path: &Path) {
self.files.0.write().remove(path);
}

pub fn files(self) -> IntoIter<PathBuf, FileEntry> {
let files = self.files.0.into_inner();
files.into_iter()
Expand Down
10 changes: 10 additions & 0 deletions crates/rome_service/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use rome_console::fmt::Bytes;
use rome_console::{Console, EnvConsole};
use rome_formatter::FormatError;
use rome_fs::{FileSystem, OsFileSystem, RomePath};
Expand Down Expand Up @@ -71,6 +72,12 @@ pub enum RomeError {
TransportError(TransportError),
/// Emitted when the file is ignored and should not be processed
FileIgnored(PathBuf),
/// Emitted when a file could not be parsed because it's larger than the size limite
FileTooLarge {
path: PathBuf,
size: usize,
limit: usize,
},
}

impl Debug for RomeError {
Expand Down Expand Up @@ -163,6 +170,9 @@ impl Display for RomeError {
RomeError::FileIgnored(path) => {
write!(f, "The file {} was ignored", path.display())
}
RomeError::FileTooLarge { path, size, limit } => {
write!(f, "The file {} could not be parsed because it's too large (the file is {} long, but the size limit is {})", path.display(), Bytes(*size), Bytes(*limit))
}
}
}
}
Expand Down
12 changes: 12 additions & 0 deletions crates/rome_service/src/workspace/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,18 @@ impl WorkspaceServer {
.parse
.ok_or_else(self.build_capability_error(rome_path))?;

/// Limit the size of files to 1.0 MiB
const SIZE_LIMIT_IN_BYTES: usize = 1024 * 1024;

let size = document.content.as_bytes().len();
if size >= SIZE_LIMIT_IN_BYTES {
return Err(RomeError::FileTooLarge {
path: rome_path.to_path_buf(),
size,
limit: SIZE_LIMIT_IN_BYTES,
});
}

let parsed = parse(rome_path, document.language_hint, &document.content);

Ok(entry.insert(parsed).clone())
Expand Down