Skip to content

Commit

Permalink
fix(rust, python): add file path to io error in scan_csv (pola-rs#10076)
Browse files Browse the repository at this point in the history
  • Loading branch information
rikkaka authored Jul 26, 2023
1 parent 83da1e8 commit 6adc0e9
Show file tree
Hide file tree
Showing 13 changed files with 44 additions and 14 deletions.
2 changes: 1 addition & 1 deletion polars/polars-io/src/csv/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ impl<'a> CsvReader<'a, File> {
/// This is the recommended way to create a csv reader as this allows for fastest parsing.
pub fn from_path<P: Into<PathBuf>>(path: P) -> PolarsResult<Self> {
let path = resolve_homedir(&path.into());
let f = std::fs::File::open(&path)?;
let f = polars_utils::open_file(&path)?;
Ok(Self::new(f).with_path(Some(path)))
}
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/src/ndjson/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl<'a> JsonLineReader<'a, File> {
/// This is the recommended way to create a json reader as this allows for fastest parsing.
pub fn from_path<P: Into<PathBuf>>(path: P) -> PolarsResult<Self> {
let path = resolve_homedir(&path.into());
let f = std::fs::File::open(&path)?;
let f = polars_utils::open_file(&path)?;
Ok(Self::new(f).with_path(Some(path)))
}
}
Expand Down
3 changes: 1 addition & 2 deletions polars/polars-io/src/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use super::*;

#[cfg(test)]
mod test {
use std::fs::File;
use std::io::Cursor;

use polars_core::df;
Expand All @@ -40,7 +39,7 @@ mod test {
#[test]
fn test_parquet() {
// In CI: This test will be skipped because the file does not exist.
if let Ok(r) = File::open("data/simple.parquet") {
if let Ok(r) = polars_utils::open_file("data/simple.parquet") {
let reader = ParquetReader::new(r);
let df = reader.finish().unwrap();
assert_eq!(df.get_column_names(), ["a", "b"]);
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/src/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ mod test {
.collect::<PolarsResult<Vec<_>>>()?;

assert_eq!(ipc_paths.len(), 1);
let reader = BufReader::new(std::fs::File::open(&ipc_paths[0])?);
let reader = BufReader::new(polars_utils::open_file(&ipc_paths[0])?);
let df = IpcReader::new(reader).finish()?;
assert!(expected_df.frame_equal(&df));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl Source for GroupBySource {
if partition_dir.exists() {
for file in std::fs::read_dir(partition_dir).expect("should be there") {
let spilled = file.unwrap().path();
let file = std::fs::File::open(spilled)?;
let file = polars_utils::open_file(spilled)?;
let reader = IpcReader::new(file);
let spilled = reader.finish().unwrap();
if spilled.n_chunks() > 1 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::executors::sinks::sort::source::SortSource;
use crate::operators::FinalizedSink;

pub(super) fn read_df(path: &Path) -> PolarsResult<DataFrame> {
let file = std::fs::File::open(path)?;
let file = polars_utils::open_file(path)?;
IpcReader::new(file).set_rechunk(false).finish()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub struct IpcSourceOneShot {
impl IpcSourceOneShot {
#[allow(unused_variables)]
pub(crate) fn new(path: &Path) -> PolarsResult<Self> {
let file = File::open(path)?;
let file = polars_utils::open_file(path)?;
let reader = Some(IpcReader::new(file));

Ok(IpcSourceOneShot { reader })
Expand Down
14 changes: 11 additions & 3 deletions polars/polars-lazy/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ impl LogicalPlanBuilder {
ParquetAsyncReader::file_info(&uri, cloud_options.as_ref())?
}
} else {
let file = std::fs::File::open(&path)?;
let file = polars_utils::open_file(&path)?;
let mut reader = ParquetReader::new(file);
(reader.schema()?, reader.num_rows()?)
};
Expand Down Expand Up @@ -195,7 +195,7 @@ impl LogicalPlanBuilder {
use polars_io::SerReader as _;

let path = path.into();
let file = std::fs::File::open(&path)?;
let file = polars_utils::open_file(&path)?;
let mut reader = IpcReader::new(file);

let mut schema = reader.schema()?;
Expand Down Expand Up @@ -253,7 +253,15 @@ impl LogicalPlanBuilder {
try_parse_dates: bool,
) -> PolarsResult<Self> {
let path = path.into();
let mut file = std::fs::File::open(&path)?;
let mut file = polars_utils::open_file(&path).map_err(|e| {
let path = path.to_string_lossy();
if path.len() > 88 {
let path: String = path.chars().skip(path.len() - 88).collect();
polars_err!(ComputeError: "error open file: ...{}, {}", path, e)
} else {
polars_err!(ComputeError: "error open file: {}, {}", path, e)
}
})?;
let mut magic_nr = [0u8; 2];
file.read_exact(&mut magic_nr)
.map_err(|_| polars_err!(NoData: "empty csv"))?;
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-lazy/src/frame/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,9 @@ impl<'a> LazyCsvReader<'a> {
Some(globresult) => globresult?,
None => polars_bail!(ComputeError: "globbing pattern did not match any files"),
};
std::fs::File::open(&path)
polars_utils::open_file(&path)
} else {
std::fs::File::open(&self.path)
polars_utils::open_file(&self.path)
}?;
let reader_bytes = get_reader_bytes(&mut file).expect("could not mmap file");
let mut skip_rows = self.skip_rows;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ impl AnonymousScan for LazyJsonLineReader {
}

fn schema(&self, infer_schema_length: Option<usize>) -> PolarsResult<Schema> {
let f = std::fs::File::open(&self.path)?;
let f = polars_utils::open_file(&self.path)?;
let mut reader = std::io::BufReader::new(f);

let data_type =
Expand Down
1 change: 1 addition & 0 deletions polars/polars-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ ahash.workspace = true
hashbrown.workspace = true
num-traits.workspace = true
once_cell.workspace = true
polars-error = { version = "0.31.1", path = "../polars-error" }
rayon.workspace = true
smartstring.workspace = true
sysinfo = { version = "0.29", default-features = false, optional = true }
Expand Down
19 changes: 19 additions & 0 deletions polars/polars-utils/src/io.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use std::fs::File;
use std::path::Path;

use polars_error::*;

/// Opens the file at `path` for reading, attaching the path to any I/O error.
///
/// # Errors
///
/// If `std::fs::File::open` fails, returns a `ComputeError` whose message
/// contains the path (lossily converted to UTF-8) followed by the underlying
/// I/O error. Paths longer than 88 characters are shortened to their last
/// 88 characters and prefixed with `...`.
pub fn open_file<P>(path: P) -> PolarsResult<File>
where
    P: AsRef<Path>,
{
    // Maximum number of trailing path characters shown in the error message.
    const MAX_PATH_CHARS: usize = 88;

    std::fs::File::open(&path).map_err(|e| {
        let path = path.as_ref().to_string_lossy();
        // Count characters rather than bytes: `str::len` is a byte length, and
        // using it as the argument to `chars().skip(..)` would drop too much of
        // a path that contains multi-byte UTF-8 characters.
        let n_chars = path.chars().count();
        if n_chars > MAX_PATH_CHARS {
            // Keep only the tail of the path; the end is usually the most
            // informative part (file name and nearest directories).
            let path: String = path.chars().skip(n_chars - MAX_PATH_CHARS).collect();
            polars_err!(ComputeError: "error open file: ...{}, {}", path, e)
        } else {
            polars_err!(ComputeError: "error open file: {}, {}", path, e)
        }
    })
}
3 changes: 3 additions & 0 deletions polars/polars-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,6 @@ pub mod macros;
pub mod vec;
#[cfg(target_family = "wasm")]
pub mod wasm;

pub mod io;
pub use io::open_file;

0 comments on commit 6adc0e9

Please sign in to comment.