diff --git a/src/daft-scan/src/glob.rs b/src/daft-scan/src/glob.rs
index af60a0e0b0..6576479d3c 100644
--- a/src/daft-scan/src/glob.rs
+++ b/src/daft-scan/src/glob.rs
@@ -272,9 +272,28 @@ impl ScanOperator for GlobScanOperator {
     }
 
     fn multiline_display(&self) -> Vec<String> {
+        let condensed_glob_paths = if self.glob_paths.len() <= 7 {
+            self.glob_paths.join(", ")
+        } else {
+            let first_three: Vec<String> = self.glob_paths.iter().take(3).cloned().collect();
+            let last_three: Vec<String> = self
+                .glob_paths
+                .iter()
+                .skip(self.glob_paths.len() - 3)
+                .cloned()
+                .collect();
+
+            let mut result = first_three.join(", ");
+            result.push_str(", ...");
+            result.push_str(", ");
+            result.push_str(&last_three.join(", "));
+
+            result
+        };
+
         let mut lines = vec![
             "GlobScanOperator".to_string(),
-            format!("Glob paths = [{}]", self.glob_paths.join(", ")),
+            format!("Glob paths = [{}]", condensed_glob_paths),
         ];
         lines.extend(self.file_format_config.multiline_display());
         lines.extend(self.storage_config.multiline_display());
diff --git a/src/daft-scan/src/lib.rs b/src/daft-scan/src/lib.rs
index 98d557463d..ee3d5d7a38 100644
--- a/src/daft-scan/src/lib.rs
+++ b/src/daft-scan/src/lib.rs
@@ -967,8 +967,9 @@ mod test {
 
     use crate::{
         file_format::{FileFormatConfig, ParquetSourceConfig},
+        glob::GlobScanOperator,
         storage_config::{NativeStorageConfig, StorageConfig},
-        DataSource, Pushdowns, ScanTask,
+        DataSource, Pushdowns, ScanOperator, ScanTask,
     };
 
     fn make_scan_task(num_sources: usize) -> ScanTask {
@@ -1003,6 +1004,44 @@ mod test {
         )
     }
 
+    fn make_glob_scan_operator(num_sources: usize) -> GlobScanOperator {
+        let file_format_config: FileFormatConfig = FileFormatConfig::Parquet(ParquetSourceConfig {
+            coerce_int96_timestamp_unit: TimeUnit::Seconds,
+            field_id_mapping: None,
+            row_groups: None,
+            chunk_size: None,
+        });
+
+        let mut sources: Vec<String> = Vec::new();
+
+        for _ in 0..num_sources {
+            sources.push(format!("../../tests/assets/parquet-data/mvp.parquet"));
+        }
+
+        let glob_paths: Vec<&str> = sources.iter().map(|s| s.as_str()).collect();
+
+        let glob_scan_operator: GlobScanOperator = GlobScanOperator::try_new(
+            &glob_paths,
+            Arc::new(file_format_config),
+            Arc::new(StorageConfig::Native(Arc::new(
+                NativeStorageConfig::new_internal(false, None),
+            ))),
+            false,
+            Some(Arc::new(Schema::empty())),
+        )
+        .unwrap();
+
+        glob_scan_operator
+    }
+
+    #[test]
+    fn test_glob_display_condenses() -> DaftResult<()> {
+        let glob_scan_operator: GlobScanOperator = make_glob_scan_operator(8);
+        let condensed_glob_paths: Vec<String> = glob_scan_operator.multiline_display();
+        assert_eq!(condensed_glob_paths[1], "Glob paths = [../../tests/assets/parquet-data/mvp.parquet, ../../tests/assets/parquet-data/mvp.parquet, ../../tests/assets/parquet-data/mvp.parquet, ..., ../../tests/assets/parquet-data/mvp.parquet, ../../tests/assets/parquet-data/mvp.parquet, ../../tests/assets/parquet-data/mvp.parquet]");
+        Ok(())
+    }
+
     #[test]
     fn test_display_condenses() -> DaftResult<()> {
         let scan_task = make_scan_task(7);