feat(read_ file funcs): infer from compressed formats #2639

Merged · 26 commits · Feb 21, 2024
Changes from 3 commits
crates/datafusion_ext/src/planner/relation/mod.rs (43 additions, 1 deletion)
@@ -16,7 +16,7 @@
// under the License.

use std::collections::HashMap;
-use std::path::Path;
+use std::path::{Path, PathBuf};

use async_recursion::async_recursion;
use datafusion::common::{DataFusionError, OwnedTableReference, Result};
@@ -242,6 +242,13 @@ fn infer_func_for_file(path: &str) -> Result<OwnedTableReference> {
.ok_or_else(|| DataFusionError::Plan(format!("strange file extension: {path}")))?
.to_lowercase();

let binding = PathBuf::from(path);
let filename = binding
.file_name()
.ok_or_else(|| DataFusionError::Plan(format!("NO file name provided: {path}")))?
.to_str()
.ok_or_else(|| DataFusionError::Plan(format!("Improper file name: {path}")))?;

// TODO: We can be a bit more sophisticated here and handle compression
// schemes as well.
Ok(match ext.as_str() {
@@ -261,10 +268,45 @@ fn infer_func_for_file(path: &str) -> Result<OwnedTableReference> {
schema: "public".into(),
table: "read_bson".into(),
},
"gz" => {
Contributor:

I think we'd want to match on all compression types here.

We can use datafusion's FileCompressionType for this.

if let Ok(compression_type) = ext.parse::<FileCompressionType>() {
  let ext = compression_type.get_ext();
  let path = path.trim_end_matches(ext);
  // then just recursively call the function with the extension removed
  infer_func_for_file(path)
} else {
  match ext.as_str() { /* ... */ }
}
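For reference, a fuller sketch of this recursive approach might look like the following. It is only a sketch: it assumes FileCompressionType parses lowercase extension strings such as "gz", that get_ext() returns the matching suffix, and that the import paths below match the datafusion version in use; the "csv_scan" arm simply mirrors one entry of the existing match and elides the rest.

// Sketch of the suggestion above: strip a recognized compression suffix
// and recurse so compressed paths resolve the same way as their plain forms.
// (Trait/import paths may differ between datafusion versions.)
use datafusion::common::{DataFusionError, GetExt, OwnedTableReference, Result};
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;

fn infer_func_for_file(path: &str) -> Result<OwnedTableReference> {
    let ext = std::path::Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .ok_or_else(|| DataFusionError::Plan(format!("missing file extension: {path}")))?
        .to_lowercase();

    // Known compression suffix: strip it and recurse, so "data.csv.gz"
    // is resolved exactly like "data.csv".
    if let Ok(compression) = ext.parse::<FileCompressionType>() {
        let suffix = compression.get_ext();
        let stripped = path.trim_end_matches(suffix.as_str()).trim_end_matches('.');
        return infer_func_for_file(stripped);
    }

    Ok(match ext.as_str() {
        "csv" => OwnedTableReference::Partial {
            schema: "public".into(),
            table: "csv_scan".into(),
        },
        // ... remaining formats elided ...
        other => {
            return Err(DataFusionError::Plan(format!(
                "unable to infer how to handle file extension: {other}"
            )))
        }
    })
}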

Contributor Author:

I'll use this to include all compression types.

I overlooked the logic you suggested while making my commits; I'll apply it properly as well.

//handling compressed files with .gz extension
infer_func_from_compressed_file(filename)?
}
ext => {
return Err(DataFusionError::Plan(format!(
"unable to infer how to handle file extension: {ext}"
)))
}
})
}

fn infer_func_from_compressed_file(filename: &str) -> Result<OwnedTableReference> {
if filename.contains(".json.gz")
| filename.contains(".json1.gz")
| filename.contains(".ndjson.gz")
{
return Ok(OwnedTableReference::Partial {
schema: "public".into(),
table: "ndjson_scan".into(),
Contributor:

can we use the read_ functions here?

});
} else if filename.contains(".parquet.gz") {
return Ok(OwnedTableReference::Partial {
schema: "public".into(),
table: "parquet_scan".into(),
});
Contributor:

does it make sense to gzip a parquet file, given its internal compression and the fact that gzip will make it difficult/impossible to partition anything from the file?

Contributor:

I think if arrow's parquet reader supports gz compression, then we should too.

Whether it's wise to gzip parquet is another question.

Contributor:

I mean, this isn't a very strong opinion, but I think this is likely to lead people to very poor experiences, and given that, maybe it'd be best to disallow it?

There's no reason we need to recapitulate arrow or datafusion's mistakes.

Contributor @universalmind303 (Feb 13, 2024):

but then if people have gzipped parquet files already created from <insert tool>, it's unreadable. I don't think we should put this limitation on the readers. I could see this being a valid argument for writing though.

Contributor:

> but then if people have gzipped parquet files already created from <insert tool>, it's unreadable. I don't think we should put this limitation on the readers.

I don't think we have a write path that compresses any objects, anyway.

I'm pretty sure this is less about tools that produce gzipped parquet and more about people running gzip from the shell, but that doesn't make it reasonable or correct.

Should we support use-cases which are fundamentally misunderstandings or mistakes, especially when likely to produce bad experiences just because someone might stumble into them?

It's much easier to add something like this later if there's a valid, non-pathological use-case, than it is to spend [the lifetime of the product] doing damage control. Sometimes errors in these cases are more kind than supporting something that's broken but possible.

} else if filename.contains(".csv.gz") {
return Ok(OwnedTableReference::Partial {
schema: "public".into(),
table: "csv_scan".into(),
});
} else if filename.contains(".bson.gz") {
return Ok(OwnedTableReference::Partial {
schema: "public".into(),
table: "read_bson".into(),
});
Contributor:

I'm not 100% sure that this will actually work.

Contributor @universalmind303 (Feb 13, 2024):

I think we should still pass this in to the planner even if it's invalid. I don't think this stage of the planning should be concerned with plan validity. It's only concerned with creating the logical plan from a semantically correct query, even if that plan itself is invalid.

Contributor:

I'm not sure that makes a lot of sense, or at least it's not consistent with the implementation. If we want to implement what you describe, then we split the right side of the string on "." and match the penultimate segment against the formats and the final segment against the compression formats. Really, if it's not this code's responsibility to decide whether the reading function can handle the compression formats, then we should actually ignore them here.

But I don't think that's what we actually want. "Pass invalid arguments through and hope that the downstream code handles it correctly" isn't a reasonable design, and (more importantly) it makes it harder to return a clear error message to users in invalid cases.

Contributor @universalmind303 (Feb 13, 2024):

It's not "hoping downstream code handles it correctly". It's a matter of separation of logic and keeping things DRY. The table function already need to handle this scenario. It's redundant to check it here also

select * from read_bson('some_file.bson.gz'); -- table function already needs to check it
select * from 'some_file.bson.gz'; -- just expands to the above

Contributor:

> It's a matter of separation of logic and keeping things DRY.

My concern is primarily with centralizing the validation logic for all of the formats and table functions (and the DDL-created dispatches to these table providers), so that we can return clear error messages and avoid duplicating validation logic.

> The table function already needs to handle this scenario.

Sort of? The table functions are going to error if they can't read the data or if it's in the wrong format. It's maybe not necessary for them to validate that the path itself is well-formed.

> It's redundant to check it here also.

It is redundant, but so is enumerating every possible compression algorithm and extension variant. We shouldn't need to care about .bson.gz vs .bson.gzip (and so forth). I think the reasonable way to get something helpful and useful here is:

  • split the string on "."; if the last or second-to-last element in the resulting sequence is bson, ndjson, jsonl, or csv, dispatch to the appropriate function (a rough sketch of this follows below).
  • let's leave .json out of this as much as possible because of the ambiguity between text sequences and line-separated json.
  • write tests to validate that the table functions behave correctly with compression and propagate reasonable error messages otherwise.
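For illustration only, a minimal sketch of that split-on-dot dispatch; the helper name and Option-based return are hypothetical, and the table function names simply mirror the ones used elsewhere in this diff.

// Hypothetical helper: inspect the last and second-to-last dot-separated
// segments so "data.csv" and "data.csv.gz" both dispatch to the csv reader,
// regardless of which compression suffix follows the format extension.
fn infer_table_func(filename: &str) -> Option<&'static str> {
    for seg in filename.rsplit('.').take(2) {
        match seg.to_ascii_lowercase().as_str() {
            "csv" => return Some("csv_scan"),
            "ndjson" | "jsonl" => return Some("ndjson_scan"),
            "bson" => return Some("read_bson"),
            "parquet" => return Some("parquet_scan"),
            // plain ".json" intentionally omitted because of the ambiguity
            // between a single json document and line-separated json
            _ => {}
        }
    }
    None
}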

} else {
return Err(DataFusionError::Plan(format!(
"Improper compressed filename with extension .gz : {filename}"
)));
}
}