-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Custom / Dynamic table provider factories #3311
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,3 +72,12 @@ pub trait TableProvider: Sync + Send { | |
Ok(TableProviderFilterPushDown::Unsupported) | ||
} | ||
} | ||
|
||
/// A factory which creates [`TableProvider`]s at runtime given a table name and a URL. | ||
/// | ||
/// For example, this can be used to create a table "on the fly" | ||
/// from a directory of files only when that name is referenced. | ||
/// | ||
/// Implementors must be `Sync + Send`, as required by the trait bounds. | ||
pub trait TableProviderFactory: Sync + Send { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should The |
||
/// Create a [`TableProvider`] for the table called `name` located at `url`. | ||
/// | ||
/// The provider is returned as a shared `Arc<dyn TableProvider>`. The signature has | ||
/// no error channel, so a failure cannot be reported through the return value. | ||
fn create(&self, name: &str, url: &str) -> Arc<dyn TableProvider>; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,8 +15,13 @@ | |
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use async_trait::async_trait; | ||
use std::any::Any; | ||
use std::io::Write; | ||
|
||
use datafusion::datasource::datasource::TableProviderFactory; | ||
use datafusion::execution::context::SessionState; | ||
use datafusion_expr::TableType; | ||
use tempfile::TempDir; | ||
|
||
use super::*; | ||
|
@@ -360,6 +365,76 @@ async fn create_pipe_delimited_csv_table() -> Result<()> { | |
Ok(()) | ||
} | ||
|
||
/// Stateless stand-in for a real table provider, used only by the custom
/// table factory tests.
///
/// The type has no inherent methods; the tests only need a value that can be
/// handed out by a factory and registered under a table name.
struct TestTableProvider {}
|
||
#[async_trait] | ||
impl TableProvider for TestTableProvider { | ||
fn as_any(&self) -> &dyn Any { | ||
unimplemented!("TestTableProvider is a stub for testing.") | ||
} | ||
|
||
fn schema(&self) -> SchemaRef { | ||
unimplemented!("TestTableProvider is a stub for testing.") | ||
} | ||
|
||
fn table_type(&self) -> TableType { | ||
unimplemented!("TestTableProvider is a stub for testing.") | ||
} | ||
|
||
async fn scan( | ||
&self, | ||
_ctx: &SessionState, | ||
_projection: &Option<Vec<usize>>, | ||
_filters: &[Expr], | ||
_limit: Option<usize>, | ||
) -> Result<Arc<dyn ExecutionPlan>> { | ||
unimplemented!("TestTableProvider is a stub for testing.") | ||
} | ||
} | ||
|
||
struct TestTableFactory {} | ||
|
||
impl TableProviderFactory for TestTableFactory { | ||
fn create(&self, _name: &str, _path: &str) -> Arc<dyn TableProvider> { | ||
Arc::new(TestTableProvider {}) | ||
} | ||
} | ||
|
||
/// Checks that `CREATE EXTERNAL TABLE ... STORED AS DELTATABLE` makes table `dt` | ||
/// visible once a factory has been registered under the name "DELTATABLE". | ||
#[tokio::test] | ||
async fn create_custom_table() -> Result<()> { | ||
let mut ctx = SessionContext::new(); | ||
// Register the stub factory under the same name the STORED AS clause below uses. | ||
ctx.register_table_factory("DELTATABLE", Arc::new(TestTableFactory {})); | ||
|
||
let sql = "CREATE EXTERNAL TABLE dt STORED AS DELTATABLE LOCATION 's3://bucket/schema/table';"; | ||
// Any error from running the statement fails the test immediately via `unwrap`. | ||
ctx.sql(sql).await.unwrap(); | ||
|
||
Comment on lines
+408
to
+412
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This API is very cool -- I like it a lot 💯 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, affirmation of general direction was what I was looking for, I'll clean it up and get it submitted, ty! |
||
// Look the table up through the "datafusion" catalog and its "public" schema. | ||
let cat = ctx.catalog("datafusion").unwrap(); | ||
let schema = cat.schema("public").unwrap(); | ||
let exists = schema.table_exist("dt"); | ||
assert!(exists, "Table should have been created!"); | ||
|
||
Ok(()) | ||
} |
|
||
#[tokio::test] | ||
async fn create_bad_custom_table() { | ||
let ctx = SessionContext::new(); | ||
|
||
let sql = "CREATE EXTERNAL TABLE dt STORED AS DELTATABLE LOCATION 's3://bucket/schema/table';"; | ||
let res = ctx.sql(sql).await; | ||
match res { | ||
Ok(_) => panic!("Registration of tables without factories should fail"), | ||
Err(e) => { | ||
assert!( | ||
e.to_string().contains("Unable to find factory for"), | ||
"Registration of tables without factories should throw correct error" | ||
) | ||
} | ||
} | ||
} | ||
|
||
#[tokio::test] | ||
async fn create_csv_table_empty_file() -> Result<()> { | ||
let ctx = | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a leftover from #3333 -- but I also don't think it hurts to leave it in this PR