From 1b439bb30fe1267f72b2fe7297563c0a01768ea1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 26 May 2022 14:21:53 +0900 Subject: [PATCH] Add minimum C API Closes #1113 This exports a minimal C API that is enough to write the following Rust code in C: use datafusion::prelude::*; #[tokio::main] async fn main() -> datafusion::error::Result<()> { // create the session context let mut ctx = SessionContext::new(); // create a plan to run a SQL query let df = ctx.sql("SELECT 1").await?; // execute and print results df.show().await?; Ok(()) } See datafusion/c/examples/sql.c for the C version. You can build and run datafusion/c/examples/sql.c with the following commands: $ cargo build $ cc -o target/debug/sql datafusion/c/examples/sql.c -Idatafusion/c/include -Ltarget/debug -Wl,--rpath=target/debug -ldatafusion_c $ target/debug/sql +----------+ | Int64(1) | +----------+ | 1 | +----------+ This implementation doesn't export Future like datafusion-python. Async functions are block_on()-ed in the exported API. But I think that we can export Future in follow-up tasks. Follow-up tasks: * Add support for testing by "cargo test" * Add support for building and running examples by "cargo ..."
* Add support for installing datafusion.h --- Cargo.toml | 1 + datafusion/c/Cargo.toml | 46 +++++++++ datafusion/c/examples/sql.c | 45 +++++++++ datafusion/c/include/datafusion.h | 46 +++++++++ datafusion/c/src/lib.rs | 155 ++++++++++++++++++++++++++++++ 5 files changed, 293 insertions(+) create mode 100644 datafusion/c/Cargo.toml create mode 100644 datafusion/c/examples/sql.c create mode 100644 datafusion/c/include/datafusion.h create mode 100644 datafusion/c/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index db2cc46e9079b..3ad9e13b10f4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ [workspace] members = [ + "datafusion/c", "datafusion/common", "datafusion/core", "datafusion/data-access", diff --git a/datafusion/c/Cargo.toml b/datafusion/c/Cargo.toml new file mode 100644 index 0000000000000..dadbe9dc2b9ca --- /dev/null +++ b/datafusion/c/Cargo.toml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "datafusion_c" +description = "DataFusion C API" +version = "8.0.0" +homepage = "https://github.com/apache/arrow-datafusion" +repository = "https://github.com/apache/arrow-datafusion" +readme = "../../README.md" +authors = ["Apache Arrow <dev@arrow.apache.org>"] +license = "Apache-2.0" +keywords = ["arrow", "c"] +include = [ + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.59" + +[lib] +name = "datafusion_c" +path = "src/lib.rs" +crate-type = ["cdylib"] + +[features] +default = [] + +[dependencies] +datafusion = { path = "../core", version = "8.0.0" } +libc = "0.2" +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } diff --git a/datafusion/c/examples/sql.c b/datafusion/c/examples/sql.c new file mode 100644 index 0000000000000..798d273e5de81 --- /dev/null +++ b/datafusion/c/examples/sql.c @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */
+
+#include <datafusion.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Runs "SELECT 1;" through the DataFusion C API and prints the result.
+ *
+ * Returns EXIT_SUCCESS only when both creating the data frame and showing
+ * it succeed; any reported DFError makes the program exit with
+ * EXIT_FAILURE.
+ */
+int
+main(void)
+{
+  int status = EXIT_SUCCESS;
+  DFSessionContext *context = df_session_context_new();
+  DFError *error = NULL;
+  DFDataFrame *data_frame = df_session_context_sql(context, "SELECT 1;", &error);
+  if (error) {
+    printf("failed to run SQL: %s\n", df_error_get_message(error));
+    df_error_free(error);
+    df_session_context_free(context);
+    return EXIT_FAILURE;
+  }
+  df_data_frame_show(data_frame, &error);
+  if (error) {
+    printf("failed to show data frame: %s\n", df_error_get_message(error));
+    df_error_free(error);
+    /* Report the failure to the caller instead of exiting successfully. */
+    status = EXIT_FAILURE;
+  }
+  df_data_frame_unref(data_frame);
+  df_session_context_free(context);
+  return status;
+}
diff --git a/datafusion/c/include/datafusion.h b/datafusion/c/include/datafusion.h
new file mode 100644
index 0000000000000..d5a81e0668ba8
--- /dev/null
+++ b/datafusion/c/include/datafusion.h
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Opaque error object filled in through the DFError ** out-parameters. */
+typedef struct DFError_ DFError;
+/* Frees error together with the message it owns. */
+extern void df_error_free(DFError *error);
+/* Returns the message stored in error. The string is owned by error and is
+ * released by df_error_free(). */
+extern const char *df_error_get_message(DFError *error);
+
+
+/* Opaque handle to a data frame returned by df_session_context_sql(). */
+typedef struct DFDataFrame_ DFDataFrame;
+/* Releases the handle data_frame. */
+extern void df_data_frame_unref(DFDataFrame *data_frame);
+/* Runs show() on data_frame, blocking until it has finished. A failure is
+ * reported as a new DFError stored in *error when error is not NULL. */
+extern void df_data_frame_show(DFDataFrame *data_frame, DFError **error);
+
+
+/* Opaque handle to a session context. */
+typedef struct DFSessionContext_ DFSessionContext;
+/* Creates a new session context; release it with
+ * df_session_context_free(). */
+extern DFSessionContext *df_session_context_new(void);
+/* Releases a context created by df_session_context_new(). */
+extern void df_session_context_free(DFSessionContext *ctx);
+/* Runs the NUL-terminated query sql on ctx, blocking until it has finished.
+ * Returns a new data frame to be released with df_data_frame_unref(), or
+ * NULL on failure; a failure is reported as a new DFError stored in *error
+ * when error is not NULL. */
+extern DFDataFrame *df_session_context_sql(DFSessionContext *ctx,
+                                           const char *sql,
+                                           DFError **error);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/datafusion/c/src/lib.rs b/datafusion/c/src/lib.rs
new file mode 100644
index 0000000000000..216315e6e0657
--- /dev/null
+++ b/datafusion/c/src/lib.rs
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::boxed::Box;
+use std::ffi::CStr;
+use std::ffi::CString;
+use std::future::Future;
+use std::sync::Arc;
+
+use datafusion::dataframe::DataFrame;
+use datafusion::execution::context::SessionContext;
+
+/// Error reported to C callers through a `DFError **` out-parameter.
+///
+/// `message` is a NUL-terminated string allocated with `libc::strdup()`;
+/// `df_error_free()` releases it with `libc::free()`.
+#[repr(C)]
+pub struct DFError {
+    code: u32,
+    message: *mut libc::c_char,
+}
+
+impl DFError {
+    /// Wraps `code` and an already allocated `message`.
+    ///
+    /// `message` is later released by `df_error_free()`, so it must have been
+    /// allocated with the C allocator (or be NULL).
+    pub fn new(code: u32, message: *mut libc::c_char) -> Self {
+        Self { code, message }
+    }
+}
+
+/// Creates a heap-allocated `DFError` that holds a copy of `message`.
+///
+/// A NULL `message` is stored as an empty string. The returned pointer must
+/// be released with `df_error_free()`.
+///
+/// # Safety
+///
+/// A non-NULL `message` must point to a NUL-terminated string.
+#[no_mangle]
+pub extern "C" fn df_error_new(code: u32, message: *const libc::c_char) -> *mut DFError {
+    let source = if message.is_null() {
+        // strdup(NULL) is undefined behavior; fall back to an empty string.
+        b"\0".as_ptr() as *const libc::c_char
+    } else {
+        message
+    };
+    // SAFETY: `source` is not NULL and, per the contract above, points to a
+    // NUL-terminated string.
+    let copied = unsafe { libc::strdup(source) };
+    Box::into_raw(Box::new(DFError::new(code, copied)))
+}
+
+/// Releases an error created by `df_error_new()`. NULL is ignored.
+///
+/// # Safety
+///
+/// A non-NULL `error` must come from `df_error_new()` and must not be used
+/// after this call.
+#[no_mangle]
+pub extern "C" fn df_error_free(error: *mut DFError) {
+    if error.is_null() {
+        return;
+    }
+    // SAFETY: `error` was produced by `Box::into_raw()` in `df_error_new()`
+    // and its message by `strdup()`, so both can be released here.
+    unsafe {
+        let error = Box::from_raw(error);
+        libc::free(error.message as *mut libc::c_void);
+    }
+}
+
+/// Returns the message owned by `error`, or NULL when `error` is NULL.
+///
+/// The returned string stays valid until `df_error_free(error)` is called.
+#[no_mangle]
+pub extern "C" fn df_error_get_message(error: *mut DFError) -> *const libc::c_char {
+    if error.is_null() {
+        return std::ptr::null();
+    }
+    // SAFETY: a non-NULL `error` must be a live pointer from `df_error_new()`.
+    unsafe { (*error).message }
+}
+
+/// Converts a `Result` into an `Option`, reporting `Err` through a C-style
+/// `DFError **` out-parameter.
+trait IntoDFError {
+    type Value;
+    /// Returns `Some(value)` for `Ok(value)`. For `Err`, stores a new
+    /// `DFError` in `*error` when `error` is not NULL and returns
+    /// `error_value`.
+    fn into_df_error(
+        self,
+        error: *mut *mut DFError,
+        error_value: Option<Self::Value>,
+    ) -> Option<Self::Value>;
+}
+
+impl<V, E: std::fmt::Display> IntoDFError for Result<V, E> {
+    type Value = V;
+    fn into_df_error(
+        self,
+        error: *mut *mut DFError,
+        error_value: Option<Self::Value>,
+    ) -> Option<Self::Value> {
+        match self {
+            Ok(value) => Some(value),
+            Err(e) => {
+                if !error.is_null() {
+                    // A C string cannot contain interior NULs. Replace them
+                    // instead of giving up, so that a failure is never
+                    // returned to the caller without `*error` being set.
+                    let message = e.to_string().replace('\0', " ");
+                    let c_message = CString::new(message).unwrap_or_default();
+                    // SAFETY: `error` is not NULL and must point to writable
+                    // storage for a `DFError *` provided by the caller.
+                    unsafe { *error = df_error_new(1, c_message.as_ptr()) };
+                }
+                error_value
+            }
+        }
+    }
+}
+
+/// Reports a NULL pointer passed for the argument `name` through `error`.
+fn null_argument_error(error: *mut *mut DFError, name: &str) {
+    Err::<(), String>(format!("{} must not be NULL", name)).into_df_error(error, None);
+}
+
+/// Runs `future` to completion on a freshly created Tokio runtime.
+///
+/// Runtime creation failures are returned instead of panicking because a
+/// panic must not unwind into the C caller.
+fn block_on<F: Future>(future: F) -> std::io::Result<F::Output> {
+    let runtime = tokio::runtime::Runtime::new()?;
+    Ok(runtime.block_on(future))
+}
+
+/// Heap-allocated handle that exposes a DataFusion `DataFrame` to C.
+#[repr(C)]
+pub struct DFDataFrame {
+    data_frame: Arc<DataFrame>,
+}
+
+impl DFDataFrame {
+    /// Wraps `data_frame` so that it can be handed out as an opaque pointer.
+    pub fn new(data_frame: Arc<DataFrame>) -> Self {
+        Self { data_frame }
+    }
+}
+
+/// Releases a handle returned by `df_session_context_sql()`. NULL is ignored.
+#[no_mangle]
+pub extern "C" fn df_data_frame_unref(data_frame: *mut DFDataFrame) {
+    if data_frame.is_null() {
+        return;
+    }
+    // SAFETY: a non-NULL `data_frame` was produced by `Box::into_raw()` in
+    // `df_session_context_sql()` and is not used after this call.
+    unsafe { drop(Box::from_raw(data_frame)) };
+}
+
+/// Runs `show()` on `data_frame`, blocking until it has finished.
+///
+/// Failures, including a NULL `data_frame`, are reported through `error`
+/// when it is not NULL.
+#[no_mangle]
+pub extern "C" fn df_data_frame_show(
+    data_frame: *mut DFDataFrame,
+    error: *mut *mut DFError,
+) {
+    if data_frame.is_null() {
+        null_argument_error(error, "data_frame");
+        return;
+    }
+    // SAFETY: a non-NULL `data_frame` must be a live handle returned by
+    // `df_session_context_sql()`.
+    let future = unsafe { (*data_frame).data_frame.show() };
+    // The outer result is the runtime creation, the inner one is `show()`.
+    if let Some(result) = block_on(future).into_df_error(error, None) {
+        result.into_df_error(error, None);
+    }
+}
+
+/// Creates a new session context. Release it with
+/// `df_session_context_free()`.
+#[no_mangle]
+pub extern "C" fn df_session_context_new() -> *mut SessionContext {
+    Box::into_raw(Box::new(SessionContext::new()))
+}
+
+/// Releases a context created by `df_session_context_new()`. NULL is ignored.
+#[no_mangle]
+pub extern "C" fn df_session_context_free(ctx: *mut SessionContext) {
+    if ctx.is_null() {
+        return;
+    }
+    // SAFETY: a non-NULL `ctx` was produced by `Box::into_raw()` in
+    // `df_session_context_new()` and is not used after this call.
+    unsafe { drop(Box::from_raw(ctx)) };
+}
+
+/// Runs the query `sql` on `ctx`, blocking until it has finished.
+///
+/// Returns a new handle that must be released with `df_data_frame_unref()`,
+/// or NULL on failure. Failures (NULL arguments, non UTF-8 `sql`, runtime
+/// creation errors and DataFusion errors) are reported through `error` when
+/// it is not NULL.
+///
+/// # Safety
+///
+/// A non-NULL `sql` must point to a NUL-terminated string and a non-NULL
+/// `ctx` must be a live pointer from `df_session_context_new()`.
+#[no_mangle]
+pub extern "C" fn df_session_context_sql(
+    ctx: *mut SessionContext,
+    sql: *const libc::c_char,
+    error: *mut *mut DFError,
+) -> *mut DFDataFrame {
+    if ctx.is_null() {
+        null_argument_error(error, "ctx");
+        return std::ptr::null_mut();
+    }
+    if sql.is_null() {
+        null_argument_error(error, "sql");
+        return std::ptr::null_mut();
+    }
+    // SAFETY: `sql` is not NULL and must be NUL-terminated (see above).
+    let cstr_sql = unsafe { CStr::from_ptr(sql) };
+    let rs_sql = match cstr_sql.to_str().into_df_error(error, None) {
+        Some(rs_sql) => rs_sql,
+        None => return std::ptr::null_mut(),
+    };
+    // SAFETY: `ctx` is not NULL and must be a live context (see above).
+    let future = unsafe { (*ctx).sql(rs_sql) };
+    // The outer result is the runtime creation, the inner one is the query.
+    let maybe_data_frame = block_on(future)
+        .into_df_error(error, None)
+        .and_then(|result| result.into_df_error(error, None));
+    match maybe_data_frame {
+        Some(data_frame) => Box::into_raw(Box::new(DFDataFrame::new(data_frame))),
+        None => std::ptr::null_mut(),
+    }
+}