vega · kylebarron · Jul 23, 2024 · Jul 23, 2024 · Aug 14, 2024 · Aug 15, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -21,6 +21,7 @@ chrono-tz = {version = "0.9.0", features=["case-insensitive", "filter-by-regex"]
 reqwest = { version = "0.11.22", default-features = false }
 tokio = { version = "1.36.0" }
 pyo3 = { version = "0.21.1" }
+pyo3-arrow = { version = "0.2.0" }
 pythonize = { version = "0.21.1" }
 prost = { version = "0.12.3" }
 prost-types = { version = "0.12.3" }

diff --git a/python/vegafusion/vegafusion/runtime.py b/python/vegafusion/vegafusion/runtime.py
@@ -215,6 +215,8 @@ def _import_or_register_inline_datasets(self, inline_datasets=None):
                         pass
 
                 imported_inline_datasets[name] = PandasDatasource(value)
+            elif hasattr(value, "__arrow_c_stream__"):
+                imported_inline_datasets[name] = value
             elif hasattr(value, "__dataframe__"):
                 # Let polars convert to pyarrow since it has broader support than the raw dataframe interchange
                 # protocol, and "This operation is mostly zero copy."

diff --git a/vegafusion-common/Cargo.toml b/vegafusion-common/Cargo.toml
@@ -6,7 +6,7 @@ description = "Common components required by multiple VegaFusion crates"
 license = "BSD-3-Clause"
 
 [features]
-pyarrow = [ "pyo3", "arrow/pyarrow",]
+pyarrow = [ "pyo3", "arrow/pyarrow", "pyo3-arrow"]
 json = [ "serde_json/preserve_order", "arrow/json", "chrono",]
 prettyprint = [ "arrow/prettyprint",]
 proto = ["datafusion-proto", "datafusion-proto-common"]
@@ -52,6 +52,10 @@ optional = true
 workspace = true
 optional = true
 
+[dependencies.pyo3-arrow]
+workspace = true
+optional = true
+
 [dependencies.jni]
 version = "0.21.1"
 optional = true

diff --git a/vegafusion-common/src/data/table.rs b/vegafusion-common/src/data/table.rs
@@ -34,9 +34,10 @@ use {
 use {
     arrow::pyarrow::{FromPyArrow, ToPyArrow},
     pyo3::{
+        conversion::FromPyObjectBound,
         prelude::*,
         types::{PyList, PyTuple},
-        Bound, PyAny, PyErr, PyObject, Python,
+        Bound, PyAny, PyErr, PyObject, PyResult, Python,
     },
 };
 
@@ -271,6 +272,13 @@ impl VegaFusionTable {
         }
     }
 
+    #[cfg(feature = "pyarrow")]
+    pub fn from_arrow_c_stream(table: &Bound<PyAny>) -> PyResult<Self> {
+        let pytable = pyo3_arrow::PyTable::from_py_object_bound(table.as_borrowed())?;
+        let (batches, schema) = pytable.into_inner();
+        Ok(VegaFusionTable::try_new(schema, batches)?)
+    }
+
     #[cfg(feature = "pyarrow")]
     pub fn from_pyarrow(pyarrow_table: &Bound<PyAny>) -> std::result::Result<Self, PyErr> {
         // Extract table.schema as a Rust Schema

diff --git a/vegafusion-python-embed/src/lib.rs b/vegafusion-python-embed/src/lib.rs
@@ -190,6 +190,10 @@ impl PyVegaFusionRuntime {
                                     .scan_py_datasource(inline_dataset.to_object(py)),
                             )?;
                             VegaFusionDataset::DataFrame(df)
+                        } else if inline_dataset.hasattr("__arrow_c_stream__")? {
+                            // Import via Arrow PyCapsule Interface
+                            let table = VegaFusionTable::from_arrow_c_stream(inline_dataset)?;
+                            VegaFusionDataset::from_table_ipc_bytes(&table.to_ipc_bytes()?)?
                         } else {
                             // Assume PyArrow Table
                             // We convert to ipc bytes for two reasons: