Skip to content

Commit

Permalink
WKTArray (#799)
Browse files Browse the repository at this point in the history
### Change list

- Add a `WKTArray`, which fleshes out our implementation of the GeoArrow spec.
Having a concrete type for WKT data lets us ensure that a WKT array
maintains the data's CRS.

TODO:

- Ensure the to/from WKT conversions use this `WKTArray` instead of a bare string array
  • Loading branch information
kylebarron authored Sep 26, 2024
1 parent 1ed469c commit eddf482
Show file tree
Hide file tree
Showing 22 changed files with 376 additions and 39 deletions.
1 change: 0 additions & 1 deletion python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class NativeType:
class SerializedType:
def __init__(
self,
type: Literal["wkb"],
type: Literal["wkb", "wkt"],
) -> None:
"""Create a new SerializedType
Expand Down
4 changes: 2 additions & 2 deletions python/geoarrow-core/src/ffi/to_python/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ use pyo3_geoarrow::{PyChunkedNativeArray, PyNativeArray};

use pyo3_geoarrow::PyGeoArrowResult;

pub fn geometry_array_to_pyobject(
pub fn native_array_to_pyobject(
py: Python,
arr: Arc<dyn NativeArray>,
) -> PyGeoArrowResult<PyObject> {
Ok(PyNativeArray::new(NativeArrayDyn::new(arr)).into_py(py))
}

pub fn chunked_geometry_array_to_pyobject(
pub fn chunked_native_array_to_pyobject(
py: Python,
arr: Arc<dyn ChunkedNativeArray>,
) -> PyGeoArrowResult<PyObject> {
Expand Down
2 changes: 1 addition & 1 deletion python/geoarrow-core/src/ffi/to_python/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pub mod array;

pub use array::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject};
pub use array::{chunked_native_array_to_pyobject, native_array_to_pyobject};
6 changes: 4 additions & 2 deletions python/geoarrow-core/src/interop/ewkb.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use geoarrow::array::{CoordType, WKBArray};
use geoarrow::datatypes::SerializedType;
use geoarrow::io::geozero::FromEWKB;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3_arrow::PyArray;

use crate::ffi::to_python::geometry_array_to_pyobject;
use crate::ffi::to_python::native_array_to_pyobject;
use pyo3_geoarrow::PyGeoArrowResult;

#[pyfunction]
Expand All @@ -20,6 +21,7 @@ pub fn from_ewkb(py: Python, input: PyArray) -> PyGeoArrowResult<PyObject> {
let wkb_arr = WKBArray::<i64>::try_from((array.as_ref(), field.as_ref()))?;
FromEWKB::from_ewkb(&wkb_arr, CoordType::Interleaved, Default::default(), false)?
}
_ => return Err(PyValueError::new_err("Expected a WKB array").into()),
};
geometry_array_to_pyobject(py, geo_array)
native_array_to_pyobject(py, geo_array)
}
6 changes: 3 additions & 3 deletions python/geoarrow-core/src/interop/shapely/from_shapely.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::sync::Arc;

use crate::crs::CRS;
use crate::ffi::to_python::geometry_array_to_pyobject;
use crate::ffi::to_python::native_array_to_pyobject;
use crate::interop::shapely::utils::import_shapely;
use arrow_array::builder::{BinaryBuilder, Int32BufferBuilder};
use arrow_buffer::OffsetBuffer;
Expand Down Expand Up @@ -120,7 +120,7 @@ pub fn from_shapely(
.into());
};

geometry_array_to_pyobject(py, arr)
native_array_to_pyobject(py, arr)
} else {
// TODO: support 3d WKB
let wkb_arr = make_wkb_arr(py, input, metadata)?;
Expand All @@ -129,7 +129,7 @@ pub fn from_shapely(
NativeType::GeometryCollection(Default::default(), Dimension::XY),
false,
)?;
geometry_array_to_pyobject(py, geom_arr)
native_array_to_pyobject(py, geom_arr)
}
}

Expand Down
9 changes: 6 additions & 3 deletions python/geoarrow-core/src/interop/wkb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ use geoarrow::chunked_array::{ChunkedArrayBase, ChunkedWKBArray};
use geoarrow::datatypes::SerializedType;
use geoarrow::io::wkb::{to_wkb as _to_wkb, FromWKB, ToWKB};
use geoarrow::ArrayBase;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3_arrow::input::AnyArray;
use pyo3_arrow::{PyArray, PyChunkedArray};
use pyo3_geoarrow::PyCoordType;

use crate::ffi::from_python::AnyGeometryInput;
use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject};
use crate::ffi::to_python::{chunked_native_array_to_pyobject, native_array_to_pyobject};
use pyo3_geoarrow::PyGeoArrowResult;

#[pyfunction]
Expand All @@ -36,8 +37,9 @@ pub fn from_wkb(
let wkb_arr = WKBArray::<i64>::try_from((arr.as_ref(), field.as_ref()))?;
FromWKB::from_wkb(&wkb_arr, coord_type)?
}
_ => return Err(PyValueError::new_err("Expected a WKB array").into()),
};
geometry_array_to_pyobject(py, geo_array)
native_array_to_pyobject(py, geo_array)
}
AnyArray::Stream(s) => {
let (chunks, field) = s.into_chunked_array()?.into_inner();
Expand All @@ -57,8 +59,9 @@ pub fn from_wkb(
.collect::<Result<Vec<_>, _>>()?;
FromWKB::from_wkb(&ChunkedWKBArray::new(chunks), coord_type)?
}
_ => return Err(PyValueError::new_err("Expected a WKB array").into()),
};
chunked_geometry_array_to_pyobject(py, geo_array)
chunked_native_array_to_pyobject(py, geo_array)
}
}
}
Expand Down
24 changes: 16 additions & 8 deletions python/geoarrow-core/src/interop/wkt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ use geoarrow::chunked_array::{ChunkedArray, ChunkedMixedGeometryArray};
use geoarrow::io::geozero::FromWKT;
use geoarrow::io::wkt::reader::ParseWKT;
use geoarrow::io::wkt::ToWKT;
use geoarrow::ArrayBase;
use pyo3::exceptions::PyTypeError;
use pyo3::prelude::*;
use pyo3_arrow::input::AnyArray;
use pyo3_arrow::{PyArray, PyChunkedArray};

use crate::ffi::from_python::AnyGeometryInput;
use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject};
use crate::ffi::to_python::{chunked_native_array_to_pyobject, native_array_to_pyobject};
use pyo3_geoarrow::{PyCoordType, PyGeoArrowResult};

#[pyfunction]
Expand All @@ -40,7 +41,7 @@ pub fn from_wkt(
)
}
};
geometry_array_to_pyobject(py, geo_array)
native_array_to_pyobject(py, geo_array)
}
AnyArray::Stream(s) => {
let chunked_arr = s.into_chunked_array()?;
Expand Down Expand Up @@ -77,21 +78,28 @@ pub fn from_wkt(
)
}
};
chunked_geometry_array_to_pyobject(py, Arc::new(geo_array))
chunked_native_array_to_pyobject(py, Arc::new(geo_array))
}
}
}

#[pyfunction]
pub fn to_wkt(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult<PyObject> {
match input {
AnyGeometryInput::Array(array) => return_array(
py,
PyArray::from_array_ref(Arc::new(array.as_ref().to_wkt::<i32>())),
),
AnyGeometryInput::Array(array) => {
let wkt_arr = array.as_ref().to_wkt::<i32>();
let field = wkt_arr.extension_field();
return_array(py, PyArray::new(wkt_arr.into_array_ref(), field))
}
AnyGeometryInput::Chunked(array) => {
let out = array.as_ref().to_wkt::<i32>();
return_chunked_array(py, PyChunkedArray::from_array_refs(out.chunk_refs())?)
let field = out.extension_field();
let chunks = out
.into_inner()
.into_iter()
.map(|chunk| chunk.to_array_ref())
.collect();
return_chunked_array(py, PyChunkedArray::try_new(chunks, field)?)
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions python/geoarrow-core/src/table/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
mod geo_interface;

use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject};
use crate::ffi::to_python::{chunked_native_array_to_pyobject, native_array_to_pyobject};
use crate::interop::util::pytable_to_table;
use geoarrow::array::NativeArrayDyn;
use geoarrow::schema::GeoSchemaExt;
Expand Down Expand Up @@ -29,13 +29,13 @@ pub fn geometry_col(py: Python, input: AnyRecordBatch) -> PyGeoArrowResult<PyObj
let field = schema.field(index);
let array = batch.column(index).as_ref();
let geo_arr = NativeArrayDyn::from_arrow_array(array, field)?.into_inner();
geometry_array_to_pyobject(py, geo_arr)
native_array_to_pyobject(py, geo_arr)
}
AnyRecordBatch::Stream(stream) => {
let table = stream.into_table()?;
let table = pytable_to_table(table)?;
let chunked_geom_arr = table.geometry_column(None)?;
chunked_geometry_array_to_pyobject(py, chunked_geom_arr)
chunked_native_array_to_pyobject(py, chunked_geom_arr)
}
}
}
4 changes: 4 additions & 0 deletions python/pyo3-geoarrow/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,10 @@ impl TryFrom<PyArray> for PyNativeArray {
pub struct PySerializedArray(pub(crate) SerializedArrayDyn);

impl PySerializedArray {
/// Wrap an existing [`SerializedArrayDyn`] in a `PySerializedArray`.
pub fn new(array: SerializedArrayDyn) -> Self {
Self(array)
}

/// Import from raw Arrow capsules
pub fn from_arrow_pycapsule(
schema_capsule: &Bound<PyCapsule>,
Expand Down
1 change: 1 addition & 0 deletions python/pyo3-geoarrow/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ impl PySerializedType {
/// Build a `PySerializedType` from the string name of a serialized encoding.
///
/// The name is lower-cased before matching, so `"WKB"` and `"wkb"` are
/// treated the same. Accepted names are `"wkb"` and `"wkt"`.
///
/// # Errors
///
/// Returns a `PyValueError` when the name is neither `"wkb"` nor `"wkt"`.
fn py_new(r#type: &str) -> PyResult<Self> {
    let normalized = r#type.to_lowercase();
    let serialized = match normalized.as_str() {
        "wkb" => SerializedType::WKB,
        "wkt" => SerializedType::WKT,
        _ => return Err(PyValueError::new_err("Unknown geometry type input")),
    };
    Ok(Self(serialized))
}
Expand Down
12 changes: 6 additions & 6 deletions src/array/binary/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::geo_traits::GeometryTrait;
use crate::scalar::WKB;
// use crate::util::{owned_slice_offsets, owned_slice_validity};
use crate::trait_::{ArrayAccessor, ArrayBase, IntoArrow, SerializedArray};
use arrow::array::AsArray;
use arrow_array::OffsetSizeTrait;
use arrow_array::{Array, BinaryArray, GenericBinaryArray, LargeBinaryArray};
use arrow_buffer::NullBuffer;
Expand All @@ -19,10 +20,9 @@ use arrow_schema::{DataType, Field};
///
/// This is semantically equivalent to `Vec<Option<WKB>>` due to the internal validity bitmap.
///
/// This array _can_ be used directly for operations, but that will incur costly encoding to and
/// from WKB on every operation. Instead, you usually want to use the WKBArray only for
/// serialization purposes (e.g. to and from [GeoParquet](https://geoparquet.org/)) but convert to
/// strongly-typed arrays (such as the [`PointArray`][crate::array::PointArray]) for computations.
/// This array implements [`SerializedArray`], not [`NativeArray`]. This means that you'll need to
/// parse the `WKBArray` into a native-typed GeoArrow array (such as
/// [`PointArray`][crate::array::PointArray]) before using it for computations.
#[derive(Debug, Clone, PartialEq)]
pub struct WKBArray<O: OffsetSizeTrait> {
pub(crate) data_type: SerializedType,
Expand Down Expand Up @@ -240,12 +240,12 @@ impl TryFrom<&dyn Array> for WKBArray<i64> {
fn try_from(value: &dyn Array) -> Result<Self> {
match value.data_type() {
DataType::Binary => {
let downcasted = value.as_any().downcast_ref::<BinaryArray>().unwrap();
let downcasted = value.as_binary::<i32>();
let geom_array: WKBArray<i32> = downcasted.clone().into();
Ok(geom_array.into())
}
DataType::LargeBinary => {
let downcasted = value.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
let downcasted = value.as_binary::<i64>();
Ok(downcasted.clone().into())
}
_ => Err(GeoArrowError::General(format!(
Expand Down
3 changes: 3 additions & 0 deletions src/array/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use arrow_schema::Field;
use arrow_schema::{DataType, FieldRef};

use crate::array::metadata::ArrayMetadata;
use crate::array::wkt::WKTArray;
use crate::array::CoordType;
use crate::array::*;
use crate::datatypes::{Dimension, NativeType, SerializedType};
Expand Down Expand Up @@ -213,6 +214,8 @@ impl SerializedArrayDyn {
let geo_arr: SerializedArrayRef = match data_type {
SerializedType::WKB => Arc::new(WKBArray::<i32>::try_from((array, field))?),
SerializedType::LargeWKB => Arc::new(WKBArray::<i64>::try_from((array, field))?),
SerializedType::WKT => Arc::new(WKTArray::<i32>::try_from((array, field))?),
SerializedType::LargeWKT => Arc::new(WKTArray::<i64>::try_from((array, field))?),
};

Ok(Self(geo_arr))
Expand Down
2 changes: 2 additions & 0 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub use multipolygon::{MultiPolygonArray, MultiPolygonBuilder, MultiPolygonCapac
pub use point::{PointArray, PointBuilder};
pub use polygon::{PolygonArray, PolygonBuilder, PolygonCapacity};
pub use rect::{RectArray, RectBuilder};
pub use wkt::WKTArray;

pub use crate::trait_::{ArrayBase, NativeArray, SerializedArray};

Expand All @@ -40,6 +41,7 @@ pub(crate) mod point;
pub(crate) mod polygon;
pub(crate) mod rect;
pub(crate) mod util;
pub(crate) mod wkt;

use std::sync::Arc;

Expand Down
Loading

0 comments on commit eddf482

Please sign in to comment.