From eddf48228f2f0fdb36d2cbab895768596a7856ba Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 26 Sep 2024 17:16:26 -0400 Subject: [PATCH] WKTArray (#799) ### Change list - Add a `WKTArray`. This fleshes out the geoarrow spec. This is useful to have as a concrete object so that we can ensure the WKT array maintains the data's CRS. TODO: - Ensure to/from WKT uses this `WKTArray` instead of a bare string array --- python/Cargo.lock | 1 - .../python/geoarrow/rust/core/_rust.pyi | 2 +- .../geoarrow-core/src/ffi/to_python/array.rs | 4 +- python/geoarrow-core/src/ffi/to_python/mod.rs | 2 +- python/geoarrow-core/src/interop/ewkb.rs | 6 +- .../src/interop/shapely/from_shapely.rs | 6 +- python/geoarrow-core/src/interop/wkb.rs | 9 +- python/geoarrow-core/src/interop/wkt.rs | 24 +- python/geoarrow-core/src/table/mod.rs | 6 +- python/pyo3-geoarrow/src/array.rs | 4 + python/pyo3-geoarrow/src/data_type.rs | 1 + src/array/binary/array.rs | 12 +- src/array/dynamic.rs | 3 + src/array/mod.rs | 2 + src/array/wkt/array.rs | 241 ++++++++++++++++++ src/array/wkt/mod.rs | 5 + src/chunked_array/mod.rs | 7 +- src/datatypes.rs | 18 ++ src/io/parquet/reader/parse.rs | 3 + src/io/shapefile/reader.rs | 43 ++++ src/io/wkt/writer/api.rs | 15 +- src/table.rs | 1 + 22 files changed, 376 insertions(+), 39 deletions(-) create mode 100644 src/array/wkt/array.rs create mode 100644 src/array/wkt/mod.rs create mode 100644 src/io/shapefile/reader.rs diff --git a/python/Cargo.lock b/python/Cargo.lock index 38aac5e9c..659a5472f 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -1046,7 +1046,6 @@ dependencies = [ "byteorder", "bytes", "chrono", - "dbase", "flatgeobuf", "futures", "geo", diff --git a/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi b/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi index 82291851b..22435bd93 100644 --- a/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi +++ b/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi @@ -278,7 +278,7 @@ 
class NativeType: class SerializedType: def __init__( self, - type: Literal["wkb"], + type: Literal["wkb", "wkt"], ) -> None: """Create a new SerializedType diff --git a/python/geoarrow-core/src/ffi/to_python/array.rs b/python/geoarrow-core/src/ffi/to_python/array.rs index 0380c3c05..38be74eea 100644 --- a/python/geoarrow-core/src/ffi/to_python/array.rs +++ b/python/geoarrow-core/src/ffi/to_python/array.rs @@ -8,14 +8,14 @@ use pyo3_geoarrow::{PyChunkedNativeArray, PyNativeArray}; use pyo3_geoarrow::PyGeoArrowResult; -pub fn geometry_array_to_pyobject( +pub fn native_array_to_pyobject( py: Python, arr: Arc, ) -> PyGeoArrowResult { Ok(PyNativeArray::new(NativeArrayDyn::new(arr)).into_py(py)) } -pub fn chunked_geometry_array_to_pyobject( +pub fn chunked_native_array_to_pyobject( py: Python, arr: Arc, ) -> PyGeoArrowResult { diff --git a/python/geoarrow-core/src/ffi/to_python/mod.rs b/python/geoarrow-core/src/ffi/to_python/mod.rs index 8be8da2c2..34427f51e 100644 --- a/python/geoarrow-core/src/ffi/to_python/mod.rs +++ b/python/geoarrow-core/src/ffi/to_python/mod.rs @@ -1,3 +1,3 @@ pub mod array; -pub use array::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject}; +pub use array::{chunked_native_array_to_pyobject, native_array_to_pyobject}; diff --git a/python/geoarrow-core/src/interop/ewkb.rs b/python/geoarrow-core/src/interop/ewkb.rs index e529875c8..6a4322eec 100644 --- a/python/geoarrow-core/src/interop/ewkb.rs +++ b/python/geoarrow-core/src/interop/ewkb.rs @@ -1,10 +1,11 @@ use geoarrow::array::{CoordType, WKBArray}; use geoarrow::datatypes::SerializedType; use geoarrow::io::geozero::FromEWKB; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3_arrow::PyArray; -use crate::ffi::to_python::geometry_array_to_pyobject; +use crate::ffi::to_python::native_array_to_pyobject; use pyo3_geoarrow::PyGeoArrowResult; #[pyfunction] @@ -20,6 +21,7 @@ pub fn from_ewkb(py: Python, input: PyArray) -> PyGeoArrowResult { let wkb_arr = 
WKBArray::::try_from((array.as_ref(), field.as_ref()))?; FromEWKB::from_ewkb(&wkb_arr, CoordType::Interleaved, Default::default(), false)? } + _ => return Err(PyValueError::new_err("Expected a WKB array").into()), }; - geometry_array_to_pyobject(py, geo_array) + native_array_to_pyobject(py, geo_array) } diff --git a/python/geoarrow-core/src/interop/shapely/from_shapely.rs b/python/geoarrow-core/src/interop/shapely/from_shapely.rs index d7b37fa6c..16c9e1843 100644 --- a/python/geoarrow-core/src/interop/shapely/from_shapely.rs +++ b/python/geoarrow-core/src/interop/shapely/from_shapely.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use crate::crs::CRS; -use crate::ffi::to_python::geometry_array_to_pyobject; +use crate::ffi::to_python::native_array_to_pyobject; use crate::interop::shapely::utils::import_shapely; use arrow_array::builder::{BinaryBuilder, Int32BufferBuilder}; use arrow_buffer::OffsetBuffer; @@ -120,7 +120,7 @@ pub fn from_shapely( .into()); }; - geometry_array_to_pyobject(py, arr) + native_array_to_pyobject(py, arr) } else { // TODO: support 3d WKB let wkb_arr = make_wkb_arr(py, input, metadata)?; @@ -129,7 +129,7 @@ pub fn from_shapely( NativeType::GeometryCollection(Default::default(), Dimension::XY), false, )?; - geometry_array_to_pyobject(py, geom_arr) + native_array_to_pyobject(py, geom_arr) } } diff --git a/python/geoarrow-core/src/interop/wkb.rs b/python/geoarrow-core/src/interop/wkb.rs index 572d842c4..915e79c0b 100644 --- a/python/geoarrow-core/src/interop/wkb.rs +++ b/python/geoarrow-core/src/interop/wkb.rs @@ -3,13 +3,14 @@ use geoarrow::chunked_array::{ChunkedArrayBase, ChunkedWKBArray}; use geoarrow::datatypes::SerializedType; use geoarrow::io::wkb::{to_wkb as _to_wkb, FromWKB, ToWKB}; use geoarrow::ArrayBase; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3_arrow::input::AnyArray; use pyo3_arrow::{PyArray, PyChunkedArray}; use pyo3_geoarrow::PyCoordType; use crate::ffi::from_python::AnyGeometryInput; -use 
crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject}; +use crate::ffi::to_python::{chunked_native_array_to_pyobject, native_array_to_pyobject}; use pyo3_geoarrow::PyGeoArrowResult; #[pyfunction] @@ -36,8 +37,9 @@ pub fn from_wkb( let wkb_arr = WKBArray::::try_from((arr.as_ref(), field.as_ref()))?; FromWKB::from_wkb(&wkb_arr, coord_type)? } + _ => return Err(PyValueError::new_err("Expected a WKB array").into()), }; - geometry_array_to_pyobject(py, geo_array) + native_array_to_pyobject(py, geo_array) } AnyArray::Stream(s) => { let (chunks, field) = s.into_chunked_array()?.into_inner(); @@ -57,8 +59,9 @@ pub fn from_wkb( .collect::, _>>()?; FromWKB::from_wkb(&ChunkedWKBArray::new(chunks), coord_type)? } + _ => return Err(PyValueError::new_err("Expected a WKB array").into()), }; - chunked_geometry_array_to_pyobject(py, geo_array) + chunked_native_array_to_pyobject(py, geo_array) } } } diff --git a/python/geoarrow-core/src/interop/wkt.rs b/python/geoarrow-core/src/interop/wkt.rs index 7d3918459..6c69ba0d1 100644 --- a/python/geoarrow-core/src/interop/wkt.rs +++ b/python/geoarrow-core/src/interop/wkt.rs @@ -7,13 +7,14 @@ use geoarrow::chunked_array::{ChunkedArray, ChunkedMixedGeometryArray}; use geoarrow::io::geozero::FromWKT; use geoarrow::io::wkt::reader::ParseWKT; use geoarrow::io::wkt::ToWKT; +use geoarrow::ArrayBase; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3_arrow::input::AnyArray; use pyo3_arrow::{PyArray, PyChunkedArray}; use crate::ffi::from_python::AnyGeometryInput; -use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject}; +use crate::ffi::to_python::{chunked_native_array_to_pyobject, native_array_to_pyobject}; use pyo3_geoarrow::{PyCoordType, PyGeoArrowResult}; #[pyfunction] @@ -40,7 +41,7 @@ pub fn from_wkt( ) } }; - geometry_array_to_pyobject(py, geo_array) + native_array_to_pyobject(py, geo_array) } AnyArray::Stream(s) => { let chunked_arr = 
s.into_chunked_array()?; @@ -77,7 +78,7 @@ pub fn from_wkt( ) } }; - chunked_geometry_array_to_pyobject(py, Arc::new(geo_array)) + chunked_native_array_to_pyobject(py, Arc::new(geo_array)) } } } @@ -85,13 +86,20 @@ pub fn from_wkt( #[pyfunction] pub fn to_wkt(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult { match input { - AnyGeometryInput::Array(array) => return_array( - py, - PyArray::from_array_ref(Arc::new(array.as_ref().to_wkt::())), - ), + AnyGeometryInput::Array(array) => { + let wkt_arr = array.as_ref().to_wkt::(); + let field = wkt_arr.extension_field(); + return_array(py, PyArray::new(wkt_arr.into_array_ref(), field)) + } AnyGeometryInput::Chunked(array) => { let out = array.as_ref().to_wkt::(); - return_chunked_array(py, PyChunkedArray::from_array_refs(out.chunk_refs())?) + let field = out.extension_field(); + let chunks = out + .into_inner() + .into_iter() + .map(|chunk| chunk.to_array_ref()) + .collect(); + return_chunked_array(py, PyChunkedArray::try_new(chunks, field)?) 
} } } diff --git a/python/geoarrow-core/src/table/mod.rs b/python/geoarrow-core/src/table/mod.rs index 0c50ce066..d78072351 100644 --- a/python/geoarrow-core/src/table/mod.rs +++ b/python/geoarrow-core/src/table/mod.rs @@ -1,6 +1,6 @@ mod geo_interface; -use crate::ffi::to_python::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject}; +use crate::ffi::to_python::{chunked_native_array_to_pyobject, native_array_to_pyobject}; use crate::interop::util::pytable_to_table; use geoarrow::array::NativeArrayDyn; use geoarrow::schema::GeoSchemaExt; @@ -29,13 +29,13 @@ pub fn geometry_col(py: Python, input: AnyRecordBatch) -> PyGeoArrowResult { let table = stream.into_table()?; let table = pytable_to_table(table)?; let chunked_geom_arr = table.geometry_column(None)?; - chunked_geometry_array_to_pyobject(py, chunked_geom_arr) + chunked_native_array_to_pyobject(py, chunked_geom_arr) } } } diff --git a/python/pyo3-geoarrow/src/array.rs b/python/pyo3-geoarrow/src/array.rs index 2592e9f03..3d661de84 100644 --- a/python/pyo3-geoarrow/src/array.rs +++ b/python/pyo3-geoarrow/src/array.rs @@ -189,6 +189,10 @@ impl TryFrom for PyNativeArray { pub struct PySerializedArray(pub(crate) SerializedArrayDyn); impl PySerializedArray { + pub fn new(array: SerializedArrayDyn) -> Self { + Self(array) + } + /// Import from raw Arrow capsules pub fn from_arrow_pycapsule( schema_capsule: &Bound, diff --git a/python/pyo3-geoarrow/src/data_type.rs b/python/pyo3-geoarrow/src/data_type.rs index 53964d649..14aa26e74 100644 --- a/python/pyo3-geoarrow/src/data_type.rs +++ b/python/pyo3-geoarrow/src/data_type.rs @@ -177,6 +177,7 @@ impl PySerializedType { fn py_new(r#type: &str) -> PyResult { match r#type.to_lowercase().as_str() { "wkb" => Ok(Self(SerializedType::WKB)), + "wkt" => Ok(Self(SerializedType::WKT)), _ => Err(PyValueError::new_err("Unknown geometry type input")), } } diff --git a/src/array/binary/array.rs b/src/array/binary/array.rs index e823a6d68..656627014 100644 --- 
a/src/array/binary/array.rs +++ b/src/array/binary/array.rs @@ -10,6 +10,7 @@ use crate::geo_traits::GeometryTrait; use crate::scalar::WKB; // use crate::util::{owned_slice_offsets, owned_slice_validity}; use crate::trait_::{ArrayAccessor, ArrayBase, IntoArrow, SerializedArray}; +use arrow::array::AsArray; use arrow_array::OffsetSizeTrait; use arrow_array::{Array, BinaryArray, GenericBinaryArray, LargeBinaryArray}; use arrow_buffer::NullBuffer; @@ -19,10 +20,9 @@ use arrow_schema::{DataType, Field}; /// /// This is semantically equivalent to `Vec>` due to the internal validity bitmap. /// -/// This array _can_ be used directly for operations, but that will incur costly encoding to and -/// from WKB on every operation. Instead, you usually want to use the WKBArray only for -/// serialization purposes (e.g. to and from [GeoParquet](https://geoparquet.org/)) but convert to -/// strongly-typed arrays (such as the [`PointArray`][crate::array::PointArray]) for computations. +/// This array implements [`SerializedArray`], not [`NativeArray`]. This means that you'll need to +/// parse the `WKBArray` into a native-typed GeoArrow array (such as +/// [`PointArray`][crate::array::PointArray]) before using it for computations. 
#[derive(Debug, Clone, PartialEq)] pub struct WKBArray { pub(crate) data_type: SerializedType, @@ -240,12 +240,12 @@ impl TryFrom<&dyn Array> for WKBArray { fn try_from(value: &dyn Array) -> Result { match value.data_type() { DataType::Binary => { - let downcasted = value.as_any().downcast_ref::().unwrap(); + let downcasted = value.as_binary::(); let geom_array: WKBArray = downcasted.clone().into(); Ok(geom_array.into()) } DataType::LargeBinary => { - let downcasted = value.as_any().downcast_ref::().unwrap(); + let downcasted = value.as_binary::(); Ok(downcasted.clone().into()) } _ => Err(GeoArrowError::General(format!( diff --git a/src/array/dynamic.rs b/src/array/dynamic.rs index bc650f2a7..5159baed8 100644 --- a/src/array/dynamic.rs +++ b/src/array/dynamic.rs @@ -8,6 +8,7 @@ use arrow_schema::Field; use arrow_schema::{DataType, FieldRef}; use crate::array::metadata::ArrayMetadata; +use crate::array::wkt::WKTArray; use crate::array::CoordType; use crate::array::*; use crate::datatypes::{Dimension, NativeType, SerializedType}; @@ -213,6 +214,8 @@ impl SerializedArrayDyn { let geo_arr: SerializedArrayRef = match data_type { SerializedType::WKB => Arc::new(WKBArray::::try_from((array, field))?), SerializedType::LargeWKB => Arc::new(WKBArray::::try_from((array, field))?), + SerializedType::WKT => Arc::new(WKTArray::::try_from((array, field))?), + SerializedType::LargeWKT => Arc::new(WKTArray::::try_from((array, field))?), }; Ok(Self(geo_arr)) diff --git a/src/array/mod.rs b/src/array/mod.rs index a6834bf51..adc3e8e57 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -20,6 +20,7 @@ pub use multipolygon::{MultiPolygonArray, MultiPolygonBuilder, MultiPolygonCapac pub use point::{PointArray, PointBuilder}; pub use polygon::{PolygonArray, PolygonBuilder, PolygonCapacity}; pub use rect::{RectArray, RectBuilder}; +pub use wkt::WKTArray; pub use crate::trait_::{ArrayBase, NativeArray, SerializedArray}; @@ -40,6 +41,7 @@ pub(crate) mod point; pub(crate) mod polygon; 
pub(crate) mod rect; pub(crate) mod util; +pub(crate) mod wkt; use std::sync::Arc; diff --git a/src/array/wkt/array.rs b/src/array/wkt/array.rs new file mode 100644 index 000000000..4822967d7 --- /dev/null +++ b/src/array/wkt/array.rs @@ -0,0 +1,241 @@ +use std::sync::Arc; + +use arrow::array::AsArray; +use arrow_array::{Array, GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray}; +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field}; + +use crate::array::metadata::ArrayMetadata; +use crate::array::util::{offsets_buffer_i32_to_i64, offsets_buffer_i64_to_i32}; +use crate::array::SerializedArray; +use crate::datatypes::SerializedType; +use crate::error::{GeoArrowError, Result}; +use crate::trait_::IntoArrow; +use crate::ArrayBase; + +/// An immutable array of WKT geometries using GeoArrow's in-memory representation. +/// +/// This is semantically equivalent to `Vec<Option<String>>` due to the internal validity bitmap. +/// +/// This array implements [`SerializedArray`]: it is meant for carrying WKT-encoded geometries +/// (together with their metadata) rather than for computation. You will usually want to parse the +/// `WKTArray` into a native-typed GeoArrow array (such as +/// [`PointArray`][crate::array::PointArray]) before using it for computations. 
+#[derive(Debug, Clone, PartialEq)] +pub struct WKTArray { + pub(crate) data_type: SerializedType, + pub(crate) metadata: Arc, + pub(crate) array: GenericStringArray, +} + +// Constructors and basic accessors +impl WKTArray { + /// Create a new WKTArray from a string array; `O::IS_LARGE` selects `LargeWKT` vs `WKT`. + pub fn new(array: GenericStringArray, metadata: Arc) -> Self { + let data_type = match O::IS_LARGE { + true => SerializedType::LargeWKT, + false => SerializedType::WKT, + }; + + Self { + data_type, + metadata, + array, + } + } + + /// Returns true if the array is empty + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn into_inner(self) -> GenericStringArray { + self.array + } + + /// Returns a slice of this [`WKTArray`] of `length` elements starting at `offset`. + /// # Panics + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + array: self.array.slice(offset, length), + data_type: self.data_type, + metadata: self.metadata(), + } + } + + pub fn with_metadata(&self, metadata: Arc) -> Self { + let mut arr = self.clone(); + arr.metadata = metadata; + arr + } +} + +impl ArrayBase for WKTArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn storage_type(&self) -> DataType { + self.data_type.to_data_type() + } + + fn extension_field(&self) -> Arc { + self.data_type + .to_field_with_metadata("geometry", true, &self.metadata) + .into() + } + + fn extension_name(&self) -> &str { + self.data_type.extension_name() + } + + fn into_array_ref(self) -> Arc { + // Rebuild the underlying string array via `into_arrow` and wrap it in an `Arc`. + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> arrow_array::ArrayRef { + self.clone().into_array_ref() + } + + fn metadata(&self) -> Arc { + self.metadata.clone() + } + + /// Returns the number of geometries in this array + #[inline] + fn len(&self) -> usize { + self.array.len() 
+ } + + /// Returns the optional validity. + fn nulls(&self) -> Option<&NullBuffer> { + self.array.nulls() + } +} + +impl SerializedArray for WKTArray { + fn data_type(&self) -> SerializedType { + self.data_type + } + + fn with_metadata(&self, metadata: Arc) -> Arc { + Arc::new(self.with_metadata(metadata)) + } + + fn as_ref(&self) -> &dyn SerializedArray { + self + } +} + +impl IntoArrow for WKTArray { + type ArrowArray = GenericStringArray; + + fn into_arrow(self) -> Self::ArrowArray { + GenericStringArray::new( + self.array.offsets().clone(), + self.array.values().clone(), + self.array.nulls().cloned(), + ) + } +} + +impl From> for WKTArray { + fn from(value: GenericStringArray) -> Self { + Self::new(value, Default::default()) + } +} + +impl TryFrom<&dyn Array> for WKTArray { + type Error = GeoArrowError; + + fn try_from(value: &dyn Array) -> Result { + match value.data_type() { + DataType::Utf8 => { + let downcasted = value.as_any().downcast_ref::().unwrap(); + Ok(downcasted.clone().into()) + } + DataType::LargeUtf8 => { + let downcasted = value.as_any().downcast_ref::().unwrap(); + let geom_array: WKTArray = downcasted.clone().into(); + geom_array.try_into() + } + _ => Err(GeoArrowError::General(format!( + "Unexpected type: {:?}", + value.data_type() + ))), + } + } +} + +impl TryFrom<&dyn Array> for WKTArray { + type Error = GeoArrowError; + + fn try_from(value: &dyn Array) -> Result { + match value.data_type() { + DataType::Utf8 => { + let downcasted = value.as_string::(); + let geom_array: WKTArray = downcasted.clone().into(); + Ok(geom_array.into()) + } + DataType::LargeUtf8 => { + let downcasted = value.as_any().downcast_ref::().unwrap(); + Ok(downcasted.clone().into()) + } + _ => Err(GeoArrowError::General(format!( + "Unexpected type: {:?}", + value.data_type() + ))), + } + } +} + +impl TryFrom<(&dyn Array, &Field)> for WKTArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> Result { + let mut arr: Self = 
arr.try_into()?; + arr.metadata = Arc::new(ArrayMetadata::try_from(field)?); + Ok(arr) + } +} + +impl TryFrom<(&dyn Array, &Field)> for WKTArray { + type Error = GeoArrowError; + + fn try_from((arr, field): (&dyn Array, &Field)) -> Result { + let mut arr: Self = arr.try_into()?; + arr.metadata = Arc::new(ArrayMetadata::try_from(field)?); + Ok(arr) + } +} + +impl From> for WKTArray { + fn from(value: WKTArray) -> Self { + let binary_array = value.array; + let (offsets, values, nulls) = binary_array.into_parts(); + Self::new( + LargeStringArray::new(offsets_buffer_i32_to_i64(&offsets), values, nulls), + value.metadata, + ) + } +} + +impl TryFrom> for WKTArray { + type Error = GeoArrowError; + + fn try_from(value: WKTArray) -> Result { + let binary_array = value.array; + let (offsets, values, nulls) = binary_array.into_parts(); + Ok(Self::new( + StringArray::new(offsets_buffer_i64_to_i32(&offsets)?, values, nulls), + value.metadata, + )) + } +} diff --git a/src/array/wkt/mod.rs b/src/array/wkt/mod.rs new file mode 100644 index 000000000..cc6d7c0d8 --- /dev/null +++ b/src/array/wkt/mod.rs @@ -0,0 +1,5 @@ +//! Contains the [`WKTArray`] for arrays of WKT-encoded geometries. + +pub use array::WKTArray; + +mod array; diff --git a/src/chunked_array/mod.rs b/src/chunked_array/mod.rs index b23118c40..bf0ee56ae 100644 --- a/src/chunked_array/mod.rs +++ b/src/chunked_array/mod.rs @@ -587,14 +587,17 @@ pub type ChunkedMixedGeometryArray = /// A chunked geometry collection array. pub type ChunkedGeometryCollectionArray = ChunkedGeometryArray>; -/// A chunked WKB array. -pub type ChunkedWKBArray = ChunkedGeometryArray>; /// A chunked rect array. pub type ChunkedRectArray = ChunkedGeometryArray>; /// A chunked unknown geometry array. #[allow(dead_code)] pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray>; +/// A chunked WKB array. +pub type ChunkedWKBArray = ChunkedGeometryArray>; +/// A chunked WKT array. 
+pub type ChunkedWKTArray = ChunkedGeometryArray>; + /// A base chunked array trait that applies to all GeoArrow arrays, both "native" and "serialized" /// encodings. pub trait ChunkedArrayBase: std::fmt::Debug + Send + Sync { diff --git a/src/datatypes.rs b/src/datatypes.rs index 1ac803a0c..58c57cb46 100644 --- a/src/datatypes.rs +++ b/src/datatypes.rs @@ -162,6 +162,14 @@ pub enum SerializedType { /// Represents a [WKBArray][crate::array::WKBArray] or /// [ChunkedWKBArray][crate::chunked_array::ChunkedWKBArray] with `i64` offsets. LargeWKB, + + /// Represents a [WKTArray][crate::array::WKTArray] or + /// [ChunkedWKTArray][crate::chunked_array::ChunkedWKTArray] with `i32` offsets. + WKT, + + /// Represents a [WKTArray][crate::array::WKTArray] or + /// [ChunkedWKTArray][crate::chunked_array::ChunkedWKTArray] with `i64` offsets. + LargeWKT, } /// A type enum representing all possible GeoArrow geometry types, including both "native" and @@ -341,6 +349,13 @@ fn wkb_data_type() -> DataType { } } +fn wkt_data_type() -> DataType { + match O::IS_LARGE { + true => DataType::LargeUtf8, + false => DataType::Utf8, + } +} + pub(crate) fn rect_fields(dim: Dimension) -> Fields { let values_fields = match dim { Dimension::XY => { @@ -626,6 +641,8 @@ impl SerializedType { match self { WKB => wkb_data_type::(), LargeWKB => wkb_data_type::(), + WKT => wkt_data_type::(), + LargeWKT => wkt_data_type::(), } } @@ -634,6 +651,7 @@ impl SerializedType { use SerializedType::*; match self { WKB | LargeWKB => "geoarrow.wkb", + WKT | LargeWKT => "geoarrow.wkt", } } diff --git a/src/io/parquet/reader/parse.rs b/src/io/parquet/reader/parse.rs index cc8f91c93..e6ada5813 100644 --- a/src/io/parquet/reader/parse.rs +++ b/src/io/parquet/reader/parse.rs @@ -205,6 +205,9 @@ fn parse_array( let target_geo_data_type: NativeType = target_field.try_into()?; match t { WKB | LargeWKB => parse_wkb_column(arr, target_geo_data_type), + WKT | LargeWKT => Err(GeoArrowError::General( + "WKT input not supported 
in GeoParquet.".to_string(), + )), } } } diff --git a/src/io/shapefile/reader.rs b/src/io/shapefile/reader.rs new file mode 100644 index 000000000..63147f078 --- /dev/null +++ b/src/io/shapefile/reader.rs @@ -0,0 +1,43 @@ +use std::io::{Read, Seek}; +use std::path::Path; +use std::sync::Arc; + +use dbase::FieldType; +use shapefile::{Reader, ShapeReader}; + +use crate::array::PointBuilder; +use crate::table::Table; +use crate::trait_::NativeArray; + +// NOTE(review): looks like an unfinished sketch (dangling `PointBuilder::w` below) - confirm. +// TODO: stretch goal: return a record batch reader. +pub fn read_shapefile(shp_reader: T, dbf_reader: T) { + let dbf_reader = dbase::Reader::new(dbf_reader).unwrap(); + let shp_reader = ShapeReader::new(shp_reader).unwrap(); + + let header = shp_reader.header(); + // header.shape_type + + let fields = dbf_reader.fields(); + let field = &fields[0]; + // match field.field_type() { + // FieldType:: + // } + + let mut reader = Reader::new(shp_reader, dbf_reader); + for x in reader.iter_shapes_and_records_as::() { + let (geom, record) = x.unwrap(); + // let y = Point(&geom); + PointBuilder::w + // record is a wrapper around a hash map of values + } +} + +fn read_point(reader: &mut Reader) -> GeometryArrayRef { + let mut builder = PointBuilder::<2>::with_capacity(reader.shape_count().unwrap()); + for row in reader.iter_shapes_and_records_as::() { + let (geom, _record) = row.unwrap(); + builder.push_point(Some(&Point(&geom))); + } + Arc::new(builder.finish()) +} diff --git a/src/io/wkt/writer/api.rs b/src/io/wkt/writer/api.rs index ccfa62b25..344e793cd 100644 --- a/src/io/wkt/writer/api.rs +++ b/src/io/wkt/writer/api.rs @@ -1,8 +1,8 @@ use arrow::array::GenericStringBuilder; -use arrow_array::{GenericStringArray, OffsetSizeTrait}; +use arrow_array::OffsetSizeTrait; -use crate::array::{AsChunkedNativeArray, AsNativeArray}; -use crate::chunked_array::{ChunkedArray, ChunkedNativeArray}; +use crate::array::{AsChunkedNativeArray, AsNativeArray, WKTArray}; +use crate::chunked_array::{ChunkedGeometryArray, ChunkedNativeArray}; 
use crate::datatypes::{Dimension, NativeType}; use crate::io::wkt::writer::scalar::{ geometry_collection_to_wkt, geometry_to_wkt, line_string_to_wkt, multi_line_string_to_wkt, @@ -18,9 +18,10 @@ pub trait ToWKT { } impl ToWKT for &dyn NativeArray { - type Output = GenericStringArray; + type Output = WKTArray; fn to_wkt(&self) -> Self::Output { + let metadata = self.metadata(); let mut output_array = GenericStringBuilder::::new(); use Dimension::*; @@ -90,12 +91,12 @@ impl ToWKT for &dyn NativeArray { Rect(XYZ) => impl_to_wkt!(as_rect, 3, rect_to_wkt), } - output_array.finish() + WKTArray::new(output_array.finish(), metadata) } } impl ToWKT for &dyn ChunkedNativeArray { - type Output = ChunkedArray>; + type Output = ChunkedGeometryArray>; fn to_wkt(&self) -> Self::Output { use Dimension::*; @@ -103,7 +104,7 @@ impl ToWKT for &dyn ChunkedNativeArray { macro_rules! impl_to_wkt { ($cast_func:ident, $dim:expr) => { - ChunkedArray::new( + ChunkedGeometryArray::new( self.$cast_func::<$dim>() .map(|chunk| chunk.as_ref().to_wkt()), ) diff --git a/src/table.rs b/src/table.rs index 47fb61aa7..1de960ffa 100644 --- a/src/table.rs +++ b/src/table.rs @@ -259,6 +259,7 @@ impl Table { .as_ref() .downcast(true) } + _ => panic!("WKT input not supported yet"), }; let new_field = new_geometry.data_type().to_field_with_metadata(