diff --git a/src/algorithm/native/type_id.rs b/src/algorithm/native/type_id.rs index f7bc2b2a5..3c1c0eaa2 100644 --- a/src/algorithm/native/type_id.rs +++ b/src/algorithm/native/type_id.rs @@ -119,10 +119,9 @@ impl TypeIds for MixedGeometryArray { impl TypeIds for WKBArray { fn get_type_ids(&self) -> Int16Array { let mut output_array = Int16Builder::with_capacity(self.len()); - self.iter().for_each(|maybe_wkb| { output_array.append_option(maybe_wkb.map(|wkb| { - let type_id = u32::from(wkb.get_wkb_geometry_type()); + let type_id = u32::from(wkb.wkb_type().unwrap()); type_id.try_into().unwrap() })) }); @@ -133,7 +132,7 @@ impl TypeIds for WKBArray { fn get_unique_type_ids(&self) -> HashSet { let mut values = HashSet::new(); self.iter().flatten().for_each(|wkb| { - let type_id = u32::from(wkb.get_wkb_geometry_type()); + let type_id = u32::from(wkb.wkb_type().unwrap()); values.insert(type_id.try_into().unwrap()); }); diff --git a/src/io/wkb/common.rs b/src/io/wkb/common.rs index 3241fceaf..0c2a63ae8 100644 --- a/src/io/wkb/common.rs +++ b/src/io/wkb/common.rs @@ -1,20 +1,65 @@ +use std::io::Cursor; + +use arrow_array::OffsetSizeTrait; +use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; use num_enum::{IntoPrimitive, TryFromPrimitive}; +use crate::error::{GeoArrowError, Result}; +use crate::scalar::WKB; + +/// The various WKB types supported by this crate #[derive(Clone, Copy, Debug, PartialEq, TryFromPrimitive, IntoPrimitive)] #[repr(u32)] pub enum WKBType { + /// A WKB Point Point = 1, + /// A WKB LineString LineString = 2, + /// A WKB Polygon Polygon = 3, + /// A WKB MultiPoint MultiPoint = 4, + /// A WKB MultiLineString MultiLineString = 5, + /// A WKB MultiPolygon MultiPolygon = 6, + /// A WKB GeometryCollection GeometryCollection = 7, + /// A WKB PointZ PointZ = 1001, + /// A WKB LineStringZ LineStringZ = 1002, + /// A WKB PolygonZ PolygonZ = 1003, + /// A WKB MultiPointZ MultiPointZ = 1004, + /// A WKB MultiLineStringZ MultiLineStringZ = 1005, + /// A WKB MultiPolygonZ MultiPolygonZ = 1006, + /// A WKB GeometryCollectionZ GeometryCollectionZ = 1007, } + +impl WKBType { + /// Construct from a byte slice representing a WKB geometry + pub fn from_buffer(buf: &[u8]) -> Result { + let mut reader = Cursor::new(buf); + let byte_order = reader.read_u8().unwrap(); + let geometry_type = match byte_order { + 0 => reader.read_u32::().unwrap(), + 1 => reader.read_u32::().unwrap(), + _ => panic!("Unexpected byte order."), + }; + Self::try_from_primitive(geometry_type) + .map_err(|err| GeoArrowError::General(err.to_string())) + } +} + +impl<'a, O: OffsetSizeTrait> TryFrom> for WKBType { + type Error = GeoArrowError; + + fn try_from(value: WKB<'a, O>) -> std::result::Result { + Self::from_buffer(value.as_ref()) + } +} diff --git a/src/io/wkb/reader/coord.rs b/src/io/wkb/reader/coord.rs index d736e660b..6d206edf8 100644 --- a/src/io/wkb/reader/coord.rs +++ b/src/io/wkb/reader/coord.rs @@ -36,7 +36,7 @@ pub struct WKBCoord<'a> { } impl<'a> WKBCoord<'a> { - pub fn new(buf: &'a [u8], byte_order: Endianness, offset: u64, dim: Dimension) -> Self { + pub(crate) fn new(buf: &'a [u8], byte_order: Endianness, offset: u64, dim: Dimension) -> Self { Self { buf, byte_order, diff --git a/src/io/wkb/reader/geometry.rs b/src/io/wkb/reader/geometry.rs index 28b7ffdba..893a01712 100644 --- a/src/io/wkb/reader/geometry.rs +++ b/src/io/wkb/reader/geometry.rs @@ -1,13 +1,11 @@ use std::io::Cursor; use arrow_array::OffsetSizeTrait; -use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; +use byteorder::ReadBytesExt; use crate::datatypes::Dimension; -use crate::geo_traits::{ - GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, - MultiPolygonTrait, PointTrait, PolygonTrait, -}; +use crate::error::Result; +use crate::geo_traits::GeometryTrait; use crate::io::wkb::common::WKBType; use crate::io::wkb::reader::geometry_collection::WKBGeometryCollection; use crate::io::wkb::reader::rect::WKBRect; @@ -18,97 +16,66 @@ use crate::io::wkb::reader::{ use crate::scalar::WKB; impl<'a, O: OffsetSizeTrait> WKB<'a, O> { + /// Convert this WKB scalar to a [WKBGeometry] + /// + /// This "prepares" the WKB input for constant-time coordinate access. pub fn to_wkb_object(&'a self) -> WKBGeometry<'a> { - let buf = self.arr.value(self.geom_index); + let buf = self.as_slice(); let mut reader = Cursor::new(buf); let byte_order = reader.read_u8().unwrap(); - let geometry_type_u32 = match byte_order { - 0 => reader.read_u32::().unwrap(), - 1 => reader.read_u32::().unwrap(), - _ => panic!("Unexpected byte order."), - }; - let geometry_type = WKBType::try_from(geometry_type_u32).unwrap(); + let wkb_type = self.wkb_type().unwrap(); + + use Dimension::*; - match geometry_type { - WKBType::Point => { - WKBGeometry::Point(WKBPoint::new(buf, byte_order.into(), 0, Dimension::XY)) + match wkb_type { + WKBType::Point => WKBGeometry::Point(WKBPoint::new(buf, byte_order.into(), 0, XY)), + WKBType::LineString => { + WKBGeometry::LineString(WKBLineString::new(buf, byte_order.into(), 0, XY)) } - WKBType::LineString => WKBGeometry::LineString(WKBLineString::new( - buf, - byte_order.into(), - 0, - Dimension::XY, - )), WKBType::Polygon => { - WKBGeometry::Polygon(WKBPolygon::new(buf, byte_order.into(), 0, Dimension::XY)) + WKBGeometry::Polygon(WKBPolygon::new(buf, byte_order.into(), 0, XY)) } WKBType::MultiPoint => { - WKBGeometry::MultiPoint(WKBMultiPoint::new(buf, byte_order.into(), Dimension::XY)) + WKBGeometry::MultiPoint(WKBMultiPoint::new(buf, byte_order.into(), XY)) + } + WKBType::MultiLineString => { + WKBGeometry::MultiLineString(WKBMultiLineString::new(buf, byte_order.into(), XY)) + } + WKBType::MultiPolygon => { + WKBGeometry::MultiPolygon(WKBMultiPolygon::new(buf, byte_order.into(), XY)) } - WKBType::MultiLineString => WKBGeometry::MultiLineString(WKBMultiLineString::new( - buf, - byte_order.into(), - Dimension::XY, - )), - WKBType::MultiPolygon => WKBGeometry::MultiPolygon(WKBMultiPolygon::new( - buf, - byte_order.into(), - Dimension::XY, - )), WKBType::GeometryCollection => WKBGeometry::GeometryCollection( - WKBGeometryCollection::new(buf, byte_order.into(), Dimension::XY), + WKBGeometryCollection::new(buf, byte_order.into(), XY), ), - WKBType::PointZ => { - WKBGeometry::Point(WKBPoint::new(buf, byte_order.into(), 0, Dimension::XYZ)) + WKBType::PointZ => WKBGeometry::Point(WKBPoint::new(buf, byte_order.into(), 0, XYZ)), + WKBType::LineStringZ => { + WKBGeometry::LineString(WKBLineString::new(buf, byte_order.into(), 0, XYZ)) } - WKBType::LineStringZ => WKBGeometry::LineString(WKBLineString::new( - buf, - byte_order.into(), - 0, - Dimension::XYZ, - )), WKBType::PolygonZ => { - WKBGeometry::Polygon(WKBPolygon::new(buf, byte_order.into(), 0, Dimension::XYZ)) + WKBGeometry::Polygon(WKBPolygon::new(buf, byte_order.into(), 0, XYZ)) } WKBType::MultiPointZ => { - WKBGeometry::MultiPoint(WKBMultiPoint::new(buf, byte_order.into(), Dimension::XYZ)) + WKBGeometry::MultiPoint(WKBMultiPoint::new(buf, byte_order.into(), XYZ)) + } + WKBType::MultiLineStringZ => { + WKBGeometry::MultiLineString(WKBMultiLineString::new(buf, byte_order.into(), XYZ)) + } + WKBType::MultiPolygonZ => { + WKBGeometry::MultiPolygon(WKBMultiPolygon::new(buf, byte_order.into(), XYZ)) } - WKBType::MultiLineStringZ => WKBGeometry::MultiLineString(WKBMultiLineString::new( - buf, - byte_order.into(), - Dimension::XYZ, - )), - WKBType::MultiPolygonZ => WKBGeometry::MultiPolygon(WKBMultiPolygon::new( - buf, - byte_order.into(), - Dimension::XYZ, - )), WKBType::GeometryCollectionZ => WKBGeometry::GeometryCollection( - WKBGeometryCollection::new(buf, byte_order.into(), Dimension::XYZ), + WKBGeometryCollection::new(buf, byte_order.into(), XYZ), ), } } - pub fn get_wkb_geometry_type(&'a self) -> WKBType { - let buf = self.arr.value(self.geom_index); - let mut reader = Cursor::new(buf); - let byte_order = reader.read_u8().unwrap(); - let geometry_type = match byte_order { - 0 => reader.read_u32::().unwrap(), - 1 => reader.read_u32::().unwrap(), - _ => panic!("Unexpected byte order."), - }; - geometry_type.try_into().unwrap() - } - - pub fn to_wkb_line_string(&'a self) -> WKBLineString<'a> { - match self.to_wkb_object() { - WKBGeometry::LineString(geom) => geom, - _ => panic!(), - } + /// Access the [WKBType] of this WKB object. + pub fn wkb_type(&'a self) -> Result { + WKBType::from_buffer(self.as_ref()) } } +/// Endianness #[derive(Debug, Clone, Copy)] pub enum Endianness { BigEndian, @@ -226,15 +193,6 @@ impl<'a> WKBGeometry<'a> { } } -impl<'a> From> for WKBLineString<'a> { - fn from(value: WKBGeometry<'a>) -> Self { - match value { - WKBGeometry::LineString(geom) => geom, - _ => panic!(), - } - } -} - impl<'a> GeometryTrait for WKBGeometry<'a> { type T = f64; type Point<'b> = WKBPoint<'a> where Self: 'b; @@ -247,15 +205,7 @@ impl<'a> GeometryTrait for WKBGeometry<'a> { type Rect<'b> = WKBRect<'a> where Self: 'b; fn dim(&self) -> usize { - match self { - WKBGeometry::Point(g) => PointTrait::dim(g), - WKBGeometry::LineString(g) => LineStringTrait::dim(g), - WKBGeometry::Polygon(g) => PolygonTrait::dim(g), - WKBGeometry::MultiPoint(g) => MultiPointTrait::dim(g), - WKBGeometry::MultiLineString(g) => MultiLineStringTrait::dim(g), - WKBGeometry::MultiPolygon(g) => MultiPolygonTrait::dim(g), - WKBGeometry::GeometryCollection(g) => GeometryCollectionTrait::dim(g), - } + self.dimension().size() } fn as_type( diff --git a/src/io/wkb/reader/linearring.rs b/src/io/wkb/reader/linearring.rs index 407f3c72b..17b66234e 100644 --- a/src/io/wkb/reader/linearring.rs +++ b/src/io/wkb/reader/linearring.rs @@ -9,6 +9,8 @@ use crate::io::wkb::reader::geometry::Endianness; /// A linear ring in a WKB buffer. /// +/// This has been preprocessed, so access to any internal coordinate is `O(1)`. +/// /// See page 65 of . #[derive(Debug, Clone, Copy)] pub struct WKBLinearRing<'a> { diff --git a/src/io/wkb/reader/linestring.rs b/src/io/wkb/reader/linestring.rs index 7a5238968..94e20d1ba 100644 --- a/src/io/wkb/reader/linestring.rs +++ b/src/io/wkb/reader/linestring.rs @@ -10,6 +10,9 @@ use crate::io::wkb::reader::geometry::Endianness; const HEADER_BYTES: u64 = 5; +/// A WKB LineString +/// +/// This has been preprocessed, so access to any internal coordinate is `O(1)`. #[derive(Debug, Clone, Copy)] pub struct WKBLineString<'a> { buf: &'a [u8], diff --git a/src/io/wkb/reader/multilinestring.rs b/src/io/wkb/reader/multilinestring.rs index 9c52bc102..fae193bb6 100644 --- a/src/io/wkb/reader/multilinestring.rs +++ b/src/io/wkb/reader/multilinestring.rs @@ -10,6 +10,9 @@ use crate::io::wkb::reader::linestring::WKBLineString; const HEADER_BYTES: u64 = 5; +/// A WKB MultiLineString +/// +/// This has been preprocessed, so access to any internal coordinate is `O(1)`. #[derive(Debug, Clone)] pub struct WKBMultiLineString<'a> { /// A WKBLineString object for each of the internal line strings @@ -19,7 +22,7 @@ pub struct WKBMultiLineString<'a> { } impl<'a> WKBMultiLineString<'a> { - pub fn new(buf: &'a [u8], byte_order: Endianness, dim: Dimension) -> Self { + pub(crate) fn new(buf: &'a [u8], byte_order: Endianness, dim: Dimension) -> Self { let mut reader = Cursor::new(buf); reader.set_position(HEADER_BYTES); let num_line_strings = match byte_order { diff --git a/src/io/wkb/reader/multipoint.rs b/src/io/wkb/reader/multipoint.rs index 364c90296..27607d372 100644 --- a/src/io/wkb/reader/multipoint.rs +++ b/src/io/wkb/reader/multipoint.rs @@ -8,6 +8,9 @@ use crate::geo_traits::MultiPointTrait; use crate::io::wkb::reader::geometry::Endianness; use crate::io::wkb::reader::point::WKBPoint; +/// A WKB MultiPoint +/// +/// This has been preprocessed, so access to any internal coordinate is `O(1)`. #[derive(Debug, Clone, Copy)] pub struct WKBMultiPoint<'a> { buf: &'a [u8], @@ -19,7 +22,7 @@ pub struct WKBMultiPoint<'a> { } impl<'a> WKBMultiPoint<'a> { - pub fn new(buf: &'a [u8], byte_order: Endianness, dim: Dimension) -> Self { + pub(crate) fn new(buf: &'a [u8], byte_order: Endianness, dim: Dimension) -> Self { // TODO: assert WKB type? let mut reader = Cursor::new(buf); // Set reader to after 1-byte byteOrder and 4-byte wkbType diff --git a/src/io/wkb/reader/multipolygon.rs b/src/io/wkb/reader/multipolygon.rs index 709d071e1..aab91f996 100644 --- a/src/io/wkb/reader/multipolygon.rs +++ b/src/io/wkb/reader/multipolygon.rs @@ -10,18 +10,9 @@ use crate::io::wkb::reader::polygon::WKBPolygon; const HEADER_BYTES: u64 = 5; +/// A WKB MultiPolygon #[derive(Debug, Clone)] pub struct WKBMultiPolygon<'a> { - // buf: &'a [u8], - // byte_order: Endianness, - - // /// The number of polygons in this MultiPolygon - // num_polygons: usize, - - // /// The offset in the buffer where each WKBPolygon object begins - // /// - // /// The length of this vec must match the number of polygons - // // polygon_offsets: Vec, /// A WKBPolygon object for each of the internal line strings wkb_polygons: Vec>, @@ -30,7 +21,7 @@ pub struct WKBMultiPolygon<'a> { } impl<'a> WKBMultiPolygon<'a> { - pub fn new(buf: &'a [u8], byte_order: Endianness, dim: Dimension) -> Self { + pub(crate) fn new(buf: &'a [u8], byte_order: Endianness, dim: Dimension) -> Self { let mut reader = Cursor::new(buf); reader.set_position(HEADER_BYTES); let num_polygons = match byte_order { diff --git a/src/io/wkb/reader/point.rs b/src/io/wkb/reader/point.rs index 24fbf1654..eac6acfbf 100644 --- a/src/io/wkb/reader/point.rs +++ b/src/io/wkb/reader/point.rs @@ -4,7 +4,9 @@ use crate::geo_traits::{CoordTrait, MultiPointTrait, PointTrait}; use crate::io::wkb::reader::coord::WKBCoord; use crate::io::wkb::reader::geometry::Endianness; -/// A 2D Point in WKB +/// A WKB Point. +/// +/// This has been preprocessed, so access to any internal coordinate is `O(1)`. /// /// See page 66 of . #[derive(Debug, Clone, Copy)] diff --git a/src/io/wkb/reader/polygon.rs b/src/io/wkb/reader/polygon.rs index 22a75b58c..cf4bac1b1 100644 --- a/src/io/wkb/reader/polygon.rs +++ b/src/io/wkb/reader/polygon.rs @@ -10,6 +10,9 @@ use crate::io::wkb::reader::linearring::WKBLinearRing; const WKB_POLYGON_TYPE: u32 = 3; +/// A WKB Polygon +/// +/// This has been preprocessed, so access to any internal coordinate is `O(1)`. #[derive(Debug, Clone)] pub struct WKBPolygon<'a> { wkb_linear_rings: Vec>, diff --git a/src/io/wkb/reader/type.rs b/src/io/wkb/reader/type.rs index 584531fe2..0fa42b1cf 100644 --- a/src/io/wkb/reader/type.rs +++ b/src/io/wkb/reader/type.rs @@ -174,7 +174,7 @@ pub(crate) fn infer_geometry_type<'a, O: OffsetSizeTrait>( ) -> Result { let mut available_type = AvailableTypes::new(); for geom in geoms { - match geom.get_wkb_geometry_type() { + match geom.wkb_type()? { WKBType::Point => available_type.add_point(), WKBType::LineString => available_type.add_line_string(), WKBType::Polygon => available_type.add_polygon(), diff --git a/src/scalar/binary/scalar.rs b/src/scalar/binary/scalar.rs index 3ff1dfeed..3ad967fdf 100644 --- a/src/scalar/binary/scalar.rs +++ b/src/scalar/binary/scalar.rs @@ -4,7 +4,10 @@ use arrow_array::{GenericBinaryArray, OffsetSizeTrait}; use geo::BoundingRect; use rstar::{RTreeObject, AABB}; -/// An Arrow equivalent of a Point +/// A scalar WKB reference on a WKBArray +/// +/// This is zero-cost to _create_ from a [WKBArray] but the WKB has not been preprocessed yet, so +/// it's not constant-time to access coordinate values. #[derive(Debug, Clone)] pub struct WKB<'a, O: OffsetSizeTrait> { pub(crate) arr: &'a GenericBinaryArray, @@ -12,10 +15,16 @@ pub struct WKB<'a, O: OffsetSizeTrait> { } impl<'a, O: OffsetSizeTrait> WKB<'a, O> { - pub fn new(arr: &'a GenericBinaryArray, geom_index: usize) -> Self { + /// Construct a new WKB. + pub(crate) fn new(arr: &'a GenericBinaryArray, geom_index: usize) -> Self { Self { arr, geom_index } } + /// Access the byte slice of this WKB object. + pub fn as_slice(&self) -> &[u8] { + self.arr.value(self.geom_index) + } + pub fn into_owned_inner(self) -> (GenericBinaryArray, usize) { // TODO: hard slice? // let owned = self.into_owned();