Skip to content

Commit

Permalink
Codegen'd Rust/Arrow (de)ser 3: misc fixes & improvements (#2487)
Browse files Browse the repository at this point in the history
**Best reviewed on a commit-by-commit basis; in particular the `rerun
codegen` commit is nothing but generated code.**

This PR implements miscellaneous fixes and improvements to the semantic
pass and Arrow registry that came up when implementing (de)serialization
routines for the Rust SDK.

---

- #2484
- #2485 
- #2487 
- #2545
- #2546
- #2549
- #2554
- #2570
- #2571

---

<!-- This line will get updated when the PR build summary job finishes.
-->
PR Build Summary: https://build.rerun.io/pr/2487

<!-- pr-link-docs:start -->
Docs preview: https://rerun.io/preview/8976c19/docs
Examples preview: https://rerun.io/preview/8976c19/examples
<!-- pr-link-docs:end -->
  • Loading branch information
teh-cmc authored Jun 30, 2023
1 parent afeaa91 commit e6354f0
Show file tree
Hide file tree
Showing 29 changed files with 318 additions and 164 deletions.
2 changes: 1 addition & 1 deletion crates/re_log_types/src/data_row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub type DataRowResult<T> = ::std::result::Result<T, DataRowError>;

// ---

type DataCellVec = SmallVec<[DataCell; 4]>;
pub type DataCellVec = SmallVec<[DataCell; 4]>;

/// A row's worth of [`DataCell`]s: a collection of independent [`DataCell`]s with different
/// underlying datatypes and pointing to different parts of the heap.
Expand Down
2 changes: 1 addition & 1 deletion crates/re_log_types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use std::sync::Arc;
pub use self::arrow_msg::ArrowMsg;
pub use self::component::{Component, DeserializableComponent, SerializableComponent};
pub use self::data_cell::{DataCell, DataCellError, DataCellInner, DataCellResult};
pub use self::data_row::{DataRow, DataRowError, DataRowResult, RowId};
pub use self::data_row::{DataCellVec, DataRow, DataRowError, DataRowResult, RowId};
pub use self::data_table::{
DataCellColumn, DataCellOptVec, DataTable, DataTableError, DataTableResult, EntityPathVec,
ErasedTimeVec, NumInstancesVec, RowIdVec, TableId, TimePointVec, COLUMN_ENTITY_PATH,
Expand Down
1 change: 1 addition & 0 deletions crates/re_types/definitions/rerun/archetypes/points2d.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace rerun.archetypes;
// TODO(#2372): archetype IDL definitions must refer to objects of kind component
// TODO(#2373): `attr.rerun.component_required` implies `required`
// TODO(#2427): distinguish optional vs. recommended in language backends
// TODO(#2521): always derive debug & clone for rust backend

/// A 2D point cloud with positions and optional colors, radii, labels, etc.
table Points2D (
Expand Down
2 changes: 1 addition & 1 deletion crates/re_types/source_hash.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This is a sha256 hash for all direct and indirect dependencies of this crate's build script.
# It can be safely removed at any time to force the build script to run again.
# Check out build.rs to see how it's computed.
0960d9b4f6df9136f7857a7b7280a4803f3eba7a085c98aa1ce7c95dcd88539e
c10dc39333002ce5c62d9e88a7feb4fca76098528fe643012a53665a9934581e
6 changes: 5 additions & 1 deletion crates/re_types/src/components/class_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ impl crate::Component for ClassId {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt16
DataType::Extension(
"rerun.components.ClassId".to_owned(),
Box::new(DataType::UInt16),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ impl crate::Component for Color {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt32
DataType::Extension(
"rerun.components.Color".to_owned(),
Box::new(DataType::UInt32),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/draw_order.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ impl crate::Component for DrawOrder {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::Float32
DataType::Extension(
"rerun.components.DrawOrder".to_owned(),
Box::new(DataType::Float32),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/instance_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ impl crate::Component for InstanceKey {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt64
DataType::Extension(
"rerun.components.InstanceKey".to_owned(),
Box::new(DataType::UInt64),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/keypoint_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ impl crate::Component for KeypointId {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt16
DataType::Extension(
"rerun.components.KeypointId".to_owned(),
Box::new(DataType::UInt16),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/label.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ impl crate::Component for Label {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::Utf8
DataType::Extension(
"rerun.components.Label".to_owned(),
Box::new(DataType::Utf8),
None,
)
}
}
4 changes: 2 additions & 2 deletions crates/re_types/src/components/point2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ impl crate::Component for Point2D {
Field {
name: "x".to_owned(),
data_type: DataType::Float32,
is_nullable: true,
is_nullable: false,
metadata: [].into(),
},
Field {
name: "y".to_owned(),
data_type: DataType::Float32,
is_nullable: true,
is_nullable: false,
metadata: [].into(),
},
])),
Expand Down
6 changes: 5 additions & 1 deletion crates/re_types/src/components/radius.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ impl crate::Component for Radius {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::Float32
DataType::Extension(
"rerun.components.Radius".to_owned(),
Box::new(DataType::Float32),
None,
)
}
}
20 changes: 12 additions & 8 deletions crates/re_types/src/datatypes/vec2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,18 @@ impl crate::Datatype for Vec2D {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::FixedSizeList(
Box::new(Field {
name: "item".to_owned(),
data_type: DataType::Float32,
is_nullable: false,
metadata: [].into(),
}),
2usize,
DataType::Extension(
"rerun.datatypes.Vec2D".to_owned(),
Box::new(DataType::FixedSizeList(
Box::new(Field {
name: "item".to_owned(),
data_type: DataType::Float32,
is_nullable: false,
metadata: [].into(),
}),
2usize,
)),
None,
)
}
}
63 changes: 42 additions & 21 deletions crates/re_types_builder/src/arrow_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use anyhow::Context as _;
use arrow2::datatypes::{DataType, Field, UnionMode};
use std::collections::{BTreeMap, HashMap};

use crate::{ElementType, Object, Type, ATTR_ARROW_SPARSE_UNION, ATTR_ARROW_TRANSPARENT};
use crate::{
ElementType, Object, ObjectField, Type, ATTR_ARROW_SPARSE_UNION, ATTR_ARROW_TRANSPARENT,
};

// --- Registry ---

Expand All @@ -18,7 +20,7 @@ pub struct ArrowRegistry {
impl ArrowRegistry {
/// Computes the Arrow datatype for the specified object and stores it in the registry, to be
/// resolved later on.
pub fn register(&mut self, obj: &Object) {
pub fn register(&mut self, obj: &mut Object) {
let (fqname, datatype) = (obj.fqname.clone(), self.arrow_datatype_from_object(obj));
self.registry.insert(fqname, datatype);
}
Expand Down Expand Up @@ -48,7 +50,7 @@ impl ArrowRegistry {

// ---

fn arrow_datatype_from_object(&self, obj: &Object) -> LazyDatatype {
fn arrow_datatype_from_object(&mut self, obj: &mut Object) -> LazyDatatype {
let is_struct = obj.is_struct();
let is_transparent = obj.try_get_attr::<String>(ATTR_ARROW_TRANSPARENT).is_some();
let num_fields = obj.fields.len();
Expand All @@ -59,18 +61,25 @@ impl ArrowRegistry {
obj.fqname,
);

if is_transparent {
self.arrow_datatype_from_type(&obj.fields[0].typ)
let datatype = if is_transparent {
LazyDatatype::Extension(
obj.fqname.clone(),
Box::new(
self.arrow_datatype_from_type(obj.fields[0].typ.clone(), &mut obj.fields[0]),
),
None,
)
} else if is_struct {
LazyDatatype::Extension(
obj.fqname.clone(),
Box::new(LazyDatatype::Struct(
obj.fields
.iter()
.map(|field| LazyField {
name: field.name.clone(),
datatype: self.arrow_datatype_from_type(&field.typ),
is_nullable: field.required,
.iter_mut()
.map(|obj_field| LazyField {
name: obj_field.name.clone(),
datatype: self
.arrow_datatype_from_type(obj_field.typ.clone(), obj_field),
is_nullable: obj_field.is_nullable,
metadata: Default::default(),
})
.collect(),
Expand All @@ -85,10 +94,10 @@ impl ArrowRegistry {
obj.fqname.clone(),
Box::new(LazyDatatype::Union(
obj.fields
.iter()
.iter_mut()
.map(|field| LazyField {
name: field.name.clone(),
datatype: self.arrow_datatype_from_type(&field.typ),
datatype: self.arrow_datatype_from_type(field.typ.clone(), field),
is_nullable: false,
metadata: Default::default(),
})
Expand All @@ -102,11 +111,18 @@ impl ArrowRegistry {
)),
None,
)
};

// NOTE: Arrow-transparent objects by definition don't have a datatype of their own.
if !is_transparent {
obj.datatype = datatype.clone().into();
}

datatype
}

fn arrow_datatype_from_type(&self, typ: &Type) -> LazyDatatype {
match typ {
fn arrow_datatype_from_type(&mut self, typ: Type, field: &mut ObjectField) -> LazyDatatype {
let datatype = match typ {
Type::UInt8 => LazyDatatype::UInt8,
Type::UInt16 => LazyDatatype::UInt16,
Type::UInt32 => LazyDatatype::UInt32,
Expand All @@ -124,22 +140,27 @@ impl ArrowRegistry {
Box::new(LazyField {
name: "item".into(),
datatype: self.arrow_datatype_from_element_type(elem_type),
is_nullable: false,
is_nullable: field.is_nullable,
metadata: Default::default(),
}),
*length,
length,
),
Type::Vector { elem_type } => LazyDatatype::List(Box::new(LazyField {
name: "item".into(),
datatype: self.arrow_datatype_from_element_type(elem_type),
is_nullable: false,
is_nullable: field.is_nullable,
metadata: Default::default(),
})),
Type::Object(fqname) => LazyDatatype::Unresolved(fqname.clone()),
}
Type::Object(fqname) => LazyDatatype::Unresolved(fqname),
};

field.datatype = datatype.clone().into();
self.registry.insert(field.fqname.clone(), datatype.clone());

datatype
}

fn arrow_datatype_from_element_type(&self, typ: &ElementType) -> LazyDatatype {
fn arrow_datatype_from_element_type(&self, typ: ElementType) -> LazyDatatype {
_ = self;
match typ {
ElementType::UInt8 => LazyDatatype::UInt8,
Expand All @@ -155,7 +176,7 @@ impl ArrowRegistry {
ElementType::Float32 => LazyDatatype::Float32,
ElementType::Float64 => LazyDatatype::Float64,
ElementType::String => LazyDatatype::Utf8,
ElementType::Object(fqname) => LazyDatatype::Unresolved(fqname.clone()),
ElementType::Object(fqname) => LazyDatatype::Unresolved(fqname),
}
}
}
Expand Down
Loading

0 comments on commit e6354f0

Please sign in to comment.