Skip to content

Commit

Permalink
AVRO-3892: [Rust] Support to resolve fixed from bytes and deserialize…
Browse files Browse the repository at this point in the history
… bytes in deserialize_any (#2567)

* support resolving fixed from bytes

* support deserializing bytes, fixed, and decimal

* fix clippy

* AVRO-3892: Rename test method

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>

* AVRO-3892: [Rust] Add unit tests for deserializing &str/String from Value::Bytes

The tests are not really related to AVRO-3892. They do not cover the new
changes in deserialize_any()

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>

* add unit test for deserialize bytes from decimal and uuid

* add more tests

---------

Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
Co-authored-by: ZENOTME <st810918843@gmail.com>
Co-authored-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
(cherry picked from commit 8073145)
  • Loading branch information
ZENOTME authored and martin-g committed Oct 26, 2023
1 parent c521b6c commit f8bfd43
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 6 deletions.
2 changes: 1 addition & 1 deletion lang/rust/avro/examples/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ fn benchmark(

let start = Instant::now();
let mut writer = Writer::new(schema, BufWriter::new(Vec::new()));
writer.extend(records.into_iter())?;
writer.extend(records)?;

let duration = Instant::now().duration_since(start);
durations.push(duration);
Expand Down
110 changes: 108 additions & 2 deletions lang/rust/avro/src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> {
Value::String(ref s) => visitor.visit_borrowed_str(s),
Value::Uuid(uuid) => visitor.visit_str(&uuid.to_string()),
Value::Map(ref items) => visitor.visit_map(MapDeserializer::new(items)),
Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes),
Value::Decimal(ref d) => visitor.visit_bytes(&d.to_vec()?),
_ => Err(de::Error::custom(format!(
"unsupported union: {:?}",
self.input
Expand All @@ -276,6 +278,8 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> {
Value::String(ref s) => visitor.visit_borrowed_str(s),
Value::Uuid(uuid) => visitor.visit_str(&uuid.to_string()),
Value::Map(ref items) => visitor.visit_map(MapDeserializer::new(items)),
Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes),
Value::Decimal(ref d) => visitor.visit_bytes(&d.to_vec()?),
value => Err(de::Error::custom(format!(
"incorrect value of type: {:?}",
crate::schema::SchemaKind::from(value)
Expand Down Expand Up @@ -350,8 +354,9 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> {
Value::String(ref s) => visitor.visit_bytes(s.as_bytes()),
Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes),
Value::Uuid(ref u) => visitor.visit_bytes(u.as_bytes()),
Value::Decimal(ref d) => visitor.visit_bytes(&d.to_vec()?),
_ => Err(de::Error::custom(format!(
"Expected a String|Bytes|Fixed|Uuid, but got {:?}",
"Expected a String|Bytes|Fixed|Uuid|Decimal, but got {:?}",
self.input
))),
}
Expand Down Expand Up @@ -654,6 +659,7 @@ pub fn from_value<'de, D: Deserialize<'de>>(value: &'de Value) -> Result<D, Erro

#[cfg(test)]
mod tests {
use num_bigint::BigInt;
use pretty_assertions::assert_eq;
use serde::Serialize;
use serial_test::serial;
Expand All @@ -662,6 +668,8 @@ mod tests {

use apache_avro_test_helper::TestResult;

use crate::Decimal;

use super::*;

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -1099,7 +1107,7 @@ mod tests {
fn test_from_value_uuid_str() -> TestResult {
let raw_value = "9ec535ff-3e2a-45bd-91d3-0a01321b5a49";
let value = Value::Uuid(Uuid::parse_str(raw_value)?);
let result = crate::from_value::<Uuid>(&value)?;
let result = from_value::<Uuid>(&value)?;
assert_eq!(result.to_string(), raw_value);
Ok(())
}
Expand Down Expand Up @@ -1315,4 +1323,102 @@ mod tests {

Ok(())
}

/// Checks that an owned `String` deserialized from `Value::Bytes` equals the
/// same bytes decoded as UTF-8.
#[test]
fn test_avro_3892_deserialize_string_from_bytes() -> TestResult {
    let payload: Vec<u8> = vec![1, 2, 3, 4];
    let avro_bytes = Value::Bytes(payload.clone());

    let decoded: String = from_value(&avro_bytes)?;
    let expected = String::from_utf8(payload)?;

    assert_eq!(decoded, expected);
    Ok(())
}

/// Checks that a borrowed `&str` deserialized from `Value::Bytes` equals the
/// same bytes decoded as UTF-8.
#[test]
fn test_avro_3892_deserialize_str_from_bytes() -> TestResult {
    let payload: &[u8; 4] = &[1, 2, 3, 4];
    // The value must outlive the `&str` that is borrowed out of it.
    let avro_bytes = Value::Bytes(payload.to_vec());

    let borrowed: &str = from_value(&avro_bytes)?;
    let expected = std::str::from_utf8(payload)?;

    assert_eq!(borrowed, expected);
    Ok(())
}

#[derive(Debug)]
struct Bytes(Vec<u8>);

impl<'de> Deserialize<'de> for Bytes {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct BytesVisitor;
impl<'de> serde::de::Visitor<'de> for BytesVisitor {
type Value = Bytes;

fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a byte array")
}

fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Bytes(v.to_vec()))
}
}
deserializer.deserialize_bytes(BytesVisitor)
}
}

/// Checks that `Bytes` can be deserialized from a `Value::Decimal`, both
/// directly and when the decimal is wrapped in a `Value::Union`.
#[test]
fn test_avro_3892_deserialize_bytes_from_decimal() -> TestResult {
    let expected_bytes = BigInt::from(123456789).to_signed_bytes_be();

    // Bare decimal value.
    let plain = Value::Decimal(Decimal::from(&expected_bytes));
    let direct: Bytes = from_value(&plain)?;
    assert_eq!(direct.0, expected_bytes);

    // The same decimal nested inside a union, read as an `Option`.
    let wrapped = Value::Union(0, Box::new(plain));
    let optional: Option<Bytes> = from_value(&wrapped)?;
    assert_eq!(optional.unwrap().0, expected_bytes);
    Ok(())
}

/// Checks that `Bytes` can be deserialized from a `Value::Uuid`, both
/// directly and when the UUID is wrapped in a `Value::Union`.
#[test]
fn test_avro_3892_deserialize_bytes_from_uuid() -> TestResult {
    let uuid_str = "10101010-2020-2020-2020-101010101010";
    let parsed = Uuid::parse_str(uuid_str)?;
    let expected_bytes = parsed.as_bytes().to_vec();

    // Bare UUID value.
    let plain = Value::Uuid(parsed);
    let direct: Bytes = from_value(&plain)?;
    assert_eq!(direct.0, expected_bytes);

    // The same UUID nested inside a union, read as an `Option`.
    let wrapped = Value::Union(0, Box::new(plain));
    let optional: Option<Bytes> = from_value(&wrapped)?;
    assert_eq!(optional.unwrap().0, expected_bytes);
    Ok(())
}

/// Checks that `Bytes` can be deserialized from a `Value::Fixed`, both
/// directly and when the fixed value is wrapped in a `Value::Union`.
#[test]
fn test_avro_3892_deserialize_bytes_from_fixed() -> TestResult {
    let expected_bytes = vec![1, 2, 3, 4];

    // Bare fixed value.
    let plain = Value::Fixed(4, expected_bytes.clone());
    let direct: Bytes = from_value(&plain)?;
    assert_eq!(direct.0, expected_bytes);

    // The same fixed value nested inside a union, read as an `Option`.
    let wrapped = Value::Union(0, Box::new(plain));
    let optional: Option<Bytes> = from_value(&wrapped)?;
    assert_eq!(optional.unwrap().0, expected_bytes);
    Ok(())
}

/// Checks that `Bytes` can be deserialized from a `Value::Bytes`, both
/// directly and when the bytes value is wrapped in a `Value::Union`.
#[test]
fn test_avro_3892_deserialize_bytes_from_bytes() -> TestResult {
    let expected_bytes = vec![1, 2, 3, 4];

    // Bare bytes value.
    let plain = Value::Bytes(expected_bytes.clone());
    let direct: Bytes = from_value(&plain)?;
    assert_eq!(direct.0, expected_bytes);

    // The same bytes value nested inside a union, read as an `Option`.
    let wrapped = Value::Union(0, Box::new(plain));
    let optional: Option<Bytes> = from_value(&wrapped)?;
    assert_eq!(optional.unwrap().0, expected_bytes);
    Ok(())
}
}
2 changes: 1 addition & 1 deletion lang/rust/avro/src/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl Decimal {
self.len
}

fn to_vec(&self) -> AvroResult<Vec<u8>> {
pub(crate) fn to_vec(&self) -> AvroResult<Vec<u8>> {
self.to_sign_extended_bytes_with_len(self.len)
}

Expand Down
62 changes: 62 additions & 0 deletions lang/rust/avro/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,13 @@ impl Value {
}
}
Value::String(s) => Ok(Value::Fixed(s.len(), s.into_bytes())),
Value::Bytes(s) => {
if s.len() == size {
Ok(Value::Fixed(size, s))
} else {
Err(Error::CompareFixedSizes { size, n: s.len() })
}
}
other => Err(Error::GetStringForFixed(other.into())),
}
}
Expand Down Expand Up @@ -2925,4 +2932,59 @@ Field with name '"b"' is not a member of the map items"#,

Ok(())
}

/// Checks that a `Value::BigDecimal` resolves successfully against a `bytes`
/// schema carrying the `big-decimal` logical type.
#[test]
fn test_avro_3779_bigdecimal_resolving() -> TestResult {
    let schema_json =
        r#"{"name": "bigDecimalSchema", "logicalType": "big-decimal", "type": "bytes" }"#;
    let big_decimal = Value::BigDecimal(BigDecimal::from(12345678u32));
    let parsed_schema = Schema::parse_str(schema_json)?;

    // Keep the result around so a failure message can show the actual error.
    let resolve_result: AvroResult<Value> = big_decimal.resolve(&parsed_schema);
    assert!(
        resolve_result.is_ok(),
        "resolve result must be ok, got: {resolve_result:?}"
    );

    Ok(())
}

/// Checks resolving `Value::Bytes` against a fixed schema of size 3: a
/// three-byte payload becomes `Value::Fixed`, while shorter and longer
/// payloads are rejected.
#[test]
fn test_avro_3892_resolve_fixed_from_bytes() -> TestResult {
    // One schema shared by all three cases.
    let fixed_of_three = Schema::Fixed(FixedSchema {
        name: "test".into(),
        aliases: None,
        doc: None,
        size: 3,
        attributes: Default::default(),
    });

    // Exact length: resolves to a fixed value holding the same bytes.
    let exact = Value::Bytes(vec![97, 98, 99]);
    assert_eq!(
        exact.resolve(&fixed_of_three)?,
        Value::Fixed(3, vec![97, 98, 99])
    );

    // One byte short: must fail.
    let too_short = Value::Bytes(vec![97, 99]);
    assert!(too_short.resolve(&fixed_of_three).is_err());

    // One byte over: must fail.
    let too_long = Value::Bytes(vec![97, 98, 99, 100]);
    assert!(too_long.resolve(&fixed_of_three).is_err());

    Ok(())
}
}
4 changes: 2 additions & 2 deletions lang/rust/avro/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,7 @@ mod tests {
let record_copy = record.clone();
let records = vec![record, record_copy];

let n1 = writer.extend(records.into_iter())?;
let n1 = writer.extend(records)?;
let n2 = writer.flush()?;
let result = writer.into_inner()?;

Expand Down Expand Up @@ -970,7 +970,7 @@ mod tests {
let record_copy = record.clone();
let records = vec![record, record_copy];

let n1 = writer.extend_ser(records.into_iter())?;
let n1 = writer.extend_ser(records)?;
let n2 = writer.flush()?;
let result = writer.into_inner()?;

Expand Down

0 comments on commit f8bfd43

Please sign in to comment.