From e030aa06a1d7b2623fdae3f961018b06bb63dcb9 Mon Sep 17 00:00:00 2001 From: Sebastian Walz Date: Thu, 10 Oct 2024 15:15:30 +0200 Subject: [PATCH] feat(serde_with): make `BytesOrString` adjustable --- serde_with/src/de/impls.rs | 5 ++- serde_with/src/formats.rs | 53 +++++++++++++++++++++++++++ serde_with/src/lib.rs | 16 ++++++++- serde_with/src/schemars_0_8.rs | 5 ++- serde_with/src/ser/impls.rs | 7 ++-- serde_with/tests/serde_as/lib.rs | 62 ++++++++++++++++++++++++++++++-- 6 files changed, 140 insertions(+), 8 deletions(-) diff --git a/serde_with/src/de/impls.rs b/serde_with/src/de/impls.rs index 93b370d5..37f34980 100644 --- a/serde_with/src/de/impls.rs +++ b/serde_with/src/de/impls.rs @@ -994,7 +994,10 @@ where } #[cfg(feature = "alloc")] -impl<'de> DeserializeAs<'de, Vec> for BytesOrString { +impl<'de, PREFERENCE> DeserializeAs<'de, Vec> for BytesOrString +where + PREFERENCE: formats::TypePreference, +{ fn deserialize_as(deserializer: D) -> Result, D::Error> where D: Deserializer<'de>, diff --git a/serde_with/src/formats.rs b/serde_with/src/formats.rs index 3171a2bc..ffcf2c0c 100644 --- a/serde_with/src/formats.rs +++ b/serde_with/src/formats.rs @@ -80,6 +80,59 @@ create_format!( Unpadded ); +/// When serializing a value of a type, +/// that allows multiple types during deserialization, +/// prefer a specific type. +pub trait TypePreference: SerializeAs<[u8]> {} + +/// Prefer serializing it as ASCII string. +pub struct PreferAsciiString; + +impl TypePreference for PreferAsciiString {} + +impl SerializeAs<[u8]> for PreferAsciiString { + fn serialize_as(source: &[u8], serializer: S) -> Result + where + S: Serializer, + { + match core::str::from_utf8(source) { + Ok(text) if text.is_ascii() => serializer.serialize_str(text), + _ => serializer.serialize_bytes(source), + } + } +} + +/// Prefer serializing it as bytes. 
+pub struct PreferBytes; + +impl TypePreference for PreferBytes {} + +impl SerializeAs<[u8]> for PreferBytes { + fn serialize_as(source: &[u8], serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(source) + } +} + +/// Prefer serializing it as string. +pub struct PreferString; + +impl TypePreference for PreferString {} + +impl SerializeAs<[u8]> for PreferString { + fn serialize_as(source: &[u8], serializer: S) -> Result + where + S: Serializer, + { + match core::str::from_utf8(source) { + Ok(text) => serializer.serialize_str(text), + _ => serializer.serialize_bytes(source), + } + } +} + /// Specify how lenient the deserialization process should be /// /// Formats which make use of this trait should specify how it affects the deserialization behavior. diff --git a/serde_with/src/lib.rs b/serde_with/src/lib.rs index fa006643..9e26964e 100644 --- a/serde_with/src/lib.rs +++ b/serde_with/src/lib.rs @@ -811,9 +811,23 @@ pub struct DefaultOnNull(PhantomData); /// assert_eq!("✨Works!".as_bytes(), &*a.bytes_or_string); /// # } /// ``` +/// +/// Often it is preferred to serialize these bytes as a string again, +/// but `BytesOrString` will always return an array of integers in the range of `0–255`. +/// This can be adjusted using its generic type parameter, +/// which can be either [`PreferBytes`] (default), [`PreferAsciiString`] or [`PreferString`]. +/// The latter two will try to convert arbitrary bytes to a `&str` first and will fall back to +/// serializing as an array of bytes only if these bytes would form an invalid string. +/// `PreferAsciiString` will serialize strings containing non-ASCII characters as an array as well. +/// /// [`String`]: std::string::String +/// [`PreferBytes`]: formats::PreferBytes +/// [`PreferAsciiString`]: formats::PreferAsciiString +/// [`PreferString`]: formats::PreferString #[cfg(feature = "alloc")] -pub struct BytesOrString; +pub struct BytesOrString( + PhantomData, +); /// De/Serialize Durations as number of seconds. 
/// diff --git a/serde_with/src/schemars_0_8.rs b/serde_with/src/schemars_0_8.rs index 0864c4eb..0f2615ea 100644 --- a/serde_with/src/schemars_0_8.rs +++ b/serde_with/src/schemars_0_8.rs @@ -430,7 +430,10 @@ impl JsonSchemaAs for Bytes { forward_schema!(Vec); } -impl JsonSchemaAs> for BytesOrString { +impl JsonSchemaAs> for BytesOrString +where + PREFERENCE: formats::TypePreference, +{ fn schema_name() -> String { "BytesOrString".into() } diff --git a/serde_with/src/ser/impls.rs b/serde_with/src/ser/impls.rs index 7fc32900..14cb3b0e 100644 --- a/serde_with/src/ser/impls.rs +++ b/serde_with/src/ser/impls.rs @@ -595,12 +595,15 @@ where } #[cfg(feature = "alloc")] -impl SerializeAs> for BytesOrString { +impl SerializeAs> for BytesOrString +where + PREFERENCE: formats::TypePreference, +{ fn serialize_as(source: &Vec, serializer: S) -> Result where S: Serializer, { - source.serialize(serializer) + PREFERENCE::serialize_as(source.as_slice(), serializer) } } diff --git a/serde_with/tests/serde_as/lib.rs b/serde_with/tests/serde_as/lib.rs index 8ec51642..e0d4c7ea 100644 --- a/serde_with/tests/serde_as/lib.rs +++ b/serde_with/tests/serde_as/lib.rs @@ -30,7 +30,7 @@ use core::{ use expect_test::expect; use serde::{Deserialize, Serialize}; use serde_with::{ - formats::{CommaSeparator, Flexible, Strict}, + formats::{CommaSeparator, Flexible, PreferAsciiString, PreferString, Strict}, serde_as, BoolFromInt, BytesOrString, DisplayFromStr, IfIsHumanReadable, Map, NoneAsEmptyString, OneOrMany, Same, Seq, StringWithSeparator, }; @@ -477,7 +477,7 @@ fn test_none_as_empty_string() { } #[test] -fn test_bytes_or_string() { +fn test_bytes_or_string_as_bytes() { #[serde_as] #[derive(Debug, Serialize, Deserialize, PartialEq)] struct S(#[serde_as(as = "BytesOrString")] Vec); @@ -491,8 +491,64 @@ fn test_bytes_or_string() { 3 ]"#]], ); - check_deserialization(S(vec![72, 101, 108, 108, 111]), r#""Hello""#); + check_deserialization(S(vec![70, 111, 111, 98, 97, 114]), r#""Foobar""#); +} + 
+#[test] +fn test_bytes_or_string_as_string() { + #[serde_as] + #[derive(Debug, Serialize, Deserialize, PartialEq)] + struct S(#[serde_as(as = "BytesOrString")] Vec); + + is_equal(S(vec![72, 101, 108, 108, 111]), expect![[r#""Hello""#]]); + + check_deserialization(S(vec![0xf0, 0x9f, 0xa6, 0xa6]), r#""🦦""#); + is_equal(S(vec![0xf0, 0x9f, 0xa6, 0xa6]), expect![[r#""🦦""#]]); + + is_equal( + S(vec![0, 255]), + expect![[r#" + [ + 0, + 255 + ]"#]], + ); + check_deserialization(S(vec![87, 111, 114, 108, 100]), r#""World""#); +} +#[test] +fn test_bytes_or_string_as_ascii_string() { + #[serde_as] + #[derive(Debug, Serialize, Deserialize, PartialEq)] + struct S(#[serde_as(as = "BytesOrString")] Vec); + + is_equal(S(vec![72, 101, 108, 108, 111]), expect![[r#""Hello""#]]); + + check_deserialization(S(vec![0xf0, 0x9f, 0xa6, 0xa6]), r#""🦦""#); + is_equal( + S(vec![0xf0, 0x9f, 0xa6, 0xa6]), + expect![[r#" + [ + 240, + 159, + 166, + 166 + ]"#]], + ); + + is_equal( + S(vec![0, 255]), + expect![[r#" + [ + 0, + 255 + ]"#]], + ); + check_deserialization(S(vec![87, 111, 114, 108, 100]), r#""World""#); +} + +#[test] +fn test_bytes_or_string_nested() { #[serde_as] #[derive(Debug, Serialize, Deserialize, PartialEq)] struct SVec(#[serde_as(as = "Vec")] Vec>);