From 2e8ee7e1ba22b201d7b6aa1bd000c35b8996a031 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 11 Aug 2024 07:48:13 -0400 Subject: [PATCH] Minor: Update DataType::Date64 docs --- arrow-schema/src/datatype.rs | 38 ++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 38e50a17a98b..1848c8b3f76e 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -203,22 +203,34 @@ pub enum DataType { /// A signed 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) /// in milliseconds. /// - /// According to the specification (see [Schema.fbs]), this should be treated as the number of - /// days, in milliseconds, since the UNIX epoch. Therefore, values must be evenly divisible by - /// `86_400_000` (the number of milliseconds in a standard day). + /// # Valid Ranges /// - /// The reason for this is for compatibility with other language's native libraries, - /// such as Java, which historically lacked a dedicated date type - /// and only supported timestamps. + /// According to the Arrow specification ([Schema.fbs]), values of Date64 + /// are treated as the number of *days*, in milliseconds, since the UNIX + /// epoch. Therefore, values of this type must be evenly divisible by + /// `86_400_000`, the number of milliseconds in a standard day. /// - /// Practically, validation that values of this type are evenly divisible by `86_400_000` is not enforced - /// by this library for performance and usability reasons. Date64 values will be treated similarly to the - /// `Timestamp(TimeUnit::Millisecond, None)` type, in that its values will be printed showing the time of - /// day if the value does not represent an exact day, and arithmetic can be done at the millisecond - /// granularity to change the time represented. + /// It is not valid to store milliseconds that do not represent an exact + /// day. 
The reason for this restriction is compatibility with other + /// languages' native libraries (specifically Java), which historically + /// lacked a dedicated date type and only supported timestamps. /// - /// Users should prefer using Date32 to cleanly represent the number of days, or one of the Timestamp - /// variants to include time as part of the representation, depending on their use case. + /// # Validation + /// + /// This library does not validate or enforce that Date64 values are evenly + /// divisible by `86_400_000` for performance and usability reasons. Date64 + /// values are treated similarly to `Timestamp(TimeUnit::Millisecond, + /// None)`: values will be displayed with a time of day if the value does + /// not represent an exact day, and arithmetic will be done at the + /// millisecond granularity. + /// + /// # Recommendation + /// + /// Users should prefer [`DataType::Date32`] to cleanly represent the number + /// of days, or one of the Timestamp variants to include time as part of the + /// representation, depending on their use case. + /// + /// # Further Reading /// /// For more details, see [#5288](https://github.com/apache/arrow-rs/issues/5288). ///