Skip to content

Commit

Permalink
Support alternate formats for unparsing datetime to timestamp and…
Browse files Browse the repository at this point in the history
… `interval` (apache#11466)

* Unparser rule for datetime cast (apache#10)

* use timestamp as the identifier for date64

* rename

* implement CustomDialectBuilder

* fix

* dialect with interval style (apache#11)

---------

Co-authored-by: Phillip LeBlanc <phillip@leblanc.tech>

* fmt

* clippy

* doc

* Update datafusion/sql/src/unparser/expr.rs

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* update the doc for CustomDialectBuilder

* fix doc test

---------

Co-authored-by: Phillip LeBlanc <phillip@leblanc.tech>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
3 people authored and xinlifoobar committed Jul 17, 2024
1 parent 78dd57c commit b982014
Show file tree
Hide file tree
Showing 3 changed files with 420 additions and 65 deletions.
6 changes: 4 additions & 2 deletions datafusion-examples/examples/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use datafusion::error::Result;

use datafusion::prelude::*;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_sql::unparser::dialect::CustomDialect;
use datafusion_sql::unparser::dialect::CustomDialectBuilder;
use datafusion_sql::unparser::{plan_to_sql, Unparser};

/// This example demonstrates the programmatic construction of SQL strings using
Expand Down Expand Up @@ -80,7 +80,9 @@ fn simple_expr_to_pretty_sql_demo() -> Result<()> {
/// using a custom dialect and an explicit unparser
fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> {
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
let dialect = CustomDialect::new(Some('`'));
let dialect = CustomDialectBuilder::new()
.with_identifier_quote_style('`')
.build();
let unparser = Unparser::new(&dialect);
let sql = unparser.expr_to_sql(&expr)?.to_string();
assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#);
Expand Down
140 changes: 140 additions & 0 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,33 @@ pub trait Dialect {
fn supports_nulls_first_in_sort(&self) -> bool {
true
}

/// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
///
/// E.g. Trino, Athena and Dremio do not have a DATETIME data type.
/// The default implementation returns `false`.
fn use_timestamp_for_date64(&self) -> bool {
false
}

/// The `IntervalStyle` this dialect uses for unparsing.
///
/// The default implementation returns `IntervalStyle::PostgresVerbose`.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::PostgresVerbose
}
}

/// `IntervalStyle` to use for unparsing
///
/// Different DBMSs follow different standards for interval literals; see
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>.
/// Popular ones are:
///
/// * postgres_verbose: `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`,
///   which is compatible with the arrow display format, as well as duckdb
/// * sql standard: `'1-2'` for year-month, or `'1 10:10:10.123456'` for day-time, see
///   <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
///
/// The enum is a plain, field-less value type, so it derives `Debug`, `PartialEq`
/// and `Eq` in addition to `Clone`/`Copy` to allow printing and comparing styles.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IntervalStyle {
    /// Verbose Postgres format, e.g. `'2 years 15 months 100 weeks'`.
    PostgresVerbose,
    /// SQL standard format, e.g. `'1-2'` (year-month) or `'1 10:10:10.123456'` (day-time).
    SQLStandard,
    /// MySQL-style intervals; this is the style reported by `MySqlDialect`.
    MySQL,
}

pub struct DefaultDialect {}

impl Dialect for DefaultDialect {
Expand All @@ -57,6 +83,10 @@ impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('"')
}

/// PostgreSQL unparses intervals in the verbose Postgres style.
// NOTE: this is the same value the `Dialect` trait returns by default; it is
// spelled out here explicitly.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::PostgresVerbose
}
}

pub struct MySqlDialect {}
Expand All @@ -69,6 +99,10 @@ impl Dialect for MySqlDialect {
fn supports_nulls_first_in_sort(&self) -> bool {
false
}

/// MySQL unparses intervals using `IntervalStyle::MySQL` instead of the
/// trait's default style.
fn interval_style(&self) -> IntervalStyle {
IntervalStyle::MySQL
}
}

pub struct SqliteDialect {}
Expand All @@ -81,12 +115,29 @@ impl Dialect for SqliteDialect {

/// A `Dialect` whose answers are stored in fields instead of being hard-coded,
/// so each option can be configured per instance.
pub struct CustomDialect {
/// Value returned from `Dialect::identifier_quote_style`.
identifier_quote_style: Option<char>,
/// Value returned from `Dialect::supports_nulls_first_in_sort`.
supports_nulls_first_in_sort: bool,
/// Value returned from `Dialect::use_timestamp_for_date64`.
use_timestamp_for_date64: bool,
/// Value returned from `Dialect::interval_style`.
interval_style: IntervalStyle,
}

impl Default for CustomDialect {
fn default() -> Self {
Self {
identifier_quote_style: None,
supports_nulls_first_in_sort: true,
use_timestamp_for_date64: false,
interval_style: IntervalStyle::SQLStandard,
}
}
}

impl CustomDialect {
    /// Creates a `CustomDialect` with the given identifier quote style; every
    /// other option is taken from `CustomDialect::default()`.
    #[deprecated(note = "please use `CustomDialectBuilder` instead")]
    pub fn new(identifier_quote_style: Option<char>) -> Self {
        let defaults = Self::default();
        Self {
            identifier_quote_style,
            ..defaults
        }
    }
}
Expand All @@ -95,4 +146,93 @@ impl Dialect for CustomDialect {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
self.identifier_quote_style
}

/// Returns the `supports_nulls_first_in_sort` flag configured on this dialect.
fn supports_nulls_first_in_sort(&self) -> bool {
self.supports_nulls_first_in_sort
}

/// Returns the `use_timestamp_for_date64` flag configured on this dialect.
fn use_timestamp_for_date64(&self) -> bool {
self.use_timestamp_for_date64
}

/// Returns the `IntervalStyle` configured on this dialect.
// `IntervalStyle` is `Copy`, so the field is returned by value.
fn interval_style(&self) -> IntervalStyle {
self.interval_style
}
}

/// `CustomDialectBuilder` builds a `CustomDialect` using the builder pattern.
///
/// Start from `CustomDialectBuilder::new()`, override individual options with
/// the `with_*` methods, and finish with `build()`.
///
/// # Examples
///
/// Building a custom dialect with all default options set in `CustomDialectBuilder::new()`
/// but with `use_timestamp_for_date64` overridden to `true`
///
/// ```
/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
/// let dialect = CustomDialectBuilder::new()
/// .with_use_timestamp_for_date64(true)
/// .build();
/// ```
pub struct CustomDialectBuilder {
/// Copied into `CustomDialect::identifier_quote_style` by `build()`.
identifier_quote_style: Option<char>,
/// Copied into `CustomDialect::supports_nulls_first_in_sort` by `build()`.
supports_nulls_first_in_sort: bool,
/// Copied into `CustomDialect::use_timestamp_for_date64` by `build()`.
use_timestamp_for_date64: bool,
/// Copied into `CustomDialect::interval_style` by `build()`.
interval_style: IntervalStyle,
}

impl Default for CustomDialectBuilder {
/// Equivalent to `CustomDialectBuilder::new()`.
fn default() -> Self {
Self::new()
}
}

impl CustomDialectBuilder {
    /// Creates a builder with the default options: no identifier quote style,
    /// `NULLS FIRST` supported in sorts, DATETIME (not TIMESTAMP) for Date64 and
    /// the `IntervalStyle::PostgresVerbose` interval style.
    pub fn new() -> Self {
        Self {
            interval_style: IntervalStyle::PostgresVerbose,
            use_timestamp_for_date64: false,
            supports_nulls_first_in_sort: true,
            identifier_quote_style: None,
        }
    }

    /// Consumes the builder and returns the configured `CustomDialect`.
    pub fn build(self) -> CustomDialect {
        let Self {
            identifier_quote_style,
            supports_nulls_first_in_sort,
            use_timestamp_for_date64,
            interval_style,
        } = self;

        CustomDialect {
            identifier_quote_style,
            supports_nulls_first_in_sort,
            use_timestamp_for_date64,
            interval_style,
        }
    }

    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
    pub fn with_identifier_quote_style(self, identifier_quote_style: char) -> Self {
        Self {
            identifier_quote_style: Some(identifier_quote_style),
            ..self
        }
    }

    /// Customize whether the dialect supports `NULLS FIRST` in `ORDER BY` clauses
    pub fn with_supports_nulls_first_in_sort(
        self,
        supports_nulls_first_in_sort: bool,
    ) -> Self {
        Self {
            supports_nulls_first_in_sort,
            ..self
        }
    }

    /// Customize whether the dialect uses TIMESTAMP rather than DATETIME when casting Date64
    pub fn with_use_timestamp_for_date64(self, use_timestamp_for_date64: bool) -> Self {
        Self {
            use_timestamp_for_date64,
            ..self
        }
    }

    /// Customize the dialect with one of the interval styles listed in `IntervalStyle`
    pub fn with_interval_style(self, interval_style: IntervalStyle) -> Self {
        Self {
            interval_style,
            ..self
        }
    }
}
Loading

0 comments on commit b982014

Please sign in to comment.