From 3d543e32e044912fd9f746e992f5065eb9d623c8 Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Mon, 11 Mar 2024 19:26:14 +0800 Subject: [PATCH 1/6] feat: add support for 1-dim array in extended mode --- src/common/src/types/to_binary.rs | 42 +++++++++++++++++++++++++++++-- src/common/src/types/to_sql.rs | 24 ++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs index 5ab9fd316dcad..3b66828b7c8fe 100644 --- a/src/common/src/types/to_binary.rs +++ b/src/common/src/types/to_binary.rs @@ -15,7 +15,7 @@ use bytes::{Bytes, BytesMut}; use postgres_types::{ToSql, Type}; -use super::{DataType, DatumRef, ScalarRefImpl, F32, F64}; +use super::{DataType, DatumRef, ListRef, ScalarRefImpl, F32, F64}; use crate::error::NotImplemented; /// Error type for [`ToBinary`] trait. @@ -87,7 +87,8 @@ impl ToBinary for ScalarRefImpl<'_> { ScalarRefImpl::Time(v) => v.to_binary_with_type(ty), ScalarRefImpl::Bytea(v) => v.to_binary_with_type(ty), ScalarRefImpl::Jsonb(v) => v.to_binary_with_type(ty), - ScalarRefImpl::Struct(_) | ScalarRefImpl::List(_) => bail_not_implemented!( + ScalarRefImpl::List(v) => v.to_binary_with_type(ty), + ScalarRefImpl::Struct(_) => bail_not_implemented!( issue = 7949, "the pgwire extended-mode encoding for {ty} is unsupported" ), @@ -95,6 +96,43 @@ impl ToBinary for ScalarRefImpl<'_> { } } +impl<'a> ToBinary for ListRef<'a> { + fn to_binary_with_type(&self, ty: &DataType) -> Result> { + // safe since ListRef + let elem_ty = ty.as_list(); + + let array_ty = match elem_ty { + DataType::Boolean => Type::BOOL_ARRAY, + DataType::Int16 => Type::INT2_ARRAY, + DataType::Int32 => Type::INT4_ARRAY, + DataType::Int64 => Type::INT8_ARRAY, + DataType::Int256 => Type::NUMERIC_ARRAY, // HACK: NOT SURE + DataType::Float32 => Type::FLOAT4_ARRAY, + DataType::Float64 => Type::FLOAT8_ARRAY, + DataType::Decimal => Type::NUMERIC_ARRAY, + DataType::Date => Type::DATE_ARRAY, + 
DataType::Varchar => Type::VARCHAR_ARRAY, + DataType::Time => Type::TIME_ARRAY, + DataType::Timestamp => Type::TIMESTAMP_ARRAY, + DataType::Timestamptz => Type::TIMESTAMPTZ_ARRAY, + DataType::Interval => Type::INTERVAL_ARRAY, + DataType::Bytea => Type::BYTEA_ARRAY, + DataType::Jsonb => Type::JSONB_ARRAY, + DataType::Serial => Type::INT4_ARRAY, + DataType::Struct(_) | DataType::List(_) => bail_not_implemented!( + issue = 7949, + "the pgwire extended-mode encoding for lists with more than one dimension ({ty}) is unsupported" + ), + }; + + let mut buf = BytesMut::new(); + self.to_sql(&array_ty, &mut buf) + .map_err(ToBinaryError::ToSql)?; + + Ok(Some(buf.freeze())) + } +} + impl ToBinary for DatumRef<'_> { fn to_binary_with_type(&self, ty: &DataType) -> Result> { match self { diff --git a/src/common/src/types/to_sql.rs b/src/common/src/types/to_sql.rs index 71957b3bf35c8..c9b210c0e4ef9 100644 --- a/src/common/src/types/to_sql.rs +++ b/src/common/src/types/to_sql.rs @@ -15,8 +15,10 @@ use std::error::Error; use bytes::BytesMut; +use itertools::Itertools; use postgres_types::{accepts, to_sql_checked, IsNull, ToSql, Type}; +use super::ListRef; use crate::types::{JsonbRef, ScalarRefImpl}; impl ToSql for ScalarRefImpl<'_> { @@ -42,10 +44,10 @@ impl ToSql for ScalarRefImpl<'_> { ScalarRefImpl::Timestamptz(v) => v.to_sql(ty, out), ScalarRefImpl::Time(v) => v.to_sql(ty, out), ScalarRefImpl::Bytea(v) => v.to_sql(ty, out), + ScalarRefImpl::List(v) => v.to_sql(ty, out), ScalarRefImpl::Jsonb(_) // jsonbb::Value doesn't implement ToSql yet | ScalarRefImpl::Int256(_) - | ScalarRefImpl::Struct(_) - | ScalarRefImpl::List(_) => { + | ScalarRefImpl::Struct(_) => { bail_not_implemented!("the postgres encoding for {ty} is unsupported") } } @@ -74,3 +76,21 @@ impl ToSql for JsonbRef<'_> { Ok(IsNull::No) } } + +impl ToSql for ListRef<'_> { + to_sql_checked!(); + + fn accepts(ty: &Type) -> bool + where + Self: Sized, + { + matches!(ty.kind(), postgres_types::Kind::Array(_)) + } + + fn 
to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result> + where + Self: Sized, + { + self.iter().collect_vec().to_sql(ty, out) + } +} From 44085318afd21ae265231a43383e9711d9d68290 Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Mon, 11 Mar 2024 19:26:51 +0800 Subject: [PATCH 2/6] test: add 1d array e2e test cases for extended mode --- e2e_test/batch/types/array_1d_ty.slt.part | 217 ++++++++++++++++++++++ e2e_test/batch/types/array_ty.slt.part | 198 -------------------- e2e_test/extended_mode/type.slt | 9 +- 3 files changed, 224 insertions(+), 200 deletions(-) create mode 100644 e2e_test/batch/types/array_1d_ty.slt.part diff --git a/e2e_test/batch/types/array_1d_ty.slt.part b/e2e_test/batch/types/array_1d_ty.slt.part new file mode 100644 index 0000000000000..87470e7716617 --- /dev/null +++ b/e2e_test/batch/types/array_1d_ty.slt.part @@ -0,0 +1,217 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + +query T +select ARRAY['foo', 'bar', null]; +---- +{foo,bar,NULL} + +query T +select ARRAY[1,2+3,4*5+1]; +---- +{1,5,21} + +query T +select ARRAY[null]; +---- +{NULL} + +statement error +select ARRAY[]; + +query T +select ARRAY[]::int[]; +---- +{} + +statement ok +create table t (v1 int); + +statement ok +insert into t values (1), (2), (3); + +query T rowsort +select ARRAY[1, v1*2] from t; +---- +{1,2} +{1,4} +{1,6} + +query I rowsort +select * from t where Array[1,v1*2] < Array[1,6]; +---- +1 +2 + +query I rowsort +select * from t where Array[1,v1*2] <= Array[1,6]; +---- +1 +2 +3 + +query I rowsort +select * from t where Array[1,v1*2] > Array[1,2]; +---- +2 +3 + +query I rowsort +select * from t where Array[1,v1*2] >= Array[1,2]; +---- +1 +2 +3 + +query I +select * from t where Array[1,v1*2] = Array[1,6]; +---- +3 + +query I rowsort +select * from t where Array[1,v1*2] != Array[1,6]; +---- +1 +2 + +query T +select min(ARRAY[1, v1*2]) from t; +---- +{1,2} + +query T +select max(ARRAY[1, v1*2]) from t; +---- +{1,6} + +query T +select CAST(NULL as bool[]) from t; +---- 
+NULL +NULL +NULL + +query T +select array[false, false] from t; +---- +{f,f} +{f,f} +{f,f} + +statement ok +drop table t; + +# Comments from Xiangjin: +# In RisingWave, assume arr is of type T[][][]: +# +# arr[x] is of type T[][] +# arr[x][y] is interpreted as (arr[x])[y], and of type T[] +# arr[x0:x1] is of type T[][][] +# arr[x0:x1][y0:y1] is interpreted as (arr[x0:x1])[y0:y1], and of type T[][][] +# arr[x0:x1][y] is interpreted as (arr[x0:x1])[y], and of type T[][] +# +# In PostgreSQL, a 3d array arr would still have type T[]: +# +# arr[x] or arr[x][y] is of type T but value null due to insufficient number of indices +# arr[x][y][z] is of type T +# arr[x0:x1][y0:y1][z0:z1] is of type T[] and 3d +# arr[x0:x1] is interpreted as arr[x0:x1][:][:], and of type T[] 3d +# arr[x0:x1][y] is interpreted as arr[x0:x1][1:y][:], and of type T[] 3d + +# array range access +query T +select array[1,NULL,2][-1:134124523]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[3:1]; +---- +{} + +query T +select (array[1,NULL,2])[:3]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[:2]; +---- +{1,NULL} + +query T +select (array[1,NULL,2])[:1]; +---- +{1} + +query T +select (array[1,NULL,2])[:999]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[:0]; +---- +{} + +query T +select (array[1,NULL,2])[:-1]; +---- +{} + +query T +select (array[1,NULL,2])[-1:]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[0:]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[1:]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[2:]; +---- +{NULL,2} + +query T +select (array[1,NULL,2])[3:]; +---- +{2} + +query T +select (array[1,NULL,2])[4:]; +---- +{} + +query T +select (array[1,NULL,2])[5:]; +---- +{} + +query T +select (array[1,NULL,2])[:]; +---- +{1,NULL,2} + +query T +select (array[1,NULL,2])[5:-1]; +---- +{} + +query T +select (array[1,NULL,2])[2:1]; +---- +{} + +# index larger than int32 +statement error cannot cast type "bigint" to "integer" in Implicit context 
+select (array[1,NULL,2])[1:4294967296]; + +statement error cannot cast type "bigint" to "integer" in Implicit context +select (array[1,NULL,2])[4294967296:3]; diff --git a/e2e_test/batch/types/array_ty.slt.part b/e2e_test/batch/types/array_ty.slt.part index d050664bd6550..4f40b89efd388 100644 --- a/e2e_test/batch/types/array_ty.slt.part +++ b/e2e_test/batch/types/array_ty.slt.part @@ -1,106 +1,6 @@ statement ok SET RW_IMPLICIT_FLUSH TO true; -query T -select ARRAY['foo', 'bar', null]; ----- -{foo,bar,NULL} - -query T -select ARRAY[1,2+3,4*5+1]; ----- -{1,5,21} - -query T -select ARRAY[null]; ----- -{NULL} - -statement error -select ARRAY[]; - -query T -select ARRAY[]::int[]; ----- -{} - -statement ok -create table t (v1 int); - -statement ok -insert into t values (1), (2), (3); - -query T rowsort -select ARRAY[1, v1*2] from t; ----- -{1,2} -{1,4} -{1,6} - -query I rowsort -select * from t where Array[1,v1*2] < Array[1,6]; ----- -1 -2 - -query I rowsort -select * from t where Array[1,v1*2] <= Array[1,6]; ----- -1 -2 -3 - -query I rowsort -select * from t where Array[1,v1*2] > Array[1,2]; ----- -2 -3 - -query I rowsort -select * from t where Array[1,v1*2] >= Array[1,2]; ----- -1 -2 -3 - -query I -select * from t where Array[1,v1*2] = Array[1,6]; ----- -3 - -query I rowsort -select * from t where Array[1,v1*2] != Array[1,6]; ----- -1 -2 - -query T -select min(ARRAY[1, v1*2]) from t; ----- -{1,2} - -query T -select max(ARRAY[1, v1*2]) from t; ----- -{1,6} - -query T -select CAST(NULL as bool[]) from t; ----- -NULL -NULL -NULL - -query T -select array[false, false] from t; ----- -{f,f} -{f,f} -{f,f} - -statement ok -drop table t; - # Now we don't disallow arrays with unmatching dimensions in multidimensional arrays. # This is different from PostgreSQL, we may want to change this in the future. 
statement ok @@ -122,18 +22,6 @@ select array[array[1,2], array[3]]; # arr[x0:x1][y0:y1][z0:z1] is of type T[] and 3d # arr[x0:x1] is interpreted as arr[x0:x1][:][:], and of type T[] 3d # arr[x0:x1][y] is interpreted as arr[x0:x1][1:y][:], and of type T[] 3d - -# array range access -query T -select array[1,NULL,2][-1:134124523]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[3:1]; ----- -{} - query T select array[array[1],array[2],array[3]][-21432315:134124523]; ---- @@ -154,89 +42,3 @@ select array[array[1],array[2],array[3]][-21432315:134124523][1][1]; ---- 1 -query T -select (array[1,NULL,2])[:3]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[:2]; ----- -{1,NULL} - -query T -select (array[1,NULL,2])[:1]; ----- -{1} - -query T -select (array[1,NULL,2])[:999]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[:0]; ----- -{} - -query T -select (array[1,NULL,2])[:-1]; ----- -{} - -query T -select (array[1,NULL,2])[-1:]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[0:]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[1:]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[2:]; ----- -{NULL,2} - -query T -select (array[1,NULL,2])[3:]; ----- -{2} - -query T -select (array[1,NULL,2])[4:]; ----- -{} - -query T -select (array[1,NULL,2])[5:]; ----- -{} - -query T -select (array[1,NULL,2])[:]; ----- -{1,NULL,2} - -query T -select (array[1,NULL,2])[5:-1]; ----- -{} - -query T -select (array[1,NULL,2])[2:1]; ----- -{} - -# index larger than int32 -statement error cannot cast type "bigint" to "integer" in Implicit context -select (array[1,NULL,2])[1:4294967296]; - -statement error cannot cast type "bigint" to "integer" in Implicit context -select (array[1,NULL,2])[4294967296:3]; \ No newline at end of file diff --git a/e2e_test/extended_mode/type.slt b/e2e_test/extended_mode/type.slt index b172fcf389abc..9fa506153d6b9 100644 --- a/e2e_test/extended_mode/type.slt +++ b/e2e_test/extended_mode/type.slt @@ -3,9 +3,14 @@ statement ok SET 
RW_IMPLICIT_FLUSH TO true; -# RisingWave can't support list and struct now so we skip them. -# include ../batch/types/array.slt.part +# RisingWave only supports one-dimensional arrays for now. +include ../batch/types/array_1d_ty.slt.part + +# RisingWave doesn't support struct yet, so we skip it. # include ../batch/types/struct.slt.part + +# copy from ../batch/types/list.slt.part: +# Test cases for list don't work for now as the parser cannot recognize the cast expression. # include ../batch/types/list.slt.part # Sqllogitest can't support binary format bytea type so we skip it. From 855cd7d9037fa0574d8c9dec4c8e2c1621c9f4f8 Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Mon, 11 Mar 2024 20:18:49 +0800 Subject: [PATCH 3/6] chore: update comment for `DataType::Int256` in `ToBinary` --- src/common/src/types/to_binary.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs index 3b66828b7c8fe..5f769b971a657 100644 --- a/src/common/src/types/to_binary.rs +++ b/src/common/src/types/to_binary.rs @@ -106,7 +106,6 @@ impl<'a> ToBinary for ListRef<'a> { DataType::Int16 => Type::INT2_ARRAY, DataType::Int32 => Type::INT4_ARRAY, DataType::Int64 => Type::INT8_ARRAY, - DataType::Int256 => Type::NUMERIC_ARRAY, // HACK: NOT SURE DataType::Float32 => Type::FLOAT4_ARRAY, DataType::Float64 => Type::FLOAT8_ARRAY, DataType::Decimal => Type::NUMERIC_ARRAY, @@ -119,6 +118,9 @@ impl<'a> ToBinary for ListRef<'a> { DataType::Bytea => Type::BYTEA_ARRAY, DataType::Jsonb => Type::JSONB_ARRAY, DataType::Serial => Type::INT4_ARRAY, + // INFO: `Int256` not support in `ScalarRefImpl::to_sql` + // Just let `Array[Int256]` continue `to_sql`, and the `ScalarRefImpl::to_sql` will handle the error. 
+ DataType::Int256 => Type::NUMERIC_ARRAY, DataType::Struct(_) | DataType::List(_) => bail_not_implemented!( issue = 7949, "the pgwire extended-mode encoding for lists with more than one dimension ({ty}) is unsupported" From e0c9e6e847ada76716897e6dc7333c14797fc8f5 Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Thu, 14 Mar 2024 17:23:48 +0800 Subject: [PATCH 4/6] fix: DataType::Serial should be Int8 in RisingWave --- src/common/src/types/to_binary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs index 5f769b971a657..bb1a255d510a0 100644 --- a/src/common/src/types/to_binary.rs +++ b/src/common/src/types/to_binary.rs @@ -117,7 +117,7 @@ impl<'a> ToBinary for ListRef<'a> { DataType::Interval => Type::INTERVAL_ARRAY, DataType::Bytea => Type::BYTEA_ARRAY, DataType::Jsonb => Type::JSONB_ARRAY, - DataType::Serial => Type::INT4_ARRAY, + DataType::Serial => Type::INT8_ARRAY, // INFO: `Int256` not support in `ScalarRefImpl::to_sql` // Just let `Array[Int256]` continue `to_sql`, and the `ScalarRefImpl::to_sql` will handle the error. 
DataType::Int256 => Type::NUMERIC_ARRAY, From 323ff42e4b877897b7864e88f3e4a89d8d832ff6 Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Mon, 25 Mar 2024 13:06:08 +0800 Subject: [PATCH 5/6] fix: only allow one-dimensional arrays to be converted to sql check it in the to_sql method of ListRef --- src/common/src/types/to_sql.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/common/src/types/to_sql.rs b/src/common/src/types/to_sql.rs index c9b210c0e4ef9..2d92e84af3ee4 100644 --- a/src/common/src/types/to_sql.rs +++ b/src/common/src/types/to_sql.rs @@ -18,7 +18,7 @@ use bytes::BytesMut; use itertools::Itertools; use postgres_types::{accepts, to_sql_checked, IsNull, ToSql, Type}; -use super::ListRef; +use super::{DataType, ListRef}; use crate::types::{JsonbRef, ScalarRefImpl}; impl ToSql for ScalarRefImpl<'_> { @@ -84,13 +84,28 @@ impl ToSql for ListRef<'_> { where Self: Sized, { - matches!(ty.kind(), postgres_types::Kind::Array(_)) + use postgres_types::Kind::Array; + matches!(ty.kind(), Array(t) if !matches!(t.kind(), Array(_))) } fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result> where Self: Sized, { - self.iter().collect_vec().to_sql(ty, out) + let dt = self.data_type(); + match dt { + DataType::List(ref li) => { + if li.is_array() { + Err(format!( + "only one-dimensional arrays can be converted to sql, got type: {}", + dt + ) + .into()) + } else { + self.iter().collect_vec().to_sql(ty, out) + } + } + _ => Err("only accepts array data types".into()), + } } } From 9c693e1b85b0fe1f8a010e16efd6aa0a9c984fdb Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Mon, 25 Mar 2024 13:06:20 +0800 Subject: [PATCH 6/6] chore: add docstring for DataType::as_struct --- src/common/src/types/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs index c6123c82ab6dd..9dc8de2781b4e 100644 --- a/src/common/src/types/mod.rs +++ b/src/common/src/types/mod.rs @@ -417,6 
+417,11 @@ impl DataType { Self::Struct(StructType::from_parts(field_names, fields)) } + /// Returns the inner type of a struct type. + /// + /// # Panics + /// + /// Panics if the type is not a struct type. pub fn as_struct(&self) -> &StructType { match self { DataType::Struct(t) => t,