From 3d543e32e044912fd9f746e992f5065eb9d623c8 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 11 Mar 2024 19:26:14 +0800
Subject: [PATCH 1/6] feat: add support for 1-dim array in extended mode

---
 src/common/src/types/to_binary.rs | 42 +++++++++++++++++++++++++++++--
 src/common/src/types/to_sql.rs    | 24 ++++++++++++++++--
 2 files changed, 62 insertions(+), 4 deletions(-)
diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs
index 5ab9fd316dcad..3b66828b7c8fe 100644
--- a/src/common/src/types/to_binary.rs
+++ b/src/common/src/types/to_binary.rs
@@ -15,7 +15,7 @@
 use bytes::{Bytes, BytesMut};
 use postgres_types::{ToSql, Type};
 
-use super::{DataType, DatumRef, ScalarRefImpl, F32, F64};
+use super::{DataType, DatumRef, ListRef, ScalarRefImpl, F32, F64};
 use crate::error::NotImplemented;
 
 /// Error type for [`ToBinary`] trait.
@@ -87,7 +87,8 @@ impl ToBinary for ScalarRefImpl<'_> {
             ScalarRefImpl::Time(v) => v.to_binary_with_type(ty),
             ScalarRefImpl::Bytea(v) => v.to_binary_with_type(ty),
             ScalarRefImpl::Jsonb(v) => v.to_binary_with_type(ty),
-            ScalarRefImpl::Struct(_) | ScalarRefImpl::List(_) => bail_not_implemented!(
+            ScalarRefImpl::List(v) => v.to_binary_with_type(ty),
+            ScalarRefImpl::Struct(_) => bail_not_implemented!(
                 issue = 7949,
                 "the pgwire extended-mode encoding for {ty} is unsupported"
             ),
@@ -95,6 +96,43 @@ impl ToBinary for ScalarRefImpl<'_> {
     }
 }
 
+impl<'a> ToBinary for ListRef<'a> {
+    fn to_binary_with_type(&self, ty: &DataType) -> Result<Option<Bytes>> {
+        // `as_list` is safe here: the data type paired with a `ListRef` is always a list type.
+        let elem_ty = ty.as_list();
+
+        let array_ty = match elem_ty {
+            DataType::Boolean => Type::BOOL_ARRAY,
+            DataType::Int16 => Type::INT2_ARRAY,
+            DataType::Int32 => Type::INT4_ARRAY,
+            DataType::Int64 => Type::INT8_ARRAY,
+            DataType::Int256 => Type::NUMERIC_ARRAY, // HACK: NOT SURE
+            DataType::Float32 => Type::FLOAT4_ARRAY,
+            DataType::Float64 => Type::FLOAT8_ARRAY,
+            DataType::Decimal => Type::NUMERIC_ARRAY,
+            DataType::Date => Type::DATE_ARRAY,
+            DataType::Varchar => Type::VARCHAR_ARRAY,
+            DataType::Time => Type::TIME_ARRAY,
+            DataType::Timestamp => Type::TIMESTAMP_ARRAY,
+            DataType::Timestamptz => Type::TIMESTAMPTZ_ARRAY,
+            DataType::Interval => Type::INTERVAL_ARRAY,
+            DataType::Bytea => Type::BYTEA_ARRAY,
+            DataType::Jsonb => Type::JSONB_ARRAY,
+            DataType::Serial => Type::INT4_ARRAY,
+            DataType::Struct(_) | DataType::List(_)  => bail_not_implemented!(
+                issue = 7949,
+                "the pgwire extended-mode encoding for lists with more than one dimension ({ty}) is unsupported"
+            ),
+        };
+
+        let mut buf = BytesMut::new();
+        self.to_sql(&array_ty, &mut buf)
+            .map_err(ToBinaryError::ToSql)?;
+
+        Ok(Some(buf.freeze()))
+    }
+}
+
 impl ToBinary for DatumRef<'_> {
     fn to_binary_with_type(&self, ty: &DataType) -> Result<Option<Bytes>> {
         match self {
diff --git a/src/common/src/types/to_sql.rs b/src/common/src/types/to_sql.rs
index 71957b3bf35c8..c9b210c0e4ef9 100644
--- a/src/common/src/types/to_sql.rs
+++ b/src/common/src/types/to_sql.rs
@@ -15,8 +15,10 @@
 use std::error::Error;
 
 use bytes::BytesMut;
+use itertools::Itertools;
 use postgres_types::{accepts, to_sql_checked, IsNull, ToSql, Type};
 
+use super::ListRef;
 use crate::types::{JsonbRef, ScalarRefImpl};
 
 impl ToSql for ScalarRefImpl<'_> {
@@ -42,10 +44,10 @@ impl ToSql for ScalarRefImpl<'_> {
             ScalarRefImpl::Timestamptz(v) => v.to_sql(ty, out),
             ScalarRefImpl::Time(v) => v.to_sql(ty, out),
             ScalarRefImpl::Bytea(v) => v.to_sql(ty, out),
+            ScalarRefImpl::List(v) => v.to_sql(ty, out),
             ScalarRefImpl::Jsonb(_) // jsonbb::Value doesn't implement ToSql yet
             | ScalarRefImpl::Int256(_)
-            | ScalarRefImpl::Struct(_)
-            | ScalarRefImpl::List(_) => {
+            | ScalarRefImpl::Struct(_) => {
                 bail_not_implemented!("the postgres encoding for {ty} is unsupported")
             }
         }
@@ -74,3 +76,21 @@ impl ToSql for JsonbRef<'_> {
         Ok(IsNull::No)
     }
 }
+
+impl ToSql for ListRef<'_> {
+    to_sql_checked!();
+
+    fn accepts(ty: &Type) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(ty.kind(), postgres_types::Kind::Array(_))
+    }
+
+    fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>>
+    where
+        Self: Sized,
+    {
+        self.iter().collect_vec().to_sql(ty, out)
+    }
+}

From 44085318afd21ae265231a43383e9711d9d68290 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 11 Mar 2024 19:26:51 +0800
Subject: [PATCH 2/6] test: add 1d array e2e test cases for extended mode

---
 e2e_test/batch/types/array_1d_ty.slt.part | 217 ++++++++++++++++++++++
 e2e_test/batch/types/array_ty.slt.part    | 198 --------------------
 e2e_test/extended_mode/type.slt           |   9 +-
 3 files changed, 224 insertions(+), 200 deletions(-)
 create mode 100644 e2e_test/batch/types/array_1d_ty.slt.part

diff --git a/e2e_test/batch/types/array_1d_ty.slt.part b/e2e_test/batch/types/array_1d_ty.slt.part
new file mode 100644
index 0000000000000..87470e7716617
--- /dev/null
+++ b/e2e_test/batch/types/array_1d_ty.slt.part
@@ -0,0 +1,217 @@
+statement ok
+SET RW_IMPLICIT_FLUSH TO true;
+
+query T
+select ARRAY['foo', 'bar', null];
+----
+{foo,bar,NULL}
+
+query T
+select ARRAY[1,2+3,4*5+1];
+----
+{1,5,21}
+
+query T
+select ARRAY[null];
+----
+{NULL}
+
+statement error
+select ARRAY[];
+
+query T
+select ARRAY[]::int[];
+----
+{}
+
+statement ok
+create table t (v1 int);
+
+statement ok
+insert into t values (1), (2), (3);
+
+query T rowsort
+select ARRAY[1, v1*2] from t;
+----
+{1,2}
+{1,4}
+{1,6}
+
+query I rowsort
+select * from t where Array[1,v1*2] < Array[1,6];
+----
+1
+2
+
+query I rowsort
+select * from t where Array[1,v1*2] <= Array[1,6];
+----
+1
+2
+3
+
+query I rowsort
+select * from t where Array[1,v1*2] > Array[1,2];
+----
+2
+3
+
+query I rowsort
+select * from t where Array[1,v1*2] >= Array[1,2];
+----
+1
+2
+3
+
+query I
+select * from t where Array[1,v1*2] = Array[1,6];
+----
+3
+
+query I rowsort
+select * from t where Array[1,v1*2] != Array[1,6];
+----
+1
+2
+
+query T
+select min(ARRAY[1, v1*2]) from t;
+----
+{1,2}
+
+query T
+select max(ARRAY[1, v1*2]) from t;
+----
+{1,6}
+
+query T
+select CAST(NULL as bool[]) from t;
+----
+NULL
+NULL
+NULL
+
+query T
+select array[false, false] from t;
+----
+{f,f}
+{f,f}
+{f,f}
+
+statement ok
+drop table t;
+
+# Comments from Xiangjin:
+# In RisingWave, assume arr is of type T[][][]:
+#
+#    arr[x] is of type T[][]
+#    arr[x][y] is interpreted as (arr[x])[y], and of type T[]
+#    arr[x0:x1] is of type T[][][]
+#    arr[x0:x1][y0:y1] is interpreted as (arr[x0:x1])[y0:y1], and of type T[][][]
+#    arr[x0:x1][y] is interpreted as (arr[x0:x1])[y], and of type T[][]
+#
+# In PostgreSQL, a 3d array arr would still have type T[]:
+#
+#    arr[x] or arr[x][y] is of type T but value null due to insufficient number of indices
+#    arr[x][y][z] is of type T
+#    arr[x0:x1][y0:y1][z0:z1] is of type T[] and 3d
+#    arr[x0:x1] is interpreted as arr[x0:x1][:][:], and of type T[] 3d
+#    arr[x0:x1][y] is interpreted as arr[x0:x1][1:y][:], and of type T[] 3d
+
+# array range access
+query T
+select array[1,NULL,2][-1:134124523];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[3:1];
+----
+{}
+
+query T
+select (array[1,NULL,2])[:3];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[:2];
+----
+{1,NULL}
+
+query T
+select (array[1,NULL,2])[:1];
+----
+{1}
+
+query T
+select (array[1,NULL,2])[:999];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[:0];
+----
+{}
+
+query T
+select (array[1,NULL,2])[:-1];
+----
+{}
+
+query T
+select (array[1,NULL,2])[-1:];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[0:];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[1:];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[2:];
+----
+{NULL,2}
+
+query T
+select (array[1,NULL,2])[3:];
+----
+{2}
+
+query T
+select (array[1,NULL,2])[4:];
+----
+{}
+
+query T
+select (array[1,NULL,2])[5:];
+----
+{}
+
+query T
+select (array[1,NULL,2])[:];
+----
+{1,NULL,2}
+
+query T
+select (array[1,NULL,2])[5:-1];
+----
+{}
+
+query T
+select (array[1,NULL,2])[2:1];
+----
+{}
+
+# index larger than int32
+statement error cannot cast type "bigint" to "integer" in Implicit context
+select (array[1,NULL,2])[1:4294967296];
+
+statement error cannot cast type "bigint" to "integer" in Implicit context
+select (array[1,NULL,2])[4294967296:3];
diff --git a/e2e_test/batch/types/array_ty.slt.part b/e2e_test/batch/types/array_ty.slt.part
index d050664bd6550..4f40b89efd388 100644
--- a/e2e_test/batch/types/array_ty.slt.part
+++ b/e2e_test/batch/types/array_ty.slt.part
@@ -1,106 +1,6 @@
 statement ok
 SET RW_IMPLICIT_FLUSH TO true;
 
-query T
-select ARRAY['foo', 'bar', null];
-----
-{foo,bar,NULL}
-
-query T
-select ARRAY[1,2+3,4*5+1];
-----
-{1,5,21}
-
-query T
-select ARRAY[null];
-----
-{NULL}
-
-statement error
-select ARRAY[];
-
-query T
-select ARRAY[]::int[];
-----
-{}
-
-statement ok
-create table t (v1 int);
-
-statement ok
-insert into t values (1), (2), (3);
-
-query T rowsort
-select ARRAY[1, v1*2] from t;
-----
-{1,2}
-{1,4}
-{1,6}
-
-query I rowsort
-select * from t where Array[1,v1*2] < Array[1,6];
-----
-1
-2
-
-query I rowsort
-select * from t where Array[1,v1*2] <= Array[1,6];
-----
-1
-2
-3
-
-query I rowsort
-select * from t where Array[1,v1*2] > Array[1,2];
-----
-2
-3
-
-query I rowsort
-select * from t where Array[1,v1*2] >= Array[1,2];
-----
-1
-2
-3
-
-query I
-select * from t where Array[1,v1*2] = Array[1,6];
-----
-3
-
-query I rowsort
-select * from t where Array[1,v1*2] != Array[1,6];
-----
-1
-2
-
-query T
-select min(ARRAY[1, v1*2]) from t;
-----
-{1,2}
-
-query T
-select max(ARRAY[1, v1*2]) from t;
-----
-{1,6}
-
-query T
-select CAST(NULL as bool[]) from t;
-----
-NULL
-NULL
-NULL
-
-query T
-select array[false, false] from t;
-----
-{f,f}
-{f,f}
-{f,f}
-
-statement ok
-drop table t;
-
 # Now we don't disallow arrays with unmatching dimensions in multidimensional arrays.
 # This is different from PostgreSQL, we may want to change this in the future.
 statement ok
@@ -122,18 +22,6 @@ select array[array[1,2], array[3]];
 #    arr[x0:x1][y0:y1][z0:z1] is of type T[] and 3d
 #    arr[x0:x1] is interpreted as arr[x0:x1][:][:], and of type T[] 3d
 #    arr[x0:x1][y] is interpreted as arr[x0:x1][1:y][:], and of type T[] 3d
-
-# array range access
-query T
-select array[1,NULL,2][-1:134124523];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[3:1];
-----
-{}
-
 query T
 select array[array[1],array[2],array[3]][-21432315:134124523];
 ----
@@ -154,89 +42,3 @@ select array[array[1],array[2],array[3]][-21432315:134124523][1][1];
 ----
 1
 
-query T
-select (array[1,NULL,2])[:3];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[:2];
-----
-{1,NULL}
-
-query T
-select (array[1,NULL,2])[:1];
-----
-{1}
-
-query T
-select (array[1,NULL,2])[:999];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[:0];
-----
-{}
-
-query T
-select (array[1,NULL,2])[:-1];
-----
-{}
-
-query T
-select (array[1,NULL,2])[-1:];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[0:];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[1:];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[2:];
-----
-{NULL,2}
-
-query T
-select (array[1,NULL,2])[3:];
-----
-{2}
-
-query T
-select (array[1,NULL,2])[4:];
-----
-{}
-
-query T
-select (array[1,NULL,2])[5:];
-----
-{}
-
-query T
-select (array[1,NULL,2])[:];
-----
-{1,NULL,2}
-
-query T
-select (array[1,NULL,2])[5:-1];
-----
-{}
-
-query T
-select (array[1,NULL,2])[2:1];
-----
-{}
-
-# index larger than int32
-statement error cannot cast type "bigint" to "integer" in Implicit context
-select (array[1,NULL,2])[1:4294967296];
-
-statement error cannot cast type "bigint" to "integer" in Implicit context
-select (array[1,NULL,2])[4294967296:3];
\ No newline at end of file
diff --git a/e2e_test/extended_mode/type.slt b/e2e_test/extended_mode/type.slt
index b172fcf389abc..9fa506153d6b9 100644
--- a/e2e_test/extended_mode/type.slt
+++ b/e2e_test/extended_mode/type.slt
@@ -3,9 +3,14 @@
 statement ok
 SET RW_IMPLICIT_FLUSH TO true;
 
-# RisingWave can't support list and struct now so we skip them.
-# include ../batch/types/array.slt.part
+# RisingWave only supports single-dimension arrays for now.
+include ../batch/types/array_1d_ty.slt.part
+
+# RisingWave doesn't support structs yet, so we skip them.
 # include ../batch/types/struct.slt.part
+
+# copy from ../batch/types/list.slt.part:
+# Test cases for list don't work for now as the parser cannot recognize the cast expression.
 # include ../batch/types/list.slt.part
 
 # Sqllogitest can't support binary format bytea type so we skip it.

From 855cd7d9037fa0574d8c9dec4c8e2c1621c9f4f8 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 11 Mar 2024 20:18:49 +0800
Subject: [PATCH 3/6] chore: update comment for `DataType::Int256` in
 `ToBinary`

---
 src/common/src/types/to_binary.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs
index 3b66828b7c8fe..5f769b971a657 100644
--- a/src/common/src/types/to_binary.rs
+++ b/src/common/src/types/to_binary.rs
@@ -106,7 +106,6 @@ impl<'a> ToBinary for ListRef<'a> {
             DataType::Int16 => Type::INT2_ARRAY,
             DataType::Int32 => Type::INT4_ARRAY,
             DataType::Int64 => Type::INT8_ARRAY,
-            DataType::Int256 => Type::NUMERIC_ARRAY, // HACK: NOT SURE
             DataType::Float32 => Type::FLOAT4_ARRAY,
             DataType::Float64 => Type::FLOAT8_ARRAY,
             DataType::Decimal => Type::NUMERIC_ARRAY,
@@ -119,6 +118,9 @@ impl<'a> ToBinary for ListRef<'a> {
             DataType::Bytea => Type::BYTEA_ARRAY,
             DataType::Jsonb => Type::JSONB_ARRAY,
             DataType::Serial => Type::INT4_ARRAY,
+            // INFO: `Int256` not support in `ScalarRefImpl::to_sql`
+            // Just let `Array[Int256]` continue `to_sql`, and the `ScalarRefImpl::to_sql` will handle the error.
+            DataType::Int256 => Type::NUMERIC_ARRAY,
             DataType::Struct(_) | DataType::List(_)  => bail_not_implemented!(
                 issue = 7949,
                 "the pgwire extended-mode encoding for lists with more than one dimension ({ty}) is unsupported"

From e0c9e6e847ada76716897e6dc7333c14797fc8f5 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Thu, 14 Mar 2024 17:23:48 +0800
Subject: [PATCH 4/6] fix: DataType::Serial should be Int8 in RisingWave

---
 src/common/src/types/to_binary.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs
index 5f769b971a657..bb1a255d510a0 100644
--- a/src/common/src/types/to_binary.rs
+++ b/src/common/src/types/to_binary.rs
@@ -117,7 +117,7 @@ impl<'a> ToBinary for ListRef<'a> {
             DataType::Interval => Type::INTERVAL_ARRAY,
             DataType::Bytea => Type::BYTEA_ARRAY,
             DataType::Jsonb => Type::JSONB_ARRAY,
-            DataType::Serial => Type::INT4_ARRAY,
+            DataType::Serial => Type::INT8_ARRAY,
             // INFO: `Int256` not support in `ScalarRefImpl::to_sql`
             // Just let `Array[Int256]` continue `to_sql`, and the `ScalarRefImpl::to_sql` will handle the error.
             DataType::Int256 => Type::NUMERIC_ARRAY,

From 323ff42e4b877897b7864e88f3e4a89d8d832ff6 Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Mar 2024 13:06:08 +0800
Subject: [PATCH 5/6] fix: only allow one-dimensional arrays to be converted to
 sql

check it in the to_sql method of ListRef
---
 src/common/src/types/to_sql.rs | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/common/src/types/to_sql.rs b/src/common/src/types/to_sql.rs
index c9b210c0e4ef9..2d92e84af3ee4 100644
--- a/src/common/src/types/to_sql.rs
+++ b/src/common/src/types/to_sql.rs
@@ -18,7 +18,7 @@ use bytes::BytesMut;
 use itertools::Itertools;
 use postgres_types::{accepts, to_sql_checked, IsNull, ToSql, Type};
 
-use super::ListRef;
+use super::{DataType, ListRef};
 use crate::types::{JsonbRef, ScalarRefImpl};
 
 impl ToSql for ScalarRefImpl<'_> {
@@ -84,13 +84,28 @@ impl ToSql for ListRef<'_> {
     where
         Self: Sized,
     {
-        matches!(ty.kind(), postgres_types::Kind::Array(_))
+        use postgres_types::Kind::Array;
+        matches!(ty.kind(), Array(t) if !matches!(t.kind(), Array(_)))
     }
 
     fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>>
     where
         Self: Sized,
     {
-        self.iter().collect_vec().to_sql(ty, out)
+        let dt = self.data_type();
+        match dt {
+            DataType::List(ref li) => {
+                if li.is_array() {
+                    Err(format!(
+                        "only one-dimensional arrays can be converted to sql, got type: {}",
+                        dt
+                    )
+                    .into())
+                } else {
+                    self.iter().collect_vec().to_sql(ty, out)
+                }
+            }
+            _ => Err("only accepts array data types".into()),
+        }
     }
 }

From 9c693e1b85b0fe1f8a010e16efd6aa0a9c984fdb Mon Sep 17 00:00:00 2001
From: Jinser Kafka <aimer@purejs.icu>
Date: Mon, 25 Mar 2024 13:06:20 +0800
Subject: [PATCH 6/6] chore: add docstring for DataType::as_struct

---
 src/common/src/types/mod.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs
index c6123c82ab6dd..9dc8de2781b4e 100644
--- a/src/common/src/types/mod.rs
+++ b/src/common/src/types/mod.rs
@@ -417,6 +417,11 @@ impl DataType {
         Self::Struct(StructType::from_parts(field_names, fields))
     }
 
+    /// Returns the inner type of a struct type.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the type is not a struct type.
     pub fn as_struct(&self) -> &StructType {
         match self {
             DataType::Struct(t) => t,