From 8f2f3bc06210d7d51d9c9deac63b48d9c2bcc8de Mon Sep 17 00:00:00 2001
From: Jorge Paravicini
Date: Fri, 10 May 2024 09:04:05 +0200
Subject: [PATCH] feat: add json column support

---
Reviewer notes (text between the "---" line and the diffstat is commentary
and is not part of the commit):

* ColumnType.cs: the "JSON" alias now resolves to "Object('json')" instead of
  "String"; "Object('json')" is registered to JsonColumnType; the ObjectRegex
  fallback that returned a StringColumnType is removed.
* JsonColumnType.cs (new file): a column backed by a string[]. Read() throws
  NotSupportedException. Write() emits, for each stored value, the byte 1
  followed by the string.
* Test_166_JSONColumns.cs: the bulk insert/select test is replaced by a single
  insert test against a json_test table.
* This copy was rebuilt from a whitespace-collapsed export. Indentation and
  the position of blank context lines were chosen to satisfy the hunk line
  counts and may not match the target tree; verify before applying.
* Context and removed lines such as "Dictionary Types" and the "(?\d+)" regex
  groups look like they lost angle-bracketed text in the export. They are
  reproduced as found and must be checked against the target file.
* The "using System;" -/+ pair in the first hunk shows no visible difference;
  presumably it is a byte-order-mark or line-ending change. TODO confirm.
* In the new file, Array.Empty<string>() and Cast<string>() carry the type
  argument required by the string[] field they are assigned to.

 ClickHouse.Ado/Impl/ColumnTypes/ColumnType.cs | 12 ++--
 .../Impl/ColumnTypes/JsonColumnType.cs        | 71 +++++++++++++++++++
 ClickHouse.Test/Test_166_JSONColumns.cs       | 59 ++++++---------
 3 files changed, 98 insertions(+), 44 deletions(-)
 create mode 100644 ClickHouse.Ado/Impl/ColumnTypes/JsonColumnType.cs

diff --git a/ClickHouse.Ado/Impl/ColumnTypes/ColumnType.cs b/ClickHouse.Ado/Impl/ColumnTypes/ColumnType.cs
index f8efeb0..65e501d 100644
--- a/ClickHouse.Ado/Impl/ColumnTypes/ColumnType.cs
+++ b/ClickHouse.Ado/Impl/ColumnTypes/ColumnType.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
 using System.Collections;
 using System.Collections.Generic;
 using System.IO;
@@ -36,7 +36,6 @@ internal abstract class ColumnType {
         { "TINYTEXT", "String" },
         { "LONGTEXT", "String" },
         { "BLOB", "String" },
-        { "JSON", "String" },
 
         //Clickhouse-specific aliases
         { "Decimal", "Decimal" },
@@ -45,7 +44,8 @@ internal abstract class ColumnType {
         { "Decimal128", "Decimal128" },
         { "Decimal256", "Decimal256" },
         { "Date", "Date" },
-        { "DateTime", "DateTime" }
+        { "DateTime", "DateTime" },
+        { "JSON", "Object('json')" }
     };
 
     private static readonly Dictionary Types = new() {
@@ -71,10 +71,10 @@ internal abstract class ColumnType {
         { Ipv4ColumnType.Ipv4ColumnTypeName, typeof(Ipv4ColumnType) },
         { Ipv6ColumnType.Ipv6ColumnTypeName, typeof(Ipv6ColumnType) },
         { "Nothing", typeof(NullColumnType) },
-        { "Bool", typeof(BooleanColumnType) }
+        { "Bool", typeof(BooleanColumnType) },
+        { "Object('json')", typeof(JsonColumnType) }
     };
 
-    private static readonly Regex ObjectRegex = new(@"^Object\s*\(\s*'json'\s*\)$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
     private static readonly Regex FixedStringRegex = new(@"^FixedString\s*\(\s*(?\d+)\s*\)$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
     private static readonly Regex NestedRegex = new(@"^(?\w+)\s*\(\s*(?.+)\s*\)$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
     private static readonly Regex DecimalRegex = new(@"^Decimal(((?(32|64|128|256))\s*\()|\s*\(\s*(?\d+)\s*,)\s*(?\d+)\s*\)$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
@@ -118,8 +118,6 @@ public static ColumnType Create(string name) {
             return new DecimalColumnType(len, uint.Parse(m.Groups["prec"].Value));
         }
 
-        m = ObjectRegex.Match(name);
-        if (m.Success) return new StringColumnType();
         m = DateTime64Regex.Match(name);
         if (m.Success) return new DateTime64ColumnType(int.Parse(m.Groups["prec"].Value), ProtocolFormatter.UnescapeStringValue(m.Groups["tz"].Value));
         m = DateTimeRegex.Match(name);
diff --git a/ClickHouse.Ado/Impl/ColumnTypes/JsonColumnType.cs b/ClickHouse.Ado/Impl/ColumnTypes/JsonColumnType.cs
new file mode 100644
index 0000000..77f0237
--- /dev/null
+++ b/ClickHouse.Ado/Impl/ColumnTypes/JsonColumnType.cs
@@ -0,0 +1,71 @@
+using System;
+using System.Collections;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using ClickHouse.Ado.Impl.ATG.Insert;
+using ClickHouse.Ado.Impl.Data;
+
+namespace ClickHouse.Ado.Impl.ColumnTypes;
+
+internal class JsonColumnType : ColumnType
+{
+    private string[] _data;
+
+    public JsonColumnType()
+    {
+        _data = Array.Empty<string>();
+    }
+
+    public JsonColumnType(string[] data)
+    {
+        _data = data;
+    }
+
+    public override int Rows => _data.Length;
+    internal override Type CLRType => typeof(string);
+
+    // Reading a JSON object returns the data as a tuple.
+    internal override Task Read(ProtocolFormatter formatter, int rows, CancellationToken cToken) =>
+        throw new NotSupportedException();
+
+    public override void ValueFromConst(Parser.ValueType val)
+    {
+        if (val.TypeHint == Parser.ConstType.String)
+        {
+            var unescapedValue = ProtocolFormatter.UnescapeStringValue(val.StringValue);
+            _data = new[] { unescapedValue };
+        }
+        else
+        {
+            _data = new[] { val.StringValue };
+        }
+    }
+
+    public override string AsClickHouseType(ClickHouseTypeUsageIntent usageIntent) => "Object('json')";
+
+    public override async Task Write(ProtocolFormatter formatter, int rows, CancellationToken cToken)
+    {
+        foreach (var d in _data)
+        {
+            // Clickhouse expects a byte indicating the type of the data when parsing Objects.
+            // The possible values are 0 for Tuples and 1 for Strings
+            await formatter.WriteByte(0b0000_0001, cToken);
+            await formatter.WriteString(d, cToken);
+        }
+    }
+
+    public override void ValueFromParam(ClickHouseParameter parameter)
+    {
+        _data = new[] { parameter.Value?.ToString() };
+    }
+
+    public override object Value(int currentRow) => _data[currentRow];
+
+    public override long IntValue(int currentRow) => throw new InvalidCastException();
+
+    public override void ValuesFromConst(IEnumerable objects)
+    {
+        _data = objects.Cast<string>().ToArray();
+    }
+}
\ No newline at end of file
diff --git a/ClickHouse.Test/Test_166_JSONColumns.cs b/ClickHouse.Test/Test_166_JSONColumns.cs
index 103ee88..cf9f8ec 100644
--- a/ClickHouse.Test/Test_166_JSONColumns.cs
+++ b/ClickHouse.Test/Test_166_JSONColumns.cs
@@ -1,8 +1,6 @@
 using System;
 using System.Data;
-using System.Net;
 using System.Threading;
-using System.Threading.Tasks;
 using ClickHouse.Ado;
 using NUnit.Framework;
 
@@ -11,47 +9,34 @@ namespace ClickHouse.Test;
 public class Test_166_JSONColumns
 {
     [OneTimeSetUp]
-    public void CreateStructures() {
-        using (var cnn = ConnectionHandler.GetConnection()) {
-            cnn.CreateCommand("DROP TABLE IF EXISTS test_166_json").ExecuteNonQuery();
-            cnn.CreateCommand("CREATE TABLE test_166_json (session_id String, guess JSON, timestamp DateTime DEFAULT now() CODEC(Delta(4), ZSTD(1)) ) ENGINE = MergeTree ORDER BY session_id").ExecuteNonQuery();
-        }
+    public void CreateDatabase()
+    {
+        using var connection = ConnectionHandler.GetConnection();
+        connection.CreateCommand("SET allow_experimental_object_type = 1").ExecuteNonQuery();
+        connection.CreateCommand("DROP TABLE IF EXISTS json_test").ExecuteNonQuery();
+        connection.CreateCommand("""
+                                 CREATE TABLE json_test (
+                                 json Object('json'),
+                                 timestamp DateTime
+                                 )
+                                 ENGINE = MergeTree()
+                                 PARTITION BY toYYYYMM(timestamp)
+                                 ORDER BY (timestamp)
+                                 """).ExecuteNonQuery();
 
         Thread.Sleep(1000);
     }
 
     [Test]
-    public async Task TestInsertBulk()
+    public void TestSimple()
     {
-        using (var cnn = ConnectionHandler.GetConnection()) {
-            cnn.CreateCommand("INSERT INTO test_166_json (session_id, guess, timestamp) VALUES @bulk").AddParameter("bulk", DbType.Object, new object[]
-            {
-                new object[] { "1", "{\"name\":\"value\"}", DateTime.UtcNow }
-            }).ExecuteNonQuery();
-        }
-
-        var values = SelectValue("1");
-        Assert.True(values.Equals("{\"name\":\"value\"}"));
-    }
-
-    private string SelectValue(string k) {
-        using (var cnn = ConnectionHandler.GetConnection())
-        {
-            string rv = null;
-            using (var cmd = cnn.CreateCommand("SELECT guess FROM test_166_json WHERE session_id=@k")) {
-                cmd.AddParameter("k", k);
-                using (var reader = cmd.ExecuteReader()) {
-                    reader.ReadAll(
-                        r =>
-                        {
-                            rv = r.GetString(1);
-                        }
-                    );
-                }
-            }
+        // Arrange
+        using var connection = ConnectionHandler.GetConnection();
+        var command = connection.CreateCommand("INSERT INTO json_test (json, timestamp) VALUES (@json, @timestamp)")
+            .AddParameter("json", """{"a": 1, "b": 2}""")
+            .AddParameter("timestamp", DbType.DateTime, DateTime.Now);
 
-        return rv;
-    }
+        // Act & Assert
+        Assert.DoesNotThrow(() => command.ExecuteNonQuery());
     }
-
 }