ClickHouse · genzgd · Sep 19, 2024 · Sep 18, 2024 · Sep 19, 2024 · Sep 19, 2024
diff --git a/clickhouse_connect/datatypes/base.py b/clickhouse_connect/datatypes/base.py
@@ -274,7 +274,7 @@ def _write_column_low_card(self, column: Sequence, dest: bytearray, ctx: InsertC
         write_uint64(len(index), dest)
         self._write_column_binary(index, dest, ctx)
         write_uint64(len(keys), dest)
-        write_array(array_type(1 << ix_type, False), keys, dest)
+        write_array(array_type(1 << ix_type, False), keys, dest, ctx)
 
     def _active_null(self, _ctx: QueryContext) -> Any:
         return None
@@ -338,7 +338,7 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
     def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
         if len(column) and self.nullable:
             column = [0 if x is None else x for x in column]
-        write_array(self._array_type, column, dest)
+        write_array(self._array_type, column, dest, ctx)
 
     def _active_null(self, ctx: QueryContext):
         if ctx.as_pandas and ctx.use_extended_dtypes:

diff --git a/clickhouse_connect/datatypes/network.py b/clickhouse_connect/datatypes/network.py
@@ -39,7 +39,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
                 column = [x._ip if x else 0 for x in column]
             else:
                 column = [x._ip for x in column]
-        write_array(self._array_type, column, dest)
+        write_array(self._array_type, column, dest, ctx)
 
     def _active_null(self, ctx: QueryContext):
         fmt = self.read_format(ctx)

diff --git a/clickhouse_connect/datatypes/numeric.py b/clickhouse_connect/datatypes/numeric.py
@@ -189,8 +189,8 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
             return np.array(column)
         return column
 
-    def _write_column_binary(self, column, dest, _ctx):
-        write_array('B', [1 if x else 0 for x in column], dest)
+    def _write_column_binary(self, column, dest, ctx):
+        write_array('B', [1 if x else 0 for x in column], dest, ctx)
 
 
 class Boolean(Bool):
@@ -218,15 +218,15 @@ def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryConte
         lookup = self._int_map.get
         return [lookup(x, None) for x in column]
 
-    def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, _ctx):
+    def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx):
         first = self._first_value(column)
         if first is None or not isinstance(first, str):
             if self.nullable:
                 column = [0 if not x else x for x in column]
-            write_array(self._array_type, column, dest)
+            write_array(self._array_type, column, dest, ctx)
         else:
             lookup = self._name_map.get
-            write_array(self._array_type, [lookup(x, 0) for x in column], dest)
+            write_array(self._array_type, [lookup(x, 0) for x in column], dest, ctx)
 
 
 class Enum8(Enum):
@@ -291,9 +291,9 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
             dec = decimal.Decimal
             mult = self._mult
             if self.nullable:
-                write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest)
+                write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest, ctx)
             else:
-                write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest)
+                write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest, ctx)
 
     def _active_null(self, ctx: QueryContext):
         if ctx.use_none:

diff --git a/clickhouse_connect/datatypes/string.py b/clickhouse_connect/datatypes/string.py
@@ -104,7 +104,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
                         except UnicodeEncodeError:
                             b = empty
                         if len(b) > sz:
-                            raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
+                            raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
                         ext(b)
                         ext(empty[:sz - len(b)])
             else:
@@ -114,19 +114,19 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
                     except UnicodeEncodeError:
                         b = empty
                     if len(b) > sz:
-                        raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
+                        raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
                     ext(b)
                     ext(empty[:sz - len(b)])
         elif self.nullable:
             for b in column:
                 if not b:
                     ext(empty)
                 elif len(b) != sz:
-                    raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
+                    raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
                 else:
                     ext(b)
         else:
             for b in column:
                 if len(b) != sz:
-                    raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
+                    raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
                 ext(b)
diff --git a/clickhouse_connect/datatypes/temporal.py b/clickhouse_connect/datatypes/temporal.py
@@ -45,7 +45,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
                 column = [0 if x is None else (x - esd).days for x in column]
             else:
                 column = [(x - esd).days for x in column]
-        write_array(self._array_type, column, dest)
+        write_array(self._array_type, column, dest, ctx)
 
     def _active_null(self, ctx: QueryContext):
         fmt = self.read_format(ctx)
@@ -136,7 +136,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
                 column = [int(x.timestamp()) if x else 0 for x in column]
             else:
                 column = [int(x.timestamp()) for x in column]
-        write_array(self._array_type, column, dest)
+        write_array(self._array_type, column, dest, ctx)
 
 
 class DateTime64(DateTimeBase):
@@ -213,4 +213,4 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
                           for x in column]
             else:
                 column = [((int(x.timestamp()) * 1000000 + x.microsecond) * prec) // 1000000 for x in column]
-        write_array('q', column, dest)
+        write_array('q', column, dest, ctx)
diff --git a/clickhouse_connect/driver/common.py b/clickhouse_connect/driver/common.py
@@ -38,12 +38,13 @@ def array_type(size: int, signed: bool):
     return code if signed else code.upper()
 
 
-def write_array(code: str, column: Sequence, dest: MutableSequence):
+def write_array(code: str, column: Sequence, dest: MutableSequence, ctx):
     """
     Write a column of native Python data matching the array.array code
     :param code: Python array.array code matching the column data type
     :param column: Column of native Python values
     :param dest: Destination byte buffer
+    :param ctx: The InsertContext
     """
     if len(column) and not isinstance(column[0], (int, float)):
         if code in ('f', 'F', 'd', 'D'):
@@ -54,8 +55,8 @@ def write_array(code: str, column: Sequence, dest: MutableSequence):
         buff = struct.Struct(f'<{len(column)}{code}')
         dest += buff.pack(*column)
     except (TypeError, OverflowError, struct.error) as ex:
-        raise DataError('Unable to create Python array.  This is usually caused by trying to insert None ' +
-                        'values into a ClickHouse column that is not Nullable') from ex
+        raise ctx.make_data_error('Unable to create Python array.  This is usually caused by trying to insert None ' +
+                                  'values into a ClickHouse column that is not Nullable') from ex
 
 
 def write_uint64(value: int, dest: MutableSequence):

diff --git a/clickhouse_connect/driver/insert.py b/clickhouse_connect/driver/insert.py
@@ -7,7 +7,7 @@
 from clickhouse_connect.driver.ctypes import data_conv
 from clickhouse_connect.driver.context import BaseQueryContext
 from clickhouse_connect.driver.options import np, pd, pd_time_test
-from clickhouse_connect.driver.exceptions import ProgrammingError
+from clickhouse_connect.driver.exceptions import ProgrammingError, DataError
 
 if TYPE_CHECKING:
     from clickhouse_connect.datatypes.base import ClickHouseType
@@ -53,6 +53,7 @@ def __init__(self,
         self.block_row_count = DEFAULT_BLOCK_BYTES
         self.data = data
         self.insert_exception = None
+        self._column_name = None
 
     @property
     def empty(self) -> bool:
@@ -198,3 +199,12 @@ def _convert_numpy(self, np_array):
                 data[ix] = data[ix].tolist()
         self.column_oriented = True
         return data
+
+    def start_column(self, name: str):
+        super().start_column(name)
+        self._column_name = name
+
+    def make_data_error(self, error_message: str) -> DataError:
+        if self._column_name is not None:
+            return DataError(f"Failed to write column '{self._column_name}': {error_message}")
+        return DataError(error_message)