Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include the column name in the error message for an unexpected NULL #397

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clickhouse_connect/datatypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def _write_column_low_card(self, column: Sequence, dest: bytearray, ctx: InsertC
write_uint64(len(index), dest)
self._write_column_binary(index, dest, ctx)
write_uint64(len(keys), dest)
write_array(array_type(1 << ix_type, False), keys, dest)
write_array(array_type(1 << ix_type, False), keys, dest, ctx)

def _active_null(self, _ctx: QueryContext) -> Any:
return None
Expand Down Expand Up @@ -338,7 +338,7 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx: InsertContext):
if len(column) and self.nullable:
column = [0 if x is None else x for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
if ctx.as_pandas and ctx.use_extended_dtypes:
Expand Down
2 changes: 1 addition & 1 deletion clickhouse_connect/datatypes/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [x._ip if x else 0 for x in column]
else:
column = [x._ip for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
Expand Down
14 changes: 7 additions & 7 deletions clickhouse_connect/datatypes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def _finalize_column(self, column: Sequence, ctx: QueryContext) -> Sequence:
return np.array(column)
return column

def _write_column_binary(self, column, dest, _ctx):
write_array('B', [1 if x else 0 for x in column], dest)
def _write_column_binary(self, column, dest, ctx):
write_array('B', [1 if x else 0 for x in column], dest, ctx)


class Boolean(Bool):
Expand Down Expand Up @@ -218,15 +218,15 @@ def _read_column_binary(self, source: ByteSource, num_rows: int, ctx: QueryConte
lookup = self._int_map.get
return [lookup(x, None) for x in column]

def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, _ctx):
def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: bytearray, ctx):
first = self._first_value(column)
if first is None or not isinstance(first, str):
if self.nullable:
column = [0 if not x else x for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)
else:
lookup = self._name_map.get
write_array(self._array_type, [lookup(x, 0) for x in column], dest)
write_array(self._array_type, [lookup(x, 0) for x in column], dest, ctx)


class Enum8(Enum):
Expand Down Expand Up @@ -291,9 +291,9 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
dec = decimal.Decimal
mult = self._mult
if self.nullable:
write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest)
write_array(self._array_type, [int(dec(str(x)) * mult) if x else 0 for x in column], dest, ctx)
else:
write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest)
write_array(self._array_type, [int(dec(str(x)) * mult) for x in column], dest, ctx)

def _active_null(self, ctx: QueryContext):
if ctx.use_none:
Expand Down
8 changes: 4 additions & 4 deletions clickhouse_connect/datatypes/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
except UnicodeEncodeError:
b = empty
if len(b) > sz:
raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
ext(b)
ext(empty[:sz - len(b)])
else:
Expand All @@ -114,19 +114,19 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
except UnicodeEncodeError:
b = empty
if len(b) > sz:
raise DataError(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
raise ctx.make_data_error(f'UTF-8 encoded FixedString value {b.hex(" ")} exceeds column size {sz}')
ext(b)
ext(empty[:sz - len(b)])
elif self.nullable:
for b in column:
if not b:
ext(empty)
elif len(b) != sz:
raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
else:
ext(b)
else:
for b in column:
if len(b) != sz:
raise DataError(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
raise ctx.make_data_error(f'Fixed String binary value {b.hex(" ")} does not match column size {sz}')
ext(b)
6 changes: 3 additions & 3 deletions clickhouse_connect/datatypes/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [0 if x is None else (x - esd).days for x in column]
else:
column = [(x - esd).days for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)

def _active_null(self, ctx: QueryContext):
fmt = self.read_format(ctx)
Expand Down Expand Up @@ -136,7 +136,7 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
column = [int(x.timestamp()) if x else 0 for x in column]
else:
column = [int(x.timestamp()) for x in column]
write_array(self._array_type, column, dest)
write_array(self._array_type, column, dest, ctx)


class DateTime64(DateTimeBase):
Expand Down Expand Up @@ -213,4 +213,4 @@ def _write_column_binary(self, column: Union[Sequence, MutableSequence], dest: b
for x in column]
else:
column = [((int(x.timestamp()) * 1000000 + x.microsecond) * prec) // 1000000 for x in column]
write_array('q', column, dest)
write_array('q', column, dest, ctx)
7 changes: 4 additions & 3 deletions clickhouse_connect/driver/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ def array_type(size: int, signed: bool):
return code if signed else code.upper()


def write_array(code: str, column: Sequence, dest: MutableSequence):
def write_array(code: str, column: Sequence, dest: MutableSequence, ctx):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can add the ctx Type here too?

"""
Write a column of native Python data matching the array.array code
:param code: Python array.array code matching the column data type
:param column: Column of native Python values
:param dest: Destination byte buffer
:param ctx: The InsertContext
"""
if len(column) and not isinstance(column[0], (int, float)):
if code in ('f', 'F', 'd', 'D'):
Expand All @@ -54,8 +55,8 @@ def write_array(code: str, column: Sequence, dest: MutableSequence):
buff = struct.Struct(f'<{len(column)}{code}')
dest += buff.pack(*column)
except (TypeError, OverflowError, struct.error) as ex:
raise DataError('Unable to create Python array. This is usually caused by trying to insert None ' +
'values into a ClickHouse column that is not Nullable') from ex
raise ctx.make_data_error('Unable to create Python array. This is usually caused by trying to insert None ' +
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the only error I really needed improving, but I modified the other places where DataError could be raised too, in string.py

'values into a ClickHouse column that is not Nullable') from ex


def write_uint64(value: int, dest: MutableSequence):
Expand Down
12 changes: 11 additions & 1 deletion clickhouse_connect/driver/insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from clickhouse_connect.driver.ctypes import data_conv
from clickhouse_connect.driver.context import BaseQueryContext
from clickhouse_connect.driver.options import np, pd, pd_time_test
from clickhouse_connect.driver.exceptions import ProgrammingError
from clickhouse_connect.driver.exceptions import ProgrammingError, DataError

if TYPE_CHECKING:
from clickhouse_connect.datatypes.base import ClickHouseType
Expand Down Expand Up @@ -53,6 +53,7 @@ def __init__(self,
self.block_row_count = DEFAULT_BLOCK_BYTES
self.data = data
self.insert_exception = None
self._column_name = None

@property
def empty(self) -> bool:
Expand Down Expand Up @@ -198,3 +199,12 @@ def _convert_numpy(self, np_array):
data[ix] = data[ix].tolist()
self.column_oriented = True
return data

def start_column(self, name: str):
Copy link
Contributor Author

@angusholder angusholder Sep 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gets called during the insert to tell us what the current column being inserted is, so I store its name here so we have it if we run into an error during inserting that column

super().start_column(name)
self._column_name = name

def make_data_error(self, error_message: str) -> DataError:
if self._column_name is not None:
Copy link
Contributor Author

@angusholder angusholder Sep 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's where we use the column name that was stored by start_column(). I don't know if it's possible to reach here by doing a column insert without start_column() having been called, but I handled None just in case

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not possible, so it's safe to remove the None check.

return DataError(f"Failed to write column '{self._column_name}': {error_message}")
return DataError(error_message)
Loading