Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a ValueError on redshift when an all-null column had a text type … #2255

Merged
merged 1 commit into from
Mar 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## dbt 0.16.1 (Relase date TBD)


### Fixes
- Fix a redshift-only issue that caused an error when `dbt seed` found a seed with an entirely empty column that was set to a `varchar` data type. ([#2250](https://github.com/fishtown-analytics/dbt/issues/2250), [#2254](https://github.com/fishtown-analytics/dbt/pull/2254))

### Under the hood
- Pin google libraries to higher minimum values, add more dependencies as explicit ([#2233](https://github.com/fishtown-analytics/dbt/issues/2233), [#2249](https://github.com/fishtown-analytics/dbt/pull/2249))

Expand Down
4 changes: 3 additions & 1 deletion plugins/redshift/dbt/adapters/redshift/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def drop_relation(self, relation):
@classmethod
def convert_text_type(cls, agate_table, col_idx):
column = agate_table.columns[col_idx]
lens = (len(d.encode("utf-8")) for d in column.values_without_nulls())
# `lens` must be a list, so this can't be a generator expression,
# because max() raises ane exception if its argument has no members.
lens = [len(d.encode("utf-8")) for d in column.values_without_nulls()]
max_len = max(lens) if lens else 64
return "varchar({})".format(max_len)

Expand Down
72 changes: 71 additions & 1 deletion test/unit/test_bigquery_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import dbt.exceptions
from dbt.logger import GLOBAL_LOGGER as logger # noqa

from .utils import config_from_parts_or_dicts, inject_adapter
from .utils import config_from_parts_or_dicts, inject_adapter, TestAdapterConversions


def _bq_conn():
Expand Down Expand Up @@ -473,3 +473,73 @@ def test__catalog_filter_table(self):
assert isinstance(row['table_database'], str)
assert isinstance(row['table_name'], str)
assert isinstance(row['something'], decimal.Decimal)


class TestBigQueryAdapterConversions(TestAdapterConversions):
def test_convert_text_type(self):
rows = [
['', 'a1', 'stringval1'],
['', 'a2', 'stringvalasdfasdfasdfa'],
['', 'a3', 'stringval3'],
]
agate_table = self._make_table_of(rows, agate.Text)
expected = ['string', 'string', 'string']
for col_idx, expect in enumerate(expected):
assert BigQueryAdapter.convert_text_type(agate_table, col_idx) == expect

def test_convert_number_type(self):
rows = [
['', '23.98', '-1'],
['', '12.78', '-2'],
['', '79.41', '-3'],
]
agate_table = self._make_table_of(rows, agate.Number)
expected = ['int64', 'float64', 'int64']
for col_idx, expect in enumerate(expected):
assert BigQueryAdapter.convert_number_type(agate_table, col_idx) == expect

def test_convert_boolean_type(self):
rows = [
['', 'false', 'true'],
['', 'false', 'false'],
['', 'false', 'true'],
]
agate_table = self._make_table_of(rows, agate.Boolean)
expected = ['bool', 'bool', 'bool']
for col_idx, expect in enumerate(expected):
assert BigQueryAdapter.convert_boolean_type(agate_table, col_idx) == expect

def test_convert_datetime_type(self):
rows = [
['', '20190101T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190102T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190103T01:01:01Z', '2019-01-01 01:01:01'],
]
agate_table = self._make_table_of(rows, [agate.DateTime, agate_helper.ISODateTime, agate.DateTime])
expected = ['datetime', 'datetime', 'datetime']
for col_idx, expect in enumerate(expected):
assert BigQueryAdapter.convert_datetime_type(agate_table, col_idx) == expect

def test_convert_date_type(self):
rows = [
['', '2019-01-01', '2019-01-04'],
['', '2019-01-02', '2019-01-04'],
['', '2019-01-03', '2019-01-04'],
]
agate_table = self._make_table_of(rows, agate.Date)
expected = ['date', 'date', 'date']
for col_idx, expect in enumerate(expected):
assert BigQueryAdapter.convert_date_type(agate_table, col_idx) == expect

def test_convert_time_type(self):
# dbt's default type testers actually don't have a TimeDelta at all.
agate.TimeDelta
rows = [
['', '120s', '10s'],
['', '3m', '11s'],
['', '1h', '12s'],
]
agate_table = self._make_table_of(rows, agate.TimeDelta)
expected = ['time', 'time', 'time']
for col_idx, expect in enumerate(expected):
assert BigQueryAdapter.convert_time_type(agate_table, col_idx) == expect
72 changes: 71 additions & 1 deletion test/unit/test_postgres_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from psycopg2 import extensions as psycopg2_extensions
from psycopg2 import DatabaseError

from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection
from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection, TestAdapterConversions


class TestPostgresAdapter(unittest.TestCase):
Expand Down Expand Up @@ -419,3 +419,73 @@ def test__catalog_filter_table(self):
assert isinstance(row['table_database'], str)
assert isinstance(row['table_name'], str)
assert isinstance(row['something'], decimal.Decimal)


class TestPostgresAdapterConversions(TestAdapterConversions):
def test_convert_text_type(self):
rows = [
['', 'a1', 'stringval1'],
['', 'a2', 'stringvalasdfasdfasdfa'],
['', 'a3', 'stringval3'],
]
agate_table = self._make_table_of(rows, agate.Text)
expected = ['text', 'text', 'text']
for col_idx, expect in enumerate(expected):
assert PostgresAdapter.convert_text_type(agate_table, col_idx) == expect

def test_convert_number_type(self):
rows = [
['', '23.98', '-1'],
['', '12.78', '-2'],
['', '79.41', '-3'],
]
agate_table = self._make_table_of(rows, agate.Number)
expected = ['integer', 'float8', 'integer']
for col_idx, expect in enumerate(expected):
assert PostgresAdapter.convert_number_type(agate_table, col_idx) == expect

def test_convert_boolean_type(self):
rows = [
['', 'false', 'true'],
['', 'false', 'false'],
['', 'false', 'true'],
]
agate_table = self._make_table_of(rows, agate.Boolean)
expected = ['boolean', 'boolean', 'boolean']
for col_idx, expect in enumerate(expected):
assert PostgresAdapter.convert_boolean_type(agate_table, col_idx) == expect

def test_convert_datetime_type(self):
rows = [
['', '20190101T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190102T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190103T01:01:01Z', '2019-01-01 01:01:01'],
]
agate_table = self._make_table_of(rows, [agate.DateTime, agate_helper.ISODateTime, agate.DateTime])
expected = ['timestamp without time zone', 'timestamp without time zone', 'timestamp without time zone']
for col_idx, expect in enumerate(expected):
assert PostgresAdapter.convert_datetime_type(agate_table, col_idx) == expect

def test_convert_date_type(self):
rows = [
['', '2019-01-01', '2019-01-04'],
['', '2019-01-02', '2019-01-04'],
['', '2019-01-03', '2019-01-04'],
]
agate_table = self._make_table_of(rows, agate.Date)
expected = ['date', 'date', 'date']
for col_idx, expect in enumerate(expected):
assert PostgresAdapter.convert_date_type(agate_table, col_idx) == expect

def test_convert_time_type(self):
# dbt's default type testers actually don't have a TimeDelta at all.
agate.TimeDelta
rows = [
['', '120s', '10s'],
['', '3m', '11s'],
['', '1h', '12s'],
]
agate_table = self._make_table_of(rows, agate.TimeDelta)
expected = ['time', 'time', 'time']
for col_idx, expect in enumerate(expected):
assert PostgresAdapter.convert_time_type(agate_table, col_idx) == expect
86 changes: 82 additions & 4 deletions test/unit/test_redshift_adapter.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import string
import unittest
from unittest import mock
import agate

import dbt.adapters
import dbt.adapters # noqa
import dbt.flags as flags
import dbt.utils

from dbt.adapters.redshift import RedshiftAdapter
from dbt.clients import agate_helper
from dbt.exceptions import FailedToConnectException
from dbt.logger import GLOBAL_LOGGER as logger # noqa

from .utils import config_from_parts_or_dicts, mock_connection
from .utils import config_from_parts_or_dicts, mock_connection, TestAdapterConversions


@classmethod
Expand Down Expand Up @@ -212,7 +214,7 @@ def test_search_path_with_space(self, psycopg2):
password='password',
port=5439,
connect_timeout=10,
options="-c search_path=test\ test",
options=r"-c search_path=test\ test",
keepalives_idle=240)

@mock.patch('dbt.adapters.postgres.connections.psycopg2')
Expand Down Expand Up @@ -261,3 +263,79 @@ def test_dbname_verification_is_case_insensitive(self):
self.adapter.cleanup_connections()
self._adapter = RedshiftAdapter(self.config)
self.adapter.verify_database('redshift')


class TestRedshiftAdapterConversions(TestAdapterConversions):
def test_convert_text_type(self):
rows = [
['', 'a1', 'stringval1'],
['', 'a2', 'stringvalasdfasdfasdfa'],
['', 'a3', 'stringval3'],
]
agate_table = self._make_table_of(rows, agate.Text)
expected = ['varchar(64)', 'varchar(2)', 'varchar(22)']
for col_idx, expect in enumerate(expected):
assert RedshiftAdapter.convert_text_type(agate_table, col_idx) == expect

def test_convert_number_type(self):
rows = [
['', '23.98', '-1'],
['', '12.78', '-2'],
['', '79.41', '-3'],
]
agate_table = self._make_table_of(rows, agate.Number)
expected = ['integer', 'float8', 'integer']
for col_idx, expect in enumerate(expected):
assert RedshiftAdapter.convert_number_type(agate_table, col_idx) == expect

def test_convert_boolean_type(self):
rows = [
['', 'false', 'true'],
['', 'false', 'false'],
['', 'false', 'true'],
]
agate_table = self._make_table_of(rows, agate.Boolean)
expected = ['boolean', 'boolean', 'boolean']
for col_idx, expect in enumerate(expected):
assert RedshiftAdapter.convert_boolean_type(agate_table, col_idx) == expect

def test_convert_datetime_type(self):
rows = [
['', '20190101T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190102T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190103T01:01:01Z', '2019-01-01 01:01:01'],
]
agate_table = self._make_table_of(rows, [agate.DateTime, agate_helper.ISODateTime, agate.DateTime])
expected = ['timestamp without time zone', 'timestamp without time zone', 'timestamp without time zone']
for col_idx, expect in enumerate(expected):
assert RedshiftAdapter.convert_datetime_type(agate_table, col_idx) == expect

def test_convert_date_type(self):
rows = [
['', '2019-01-01', '2019-01-04'],
['', '2019-01-02', '2019-01-04'],
['', '2019-01-03', '2019-01-04'],
]
agate_table = self._make_table_of(rows, agate.Date)
expected = ['date', 'date', 'date']
for col_idx, expect in enumerate(expected):
assert RedshiftAdapter.convert_date_type(agate_table, col_idx) == expect

def test_convert_time_type(self):
# dbt's default type testers actually don't have a TimeDelta at all.
agate.TimeDelta
rows = [
['', '120s', '10s'],
['', '3m', '11s'],
['', '1h', '12s'],
]
agate_table = self._make_table_of(rows, agate.TimeDelta)
expected = ['varchar(24)', 'varchar(24)', 'varchar(24)']
for col_idx, expect in enumerate(expected):
assert RedshiftAdapter.convert_time_type(agate_table, col_idx) == expect


# convert_boolean_type
# convert_datetime_type
# convert_date_type
# convert_time_type
74 changes: 73 additions & 1 deletion test/unit/test_snowflake_adapter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import agate
import unittest
from contextlib import contextmanager
from unittest import mock
Expand All @@ -6,11 +7,12 @@

from dbt.adapters.snowflake import SnowflakeAdapter
from dbt.adapters.base.query_headers import MacroQueryStringSetter
from dbt.clients import agate_helper
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.parser.results import ParseResult
from snowflake import connector as snowflake_connector

from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection
from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection, TestAdapterConversions


class TestSnowflakeAdapter(unittest.TestCase):
Expand Down Expand Up @@ -376,3 +378,73 @@ def test_authenticator_private_key_authentication_no_passphrase(self, mock_get_p
warehouse='test_warehouse', private_key='test_key',
application='dbt')
])


class TestSnowflakeAdapterConversions(TestAdapterConversions):
def test_convert_text_type(self):
rows = [
['', 'a1', 'stringval1'],
['', 'a2', 'stringvalasdfasdfasdfa'],
['', 'a3', 'stringval3'],
]
agate_table = self._make_table_of(rows, agate.Text)
expected = ['text', 'text', 'text']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_text_type(agate_table, col_idx) == expect

def test_convert_number_type(self):
rows = [
['', '23.98', '-1'],
['', '12.78', '-2'],
['', '79.41', '-3'],
]
agate_table = self._make_table_of(rows, agate.Number)
expected = ['integer', 'float8', 'integer']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_number_type(agate_table, col_idx) == expect

def test_convert_boolean_type(self):
rows = [
['', 'false', 'true'],
['', 'false', 'false'],
['', 'false', 'true'],
]
agate_table = self._make_table_of(rows, agate.Boolean)
expected = ['boolean', 'boolean', 'boolean']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_boolean_type(agate_table, col_idx) == expect

def test_convert_datetime_type(self):
rows = [
['', '20190101T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190102T01:01:01Z', '2019-01-01 01:01:01'],
['', '20190103T01:01:01Z', '2019-01-01 01:01:01'],
]
agate_table = self._make_table_of(rows, [agate.DateTime, agate_helper.ISODateTime, agate.DateTime])
expected = ['timestamp without time zone', 'timestamp without time zone', 'timestamp without time zone']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_datetime_type(agate_table, col_idx) == expect

def test_convert_date_type(self):
rows = [
['', '2019-01-01', '2019-01-04'],
['', '2019-01-02', '2019-01-04'],
['', '2019-01-03', '2019-01-04'],
]
agate_table = self._make_table_of(rows, agate.Date)
expected = ['date', 'date', 'date']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_date_type(agate_table, col_idx) == expect

def test_convert_time_type(self):
# dbt's default type testers actually don't have a TimeDelta at all.
agate.TimeDelta
rows = [
['', '120s', '10s'],
['', '3m', '11s'],
['', '1h', '12s'],
]
agate_table = self._make_table_of(rows, agate.TimeDelta)
expected = ['time', 'time', 'time']
for col_idx, expect in enumerate(expected):
assert SnowflakeAdapter.convert_time_type(agate_table, col_idx) == expect
Loading