fix SparkConnectionManager.data_type_code_to_name for pyodbc #664

Merged: 25 commits, Mar 6, 2023
Commits
27caf0a  fix type annotation on data_type_code_to_name (MichelleArk, Mar 3, 2023)
f3766b5  changelog entry (MichelleArk, Mar 3, 2023)
5acbbee  fix test_constraints (MichelleArk, Mar 3, 2023)
747c59a  [testing] tests/functional/adapter/test_constraints.py (MichelleArk, Mar 3, 2023)
03559b6  [testing] printing type_code (MichelleArk, Mar 3, 2023)
64163b2  data_type_code_to_name supports python type (MichelleArk, Mar 3, 2023)
c88323f  split TestSparkConstraintsColumnsEqual by profile (MichelleArk, Mar 3, 2023)
91850b3  string_type for TestSparkConstraintsColumnsEqualDatabricksHTTP (MichelleArk, Mar 3, 2023)
a916281  test string type (MichelleArk, Mar 3, 2023)
65756db  pyodbc string_type to STR (MichelleArk, Mar 3, 2023)
e8edeba  TestSparkConstraintsColumnsEqualDatabricksHTTP string_type (MichelleArk, Mar 3, 2023)
a2483b8  it's friday... (MichelleArk, Mar 3, 2023)
9cf38fd  schema_data_type is string (MichelleArk, Mar 3, 2023)
16df401  test more types on TestSparkConstraintsColumnsEqualDatabricksHTTP (MichelleArk, Mar 4, 2023)
8564108  test more types on TestSparkConstraintsColumnsEqualPyodbc (MichelleArk, Mar 4, 2023)
cc7c7b2  test array type on TestSparkConstraintsColumnsEqualPyodbc (MichelleArk, Mar 4, 2023)
ce8a7b1  test datetime types for TestSparkConstraintsColumnsEqualPyodbc (MichelleArk, Mar 4, 2023)
08af8f7  test types for TestSparkConstraintsColumnsEqualPyodbc (MichelleArk, Mar 4, 2023)
fcb601e  test types for TestSparkConstraintsColumnsEqualPyodbc (MichelleArk, Mar 4, 2023)
b06ae8c  update changelog entry (MichelleArk, Mar 4, 2023)
506a34d  add back full integration test suite (MichelleArk, Mar 4, 2023)
a0a44e7  arrays pyodbc (MichelleArk, Mar 4, 2023)
1df5ce3  decimal pyodbc (MichelleArk, Mar 4, 2023)
5b66236  decimal pyodbc (MichelleArk, Mar 4, 2023)
7446a24  remove else (MichelleArk, Mar 6, 2023)
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20230303-200542.yaml
@@ -0,0 +1,6 @@
kind: Fixes
body: 'Fix pyodbc type_code -> data_type conversion '
time: 2023-03-03T20:05:42.400255-05:00
custom:
Author: michelleark
Issue: "665"
15 changes: 12 additions & 3 deletions dbt/adapters/spark/connections.py
@@ -25,7 +25,7 @@

from hologram.helpers import StrEnum
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Union

try:
from thrift.transport.TSSLSocket import TSSLSocket
@@ -493,8 +493,17 @@ def open(cls, connection):
return connection

@classmethod
def data_type_code_to_name(cls, type_code: str) -> str:
return type_code
def data_type_code_to_name(cls, type_code: Union[type, str]) -> str: # type: ignore
"""
:param Union[type, str] type_code: The type_code from the cursor description.
* type_code is a python type (!) in pyodbc https://github.com/mkleehammer/pyodbc/wiki/Cursor#description, and a string for other spark runtimes.
* ignoring the type annotation on the signature for this adapter instead of updating the base class because this feels like a really special case.
:return: the stringified cursor type_code
:rtype: str
"""
if isinstance(type_code, str):
return type_code
return type_code.__name__.upper()


def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None):
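To make the new behavior concrete, here is a minimal sanity sketch of the two code paths, assuming a dbt-spark installation that includes this change; the expected values are taken from the test fixtures below:

```python
import decimal

from dbt.adapters.spark.connections import SparkConnectionManager

# Thrift and HTTP Spark cursors report type names as plain strings,
# which pass through unchanged.
assert SparkConnectionManager.data_type_code_to_name("string") == "string"

# pyodbc cursors report Python types as type_codes (see the pyodbc Cursor
# docs linked in the docstring); these render as type_code.__name__.upper().
assert SparkConnectionManager.data_type_code_to_name(str) == "STR"
assert SparkConnectionManager.data_type_code_to_name(decimal.Decimal) == "DECIMAL"
```

Strings pass through untouched, so the Thrift and HTTP connection methods keep their existing behavior; only pyodbc's Python-type codes need converting.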
103 changes: 98 additions & 5 deletions tests/functional/adapter/test_constraints.py
@@ -4,10 +4,16 @@
BaseConstraintsColumnsEqual,
BaseConstraintsRuntimeEnforcement
)
from dbt.tests.adapter.constraints.fixtures import (
my_model_sql,
my_model_wrong_order_sql,
my_model_wrong_name_sql,
model_schema_yml,
)

# constraints are enforced via 'alter' statements that run after table creation
_expected_sql_spark = """
create or replace table {0}
using delta
as

@@ -17,20 +17,107 @@
cast('2019-01-01' as date) as date_day
"""

@pytest.mark.skip_profile('spark_session', 'apache_spark')
class TestSparkConstraintsColumnsEqual(BaseConstraintsColumnsEqual):
pass
# Different on Spark:
# - does not support a data type named 'text' (TODO: handle this in the base test classes using string_type)
constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "")


@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_http_cluster')
class TestSparkConstraintsColumnsEqualPyodbc(BaseConstraintsColumnsEqual):
@pytest.fixture(scope="class")
def models(self):
return {
"my_model_wrong_order.sql": my_model_wrong_order_sql,
"my_model_wrong_name.sql": my_model_wrong_name_sql,
"constraints_schema.yml": constraints_yml,
}

@pytest.fixture
def string_type(self):
return "STR"

@pytest.fixture
def int_type(self):
return "INT"

@pytest.fixture
def schema_int_type(self):
return "INT"

@pytest.fixture
def data_types(self, int_type, schema_int_type, string_type):
# sql_column_value, schema_data_type, error_data_type
return [
# TODO: the int type is tricky to test in test__constraints_wrong_column_data_type without a schema_string_type to override.
# uncomment the line below once https://github.com/dbt-labs/dbt-core/issues/7121 is resolved
# ['1', schema_int_type, int_type],
['"1"', "string", string_type],
["true", "boolean", "BOOL"],
['array("1","2","3")', "string", string_type],
['array(1,2,3)', "string", string_type],
["6.45", "decimal", "DECIMAL"],
# TODO: test__constraints_correct_column_data_type isn't able to run the following statements in create table statements with pyodbc
# ["cast('2019-01-01' as date)", "date", "DATE"],
# ["cast('2019-01-01' as timestamp)", "date", "DATE"],
]


@pytest.mark.skip_profile('spark_session', 'apache_spark', 'databricks_sql_endpoint', 'databricks_cluster')
class TestSparkConstraintsColumnsEqualDatabricksHTTP(BaseConstraintsColumnsEqual):
@pytest.fixture(scope="class")
def models(self):
return {
"my_model_wrong_order.sql": my_model_wrong_order_sql,
"my_model_wrong_name.sql": my_model_wrong_name_sql,
"constraints_schema.yml": constraints_yml,
}

@pytest.fixture
def string_type(self):
return "STRING_TYPE"

@pytest.fixture
def int_type(self):
return "INT_TYPE"

@pytest.fixture
def schema_int_type(self):
return "INT"

@pytest.fixture
def data_types(self, int_type, schema_int_type, string_type):
# sql_column_value, schema_data_type, error_data_type
return [
# TODO: the int type is tricky to test in test__constraints_wrong_column_data_type without a schema_string_type to override.
# uncomment the line below once https://github.com/dbt-labs/dbt-core/issues/7121 is resolved
# ['1', schema_int_type, int_type],
['"1"', "string", string_type],
["true", "boolean", "BOOLEAN_TYPE"],
['array("1","2","3")', "array<string>", "ARRAY_TYPE"],
['array(1,2,3)', "array<int>", "ARRAY_TYPE"],
["cast('2019-01-01' as date)", "date", "DATE_TYPE"],
["cast('2019-01-01' as timestamp)", "timestamp", "TIMESTAMP_TYPE"],
["cast(1.0 AS DECIMAL(4, 2))", "decimal", "DECIMAL_TYPE"],
]


@pytest.mark.skip_profile('spark_session', 'apache_spark')
class TestSparkConstraintsRuntimeEnforcement(BaseConstraintsRuntimeEnforcement):
@pytest.fixture(scope="class")
def models(self):
return {
"my_model.sql": my_model_sql,
"constraints_schema.yml": constraints_yml,
}

@pytest.fixture(scope="class")
def project_config_update(self):
return {
"models": {
"+file_format": "delta",
}
}

@pytest.fixture(scope="class")
def expected_sql(self, project):
relation = relation_from_name(project.adapter, "my_model")
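As a summary of why the column-equality tests are now split by profile: the same SQL literal comes back under a different adapter-reported type name per connection method. The mapping below simply restates the fixture values above (illustrative, not exhaustive):

```python
# Restating the data_types fixture expectations: identical SQL literals map to
# different error_data_type names depending on the connection method.
pyodbc_reported = {
    '"1"': "STR",        # pyodbc type_code is the Python type `str`
    "true": "BOOL",      # pyodbc type_code is `bool`
    "6.45": "DECIMAL",   # pyodbc type_code is `decimal.Decimal`
}
databricks_http_reported = {
    '"1"': "STRING_TYPE",                    # Thrift cursor reports a name string
    "true": "BOOLEAN_TYPE",
    "cast(1.0 AS DECIMAL(4, 2))": "DECIMAL_TYPE",
}
```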