Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Redshift data creator #2242

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,13 @@ def __init__(self, details):
super().__init__(f"Redshift SQL Query failed to finish. Details: {details}")


class RedshiftTableNameTooLong(Exception):
def __init__(self, table_name: str):
super().__init__(
f"Redshift table names have a maximum length of 127 characters, but the table name {table_name} has length {len(table_name)} characters."
)


class EntityTimestampInferenceException(Exception):
def __init__(self, expected_column_name: str):
super().__init__(
Expand Down
18 changes: 14 additions & 4 deletions sdk/python/feast/infra/utils/aws_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
wait_exponential,
)

from feast.errors import RedshiftCredentialsError, RedshiftQueryError
from feast.errors import (
RedshiftCredentialsError,
RedshiftQueryError,
RedshiftTableNameTooLong,
)
from feast.type_map import pa_to_redshift_value_type

try:
Expand All @@ -28,6 +32,9 @@
raise FeastExtrasDependencyImportError("aws", str(e))


REDSHIFT_TABLE_NAME_MAX_LENGTH = 127


def get_redshift_data_client(aws_region: str):
"""
Get the Redshift Data API Service client for the given AWS region.
Expand Down Expand Up @@ -184,7 +191,7 @@ def upload_df_to_redshift(
iam_role: str,
table_name: str,
df: pd.DataFrame,
) -> None:
):
"""Uploads a Pandas DataFrame to Redshift as a new table.

The caller is responsible for deleting the table when no longer necessary.
Expand All @@ -208,9 +215,12 @@ def upload_df_to_redshift(
table_name: The name of the new Redshift table where we copy the dataframe
df: The Pandas DataFrame to upload

Returns: None

Raises:
RedshiftTableNameTooLong: The specified table name is too long.
"""
if len(table_name) > REDSHIFT_TABLE_NAME_MAX_LENGTH:
raise RedshiftTableNameTooLong(table_name)

bucket, key = get_bucket_and_key(s3_path)

# Drop the index so that we dont have unnecessary columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def construct_test_environment(
worker_id: str = "worker_id",
) -> Environment:

_uuid = str(uuid.uuid4()).replace("-", "")[:8]
_uuid = str(uuid.uuid4()).replace("-", "")[:6]

run_id = os.getenv("GITHUB_RUN_ID", default=None)
run_id = f"gh_run_{run_id}_{_uuid}" if run_id else _uuid
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/tests/integration/registration/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ def test_nullable_online_store(test_nullable_online_store) -> None:

with tempfile.TemporaryDirectory() as repo_dir_name:
try:
repo_path = Path(repo_dir_name)
feature_store_yaml = make_feature_store_yaml(
project, test_nullable_online_store, repo_dir_name
project, test_nullable_online_store, repo_path
)
repo_path = Path(repo_dir_name)

repo_config = repo_path / "feature_store.yaml"

Expand Down