From 58ce148401fe578b1727bc42ee6b4b9a558660c7 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 21 Apr 2023 12:36:20 -0700 Subject: [PATCH] feat: Add AWS Redshift Serverless support (#3595) * Rebase master Signed-off-by: Chris Barcroft * Pass optional types to satisfy mypy Signed-off-by: Chris Barcroft * Remove redundant import Signed-off-by: Chris Barcroft * Regenerate python requirements Signed-off-by: Chris Barcroft * Fix casing error on DbUser Redshift kwarg Signed-off-by: Chris Barcroft --------- Signed-off-by: Chris Barcroft Signed-off-by: Chris Barcroft Co-authored-by: Chris Barcroft --- docs/reference/offline-stores/redshift.md | 22 ++++++ .../feast/infra/offline_stores/redshift.py | 49 +++++++++++-- .../infra/offline_stores/redshift_source.py | 27 +++++-- sdk/python/feast/infra/utils/aws_utils.py | 71 ++++++++++++++----- sdk/python/feast/templates/aws/bootstrap.py | 2 + .../requirements/py3.10-requirements.txt | 57 ++++++++------- .../requirements/py3.8-requirements.txt | 60 ++++++++-------- .../requirements/py3.9-requirements.txt | 54 +++++++------- .../universal/data_sources/redshift.py | 2 + .../infra/scaffolding/test_repo_config.py | 4 +- setup.py | 7 +- 11 files changed, 235 insertions(+), 120 deletions(-) diff --git a/docs/reference/offline-stores/redshift.md b/docs/reference/offline-stores/redshift.md index 98092c44be..e9bcbfeff1 100644 --- a/docs/reference/offline-stores/redshift.md +++ b/docs/reference/offline-stores/redshift.md @@ -155,3 +155,25 @@ While the following trust relationship is necessary to make sure that Redshift, ] } ``` + + +## Redshift Serverless + +In order to use [AWS Redshift Serverless](https://aws.amazon.com/redshift/redshift-serverless/), specify a workgroup instead of a cluster_id and user. + +{% code title="feature_store.yaml" %} +```yaml +project: my_feature_repo +registry: data/registry.db +provider: aws +offline_store: + type: redshift + region: us-west-2 + workgroup: feast-workgroup + database: feast-database + s3_staging_location: s3://feast-bucket/redshift + iam_role: arn:aws:iam::123456789012:role/redshift_s3_access_role +``` +{% endcode %} + +Please note that the IAM policies above will need the [redshift-serverless](https://aws.permissions.cloud/iam/redshift-serverless) version, rather than the standard [redshift](https://aws.permissions.cloud/iam/redshift). \ No newline at end of file diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 35fd49f746..aba2bda353 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -19,7 +19,7 @@ import pyarrow import pyarrow as pa from dateutil import parser -from pydantic import StrictStr +from pydantic import StrictStr, root_validator from pydantic.typing import Literal from pytz import utc @@ -51,15 +51,18 @@ class RedshiftOfflineStoreConfig(FeastConfigBaseModel): type: Literal["redshift"] = "redshift" """ Offline store type selector""" - cluster_id: StrictStr - """ Redshift cluster identifier """ + cluster_id: Optional[StrictStr] + """ Redshift cluster identifier, for provisioned clusters """ + + user: Optional[StrictStr] + """ Redshift user name, only required for provisioned clusters """ + + workgroup: Optional[StrictStr] + """ Redshift workgroup identifier, for serverless """ region: StrictStr """ Redshift cluster's AWS region """ - user: StrictStr - """ Redshift user name """ - database: StrictStr """ Redshift database name """ @@ -69,6 +72,26 @@ class RedshiftOfflineStoreConfig(FeastConfigBaseModel): iam_role: StrictStr """ IAM Role for Redshift, granting it access to S3 """ + @root_validator + def require_cluster_and_user_or_workgroup(cls, values): + """ + Provisioned Redshift clusters: Require cluster_id and user, ignore workgroup + Serverless Redshift: Require workgroup, ignore cluster_id and user + """ + cluster_id, user, workgroup = ( + values.get("cluster_id"), + values.get("user"), + values.get("workgroup"), + ) + if not (cluster_id and user) and not workgroup: + raise ValueError( + "please specify either cluster_id & user if using provisioned clusters, or workgroup if using serverless" + ) + elif cluster_id and workgroup: + raise ValueError("cannot specify both cluster_id and workgroup") + + return values + class RedshiftOfflineStore(OfflineStore): @staticmethod @@ -248,6 +271,7 @@ def query_generator() -> Iterator[str]: aws_utils.execute_redshift_statement( redshift_client, config.offline_store.cluster_id, + config.offline_store.workgroup, config.offline_store.database, config.offline_store.user, f"DROP TABLE IF EXISTS {table_name}", @@ -294,6 +318,7 @@ def write_logged_features( table=data, redshift_data_client=redshift_client, cluster_id=config.offline_store.cluster_id, + workgroup=config.offline_store.workgroup, database=config.offline_store.database, user=config.offline_store.user, s3_resource=s3_resource, @@ -336,8 +361,10 @@ def offline_write_batch( table=table, redshift_data_client=redshift_client, cluster_id=config.offline_store.cluster_id, + workgroup=config.offline_store.workgroup, database=redshift_options.database - or config.offline_store.database, # Users can define database in the source if needed but it's not required. + # Users can define database in the source if needed but it's not required. + or config.offline_store.database, user=config.offline_store.user, s3_resource=s3_resource, s3_path=f"{config.offline_store.s3_staging_location}/push/{uuid.uuid4()}.parquet", @@ -405,6 +432,7 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: return aws_utils.unload_redshift_query_to_df( self._redshift_client, self._config.offline_store.cluster_id, + self._config.offline_store.workgroup, self._config.offline_store.database, self._config.offline_store.user, self._s3_resource, @@ -419,6 +447,7 @@ def _to_arrow_internal(self, timeout: Optional[int] = None) -> pa.Table: return aws_utils.unload_redshift_query_to_pa( self._redshift_client, self._config.offline_store.cluster_id, + self._config.offline_store.workgroup, self._config.offline_store.database, self._config.offline_store.user, self._s3_resource, @@ -439,6 +468,7 @@ def to_s3(self) -> str: aws_utils.execute_redshift_query_and_unload_to_s3( self._redshift_client, self._config.offline_store.cluster_id, + self._config.offline_store.workgroup, self._config.offline_store.database, self._config.offline_store.user, self._s3_path, @@ -455,6 +485,7 @@ def to_redshift(self, table_name: str) -> None: aws_utils.upload_df_to_redshift( self._redshift_client, self._config.offline_store.cluster_id, + self._config.offline_store.workgroup, self._config.offline_store.database, self._config.offline_store.user, self._s3_resource, @@ -471,6 +502,7 @@ def to_redshift(self, table_name: str) -> None: aws_utils.execute_redshift_statement( self._redshift_client, self._config.offline_store.cluster_id, + self._config.offline_store.workgroup, self._config.offline_store.database, self._config.offline_store.user, query, @@ -509,6 +541,7 @@ def _upload_entity_df( aws_utils.upload_df_to_redshift( redshift_client, config.offline_store.cluster_id, + config.offline_store.workgroup, config.offline_store.database, config.offline_store.user, s3_resource, @@ -522,6 +555,7 @@ def _upload_entity_df( aws_utils.execute_redshift_statement( redshift_client, config.offline_store.cluster_id, + config.offline_store.workgroup, config.offline_store.database, config.offline_store.user, f"CREATE TABLE {table_name} AS ({entity_df})", @@ -577,6 +611,7 @@ def _get_entity_df_event_timestamp_range( statement_id = aws_utils.execute_redshift_statement( redshift_client, config.offline_store.cluster_id, + config.offline_store.workgroup, config.offline_store.database, config.offline_store.user, f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max " diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index 4279e6a068..1f80dede07 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -207,18 +207,30 @@ def get_table_column_names_and_types( if self.table: try: paginator = client.get_paginator("describe_table") - response_iterator = paginator.paginate( - ClusterIdentifier=config.offline_store.cluster_id, - Database=( + + paginator_kwargs = { + "Database": ( self.database if self.database else config.offline_store.database ), - DbUser=config.offline_store.user, - Table=self.table, - Schema=self.schema, - ) + "Table": self.table, + "Schema": self.schema, + } + + if config.offline_store.cluster_id: + # Provisioned cluster + paginator_kwargs[ + "ClusterIdentifier" + ] = config.offline_store.cluster_id + paginator_kwargs["DbUser"] = config.offline_store.user + elif config.offline_store.workgroup: + # Redshift serverless + paginator_kwargs["WorkgroupName"] = config.offline_store.workgroup + + response_iterator = paginator.paginate(**paginator_kwargs) table = response_iterator.build_full_result() + except ClientError as e: if e.response["Error"]["Code"] == "ValidationException": raise RedshiftCredentialsError() from e @@ -233,6 +245,7 @@ def get_table_column_names_and_types( statement_id = aws_utils.execute_redshift_statement( client, config.offline_store.cluster_id, + config.offline_store.workgroup, self.database if self.database else config.offline_store.database, config.offline_store.user, f"SELECT * FROM ({self.query}) LIMIT 1", diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 7e8335ac92..5095ef4221 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -74,7 +74,12 @@ def get_bucket_and_key(s3_path: str) -> Tuple[str, str]: reraise=True, ) def execute_redshift_statement_async( - redshift_data_client, cluster_id: str, database: str, user: str, query: str + redshift_data_client, + cluster_id: Optional[str], + workgroup: Optional[str], + database: str, + user: Optional[str], + query: str, ) -> dict: """Execute Redshift statement asynchronously. Does not wait for the query to finish. @@ -83,6 +88,7 @@ def execute_redshift_statement_async( Args: redshift_data_client: Redshift Data API Service client cluster_id: Redshift Cluster Identifier + workgroup: Redshift Serverless Workgroup database: Redshift Database Name user: Redshift username query: The SQL query to execute @@ -91,12 +97,17 @@ def execute_redshift_statement_async( """ try: - return redshift_data_client.execute_statement( - ClusterIdentifier=cluster_id, - Database=database, - DbUser=user, - Sql=query, - ) + rs_kwargs = {"Database": database, "Sql": query} + + # Standard Redshift requires a ClusterId as well as DbUser. RS Serverless instead requires a WorkgroupName. + if cluster_id and user: + rs_kwargs["ClusterIdentifier"] = cluster_id + rs_kwargs["DbUser"] = user + elif workgroup: + rs_kwargs["WorkgroupName"] = workgroup + + return redshift_data_client.execute_statement(**rs_kwargs) + except ClientError as e: if e.response["Error"]["Code"] == "ValidationException": raise RedshiftCredentialsError() from e @@ -133,7 +144,12 @@ def wait_for_redshift_statement(redshift_data_client, statement: dict) -> None: def execute_redshift_statement( - redshift_data_client, cluster_id: str, database: str, user: str, query: str + redshift_data_client, + cluster_id: Optional[str], + workgroup: Optional[str], + database: str, + user: Optional[str], + query: str, ) -> str: """Execute Redshift statement synchronously. Waits for the query to finish. @@ -144,6 +160,7 @@ def execute_redshift_statement( Args: redshift_data_client: Redshift Data API Service client cluster_id: Redshift Cluster Identifier + workgroup: Redshift Serverless Workgroup database: Redshift Database Name user: Redshift username query: The SQL query to execute @@ -152,7 +169,7 @@ def execute_redshift_statement( """ statement = execute_redshift_statement_async( - redshift_data_client, cluster_id, database, user, query + redshift_data_client, cluster_id, workgroup, database, user, query ) wait_for_redshift_statement(redshift_data_client, statement) return statement["Id"] @@ -193,9 +210,10 @@ def upload_df_to_s3( def upload_df_to_redshift( redshift_data_client, - cluster_id: str, + cluster_id: Optional[str], + workgroup: Optional[str], database: str, - user: str, + user: Optional[str], s3_resource, s3_path: str, iam_role: str, @@ -209,6 +227,7 @@ def upload_df_to_redshift( Args: redshift_data_client: Redshift Data API Service client cluster_id: Redshift Cluster Identifier + workgroup: Redshift Serverless Workgroup database: Redshift Database Name user: Redshift username s3_resource: S3 Resource object @@ -236,6 +255,7 @@ def upload_df_to_redshift( table, redshift_data_client, cluster_id=cluster_id, + workgroup=workgroup, database=database, user=user, s3_resource=s3_resource, @@ -248,6 +268,7 @@ def upload_df_to_redshift( def delete_redshift_table( redshift_data_client, cluster_id: str, + workgroup: str, database: str, user: str, table_name: str, @@ -256,6 +277,7 @@ def delete_redshift_table( execute_redshift_statement( redshift_data_client, cluster_id, + workgroup, database, user, drop_query, @@ -265,9 +287,10 @@ def delete_redshift_table( def upload_arrow_table_to_redshift( table: Union[pyarrow.Table, Path], redshift_data_client, - cluster_id: str, + cluster_id: Optional[str], + workgroup: Optional[str], database: str, - user: str, + user: Optional[str], s3_resource, iam_role: str, s3_path: str, @@ -286,6 +309,7 @@ def upload_arrow_table_to_redshift( Args: redshift_data_client: Redshift Data API Service client cluster_id: Redshift Cluster Identifier + workgroup: Redshift Serverless Workgroup database: Redshift Database Name user: Redshift username s3_resource: S3 Resource object @@ -345,6 +369,7 @@ def upload_arrow_table_to_redshift( execute_redshift_statement( redshift_data_client, cluster_id, + workgroup, database, user, f"{create_query}; {copy_query};", @@ -359,6 +384,7 @@ def upload_arrow_table_to_redshift( def temporarily_upload_df_to_redshift( redshift_data_client, cluster_id: str, + workgroup: str, database: str, user: str, s3_resource, @@ -381,6 +407,7 @@ def temporarily_upload_df_to_redshift( upload_df_to_redshift( redshift_data_client, cluster_id, + workgroup, database, user, s3_resource, @@ -396,6 +423,7 @@ def temporarily_upload_df_to_redshift( execute_redshift_statement( redshift_data_client, cluster_id, + workgroup, database, user, f"DROP TABLE {table_name}", @@ -407,6 +435,7 @@ def temporarily_upload_arrow_table_to_redshift( table: Union[pyarrow.Table, Path], redshift_data_client, cluster_id: str, + workgroup: str, database: str, user: str, s3_resource, @@ -431,6 +460,7 @@ def temporarily_upload_arrow_table_to_redshift( table, redshift_data_client, cluster_id, + workgroup, database, user, s3_resource, @@ -447,6 +477,7 @@ def temporarily_upload_arrow_table_to_redshift( execute_redshift_statement( redshift_data_client, cluster_id, + workgroup, database, user, f"DROP TABLE {table_name}", @@ -476,9 +507,10 @@ def delete_s3_directory(s3_resource, bucket: str, key: str): def execute_redshift_query_and_unload_to_s3( redshift_data_client, - cluster_id: str, + cluster_id: Optional[str], + workgroup: Optional[str], database: str, - user: str, + user: Optional[str], s3_path: str, iam_role: str, query: str, @@ -488,6 +520,7 @@ def execute_redshift_query_and_unload_to_s3( Args: redshift_data_client: Redshift Data API Service client cluster_id: Redshift Cluster Identifier + workgroup: Redshift Serverless workgroup name database: Redshift Database Name user: Redshift username s3_path: S3 directory where the unloaded data is written @@ -500,12 +533,15 @@ def execute_redshift_query_and_unload_to_s3( unique_table_name = "_" + str(uuid.uuid4()).replace("-", "") query = f"CREATE TEMPORARY TABLE {unique_table_name} AS ({query});\n" query += f"UNLOAD ('SELECT * FROM {unique_table_name}') TO '{s3_path}/' IAM_ROLE '{iam_role}' FORMAT AS PARQUET" - execute_redshift_statement(redshift_data_client, cluster_id, database, user, query) + execute_redshift_statement( + redshift_data_client, cluster_id, workgroup, database, user, query + ) def unload_redshift_query_to_pa( redshift_data_client, cluster_id: str, + workgroup: str, database: str, user: str, s3_resource, @@ -519,6 +555,7 @@ def unload_redshift_query_to_pa( execute_redshift_query_and_unload_to_s3( redshift_data_client, cluster_id, + workgroup, database, user, s3_path, @@ -535,6 +572,7 @@ def unload_redshift_query_to_pa( def unload_redshift_query_to_df( redshift_data_client, cluster_id: str, + workgroup: str, database: str, user: str, s3_resource, @@ -546,6 +584,7 @@ def unload_redshift_query_to_df( table = unload_redshift_query_to_pa( redshift_data_client, cluster_id, + workgroup, database, user, s3_resource, diff --git a/sdk/python/feast/templates/aws/bootstrap.py b/sdk/python/feast/templates/aws/bootstrap.py index dcabadd358..63e5b50203 100644 --- a/sdk/python/feast/templates/aws/bootstrap.py +++ b/sdk/python/feast/templates/aws/bootstrap.py @@ -35,6 +35,7 @@ def bootstrap(): aws_utils.execute_redshift_statement( client, cluster_id, + None, database, user, "DROP TABLE IF EXISTS feast_driver_hourly_stats", @@ -43,6 +44,7 @@ def bootstrap(): aws_utils.upload_df_to_redshift( client, cluster_id, + None, database, user, s3, diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 63d21e7fa2..f9702505e3 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=sdk/python/requirements/py3.10-requirements.txt @@ -11,7 +11,7 @@ anyio==3.6.2 # watchfiles appdirs==1.4.4 # via fissix -attrs==22.2.0 +attrs==23.1.0 # via # bowler # jsonschema @@ -35,11 +35,11 @@ cloudpickle==2.2.1 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.3.0 +dask==2023.4.0 # via feast (setup.py) dill==0.3.6 # via feast (setup.py) -fastapi==0.93.0 +fastapi==0.95.1 # via feast (setup.py) fastavro==1.7.3 # via @@ -47,31 +47,33 @@ fastavro==1.7.3 # pandavro fissix==21.11.13 # via bowler -fsspec==2023.3.0 +fsspec==2023.4.0 # via dask greenlet==2.0.2 # via sqlalchemy -grpcio==1.51.3 +grpcio==1.54.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.51.3 +grpcio-reflection==1.54.0 # via feast (setup.py) h11==0.14.0 # via # httpcore # uvicorn -httpcore==0.16.3 +httpcore==0.17.0 # via httpx httptools==0.5.0 # via uvicorn -httpx==0.23.3 +httpx==0.24.0 # via feast (setup.py) idna==3.4 # via # anyio + # httpx # requests - # rfc3986 +importlib-metadata==6.5.0 + # via dask jinja2==3.1.2 # via feast (setup.py) jsonschema==4.17.3 @@ -80,11 +82,11 @@ locket==1.0.0 # via partd markupsafe==2.1.2 # via jinja2 -mmh3==3.0.0 +mmh3==3.1.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.1.1 +mypy==1.2.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -94,7 +96,7 @@ numpy==1.24.2 # pandas # pandavro # pyarrow -packaging==23.0 +packaging==23.1 # via dask pandas==1.5.3 # via @@ -102,22 +104,22 @@ pandas==1.5.3 # pandavro pandavro==1.5.2 # via feast (setup.py) -partd==1.3.0 +partd==1.4.0 # via dask proto-plus==1.22.2 # via feast (setup.py) -protobuf==4.22.1 +protobuf==4.22.3 # via # feast (setup.py) # grpcio-reflection # proto-plus pyarrow==11.0.0 # via feast (setup.py) -pydantic==1.10.6 +pydantic==1.10.7 # via # fastapi # feast (setup.py) -pygments==2.14.0 +pygments==2.15.1 # via feast (setup.py) pyrsistent==0.19.3 # via jsonschema @@ -125,7 +127,7 @@ python-dateutil==2.8.2 # via pandas python-dotenv==1.0.0 # via uvicorn -pytz==2022.7.1 +pytz==2023.3 # via pandas pyyaml==6.0 # via @@ -134,8 +136,6 @@ pyyaml==6.0 # uvicorn requests==2.28.2 # via feast (setup.py) -rfc3986[idna2008]==1.5.0 - # via httpx six==1.16.0 # via # pandavro @@ -145,11 +145,11 @@ sniffio==1.3.0 # anyio # httpcore # httpx -sqlalchemy[mypy]==1.4.46 +sqlalchemy[mypy]==1.4.47 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a32 +sqlalchemy2-stubs==0.0.2a34 # via sqlalchemy -starlette==0.25.0 +starlette==0.26.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -172,15 +172,18 @@ typing-extensions==4.5.0 # mypy # pydantic # sqlalchemy2-stubs -urllib3==1.26.14 + # starlette +urllib3==1.26.15 # via requests -uvicorn[standard]==0.21.0 +uvicorn[standard]==0.21.1 # via feast (setup.py) uvloop==0.17.0 # via uvicorn volatile==2.1.0 # via bowler -watchfiles==0.18.1 +watchfiles==0.19.0 # via uvicorn -websockets==10.4 +websockets==11.0.2 # via uvicorn +zipp==3.15.0 + # via importlib-metadata diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index ca09b953c8..ac37f3a75d 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.9 # by the following command: # # pip-compile --output-file=sdk/python/requirements/py3.8-requirements.txt @@ -11,7 +11,7 @@ anyio==3.6.2 # watchfiles appdirs==1.4.4 # via fissix -attrs==22.2.0 +attrs==23.1.0 # via # bowler # jsonschema @@ -35,11 +35,11 @@ cloudpickle==2.2.1 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.3.0 +dask==2023.4.0 # via feast (setup.py) dill==0.3.6 # via feast (setup.py) -fastapi==0.94.0 +fastapi==0.95.1 # via feast (setup.py) fastavro==1.7.3 # via @@ -47,33 +47,33 @@ fastavro==1.7.3 # pandavro fissix==21.11.13 # via bowler -fsspec==2023.3.0 +fsspec==2023.4.0 # via dask greenlet==2.0.2 # via sqlalchemy -grpcio==1.51.3 +grpcio==1.54.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.51.3 +grpcio-reflection==1.54.0 # via feast (setup.py) h11==0.14.0 # via # httpcore # uvicorn -httpcore==0.16.3 +httpcore==0.17.0 # via httpx httptools==0.5.0 # via uvicorn -httpx==0.23.3 +httpx==0.24.0 # via feast (setup.py) idna==3.4 # via # anyio + # httpx # requests - # rfc3986 -importlib-resources==5.12.0 - # via jsonschema +importlib-metadata==6.5.0 + # via dask jinja2==3.1.2 # via feast (setup.py) jsonschema==4.17.3 @@ -82,11 +82,11 @@ locket==1.0.0 # via partd markupsafe==2.1.2 # via jinja2 -mmh3==3.0.0 +mmh3==3.1.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.1.1 +mypy==1.2.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -96,7 +96,7 @@ numpy==1.24.2 # pandas # pandavro # pyarrow -packaging==23.0 +packaging==23.1 # via dask pandas==1.5.3 # via @@ -104,24 +104,22 @@ pandas==1.5.3 # pandavro pandavro==1.5.2 # via feast (setup.py) -partd==1.3.0 +partd==1.4.0 # via dask -pkgutil-resolve-name==1.3.10 - # via jsonschema proto-plus==1.22.2 # via feast (setup.py) -protobuf==4.22.1 +protobuf==4.22.3 # via # feast (setup.py) # grpcio-reflection # proto-plus pyarrow==11.0.0 # via feast (setup.py) -pydantic==1.10.6 +pydantic==1.10.7 # via # fastapi # feast (setup.py) -pygments==2.14.0 +pygments==2.15.1 # via feast (setup.py) pyrsistent==0.19.3 # via jsonschema @@ -129,7 +127,7 @@ python-dateutil==2.8.2 # via pandas python-dotenv==1.0.0 # via uvicorn -pytz==2022.7.1 +pytz==2023.3 # via pandas pyyaml==6.0 # via @@ -138,8 +136,6 @@ pyyaml==6.0 # uvicorn requests==2.28.2 # via feast (setup.py) -rfc3986[idna2008]==1.5.0 - # via httpx six==1.16.0 # via # pandavro @@ -149,11 +145,11 @@ sniffio==1.3.0 # anyio # httpcore # httpx -sqlalchemy[mypy]==1.4.46 +sqlalchemy[mypy]==1.4.47 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a32 +sqlalchemy2-stubs==0.0.2a34 # via sqlalchemy -starlette==0.26.0.post1 +starlette==0.26.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -177,17 +173,17 @@ typing-extensions==4.5.0 # pydantic # sqlalchemy2-stubs # starlette -urllib3==1.26.14 +urllib3==1.26.15 # via requests -uvicorn[standard]==0.21.0 +uvicorn[standard]==0.21.1 # via feast (setup.py) uvloop==0.17.0 # via uvicorn volatile==2.1.0 # via bowler -watchfiles==0.18.1 +watchfiles==0.19.0 # via uvicorn -websockets==10.4 +websockets==11.0.2 # via uvicorn zipp==3.15.0 - # via importlib-resources + # via importlib-metadata diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 9235d8fdf2..ce9dc085ea 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -11,7 +11,7 @@ anyio==3.6.2 # watchfiles appdirs==1.4.4 # via fissix -attrs==22.2.0 +attrs==23.1.0 # via # bowler # jsonschema @@ -35,11 +35,11 @@ cloudpickle==2.2.1 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.3.0 +dask==2023.4.0 # via feast (setup.py) dill==0.3.6 # via feast (setup.py) -fastapi==0.93.0 +fastapi==0.95.1 # via feast (setup.py) fastavro==1.7.3 # via @@ -47,31 +47,33 @@ fastavro==1.7.3 # pandavro fissix==21.11.13 # via bowler -fsspec==2023.3.0 +fsspec==2023.4.0 # via dask greenlet==2.0.2 # via sqlalchemy -grpcio==1.51.3 +grpcio==1.54.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.51.3 +grpcio-reflection==1.54.0 # via feast (setup.py) h11==0.14.0 # via # httpcore # uvicorn -httpcore==0.16.3 +httpcore==0.17.0 # via httpx httptools==0.5.0 # via uvicorn -httpx==0.23.3 +httpx==0.24.0 # via feast (setup.py) idna==3.4 # via # anyio + # httpx # requests - # rfc3986 +importlib-metadata==6.5.0 + # via dask jinja2==3.1.2 # via feast (setup.py) jsonschema==4.17.3 @@ -80,11 +82,11 @@ locket==1.0.0 # via partd markupsafe==2.1.2 # via jinja2 -mmh3==3.0.0 +mmh3==3.1.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==1.1.1 +mypy==1.2.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -94,7 +96,7 @@ numpy==1.24.2 # pandas # pandavro # pyarrow -packaging==23.0 +packaging==23.1 # via dask pandas==1.5.3 # via @@ -102,22 +104,22 @@ pandas==1.5.3 # pandavro pandavro==1.5.2 # via feast (setup.py) -partd==1.3.0 +partd==1.4.0 # via dask proto-plus==1.22.2 # via feast (setup.py) -protobuf==4.22.1 +protobuf==4.22.3 # via # feast (setup.py) # grpcio-reflection # proto-plus pyarrow==11.0.0 # via feast (setup.py) -pydantic==1.10.6 +pydantic==1.10.7 # via # fastapi # feast (setup.py) -pygments==2.14.0 +pygments==2.15.1 # via feast (setup.py) pyrsistent==0.19.3 # via jsonschema @@ -125,7 +127,7 @@ python-dateutil==2.8.2 # via pandas python-dotenv==1.0.0 # via uvicorn -pytz==2022.7.1 +pytz==2023.3 # via pandas pyyaml==6.0 # via @@ -134,8 +136,6 @@ pyyaml==6.0 # uvicorn requests==2.28.2 # via feast (setup.py) -rfc3986[idna2008]==1.5.0 - # via httpx six==1.16.0 # via # pandavro @@ -145,11 +145,11 @@ sniffio==1.3.0 # anyio # httpcore # httpx -sqlalchemy[mypy]==1.4.46 +sqlalchemy[mypy]==1.4.47 # via feast (setup.py) -sqlalchemy2-stubs==0.0.2a32 +sqlalchemy2-stubs==0.0.2a34 # via sqlalchemy -starlette==0.25.0 +starlette==0.26.1 # via fastapi tabulate==0.9.0 # via feast (setup.py) @@ -173,15 +173,17 @@ typing-extensions==4.5.0 # pydantic # sqlalchemy2-stubs # starlette -urllib3==1.26.14 +urllib3==1.26.15 # via requests -uvicorn[standard]==0.21.0 +uvicorn[standard]==0.21.1 # via feast (setup.py) uvloop==0.17.0 # via uvicorn volatile==2.1.0 # via bowler -watchfiles==0.18.1 +watchfiles==0.19.0 # via uvicorn -websockets==10.4 +websockets==11.0.2 # via uvicorn +zipp==3.15.0 + # via importlib-metadata diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index c92a413616..dfe8e3d33b 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -59,6 +59,7 @@ def create_data_source( aws_utils.upload_df_to_redshift( self.client, self.offline_store_config.cluster_id, + self.offline_store_config.workgroup, self.offline_store_config.database, self.offline_store_config.user, self.s3, @@ -105,6 +106,7 @@ def teardown(self): aws_utils.execute_redshift_statement( self.client, self.offline_store_config.cluster_id, + self.offline_store_config.workgroup, self.offline_store_config.database, self.offline_store_config.user, f"DROP TABLE IF EXISTS {table}", diff --git a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py index 22fd1e696f..42229f8683 100644 --- a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py +++ b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py @@ -45,8 +45,8 @@ def test_nullable_online_store_aws(): entity_key_serialization_version: 2 """ ), - expect_error="__root__ -> offline_store -> cluster_id\n" - " field required (type=value_error.missing)", + expect_error="__root__ -> offline_store -> __root__\n" + " please specify either cluster_id & user if using provisioned clusters, or workgroup if using serverless (type=value_error)", ) diff --git a/setup.py b/setup.py index 7218c1de04..bb9907dadc 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,8 @@ "mmh3", "numpy>=1.22,<3", "pandas>=1.4.3,<2", - "pandavro~=1.5.0", # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. + # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. + "pandavro~=1.5.0", "protobuf<5,>3.20", "proto-plus>=1.20.0,<2", "pyarrow>=4,<12", @@ -71,7 +72,8 @@ "uvicorn[standard]>=0.14.0,<1", "dask>=2021.1.0", "bowler", # Needed for automatic repo upgrades - "httpx>=0.23.3", # FastAPI does not correctly pull starlette dependency on httpx see thread(https://github.com/tiangolo/fastapi/issues/5656). + # FastAPI does not correctly pull starlette dependency on httpx see thread(https://github.com/tiangolo/fastapi/issues/5656). + "httpx>=0.23.3", ] GCP_REQUIRED = [ @@ -313,7 +315,6 @@ def run(self): file.write(filedata) - class BuildCommand(build_py): """Custom build command."""