Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide descriptive error on invalid table reference #1627

Merged
merged 20 commits into from
Jun 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions sdk/python/feast/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from feast import type_map
from feast.data_format import FileFormat, StreamFormat
from feast.errors import DataSourceNotFoundException
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
from feast.value_type import ValueType

Expand Down Expand Up @@ -519,6 +520,12 @@ def to_proto(self) -> DataSourceProto:
"""
raise NotImplementedError

def validate(self):
"""
Validates the underlying data source.
"""
raise NotImplementedError


class FileSource(DataSource):
def __init__(
Expand Down Expand Up @@ -615,6 +622,10 @@ def to_proto(self) -> DataSourceProto:

return data_source_proto

def validate(self):
# TODO: validate a FileSource
pass

@staticmethod
def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]:
return type_map.pa_to_feast_value_type
Expand Down Expand Up @@ -692,6 +703,17 @@ def to_proto(self) -> DataSourceProto:

return data_source_proto

def validate(self):
if not self.query:
from google.api_core.exceptions import NotFound
from google.cloud import bigquery

client = bigquery.Client()
try:
client.get_table(self.table_ref)
except NotFound:
raise DataSourceNotFoundException(self.table_ref)

def get_table_query_string(self) -> str:
"""Returns a string that can directly be used to reference this table in SQL"""
if self.table_ref:
Expand Down
7 changes: 7 additions & 0 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
from colorama import Fore, Style


class DataSourceNotFoundException(Exception):
def __init__(self, path):
super().__init__(
f"Unable to find table at '{path}'. Please check that table exists."
)


class FeastObjectNotFoundException(Exception):
pass

Expand Down
3 changes: 2 additions & 1 deletion sdk/python/feast/repo_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,12 @@ def apply_total(repo_config: RepoConfig, repo_path: Path):

data_sources = [t.input for t in repo.feature_views]

# Make sure the data source used by this feature view is supported by
# Make sure the data source used by this feature view is supported by Feast
for data_source in data_sources:
assert_offline_store_supports_data_source(
repo_config.offline_store, data_source
)
data_source.validate()

update_data_sources_with_inferred_event_timestamp_col(data_sources)
for view in repo.feature_views:
Expand Down
20 changes: 20 additions & 0 deletions sdk/python/tests/example_feature_repo_with_missing_bq_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from datetime import timedelta

from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType

nonexistent_source = BigQuerySource(
table_ref="project.dataset.nonexistent_table", event_timestamp_column=""
)

driver = Entity(name="driver", value_type=ValueType.INT64, description="driver id",)

nonexistent_features = FeatureView(
name="driver_locations",
entities=["driver"],
ttl=timedelta(days=1),
features=[
Feature(name="lat", dtype=ValueType.FLOAT),
Feature(name="lon", dtype=ValueType.STRING),
],
input=nonexistent_source,
)
35 changes: 35 additions & 0 deletions sdk/python/tests/test_cli_gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,38 @@ def test_basic() -> None:

result = runner.run(["teardown"], cwd=repo_path)
assert result.returncode == 0


@pytest.mark.integration
def test_missing_bq_source_fail() -> None:
project_id = "".join(
random.choice(string.ascii_lowercase + string.digits) for _ in range(10)
)
runner = CliRunner()
with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

repo_path = Path(repo_dir_name)
data_path = Path(data_dir_name)

repo_config = repo_path / "feature_store.yaml"

repo_config.write_text(
dedent(
f"""
project: {project_id}
registry: {data_path / "registry.db"}
provider: gcp
"""
)
)

repo_example = repo_path / "example.py"
repo_example.write_text(
(
Path(__file__).parent / "example_feature_repo_with_missing_bq_source.py"
).read_text()
)

returncode, output = runner.run_with_output(["apply"], cwd=repo_path)
assert returncode == 1
assert b"DataSourceNotFoundException" in output
3 changes: 3 additions & 0 deletions sdk/python/tests/test_cli_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
from textwrap import dedent

import assertpy
import pytest

from feast.feature_store import FeatureStore
from tests.cli_utils import CliRunner
from tests.online_read_write_test import basic_rw_test


@pytest.mark.integration
def test_workflow() -> None:
"""
Test running apply on a sample repo, and make sure the infra gets created.
Expand Down Expand Up @@ -78,6 +80,7 @@ def test_workflow() -> None:
assertpy.assert_that(result.returncode).is_equal_to(0)


@pytest.mark.integration
def test_non_local_feature_repo() -> None:
"""
Test running apply on a sample repo, and make sure the infra gets created.
Expand Down
2 changes: 2 additions & 0 deletions sdk/python/tests/test_online_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from tests.cli_utils import CliRunner, get_example_repo


@pytest.mark.integration
def test_online() -> None:
"""
Test reading from the online store in local mode.
Expand Down Expand Up @@ -238,6 +239,7 @@ def test_online() -> None:
os.rename(store.config.registry + "_fake", store.config.registry)


@pytest.mark.integration
def test_online_to_df():
"""
Test dataframe conversion. Make sure the response columns and rows are
Expand Down
2 changes: 2 additions & 0 deletions sdk/python/tests/test_partial_apply.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import pytest
from google.protobuf.duration_pb2 import Duration

from feast import BigQuerySource, Feature, FeatureView, ValueType
from tests.cli_utils import CliRunner, get_example_repo
from tests.online_read_write_test import basic_rw_test


@pytest.mark.integration
def test_partial() -> None:
"""
Add another table to existing repo using partial apply API. Make sure both the table
Expand Down