Skip to content

Commit

Permalink
chore: fix a flaky lineage test case (#58)
Browse files Browse the repository at this point in the history
Change the test logic so that we check all of the upstream fields of each
column, and sort them so that the test is stable even though sqlglot does
not guarantee their order.

Signed-off-by: Lucas Roesler <roesler.lucas@gmail.com>
  • Loading branch information
LucasRoesler authored Jul 3, 2023
1 parent f3a9524 commit dcf8c36
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{
"cSpell.words": [
"datahub",
"downstreams",
"Hana",
"sqlachemy",
"sqlalchemy",
"sqlglot"
"sqlglot",
"upstreams"
],
"[python]": {
"editor.tabSize": 4,
Expand Down
26 changes: 20 additions & 6 deletions tests/integration/test_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def test_get_column_lineage(config, ctx):
upstreams = [x[1][0].name for x in column_lineage]
assert upstreams == upstream_field_names, f"{upstreams}"

# Now test the more complicated view "total_rooms_price"
#
# It is defined as follows:
# SELECT
# H.NAME,
# H.CITY,
Expand All @@ -124,13 +127,24 @@ def test_get_column_lineage(config, ctx):

total_rooms_price = lineages[4]
column_lineage = total_rooms_price[1]
assert column_lineage[0][0].name == "name"
assert column_lineage[1][0].name == "city"
assert column_lineage[2][0].name == "type"
assert column_lineage[3][0].name == "total_room_price"

# check that the downstream (target) columns are correct
expected_downstreams = ["name", "city", "type", "total_room_price"]
downstreams = [x[0].name for x in column_lineage]
assert downstreams == expected_downstreams, f"{downstreams}"

# check that the view schema and name are correct
assert column_lineage[0][0].dataset.schema == "hotel"
assert column_lineage[0][0].dataset.name == "total_rooms_price"

upstream_field_names = ["name", "city", "type", "price"]
upstreams = [x[1][0].name for x in column_lineage]
# now check that each of these columns has the correct upstream (source) columns
upstream_field_names = [
["name"], # upstream for name
["city"], # upstream for city
["type"], # upstream for type
["price", "type"], # upstream for total_room_price
]
# note that we sort the upstreams because the order is not guaranteed by sqlglot
upstreams = [sorted([source.name for source in x[1]]) for x in column_lineage]

assert upstreams == upstream_field_names, f"{upstreams}"

0 comments on commit dcf8c36

Please sign in to comment.