Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add test for cross schema and more complex queries #62

Merged
merged 5 commits into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions datahub_sap_hana/column_lineage_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass

from serde import serde
from sqlglot import expressions
from sqlglot.lineage import Node


Expand Down Expand Up @@ -40,11 +41,17 @@ class ColumnField:
dataset: Table

@classmethod
def from_node(cls, node: Node, schema: str):
def from_node(cls, node: Node, default_schema: str):
"""Creates a ColumnField from a sqlglot node."""

schema = default_schema

if isinstance(node.source, expressions.Table):
schema = node.source.catalog or node.source.db # type:ignore
eyelesbarrow marked this conversation as resolved.
Show resolved Hide resolved

return cls(
name=parse_column_name(node.name),
dataset=Table(schema=schema, name=node.source.name),
dataset=Table(schema=schema, name=node.source.name or default_schema),
eyelesbarrow marked this conversation as resolved.
Show resolved Hide resolved
)


Expand Down
22 changes: 13 additions & 9 deletions datahub_sap_hana/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ class HanaConfig(BasicSQLAlchemyConfig):
"""Represents the attributes needed to configure the SAP HANA DB connection"""

scheme = "hana"
schema_pattern: AllowDenyPattern = Field(default=AllowDenyPattern(deny=["*SYS*"]))
schema_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern(deny=["*SYS*"]))
include_view_lineage: bool = Field(
default=False, description="Include table lineage for views"
)
Expand Down Expand Up @@ -193,7 +194,8 @@ def _get_view_lineage_elements(
lineage_elements[key].append(
mce_builder.make_dataset_urn(
self.platform,
self.config.get_identifier(item.source_schema, item.source_table),
self.config.get_identifier(
item.source_schema, item.source_table),
self.config.env,
)
)
Expand Down Expand Up @@ -238,14 +240,14 @@ def get_column_lineage_view_definitions(

for schema_name in schema:
if self.config.schema_pattern.allowed(schema_name):

views: List[str] = inspector.get_view_names(
schema=schema_name
) # returns a list

for view_name in views:
view_sql: str = inspector.get_view_definition(
view_name, schema_name)
view_name, schema_name
)

if view_sql:
yield View(
Expand Down Expand Up @@ -281,8 +283,8 @@ def get_column_view_lineage_elements(
Each tuple contains a downstream field (a column in a view) and a list of
upstream fields (columns in other views or tables that are used to
calculate/transform the downstream column).
"""

"""
for view in self.get_column_lineage_view_definitions(inspector):
column_lineage: List[
Tuple[DownstreamLineageField, List[UpstreamLineageField]]
Expand All @@ -291,8 +293,8 @@ def get_column_view_lineage_elements(
column_lineages = self._get_column_lineage_for_view(view.sql)

downstream_table_metadata = get_table_schema(
inspector, view.name, view.schema)

inspector, view.name, view.schema
)

# lineage_node represents the lineage of 1 column in sqlglot
# lineage_node.downstream is the datahub upstream each element
Expand Down Expand Up @@ -327,7 +329,8 @@ def get_column_view_lineage_elements(
source_table_metadata = get_table_schema(
inspector, column.dataset.name, column.dataset.schema
)
column_metadata = source_table_metadata[column.name.lower()]
column_metadata = source_table_metadata[column.name.lower(
)]
column.name = column_metadata["name"]

# we only have lineage information if there are "upstream" fields
Expand Down Expand Up @@ -417,7 +420,8 @@ def _get_column_lineage_workunits(
fieldLineages = UpstreamLineage(
fineGrainedLineages=column_lineages,
upstreams=[
Upstream(dataset=dataset_urn, type=DatasetLineageType.TRANSFORMED)
Upstream(dataset=dataset_urn,
type=DatasetLineageType.TRANSFORMED)
for dataset_urn in list(upstream_datasets)
],
)
Expand Down
Loading