Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix describe throwing errors when listing #238

Merged
merged 5 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,24 +116,30 @@ def _crawl(self, catalog: str, database: str) -> list[Table]:
tasks = []
for _, table, _is_tmp in self._fetch(f"SHOW TABLES FROM {catalog}.{database}"):
tasks.append(partial(self._describe, catalog, database, table))
return ThreadedExecution.gather(f"listing tables in {catalog}.{database}", tasks)
results = ThreadedExecution.gather(f"listing tables in {catalog}.{database}", tasks)

def _describe(self, catalog: str, database: str, table: str) -> Table:
return [x for x in results if x is not None]

def _describe(self, catalog: str, database: str, table: str) -> Table | None:
"""Fetches metadata like table type, data format, external table location,
and the text of a view if specified for a specific table within the given
catalog and database.
"""
describe = {}
full_name = f"{catalog}.{database}.{table}"
logger.debug(f"[{full_name}] fetching table metadata")
for key, value, _ in self._fetch(f"DESCRIBE TABLE EXTENDED {full_name}"):
describe[key] = value
return Table(
catalog=describe["Catalog"],
database=database,
name=table,
object_type=describe["Type"],
table_format=describe.get("Provider", "").upper(),
location=describe.get("Location", None),
view_text=describe.get("View Text", None),
)
try:
logger.debug(f"[{full_name}] fetching table metadata")
describe = {}
for key, value, _ in self._fetch(f"DESCRIBE TABLE EXTENDED {full_name}"):
describe[key] = value
return Table(
catalog=describe["Catalog"],
database=database,
name=table,
object_type=describe["Type"],
table_format=describe.get("Provider", "").upper(),
location=describe.get("Location", None),
view_text=describe.get("View Text", None),
)
except RuntimeError as e:
logger.error(f"Couldn't fetch information for table {full_name} : {e}")
return None
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, ws: WorkspaceClient, *, prefix: str = "ucx", promtps: bool =
self._dashboards = {}

def run(self):
logger.info(f'Installing UCX v{__version__}')
logger.info(f"Installing UCX v{__version__}")
self._configure()
self._create_dashboards()
self._create_jobs()
Expand Down Expand Up @@ -509,6 +509,6 @@ def _deployed_steps(self):

if __name__ == "__main__":
ws = WorkspaceClient(product="ucx", product_version=__version__)
logger.setLevel('INFO')
logger.setLevel("INFO")
installer = Installer(ws)
installer.run()
10 changes: 10 additions & 0 deletions tests/unit/hive_metastore/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,13 @@ def test_uc_sql(table, query):
def test_tables_crawler_inventory_table():
    """Check the `_table` attribute of a freshly constructed crawler.

    A `TablesCrawler` built from an argument-less `MockBackend` plus the
    strings "main" and "default" is expected to report "tables".
    """
    crawler = TablesCrawler(MockBackend(), "main", "default")
    assert crawler._table == "tables"


def test_tables_returning_error_when_describing():
    """Crawl two listed tables where one DESCRIBE query is registered as failing.

    The mock is given canned rows so that `SHOW TABLES` lists `table1` and
    `table2`. The DESCRIBE query for `table1` is handed to the mock through
    `fails_on_first`, while the one for `table2` has `Catalog` and `Type`
    rows. The crawl is expected to return exactly one entry.
    """
    # Query handed to the mock's `fails_on_first` argument
    # (presumably makes that statement raise - confirm against MockBackend).
    failing_queries = {"DESCRIBE TABLE EXTENDED test.database.table1": "error"}

    listed_tables = [("", "table1", ""), ("", "table2", "")]
    table2_metadata = [("Catalog", "catalog", ""), ("Type", "delta", "")]
    canned_rows = {
        "SHOW TABLES FROM test.database": listed_tables,
        "DESCRIBE TABLE EXTENDED test.database.table2": table2_metadata,
    }

    backend = MockBackend(fails_on_first=failing_queries, rows=canned_rows)
    crawler = TablesCrawler(backend, "main", "default")

    crawled = crawler._crawl("test", "database")
    assert len(crawled) == 1
Loading