Skip to content

Commit

Permalink
Fix describe throwing errors when listing (#238)
Browse files (browse the repository at this point in the history)
Fixes #229
  • Loading branch information
william-conti authored and FastLee committed Sep 20, 2023
1 parent 922860f commit ec834bf
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 17 deletions.
36 changes: 21 additions & 15 deletions src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -116,24 +116,30 @@ def _crawl(self, catalog: str, database: str) -> list[Table]:
tasks = []
for _, table, _is_tmp in self._fetch(f"SHOW TABLES FROM {catalog}.{database}"):
tasks.append(partial(self._describe, catalog, database, table))
return ThreadedExecution.gather(f"listing tables in {catalog}.{database}", tasks)
results = ThreadedExecution.gather(f"listing tables in {catalog}.{database}", tasks)

def _describe(self, catalog: str, database: str, table: str) -> Table:
return [x for x in results if x is not None]

def _describe(self, catalog: str, database: str, table: str) -> Table | None:
"""Fetches metadata like table type, data format, external table location,
and the text of a view if specified for a specific table within the given
catalog and database.
"""
describe = {}
full_name = f"{catalog}.{database}.{table}"
logger.debug(f"[{full_name}] fetching table metadata")
for key, value, _ in self._fetch(f"DESCRIBE TABLE EXTENDED {full_name}"):
describe[key] = value
return Table(
catalog=describe["Catalog"],
database=database,
name=table,
object_type=describe["Type"],
table_format=describe.get("Provider", "").upper(),
location=describe.get("Location", None),
view_text=describe.get("View Text", None),
)
try:
logger.debug(f"[{full_name}] fetching table metadata")
describe = {}
for key, value, _ in self._fetch(f"DESCRIBE TABLE EXTENDED {full_name}"):
describe[key] = value
return Table(
catalog=describe["Catalog"],
database=database,
name=table,
object_type=describe["Type"],
table_format=describe.get("Provider", "").upper(),
location=describe.get("Location", None),
view_text=describe.get("View Text", None),
)
except RuntimeError as e:
logger.error(f"Couldn't fetch information for table {full_name} : {e}")
return None
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/install.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, ws: WorkspaceClient, *, prefix: str = "ucx", promtps: bool =
self._dashboards = {}

def run(self):
logger.info(f'Installing UCX v{__version__}')
logger.info(f"Installing UCX v{__version__}")
self._configure()
self._create_dashboards()
self._create_jobs()
Expand Down Expand Up @@ -509,6 +509,6 @@ def _deployed_steps(self):

if __name__ == "__main__":
ws = WorkspaceClient(product="ucx", product_version=__version__)
logger.setLevel('INFO')
logger.setLevel("INFO")
installer = Installer(ws)
installer.run()
10 changes: 10 additions & 0 deletions tests/unit/hive_metastore/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,13 @@ def test_uc_sql(table, query):
def test_tables_crawler_inventory_table():
    """Check that a freshly built TablesCrawler stores "tables" in ``_table``."""
    crawler = TablesCrawler(MockBackend(), "main", "default")
    inventory_table = crawler._table
    assert inventory_table == "tables"


def test_tables_returning_error_when_describing():
    """Crawl a database listing two tables and expect a single result.

    The DESCRIBE query for ``table1`` is handed to the mock backend via
    ``fails_on_first``, while ``table2`` has canned metadata rows, so
    ``_crawl`` is expected to return exactly one entry.
    """
    # Passed as fails_on_first; presumably makes this query raise
    # (MockBackend is defined elsewhere - confirm against its implementation).
    failing_queries = {"DESCRIBE TABLE EXTENDED test.database.table1": "error"}

    listed_tables = [("", "table1", ""), ("", "table2", "")]
    table2_metadata = [("Catalog", "catalog", ""), ("Type", "delta", "")]
    canned_rows = {
        "SHOW TABLES FROM test.database": listed_tables,
        "DESCRIBE TABLE EXTENDED test.database.table2": table2_metadata,
    }

    backend = MockBackend(fails_on_first=failing_queries, rows=canned_rows)
    crawler = TablesCrawler(backend, "main", "default")
    crawled = crawler._crawl("test", "database")
    assert len(crawled) == 1

0 comments on commit ec834bf

Please sign in to comment.