Skip to content

Commit

Permalink
Added a column to $inventory.tables to specify if a table might have been synchronised to Unity Catalog already or not (#306)
Browse files Browse the repository at this point in the history

Closes #303
  • Loading branch information
FastLee authored Oct 3, 2023
1 parent e516998 commit 0bf43e7
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 16 deletions.
31 changes: 17 additions & 14 deletions docs/table_persistence.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,17 @@ Table Utilization:

Holds Inventory of all tables in all databases and their relevant metadata.

| Column | Datatype | Description | Comments |
|-----------|----------|-------------|----------|
| catalog | string | Original catalog of the table. _hive_metastore_ by default |
| database | string | Original schema of the table |
| name |string|Name of the table|
|object_type|string|MANAGED, EXTERNAL, or VIEW|
|table_format|string|Table provider. Like delta or json or parquet.|
|location|string|Location of the data for table|
|view_text|nullable string|If the table is the view, then this column holds the definition of the view|
| Column | Datatype | Description | Comments |
|--------------|----------|-------------|----------|
| catalog | string | Original catalog of the table. _hive_metastore_ by default |
| database | string | Original schema of the table |
| name |string|Name of the table|
| object_type |string|MANAGED, EXTERNAL, or VIEW|
| table_format |string|Table provider. Like delta or json or parquet.|
| location |string|Location of the data for table|
| view_text |nullable string|If the table is the view, then this column holds the definition of the view|
| upgraded_to |string|Upgrade Target (3 level namespace)|


<br/>

Expand Down Expand Up @@ -78,11 +80,12 @@ List of DBFS mount points.
#### _$inventory_.permissions
Workspace object level permissions

| Column | Datatype | Description | Comments |
|-----------|----------|-------------|----------|
|object_id|string|Either:<br/>Group ID<br/>Workspace Object ID<br/>Redash Object ID<br/>Scope name
|supports|string|One of:<br/>AUTHORIZATION<br/><br/>CLUSTERS<br/>CLUSTER_POLICIES<br/>DIRECTORIES<br/>EXPERIMENTS<br/>FILES<br/>INSTANCE_POOLS<br/>JOBS<br/>NOTEBOOKS<br/>PIPELINES<br/>REGISTERED_MODELS<br/>REPOS<br/>SERVING_ENDPOINTS<br/>SQL_WAREHOUSES
|raw_object_permissions|JSON|JSON-serialized response of:<br/>Generic Permissions<br/>Secret ACL<br/>Group roles and entitlements<br/>Redash permissions|
| Column | Datatype | Description | Comments |
|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
| object_id | string | Either:<br/>Group ID<br/>Workspace Object ID<br/>Redash Object ID<br/>Scope name | |
| supports | string | One of:<br/>AUTHORIZATION<br/><br/>CLUSTERS<br/>CLUSTER_POLICIES<br/>DIRECTORIES<br/>EXPERIMENTS<br/>FILES<br/>INSTANCE_POOLS<br/>JOBS<br/>NOTEBOOKS<br/>PIPELINES<br/>REGISTERED_MODELS<br/>REPOS<br/>SERVING_ENDPOINTS<br/>SQL_WAREHOUSES | |
| raw_object_permissions | JSON | JSON-serialized response of:<br/>Generic Permissions<br/>Secret ACL<br/>Group roles and entitlements<br/>Redash permissions | |


<br/>

Expand Down
11 changes: 11 additions & 0 deletions src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import logging
import re
import string
from collections.abc import Iterator
from dataclasses import dataclass
from functools import partial
Expand All @@ -22,6 +24,7 @@ class Table:

location: str = None
view_text: str = None
upgraded_to: str = None

@property
def is_delta(self) -> bool:
Expand Down Expand Up @@ -93,6 +96,13 @@ def snapshot(self) -> list[Table]:
"""
return self._snapshot(partial(self._try_load), partial(self._crawl))

@staticmethod
def _parse_table_props(tbl_props: string) -> {}:
pattern = r"([^,\[\]]+)=([^,\[\]]+)"
key_value_pairs = re.findall(pattern, tbl_props)
# Convert key-value pairs to dictionary
return dict(key_value_pairs)

def _try_load(self):
"""Tries to load table information from the database or throws TABLE_OR_VIEW_NOT_FOUND error"""
for row in self._fetch(f"SELECT * FROM {self._full_name}"):
Expand Down Expand Up @@ -140,6 +150,7 @@ def _describe(self, catalog: str, database: str, table: str) -> Table | None:
table_format=describe.get("Provider", "").upper(),
location=describe.get("Location", None),
view_text=describe.get("View Text", None),
upgraded_to=self._parse_table_props(describe.get("Table Properties", "")).get("upgraded_to", None),
)
except Exception as e:
logger.error(f"Couldn't fetch information for table {full_name} : {e}")
Expand Down
6 changes: 4 additions & 2 deletions src/databricks/labs/ucx/hive_metastore/tables.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import org.apache.spark.sql.DataFrame

// must follow the same structure as databricks.labs.ucx.hive_metastore.tables.Table
case class TableDetails(catalog: String, database: String, name: String, object_type: String,
table_format: String, location: String, view_text: String)
table_format: String, location: String, view_text: String, upgraded_to: String)

// recording error log in the database
case class TableError(catalog: String, database: String, name: String, error: String)
Expand Down Expand Up @@ -36,8 +36,10 @@ def metadataForAllTables(databases: Seq[String], queue: ConcurrentLinkedQueue[Ta
failures.add(TableError("hive_metastore", databaseName, tableName, s"result is null"))
None
} else {
val upgraded_to=table.properties.get("upgraded_to")
Some(TableDetails("hive_metastore", databaseName, tableName, table.tableType.name, table.provider.orNull,
table.storage.locationUri.map(_.toString).orNull, table.viewText.orNull))
table.storage.locationUri.map(_.toString).orNull, table.viewText.orNull,
upgraded_to match {case Some(target) => target case None => null}))
}
} catch {
case err: Throwable =>
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/hive_metastore/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ def test_tables_crawler_inventory_table():
assert tc._table == "tables"


def test_tables_crawler_parse_tp():
    """_parse_table_props turns a DESCRIBE properties string into a dict,
    with 'upgraded_to' present only when the table was already migrated."""
    crawler = TablesCrawler(MockBackend(), "default")

    migrated_props = crawler._parse_table_props(
        "[delta.minReaderVersion=1,delta.minWriterVersion=2,upgraded_to=fake_cat.fake_ext.fake_delta]"
    )
    plain_props = crawler._parse_table_props("[delta.minReaderVersion=1,delta.minWriterVersion=2]")

    assert len(migrated_props) == 3
    assert migrated_props.get("upgraded_to") == "fake_cat.fake_ext.fake_delta"
    assert len(plain_props) == 2
    assert plain_props.get("upgraded_to") is None


def test_tables_returning_error_when_describing():
errors = {"DESCRIBE TABLE EXTENDED hive_metastore.database.table1": "error"}
rows = {
Expand Down

0 comments on commit 0bf43e7

Please sign in to comment.