Skip to content

Commit

Permalink
UC now supports: updating cat and schema properties, updating cat and…
Browse files Browse the repository at this point in the history
… schema without changing name, schema comment does not get reset if not provided when updating
  • Loading branch information
VillePuuska authored Sep 11, 2024
1 parent 5891f46 commit 904720d
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 29 deletions.
50 changes: 37 additions & 13 deletions tests/test_api_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ def test_catalogs_endpoint(client: UCClient):

cat_name = "asdasdasdasfdsadgsa"
cat_comment = "asd"
cat_properties = {"prop1": "val1", "property 2": "foo bar"}
catalog = Catalog(
name=cat_name,
comment=cat_comment,
properties=cat_properties,
)

cat_name_update = "asdgnlsavnsadn"
cat_comment_update = "ayo"
catalog_update = Catalog(name=cat_name_update)
cat_properties_update = {"prop new": "asddfg"}
catalog_update = Catalog(name=cat_name_update, properties=cat_properties_update)
catalog_update2 = Catalog(name=cat_name_update, comment=cat_comment_update)

# At start, there is only the default catalog; verify this
Expand All @@ -48,6 +51,7 @@ def test_catalogs_endpoint(client: UCClient):
cat = client.create_catalog(catalog)
assert cat.name == cat_name
assert cat.comment == cat_comment
assert cat.properties == cat_properties
assert cat.id is not None

assert len(client.list_catalogs()) == 2
Expand All @@ -64,11 +68,12 @@ def test_catalogs_endpoint(client: UCClient):
with pytest.raises(AlreadyExistsError):
client.update_catalog(default_catalog, catalog)

# Update the catalog name; verify it was updated and the old catalog does not exist
# Update the catalog name and properties; verify it was updated and the old catalog does not exist

cat = client.update_catalog(cat_name, catalog_update)
assert cat.name == cat_name_update
assert cat.comment == cat_comment
assert cat.properties == cat_properties_update

assert len(client.list_catalogs()) == 2

Expand All @@ -78,11 +83,13 @@ def test_catalogs_endpoint(client: UCClient):
client.delete_catalog(cat_name, False)
assert len(client.list_catalogs()) == 2

# Update just the comment but not the name; this used to be impossible due to a UC bug
# Update just the comment but not the name or properties;
# updating without changing the name used to be impossible due to a UC bug

cat = client.update_catalog(cat_name_update, catalog_update2)
assert cat.name == cat_name_update
assert cat.comment == cat_comment_update
assert cat.properties == cat_properties_update

# Delete the updated catalog; verify it actually gets deleted and we cannot "re-delete" it

Expand All @@ -107,16 +114,27 @@ def test_schemas_endpoint(client: UCClient):

new_schema_name = "asdasdasdasfdsadgsa"
new_schema_comment = "asd"
new_schema_properties = {"prop1": "val1", "property 2": "foo bar"}
new_schema = Schema(
name=new_schema_name,
catalog_name=default_catalog,
comment=new_schema_comment,
properties=new_schema_properties,
)

schema_name_update = "asdgnlsavnsadn"
schema_properties_update = {"prop new": "asddfg"}
schema_update = Schema(
name=schema_name_update,
catalog_name=default_catalog,
properties=schema_properties_update,
)

schema_comment_update = "dddddddd"
schema_update2 = Schema(
name=schema_name_update,
catalog_name=default_catalog,
comment=schema_comment_update,
)

# Initially, there is only the default schema; verify this
Expand Down Expand Up @@ -149,6 +167,7 @@ def test_schemas_endpoint(client: UCClient):
schema = client.create_schema(schema=new_schema)
assert schema.full_name == default_catalog + "." + new_schema_name
assert schema.comment == new_schema_comment
assert schema.properties == new_schema_properties
assert schema.created_at is not None
assert schema.updated_at is None
assert schema.schema_id is not None
Expand All @@ -159,30 +178,35 @@ def test_schemas_endpoint(client: UCClient):
schemas = client.list_schemas(catalog=default_catalog)
assert len(schemas) == 2

with pytest.raises(AlreadyExistsError):
client.update_schema(
catalog=default_catalog,
schema_name=new_schema_name,
new_schema=new_schema,
)

with pytest.raises(DoesNotExistError):
client.update_schema(
catalog=default_catalog,
schema_name=schema_name_update,
new_schema=schema_update,
)

# Update the schema; verify the schema was updated and the old schema does not exist
# Update the schema name and properties; verify the schema was updated and the old schema does not exist

schema = client.update_schema(
catalog=default_catalog,
schema_name=new_schema_name,
new_schema=schema_update,
)
assert schema.full_name == default_catalog + "." + schema_name_update
# Default comment set to "" since UC does not clear comment if the new comment is null
assert schema.comment == ""
assert schema.comment == new_schema_comment # comment was not changed
assert schema.properties == schema_properties_update
assert schema.updated_at is not None

# Update only the comment; verify the schema was updated and the old schema does not exist

schema = client.update_schema(
catalog=default_catalog,
schema_name=schema_name_update,
new_schema=schema_update2,
)
assert schema.full_name == default_catalog + "." + schema_name_update # unchanged
assert schema.comment == schema_comment_update
assert schema.properties == schema_properties_update # unchanged
assert schema.updated_at is not None

with pytest.raises(DoesNotExistError):
Expand Down
12 changes: 6 additions & 6 deletions uchelper/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Catalog(BaseModel):

name: str
comment: str | None = None
properties: dict[str, str] = {}
properties: dict[str, str] | None = {}
created_at: datetime.datetime | None = None
updated_at: datetime.datetime | None = None
id: uuid.UUID | None = None
Expand All @@ -25,8 +25,8 @@ class Schema(BaseModel):

name: str
catalog_name: str
comment: str | None = "" # UC doesn't update a null comment properly so default ""
properties: dict[str, str] = {}
comment: str | None = None
properties: dict[str, str] | None = {}
full_name: str | None = None
created_at: datetime.datetime | None = None
updated_at: datetime.datetime | None = None
Expand Down Expand Up @@ -92,8 +92,8 @@ class Column(BaseModel):
data_type: DataType = Field(
validation_alias="type_name", serialization_alias="type_name"
)
type_precision: int = 0 # TODO: handle this
type_scale: int = 0 # TODO: handle this
type_precision: int = 0
type_scale: int = 0
type_interval_type: int | None = None
position: int
comment: str | None = None
Expand Down Expand Up @@ -180,7 +180,7 @@ class Table(BaseModel):
columns: list[Column]
storage_location: str | None = None
comment: str | None = None
properties: dict[str, str] = {}
properties: dict[str, str] | None = {}
created_at: datetime.datetime | None = None
updated_at: datetime.datetime | None = None
table_id: uuid.UUID | None = None
Expand Down
15 changes: 5 additions & 10 deletions uchelper/uc_api_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,6 @@ def list_catalogs(session: requests.Session, uc_url: str) -> list[Catalog]:
catalogs = []
token = None

# NOTE: GET /catalogs pagination is bugged atm,
# all catalogs are returned regardless of parameters
# and next_page_token is always null.
while True:
response = session.get(
uc_url + api_path + catalogs_endpoint,
Expand All @@ -152,7 +149,7 @@ def list_catalogs(session: requests.Session, uc_url: str) -> list[Catalog]:
]
)
# according to API spec, token should be null when there are no more pages,
# but at least some endpoints are bugged and return "" instead of null
# but at least some endpoints have been bugged and returned "" instead of null
if token is None or token == "":
break

Expand Down Expand Up @@ -281,8 +278,6 @@ def list_schemas(session: requests.Session, uc_url: str, catalog: str) -> list[S
schemas = []
token = None

# NOTE: listCatalogs pagination is not implemented in Unity Catalog yet
# so this handling of pagination is not actually needed atm.
while True:
response = session.get(
url,
Expand Down Expand Up @@ -321,14 +316,14 @@ def update_schema(
- properties.
Returns a Schema with updated information.
Raises a DoesNotExistError if the schema does not exist.
Raises an AlreadyExistsError if the new name is the same as the old name. Unity Catalog
does not allow updating and keeping the same name atm.
Raises an AlreadyExistsError if there already exists a schema with the new name
in the same catalog.
"""
url = uc_url + api_path + schemas_endpoint + "/" + catalog + "." + schema_name
data = {
"comment": new_schema.comment,
"properties": new_schema.properties,
"new_name": new_schema.name,
"new_name": (new_schema.name if new_schema.name != schema_name else None),
}
response = session.patch(url=url, data=json.dumps(data), headers=JSON_HEADER)

Expand Down Expand Up @@ -376,7 +371,7 @@ def create_table(session: requests.Session, uc_url: str, table: Table) -> Table:

def delete_table(
session: requests.Session, uc_url: str, catalog: str, schema: str, table: str
):
) -> None:
"""
Deletes the table.
Raises a DoesNotExistError if the table did not exist.
Expand Down

0 comments on commit 904720d

Please sign in to comment.