Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to identify service principal with custom roles on Azure storage account for principal-prefix-access #1576

Merged
merged 19 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 64 additions & 14 deletions src/databricks/labs/ucx/azure/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import uuid
from dataclasses import dataclass
from functools import partial
import re
from collections.abc import ValuesView
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.parallel import ManyError, Threads
Expand All @@ -16,6 +18,7 @@
AzureResources,
PrincipalSecret,
StorageAccount,
AzureRoleAssignment,
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
)
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.hive_metastore.locations import ExternalLocations
Expand Down Expand Up @@ -54,28 +57,75 @@ def __init__(
"Storage Blob Data Owner": Privilege.WRITE_FILES,
"Storage Blob Data Reader": Privilege.READ_FILES,
}
self._permission_levels = {
"Microsoft.Storage/storageAccounts/blobServices/containers/write": Privilege.WRITE_FILES,
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write": Privilege.WRITE_FILES,
"Microsoft.Storage/storageAccounts/blobServices/containers/read": Privilege.READ_FILES,
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read": Privilege.READ_FILES,
}

def _get_permission_level(self, permission_to_match: str) -> Privilege | None:
# String might contain '*', check for wildcard match
pattern = re.sub(r'\*', '.*', permission_to_match)
permission_compiled = re.compile(pattern)
for each_level, privilege_level in self._permission_levels.items():
# Check for storage blob permission with regex to account for star pattern
match = permission_compiled.match(each_level)
if match:
# If a write permission is found, no need to check for read permissions
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
return privilege_level
return None

def _get_custom_role_privilege(self, role_permissions: list[str]) -> Privilege | None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might make sense to let this return a list of privileges and select the highest privilege in _map_storage. Now the logic for selecting the highest priviliges is duplicated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JCZuurmond - The method _get_custom_role_privilege() gets the higher privilege for one custom role assignment. A custom role can contain multiple permissions, and this will get the higher privilege from these permissions. Method _map_storage() will get the higher privilege from a custom or builtin role if more than one role assignments exist for one principal. If I want to return a list of privileges for one custom role, then I will need to iterate through all permissions, whereas now I am exiting from the loop if I find one Write privilege. Please let me know if you think this makes sense.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, that is also true. I am a bit indifferent about this, therefore, I would not let it block this PR.

Pros: exit loop early, a pattern you see more often in ucx
Cons: treating WRITE_FILES > READ_FILES in two location, speed gain is probably insignifficant

# If both read and write privileges are found, only write privilege will be considered
higher_privilege = None
for each_permission in role_permissions:
privilege = self._get_permission_level(each_permission)
if not privilege:
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
continue
# WRITE_FILES is the higher permission, don't need to check further
if privilege == Privilege.WRITE_FILES:
return privilege
if privilege == Privilege.READ_FILES:
higher_privilege = privilege
return higher_privilege

def _get_role_privilege(self, role_assignment: AzureRoleAssignment) -> Privilege | None:
privilege = None
# Check for custom role permissions on the storage accounts
if role_assignment.role_permissions:
privilege = self._get_custom_role_privilege(role_assignment.role_permissions)
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
elif role_assignment.role_name in self._levels:
privilege = self._levels[role_assignment.role_name]
nkvuong marked this conversation as resolved.
Show resolved Hide resolved
return privilege

def _map_storage(self, storage: StorageAccount) -> list[StoragePermissionMapping]:
def _map_storage(self, storage: StorageAccount) -> ValuesView[StoragePermissionMapping]:
logger.info(f"Fetching role assignment for {storage.name}")
out = []
principal_spm_mapping: dict[str, StoragePermissionMapping] = {}
for container in self._azurerm.containers(storage.id):
for role_assignment in self._azurerm.role_assignments(str(container)):
# Skip the role assignments that already have WRITE_FILES privilege
spm_mapping_key = f"{container.container}_{role_assignment.principal.client_id}"
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
if (
spm_mapping_key in principal_spm_mapping
and principal_spm_mapping[spm_mapping_key].privilege == Privilege.WRITE_FILES.value
):
continue
# one principal may be assigned multiple roles with overlapping dataActions, hence appearing
# here in duplicates. hence, role name -> permission level is not enough for the perfect scenario.
if role_assignment.role_name not in self._levels:
returned_privilege = self._get_role_privilege(role_assignment)
if not returned_privilege:
continue
privilege = self._levels[role_assignment.role_name].value
out.append(
StoragePermissionMapping(
prefix=f"abfss://{container.container}@{container.storage_account}.dfs.core.windows.net/",
client_id=role_assignment.principal.client_id,
principal=role_assignment.principal.display_name,
privilege=privilege,
type=role_assignment.principal.type,
directory_id=role_assignment.principal.directory_id,
)
privilege = returned_privilege.value
principal_spm_mapping[spm_mapping_key] = StoragePermissionMapping(
prefix=f"abfss://{container.container}@{container.storage_account}.dfs.core.windows.net/",
client_id=role_assignment.principal.client_id,
principal=role_assignment.principal.display_name,
privilege=privilege,
type=role_assignment.principal.type,
directory_id=role_assignment.principal.directory_id,
)
return out
return principal_spm_mapping.values()

def save_spn_permissions(self) -> str | None:
used_storage_accounts = self._get_storage_accounts()
Expand Down
41 changes: 33 additions & 8 deletions src/databricks/labs/ucx/azure/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ class AzureRoleAssignment:
scope: AzureResource
principal: Principal
role_name: str
role_type: str
role_permissions: list[str]


@dataclass
class AzureRoleDetails:
role_name: str | None
role_type: str
role_permissions: list[str]


@dataclass
Expand Down Expand Up @@ -270,7 +279,7 @@ def __init__(self, azure_mgmt: AzureAPIClient, azure_graph: AzureAPIClient, incl
self._mgmt = azure_mgmt
self._graph = azure_graph
self._include_subscriptions = include_subscriptions
self._role_definitions = {} # type: dict[str, str]
self._role_definitions = {} # type: dict[str, AzureRoleDetails]
self._principals: dict[str, Principal | None] = {}

def _get_subscriptions(self) -> Iterable[AzureSubscription]:
Expand Down Expand Up @@ -435,7 +444,8 @@ def _role_assignment(
scope = assignment_properties.get("scope")
if not scope:
return None
role_name = self._role_name(role_definition_id)
role_details = self._role_name(role_definition_id)
role_name = role_details.role_name
if not role_name:
return None
principal = self._get_principal(principal_id)
Expand All @@ -444,18 +454,33 @@ def _role_assignment(
if scope == "/":
scope = resource_id
return AzureRoleAssignment(
resource=AzureResource(resource_id), scope=AzureResource(scope), principal=principal, role_name=role_name
resource=AzureResource(resource_id),
scope=AzureResource(scope),
principal=principal,
role_name=role_name,
role_type=role_details.role_type,
role_permissions=role_details.role_permissions,
)

def _role_name(self, role_definition_id) -> str | None:
def _role_name(self, role_definition_id) -> AzureRoleDetails:
if role_definition_id not in self._role_definitions:
role_definition = self._mgmt.get(role_definition_id, "2022-04-01")
definition_properties = role_definition.get("properties", {})
role_name: str = definition_properties.get("roleName")
role_name = definition_properties.get("roleName")
if not role_name:
return None
self._role_definitions[role_definition_id] = role_name
return self._role_definitions.get(role_definition_id)
return AzureRoleDetails(role_name=None, role_type='BuiltInRole', role_permissions=[])
role_type = definition_properties.get("type", "BuiltInRole")
role_permissions = []
if role_type == 'CustomRole':
role_permissions_list = definition_properties.get("permissions", [])
for each_role_permissions in role_permissions_list:
role_permissions = each_role_permissions.get("actions", []) + each_role_permissions.get(
"dataActions", []
)
self._role_definitions[role_definition_id] = AzureRoleDetails(
role_name=role_name, role_type=role_type, role_permissions=role_permissions
)
return self._role_definitions[role_definition_id]

def managed_identity_client_id(
self, access_connector_id: str, user_assigned_identity_id: str | None = None
Expand Down
98 changes: 98 additions & 0 deletions tests/unit/azure/azure/mappings.json
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,110 @@
}
]
},
"subscriptions/002/resourceGroups/rg1/storageAccounts/sto4/providers/Microsoft.Authorization/roleAssignments": {
"value": [
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "inv001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol2"
},
{
"properties": {
"principalId": "user2",
"roleDefinitionId": "inv001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol2"
},
{
"properties": {
"principalId": "user2",
"principalType": "User",
"roleDefinitionId": "inv001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol2"
},
{
"properties": {
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001",
"scope": "/"
},
"id": "custrol1"
}
]
},
"id002": {
"id": "role2",
"properties": {
"roleName": "Storage Blob Data Owner"
}
},
"inv001": {
"id": "inv001",
"properties": {}
},
"customroleid001": {
"id": "customroleid001",
"properties": {
"roleName": "custom_role_001",
"type": "CustomRole",
"permissions": [
{
"actions": [
"Microsoft.Storage/storageAccounts/blobServices/containers/read",
"Microsoft.Storage/storageAccounts/blobServices/containers/write"
],
"notActions": [],
"dataActions": [
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete",
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read",
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write"
],
"notDataActions": []
}
]
}
},
"subscriptions/002/resourceGroups/rg1/storageAccounts/sto2/containers/container1/providers/Microsoft.Authorization/roleAssignments": {
"value": [
{
Expand Down
Loading
Loading