Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added CLI Command databricks labs ucx save-uc-compatible-roles #863

Merged
merged 16 commits into from
Feb 4, 2024
8 changes: 8 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,11 @@ commands:
flags:
- name: aws-profile
description: AWS Profile to use for authentication

- name: save-uc-compatible-roles
description: |
Scan all the AWS roles that are set for UC access and produce a mapping to the S3 resources.
Requires a working setup of AWS CLI.
flags:
- name: aws-profile
description: AWS Profile to use for authentication
102 changes: 81 additions & 21 deletions src/databricks/labs/ucx/assessment/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,11 @@


@dataclass
class AWSInstanceProfileAction:
instance_profile_arn: str
class AWSRoleAction:
role_arn: str
resource_type: str
privilege: str
resource_path: str
iam_role_arn: str | None = None


@dataclass
Expand Down Expand Up @@ -72,6 +71,10 @@
S3_ACTIONS: typing.ClassVar[set[str]] = {"s3:PutObject", "s3:GetObject", "s3:DeleteObject", "s3:PutObjectAcl"}
S3_READONLY: typing.ClassVar[str] = "s3:GetObject"
S3_REGEX: typing.ClassVar[str] = r"arn:aws:s3:::([a-zA-Z0-9+=,.@_-]*)\/\*$"
UC_MASTER_ROLES_ARN: typing.ClassVar[list[str]] = [
"arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL",
FastLee marked this conversation as resolved.
Show resolved Hide resolved
"arn:aws:iam::707343435239:role/unity-catalog-dev-UCMasterRole-G3MMN8SP21FO",
]

def __init__(self, profile: str, command_runner: Callable[[str], tuple[int, str, str]] = run_command):
self._profile = profile
Expand Down Expand Up @@ -104,11 +107,55 @@
attached_policies.append(policy.get("PolicyArn"))
return attached_policies

def list_all_uc_roles(self):
    """List the IAM roles whose trust policy lets a UC master role assume them.

    Runs ``aws iam list-roles`` for the configured profile and inspects each
    role's ``AssumeRolePolicyDocument``. A role qualifies when at least one
    statement has ``Effect`` ``Allow``, grants ``sts:AssumeRole`` and names one
    of ``UC_MASTER_ROLES_ARN`` as an ``AWS`` principal.

    Returns:
        A list of ``AWSRole`` objects, one per qualifying role. The list is
        empty when the command fails or returns no roles.
    """
    response = self._run_json_command(f"iam list-roles --profile {self._profile}")
    # _run_json_command returns None when the AWS CLI exits with a non-zero code.
    roles = response.get("Roles") if response else None
    if not roles:
        logger.warning("list-roles couldn't find any roles")
        return []

    uc_roles = []
    for role in roles:
        policy_document = role.get("AssumeRolePolicyDocument")
        if not policy_document:
            continue
        # IAM allows "Statement" to be a single object or a list of objects.
        statements = policy_document.get("Statement") or []
        if isinstance(statements, dict):
            statements = [statements]
        for statement in statements:
            if statement.get("Effect") != "Allow":
                continue
            # "Action" may be a single string or a list of strings.
            action = statement.get("Action")
            actions = [action] if isinstance(action, str) else (action or [])
            if "sts:AssumeRole" not in actions:
                continue
            # "Principal" may be the bare wildcard string "*", which has no "AWS" key.
            principal = statement.get("Principal")
            if not isinstance(principal, dict):
                continue
            aws_principal = principal.get("AWS")
            if not aws_principal:
                continue
            principals = [aws_principal] if isinstance(aws_principal, str) else aws_principal
            if not any(candidate in self.UC_MASTER_ROLES_ARN for candidate in principals):
                continue
            uc_roles.append(
                AWSRole(
                    role_id=role.get("RoleId"),
                    role_name=role.get("RoleName"),
                    arn=role.get("Arn"),
                    path=role.get("Path"),
                )
            )
            # One matching statement is enough; stop so the role is not listed twice.
            break

    return uc_roles

def get_role_policy(self, role_name, policy_name: str | None = None, attached_policy_arn: str | None = None):
if policy_name:
get_policy = (
f"iam get-role-policy --profile {self._profile} --role-name {role_name} "
f"--policy-name {policy_name} --no-paginate"
f"iam get-role-policy --profile {self._profile} --role-name {role_name} " f"--policy-name {policy_name}"
FastLee marked this conversation as resolved.
Show resolved Hide resolved
)
elif attached_policy_arn:
get_attached_policy = f"iam get-policy --profile {self._profile} --policy-arn {attached_policy_arn}"
Expand All @@ -118,7 +165,7 @@
policy_version = attached_policy["Policy"]["DefaultVersionId"]
get_policy = (
f"iam get-policy-version --profile {self._profile} --policy-arn {attached_policy_arn} "
f"--version-id {policy_version} --no-paginate"
f"--version-id {policy_version}"
)
else:
logger.error("Failed to retrieve role. No role name or attached role ARN specified.")
Expand Down Expand Up @@ -161,7 +208,7 @@

def _run_json_command(self, command: str):
aws_cmd = shutil.which("aws")
code, output, error = self._command_runner(f"{aws_cmd} {command} --output json --no-paginate")
code, output, error = self._command_runner(f"{aws_cmd} {command} --output json")
if code != 0:
logger.error(error)
return None
Expand All @@ -182,6 +229,13 @@
raise ResourceWarning("AWS CLI is not configured properly.")
return cls(installation, ws, aws)

def save_uc_compatible_roles(self):
    """Write the collected UC role access rows to ``uc_roles_access.csv``.

    Collects everything produced by ``_get_role_access`` and hands it to the
    installation's ``save``. When nothing was collected, a warning is logged
    and nothing is saved.

    Returns:
        Whatever ``self._installation.save`` returns, or ``None`` when there
        is no mapping to save.
    """
    role_actions = list(self._get_role_access())
    if role_actions:
        return self._installation.save(role_actions, filename='uc_roles_access.csv')
    logger.warning("No Mapping Was Generated.")
    return None

def _get_instance_profiles(self) -> Iterable[AWSInstanceProfile]:
instance_profiles = self._ws.instance_profiles.list()
result_instance_profiles = []
Expand All @@ -196,38 +250,44 @@
instance_profiles = list(self._get_instance_profiles())
tasks = []
for instance_profile in instance_profiles:
tasks.append(partial(self._get_instance_profile_access_task, instance_profile))
tasks.append(
partial(self._get_role_access_task, instance_profile.instance_profile_arn, instance_profile.role_name)
)
# Aggregating the outputs from all the tasks
return sum(Threads.strict("Scanning Instance Profiles", tasks), [])

def _get_instance_profile_access_task(self, instance_profile: AWSInstanceProfile):
def _get_role_access(self):
    """Collect the access entries of every UC-compatible role.

    Builds one ``_get_role_access_task`` call per role returned by
    ``list_all_uc_roles`` and runs them through ``Threads.strict``.

    Returns:
        A single list made by concatenating the per-role task results.
    """
    scan_tasks = [
        partial(self._get_role_access_task, uc_role.arn, uc_role.role_name)
        for uc_role in list(self._aws_resources.list_all_uc_roles())
    ]
    # Each task yields a list; summing onto [] flattens them into one list.
    per_role_results = Threads.strict("Scanning Roles", scan_tasks)
    return sum(per_role_results, [])

def _get_role_access_task(self, arn: str, role_name: str):
policy_actions = []
policies = list(self._aws_resources.list_role_policies(instance_profile.role_name))
policies = list(self._aws_resources.list_role_policies(role_name))
for policy in policies:
actions = self._aws_resources.get_role_policy(instance_profile.role_name, policy_name=policy)
actions = self._aws_resources.get_role_policy(role_name, policy_name=policy)
for action in actions:
policy_actions.append(
AWSInstanceProfileAction(
instance_profile.instance_profile_arn,
AWSRoleAction(
arn,
action.resource_type,
action.privilege,
action.resource_path,
instance_profile.iam_role_arn,
)
)
attached_policies = self._aws_resources.list_attached_policies_in_role(instance_profile.role_name)
attached_policies = self._aws_resources.list_attached_policies_in_role(role_name)
for attached_policy in attached_policies:
actions = list(
self._aws_resources.get_role_policy(instance_profile.role_name, attached_policy_arn=attached_policy)
)
actions = list(self._aws_resources.get_role_policy(role_name, attached_policy_arn=attached_policy))
for action in actions:
policy_actions.append(
AWSInstanceProfileAction(
instance_profile.instance_profile_arn,
AWSRoleAction(
arn,
action.resource_type,
action.privilege,
action.resource_path,
instance_profile.iam_role_arn,
)
)
return policy_actions
Expand Down
28 changes: 28 additions & 0 deletions src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,5 +273,33 @@
return None


@ucx.command
def save_uc_compatible_roles(w: WorkspaceClient, *, aws_profile: str | None = None):
    """Save a mapping of UC-compatible IAM roles to the S3 resources they can access.

    Finds the IAM roles that have a trust relationship with the UC master role
    and maps them to the S3 buckets they have access to. The mapping is saved
    as a CSV in the UCX installation folder.

    Requires a working setup of the AWS CLI (https://aws.amazon.com/cli/).
    The user has to be authenticated with AWS and have the permissions to
    browse the resources and IAM services. More information:
    https://docs.aws.amazon.com/IAM/latest/UserGuide/access_permissions-required.html

    The profile is taken from ``aws_profile`` or, when that is not given, from
    the ``AWS_DEFAULT_PROFILE`` environment variable. An error is logged and
    nothing is saved if the CLI or the profile is missing.
    """
    aws_cli_path = shutil.which("aws")
    if not aws_cli_path:
        logger.error("Couldn't find AWS CLI in path.Please obtain and install the CLI from https://aws.amazon.com/cli/")
        return None

    # An explicit flag wins; the environment variable is only the fallback.
    profile_name = aws_profile or os.getenv("AWS_DEFAULT_PROFILE")
    if not profile_name:
        logger.error(
            "AWS Profile is not specified. Use the environment variable [AWS_DEFAULT_PROFILE] "
            "or use the '--aws-profile=[profile-name]' parameter."
        )
        return None

    permissions = AWSResourcePermissions.for_cli(w, profile_name)
    permissions.save_uc_compatible_roles()
    return None

Check warning on line 301 in src/databricks/labs/ucx/cli.py

View check run for this annotation

Codecov / codecov/patch

src/databricks/labs/ucx/cli.py#L300-L301

Added lines #L300 - L301 were not covered by tests


if __name__ == "__main__":
ucx()
Loading
Loading