Skip to content

Commit

Permalink
Create storage credentials based on instance profiles and existing ro…
Browse files Browse the repository at this point in the history
…les. (#869)
  • Loading branch information
FastLee authored and dmoore247 committed Mar 23, 2024
1 parent e159977 commit 00b39d1
Show file tree
Hide file tree
Showing 4 changed files with 420 additions and 15 deletions.
173 changes: 165 additions & 8 deletions src/databricks/labs/ucx/assessment/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@
from collections.abc import Callable, Iterable
from dataclasses import dataclass
from functools import lru_cache, partial
from pathlib import PurePath

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.parallel import Threads
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import ResourceDoesNotExist
from databricks.sdk.service.catalog import Privilege

from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
from databricks.labs.ucx.hive_metastore import ExternalLocations

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -71,6 +76,8 @@ class AWSResources:
S3_ACTIONS: typing.ClassVar[set[str]] = {"s3:PutObject", "s3:GetObject", "s3:DeleteObject", "s3:PutObjectAcl"}
S3_READONLY: typing.ClassVar[str] = "s3:GetObject"
S3_REGEX: typing.ClassVar[str] = r"arn:aws:s3:::([a-zA-Z0-9+=,.@_-]*)\/\*$"
S3_PREFIX: typing.ClassVar[str] = "arn:aws:s3:::"
S3_PATH_REGEX: typing.ClassVar[str] = r"((s3:\/\/)|(s3a:\/\/))(.*)"
UC_MASTER_ROLES_ARN: typing.ClassVar[list[str]] = [
"arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL",
"arn:aws:iam::707343435239:role/unity-catalog-dev-UCMasterRole-G3MMN8SP21FO",
Expand All @@ -85,8 +92,8 @@ def validate_connection(self):
result = self._run_json_command(validate_command)
if result:
logger.info(result)
return True
return False
return result
return None

def list_role_policies(self, role_name: str):
list_policies_cmd = f"iam list-role-policies --profile {self._profile} --role-name {role_name}"
Expand Down Expand Up @@ -206,6 +213,70 @@ def get_role_policy(self, role_name, policy_name: str | None = None, attached_po
policy_actions.append(AWSPolicyAction("s3", privilege, f"s3a://{match.group(1)}"))
return policy_actions

def add_uc_role(self, role_name):
aws_role_trust_doc: dict = {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"AWS": "arn:aws:iam::414351767826:role/unity-catalog-prod-UCMasterRole-14S5ZJVKOTYTL"
},
"Action": "sts:AssumeRole",
"Condition": {"StringEquals": {"sts:ExternalId": "0000"}},
}
],
}
# the AssumeRole condition will be modified with the external ID captured from the UC credential.
# https://docs.databricks.com/en/connect/unity-catalog/storage-credentials.html
assume_role_json = self._get_json_for_cli(aws_role_trust_doc)
add_role = self._run_json_command(
f"iam create-role --role-name {role_name} --assume-role-policy-document {assume_role_json}"
)
if not add_role:
return False
return True

def add_uc_role_policy(self, role_name, policy_name, s3_prefixes: set[str], account_id: str, kms_key=None):
s3_prefixes_enriched = sorted([self.S3_PREFIX + s3_prefix for s3_prefix in s3_prefixes])
statement = [
{
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
"s3:ListBucket",
"s3:GetBucketLocation",
],
"Resource": s3_prefixes_enriched,
"Effect": "Allow",
},
{
"Action": ["sts:AssumeRole"],
"Resource": [f"arn:aws:iam::{account_id}:role/{role_name}"],
"Effect": "Allow",
},
]
if kms_key:
statement.append(
{
"Action": ["kms:Decrypt", "kms:Encrypt", "kms:GenerateDataKey*"],
"Resource": [f"arn:aws:kms:{kms_key}"],
"Effect": "Allow",
}
)
policy_document = {
"Version": "2012-10-17",
"Statement": statement,
}

policy_document_json = self._get_json_for_cli(policy_document)
if not self._run_command(
f"iam put-role-policy --role-name {role_name} --policy-name {policy_name} --policy-document {policy_document_json}"
):
return False
return True

def _run_json_command(self, command: str):
aws_cmd = shutil.which("aws")
code, output, error = self._command_runner(f"{aws_cmd} {command} --output json")
Expand All @@ -214,27 +285,97 @@ def _run_json_command(self, command: str):
return None
return json.loads(output)

def _run_command(self, command: str):
aws_cmd = shutil.which("aws")
code, _, error = self._command_runner(f"{aws_cmd} {command} --output json")
if code != 0:
logger.error(error)
return False
return True

@staticmethod
def _get_json_for_cli(input_json: dict) -> str:
return json.dumps(input_json).replace('\n', '').replace(" ", "")


class AWSResourcePermissions:
def __init__(self, installation: Installation, ws: WorkspaceClient, aws_resources: AWSResources):
UC_ROLES_FILE_NAMES: typing.ClassVar[str] = "uc_roles_access.csv"
INSTANCE_PROFILES_FILE_NAMES: typing.ClassVar[str] = "aws_instance_profile_info.csv"

def __init__(
self,
installation: Installation,
ws: WorkspaceClient,
backend: StatementExecutionBackend,
aws_resources: AWSResources,
schema: str,
aws_account_id=None,
kms_key=None,
):
self._installation = installation
self._aws_resources = aws_resources
self._backend = backend
self._ws = ws
self._schema = schema
self._aws_account_id = aws_account_id
self._kms_key = kms_key

@classmethod
def for_cli(cls, ws: WorkspaceClient, aws_profile, product='ucx'):
def for_cli(cls, ws: WorkspaceClient, backend, aws_profile, schema, kms_key=None, product='ucx'):
installation = Installation.current(ws, product)
aws = AWSResources(aws_profile)
if not aws.validate_connection():
caller_identity = aws.validate_connection()
if not caller_identity:
raise ResourceWarning("AWS CLI is not configured properly.")
return cls(installation, ws, aws)
return cls(
installation,
ws,
backend,
aws,
schema=schema,
aws_account_id=caller_identity.get("Account"),
kms_key=kms_key,
)

def save_uc_compatible_roles(self):
uc_role_access = list(self._get_role_access())
if len(uc_role_access) == 0:
logger.warning("No Mapping Was Generated.")
return None
return self._installation.save(uc_role_access, filename='uc_roles_access.csv')
return self._installation.save(uc_role_access, filename=self.UC_ROLES_FILE_NAMES)

def get_uc_compatible_roles(self):
try:
role_actions = self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
except ResourceDoesNotExist:
self.save_uc_compatible_roles()
role_actions = self._installation.load(list[AWSRoleAction], filename=self.UC_ROLES_FILE_NAMES)
return role_actions

def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_name="UC_POLICY"):
missing_paths = self._identify_missing_paths()
s3_prefixes = set()
for missing_path in missing_paths:
match = re.match(AWSResources.S3_PATH_REGEX, missing_path)
if match:
s3_prefixes.add(match.group(4))
if single_role:
if self._aws_resources.add_uc_role(role_name):
self._aws_resources.add_uc_role_policy(
role_name, policy_name, s3_prefixes, account_id=self._aws_account_id, kms_key=self._kms_key
)
else:
role_id = 1
for s3_prefix in sorted(list(s3_prefixes)):
if self._aws_resources.add_uc_role(f"{role_name}-{role_id}"):
self._aws_resources.add_uc_role_policy(
f"{role_name}-{role_id}",
f"{policy_name}-{role_id}",
{s3_prefix},
account_id=self._aws_account_id,
kms_key=self._kms_key,
)
role_id += 1

def _get_instance_profiles(self) -> Iterable[AWSInstanceProfile]:
instance_profiles = self._ws.instance_profiles.list()
Expand Down Expand Up @@ -292,9 +433,25 @@ def _get_role_access_task(self, arn: str, role_name: str):
)
return policy_actions

def _identify_missing_paths(self):
external_locations = ExternalLocations(self._ws, self._backend, self._schema).snapshot()
compatible_roles = self.get_uc_compatible_roles()
missing_paths = set()
for external_location in external_locations:
path = PurePath(external_location.location)
matching_role = False
for role in compatible_roles:
if path.match(role.resource_path):
matching_role = True
continue
if matching_role:
continue
missing_paths.add(external_location.location)
return missing_paths

def save_instance_profile_permissions(self) -> str | None:
instance_profile_access = list(self._get_instance_profiles_access())
if len(instance_profile_access) == 0:
logger.warning("No Mapping Was Generated.")
return None
return self._installation.save(instance_profile_access, filename='aws_instance_profile_info.csv')
return self._installation.save(instance_profile_access, filename=self.INSTANCE_PROFILES_FILE_NAMES)
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,10 @@ def principal_prefix_access(w: WorkspaceClient, subscription_id: str | None = No
)
return None
logger.info("Generating instance profile and bucket permission info")
aws_permissions = AWSResourcePermissions.for_cli(w, aws_profile)
installation = Installation.current(w, 'ucx')
config = installation.load(WorkspaceConfig)
sql_backend = StatementExecutionBackend(w, config.warehouse_id)
aws_permissions = AWSResourcePermissions.for_cli(w, sql_backend, aws_profile, config.inventory_database)
instance_role_path = aws_permissions.save_instance_profile_permissions()
logger.info(f"Instance profile and bucket info saved {instance_role_path}")
logger.info("Generating UC roles and bucket permission info")
Expand Down
24 changes: 23 additions & 1 deletion tests/integration/assessment/test_aws.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
from databricks.labs.ucx.assessment.aws import AWSResources
from databricks.labs.ucx.assessment.aws import AWSResourcePermissions, AWSResources


def test_aws_validate(env_or_skip):
profile = env_or_skip("AWS_DEFAULT_PROFILE")
aws = AWSResources(profile)
assert aws.validate_connection()


def test_get_uc_compatible_roles(ws, sql_backend, env_or_skip, inventory_schema):
profile = env_or_skip("AWS_DEFAULT_PROFILE")
awsrp = AWSResourcePermissions.for_cli(ws, sql_backend, profile, inventory_schema)
compat_roles = awsrp.get_uc_compatible_roles()
print(compat_roles)
assert compat_roles


def test_create_uc_role(env_or_skip, make_random):
profile = env_or_skip("AWS_DEFAULT_PROFILE")
aws = AWSResources(profile)
rand = make_random(5)
role_name = f"UCX_ROLE_{rand}"
policy_name = f"UCX_POLICY_{rand}"
account_id = aws.validate_connection().get("Account")
s3_prefixes = {"BUCKET1/FOLDER1", "BUCKET1/FOLDER1/*", "BUCKET2/FOLDER2", "BUCKET2/FOLDER2/*"}
aws.add_uc_role(role_name)
aws.add_uc_role_policy(role_name, policy_name, s3_prefixes, account_id)
uc_roles = aws.list_all_uc_roles()
assert role_name in [role.role_name for role in uc_roles]
Loading

0 comments on commit 00b39d1

Please sign in to comment.