Skip to content

Commit

Permalink
work
Browse files Browse the repository at this point in the history
  • Loading branch information
nkvuong committed Feb 22, 2024
1 parent d731f3c commit 7a7ab13
Show file tree
Hide file tree
Showing 6 changed files with 385 additions and 8 deletions.
19 changes: 12 additions & 7 deletions src/databricks/labs/ucx/assessment/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ def add_uc_role_policy(self, role_name, policy_name, s3_prefixes: set[str], acco

policy_document_json = self._get_json_for_cli(policy_document)
if not self._run_command(
f"iam put-role-policy --role-name {role_name} --policy-name {policy_name} --policy-document {policy_document_json}"
f"iam put-role-policy --role-name {role_name} "
f"--policy-name {policy_name} --policy-document {policy_document_json}"
):
return False
return True
Expand Down Expand Up @@ -324,6 +325,7 @@ def __init__(
self._schema = schema
self._aws_account_id = aws_account_id
self._kms_key = kms_key
self._filename = self.INSTANCE_PROFILES_FILE_NAMES

@classmethod
def for_cli(cls, ws: WorkspaceClient, backend, aws_profile, schema, kms_key=None, product='ucx'):
Expand All @@ -337,9 +339,9 @@ def for_cli(cls, ws: WorkspaceClient, backend, aws_profile, schema, kms_key=None
ws,
backend,
aws,
schema=schema,
aws_account_id=caller_identity.get("Account"),
kms_key=kms_key,
schema,
caller_identity.get("Account"),
kms_key,
)

def save_uc_compatible_roles(self):
Expand Down Expand Up @@ -367,7 +369,7 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
if single_role:
if self._aws_resources.add_uc_role(role_name):
self._aws_resources.add_uc_role_policy(
role_name, policy_name, s3_prefixes, account_id=self._aws_account_id, kms_key=self._kms_key
role_name, policy_name, s3_prefixes, self._aws_account_id, self._kms_key
)
else:
role_id = 1
Expand All @@ -377,8 +379,8 @@ def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_n
f"{role_name}-{role_id}",
f"{policy_name}-{role_id}",
{s3_prefix},
account_id=self._aws_account_id,
kms_key=self._kms_key,
self._aws_account_id,
self._kms_key,
)
role_id += 1

Expand Down Expand Up @@ -454,6 +456,9 @@ def _identify_missing_paths(self):
missing_paths.add(external_location.location)
return missing_paths

def load(self):
    """Load the saved instance-profile records from the installation.

    Reads the file named by ``self._filename`` through ``self._installation``
    and returns whatever it deserializes as ``list[AWSInstanceProfile]``.
    """
    instance_profiles = self._installation.load(list[AWSInstanceProfile], filename=self._filename)
    return instance_profiles

def save_instance_profile_permissions(self) -> str | None:
instance_profile_access = list(self._get_instance_profiles_access())
if len(instance_profile_access) == 0:
Expand Down
159 changes: 159 additions & 0 deletions src/databricks/labs/ucx/aws/credentials.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import logging
from dataclasses import dataclass

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import AwsIamRole, StorageCredentialInfo

from databricks.labs.ucx.assessment.aws import (
AWSInstanceProfile,
AWSResourcePermissions,
AWSResources,
)
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend

logger = logging.getLogger(__name__)


@dataclass
class StorageCredentialValidationResult:
    """One row of the instance-profile migration report.

    All fields default to ``None`` so a row can be built for a failure that
    happened before any storage credential existed.
    """

    # Name of the storage credential, if one was created.
    name: str | None = None
    # ARN of the IAM role behind the credential, if any.
    role_arn: str | None = None
    # Never populated by from_validation; presumably reserved for the location
    # a credential was validated against -- TODO confirm with the author.
    validated_on: str | None = None
    # Error text when creating the credential failed; None on success.
    failures: str | None = None

    @classmethod
    def from_validation(cls, storage_credential: StorageCredentialInfo, failures: str | None):
        """Build a result row from a storage credential and an optional failure message.

        Args:
            storage_credential: credential to report on; its ``aws_iam_role`` may be unset.
            failures: error text to record, or ``None`` when there was no error.

        Returns:
            A result whose ``role_arn`` is taken from the credential's IAM role
            when present, and whose ``failures`` holds the given message.
        """
        iam_role = storage_credential.aws_iam_role
        role_arn = iam_role.role_arn if iam_role else None
        # Pass ``failures`` by keyword: the third positional field is
        # ``validated_on``, so a positional call would misfile the error text.
        return cls(storage_credential.name, role_arn, failures=failures)


class StorageCredentialManager:
    """Lists and creates UC storage credentials backed by AWS IAM roles."""

    def __init__(self, ws: WorkspaceClient):
        """Keep the workspace client used for all storage-credential calls."""
        self._ws = ws

    def list(self) -> set[str]:
        """Return the distinct IAM role ARNs used by existing storage credentials.

        Storage credentials without an ``aws_iam_role`` are ignored.
        """
        # NOTE(review): max_results=0 is passed through as-is; presumably it asks
        # the API for an unpaginated listing -- confirm against the SDK docs.
        role_arns = {
            storage_credential.aws_iam_role.role_arn
            for storage_credential in self._ws.storage_credentials.list(max_results=0)
            if storage_credential.aws_iam_role
        }
        logger.info(f"Found {len(role_arns)} distinct IAM roles already used in UC storage credentials")
        return role_arns

    def create(self, iam: AWSInstanceProfile) -> StorageCredentialValidationResult:
        """Create a storage credential for the IAM role of one instance profile.

        Never raises for a failed creation: the error is recorded in the returned
        result so the remaining roles in a batch can still be processed.

        Args:
            iam: instance profile whose ``iam_role_arn`` / ``role_name`` are used.

        Returns:
            A result describing the created credential, or a result whose
            ``failures`` explains why nothing was created.
        """
        if iam.iam_role_arn is None:
            logger.warning("IAM role ARN is None, skipping.")
            return StorageCredentialValidationResult(failures="IAM role ARN is None.")

        role_name = None
        try:
            role_name = iam.role_name
            storage_credential = self._ws.storage_credentials.create(
                role_name,
                aws_iam_role=AwsIamRole(iam.iam_role_arn),
                comment=f"Created by UCX during migration to UC using AWS instance profile: {role_name}",
            )
        except Exception as e:  # deliberate best-effort: report the failure instead of aborting the batch
            logger.warning(f"There is an error while creating the storage credential for {iam.iam_role_arn}: {e}")
            # Keep the role identity on the failure row so the report shows which role failed.
            return StorageCredentialValidationResult(role_name, iam.iam_role_arn, failures=str(e))
        return StorageCredentialValidationResult.from_validation(storage_credential, None)


class InstanceProfileMigration:
    """Migrates the IAM roles of AWS instance profiles to UC storage credentials."""

    def __init__(
        self,
        installation: Installation,
        ws: WorkspaceClient,
        resource_permissions: AWSResourcePermissions,
        storage_credential_manager: StorageCredentialManager,
    ):
        """Store collaborators; results are later saved to a fixed CSV file name."""
        self._output_file = "aws_instance_profile_migration_result.csv"
        self._installation = installation
        self._ws = ws
        self._resource_permissions = resource_permissions
        self._storage_credential_manager = storage_credential_manager

    @classmethod
    def for_cli(cls, ws: WorkspaceClient, aws_profile: str, prompts: Prompts, product='ucx'):
        """Build a migration from CLI inputs.

        Raises:
            SystemExit: if the workspace is not on AWS, or the user does not
                confirm having reviewed the instance profile file.
        """
        if not ws.config.is_aws:
            logger.error("Workspace is not on AWS, please run this command on a Databricks on AWS workspaces.")
            raise SystemExit()

        msg = (
            f"Have you reviewed the {AWSResourcePermissions.INSTANCE_PROFILES_FILE_NAMES} "
            "and confirm listed instance profiles to be migrated?"
        )
        if not prompts.confirm(msg):
            raise SystemExit()

        installation = Installation.current(ws, product)
        config = installation.load(WorkspaceConfig)
        sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
        aws = AWSResources(aws_profile)
        resource_permissions = AWSResourcePermissions(installation, ws, sql_backend, aws, config.inventory_database)
        storage_credential_manager = StorageCredentialManager(ws)

        return cls(installation, ws, resource_permissions, storage_credential_manager)

    @staticmethod
    def _print_action_plan(iam_list: list[AWSInstanceProfile]):
        """Log the name and ARN of every IAM role that is about to be migrated."""
        for iam in iam_list:
            logger.info(f"IAM Role name: {iam.role_name}, IAM Role ARN: {iam.iam_role_arn}")

    def _generate_migration_list(self) -> list[AWSInstanceProfile]:
        """Return the instance profiles whose IAM role is not yet a storage credential.

        Loads the saved instance profiles, drops those whose role ARN is already
        used by an existing storage credential, and logs the remaining ones as
        the action plan for the user to review.
        """
        iam_list = self._resource_permissions.load()
        # ARNs already backing a UC storage credential must not be migrated again.
        sc_set = self._storage_credential_manager.list()
        filtered_iam_list = [iam for iam in iam_list if iam.iam_role_arn not in sc_set]

        self._print_action_plan(filtered_iam_list)
        return filtered_iam_list

    def save(self, migration_results: list[StorageCredentialValidationResult]) -> str:
        """Save the results through the installation and return what ``save`` returns."""
        return self._installation.save(migration_results, filename=self._output_file)

    def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]:
        """Show the plan, ask for confirmation, then create the storage credentials.

        Returns:
            One result per processed instance profile, or an empty list when the
            user does not confirm the plan. Results are saved only when non-empty.
        """
        iam_list = self._generate_migration_list()

        plan_confirmed = prompts.confirm(
            "Above Instance Profiles will be migrated to UC storage credentials, please review and confirm."
        )
        if plan_confirmed is not True:
            return []

        execution_result = [self._storage_credential_manager.create(iam) for iam in iam_list]

        if execution_result:
            results_file = self.save(execution_result)
            logger.info(
                "Completed migration from Instance Profile to UC Storage credentials. "
                f"Please check {results_file} for validation results"
            )
        else:
            logger.info("No Instance Profile migrated to UC Storage credentials")
        return execution_result
25 changes: 25 additions & 0 deletions src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from databricks.labs.ucx.account import AccountWorkspaces, WorkspaceInfo
from databricks.labs.ucx.assessment.aws import AWSResourcePermissions
from databricks.labs.ucx.aws.credentials import InstanceProfileMigration
from databricks.labs.ucx.azure.access import AzureResourcePermissions
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
Expand Down Expand Up @@ -282,5 +283,29 @@ def _aws_principal_prefix_access(w: WorkspaceClient, aws_profile: str):
logger.info(f"UC roles and bucket info saved {uc_role_path}")


def _aws_migration(w: WorkspaceClient, aws_profile: str):
    """Run the instance-profile to storage-credential migration for an AWS workspace.

    The same ``Prompts`` instance is used both to build the migration and to
    confirm the plan while running it.
    """
    logger.info("Migrating instance profiles to UC storage credentials")
    prompts = Prompts()
    InstanceProfileMigration.for_cli(w, aws_profile, prompts).run(prompts)


@ucx.command
def migrate_credentials(w: WorkspaceClient, aws_profile: str | None = None):
    """Migrate AWS instance profiles to UC storage credentials.

    The AWS profile comes from ``aws_profile`` or, when that is empty, from the
    ``AWS_DEFAULT_PROFILE`` environment variable. Logs an error and returns
    ``None`` when the workspace is not on AWS or no profile can be determined.
    """
    if not w.config.is_aws:
        # NOTE(review): the message mentions azure, but only the AWS path is handled here.
        logger.error("This cmd is only supported for azure and aws workspaces")
        return None

    profile = aws_profile or os.getenv("AWS_DEFAULT_PROFILE")
    if not profile:
        logger.error(
            "AWS Profile is not specified. Use the environment variable [AWS_DEFAULT_PROFILE] "
            "or use the '--aws-profile=[profile-name]' parameter."
        )
        return None
    return _aws_migration(w, profile)


if __name__ == "__main__":
ucx()
Empty file added tests/unit/aws/__init__.py
Empty file.
Loading

0 comments on commit 7a7ab13

Please sign in to comment.