From 4eb8b9a6bf666d7399d13a758b61a0630f6035c9 Mon Sep 17 00:00:00 2001 From: Vuong Date: Fri, 17 May 2024 16:05:05 +0100 Subject: [PATCH 1/8] retrieve `aws_account_id` from aws profile --- labs.yml | 2 ++ src/databricks/labs/ucx/assessment/aws.py | 7 ++++++- src/databricks/labs/ucx/aws/access.py | 3 +-- .../labs/ucx/contexts/workspace_cli.py | 1 - tests/integration/aws/test_access.py | 6 +++--- tests/unit/aws/test_access.py | 3 +++ tests/unit/test_cli.py | 18 +++++++++++++++++- 7 files changed, 32 insertions(+), 8 deletions(-) diff --git a/labs.yml b/labs.yml index 11617f30e3..4540145983 100644 --- a/labs.yml +++ b/labs.yml @@ -128,6 +128,8 @@ commands: flags: - name: aws-profile description: AWS Profile to use for authentication + - name: kms-key + description: (Optional) KMS Key to be specified for the UC roles - name: single-role description: (Optional) Create a single role for all the S3 locations (default:True) diff --git a/src/databricks/labs/ucx/assessment/aws.py b/src/databricks/labs/ucx/assessment/aws.py index d4217c4ca9..6228f582fb 100644 --- a/src/databricks/labs/ucx/assessment/aws.py +++ b/src/databricks/labs/ucx/assessment/aws.py @@ -331,7 +331,12 @@ def update_uc_trust_role(self, role_name: str, external_id: str = "0000") -> str return update_role["Role"]["Arn"] def put_role_policy( - self, role_name: str, policy_name: str, s3_prefixes: set[str], account_id: str, kms_key=None + self, + role_name: str, + policy_name: str, + s3_prefixes: set[str], + account_id: str, + kms_key=None, ) -> bool: if not self._run_command( f"iam put-role-policy --role-name {role_name} --policy-name {policy_name} " diff --git a/src/databricks/labs/ucx/aws/access.py b/src/databricks/labs/ucx/aws/access.py index 59748daa49..eef7014ec1 100644 --- a/src/databricks/labs/ucx/aws/access.py +++ b/src/databricks/labs/ucx/aws/access.py @@ -33,14 +33,13 @@ def __init__( ws: WorkspaceClient, aws_resources: AWSResources, external_locations: ExternalLocations, - aws_account_id=None, kms_key=None, ): self._installation = installation self._aws_resources = aws_resources self._ws = ws self._locations = external_locations - self._aws_account_id = aws_account_id + self._aws_account_id = aws_resources.validate_connection().get("Account") self._kms_key = kms_key def list_uc_roles(self, *, single_role=True, role_name="UC_ROLE", policy_name="UC_POLICY"): diff --git a/src/databricks/labs/ucx/contexts/workspace_cli.py b/src/databricks/labs/ucx/contexts/workspace_cli.py index b6a7a207e9..9c8bebe9ba 100644 --- a/src/databricks/labs/ucx/contexts/workspace_cli.py +++ b/src/databricks/labs/ucx/contexts/workspace_cli.py @@ -151,7 +151,6 @@ def aws_resource_permissions(self): self.workspace_client, self.aws_resources, self.external_locations, - self.named_parameters.get("aws_account_id"), self.named_parameters.get("kms_key"), ) diff --git a/tests/integration/aws/test_access.py b/tests/integration/aws/test_access.py index c092b1b186..cd5622d74d 100644 --- a/tests/integration/aws/test_access.py +++ b/tests/integration/aws/test_access.py @@ -76,9 +76,10 @@ def test_create_uber_instance_profile( ): profile = env_or_skip("AWS_DEFAULT_PROFILE") aws = AWSResources(profile) - account_id = aws.validate_connection().get("Account") sql_backend.save_table( - f"{inventory_schema}.external_locations", [ExternalLocation("s3://bucket1/FOLDER1", 1)], ExternalLocation + f"{inventory_schema}.external_locations", + [ExternalLocation("s3://bucket1/FOLDER1", 1)], + ExternalLocation, ) installation = Installation(ws, make_random(4)) # create a new policy @@ -91,7 +92,6 @@ def test_create_uber_instance_profile( aws, ExternalLocations(ws, sql_backend, inventory_schema), aws_cli_ctx.principal_acl, - account_id, ) aws_permissions.create_uber_principal( MockPrompts( diff --git a/tests/unit/aws/test_access.py b/tests/unit/aws/test_access.py index dab6553b80..ef41ed4eed 100644 --- a/tests/unit/aws/test_access.py +++ b/tests/unit/aws/test_access.py @@ -199,6 +199,7 @@ def test_create_uber_principal_existing_role_in_policy(mock_ws, mock_installatio ) mock_ws.cluster_policies.get.return_value = cluster_policy aws = create_autospec(AWSResources) + aws.validate_connection.return_value = {} aws.get_instance_profile.return_value = instance_profile_arn locations = ExternalLocations(mock_ws, backend, "ucx") prompts = MockPrompts({"We have identified existing UCX migration role *": "yes"}) @@ -310,6 +311,7 @@ def test_create_uber_principal_no_storage(mock_ws, mock_installation, locations) def test_create_uc_role_single(mock_ws, installation_single_role, backend, locations): aws = create_autospec(AWSResources) + aws.validate_connection.return_value = {} aws_resource_permissions = AWSResourcePermissions(installation_single_role, mock_ws, aws, locations) role_creation = IamRoleCreation(installation_single_role, mock_ws, aws_resource_permissions) aws.list_all_uc_roles.return_value = [] @@ -323,6 +325,7 @@ def test_create_uc_role_single(mock_ws, installation_single_role, backend, locat def test_create_uc_role_multiple(mock_ws, installation_single_role, backend, locations): aws = create_autospec(AWSResources) + aws.validate_connection.return_value = {} aws_resource_permissions = AWSResourcePermissions(installation_single_role, mock_ws, aws, locations) role_creation = IamRoleCreation(installation_single_role, mock_ws, aws_resource_permissions) aws.list_all_uc_roles.return_value = [] diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index d4e9526d28..2c9c2423b2 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -388,7 +388,23 @@ def test_migrate_locations_azure(ws): def test_migrate_locations_aws(ws, caplog): - ctx = WorkspaceContext(ws).replace(is_aws=True, is_azure=False, aws_profile="profile") + successful_return = """ + { + "UserId": "uu@mail.com", + "Account": "1234", + "Arn": "arn:aws:sts::1234:assumed-role/AWSVIEW/uu@mail.com" + } + """ + + def successful_call(_): + return 0, successful_return, "" + + ctx = WorkspaceContext(ws).replace( + is_aws=True, + is_azure=False, + aws_profile="profile", + aws_cli_run_command=successful_call, + ) migrate_locations(ws, ctx=ctx) ws.external_locations.list.assert_called() From 6019f5a471b3691c13875557b1161af42023e56e Mon Sep 17 00:00:00 2001 From: Vuong Date: Fri, 17 May 2024 16:24:00 +0100 Subject: [PATCH 2/8] test cov --- tests/unit/test_cli.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 2c9c2423b2..d8c384fa94 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -558,3 +558,10 @@ def test_migrate_dbsql_dashboards(ws, caplog): def test_revert_dbsql_dashboards(ws, caplog): revert_dbsql_dashboards(ws) ws.dashboards.list.assert_called_once() + + +def test_cli_missing_awscli(ws, mocker, caplog): + mocker.patch("shutil.which", side_effect=ValueError("Couldn't find AWS CLI in path")) + with pytest.raises(ValueError): + ctx = WorkspaceContext(ws).replace(is_aws=True, is_azure=False, aws_profile="profile") + migrate_locations(ws, ctx) From 70a3d345d9d7e0eb141502470f54388dc804210c Mon Sep 17 00:00:00 2001 From: Vuong Date: Mon, 20 May 2024 09:18:30 +0100 Subject: [PATCH 3/8] fix test --- tests/unit/aws/test_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/aws/test_access.py b/tests/unit/aws/test_access.py index 8b05919ae3..2187f7d9ce 100644 --- a/tests/unit/aws/test_access.py +++ b/tests/unit/aws/test_access.py @@ -200,7 +200,7 @@ def test_create_uber_principal_existing_role_in_policy(mock_ws, mock_installatio mock_ws.cluster_policies.get.return_value = cluster_policy aws = create_autospec(AWSResources) aws.validate_connection.return_value = {} - aws.get_instance_profile.return_value = instance_profile_arn + aws.get_instance_profile_arn.return_value = instance_profile_arn locations = ExternalLocations(mock_ws, backend, "ucx") prompts = MockPrompts({"We have identified existing UCX migration role *": "yes"}) aws_resource_permissions = AWSResourcePermissions( From 63e0a2d324874fa5ebcd176fa524f1adc89e42c4 Mon Sep 17 00:00:00 2001 From: Vuong Date: Mon, 20 May 2024 14:54:58 +0100 Subject: [PATCH 4/8] more missing flags --- labs.yml | 12 ++++++++---- src/databricks/labs/ucx/aws/credentials.py | 4 +++- src/databricks/labs/ucx/cli.py | 4 +++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/labs.yml b/labs.yml index 4540145983..0b4e8c3ee7 100644 --- a/labs.yml +++ b/labs.yml @@ -123,15 +123,19 @@ commands: - name: create-missing-principals description: For AWS, this command identifies all the S3 locations that are missing a UC compatible role and - creates them. It takes single-role optional parameter. - If set to True, it will create a single role for all the S3 locations. + creates them. It accepts a number of optional parameters, i.e. KMS Key, Role Name, Policy Name, and whether to + create a single role for all the S3 locations. flags: - name: aws-profile description: AWS Profile to use for authentication - name: kms-key - description: (Optional) KMS Key to be specified for the UC roles + description: (Optional) KMS Key to be specified for the UC roles. + - name: role-name + description: (Optional) IAM Role name to be specified for the UC roles. (default:UC_ROLE) + - name: policy-name + description: (Optional) IAM policy Name to be specified for the UC roles. (default:UC_POLICY) - name: single-role - description: (Optional) Create a single role for all the S3 locations (default:True) + description: (Optional) Create a single role for all the S3 locations. (default:True) - name: create-uber-principal description: For azure cloud, creates a service principal and gives STORAGE BLOB READER access on all the storage account diff --git a/src/databricks/labs/ucx/aws/credentials.py b/src/databricks/labs/ucx/aws/credentials.py index c11bd21987..0d0929aec8 100644 --- a/src/databricks/labs/ucx/aws/credentials.py +++ b/src/databricks/labs/ucx/aws/credentials.py @@ -208,7 +208,9 @@ def save(self, migration_results: list[CredentialValidationResult]) -> str: def run(self, prompts: Prompts, *, single_role=True, role_name="UC_ROLE", policy_name="UC_POLICY"): iam_list = self._resource_permissions.list_uc_roles( - single_role=single_role, role_name=role_name, policy_name=policy_name + single_role=single_role, + role_name=role_name, + policy_name=policy_name, ) if not iam_list: logger.info("No IAM Role created") diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 8cbdca8cc0..1991e75133 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -294,6 +294,8 @@ def create_missing_principals( prompts: Prompts, ctx: WorkspaceContext | None = None, single_role: bool = True, + role_name="UC_ROLE", + policy_name="UC_POLICY", **named_parameters, ): """Not supported for Azure. @@ -303,7 +305,7 @@ def create_missing_principals( if not ctx: ctx = WorkspaceContext(w, named_parameters) if ctx.is_aws: - return ctx.iam_role_creation.run(prompts, single_role=single_role) + return ctx.iam_role_creation.run(prompts, single_role=single_role, role_name=role_name, policy_name=policy_name) raise ValueError("Unsupported cloud provider") From 5bc39d81172968d81487b14ad9d69b31c82325c6 Mon Sep 17 00:00:00 2001 From: Vuong Date: Mon, 20 May 2024 17:15:51 +0100 Subject: [PATCH 5/8] update doc --- labs.yml | 2 +- src/databricks/labs/ucx/cli.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/labs.yml b/labs.yml index 0b4e8c3ee7..daebe3c631 100644 --- a/labs.yml +++ b/labs.yml @@ -135,7 +135,7 @@ commands: - name: policy-name description: (Optional) IAM policy Name to be specified for the UC roles. (default:UC_POLICY) - name: single-role - description: (Optional) Create a single role for all the S3 locations. (default:True) + description: (Optional) Create a single role for all S3 locations. (default:False) - name: create-uber-principal description: For azure cloud, creates a service principal and gives STORAGE BLOB READER access on all the storage account diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 1991e75133..056fd6d708 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -293,14 +293,14 @@ def create_missing_principals( w: WorkspaceClient, prompts: Prompts, ctx: WorkspaceContext | None = None, - single_role: bool = True, + single_role: bool=False, role_name="UC_ROLE", policy_name="UC_POLICY", **named_parameters, ): """Not supported for Azure. For AWS, this command identifies all the S3 locations that are missing a UC compatible role and creates them. - By default, it will create a single role for all S3. Set the optional single_role parameter to False, to create one role per S3 location. + By default, it will create a role per S3 location. Set the optional single_role parameter to True to create a single role for all S3 locations. """ if not ctx: ctx = WorkspaceContext(w, named_parameters) From b0075a7340433f6c4ddaa6001a4374893a337e74 Mon Sep 17 00:00:00 2001 From: Vuong Date: Mon, 20 May 2024 17:22:25 +0100 Subject: [PATCH 6/8] consistency --- src/databricks/labs/ucx/aws/credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/aws/credentials.py b/src/databricks/labs/ucx/aws/credentials.py index 0d0929aec8..34d77c60eb 100644 --- a/src/databricks/labs/ucx/aws/credentials.py +++ b/src/databricks/labs/ucx/aws/credentials.py @@ -205,7 +205,7 @@ def _print_action_plan(iam_list: list[AWSUCRoleCandidate]): def save(self, migration_results: list[CredentialValidationResult]) -> str: return self._installation.save(migration_results, filename=self._output_file) - def run(self, prompts: Prompts, *, single_role=True, role_name="UC_ROLE", policy_name="UC_POLICY"): + def run(self, prompts: Prompts, *, single_role=False, role_name="UC_ROLE", policy_name="UC_POLICY"): iam_list = self._resource_permissions.list_uc_roles( single_role=single_role, From a69c663e2aca1d2e411ea806829fc4bc96c95511 Mon Sep 17 00:00:00 2001 From: Vuong Date: Mon, 20 May 2024 17:24:46 +0100 Subject: [PATCH 7/8] fmt --- src/databricks/labs/ucx/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 056fd6d708..5df7ca4e87 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -293,7 +293,7 @@ def create_missing_principals( w: WorkspaceClient, prompts: Prompts, ctx: WorkspaceContext | None = None, - single_role: bool=False, + single_role: bool = False, role_name="UC_ROLE", policy_name="UC_POLICY", **named_parameters, From 4820572082f853acdf449f39ba4a0c11a0b712c4 Mon Sep 17 00:00:00 2001 From: Vuong Date: Tue, 21 May 2024 09:04:14 +0100 Subject: [PATCH 8/8] test --- tests/unit/aws/test_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/aws/test_access.py b/tests/unit/aws/test_access.py index 2187f7d9ce..291f260f94 100644 --- a/tests/unit/aws/test_access.py +++ b/tests/unit/aws/test_access.py @@ -315,7 +315,7 @@ def test_create_uc_role_single(mock_ws, installation_single_role, backend, locat aws_resource_permissions = AWSResourcePermissions(installation_single_role, mock_ws, aws, locations) role_creation = IamRoleCreation(installation_single_role, mock_ws, aws_resource_permissions) aws.list_all_uc_roles.return_value = [] - role_creation.run(MockPrompts({"Above *": "yes"})) + role_creation.run(MockPrompts({"Above *": "yes"}), single_role=True) assert aws.create_uc_role.assert_called assert ( call('UC_ROLE', 'UC_POLICY', {'s3://BUCKET1/FOLDER1', 's3://BUCKET2/FOLDER2'}, None, None)