Skip to content

Commit

Permalink
Changes to identify service principal with custom roles on Azure stor…
Browse files Browse the repository at this point in the history
…age account for principal-prefix-access (#1576)
  • Loading branch information
mohanab-db authored May 6, 2024
1 parent 001ff18 commit fda47f0
Show file tree
Hide file tree
Showing 6 changed files with 574 additions and 26 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,10 @@ databricks labs ucx principal-prefix-access --subscription-id test-subscription-
```

Use this command to identify all storage accounts used by tables, and to identify the relevant Azure service principals and their permissions
on each storage account. This requires Azure CLI to be installed and configured via `az login`.
on each storage account. The command is used to identify Azure Service Principals, which have `Storage Blob Data Contributor`,
`Storage Blob Data Reader`, `Storage Blob Data Owner` roles, or custom read/write roles on ADLS Gen2 locations that are being
used in Databricks. This requires Azure CLI to be installed and configured via `az login`. It outputs `azure_storage_account_info.csv`,
which is later used by the `migrate-credentials` command to create UC storage credentials.

Once done, proceed to the [`migrate-credentials` command](#migrate-credentials-command).

Expand Down Expand Up @@ -702,8 +705,8 @@ For Azure, this command prompts to confirm performing the following credential m
`Storage Blob Data Contributor` role on the respective storage account. A storage credential is created for each
access connector.
2. Migrate Azure Service Principals, which have `Storage Blob Data Contributor`,
`Storage Blob Data Reader`, `Storage Blob Data Owner` roles on ADLS Gen2 locations that are being used in
Databricks, to UC storage credentials. The Azure Service Principal to location mappings are listed
`Storage Blob Data Reader`, `Storage Blob Data Owner`, or custom roles on ADLS Gen2 locations that are being used in
Databricks, to UC storage credentials. The Azure Service Principal to location mappings are listed
in `/Users/{user_name}/.ucx/azure_storage_account_info.csv` which is generated by
[`principal-prefix-access` command](#principal-prefix-access-command). Please review the file and delete the Service
Principals you do not want to be migrated. The command will only migrate the Service Principals that have client
Expand Down
80 changes: 65 additions & 15 deletions src/databricks/labs/ucx/azure/access.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import logging
import re
import uuid
from collections.abc import ValuesView
from dataclasses import dataclass
from functools import partial

Expand All @@ -14,6 +16,7 @@
from databricks.labs.ucx.azure.resources import (
AccessConnector,
AzureResources,
AzureRoleAssignment,
PrincipalSecret,
StorageAccount,
)
Expand Down Expand Up @@ -55,29 +58,76 @@ def __init__(
"Storage Blob Data Owner": Privilege.WRITE_FILES,
"Storage Blob Data Reader": Privilege.READ_FILES,
}
self._permission_levels = {
"Microsoft.Storage/storageAccounts/blobServices/containers/write": Privilege.WRITE_FILES,
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write": Privilege.WRITE_FILES,
"Microsoft.Storage/storageAccounts/blobServices/containers/read": Privilege.READ_FILES,
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read": Privilege.READ_FILES,
}

def _get_permission_level(self, permission_to_match: str) -> Privilege | None:
    """Map a single role action string to the privilege it grants, if any.

    The action may contain ``*`` wildcards. It is compared against every key of
    ``self._permission_levels`` in that dict's iteration order, and the privilege
    of the first key it covers is returned.

    Args:
        permission_to_match: action / dataAction string taken from a role definition.

    Returns:
        The privilege stored for the first matching key, or ``None`` when the
        action covers none of the known keys.
    """
    # Escape everything first so that only '*' behaves as a wildcard: the dots in
    # "Microsoft.Storage" must match literally, and stray regex metacharacters
    # must not change the meaning of the pattern or raise re.error.
    pattern = re.escape(permission_to_match).replace(r"\*", ".*")
    permission_compiled = re.compile(pattern)
    for each_level, privilege_level in self._permission_levels.items():
        # fullmatch, not match: an empty or truncated action string must not be
        # treated as covering a known action merely because it is a prefix of it.
        if permission_compiled.fullmatch(each_level):
            # First hit wins; no need to check the remaining keys.
            return privilege_level
    return None

def _get_custom_role_privilege(self, role_permissions: list[str]) -> Privilege | None:
    """Reduce a custom role's action list to the single strongest privilege.

    Each action is resolved through ``_get_permission_level``. The scan stops at
    the first action that resolves to WRITE_FILES; otherwise READ_FILES is
    returned if any action resolved to it, and ``None`` if nothing resolved.
    """
    strongest_so_far = None
    for action in role_permissions:
        resolved = self._get_permission_level(action)
        if resolved == Privilege.WRITE_FILES:
            # Write outranks read, so the remaining actions cannot change the answer.
            return resolved
        if resolved is not None and resolved == Privilege.READ_FILES:
            strongest_so_far = resolved
    return strongest_so_far

def _get_role_privilege(self, role_assignment: AzureRoleAssignment) -> Privilege | None:
    """Resolve the privilege granted by a role assignment.

    When the assignment carries a non-empty ``role_permissions`` list, the result
    comes solely from ``_get_custom_role_privilege``, with no fallback to the
    role name. Otherwise the role name is looked up in ``self._levels``; an
    unknown name yields ``None``.
    """
    if role_assignment.role_permissions:
        # Custom role: derive the privilege from its explicit permissions.
        return self._get_custom_role_privilege(role_assignment.role_permissions)
    return self._levels.get(role_assignment.role_name)

def _map_storage(self, storage: StorageAccount) -> ValuesView[StoragePermissionMapping]:
    """Collect one permission mapping per (container, principal) of a storage account.

    Walks every container returned for ``storage.id`` and every role assignment on
    it. Assignments whose privilege cannot be resolved are ignored. A later
    assignment for the same container and client id replaces the stored mapping,
    unless the stored mapping already holds WRITE_FILES, in which case further
    assignments for that pair are skipped.

    Returns:
        A view over the collected mappings.
    """
    logger.info(f"Fetching role assignment for {storage.name}")
    mapping_by_key: dict[str, StoragePermissionMapping] = {}
    for container in self._azurerm.containers(storage.id):
        for assignment in self._azurerm.role_assignments(str(container)):
            principal = assignment.principal
            key = f"{container.container}_{principal.client_id}"
            recorded = mapping_by_key.get(key)
            if recorded is not None and recorded.privilege == Privilege.WRITE_FILES.value:
                # This pair is already at the highest privilege; nothing can improve it.
                continue
            # One principal may hold several roles with overlapping dataActions and so
            # show up repeatedly; the role name alone is not enough to deduplicate.
            resolved = self._get_role_privilege(assignment)
            if not resolved:
                continue
            mapping_by_key[key] = StoragePermissionMapping(
                prefix=f"abfss://{container.container}@{container.storage_account}.dfs.core.windows.net/",
                client_id=principal.client_id,
                principal=principal.display_name,
                privilege=resolved.value,
                type=principal.type,
                default_network_action=storage.default_network_action,
                directory_id=principal.directory_id,
            )
    return mapping_by_key.values()

def save_spn_permissions(self) -> str | None:
used_storage_accounts = self._get_storage_accounts()
Expand Down
41 changes: 33 additions & 8 deletions src/databricks/labs/ucx/azure/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,15 @@ class AzureRoleAssignment:
scope: AzureResource
principal: Principal
role_name: str
role_type: str
role_permissions: list[str]


@dataclass
class AzureRoleDetails:
role_name: str | None
role_type: str
role_permissions: list[str]


@dataclass
Expand Down Expand Up @@ -273,7 +282,7 @@ def __init__(self, azure_mgmt: AzureAPIClient, azure_graph: AzureAPIClient, incl
self._mgmt = azure_mgmt
self._graph = azure_graph
self._include_subscriptions = include_subscriptions
self._role_definitions = {} # type: dict[str, str]
self._role_definitions = {} # type: dict[str, AzureRoleDetails]
self._principals: dict[str, Principal | None] = {}

def _get_subscriptions(self) -> Iterable[AzureSubscription]:
Expand Down Expand Up @@ -438,7 +447,8 @@ def _role_assignment(
scope = assignment_properties.get("scope")
if not scope:
return None
role_name = self._role_name(role_definition_id)
role_details = self._role_name(role_definition_id)
role_name = role_details.role_name
if not role_name:
return None
principal = self._get_principal(principal_id)
Expand All @@ -447,18 +457,33 @@ def _role_assignment(
if scope == "/":
scope = resource_id
return AzureRoleAssignment(
resource=AzureResource(resource_id), scope=AzureResource(scope), principal=principal, role_name=role_name
resource=AzureResource(resource_id),
scope=AzureResource(scope),
principal=principal,
role_name=role_name,
role_type=role_details.role_type,
role_permissions=role_details.role_permissions,
)

def _role_name(self, role_definition_id) -> AzureRoleDetails:
    """Return the details of a role definition, fetching and caching them on first use.

    Args:
        role_definition_id: identifier passed to the management API client.

    Returns:
        The cached ``AzureRoleDetails``. When the definition has no ``roleName``,
        a placeholder with ``role_name=None`` is returned and nothing is cached.
        ``role_permissions`` is only populated for definitions of type
        ``'CustomRole'``; it holds the ``actions`` followed by the ``dataActions``
        of every entry in the definition's ``permissions`` list.
    """
    if role_definition_id not in self._role_definitions:
        role_definition = self._mgmt.get(role_definition_id, "2022-04-01")
        definition_properties = role_definition.get("properties", {})
        role_name = definition_properties.get("roleName")
        if not role_name:
            return AzureRoleDetails(role_name=None, role_type='BuiltInRole', role_permissions=[])
        role_type = definition_properties.get("type", "BuiltInRole")
        role_permissions: list[str] = []
        if role_type == 'CustomRole':
            for each_role_permissions in definition_properties.get("permissions", []):
                # Accumulate across all permission entries; assigning here would keep
                # only the last entry and silently drop the actions of earlier ones.
                role_permissions.extend(each_role_permissions.get("actions", []))
                role_permissions.extend(each_role_permissions.get("dataActions", []))
        self._role_definitions[role_definition_id] = AzureRoleDetails(
            role_name=role_name, role_type=role_type, role_permissions=role_permissions
        )
    return self._role_definitions[role_definition_id]

def managed_identity_client_id(
self, access_connector_id: str, user_assigned_identity_id: str | None = None
Expand Down
98 changes: 98 additions & 0 deletions tests/unit/azure/azure/mappings.json
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,110 @@
}
]
},
"subscriptions/002/resourceGroups/rg1/storageAccounts/sto4/providers/Microsoft.Authorization/roleAssignments": {
"value": [
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "inv001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol2"
},
{
"properties": {
"principalId": "user2",
"roleDefinitionId": "inv001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol2"
},
{
"properties": {
"principalId": "user2",
"principalType": "User",
"roleDefinitionId": "inv001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol2"
},
{
"properties": {
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"scope": "subscriptions/002/resourceGroups/rg1/storageAccounts/sto4"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001"
},
"id": "custrol1"
},
{
"properties": {
"principalId": "user2",
"principalType": "ServicePrincipal",
"roleDefinitionId": "customroleid001",
"scope": "/"
},
"id": "custrol1"
}
]
},
"id002": {
"id": "role2",
"properties": {
"roleName": "Storage Blob Data Owner"
}
},
"inv001": {
"id": "inv001",
"properties": {}
},
"customroleid001": {
"id": "customroleid001",
"properties": {
"roleName": "custom_role_001",
"type": "CustomRole",
"permissions": [
{
"actions": [
"Microsoft.Storage/storageAccounts/blobServices/containers/read",
"Microsoft.Storage/storageAccounts/blobServices/containers/write"
],
"notActions": [],
"dataActions": [
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/delete",
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/read",
"Microsoft.Storage/storageAccounts/blobServices/containers/blobs/write"
],
"notDataActions": []
}
]
}
},
"subscriptions/002/resourceGroups/rg1/storageAccounts/sto2/containers/container1/providers/Microsoft.Authorization/roleAssignments": {
"value": [
{
Expand Down
Loading

0 comments on commit fda47f0

Please sign in to comment.