Skip to content

Commit

Permalink
Add support for migrate-tables-ctas workflow in the cmd `databricks l…
Browse files Browse the repository at this point in the history
…abs ucx migrate-tables` (#1660)

## Changes
This change adds support for the migrate-tables-ctas workflow to the
existing `migrate-tables` CLI command.
The command now checks for the presence of external tables which cannot be
synced and prompts the user to run the additional workflow.
The relevant README documentation is also updated.



Resolves #1659 

### Functionality 

- [X] added relevant user documentation
- [ ] added new CLI command
- [X] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests
<!-- How is this tested? Please see the checklist below and also
describe any other relevant tests -->

- [ ] manually tested
- [X] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)
  • Loading branch information
HariGS-DB authored May 7, 2024
1 parent 1aefd85 commit 9c67520
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 2 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -394,14 +394,17 @@ flowchart TB
subgraph workflow[Table Migration Workflows]
subgraph mt_workflow[workflow: migrate-tables]
dbfs_root_delta_mt_task[migrate_dbfs_root_delta_tables]
dbfs_root_non_delta_mt_task[migrate_dbfs_root_non_delta_tables]
external_tables_sync_mt_task[migrate_external_tables_sync]
view_mt_task[roadmap: migrate_views]
dbfs_root_delta_mt_task --> view_mt_task
dbfs_root_non_delta_mt_task --> view_mt_task
external_tables_sync_mt_task --> view_mt_task
end
subgraph mt_ctas_wf[roadmap workflow: migrate-tables-ctas]
ctas_mt_task[migrate_tables_ctas] --> view_mt_task_ctas[roadmap: migrate_views]
ctas_mt_task[migrate_hiveserde_ctas] --> view_mt_task_ctas[roadmap: migrate_views]
end
subgraph mt_serde_inplace_wf[roadmap workflow: migrate-external-hiveserde-tables-in-place-experimental]
Expand Down
2 changes: 1 addition & 1 deletion labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -207,4 +207,4 @@ commands:
- name: migrate-tables
description: |
Trigger the migrate-tables workflow and, optionally, migrate-external-hiveserde-tables-in-place-experimental
workflow.
workflow and migrate-external-tables-ctas workflow.
11 changes: 10 additions & 1 deletion src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def assign_metastore(
def migrate_tables(w: WorkspaceClient, prompts: Prompts, *, ctx: WorkspaceContext | None = None):
"""
Trigger the migrate-tables workflow and, optionally, the migrate-external-hiveserde-tables-in-place-experimental
workflow.
workflow and migrate-external-tables-ctas.
"""
if ctx is None:
ctx = WorkspaceContext(w)
Expand All @@ -445,6 +445,15 @@ def migrate_tables(w: WorkspaceClient, prompts: Prompts, *, ctx: WorkspaceContex
):
deployed_workflows.run_workflow("migrate-external-hiveserde-tables-in-place-experimental")

external_ctas_tables = [table for table in tables if table.what == What.EXTERNAL_NO_SYNC]
if len(external_ctas_tables) > 0:
percentage_external_ctas_tables = len(external_ctas_tables) / len(tables) * 100
if prompts.confirm(
f"Found {len(external_ctas_tables)} ({percentage_external_ctas_tables:.2f}%) external tables which cannot be migrated using sync"
f", do you want to run the migrate-external-tables-ctas workflow?"
):
deployed_workflows.run_workflow("migrate-external-tables-ctas")


if __name__ == "__main__":
ucx()
21 changes: 21 additions & 0 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def ws():
'assessment': '123',
'migrate-tables': '456',
'migrate-external-hiveserde-tables-in-place-experimental': '789',
'migrate-external-tables-ctas': '987',
}
}
}
Expand Down Expand Up @@ -487,6 +488,26 @@ def test_migrate_external_hiveserde_tables_in_place(ws):
ws.jobs.run_now.assert_called_with(789)


def test_migrate_external_tables_ctas(ws):
    """Confirming the CTAS prompt should start the migrate-external-tables-ctas job.

    The crawler snapshot is stubbed with a single ``hive_metastore`` table whose
    format is ``EXTERNAL``; it is presumably classified as an external table that
    cannot be migrated with sync (TODO confirm against ``Table.what``). The mocked
    prompts answer "Yes" to the CTAS question, and the test then expects the most
    recent ``run_now`` call to use job id 987.
    """
    # Regex-style key for the expected question; "(.*)" stands in for the percentage part.
    expected_question = (
        "Found 1 (.*) external tables which cannot be migrated using sync, do you want to run the "
        "migrate-external-tables-ctas workflow?"
    )
    mock_prompts = MockPrompts({expected_question: "Yes"})

    # Stub the crawler so that its snapshot yields exactly one table.
    crawler = create_autospec(TablesCrawler)
    crawler.snapshot.return_value = [
        Table(
            catalog="hive_metastore",
            database="test",
            name="externalctas",
            object_type="UNKNOWN",
            table_format="EXTERNAL",
        )
    ]
    workspace_context = WorkspaceContext(ws).replace(tables_crawler=crawler)

    migrate_tables(ws, mock_prompts, ctx=workspace_context)

    ws.jobs.run_now.assert_called_with(987)


def test_create_missing_principal_aws(ws):
aws_resource_permissions = create_autospec(AWSResourcePermissions)
ctx = WorkspaceContext(ws).replace(is_aws=True, is_azure=False, aws_resource_permissions=aws_resource_permissions)
Expand Down

0 comments on commit 9c67520

Please sign in to comment.