From baafbb4755ff91e884a428733a2c430fde76a064 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Tue, 24 Sep 2024 15:15:52 +1000 Subject: [PATCH 1/4] Adding unskip CLI command to undo a skip on schema or a table --- src/databricks/labs/ucx/cli.py | 11 +++++++++++ src/databricks/labs/ucx/hive_metastore/mapping.py | 11 +++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index a8a821d586..c24de41242 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -97,6 +97,17 @@ def skip(w: WorkspaceClient, schema: str | None = None, table: str | None = None return ctx.table_mapping.skip_table_or_view(schema, table, ctx.tables_crawler.load_one) return ctx.table_mapping.skip_schema(schema) +def unskip(w: WorkspaceClient, schema: str | None = None, table: str | None = None): + """Create a unskip comment on a schema or a table""" + logger.info("Running unskip command") + if not schema: + logger.error("--schema is a required parameter.") + return None + ctx = WorkspaceContext(w) + if table: + return ctx.table_mapping.skip_table_or_view(schema, table, ctx.tables_crawler.load_one, unskip=True) + return ctx.table_mapping.skip_schema(schema, unskip=True) + @ucx.command(is_account=True) def sync_workspace_info(a: AccountClient): diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py index 655172e388..4b9825ecab 100644 --- a/src/databricks/labs/ucx/hive_metastore/mapping.py +++ b/src/databricks/labs/ucx/hive_metastore/mapping.py @@ -116,14 +116,15 @@ def load(self) -> list[Rule]: msg = "Please run: databricks labs ucx table-mapping" raise ValueError(msg) from None - def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None]): + def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None], unskip: bool = False): # Marks a table to be skipped in the migration process by applying a table property + unskip = "false" if unskip else "true" try: table = load_table(schema_name, table_name) if table is None: raise NotFound("[TABLE_OR_VIEW_NOT_FOUND]") self._sql_backend.execute( - f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)" + f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {unskip})" ) except NotFound as err: if "[TABLE_OR_VIEW_NOT_FOUND]" in str(err) or "[DELTA_TABLE_NOT_FOUND]" in str(err): @@ -135,11 +136,12 @@ def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Call except BadRequest as err: logger.error(f"Failed to apply skip marker for Table {schema_name}.{table_name}: {err!s}", exc_info=True) - def skip_schema(self, schema: str): + def skip_schema(self, schema: str, unskip: bool = False): # Marks a schema to be skipped in the migration process by applying a table property + unskip = "false" if unskip else "true" try: self._sql_backend.execute( - f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)" + f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {unskip})" ) except NotFound as err: if "[SCHEMA_NOT_FOUND]" in str(err): @@ -209,6 +211,7 @@ def _get_table_in_scope_task(self, table_to_migrate: TableToMigrate) -> TableToM return None for value in properties: + #TODO : there should be a check to see if this property is equal to true then skip and if false then ignore if value["key"] == self.UCX_SKIP_PROPERTY: logger.info(f"{table.key} is marked to be skipped") return None From 13c9ef65e5a37bea6e90cc8501563b9c0a037941 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Tue, 24 Sep 2024 15:21:30 +1000 Subject: [PATCH 2/4] Added a missing cli command decorator --- src/databricks/labs/ucx/cli.py | 2 ++ src/databricks/labs/ucx/hive_metastore/mapping.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index c24de41242..ae6185c12c 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -97,6 +97,8 @@ def skip(w: WorkspaceClient, schema: str | None = None, table: str | None = None return ctx.table_mapping.skip_table_or_view(schema, table, ctx.tables_crawler.load_one) return ctx.table_mapping.skip_schema(schema) + +@ucx.command def unskip(w: WorkspaceClient, schema: str | None = None, table: str | None = None): """Create a unskip comment on a schema or a table""" logger.info("Running unskip command") diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py index 4b9825ecab..7e364949c6 100644 --- a/src/databricks/labs/ucx/hive_metastore/mapping.py +++ b/src/databricks/labs/ucx/hive_metastore/mapping.py @@ -116,15 +116,17 @@ def load(self) -> list[Rule]: msg = "Please run: databricks labs ucx table-mapping" raise ValueError(msg) from None - def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None], unskip: bool = False): + def skip_table_or_view( + self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None], unskip: bool = False + ): # Marks a table to be skipped in the migration process by applying a table property - unskip = "false" if unskip else "true" + skip_flag = "false" if unskip else "true" try: table = load_table(schema_name, table_name) if table is None: raise NotFound("[TABLE_OR_VIEW_NOT_FOUND]") self._sql_backend.execute( - f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {unskip})" + f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {skip_flag})" ) except NotFound as err: if "[TABLE_OR_VIEW_NOT_FOUND]" in str(err) or "[DELTA_TABLE_NOT_FOUND]" in str(err): @@ -138,10 +140,10 @@ def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Call def skip_schema(self, schema: str, unskip: bool = False): # Marks a schema to be skipped in the migration process by applying a table property - unskip = "false" if unskip else "true" + skip_flag = "false" if unskip else "true" try: self._sql_backend.execute( - f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {unskip})" + f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {skip_flag})" ) except NotFound as err: if "[SCHEMA_NOT_FOUND]" in str(err): @@ -211,7 +213,7 @@ def _get_table_in_scope_task(self, table_to_migrate: TableToMigrate) -> TableToM return None for value in properties: - #TODO : there should be a check to see if this property is equal to true then skip and if false then ignore + # TODO : there should be a check to see if this property is equal to true then skip and if false then ignore if value["key"] == self.UCX_SKIP_PROPERTY: logger.info(f"{table.key} is marked to be skipped") return None From ca95837ff3a1289655adf2a8964fa2361ff158e8 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Tue, 24 Sep 2024 16:07:28 +1000 Subject: [PATCH 3/4] Another approach added --- src/databricks/labs/ucx/cli.py | 4 +- .../labs/ucx/hive_metastore/mapping.py | 45 +++++++++++++++---- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index ae6185c12c..9b438ee408 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -107,8 +107,8 @@ def unskip(w: WorkspaceClient, schema: str | None = None, table: str | None = No return None ctx = WorkspaceContext(w) if table: - return ctx.table_mapping.skip_table_or_view(schema, table, ctx.tables_crawler.load_one, unskip=True) - return ctx.table_mapping.skip_schema(schema, unskip=True) + return ctx.table_mapping.unskip_table_or_view(schema, table, ctx.tables_crawler.load_one) + return ctx.table_mapping.unskip_schema(schema) @ucx.command(is_account=True) diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py index 7e364949c6..7dc5fd2325 100644 --- a/src/databricks/labs/ucx/hive_metastore/mapping.py +++ b/src/databricks/labs/ucx/hive_metastore/mapping.py @@ -116,17 +116,14 @@ def load(self) -> list[Rule]: msg = "Please run: databricks labs ucx table-mapping" raise ValueError(msg) from None - def skip_table_or_view( - self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None], unskip: bool = False - ): + def skip_table_or_view(self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None]): # Marks a table to be skipped in the migration process by applying a table property - skip_flag = "false" if unskip else "true" try: table = load_table(schema_name, table_name) if table is None: raise NotFound("[TABLE_OR_VIEW_NOT_FOUND]") self._sql_backend.execute( - f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {skip_flag})" + f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)" ) except NotFound as err: if "[TABLE_OR_VIEW_NOT_FOUND]" in str(err) or "[DELTA_TABLE_NOT_FOUND]" in str(err): @@ -138,12 +135,30 @@ def skip_table_or_view( except BadRequest as err: logger.error(f"Failed to apply skip marker for Table {schema_name}.{table_name}: {err!s}", exc_info=True) - def skip_schema(self, schema: str, unskip: bool = False): + def unskip_table_or_view(self, schema_name: str, table_name: str, load_table: Callable[[str, str], Table | None]): + # Removes skip mark from the table property + try: + table = load_table(schema_name, table_name) + if table is None: + raise NotFound("[TABLE_OR_VIEW_NOT_FOUND]") + self._sql_backend.execute( + f"ALTER {table.kind} {escape_sql_identifier(schema_name)}.{escape_sql_identifier(table_name)} UNSET TBLPROPERTIES IF EXISTS('{self.UCX_SKIP_PROPERTY}' );" + ) + except NotFound as err: + if "[TABLE_OR_VIEW_NOT_FOUND]" in str(err) or "[DELTA_TABLE_NOT_FOUND]" in str(err): + logger.error(f"Failed to apply skip marker for Table {schema_name}.{table_name}. Table not found.") + else: + logger.error( + f"Failed to apply skip marker for Table {schema_name}.{table_name}: {err!s}", exc_info=True + ) + except BadRequest as err: + logger.error(f"Failed to apply skip marker for Table {schema_name}.{table_name}: {err!s}", exc_info=True) + + def skip_schema(self, schema: str): # Marks a schema to be skipped in the migration process by applying a table property - skip_flag = "false" if unskip else "true" try: self._sql_backend.execute( - f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = {skip_flag})" + f"ALTER SCHEMA {escape_sql_identifier(schema)} SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)" ) except NotFound as err: if "[SCHEMA_NOT_FOUND]" in str(err): @@ -153,6 +168,20 @@ def skip_schema(self, schema: str, unskip: bool = False): except BadRequest as err: logger.error(err) + def unskip_schema(self, schema: str): + # Removes skip mark from the schema property + try: + self._sql_backend.execute( + f"ALTER SCHEMA {escape_sql_identifier(schema)} UNSET DBPROPERTIES IF EXISTS('{self.UCX_SKIP_PROPERTY}');" + ) + except NotFound as err: + if "[SCHEMA_NOT_FOUND]" in str(err): + logger.error(f"Failed to remove skip marker for Schema {schema}. Schema not found.") + else: + logger.error(err) + except BadRequest as err: + logger.error(err) + def get_tables_to_migrate(self, tables_crawler: TablesCrawler) -> Collection[TableToMigrate]: rules = self.load() # Getting all the source tables from the rules From b26e1f493d8cebf37d42d6ae59feb258f6706726 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Tue, 24 Sep 2024 16:14:43 +1000 Subject: [PATCH 4/4] Another approach added --- src/databricks/labs/ucx/hive_metastore/mapping.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/mapping.py b/src/databricks/labs/ucx/hive_metastore/mapping.py index 7dc5fd2325..94f33a97fe 100644 --- a/src/databricks/labs/ucx/hive_metastore/mapping.py +++ b/src/databricks/labs/ucx/hive_metastore/mapping.py @@ -242,7 +242,6 @@ def _get_table_in_scope_task(self, table_to_migrate: TableToMigrate) -> TableToM return None for value in properties: - # TODO : there should be a check to see if this property is equal to true then skip and if false then ignore if value["key"] == self.UCX_SKIP_PROPERTY: logger.info(f"{table.key} is marked to be skipped") return None