Skip to content

Commit

Permalink
Added initial version of databricks labs ucx migrate-local-code com…
Browse files Browse the repository at this point in the history
…mand (#1067)

The `databricks labs ucx migrate-local-code` command has been added to
facilitate migration of local code to a Databricks environment. This
initial version of the command is highly experimental, with support for
migrating Python and SQL files only. The `.gitignore` file has been
updated to exclude output files and specific configuration files from
being committed to the repository. This command aims to help users and
administrators manage code migration and maintain consistency across
workspaces, while also enhancing the compatibility of local code with
the Unity Catalog, a part of Databricks' offerings for data and AI.
  • Loading branch information
nfx authored Mar 21, 2024
1 parent 40b454c commit 27500c2
Show file tree
Hide file tree
Showing 23 changed files with 1,103 additions and 5 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,5 @@ dev/cleanup.py

.python-version
.databricks-login.json
*.out
*.out
foo
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
* [`create-catalogs-schemas` command](#create-catalogs-schemas-command)
* [`move` command](#move-command)
* [`alias` command](#alias-command)
* [Code migration commands](#code-migration-commands)
* [`migrate-local-code` command](#migrate-local-code-command)
* [Cross-workspace installations](#cross-workspace-installations)
* [`sync-workspace-info` command](#sync-workspace-info-command)
* [`manual-workspace-info` command](#manual-workspace-info-command)
Expand Down Expand Up @@ -625,6 +627,23 @@ It can also be used to debug issues related to table aliasing.

[[back to top](#databricks-labs-ucx)]

# Code migration commands

[[back to top](#databricks-labs-ucx)]

## `migrate-local-code` command

```text
databricks labs ucx migrate-local-code
```

**(Experimental)** Once [table migration](#table-migration-commands) is complete, you can run this command to
migrate all Python and SQL files in the current working directory. This command is highly experimental: at
the moment it only supports Python and SQL files, and it discards code comments and formatting during
the automated transformation process.

[[back to top](#databricks-labs-ucx)]

# Cross-workspace installations

When installing UCX across multiple workspaces, administrators need to keep UCX configurations in sync.
Expand Down
3 changes: 3 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,6 @@ commands:

- name: revert-cluster-remap
description: Reverting the Re-mapping of the cluster from UC

- name: migrate-local-code
description: (Experimental) Migrate files in the current directory to be more compatible with Unity Catalog.
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,12 @@ branch = true
parallel = true

[tool.coverage.report]
omit = ["src/databricks/labs/ucx/mixins/*", "*/working-copy/*", "*/fresh_wheel_file/*"]
omit = [
"src/databricks/labs/ucx/mixins/*",
"src/databricks/labs/ucx/code/lsp.py",
"*/working-copy/*",
"*/fresh_wheel_file/*"
]
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
Expand Down
12 changes: 12 additions & 0 deletions src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import shutil
import webbrowser
from collections.abc import Callable
from pathlib import Path

from databricks.labs.blueprint.cli import App
from databricks.labs.blueprint.entrypoint import get_logger
Expand All @@ -19,6 +20,7 @@
from databricks.labs.ucx.azure.access import AzureResourcePermissions
from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration
from databricks.labs.ucx.azure.locations import ExternalLocationsMigration
from databricks.labs.ucx.code.files import Files
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler
from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema
Expand Down Expand Up @@ -547,5 +549,15 @@ def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
cluster_details.revert_cluster_remap(cluster_list, cluster_ids)


@ucx.command
def migrate_local_code(w: WorkspaceClient, prompts: Prompts):
    """Fix the code files based on their language.

    Walks the current working directory and applies UC-migration fixers to all
    supported files (Python and SQL).
    """
    # Ask for confirmation before doing any work: Files.for_cli() builds the
    # table-migration index, which is expensive, so don't pay for it until the
    # user has agreed to proceed.
    if not prompts.confirm("Do you want to apply UC migration to all files in the current directory?"):
        return
    files = Files.for_cli(w)
    files.apply(Path.cwd())


# Entry point when this module is executed directly (e.g. during development);
# normally the CLI is invoked via `databricks labs ucx ...`.
if __name__ == "__main__":
    ucx()
Empty file.
91 changes: 91 additions & 0 deletions src/databricks/labs/ucx/code/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from abc import ABC, abstractmethod
from collections.abc import Iterable
from dataclasses import dataclass

# Code mapping between LSP, PyLint, and our own diagnostics:
# | LSP | PyLint | Our |
# |---------------------------|------------|----------------|
# | Severity.ERROR | Error | Failure() |
# | Severity.WARN | Warning | Advisory() |
# | DiagnosticTag.DEPRECATED | Warning | Deprecation() |
# | Severity.INFO | Info | Advice() |
# | Severity.HINT | Convention | Convention() |
# | DiagnosticTag.UNNECESSARY | Refactor | Convention() |


@dataclass
class Advice:
code: str
message: str
start_line: int
start_col: int
end_line: int
end_col: int

def replace(
self,
code: str | None = None,
message: str | None = None,
start_line: int | None = None,
start_col: int | None = None,
end_line: int | None = None,
end_col: int | None = None,
) -> 'Advice':
return self.__class__(
code=code if code is not None else self.code,
message=message if message is not None else self.message,
start_line=start_line if start_line is not None else self.start_line,
start_col=start_col if start_col is not None else self.start_col,
end_line=end_line if end_line is not None else self.end_line,
end_col=end_col if end_col is not None else self.end_col,
)

def as_advisory(self) -> 'Advisory':
return Advisory(**self.__dict__)

def as_failure(self) -> 'Failure':
return Failure(**self.__dict__)

def as_deprecation(self) -> 'Deprecation':
return Deprecation(**self.__dict__)

def as_convention(self) -> 'Convention':
return Convention(**self.__dict__)


class Advisory(Advice):
    """A warning that does not prevent the code from running.

    Maps to LSP Severity.WARN and a PyLint Warning (see the mapping table in
    this module).
    """


class Failure(Advisory):
    """An error that prevents the code from running.

    Maps to LSP Severity.ERROR and a PyLint Error. Note that it subclasses
    Advisory, so `isinstance(x, Advisory)` also matches failures.
    """


class Deprecation(Advisory):
    """An advisory that suggests to replace the code with a newer version.

    Maps to LSP DiagnosticTag.DEPRECATED and a PyLint Warning (see the mapping
    table in this module).
    """


class Convention(Advice):
    """A suggestion for a better way to write the code.

    Maps to LSP Severity.HINT / DiagnosticTag.UNNECESSARY and to PyLint
    Convention / Refactor (see the mapping table in this module).
    """


class Linter(ABC):
    """Interface for language-specific linters that inspect code and yield advice.

    Inherits ABC so that @abstractmethod is actually enforced: without the
    ABCMeta metaclass, Linter() was instantiable and lint() silently returned
    None instead of failing fast.
    """

    @abstractmethod
    def lint(self, code: str) -> Iterable['Advice']: ...


class Fixer(ABC):
    """Interface for fixers that rewrite code to resolve a specific diagnostic.

    Inherits ABC so that @abstractmethod is actually enforced: without the
    ABCMeta metaclass, Fixer() was instantiable despite having unimplemented
    abstract methods.
    """

    @abstractmethod
    def name(self) -> str:
        """The diagnostic code this fixer handles (matched against Advice.code)."""
        ...

    @abstractmethod
    def apply(self, code: str) -> str:
        """Return the transformed code."""
        ...


class SequentialLinter(Linter):
    """Composes several linters and yields their advice in registration order."""

    def __init__(self, linters: list[Linter]):
        self._linters = linters

    def lint(self, code: str) -> Iterable[Advice]:
        # Lazily chain each delegate's findings, in the order they were registered.
        return (advice for delegate in self._linters for advice in delegate.lint(code))
68 changes: 68 additions & 0 deletions src/databricks/labs/ucx/code/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import logging
from pathlib import Path

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.code.languages import Languages
from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrate

logger = logging.getLogger(__name__)


class Files:
    """The Files class is responsible for fixing code files based on their language."""

    def __init__(self, languages: Languages):
        self._languages = languages
        # Supported file extensions mapped to their workspace language.
        self._extensions = {".py": Language.PYTHON, ".sql": Language.SQL}

    @classmethod
    def for_cli(cls, ws: WorkspaceClient):
        """Build a Files instance wired to the workspace's table-migration index."""
        tables_migrate = TablesMigrate.for_cli(ws)
        index = tables_migrate.index()
        languages = Languages(index)
        return cls(languages)

    def apply(self, path: Path) -> bool:
        """Recursively fix all supported files under *path*.

        Returns True for directories; for a single file, returns whether any
        fix was applied to it.
        """
        if path.is_dir():
            for child in path.iterdir():
                self.apply(child)
            return True
        return self._apply_file_fix(path)

    def _apply_file_fix(self, path: Path) -> bool:
        """
        The fix method reads a file, lints it, applies fixes, and writes the fixed code back to the file.
        """
        # Single lookup covers both "unknown extension" and "no language mapped";
        # the original did a membership test, a second lookup, and a dead
        # truthiness check.
        language = self._extensions.get(path.suffix)
        if not language:
            return False
        logger.info(f"Analysing {path}")
        linter = self._languages.linter(language)
        # Read/write as UTF-8 explicitly so results don't depend on the
        # platform's locale-default encoding.
        code = path.read_text(encoding="utf-8")
        applied = False
        # Lint the code and apply every fixer that matches a diagnostic code.
        for advice in linter.lint(code):
            logger.info(f"Found: {advice}")
            fixer = self._languages.fixer(language, advice.code)
            if not fixer:
                continue
            logger.info(f"Applying fix for {advice}")
            code = fixer.apply(code)
            applied = True
        if not applied:
            return False
        logger.info(f"Overwriting {path}")
        path.write_text(code, encoding="utf-8")
        return True
44 changes: 44 additions & 0 deletions src/databricks/labs/ucx/code/languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.code.base import Fixer, Linter, SequentialLinter
from databricks.labs.ucx.code.pyspark import SparkSql
from databricks.labs.ucx.code.queries import FromTable
from databricks.labs.ucx.hive_metastore.table_migrate import Index


class Languages:
    """Per-language registry of linters and fixers, backed by the table-migration index."""

    def __init__(self, index: Index):
        self._index = index
        from_table = FromTable(index)
        # Share one SparkSql instance between linting and fixing; the original
        # constructed two identical instances for no benefit.
        spark_sql = SparkSql(from_table)
        self._linters = {
            Language.PYTHON: SequentialLinter([spark_sql]),
            Language.SQL: SequentialLinter([from_table]),
        }
        self._fixers: dict[Language, list[Fixer]] = {
            Language.PYTHON: [spark_sql],
            Language.SQL: [from_table],
        }

    def is_supported(self, language: Language) -> bool:
        """Whether both a linter and fixers are registered for *language*."""
        return language in self._linters and language in self._fixers

    def linter(self, language: Language) -> Linter:
        """Return the linter for *language*; raises ValueError if unsupported."""
        if language not in self._linters:
            raise ValueError(f"Unsupported language: {language}")
        return self._linters[language]

    def fixer(self, language: Language, diagnostic_code: str) -> Fixer | None:
        """Return the fixer whose name matches *diagnostic_code*, or None if there is none."""
        if language not in self._fixers:
            return None
        for fixer in self._fixers[language]:
            if fixer.name() == diagnostic_code:
                return fixer
        return None

    def apply_fixes(self, language: Language, code: str) -> str:
        """Lint *code* and apply every matching fixer, returning the rewritten code."""
        linter = self.linter(language)
        for advice in linter.lint(code):
            fixer = self.fixer(language, advice.code)
            if fixer:
                code = fixer.apply(code)
        return code
Loading

0 comments on commit 27500c2

Please sign in to comment.