-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added initial version of `databricks labs ucx migrate-local-code` command (#1067)

The `databricks labs ucx migrate-local-code` command has been added to facilitate migration of local code to a Databricks environment. This initial version of the command is highly experimental, with support for migrating Python and SQL files only. The `.gitignore` file has been updated to exclude output files and specific configuration files from being committed to the repository. This command aims to help users and administrators manage code migration and maintain consistency across workspaces, while also enhancing the compatibility of local code with the Unity Catalog, a part of Databricks' offerings for data and AI.
- Loading branch information
Showing
23 changed files
with
1,103 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -151,4 +151,5 @@ dev/cleanup.py | |
|
||
.python-version | ||
.databricks-login.json | ||
*.out | ||
*.out | ||
foo |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from abc import abstractmethod | ||
from collections.abc import Iterable | ||
from dataclasses import dataclass | ||
|
||
# Code mapping between LSP, PyLint, and our own diagnostics:
# | LSP                       | PyLint     | Our            |
# |---------------------------|------------|----------------|
# | Severity.ERROR            | Error      | Failure()      |
# | Severity.WARN             | Warning    | Advisory()     |
# | DiagnosticTag.DEPRECATED  | Warning    | Deprecation()  |
# | Severity.INFO             | Info       | Advice()       |
# | Severity.HINT             | Convention | Convention()   |
# | DiagnosticTag.UNNECESSARY | Refactor   | Convention()   |
|
||
|
||
@dataclass | ||
class Advice: | ||
code: str | ||
message: str | ||
start_line: int | ||
start_col: int | ||
end_line: int | ||
end_col: int | ||
|
||
def replace( | ||
self, | ||
code: str | None = None, | ||
message: str | None = None, | ||
start_line: int | None = None, | ||
start_col: int | None = None, | ||
end_line: int | None = None, | ||
end_col: int | None = None, | ||
) -> 'Advice': | ||
return self.__class__( | ||
code=code if code is not None else self.code, | ||
message=message if message is not None else self.message, | ||
start_line=start_line if start_line is not None else self.start_line, | ||
start_col=start_col if start_col is not None else self.start_col, | ||
end_line=end_line if end_line is not None else self.end_line, | ||
end_col=end_col if end_col is not None else self.end_col, | ||
) | ||
|
||
def as_advisory(self) -> 'Advisory': | ||
return Advisory(**self.__dict__) | ||
|
||
def as_failure(self) -> 'Failure': | ||
return Failure(**self.__dict__) | ||
|
||
def as_deprecation(self) -> 'Deprecation': | ||
return Deprecation(**self.__dict__) | ||
|
||
def as_convention(self) -> 'Convention': | ||
return Convention(**self.__dict__) | ||
|
||
|
||
class Advisory(Advice):
    """Warning-level advice: the flagged code can still run."""
|
||
|
||
class Failure(Advisory):
    """Error-level advice: the flagged code cannot run as written."""
|
||
|
||
class Deprecation(Advisory):
    """Advisory recommending that the flagged code be replaced with a newer version."""
|
||
|
||
class Convention(Advice):
    """Advice proposing a better way to write the flagged code."""
|
||
|
||
class Linter:
    """Interface for components that inspect source code and report advice."""

    # NOTE(review): this class does not use ABCMeta, so @abstractmethod is a
    # marker only and does not prevent instantiation.
    @abstractmethod
    def lint(self, code: str) -> Iterable[Advice]:
        """Return the advice found in ``code``; subclasses provide the logic."""
|
||
|
||
class Fixer:
    """Interface for components that rewrite source code to resolve a diagnostic."""

    # NOTE(review): this class does not use ABCMeta, so @abstractmethod is a
    # marker only and does not prevent instantiation.
    @abstractmethod
    def name(self) -> str:
        """Return the fixer's name; subclasses provide the value."""

    @abstractmethod
    def apply(self, code: str) -> str:
        """Return the fixed version of ``code``; subclasses provide the logic."""
|
||
|
||
class SequentialLinter(Linter):
    """Linter that runs a list of linters over the same code, one after another."""

    def __init__(self, linters: list[Linter]):
        self._linters = linters

    def lint(self, code: str) -> Iterable[Advice]:
        """Yield the advice of every wrapped linter, in the order the linters were given."""
        for delegate in self._linters:
            for advice in delegate.lint(code):
                yield advice
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import logging | ||
from pathlib import Path | ||
|
||
from databricks.sdk import WorkspaceClient | ||
from databricks.sdk.service.workspace import Language | ||
|
||
from databricks.labs.ucx.code.languages import Languages | ||
from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrate | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Files:
    """Applies automated code fixes to local files, selected by file extension.

    Only ``.py`` (Python) and ``.sql`` (SQL) files are processed; any other
    file is left untouched.
    """

    def __init__(self, languages: Languages):
        self._languages = languages
        self._extensions = {".py": Language.PYTHON, ".sql": Language.SQL}

    @classmethod
    def for_cli(cls, ws: WorkspaceClient):
        """Build an instance for CLI use from a workspace client.

        The index is obtained from ``TablesMigrate.for_cli(ws).index()`` and
        wrapped in a ``Languages`` registry.
        """
        tables_migrate = TablesMigrate.for_cli(ws)
        index = tables_migrate.index()
        languages = Languages(index)
        return cls(languages)

    def apply(self, path: Path) -> bool:
        """Fix a single file, or every file below a directory.

        Returns ``True`` for a directory (regardless of whether any file in it
        was changed); for a file, returns whether it was rewritten.
        """
        if path.is_dir():
            for child in path.iterdir():
                self.apply(child)
            return True
        return self._apply_file_fix(path)

    def _apply_file_fix(self, path: Path) -> bool:
        """Read a file, lint it, apply the available fixes and write it back.

        Returns ``False`` when the extension is unsupported or no fix was
        applied (the file is then not written), ``True`` when the file was
        overwritten with fixed code.
        """
        # Single lookup: an unknown extension means the file is not supported.
        language = self._extensions.get(path.suffix)
        if language is None:
            return False
        logger.info(f"Analysing {path}")
        linter = self._languages.linter(language)
        # Explicit UTF-8 so reading and writing do not depend on the platform's
        # locale encoding (UTF-8 is the default encoding of Python source files).
        with path.open("r", encoding="utf-8") as f:
            code = f.read()
        applied = False
        for advice in linter.lint(code):
            logger.info(f"Found: {advice}")
            fixer = self._languages.fixer(language, advice.code)
            if not fixer:
                continue
            logger.info(f"Applying fix for {advice}")
            code = fixer.apply(code)
            applied = True
        if not applied:
            return False
        with path.open("w", encoding="utf-8") as f:
            logger.info(f"Overwriting {path}")
            f.write(code)
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from databricks.sdk.service.workspace import Language | ||
|
||
from databricks.labs.ucx.code.base import Fixer, Linter, SequentialLinter | ||
from databricks.labs.ucx.code.pyspark import SparkSql | ||
from databricks.labs.ucx.code.queries import FromTable | ||
from databricks.labs.ucx.hive_metastore.table_migrate import Index | ||
|
||
|
||
class Languages:
    """Registry of the linters and fixers available for each supported language."""

    def __init__(self, index: Index):
        self._index = index
        from_table = FromTable(index)
        # The same FromTable instance serves SQL directly and is wrapped by
        # SparkSql for Python; linters and fixers get separate SparkSql wrappers.
        self._linters = {
            Language.PYTHON: SequentialLinter([SparkSql(from_table)]),
            Language.SQL: SequentialLinter([from_table]),
        }
        self._fixers: dict[Language, list[Fixer]] = {
            Language.PYTHON: [SparkSql(from_table)],
            Language.SQL: [from_table],
        }

    def is_supported(self, language: Language) -> bool:
        """Return whether both a linter and fixers are registered for ``language``."""
        return all(language in registry for registry in (self._linters, self._fixers))

    def linter(self, language: Language) -> Linter:
        """Return the linter registered for ``language``.

        Raises:
            ValueError: if no linter is registered for ``language``.
        """
        found = self._linters.get(language)
        if found is None:
            raise ValueError(f"Unsupported language: {language}")
        return found

    def fixer(self, language: Language, diagnostic_code: str) -> Fixer | None:
        """Return the first fixer for ``language`` whose name equals ``diagnostic_code``.

        Returns ``None`` when the language has no fixers or none of them matches.
        """
        candidates = self._fixers.get(language, [])
        return next((candidate for candidate in candidates if candidate.name() == diagnostic_code), None)

    def apply_fixes(self, language: Language, code: str) -> str:
        """Lint ``code`` and apply the matching fixer for each reported advice.

        Advice is produced from the code as originally passed in; each fixer is
        applied to the result of the previous one. Returns the final code.
        """
        for advice in self.linter(language).lint(code):
            fix = self.fixer(language, advice.code)
            if not fix:
                continue
            code = fix.apply(code)
        return code
Oops, something went wrong.