Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support more than one dashboard per step #472

Merged
merged 7 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 81 additions & 46 deletions src/databricks/labs/ucx/framework/dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,23 @@

@dataclass
class SimpleQuery:
dashboard_ref: str
name: str
query: str
viz: dict[str, str]
widget: dict[str, str]

@property
def query_key(self):
return f"{self.name}:query_id"
return f"{self.dashboard_ref}_{self.name}:query_id"

@property
def viz_key(self):
return f"{self.name}:viz_id"
return f"{self.dashboard_ref}_{self.name}:viz_id"

@property
def widget_key(self):
return f"{self.name}:widget_id"
return f"{self.dashboard_ref}_{self.name}:widget_id"

@property
def viz_type(self) -> str:
Expand Down Expand Up @@ -80,14 +81,14 @@ def __init__(
ws: WorkspaceClient,
local_folder: Path,
remote_folder: str,
name: str,
name_prefix: str,
query_text_callback: Callable[[str], str] | None = None,
warehouse_id: str | None = None,
):
self._ws = ws
self._local_folder = local_folder
self._remote_folder = remote_folder
self._name = name
self._name_prefix = name_prefix
self._query_text_callback = query_text_callback
self._warehouse_id = warehouse_id
self._state = {}
Expand All @@ -97,36 +98,61 @@ def __init__(
def _query_state(self):
return f"{self._remote_folder}/state.json"

@property
def dashboard_link(self):
return f"{self._ws.config.host}/sql/dashboards/{self._state['dashboard_id']}"

def create_dashboard(self) -> str:
desired_queries = self._desired_queries()
parent = self._installed_query_state()
data_source_id = self._dashboard_data_source()
self._install_dashboard(parent)
for query in desired_queries:
self._install_query(query, data_source_id, parent)
self._install_viz(query)
self._install_widget(query)
self._store_query_state(desired_queries)
return self._state["dashboard_id"]
def dashboard_link(self, dashboard_ref: str):
dashboard_id = self._state[f"{dashboard_ref}:dashboard_id"]
return f"{self._ws.config.host}/sql/dashboards/{dashboard_id}"

def create_dashboards(self) -> dict:
dashboards = {}
queries_per_dashboard = {}
# Iterate over dashboards for each step, represented as first-level folders
step_folders = [f for f in self._local_folder.glob("*") if f.is_dir()]
for step_folder in step_folders:
logger.debug(f"Reading step folder {step_folder}...")
dashboard_folders = [f for f in step_folder.glob("*") if f.is_dir()]
# Create separate dashboards per step, represented as second-level folders
for dashboard_folder in dashboard_folders:
logger.debug(f"Reading dashboard folder {dashboard_folder}...")
main_name = step_folder.stem.title()
sub_name = dashboard_folder.stem.title()
dashboard_name = f"{self._name_prefix} {main_name} ({sub_name})"
dashboard_ref = f"{step_folder.stem}_{dashboard_folder.stem}".lower()
logger.info(f"Creating dashboard {dashboard_name}...")
desired_queries = self._desired_queries(dashboard_folder, dashboard_ref)
parent_folder_id = self._installed_query_state()
data_source_id = self._dashboard_data_source()
self._install_dashboard(dashboard_name, parent_folder_id, dashboard_ref)
for query in desired_queries:
self._install_query(query, dashboard_name, data_source_id, parent_folder_id)
self._install_viz(query)
self._install_widget(query, dashboard_ref)
queries_per_dashboard[dashboard_ref] = desired_queries
dashboards[dashboard_ref] = self._state[f"{dashboard_ref}:dashboard_id"]
self._store_query_state(queries_per_dashboard)
return dashboards

def validate(self):
for query in self._desired_queries():
try:
self._get_viz_options(query)
self._get_widget_options(query)
except Exception as err:
msg = f"Error in {query.name}: {err}"
raise AssertionError(msg) from err

def _install_widget(self, query: SimpleQuery):
step_folders = [f for f in self._local_folder.glob("*") if f.is_dir()]
for step_folder in step_folders:
logger.info(f"Reading step folder {step_folder}...")
dashboard_folders = [f for f in step_folder.glob("*") if f.is_dir()]
# Create separate dashboards per step, represented as second-level folders
for dashboard_folder in dashboard_folders:
dashboard_ref = f"{step_folder.stem}_{dashboard_folder.stem}".lower()
for query in self._desired_queries(dashboard_folder, dashboard_ref):
try:
self._get_viz_options(query)
self._get_widget_options(query)
except Exception as err:
msg = f"Error in {query.name}: {err}"
raise AssertionError(msg) from err

def _install_widget(self, query: SimpleQuery, dashboard_ref: str):
dashboard_id = self._state[f"{dashboard_ref}:dashboard_id"]
widget_options = self._get_widget_options(query)
# widgets are cleaned up every dashboard redeploy
widget = self._ws.dashboard_widgets.create(
self._state["dashboard_id"], widget_options, 1, visualization_id=self._state[query.viz_key]
dashboard_id, widget_options, 1, visualization_id=self._state[query.viz_key]
)
self._state[query.widget_key] = widget.id

Expand All @@ -149,9 +175,9 @@ def _installed_query_state(self):
self._state = json.load(self._ws.workspace.download(self._query_state))
to_remove = []
for k, v in self._state.items():
_, name = k.split(":")
if k == "dashboard_id":
continue
_, name = k.split(":")
if name != "query_id":
continue
try:
Expand All @@ -165,17 +191,20 @@ def _installed_query_state(self):
raise err
self._ws.workspace.mkdirs(self._remote_folder)
except JSONDecodeError:
logger.warning(f"JSON state file corrupt: {self._query_state}")
self._state = {} # noop
object_info = self._ws.workspace.get_status(self._remote_folder)
parent = f"folders/{object_info.object_id}"
return parent

def _store_query_state(self, desired_queries: list[SimpleQuery]):
desired_keys = ["dashboard_id"]
for query in desired_queries:
desired_keys.append(query.query_key)
desired_keys.append(query.viz_key)
desired_keys.append(query.widget_key)
def _store_query_state(self, queries: dict[str, list[SimpleQuery]]):
desired_keys = []
for ref, qrs in queries.items():
desired_keys.append(f"{ref}:dashboard_id")
for query in qrs:
desired_keys.append(query.query_key)
desired_keys.append(query.viz_key)
desired_keys.append(query.widget_key)
destructors = {
"query_id": self._ws.queries.delete,
"viz_id": self._ws.query_visualizations.delete,
Expand All @@ -196,27 +225,29 @@ def _store_query_state(self, desired_queries: list[SimpleQuery]):
state_dump = json.dumps(new_state, indent=2).encode("utf8")
self._ws.workspace.upload(self._query_state, state_dump, format=ImportFormat.AUTO, overwrite=True)

def _install_dashboard(self, parent: str):
if "dashboard_id" in self._state:
for widget in self._ws.dashboards.get(self._state["dashboard_id"]).widgets:
def _install_dashboard(self, dashboard_name: str, parent_folder_id: str, dashboard_ref: str):
dashboard_id = f"{dashboard_ref}:dashboard_id"
if dashboard_id in self._state:
for widget in self._ws.dashboards.get(self._state[dashboard_id]).widgets:
self._ws.dashboard_widgets.delete(widget.id)
return
dash = self._ws.dashboards.create(self._name, run_as_role=RunAsRole.VIEWER, parent=parent)
dash = self._ws.dashboards.create(dashboard_name, run_as_role=RunAsRole.VIEWER, parent=parent_folder_id)
self._ws.dbsql_permissions.set(
ObjectTypePlural.DASHBOARDS,
dash.id,
access_control_list=[AccessControl(group_name="users", permission_level=PermissionLevel.CAN_VIEW)],
)
self._state["dashboard_id"] = dash.id
self._state[dashboard_id] = dash.id

def _desired_queries(self) -> list[SimpleQuery]:
def _desired_queries(self, local_folder: Path, dashboard_ref: str) -> list[SimpleQuery]:
desired_queries = []
for f in self._local_folder.glob("*.sql"):
for f in local_folder.glob("*.sql"):
text = f.read_text("utf8")
if self._query_text_callback is not None:
text = self._query_text_callback(text)
desired_queries.append(
SimpleQuery(
dashboard_ref=dashboard_ref,
name=f.name,
query=text,
viz=self._parse_magic_comment(f, "-- viz ", text),
Expand All @@ -240,8 +271,12 @@ def _get_viz_options(self, query: SimpleQuery):
viz_args = viz_types[query.viz_type](**query.viz_args)
return viz_args

def _install_query(self, query: SimpleQuery, data_source_id: str, parent: str):
query_meta = {"data_source_id": data_source_id, "name": f"{self._name} - {query.name}", "query": query.query}
def _install_query(self, query: SimpleQuery, dashboard_name: str, data_source_id: str, parent: str):
query_meta = {
"data_source_id": data_source_id,
"name": f"{dashboard_name} - {query.name}",
"query": query.query,
}
if query.query_key in self._state:
return self._ws.queries.update(self._state[query.query_key], **query_meta)

Expand Down
21 changes: 13 additions & 8 deletions src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,17 @@ def run_workflow(self, step: str):
raise OperationFailed(msg) from None

def _create_dashboards(self):
local_query_files = self._find_project_root() / "src/databricks/labs/ucx/assessment/queries"
logger.info("Creating dashboards...")
local_query_files = self._find_project_root() / "src/databricks/labs/ucx/queries"
dash = DashboardFromFiles(
self._ws,
local_folder=local_query_files,
remote_folder=f"{self._install_folder}/queries",
name=self._name("UCX Assessment"),
name_prefix=self._name("UCX "),
warehouse_id=self._warehouse_id,
query_text_callback=self._current_config.replace_inventory_variable,
)
self._dashboards["assessment"] = dash.create_dashboard()
self._dashboards = dash.create_dashboards()

@property
def _warehouse_id(self) -> str:
Expand Down Expand Up @@ -364,14 +365,18 @@ def _create_readme(self):
continue
job_id = self._deployed_steps[step_name]
dashboard_link = ""
if step_name in self._dashboards:
dashboard_link = f"{self._ws.config.host}/sql/dashboards/{self._dashboards[step_name]}"
dashboard_link = f"Go to the [{step_name} dashboard]({dashboard_link}) after running the jobs."
dashboards_per_step = [d for d in self._dashboards.keys() if d.startswith(step_name)]
for dash in dashboards_per_step:
if len(dashboard_link) == 0:
dashboard_link += "Go to the one of the following dashboards after running the job:\n"
first, second = dash.replace("_", " ").title().split()
dashboard_url = f"{self._ws.config.host}/sql/dashboards/{self._dashboards[dash]}"
dashboard_link += f" - [{first} ({second}) dashboard]({dashboard_url})\n"
job_link = f"[{self._name(step_name)}]({self._ws.config.host}#job/{job_id})"
md.append("---\n\n")
md.append(f"## {job_link}\n\n")
md.append(f"{dashboard_link}\n\n")
md.append("The workflow consists of the following separate tasks:\n\n")
md.append(f"{dashboard_link}")
md.append("\nThe workflow consists of the following separate tasks:\n\n")
for t in self._sorted_tasks():
if t.workflow != step_name:
continue
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def setup_view(cfg: WorkspaceConfig):
@task(
"assessment",
depends_on=[crawl_grants, crawl_permissions, guess_external_locations, setup_view],
dashboard="assessment",
dashboard="assessment_main",
)
def assessment_report(_: WorkspaceConfig):
"""Refreshes the assessment dashboard after all previous tasks have been completed. Note that you can access the
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_jobs_with_no_inventory_database(
for step in required_workflows:
install.run_workflow(step)

@retried(on=[AssertionError], timeout=timedelta(minutes=1))
@retried(on=[AssertionError], timeout=timedelta(minutes=2))
def validate_groups():
group_manager = GroupManager(ws, GroupsConfig(auto=True))
acc_membership = group_manager.get_workspace_membership("Group")
Expand All @@ -185,7 +185,7 @@ def validate_groups():

return True

@retried(on=[AssertionError], timeout=timedelta(minutes=1))
@retried(on=[AssertionError], timeout=timedelta(minutes=2))
def validate_permissions():
logger.info("validating permissions")
policy_permissions = generic_permissions.load_as_dict("cluster-policies", cluster_policy.policy_id)
Expand All @@ -195,7 +195,7 @@ def validate_permissions():

return True

@retried(on=[AssertionError], timeout=timedelta(minutes=1))
@retried(on=[AssertionError], timeout=timedelta(minutes=2))
def validate_tacl():
logger.info("validating tacl")
table_a_grants = grants_crawler.for_table_info(table_a)
Expand Down
11 changes: 6 additions & 5 deletions tests/unit/assessment/test_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ def test_dashboard(mocker):
ws.query_visualizations.create.return_value = Visualization(id="abc")
ws.dashboard_widgets.create.return_value = Widget(id="abc")
installer = WorkspaceInstaller(ws)
local_query_files = installer._find_project_root() / "src/databricks/labs/ucx/assessment/queries"
local_query_files = installer._find_project_root() / "src/databricks/labs/ucx/queries"
dash = DashboardFromFiles(
ws,
local_folder=local_query_files,
remote_folder="/users/not_a_real_user/queries",
name="Assessment",
name_prefix="Assessment",
warehouse_id="000000",
query_text_callback=installer._current_config.replace_inventory_variable,
)
dashboard = dash.create_dashboard()
assert dashboard is not None
assert dashboard == "abc"
dashboards = dash.create_dashboards()
assert dashboards is not None
assert dashboards["assessment_main"] == "abc"
assert dashboards["assessment_azure"] == "abc"
2 changes: 1 addition & 1 deletion tests/unit/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def test_main_with_existing_conf_does_not_recreate_config(mocker):
def test_query_metadata(mocker):
ws = mocker.Mock()
install = WorkspaceInstaller(ws)
local_query_files = install._find_project_root() / "src/databricks/labs/ucx/assessment/queries"
local_query_files = install._find_project_root() / "src/databricks/labs/ucx/queries"
DashboardFromFiles(ws, local_query_files, "any", "any").validate()


Expand Down