Skip to content

Commit

Permalink
feat: load tag from metastore (#1164)
Browse files Browse the repository at this point in the history
* feat: load tag from metastore

* comments

* comments
  • Loading branch information
jczhong84 authored Feb 15, 2023
1 parent b36a5c0 commit 1308ea7
Show file tree
Hide file tree
Showing 23 changed files with 233 additions and 103 deletions.
8 changes: 8 additions & 0 deletions querybook/server/const/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class MetadataMode(Enum):
READ_ONLY = "read_only"

# On saving, metadata will only be written to the querybook db. This is the default mode if not specified.
# Metadata in this mode is also not loaded from the metastore.
WRITE_LOCAL = "write_local"

# On saving, metadata will be written back to metastore, as well as querybook db
Expand All @@ -37,5 +38,12 @@ class MetastoreLoaderConfig:
def __init__(self, config: dict[MetadataType, MetadataMode]):
    """Build the effective config by layering ``config`` over the defaults.

    For any key present in both, the entry from ``config`` replaces the
    one from ``self._default_config``.
    """
    merged = dict(self._default_config)
    merged.update(config)
    self._config = merged

def can_load_external_metadata(self, metadataType: MetadataType) -> bool:
    """Tell whether the given metadata type is loaded from the metastore.

    Returns True when the configured mode is READ_ONLY or WRITE_BACK.
    A type missing from the config is treated as WRITE_LOCAL, for which
    this returns False.
    """
    mode = self._config.get(metadataType, MetadataMode.WRITE_LOCAL)
    loadable_modes = (MetadataMode.READ_ONLY, MetadataMode.WRITE_BACK)
    return mode in loadable_modes

def to_dict(self):
    """Return the config as a plain dict built from each key's and value's ``.value``."""
    serialized = {}
    for metadata_type, mode in self._config.items():
        serialized[metadata_type.value] = mode.value
    return serialized
61 changes: 12 additions & 49 deletions querybook/server/lib/metastore/base_metastore_loader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABCMeta, abstractmethod, abstractclassmethod
import gevent
import math
from typing import NamedTuple, List, Dict, Tuple, Optional
from typing import List, Dict, Tuple, Optional
import traceback

from app.db import DBSession, with_session
Expand All @@ -26,60 +26,14 @@
get_schema_by_name,
get_table_by_schema_id_and_name,
)
from logic.tag import create_table_tags

from .metastore_data_types import DataTable, DataColumn
from .utils import MetastoreTableACLChecker

LOG = get_logger(__name__)


class DataSchema(NamedTuple):
    """Immutable record identifying a schema by its name."""

    name: str


class DataTable(NamedTuple):
    """Immutable description of a single table.

    Only ``name`` is required; every other field carries a default.

    NOTE: the list-valued defaults are single objects shared by every
    instance that relies on them, so they must never be mutated in place.
    """

    name: str

    # Free-form label for the kind of table
    type: str = None
    owner: str = None

    # Description coming from the metastore; HTML is expected
    description: str = None

    # Timestamps are expected in UTC seconds
    table_created_at: int = None
    table_updated_at: int = None
    table_updated_by: str = None

    # Table size in bytes
    data_size_bytes: int = None
    # Where the raw file is located
    location: str = None

    # JSON array of partitions
    partitions: List = []

    # Raw info, kept as-is
    raw_description: str = None

    # Array of partition keys
    partition_keys: List[str] = []

    # Custom properties
    custom_properties: dict[str, str] = None


class DataColumn(NamedTuple):
    """Immutable description of a single column; ``name`` and ``type`` are required."""

    name: str
    type: str

    # Comment given in the SQL statement that created the table
    comment: str = None

    # User-edited description from the metastore; HTML is expected
    description: str = None


class BaseMetastoreLoader(metaclass=ABCMeta):
loader_config: MetastoreLoaderConfig = MetastoreLoaderConfig({})

Expand Down Expand Up @@ -384,6 +338,15 @@ def _create_table_table(
commit=False,
session=session,
)

# create tags if the metastore is configured to sync tags
if self.loader_config.can_load_external_metadata(MetadataType.TAG):
create_table_tags(
table_id=table_id,
tags=table.tags,
commit=False,
session=session,
)
session.commit()
update_table_by_id(table_id, session=session)
return table_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

from clients.glue_client import GlueDataCatalogClient
from lib.form import StructFormField, FormField
from lib.metastore.base_metastore_loader import (
BaseMetastoreLoader,
from lib.metastore.base_metastore_loader import BaseMetastoreLoader
from lib.metastore.metastore_data_types import (
DataTable,
DataColumn,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from hmsclient.genthrift.hive_metastore.ttypes import NoSuchObjectException

from clients.hms_client import HiveMetastoreClient
from lib.metastore.base_metastore_loader import (
BaseMetastoreLoader,
from lib.metastore.base_metastore_loader import BaseMetastoreLoader
from lib.metastore.metastore_data_types import (
DataTable,
DataColumn,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

from lib.utils.utils import DATETIME_TO_UTC
from lib.utils import json as ujson
from lib.metastore.base_metastore_loader import DataTable, DataColumn
from lib.metastore.metastore_data_types import (
DataTable,
DataColumn,
)
from .sqlalchemy_metastore_loader import SqlAlchemyMetastoreLoader


Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Dict, List, Tuple

from lib.metastore.base_metastore_loader import (
BaseMetastoreLoader,
from lib.metastore.base_metastore_loader import BaseMetastoreLoader
from lib.metastore.metastore_data_types import (
DataTable,
DataColumn,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from clients.hms_client import HiveMetastoreClient
from lib.metastore.loaders.hive_metastore_loader import HMSMetastoreLoader
from lib.metastore.loaders.form_fileds import load_partitions_field
from lib.metastore.base_metastore_loader import DataTable, DataColumn
from lib.metastore.metastore_data_types import (
DataTable,
DataColumn,
)


class HMSThriftMetastoreLoader(HMSMetastoreLoader):
Expand Down
60 changes: 60 additions & 0 deletions querybook/server/lib/metastore/metastore_data_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from typing import NamedTuple, List


class DataSchema(NamedTuple):
    """Immutable record identifying a schema by its name."""

    name: str


class DataTag(NamedTuple):
    """Immutable description of a tag; only ``name`` is required."""

    name: str
    # The remaining fields are persisted in tag.meta
    type: str = None
    description: str = None
    color: str = None


class DataTable(NamedTuple):
    """Immutable description of a single table, including its tags.

    Only ``name`` is required; every other field carries a default.

    NOTE: the list-valued defaults are single objects shared by every
    instance that relies on them, so they must never be mutated in place.
    """

    name: str

    # Free-form label for the kind of table
    type: str = None
    owner: str = None

    # Description coming from the metastore; HTML is expected
    description: str = None

    # Tags attached to the table
    tags: List[DataTag] = []

    # Timestamps are expected in UTC seconds
    table_created_at: int = None
    table_updated_at: int = None
    table_updated_by: str = None

    # Table size in bytes
    data_size_bytes: int = None
    # Where the raw file is located
    location: str = None

    # JSON array of partitions
    partitions: List = []

    # Raw info, kept as-is
    raw_description: str = None

    # Array of partition keys
    partition_keys: List[str] = []

    # Custom properties
    custom_properties: dict[str, str] = None


class DataColumn(NamedTuple):
    """Immutable description of a single column; ``name`` and ``type`` are required."""

    name: str
    type: str

    # Comment given in the SQL statement that created the table
    comment: str = None

    # User-edited description from the metastore; HTML is expected
    description: str = None
45 changes: 42 additions & 3 deletions querybook/server/logic/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from app.db import with_session
from models.tag import Tag, TagItem
from logic.metastore import update_es_tables_by_id
from lib.metastore.metastore_data_types import DataTag


@with_session
Expand All @@ -28,19 +29,19 @@ def get_tags_by_keyword(keyword, limit=10, session=None):


@with_session
def create_or_update_tag(tag_name, commit=True, session=None):
def create_or_update_tag(tag_name, meta={}, commit=True, session=None):
tag = Tag.get(name=tag_name, session=session)

if not tag:
tag = Tag.create(
{"name": tag_name, "count": 1, "meta": {}},
{"name": tag_name, "count": 1, "meta": meta},
commit=commit,
session=session,
)
else:
tag = Tag.update(
id=tag.id,
fields={"count": tag.count + 1},
fields={"count": tag.count + 1, "meta": meta},
skip_if_value_none=True,
commit=commit,
session=session,
Expand Down Expand Up @@ -89,3 +90,41 @@ def delete_tag_from_table(
update_es_tables_by_id(tag_item.table_id)
else:
session.flush()


@with_session
def create_table_tags(
    table_id: int = None,
    tags: list[DataTag] = None,
    commit=True,
    session=None,
):
    """Replace a table's tags with the tags loaded from the metastore.

    All existing TagItem rows of the table are deleted first. Then, for each
    given tag, the Tag row is created or updated and a new TagItem links it
    to the table.

    Args:
        table_id: id of the table whose tags are replaced.
        tags: tags loaded from the metastore. ``None`` or an empty list
            only clears the table's existing tags.
        commit: when True the session is committed once at the end;
            otherwise it is only flushed and the caller owns the commit.
        session: database session used for every query in this function.
    """
    # delete all tags from the table
    session.query(TagItem).filter_by(table_id=table_id).delete()

    # NOTE(review): create_or_update_tag bumps Tag.count on every call, while
    # the TagItem rows above are removed without decrementing it. Repeated
    # syncs look like they inflate the count; confirm.
    for tag in tags or ():
        candidate_meta = {
            "type": tag.type,
            "tooltip": tag.description,
            "color": tag.color,
            "admin": True,
        }
        # filter out properties with none values
        meta = {k: v for k, v in candidate_meta.items() if v is not None}

        # Create the tag, or update it when it already exists. Never commit
        # here: the delete above and all per-tag writes must land together
        # in the single commit/flush at the end.
        create_or_update_tag(
            tag_name=tag.name, meta=meta, commit=False, session=session
        )

        # Associate the tag with the table. commit=False is passed explicitly
        # so the caller's commit flag is honored (Tag.create in this module
        # takes the same keyword).
        TagItem.create(
            {"tag_name": tag.name, "table_id": table_id, "uid": None},
            commit=False,
            session=session,
        )

    if commit:
        session.commit()
    else:
        session.flush()
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from datetime import datetime

from lib.metastore.loaders.glue_data_catalog_loader import GlueDataCatalogLoader
from lib.metastore.base_metastore_loader import DataColumn, DataTable
from lib.metastore.metastore_data_types import (
DataTable,
DataColumn,
)

moto_import_failed = False
try:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.DataTableTags {
flex-wrap: wrap;

.Tag {
.TableTag {
margin: 4px 12px 4px 0px;
cursor: pointer;
}
Expand Down
20 changes: 6 additions & 14 deletions querybook/webapp/components/DataTableTags/DataTableTags.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ export const TableTag: React.FC<{
);

return (
<>
<div className="TableTag">
{canUserUpdate && (
<ContextMenu
anchorRef={tagRef}
Expand All @@ -138,28 +138,20 @@ export const TableTag: React.FC<{
onHide={() => setShowConfigModal(false)}
/>
)}

<HoverIconTag
key={tag.id}
name={tag.name}
type={tagMeta.type}
icon={tagMeta.icon}
iconOnHover={canUserDelete ? 'X' : null}
onIconHoverClick={canUserDelete ? handleDeleteTag : null}
tooltip={tagMeta.tooltip}
tooltipPos={'up'}
color={tagMeta.color}
onClick={handleTagClick}
ref={tagRef}
withBorder={tagMeta.admin}
mini={mini}
>
{tagMeta.icon && (
<Icon
name={tagMeta.icon as any}
size={16}
className="mr4"
/>
)}
<span>{tag.name}</span>
</HoverIconTag>
</>
/>
</div>
);
};
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ export const TableTagConfigModal: React.FC<{
{({ submitForm }) => (
<div className="ph12">
<FormWrapper minLabelWidth="120px">
<SimpleField
name="type"
type="input"
help="The type of the tag. E.g. table domain"
/>
<SimpleField name="tooltip" type="input" />
<SimpleField
name="color"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ export const TableTagGroupSelect: React.FC<{
{tags.map((tag) => (
<HoverIconTag
key={tag}
name={tag}
iconOnHover={'X'}
onIconHoverClick={() => handleTagRemove(tag)}
>
<span>{tag}</span>
</HoverIconTag>
/>
))}
</div>
) : null;
Expand Down
Loading

0 comments on commit 1308ea7

Please sign in to comment.