From 62f7cc4f86c8deaafd53232e30bfbbc3c78469de Mon Sep 17 00:00:00 2001
From: "J.C. Zhong"
Date: Tue, 28 Nov 2023 22:28:02 +0000
Subject: [PATCH] add break change & bump version

---
 .../docs/changelog/breaking_change.md        |  9 ++++++
 .../docs/integrations/add_table_upload.md    |  8 ++---
 .../lib/table_upload/exporter/s3_exporter.py | 30 +++++++++----------
 .../TableUploader/TableUploaderForm.tsx      |  6 +++-
 4 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/docs_website/docs/changelog/breaking_change.md b/docs_website/docs/changelog/breaking_change.md
index e42ef7e1e..feebd722c 100644
--- a/docs_website/docs/changelog/breaking_change.md
+++ b/docs_website/docs/changelog/breaking_change.md
@@ -7,12 +7,21 @@ slug: /changelog
 
 Here are the list of breaking changes that you should be aware of when updating Querybook:
 
+## v3.29.0
+
+The following changes were made to `S3BaseExporter` (the CSV table uploader feature):
+
+- Both `s3_path` and `use_schema_location` are now optional.
+- If neither is provided, or `use_schema_location=False`, the table will be created as a managed table, whose location is determined by the query engine.
+
 ## v3.27.0
+
 Updated properties of `QueryValidationResult` object. `line` and `ch` are replaced with `start_line` and `start_ch` respectively.
 
 ## v3.22.0
 
 Updated the charset of table `data_element` to `utf8mb4`. For those mysql db's default charset is not utf8, please run below sql to update it if needed.
+
 ```sql
 ALTER TABLE data_element CONVERT TO CHARACTER SET utf8mb4
 ```
diff --git a/docs_website/docs/integrations/add_table_upload.md b/docs_website/docs/integrations/add_table_upload.md
index 07411f621..56dfa2716 100644
--- a/docs_website/docs/integrations/add_table_upload.md
+++ b/docs_website/docs/integrations/add_table_upload.md
@@ -42,10 +42,8 @@ Included by default: No
 
 Available options:
 
-Either s3_path or use_schema_location must be supplied.
-
-- s3_path (str): if supplied, will use it as the root path for upload. Must be the full s3 path like s3://bucket/key, the trailing / is optional.
-- use_schema_location (boolean):
+- s3_path (str, optional): if supplied, it will be used as the root path for uploads. Must be a full s3 path like s3://bucket/key; the trailing / is optional.
+- use_schema_location (boolean, optional):
   if true, the upload root path is inferred by locationUri specified by the schema/database in HMS. To use this option, the engine must be connected to a metastore that uses HMSMetastoreLoader (or its derived class).
   if false, it will be created as managed table, whose location will be determined automatically by the query engine.
@@ -53,6 +51,8 @@ Either s3_path or use_schema_location must be supplied.
 Checkout here for examples in SparkSQL: https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-hiveformat.html#examples
 For Trino/Presto, it would be the WITH statement: https://trino.io/docs/current/sql/create-table.html
 
+If neither s3_path nor use_schema_location is supplied, it is treated the same as `use_schema_location=False`, and the table will be created as a managed table.
+
 ### S3 Parquet exporter
 
 This would upload a Parquet file instead of a CSV file. In addition to dependencies such as boto3, `pyarrow` must also be installed.
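To make the three resulting upload configurations concrete, here is a minimal sketch of the `exporter_config` shapes the docs above describe and the table type each yields. The two keys are the documented options; the bucket and prefix values are invented for illustration.

```python
# Illustrative exporter_config values for S3BaseExporter after this change.
# The keys ("s3_path", "use_schema_location") are the documented options;
# the S3 paths below are made up.

# 1) Explicit root path: uploads land under the given prefix and the table
#    is created as an external table pointing at that location.
external_config = {"s3_path": "s3://my-bucket/table-uploads"}

# 2) Schema location: the upload root is read from the schema's locationUri
#    in HMS; requires a metastore using HMSMetastoreLoader (or a subclass).
schema_location_config = {"use_schema_location": True}

# 3) New default: with neither key set (same as use_schema_location=False),
#    the table is created as a managed table and the query engine picks
#    its location.
managed_config = {}
```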
diff --git a/querybook/server/lib/table_upload/exporter/s3_exporter.py b/querybook/server/lib/table_upload/exporter/s3_exporter.py
index e59c4ae96..a81e0d506 100644
--- a/querybook/server/lib/table_upload/exporter/s3_exporter.py
+++ b/querybook/server/lib/table_upload/exporter/s3_exporter.py
@@ -36,17 +36,13 @@
     - table_properties (List[str]): list of table properties passed, this must be query engine specific.
       Checkout here for examples in SparkSQL: https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-hiveformat.html#examples
       For Trino/Presto, it would be the WITH statement: https://trino.io/docs/current/sql/create-table.html
+
+    If neither s3_path nor use_schema_location is provided, it is treated the same as `use_schema_location=False`,
+    and the table will be created as a managed table.
 """
 
 
 class S3BaseExporter(BaseTableUploadExporter):
-    def __init__(self, exporter_config: dict = {}):
-        if ("s3_path" not in exporter_config) and (
-            "use_schema_location" not in exporter_config
-        ):
-            raise Exception("Either s3_path or use_schema_location must be specified")
-        super().__init__(exporter_config)
-
     @abstractmethod
     def UPLOAD_FILE_TYPE(cls) -> str:
         """Override this to specify what kind of file is getting uploaded
@@ -85,7 +81,7 @@ def destination_s3_folder(self, session=None) -> str:
         metastore = get_metastore_loader(query_engine.metastore_id, session=session)
 
         if metastore is None:
-            raise Exception("Invalid metastore")
+            raise Exception("Invalid metastore for table upload")
 
         if self._exporter_config.get("use_schema_location", False):
             schema_location_uri = metastore.get_schema_location(schema_name)
@@ -96,12 +92,12 @@ def destination_s3_folder(self, session=None) -> str:
         # Use its actual location for managed tables
         table_location = metastore.get_table_location(schema_name, table_name)
 
-        if table_location:
-            return sanitize_s3_url(table_location)
-
-        raise Exception(
-            "Cant get the table location from metastore. Please make sure the query engine supports managed table with default location."
-        )
+        if not table_location:
+            raise Exception(
+                "Can't get the table location from the metastore. Please make sure the query engine supports managed tables with a default location."
+            )
+        return sanitize_s3_url(table_location)
 
     @with_session
     def _handle_if_table_exists(self, session=None):
@@ -125,14 +121,16 @@ def _handle_if_table_exists(self, session=None):
     def _get_table_create_query(self, session=None) -> str:
         query_engine = get_query_engine_by_id(self._engine_id, session=session)
         schema_name, table_name = self._fq_table_name
-        is_managed = self._exporter_config.get("use_schema_location") is False
+        is_external = "s3_path" in self._exporter_config or self._exporter_config.get(
+            "use_schema_location"
+        )
         return get_create_table_statement(
             language=query_engine.language,
             table_name=table_name,
             schema_name=schema_name,
             column_name_types=self._table_config["column_name_types"],
-            # table location is not needed for managed (non external) table creation
-            file_location=None if is_managed else self.destination_s3_folder(),
+            # table location is only needed for external (non managed) table creation
+            file_location=self.destination_s3_folder() if is_external else None,
             file_format=self.UPLOAD_FILE_TYPE(),
             table_properties=self._exporter_config.get("table_properties", []),
         )
diff --git a/querybook/webapp/components/TableUploader/TableUploaderForm.tsx b/querybook/webapp/components/TableUploader/TableUploaderForm.tsx
index 9927b5196..e6e2c5a75 100644
--- a/querybook/webapp/components/TableUploader/TableUploaderForm.tsx
+++ b/querybook/webapp/components/TableUploader/TableUploaderForm.tsx
@@ -78,9 +78,13 @@ export const TableUploaderForm: React.FC = ({
         });
 
         // sometimes there will be sync delay between the metastore and querybook
-        // skip the redirect if the table has not been synced over.
+        // skip the redirection if the table has not been synced over.
         if (tableId) {
             navigateWithinEnv(`/table/${tableId}`);
+        } else {
+            toast(
+                'Waiting for the table to be synced over from the metastore.'
+            );
         }
         onHide();
     },
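For clarity, the managed-vs-external decision introduced in `_get_table_create_query` reduces to the small predicate sketched below. This is an illustrative standalone sketch, not code from the patch: `resolve_file_location` is a hypothetical helper mirroring the inline logic, and the S3 URLs are invented.

```python
from typing import Optional


def resolve_file_location(
    exporter_config: dict, destination_s3_folder: str
) -> Optional[str]:
    """Return the explicit location for the CREATE TABLE statement, or None.

    A table is external when s3_path is configured or use_schema_location is
    truthy; otherwise it is managed and the query engine picks the location.
    """
    is_external = "s3_path" in exporter_config or exporter_config.get(
        "use_schema_location"
    )
    return destination_s3_folder if is_external else None


# Invented values for illustration:
assert resolve_file_location({"s3_path": "s3://bkt/root"}, "s3://bkt/root/db/t") == "s3://bkt/root/db/t"
assert resolve_file_location({"use_schema_location": True}, "s3://warehouse/db/t") == "s3://warehouse/db/t"
assert resolve_file_location({}, "s3://unused") is None  # managed table
assert resolve_file_location({"use_schema_location": False}, "s3://unused") is None  # managed table
```

The design choice: any explicit location signal (an `s3_path`, or a truthy `use_schema_location`) makes the table external; everything else falls through to a managed table, matching the updated docs and changelog entry.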