-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add vdk sql-query command which will use the same database as the rest of the SQL interfaces (SQL steps, job_input methods) This will replace the per-db commends like vdk impala-query, vdk sqlite-query which are just problematic. This would enable to generalize a lot of the tutorials/examples we have which currently give impression they can be used only wiht "sqlite" (sqlite-query) or trino. Which is primary driver for doing the change now. The command would be used in functional tests where you want to query the default db without needing ot create a job for that. The command for now is hidden (hidden feature) since there are a few TODOs that would be resolved in subsequent PRs.
- Loading branch information
1 parent
27ea3ea
commit aef3b9e
Showing
3 changed files
with
160 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
projects/vdk-core/src/vdk/internal/builtin_plugins/connection/query_command_plugin.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import json | ||
import os | ||
import pathlib | ||
from pathlib import Path | ||
|
||
import click | ||
from vdk.api.plugin.hook_markers import hookimpl | ||
from vdk.internal.builtin_plugins.connection.managed_connection_base import ( | ||
ManagedConnectionBase, | ||
) | ||
from vdk.internal.builtin_plugins.run.standalone_data_job import ( | ||
StandaloneDataJobFactory, | ||
) | ||
from vdk.internal.util.decorators import closing_noexcept_on_close | ||
|
||
OUTPUT_OPTIONS = ["json", "text"] | ||
|
||
# The command is hidden as it's still experimental. | ||
# TODOs: | ||
# Remove logs by default only show output (and perhaps warning logs) | ||
# When logging omit "job details" logs | ||
# Make clear how it is authenticated. Currently | ||
|
||
|
||
@click.command( | ||
name="sql-query", | ||
hidden=True, # Hidden as it is experimental | ||
help="executes SQL query against configured database. " | ||
"The database used is configured with 'db_default_type' option. " | ||
"See vdk config-help for more info. ", | ||
) | ||
@click.option( | ||
"-q", "--query", type=click.STRING, required=True, help="The query to be executed" | ||
) | ||
@click.option( | ||
"-o", | ||
"--output", | ||
type=click.Choice(OUTPUT_OPTIONS, False), | ||
default="text", | ||
help=f"Output format. It can be one of: {OUTPUT_OPTIONS}.", | ||
) | ||
@click.pass_context | ||
def sql_query(ctx: click.Context, query: str, output: str) -> None: | ||
# TODO: os.getcwd() is passed to StandaloneDataJob, this means that if the current directory is same as data job | ||
# it would authenticate using the data job credentials (if there are any like keytab) | ||
# Is that a good idea? | ||
|
||
with StandaloneDataJobFactory.create( | ||
data_job_directory=Path(os.getcwd()) | ||
) as job_input: | ||
conn: ManagedConnectionBase = job_input.get_managed_connection() | ||
with closing_noexcept_on_close(conn.cursor()) as cursor: | ||
cursor.execute(query) | ||
column_names = ( | ||
[column_info[0] for column_info in cursor.description] | ||
if cursor.description | ||
else () # same as the default value for the headers parameter of the tabulate function | ||
) | ||
# TODO: this is basically emulating | ||
# https://github.com/vmware/versatile-data-kit/blob/main/projects/vdk-control-cli/src/vdk/internal/control/utils/output_printer.py | ||
# It will be best printer functiality to be reusable (either moved to vdk-core or in a library/plugin | ||
result = cursor.fetchall() | ||
if output.lower() == "json": | ||
result = [ | ||
{column_names[i]: v for i, v in enumerate(row)} for row in result | ||
] | ||
click.echo(json.dumps(result)) | ||
elif output.lower() == "text": | ||
from tabulate import tabulate | ||
|
||
click.echo(tabulate(result, headers=column_names)) | ||
else: | ||
raise ValueError( | ||
f"Unsupported output format. Choose between: {OUTPUT_OPTIONS}" | ||
) | ||
|
||
|
||
class QueryCommandPlugin: | ||
@staticmethod | ||
@hookimpl | ||
def vdk_command_line(root_command: click.Group) -> None: | ||
root_command.add_command(sql_query) |
72 changes: 72 additions & 0 deletions
72
projects/vdk-core/tests/functional/cli/test_query_command_plugin.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import json | ||
import os | ||
from unittest import mock | ||
|
||
import py | ||
from click.testing import CliRunner | ||
from click.testing import Result | ||
from vdk.plugin.test_utils.util_funcs import cli_assert_equal | ||
from vdk.plugin.test_utils.util_funcs import CliEntryBasedTestRunner | ||
|
||
VDK_DB_DEFAULT_TYPE = "VDK_DB_DEFAULT_TYPE" | ||
|
||
|
||
def test_execute_sql(tmpdir: py.path.local): | ||
with mock.patch.dict( | ||
os.environ, | ||
{ | ||
VDK_DB_DEFAULT_TYPE: "sqlite", | ||
"SQLITE_FILE": os.path.join(str(tmpdir), "sqlite-tmp"), | ||
}, | ||
): | ||
runner = CliEntryBasedTestRunner() | ||
|
||
result: Result = runner.invoke( | ||
[ | ||
"sql-query", | ||
"--query", | ||
"CREATE TABLE cli_stocks (date text, symbol text, price real)", | ||
] | ||
) | ||
cli_assert_equal(0, result) | ||
|
||
result: Result = runner.invoke( | ||
[ | ||
"sql-query", | ||
"--query", | ||
""" | ||
INSERT INTO cli_stocks VALUES ('2020-01-01', 'GOOG', 123.0), ('2020-01-01', 'GOOG', 123.0) | ||
""", | ||
] | ||
) | ||
cli_assert_equal(0, result) | ||
|
||
result: Result = runner.invoke( | ||
["sql-query", "--query", "select * from cli_stocks"], | ||
runner=CliRunner( | ||
mix_stderr=False | ||
), # TODO: replace when CliEntryBasedTestRunner add support for it | ||
) | ||
cli_assert_equal(0, result) | ||
expected_stdout = ( | ||
"date symbol price\n" | ||
"---------- -------- -------\n" | ||
"2020-01-01 GOOG 123\n" | ||
"2020-01-01 GOOG 123\n" | ||
) | ||
assert expected_stdout == result.stdout | ||
|
||
result: Result = runner.invoke( | ||
["sql-query", "--output", "json", "--query", "select * from cli_stocks"], | ||
runner=CliRunner( | ||
mix_stderr=False | ||
), # TODO: replace when CliEntryBasedTestRunner add support for it | ||
) | ||
cli_assert_equal(0, result) | ||
expected_stdout_json = [ | ||
{"date": "2020-01-01", "symbol": "GOOG", "price": 123.0}, | ||
{"date": "2020-01-01", "symbol": "GOOG", "price": 123.0}, | ||
] | ||
assert expected_stdout_json == json.loads(result.stdout) |