Skip to content

Commit

Permalink
vdk-core: add vdk sql-query command
Browse files Browse the repository at this point in the history
Add vdk sql-query command which will use the same database as the rest
of the SQL interfaces (SQL steps, job_input methods)

This will replace the per-db commends like vdk impala-query, vdk
sqlite-query which are just problematic.

This would enable to generalize a lot of the tutorials/examples we have
which currently give impression they can be used only wiht "sqlite"
(sqlite-query) or trino. Which is primary driver for doing the change
now.

The command would be used in functional tests where you want to query
the default db without needing ot create a job for that.

The command for now is hidden (hidden feature) since there are a few
TODOs that would be resolved in subsequent PRs.
  • Loading branch information
antoniivanov committed Aug 2, 2023
1 parent 27ea3ea commit 2e0f4e6
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from vdk.internal.builtin_plugins.connection.connection_plugin import (
QueryDecoratorPlugin,
)
from vdk.internal.builtin_plugins.connection.query_command_plugin import (
QueryCommandPlugin,
)
from vdk.internal.builtin_plugins.debug.debug import DebugPlugins
from vdk.internal.builtin_plugins.ingestion.ingester_configuration_plugin import (
IngesterConfigurationPlugin,
Expand Down Expand Up @@ -130,6 +133,7 @@ def vdk_start(plugin_registry: PluginRegistry, command_line_args: List) -> None:
# connection plugins
plugin_registry.add_hook_specs(ConnectionHookSpec)
plugin_registry.load_plugin_with_hooks_impl(QueryDecoratorPlugin())
plugin_registry.load_plugin_with_hooks_impl(QueryCommandPlugin())


@hookimpl(tryfirst=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import json
import os
from pathlib import Path

import click
from vdk.api.plugin.hook_markers import hookimpl
from vdk.internal.builtin_plugins.connection.managed_connection_base import (
ManagedConnectionBase,
)
from vdk.internal.builtin_plugins.run.standalone_data_job import (
StandaloneDataJobFactory,
)
from vdk.internal.util.decorators import closing_noexcept_on_close

OUTPUT_OPTIONS = ["json", "text"]

# The command is hidden as it's still experimental.
# TODOs:
# Remove logs by default only show output (and perhaps warning logs)
# When logging omit "job details" logs
# Make clear how it is authenticated. Currently


@click.command(
name="sql-query",
hidden=True, # Hidden as it is experimental
help="executes SQL query against configured database. "
"The database used is configured with 'db_default_type' option. "
"See vdk config-help for more info. ",
)
@click.option(
"-q", "--query", type=click.STRING, required=True, help="The query to be executed"
)
@click.option(
"-o",
"--output",
type=click.Choice(OUTPUT_OPTIONS, False),
default="text",
help=f"Output format. It can be one of: {OUTPUT_OPTIONS}.",
)
@click.pass_context
def sql_query(ctx: click.Context, query: str, output: str) -> None:
# TODO: os.getcwd() is passed to StandaloneDataJob, this means that if the current directory is same as data job
# it would authenticate using the data job credentials (if there are any like keytab)
# Is that a good idea?

with StandaloneDataJobFactory.create(
data_job_directory=Path(os.getcwd())
) as job_input:
conn: ManagedConnectionBase = job_input.get_managed_connection()
with closing_noexcept_on_close(conn.cursor()) as cursor:
cursor.execute(query)
column_names = (
[column_info[0] for column_info in cursor.description]
if cursor.description
else () # same as the default value for the headers parameter of the tabulate function
)
# TODO: this is basically emulating
# https://github.com/vmware/versatile-data-kit/blob/main/projects/vdk-control-cli/src/vdk/internal/control/utils/output_printer.py
# It will be best printer functiality to be reusable (either moved to vdk-core or in a library/plugin
result = cursor.fetchall()
if output.lower() == "json":
result = [
{column_names[i]: v for i, v in enumerate(row)} for row in result
]
click.echo(json.dumps(result))
elif output.lower() == "text":
from tabulate import tabulate

click.echo(tabulate(result, headers=column_names))
else:
raise ValueError(
f"Unsupported output format. Choose between: {OUTPUT_OPTIONS}"
)


class QueryCommandPlugin:
@staticmethod
@hookimpl
def vdk_command_line(root_command: click.Group) -> None:
root_command.add_command(sql_query)
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import json
import os
from unittest import mock

import py
from click.testing import CliRunner
from click.testing import Result
from vdk.plugin.test_utils.util_funcs import cli_assert_equal
from vdk.plugin.test_utils.util_funcs import CliEntryBasedTestRunner

VDK_DB_DEFAULT_TYPE = "VDK_DB_DEFAULT_TYPE"


def test_execute_sql(tmpdir: py.path.local):
with mock.patch.dict(
os.environ,
{
VDK_DB_DEFAULT_TYPE: "sqlite",
"SQLITE_FILE": os.path.join(str(tmpdir), "sqlite-tmp"),
},
):
runner = CliEntryBasedTestRunner()

result: Result = runner.invoke(
[
"sql-query",
"--query",
"CREATE TABLE cli_stocks (date text, symbol text, price real)",
]
)
cli_assert_equal(0, result)

result: Result = runner.invoke(
[
"sql-query",
"--query",
"""
INSERT INTO cli_stocks VALUES ('2020-01-01', 'GOOG', 123.0), ('2020-01-01', 'GOOG', 123.0)
""",
]
)
cli_assert_equal(0, result)

result: Result = runner.invoke(
["sql-query", "--query", "select * from cli_stocks"],
runner=CliRunner(
mix_stderr=False
), # TODO: replace when CliEntryBasedTestRunner add support for it
)
cli_assert_equal(0, result)
expected_stdout = (
"date symbol price\n"
"---------- -------- -------\n"
"2020-01-01 GOOG 123\n"
"2020-01-01 GOOG 123\n"
)
assert expected_stdout == result.stdout

result: Result = runner.invoke(
["sql-query", "--output", "json", "--query", "select * from cli_stocks"],
runner=CliRunner(
mix_stderr=False
), # TODO: replace when CliEntryBasedTestRunner add support for it
)
cli_assert_equal(0, result)
expected_stdout_json = [
{"date": "2020-01-01", "symbol": "GOOG", "price": 123.0},
{"date": "2020-01-01", "symbol": "GOOG", "price": 123.0},
]
assert expected_stdout_json == json.loads(result.stdout)

0 comments on commit 2e0f4e6

Please sign in to comment.