Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vdk-core: add vdk sql-query command #2512

Merged
merged 4 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions projects/vdk-core/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ pytest-randomly
pytest-sugar
reorder-python-imports
smtpdfix
vdk-sqlite
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from vdk.internal.builtin_plugins.connection.connection_plugin import (
QueryDecoratorPlugin,
)
from vdk.internal.builtin_plugins.connection.query_command_plugin import (
QueryCommandPlugin,
)
from vdk.internal.builtin_plugins.debug.debug import DebugPlugins
from vdk.internal.builtin_plugins.ingestion.ingester_configuration_plugin import (
IngesterConfigurationPlugin,
Expand Down Expand Up @@ -130,6 +133,7 @@ def vdk_start(plugin_registry: PluginRegistry, command_line_args: List) -> None:
# connection plugins
plugin_registry.add_hook_specs(ConnectionHookSpec)
plugin_registry.load_plugin_with_hooks_impl(QueryDecoratorPlugin())
plugin_registry.load_plugin_with_hooks_impl(QueryCommandPlugin())


@hookimpl(tryfirst=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import json
import os
from pathlib import Path

import click
from vdk.api.plugin.hook_markers import hookimpl
from vdk.internal.builtin_plugins.connection.managed_connection_base import (
ManagedConnectionBase,
)
from vdk.internal.builtin_plugins.run.standalone_data_job import (
StandaloneDataJobFactory,
)
from vdk.internal.util.decorators import closing_noexcept_on_close

OUTPUT_OPTIONS = ["json", "text"]

# The command is hidden as it's still experimental.
# TODOs:
# Remove logs by default only show output (and perhaps warning logs)
# When logging omit "job details" logs
# Make clear how it is authenticated.


@click.command(
name="sql-query",
hidden=True, # Hidden as it is experimental
help="executes SQL query against configured database. "
"The database used is configured with 'db_default_type' option. "
"See vdk config-help for more info. ",
)
@click.option(
"-q", "--query", type=click.STRING, required=True, help="The query to be executed"
)
@click.option(
"-o",
"--output",
type=click.Choice(OUTPUT_OPTIONS, False),
default="text",
help=f"Output format. It can be one of: {OUTPUT_OPTIONS}.",
)
@click.pass_context
def sql_query(ctx: click.Context, query: str, output: str) -> None:
# TODO: os.getcwd() is passed to StandaloneDataJob, this means that if the current directory is same as data job
# it would authenticate using the data job credentials (if there are any like keytab)
# Is that a good idea?

with StandaloneDataJobFactory.create(
data_job_directory=Path(os.getcwd())
) as job_input:
conn: ManagedConnectionBase = job_input.get_managed_connection()
with closing_noexcept_on_close(conn.cursor()) as cursor:
cursor.execute(query)
column_names = (
[column_info[0] for column_info in cursor.description]
if cursor.description
else () # same as the default value for the headers parameter of the tabulate function
)
# TODO: this is basically emulating
# https://github.com/vmware/versatile-data-kit/blob/main/projects/vdk-control-cli/src/vdk/internal/control/utils/output_printer.py
# It will be best printer functionality to be reusable (either moved to vdk-core or in a library/plugin)
result = cursor.fetchall()
if output.lower() == "json":
result = [
{column_names[i]: v for i, v in enumerate(row)} for row in result
]
click.echo(json.dumps(result))
elif output.lower() == "text":
from tabulate import tabulate

click.echo(tabulate(result, headers=column_names))
else:
raise ValueError(
f"Unsupported output format. Choose between: {OUTPUT_OPTIONS}"
)


class QueryCommandPlugin:
@staticmethod
@hookimpl
def vdk_command_line(root_command: click.Group) -> None:
root_command.add_command(sql_query)
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import json
import os
from unittest import mock

import py
from click.testing import CliRunner
from click.testing import Result
from vdk.plugin.test_utils.util_funcs import cli_assert_equal
from vdk.plugin.test_utils.util_funcs import CliEntryBasedTestRunner

VDK_DB_DEFAULT_TYPE = "VDK_DB_DEFAULT_TYPE"


def test_execute_sql(tmpdir: py.path.local):
with mock.patch.dict(
os.environ,
{
VDK_DB_DEFAULT_TYPE: "sqlite",
"SQLITE_FILE": os.path.join(str(tmpdir), "sqlite-tmp"),
},
):
runner = CliEntryBasedTestRunner()

result: Result = runner.invoke(
[
"sql-query",
"--query",
"CREATE TABLE cli_stocks (date text, symbol text, price real)",
]
)
cli_assert_equal(0, result)

result: Result = runner.invoke(
[
"sql-query",
"--query",
"""
INSERT INTO cli_stocks VALUES ('2020-01-01', 'GOOG', 123.0), ('2020-01-01', 'GOOG', 123.0)
""",
]
)
cli_assert_equal(0, result)

result: Result = runner.invoke(
["sql-query", "--query", "select * from cli_stocks"],
runner=CliRunner(
mix_stderr=False
), # TODO: replace when CliEntryBasedTestRunner add support for it
)
cli_assert_equal(0, result)
expected_stdout = (
"date symbol price\n"
"---------- -------- -------\n"
"2020-01-01 GOOG 123\n"
"2020-01-01 GOOG 123\n"
)
assert expected_stdout == result.stdout

result: Result = runner.invoke(
["sql-query", "--output", "json", "--query", "select * from cli_stocks"],
runner=CliRunner(
mix_stderr=False
), # TODO: replace when CliEntryBasedTestRunner add support for it
)
cli_assert_equal(0, result)
expected_stdout_json = [
{"date": "2020-01-01", "symbol": "GOOG", "price": 123.0},
{"date": "2020-01-01", "symbol": "GOOG", "price": 123.0},
]
assert expected_stdout_json == json.loads(result.stdout)