-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge main into person/miroslavi/introduce-data-job-deployment-entity
- Loading branch information
Showing
10 changed files
with
519 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
image: "python:3.7" | ||
|
||
.build-vdk-duckdb: | ||
variables: | ||
PLUGIN_NAME: vdk-duckdb | ||
extends: .build-plugin | ||
|
||
build-py37-vdk-duckdb: | ||
extends: .build-vdk-duckdb | ||
image: "python:3.7" | ||
|
||
build-py311-vdk-duckdb: | ||
extends: .build-vdk-duckdb | ||
image: "python:3.11" | ||
|
||
release-vdk-duckdb: | ||
variables: | ||
PLUGIN_NAME: vdk-duckdb | ||
extends: .release-plugin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# duckdb | ||
|
||
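A Versatile Data Kit (VDK) plugin that adds DuckDB support: it registers a DuckDB connection for data jobs and adds a `duckdb-query` command to the vdk CLI. | ||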
|
||
TODO: what the project is about, what is its purpose | ||
|
||
|
||
## Usage | ||
|
||
``` | ||
pip install vdk-duckdb | ||
``` | ||
|
||
### Configuration | ||
|
||
(`vdk config-help` is a useful command to browse all config options of your installation of vdk) | ||
|
||
| Name | Description | (example) Value | | ||
|---|---|---| | ||
| DUCKDB_FILE | Path to the DuckDB database file | "mydb.duckdb" | | ||
|
||
### Example | ||
|
||
TODO | ||
|
||
### Build and testing | ||
|
||
``` | ||
pip install -r requirements.txt | ||
pip install -e . | ||
pytest | ||
``` | ||
|
||
In the VDK repo, the [../build-plugin.sh](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/build-plugin.sh) script can also be used. | ||
|
||
|
||
#### Note about the CICD: | ||
|
||
.plugin-ci.yaml is needed only for plugins part of [Versatile Data Kit Plugin repo](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins). | ||
|
||
The CI/CD is separated into two stages, a build stage and a release stage. | ||
The build stage is made up of a few jobs, all of which inherit from the same | ||
job configuration and only differ in the Python version they use (3.7 and 3.11). | ||
They run according to rules, which are ordered in a way such that changes to a | ||
plugin's directory trigger the plugin CI, but changes to a different plugin do not. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# this file is used to provide testing requirements | ||
# for requirements (dependencies) needed during and after installation of the plugin see (and update) setup.py install_requires section | ||
|
||
click | ||
duckdb | ||
pytest | ||
|
||
pytest | ||
pytest-cov | ||
|
||
vdk-core | ||
vdk-test-utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import pathlib | ||
|
||
import setuptools | ||
|
||
""" | ||
Builds a package with the help of setuptools in order for this package to be imported in other projects | ||
""" | ||
|
||
__version__ = "0.1.0" | ||
|
||
# Package metadata for the vdk-duckdb plugin.
# The README is resolved relative to this file (not the current working directory)
# and decoded explicitly as UTF-8 so the build does not depend on where or under
# which locale it is invoked.
setuptools.setup(
    name="vdk-duckdb",
    version=__version__,
    url="https://github.com/vmware/versatile-data-kit",
    description="DuckDB Plugin for VDK.",
    long_description=pathlib.Path(__file__)
    .parent.joinpath("README.md")
    .read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    # duckdb is imported by the plugin entry-point module, so it must be a
    # runtime dependency and not only a test requirement.
    install_requires=["vdk-core", "tabulate", "duckdb"],
    package_dir={"": "src"},
    packages=setuptools.find_namespace_packages(where="src"),
    # This is the only vdk plugin specific part
    # Define entry point called "vdk.plugin.run" with name of plugin and module to act as entry point.
    entry_points={"vdk.plugin.run": ["vdk-duckdb = vdk.plugin.duckdb.duckdb_plugin"]},
    classifiers=[
        "Development Status :: 2 - Pre-Alpha",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
    ],
    project_urls={
        "Documentation": "https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/vdk-duckdb",
        "Source Code": "https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/vdk-duckdb",
        "Bug Tracker": "https://github.com/vmware/versatile-data-kit/issues/new/choose",
    },
)
38 changes: 38 additions & 0 deletions
38
projects/vdk-plugins/vdk-duckdb/src/vdk/plugin/duckdb/duckdb_configuration.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import pathlib | ||
import tempfile | ||
|
||
from vdk.internal.core.config import Configuration | ||
from vdk.internal.core.config import ConfigurationBuilder | ||
|
||
DUCKDB_FILE = "DUCKDB_FILE" | ||
DUCKDB_INGEST_AUTO_CREATE_TABLE_ENABLED = "DUCKDB_INGEST_AUTO_CREATE_TABLE_ENABLED" | ||
|
||
|
||
class DuckDBConfiguration:
    """Read-only accessors for the DuckDB settings held in a VDK Configuration."""

    def __init__(self, configuration: Configuration):
        self.__config = configuration

    def get_auto_create_table_enabled(self) -> bool:
        """Return the value stored under DUCKDB_INGEST_AUTO_CREATE_TABLE_ENABLED."""
        enabled = self.__config.get_value(DUCKDB_INGEST_AUTO_CREATE_TABLE_ENABLED)
        return enabled

    def get_duckdb_file(self):
        """Return the DUCKDB_FILE setting as a pathlib.Path.

        Falls back to "default_path.duckdb" when the configured value is
        missing or empty.
        """
        # NOTE(review): this fallback differs from the default registered in
        # add_definitions (a file in the temp directory) - confirm which is intended.
        configured = self.__config.get_value(DUCKDB_FILE)
        if not configured:
            configured = "default_path.duckdb"
        return pathlib.Path(configured)
|
||
|
||
def add_definitions(config_builder: ConfigurationBuilder):
    """Register the DuckDB configuration keys on the given builder.

    Adds DUCKDB_FILE (default: "vdk-duckdb.db" inside the system temp directory)
    and DUCKDB_INGEST_AUTO_CREATE_TABLE_ENABLED (default: True).

    :param config_builder: the builder the definitions are added to.
    """
    default_db_file = pathlib.Path(tempfile.gettempdir()).joinpath("vdk-duckdb.db")
    config_builder.add(
        key=DUCKDB_FILE,
        default_value=str(default_db_file),
        description="The file of the DuckDB database.",
    )
    config_builder.add(
        key=DUCKDB_INGEST_AUTO_CREATE_TABLE_ENABLED,
        default_value=True,
        # The two literals are concatenated implicitly; the trailing space keeps
        # the sentences from running together in the rendered help text.
        description="If set to true, auto create table if it does not exist during ingestion. "
        "This is only applicable when ingesting data into DuckDB (ingest method is DuckDB).",
    )
37 changes: 37 additions & 0 deletions
37
projects/vdk-plugins/vdk-duckdb/src/vdk/plugin/duckdb/duckdb_connection.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import logging | ||
import pathlib | ||
import tempfile | ||
from typing import List | ||
|
||
import duckdb | ||
from vdk.internal.util.decorators import closing_noexcept_on_close | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class DuckDBConnection:
    """
    Create file based DuckDB database.

    Wraps the path of a DuckDB database file and opens connections against it.
    """

    def __init__(
        self,
        duckdb_file: pathlib.Path = pathlib.Path(tempfile.gettempdir()).joinpath(
            "vdk-duckdb.db"
        ),
    ):
        """
        :param duckdb_file: path of the database file; defaults to
            "vdk-duckdb.db" inside the system temp directory.
        """
        self.__db_file = duckdb_file

    def new_connection(self):
        """Open and return a new DuckDB connection to the database file.

        The caller owns the returned connection and is responsible for closing it.
        """
        log.info(
            "Creating new connection against local file database located at: %s",
            self.__db_file,
        )
        return duckdb.connect(f"{self.__db_file}")

    def execute_query(self, query: str) -> List[List]:
        """Run ``query`` on a fresh connection and return all fetched rows.

        :param query: the SQL text to execute.
        :return: the result of ``cursor.fetchall()``.
        """
        conn = self.new_connection()
        try:
            with closing_noexcept_on_close(conn.cursor()) as cursor:
                cursor.execute(query)
                return cursor.fetchall()
        finally:
            # A new connection is opened for every call, so release it here;
            # otherwise each query leaks an open handle to the database file.
            conn.close()
61 changes: 61 additions & 0 deletions
61
projects/vdk-plugins/vdk-duckdb/src/vdk/plugin/duckdb/duckdb_plugin.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import logging | ||
import pathlib | ||
|
||
import click | ||
import duckdb | ||
from tabulate import tabulate | ||
from vdk.api.plugin.hook_markers import hookimpl | ||
from vdk.internal.builtin_plugins.run.job_context import JobContext | ||
from vdk.internal.core.config import ConfigurationBuilder | ||
from vdk.internal.util.decorators import closing_noexcept_on_close | ||
|
||
log = logging.getLogger(__name__) | ||
""" | ||
Include the plugins implementation. For example: | ||
""" | ||
|
||
|
||
@hookimpl
def vdk_configure(config_builder: ConfigurationBuilder) -> None:
    """Register the DUCKDB_FILE setting (default "mydb.duckdb") used by this plugin."""
    config_builder.add(key="DUCKDB_FILE", default_value="mydb.duckdb")
|
||
|
||
@hookimpl
def initialize_job(context: JobContext) -> None:
    """Register a connection factory named "DUCKDB" on the job's connections.

    The factory opens a DuckDB connection to the file configured under DUCKDB_FILE.
    """
    db_file = context.core_context.configuration.get_value("DUCKDB_FILE")

    def _open_duckdb_connection():
        # Invoked by whoever requests the "DUCKDB" connection, not here.
        return duckdb.connect(database=db_file)

    context.connections.add_open_connection_factory_method(
        "DUCKDB", _open_duckdb_connection
    )
|
||
|
||
@click.command(
    name="duckdb-query", help="Execute a DuckDB query against a local DUCKDB database."
)
@click.option("-q", "--query", type=click.STRING, required=True)
@click.pass_context
def duckdb_query(ctx: click.Context, query):
    """Execute ``query`` against the configured DuckDB file and print the result as a table.

    :param ctx: click context; ``ctx.obj.configuration`` supplies DUCKDB_FILE.
    :param query: the SQL text passed via ``-q/--query``.
    """
    conf = ctx.obj.configuration
    duckdb_file = conf.get_value("DUCKDB_FILE")
    conn = duckdb.connect(database=duckdb_file)

    try:
        with closing_noexcept_on_close(conn.cursor()) as cursor:
            cursor.execute(query)
            column_names = (
                [column_info[0] for column_info in cursor.description]
                if cursor.description
                else ()  # same as the default value for the headers parameters of the tabulate function
            )
            res = cursor.fetchall()
            click.echo(tabulate(res, headers=column_names))
    finally:
        # The connection is opened only for this command; close it so the
        # database file handle is released even when the query fails.
        conn.close()
|
||
|
||
@hookimpl
def vdk_command_line(root_command: click.Group):
    """Extend the vdk CLI with the "duckdb-query" command,
    which executes a query against the configured DuckDB database file."""
    root_command.add_command(cmd=duckdb_query)
Oops, something went wrong.