-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vdk-singer: Singer.io plugin for data sources (#2821)
This adds a data source plugin for singer.io, so users can now specify Singer taps as data sources. They can also list all Singer taps that can be found with `vdk singer --list-taps`. The change depends on #2805.
- Loading branch information
1 parent
fe9e8fd
commit c245154
Showing
16 changed files
with
1,232 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
image: "python:3.7" | ||
|
||
.build-vdk-singer: | ||
variables: | ||
PLUGIN_NAME: vdk-singer | ||
extends: .build-plugin | ||
|
||
build-py37-vdk-singer: | ||
extends: .build-vdk-singer | ||
image: "python:3.7" | ||
|
||
build-py311-vdk-singer: | ||
extends: .build-vdk-singer | ||
image: "python:3.11" | ||
|
||
release-vdk-singer: | ||
variables: | ||
PLUGIN_NAME: vdk-singer | ||
extends: .release-plugin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# singer | ||
|
||
The vdk-singer plugin provides an easy way to integrate Singer Taps as data sources within the Versatile Data Kit (VDK). | ||
This allows you to pull data from various external systems that have Singer Taps available and use them seamlessly | ||
within your VDK pipelines. | ||
|
||
|
||
|
||
|
||
## Usage | ||
|
||
``` | ||
pip install vdk-singer | ||
``` | ||
|
||
### Configuration | ||
|
||
(`vdk config-help` is a useful command to browse all config options of your installation of vdk) | ||
|
||
|
||
You can configure the Singer data source via the SingerDataSourceConfiguration class. The configuration options include: | ||
|
||
* tap_name: The name of the Singer Tap you are using. | ||
* tap_config: A dictionary containing configuration specific to the Singer Tap. | ||
* tap_auto_discover_schema: A boolean to indicate whether to auto-discover the schema. | ||
|
||
```python | ||
config = SingerDataSourceConfiguration( | ||
tap_name="tap-gitlab", | ||
tap_config={ | ||
"api_url": "https://gitlab.com/api/v4", | ||
"private_token": "your_token_here", | ||
# ... other tap specific configurations | ||
}, | ||
tap_auto_discover_schema=True, | ||
) | ||
|
||
``` | ||
|
||
### Example | ||
|
||
This will change as we introduce a more user-friendly way of defining sources, but currently it looks like this: | ||
|
||
```python | ||
from vdk.api.job_input import IJobInput | ||
from vdk.internal.builtin_plugins.ingestion.source.factory import SingletonDataSourceFactory | ||
from vdk.plugin.singer.singer_data_source import SingerDataSourceConfiguration | ||
|
||
def run(job_input: IJobInput): | ||
data_source = SingletonDataSourceFactory().create_data_source("singer-tap") | ||
config = SingerDataSourceConfiguration( | ||
tap_name="tap-gitlab", | ||
tap_config={ | ||
"api_url": "https://gitlab.com/api/v4", | ||
"private_token": "your_token_here", | ||
# ... other configurations | ||
}, | ||
tap_auto_discover_schema=True, | ||
) | ||
data_source.connect(config, None) | ||
# ... rest of the job logic | ||
|
||
``` | ||
|
||
#### List all likely available taps | ||
|
||
```shell | ||
vdk singer --list-taps | ||
``` | ||
|
||
### Build and testing | ||
|
||
``` | ||
pip install -r requirements.txt | ||
pip install -e . | ||
pytest | ||
``` | ||
|
||
In the VDK repo, the [../build-plugin.sh](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/build-plugin.sh) script can also be used. | ||
|
||
|
||
#### Note about the CICD: | ||
|
||
.plugin-ci.yaml is needed only for plugins part of [Versatile Data Kit Plugin repo](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins). | ||
|
||
The CI/CD is separated in two stages, a build stage and a release stage. | ||
The build stage is made up of a few jobs, all of which inherit from the same | ||
job configuration and only differ in the Python version they use (3.7 and 3.11). | ||
They run according to rules, which are ordered in a way such that changes to a | ||
plugin's directory trigger the plugin CI, but changes to a different plugin do not. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# this file is used to provide testing requirements | ||
# for requirements (dependencies) needed during and after installation of the plugin see (and update) setup.py install_requires section | ||
|
||
httpretty | ||
pytest | ||
pytest-httpserver | ||
tap-rest-api-msdk | ||
vdk-core | ||
vdk-sqlite | ||
vdk-test-utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Copyright 2021-2023 VMware, Inc. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
import pathlib | ||
|
||
import setuptools | ||
|
||
""" | ||
Builds a package with the help of setuptools in order for this package to be imported in other projects | ||
""" | ||
|
||
__version__ = "0.1.0" | ||
|
||
setuptools.setup( | ||
name="vdk-singer", | ||
version=__version__, | ||
url="https://github.com/vmware/versatile-data-kit", | ||
description="The plugin provides seamless configuration and execution of Singer Taps and Targets.", | ||
long_description=pathlib.Path("README.md").read_text(), | ||
long_description_content_type="text/markdown", | ||
install_requires=[ | ||
"vdk-core", | ||
"simplejson", | ||
"pytz", | ||
"vdk-control-cli", | ||
"vdk-data-sources", | ||
], | ||
package_dir={"": "src"}, | ||
packages=setuptools.find_namespace_packages(where="src"), | ||
# This is the only vdk plugin specific part | ||
# Define entry point called "vdk.plugin.run" with name of plugin and module to act as entry point. | ||
entry_points={"vdk.plugin.run": ["vdk-singer = vdk.plugin.singer.plugin_entry"]}, | ||
classifiers=[ | ||
"Development Status :: 2 - Pre-Alpha", | ||
"License :: OSI Approved :: Apache Software License", | ||
"Programming Language :: Python :: 3.7", | ||
"Programming Language :: Python :: 3.8", | ||
"Programming Language :: Python :: 3.9", | ||
"Programming Language :: Python :: 3.10", | ||
"Programming Language :: Python :: 3.11", | ||
], | ||
project_urls={ | ||
"Documentation": "https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/vdk-singer", | ||
"Source Code": "https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/vdk-singer", | ||
"Bug Tracker": "https://github.com/vmware/versatile-data-kit/issues/new/choose", | ||
}, | ||
) |
Oops, something went wrong.