From 351f8c7668a3569efac9f5f1211e898ebef6cf4f Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Wed, 27 Nov 2024 10:06:25 -0500 Subject: [PATCH 1/3] feat: Implement a patch that disables tracking on dbt-loom specifically --- dbt_loom/__init__.py | 35 ++++++++++++++++++++++++++++++++++- dbt_loom/config.py | 1 + 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/dbt_loom/__init__.py b/dbt_loom/__init__.py index 7d84c24..c0b4f81 100644 --- a/dbt_loom/__init__.py +++ b/dbt_loom/__init__.py @@ -129,6 +129,15 @@ def __init__(self, project_name: str): self.config: Optional[dbtLoomConfig] = self.read_config(configuration_path) self.models: Dict[str, LoomModelNodeArgs] = {} + self._patch_ref_protection() + + if not self.config or (self.config and not self.config.enable_telemetry): + self._patch_plugin_telemetry() + + super().__init__(project_name) + + def _patch_ref_protection(self) -> None: + """Patch out the ref protection functions for proper protections""" import dbt.contracts.graph.manifest fire_event( @@ -152,7 +161,31 @@ def __init__(self, project_name: str): self.model_node_wrapper(dbt.contracts.graph.nodes.ModelNode.from_args) # type: ignore ) - super().__init__(project_name) + def _patch_plugin_telemetry(self) -> None: + """Patch the plugin telemetry function to prevent tracking of dbt plugins.""" + import dbt.tracking + + dbt.tracking.track = self.tracking_wrapper(dbt.tracking.track) + + def tracking_wrapper(self, function) -> Callable: + """Wrap the telemetry `track` function and return early if we're tracking plugin actions.""" + + def outer_function(*args, **kwargs): + """Check the context of the snowplow tracker message for references to loom. 
Return if present.""" + + if any( + [ + self.__class__.__name__ in str(context_item.__dict__) + or "dbt-loom" in str(context_item.__dict__) + or "dbt_loom" in str(context_item.__dict__) + for context_item in kwargs.get("context", []) + ] + ): + return + + return function(*args, **kwargs) + + return outer_function def model_node_wrapper(self, function) -> Callable: """Wrap the ModelNode.from_args function and inject extra properties from the LoomModelNodeArgs.""" diff --git a/dbt_loom/config.py b/dbt_loom/config.py index ea66ea7..6e9cc5d 100644 --- a/dbt_loom/config.py +++ b/dbt_loom/config.py @@ -62,6 +62,7 @@ class dbtLoomConfig(BaseModel): """Configuration for dbt Loom""" manifests: List[ManifestReference] + enable_telemetry: bool = False class LoomConfigurationError(BaseException): From fd880fcd1e780013f6e921d8bf5e6107bd1b3171 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Wed, 27 Nov 2024 10:06:59 -0500 Subject: [PATCH 2/3] feat: Add a test to confirm telemetry blocking --- tests/test_dbt_core_execution.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_dbt_core_execution.py b/tests/test_dbt_core_execution.py index 32842ae..39e997d 100644 --- a/tests/test_dbt_core_execution.py +++ b/tests/test_dbt_core_execution.py @@ -140,3 +140,25 @@ def test_dbt_loom_injects_groups(): # Make sure nothing failed assert isinstance(output.exception, dbt.exceptions.DbtReferenceError) + + +def test_dbt_core_telemetry_blocking(): + """Verify that dbt-loom prevents telemetry about itself from being sent.""" + import shutil + + runner = dbtRunner() + + # Compile the revenue project + + os.chdir(f"{starting_path}/test_projects/revenue") + runner.invoke(["clean"]) + runner.invoke(["deps"]) + shutil.rmtree("logs") + runner.invoke(["compile"]) + + # Check that no plugin events were sent. This is important to verify that + # telemetry blocking is working. 
+ with open("logs/dbt.log") as log_file: + assert "plugin_get_nodes" not in log_file.read() + + os.chdir(starting_path) From 4655ed3ace84747dffbf34a63a60e27a5907c68e Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Wed, 27 Nov 2024 10:18:18 -0500 Subject: [PATCH 3/3] docs: Add useful docs and refactor the docs site a little bit --- README.md | 2 - docs/advanced-configuration.md | 67 ++++++++++++++++++++++++++++++++++ docs/getting-started.md | 40 ++------------------ mkdocs.yml | 1 + 4 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 docs/advanced-configuration.md diff --git a/README.md b/README.md index b050a32..23ee2d2 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,6 @@ dbt-loom currently supports obtaining model definitions from: - S3-compatible object storage services - Azure Storage -:warning: **dbt Core's plugin functionality is still in beta. Please note that this may break in the future as dbt Labs solidifies the dbt plugin API in future versions.** - ## Getting Started To begin, install the `dbt-loom` python package. diff --git a/docs/advanced-configuration.md b/docs/advanced-configuration.md new file mode 100644 index 0000000..a7cc1d9 --- /dev/null +++ b/docs/advanced-configuration.md @@ -0,0 +1,67 @@ +# Advanced Configuration + +`dbt-loom` also has a couple advanced configuration options for power users. + +## Using environment variables in the `dbt-loom` config + +You can easily incorporate your own environment variables into the config file. This allows for dynamic configuration values that can change based on the environment. 
To specify an environment variable in the `dbt-loom` config file, use one of the following formats: + +`${ENV_VAR}` or `$ENV_VAR` + +### Example: + +```yaml +manifests: + - name: revenue + type: gcs + config: + project_id: ${GCP_PROJECT} + bucket_name: ${GCP_BUCKET} + object_name: ${MANIFEST_PATH} +``` + +## Exclude nested packages + +In some circumstances, like running `dbt-project-evaluator`, you may not want a +given package in an upstream project to be imported into a downstream project. +You can manually prevent a downstream project from injecting assets from a package +by adding the package name to the downstream project's `excluded_packages` list. + +```yaml +manifests: + - name: revenue + type: file + config: + path: ../revenue/target/manifest.json + excluded_packages: + # Provide the string name of the package to exclude during injection. + - dbt_project_evaluator +``` + +## Gzipped files + +`dbt-loom` natively supports decompressing gzipped manifest files. This is useful to reduce object storage size and to minimize loading times when reading manifests from object storage. Compressed file detection is triggered when the file path for the manifest is suffixed +with `.gz`. + +```yaml +manifests: + - name: revenue + type: s3 + config: + bucket_name: example_bucket_name + object_name: manifest.json.gz +``` + +## Enabling Telemetry + +By default, the `dbt-loom` plugin blocks outbound telemetry that reports on +the use of this plugin. This is a privacy-preserving measure for `dbt-loom` +users that does not impact the function of dbt-core and does not impede +dbt-core development in any way. If you _want_ this telemetry to be sent, you +can re-enable this behavior by setting the `enable_telemetry` property to `true` +in the `dbt_loom.config.yml` file. + +```yaml +enable_telemetry: true +manifests: ... 
+``` diff --git a/docs/getting-started.md b/docs/getting-started.md index 749975c..d7c1a99 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -22,7 +22,7 @@ manifests: By default, `dbt-loom` will look for `dbt_loom.config.yml` in your working directory. You can also set the `DBT_LOOM_CONFIG` environment variable. -### Using dbt Cloud as an artifact source +## Using dbt Cloud as an artifact source You can use dbt-loom to fetch model definitions from dbt Cloud by setting up a `dbt-cloud` manifest in your `dbt-loom` config, and setting the `DBT_CLOUD_API_TOKEN` environment variable in your execution environment. @@ -45,7 +45,7 @@ manifests: # which to fetch artifacts. Defaults to the last step. ``` -### Using an S3-compatible object store as an artifact source +## Using an S3-compatible object store as an artifact source You can use dbt-loom to fetch manifest files from S3-compatible object stores by setting up ab `s3` manifest in your `dbt-loom` config. Please note that this @@ -63,7 +63,7 @@ manifests: # The object name of your manifest file. ``` -### Using GCS as an artifact source +## Using GCS as an artifact source You can use dbt-loom to fetch manifest files from Google Cloud Storage by setting up a `gcs` manifest in your `dbt-loom` config. @@ -85,7 +85,7 @@ manifests: # The OAuth2 Credentials to use. If not passed, falls back to the default inferred from the environment. ``` -### Using Azure Storage as an artifact source +## Using Azure Storage as an artifact source You can use dbt-loom to fetch manifest files from Azure Storage by setting up an `azure` manifest in your `dbt-loom` config. The `azure` type implements @@ -103,35 +103,3 @@ manifests: container_name: # The name of your Azure Storage container object_name: # The object name of your manifest file. ``` - -### Using environment variables - -You can easily incorporate your own environment variables into the config file. 
This allows for dynamic configuration values that can change based on the environment. To specify an environment variable in the `dbt-loom` config file, use one of the following formats: - -`${ENV_VAR}` or `$ENV_VAR` - -#### Example: - -```yaml -manifests: - - name: revenue - type: gcs - config: - project_id: ${GCP_PROJECT} - bucket_name: ${GCP_BUCKET} - object_name: ${MANIFEST_PATH} -``` - -### Gzipped files - -`dbt-loom` natively supports decompressing gzipped manifest files. This is useful to reduce object storage size and to minimize loading times when reading manifests from object storage. Compressed file detection is triggered when the file path for the manifest is suffixed -with `.gz`. - -```yaml -manifests: - - name: revenue - type: s3 - config: - bucket_name: example_bucket_name - object_name: manifest.json.gz -``` diff --git a/mkdocs.yml b/mkdocs.yml index 16938d2..8c086e2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -60,3 +60,4 @@ edit_uri: edit/main/docs/ nav: - Home: index.md - Getting started: getting-started.md + - Advanced configuration: advanced-configuration.md