Skip to content

Commit

Permalink
Round 2: Add doc_blocks to manifest for nodes and columns (#11294)
Browse files Browse the repository at this point in the history
* Reapply "Add `doc_blocks` to manifest for nodes and columns (#11224)" (#11283)

This reverts commit 55e0df1.

* Expand doc_blocks backcompat test

* Refactor to method, add docstring
  • Loading branch information
aranke authored Feb 11, 2025
1 parent 7f32e42 commit f29836f
Show file tree
Hide file tree
Showing 12 changed files with 537 additions and 16 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20250122-170328.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add doc_blocks to manifest for nodes and columns
time: 2025-01-22T17:03:28.866522Z
custom:
Author: aranke
Issue: 11000 11001
25 changes: 25 additions & 0 deletions core/dbt/artifacts/resources/v1/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@
NodeVersion = Union[str, float]


def _backcompat_doc_blocks(doc_blocks: Any) -> List[str]:
"""
Make doc_blocks backwards-compatible for scenarios where a user specifies `doc_blocks` on a model or column.
Mashumaro will raise a serialization error if the specified `doc_blocks` isn't a list of strings.
In such a scenario, this method returns an empty list to avoid a serialization error.
Further along, `_get_doc_blocks` in `manifest.py` populates the correct `doc_blocks` for the happy path.
"""

if isinstance(doc_blocks, list) and all(isinstance(x, str) for x in doc_blocks):
return doc_blocks

return []


@dataclass
class MacroDependsOn(dbtClassMixin):
macros: List[str] = field(default_factory=list)
Expand Down Expand Up @@ -68,6 +82,12 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin):
tags: List[str] = field(default_factory=list)
_extra: Dict[str, Any] = field(default_factory=dict)
granularity: Optional[TimeGranularity] = None
doc_blocks: List[str] = field(default_factory=list)

def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None) -> dict:
    """
    Run the parent post-serialization hook, then sanitize the `doc_blocks` entry.

    The `doc_blocks` value in the serialized dict is passed through
    `_backcompat_doc_blocks` and the result is stored back under the same key.
    """
    serialized = super().__post_serialize__(dct, context)
    safe_doc_blocks = _backcompat_doc_blocks(serialized["doc_blocks"])
    serialized["doc_blocks"] = safe_doc_blocks
    return serialized


@dataclass
Expand Down Expand Up @@ -197,13 +217,18 @@ class ParsedResource(ParsedResourceMandatory):
unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
raw_code: str = ""
doc_blocks: List[str] = field(default_factory=list)

def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
    """
    Run the parent post-serialization hook, then adjust the serialized dict.

    When `context` is given and has a truthy "artifact" entry, the `config_call_dict` and
    `unrendered_config_call_dict` keys are removed if present. The `doc_blocks` value is
    always passed through `_backcompat_doc_blocks`.
    """
    serialized = super().__post_serialize__(dct, context)

    if context and context.get("artifact"):
        for key in ("config_call_dict", "unrendered_config_call_dict"):
            # pop with a default is a no-op when the key is absent.
            serialized.pop(key, None)

    serialized["doc_blocks"] = _backcompat_doc_blocks(serialized["doc_blocks"])

    return serialized


Expand Down
1 change: 1 addition & 0 deletions core/dbt/artifacts/resources/v1/source_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,4 @@ class SourceDefinition(ParsedSourceMandatory):
created_at: float = field(default_factory=lambda: time.time())
unrendered_database: Optional[str] = None
unrendered_schema: Optional[str] = None
doc_blocks: List[str] = field(default_factory=list)
65 changes: 53 additions & 12 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Type, Union

import msgpack
from jinja2.nodes import Call

import dbt.deprecations
import dbt.exceptions
Expand Down Expand Up @@ -115,6 +116,7 @@
from dbt.parser.sources import SourcePatcher
from dbt.parser.unit_tests import process_models_for_unit_test
from dbt.version import __version__
from dbt_common.clients.jinja import parse
from dbt_common.clients.system import make_directory, path_exists, read_json, write_file
from dbt_common.constants import SECRET_ENV_PREFIX
from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
Expand Down Expand Up @@ -1240,7 +1242,7 @@ def process_docs(self, config: RuntimeConfig):
self.manifest,
config.project_name,
)
_process_docs_for_node(ctx, node)
_process_docs_for_node(ctx, node, self.manifest)
for source in self.manifest.sources.values():
if source.created_at < self.started_at:
continue
Expand All @@ -1250,7 +1252,7 @@ def process_docs(self, config: RuntimeConfig):
self.manifest,
config.project_name,
)
_process_docs_for_source(ctx, source)
_process_docs_for_source(ctx, source, self.manifest)
for macro in self.manifest.macros.values():
if macro.created_at < self.started_at:
continue
Expand Down Expand Up @@ -1657,32 +1659,71 @@ def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
DocsContextCallback = Callable[[ResultNode], Dict[str, Any]]


def _get_doc_blocks(description: str, manifest: Manifest, node_package: str) -> List[str]:
    """
    Collect the unique ids of the docs referenced by `doc(...)` calls in a description.

    The description is parsed as a Jinja template (it is not rendered here). Each
    `doc("name")` or `doc("package", "name")` call found directly inside a top-level
    statement is resolved through `manifest.resolve_doc`.

    Args:
        description: Description text to inspect; callers pass the unrendered description.
        manifest: Manifest used to resolve doc references. Its `metadata.project_name` is
            forwarded to `resolve_doc`.
        node_package: Package name of the node that owns the description.

    Returns:
        The `unique_id` of every doc that resolved, in the order the calls appear
        (duplicates are kept). Empty when the parsed template has no `body`, when the
        manifest has no project name, or when nothing resolves.
    """
    ast = parse(description)
    doc_blocks: List[str] = []

    if not hasattr(ast, "body"):
        return doc_blocks

    # Nothing can be resolved without a project name; check once instead of per call.
    project_name = manifest.metadata.project_name
    if not project_name:
        return doc_blocks

    missing = object()

    for statement in ast.body:
        # Not every Jinja statement node has a `nodes` field (block statements such as
        # `{% if %}` do not), so fall back to an empty tuple instead of raising.
        for node in getattr(statement, "nodes", ()):
            if not isinstance(node, Call):
                continue

            # The callee is not always a bare name (e.g. `foo.bar()`); such nodes have no
            # `name` attribute and are never a `doc` call.
            if getattr(node.node, "name", None) != "doc":
                continue

            # Only constant arguments can be resolved statically. A call with any
            # non-constant argument (e.g. `doc(some_var)`) is skipped.
            doc_args = [getattr(arg, "value", missing) for arg in node.args]
            if any(value is missing for value in doc_args):
                continue

            if len(doc_args) == 1:
                package, name = None, doc_args[0]
            elif len(doc_args) == 2:
                package, name = doc_args
            else:
                continue

            resolved_doc = manifest.resolve_doc(name, package, project_name, node_package)

            if resolved_doc:
                doc_blocks.append(resolved_doc.unique_id)

    return doc_blocks


# node and column descriptions
def _process_docs_for_node(
    context: Dict[str, Any],
    node: ManifestNode,
    manifest: Manifest,
):
    """
    Populate `doc_blocks` and render the description for a node and each of its columns.

    For the node and for every column, `_get_doc_blocks` is called on the description
    first and `get_rendered` replaces the description afterwards, so the doc blocks are
    taken from the text as it was before rendering.
    """
    package = node.package_name

    node.doc_blocks = _get_doc_blocks(node.description, manifest, package)
    node.description = get_rendered(node.description, context)

    for column in node.columns.values():
        column.doc_blocks = _get_doc_blocks(column.description, manifest, package)
        column.description = get_rendered(column.description, context)


# source and table descriptions, column descriptions
def _process_docs_for_source(
    context: Dict[str, Any],
    source: SourceDefinition,
    manifest: Manifest,
):
    """
    Populate `doc_blocks` and render descriptions for a source table and its columns.

    `source.doc_blocks` is derived from the table description (`source.description`) only;
    `source.source_description` is rendered but not scanned for doc blocks. Each column gets
    its own `doc_blocks` from its description.
    """
    # Extract doc blocks from the raw description BEFORE rendering it: `_get_doc_blocks`
    # looks for `doc(...)` calls in the template text, and each description is rendered
    # exactly once.
    source.doc_blocks = _get_doc_blocks(source.description, manifest, source.package_name)
    source.description = get_rendered(source.description, context)

    source.source_description = get_rendered(source.source_description, context)

    for column in source.columns.values():
        column.doc_blocks = _get_doc_blocks(column.description, manifest, source.package_name)
        column.description = get_rendered(column.description, context)


# macro argument descriptions
Expand Down Expand Up @@ -2040,7 +2081,7 @@ def process_node(config: RuntimeConfig, manifest: Manifest, node: ManifestNode):
_process_sources_for_node(manifest, config.project_name, node)
_process_refs(manifest, config.project_name, node, config.dependencies)
ctx = generate_runtime_docs_context(config, node, manifest, config.project_name)
_process_docs_for_node(ctx, node)
_process_docs_for_node(ctx, node, manifest)


def write_semantic_manifest(manifest: Manifest, target_path: str) -> None:
Expand Down
Loading

0 comments on commit f29836f

Please sign in to comment.