Skip to content

Commit

Permalink
Merge pull request #1407 from fishtown-analytics/feature/wrap-models-…
Browse files Browse the repository at this point in the history
…in-jinja-tags

add a flag to wrap models/tests in jinja blocks
  • Loading branch information
beckjake authored Apr 18, 2019
2 parents 4dd8056 + 4ffc5cb commit dd469ad
Show file tree
Hide file tree
Showing 8 changed files with 330 additions and 32 deletions.
139 changes: 120 additions & 19 deletions core/dbt/clients/_jinja_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ def __init__(self, contents):


class BlockTag(object):
def __init__(self, block_type_name, block_name, contents=None, **kw):
def __init__(self, block_type_name, block_name, contents=None,
full_block=None, **kw):
self.block_type_name = block_type_name
self.block_name = block_name
self.contents = contents
self.full_block = None
self.full_block = full_block

def __str__(self):
return 'BlockTag({!r}, {!r})'.format(self.block_type_name,
Expand Down Expand Up @@ -59,7 +60,7 @@ def end_pat(self):
r'(?:\s+(?P<block_name>({})))?'.format(_NAME_PATTERN),
)))

TAG_CLOSE_PATTERN = regex(r'(?:\-\%\}\s*|\%\})')
TAG_CLOSE_PATTERN = regex(r'(?:(?P<tag_close>(\-\%\}\s*|\%\})))')
# if you do {% materialization foo, adapter="myadapter' %} and end up with
# mismatched quotes this will still match, but jinja will fail somewhere
# since the adapter= argument has to be an adapter name, and none have quotes
Expand Down Expand Up @@ -103,6 +104,13 @@ def end_pat(self):
)


NON_STRING_DO_BLOCK_MEMBER_PATTERN = regex(
# anything, followed by a quote, paren, or a tag end
r'''(.*?)'''
r'''((?P<quote>(['"]))|(?P<open>(\())|(?P<close>(\))))'''
)


class BlockIterator(object):
def __init__(self, data):
self.data = data
Expand Down Expand Up @@ -134,9 +142,7 @@ def _match(self, pattern):
def expect_comment_end(self):
"""Expect a comment end and return the match object.
"""
match = self._match(COMMENT_END_PATTERN)
if match is None:
dbt.exceptions.raise_compiler_error('unexpected EOF, expected #}')
match = self._expect_match('#}', COMMENT_END_PATTERN)
self.advance(match.end())

def expect_raw_end(self):
Expand Down Expand Up @@ -195,7 +201,8 @@ def handle_block(self, match, block_start=None):
while True:
match = self._expect_match(
'"{}"'.format(found.end_block_type_name),
found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN
found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN,
regex('''(?P<quote>(['"]))''')
)
groups = match.groupdict()
if groups.get('endblock') is not None:
Expand All @@ -207,6 +214,10 @@ def handle_block(self, match, block_start=None):
self.expect_comment_end()
elif groups.get('raw_start') is not None:
self.expect_raw_end()
elif groups.get('quote') is not None:
self.rewind()
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
else:
raise dbt.exceptions.InternalException(
'unhandled regex in handle_block, no match: {}'
Expand All @@ -226,10 +237,49 @@ def handle_block(self, match, block_start=None):
def handle_materialization(self, match):
self._expect_match('materialization args',
MATERIALIZATION_ARGS_PATTERN)
self._expect_match('%}', TAG_CLOSE_PATTERN)
endtag = self._expect_match('%}', TAG_CLOSE_PATTERN)
self.advance(endtag.end())
# handle the block we started with!
self.blocks.append(self.handle_block(match))

def handle_do(self, match, expect_block):
if expect_block:
# we might be wrong to expect a block ({% do (...) %}, for example)
# so see if there's more data before the tag closes. if there
# isn't, we expect a block.
close_match = self._expect_match('%}', TAG_CLOSE_PATTERN)
unprocessed = self.data[match.end():close_match.start()].strip()
expect_block = not unprocessed

if expect_block:
# if we're here, expect_block is True and we must have set
# close_match
self.advance(close_match.end())
block = self.handle_block(match)
else:
# we have a do-statement like {% do thing() %}, so no {% enddo %}
# also, we don't want to advance to the end of the match, as it
# might be inside a string or something! So go back and figure out
self._process_rval_components()
block = BlockTag('do', None,
full_block=self.data[match.start():self.pos])
self.blocks.append(block)

def handle_set(self, match):
equal_or_close = self._expect_match('%} or =',
TAG_CLOSE_PATTERN, regex(r'='))
self.advance(equal_or_close.end())
if equal_or_close.groupdict().get('tag_close') is None:
# it's an equals sign, must be like {% set x = 1 %}
self._process_rval_components()
# watch out, order matters here on python 2
block = BlockTag(full_block=self.data[match.start():self.pos],
**match.groupdict())
else:
# it's a tag close, must be like {% set x %}...{% endset %}
block = self.handle_block(match)
self.blocks.append(block)

def find_block(self):
open_block = (
r'(?:\s*\{\%\-|\{\%)\s*'
Expand All @@ -251,21 +301,39 @@ def find_block(self):
# comments are easy
if matchgroups.get('comment_start') is not None:
start = match.start()
self.advance(match.end())
self.expect_comment_end()
self.blocks.append(BlockData(self.data[start:self.pos]))
return True

if matchgroups.get('block_type_name') == 'raw':
block_type_name = matchgroups.get('block_type_name')

if block_type_name == 'raw':
start = match.start()
self.expect_raw_end()
self.blocks.append(BlockData(self.data[start:self.pos]))
return True

if matchgroups.get('block_type_name') == 'materialization':
if block_type_name == 'materialization':
self.advance(match.end())
self.handle_materialization(match)
return True

if block_type_name == 'do':
# if there is a "block_name" in the match groups, we don't expect a
# block as the "block name" is actually part of the do-statement.
# we need to do this to handle the (weird and probably wrong!) case
# of a do-statement that is only a single identifier - techincally
# allowed in jinja. (for example, {% do thing %})
expect_block = matchgroups.get('block_name') is None
self.handle_do(match, expect_block=expect_block)
return True

if block_type_name == 'set':
self.advance(match.end())
self.handle_set(match)
return True

# we're somewhere like this {% block_type_name block_type
# we've either got arguments, a close of tag (%}), or bad input.
# we've handled materializations already (they're weird!)
Expand All @@ -284,6 +352,42 @@ def find_block(self):
self.blocks.append(self.handle_block(match))
return True

def _process_rval_components(self):
"""This is suspiciously similar to _process_macro_default_arg, probably
want to figure out how to merge the two.
Process the rval of an assignment statement or a do-block
"""
while True:
match = self._expect_match(
'do block component',
# you could have a string, though that would be weird
STRING_PATTERN,
# a quote or an open/close parenthesis
NON_STRING_DO_BLOCK_MEMBER_PATTERN,
# a tag close
TAG_CLOSE_PATTERN
)
matchgroups = match.groupdict()
self.advance(match.end())
if matchgroups.get('string') is not None:
continue
elif matchgroups.get('quote') is not None:
self.rewind()
# now look for a string
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
elif matchgroups.get('open'):
self._parenthesis_stack.append(True)
elif matchgroups.get('close'):
self._parenthesis_stack.pop()
elif matchgroups.get('tag_close'):
if self._parenthesis_stack:
msg = ('Found "%}", expected ")"')
dbt.exceptions.raise_compiler_error(msg)
return
# else whitespace

def _process_macro_default_arg(self):
"""Handle the bit after an '=' in a macro default argument. This is
probably the trickiest thing. The goal here is to accept all strings
Expand All @@ -297,7 +401,7 @@ def _process_macro_default_arg(self):
'macro argument',
# you could have a string
STRING_PATTERN,
# a quote, a comma, or a close parenthesis
# a quote, a comma, or a open/close parenthesis
NON_STRING_MACRO_ARGS_PATTERN,
# we want to "match", not "search"
method='match'
Expand All @@ -311,20 +415,17 @@ def _process_macro_default_arg(self):
# we got a bunch of data and then a string opening value.
# put the quote back on the menu
self.rewind()
# if we only got a single quote mark and didn't hit a string
# at all, this file has an unclosed quote. Fail accordingly.
if match.end() - match.start() == 1:
msg = (
'Unclosed quotation mark at position {}. Context:\n{}'
.format(self.pos, self.data[self.pos-20:self.pos+20])
)
dbt.exceptions.raise_compiler_error(msg)
# now look for a string
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
elif matchgroups.get('comma') is not None:
# small hack: if we hit a comma and there is one parenthesis
# left, return to look for a new name. otherwise we're still
# looking for the parameter close.
if len(self._parenthesis_stack) == 1:
return
elif matchgroups.get('open'):
self._parenthesis_stack.append(True)
elif matchgroups.get('close'):
self._parenthesis_stack.pop()
else:
Expand Down
5 changes: 4 additions & 1 deletion core/dbt/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False


def reset():
global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR
global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \
TEST_NEW_PARSER

STRICT_MODE = False
NON_DESTRUCTIVE = False
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False
11 changes: 11 additions & 0 deletions core/dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ def update_flags(parsed):
elif arg_full_refresh:
flags.FULL_REFRESH = True

flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False)


def _build_base_subparser():
base_subparser = argparse.ArgumentParser(add_help=False)
Expand Down Expand Up @@ -637,6 +639,15 @@ def parse_args(args):
help=argparse.SUPPRESS,
)

# if set, extract all models and blocks with the jinja block extractor, and
# verify that we don't fail anywhere the actual jinja parser passes. The
# reverse (passing files that ends up failing jinja) is fine.
p.add_argument(
'--test-new-parser',
action='store_true',
help=argparse.SUPPRESS
)

subs = p.add_subparsers(title="Available sub-commands")

base_subparser = _build_base_subparser()
Expand Down
13 changes: 13 additions & 0 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,16 @@ def parse_node(self, node, node_path, package_project_config, tags=None,
parsed_node.validate()

return parsed_node

def check_block_parsing(self, name, path, contents):
"""Check if we were able to extract toplevel blocks from the given
contents. Return True if extraction was successful (no exceptions),
False if it fails.
"""
if not dbt.flags.TEST_NEW_PARSER:
return True
try:
dbt.clients.jinja.extract_toplevel_blocks(contents)
except Exception:
return False
return True
15 changes: 15 additions & 0 deletions core/dbt/parser/base_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from dbt.contracts.graph.unparsed import UnparsedNode
from dbt.parser.base import MacrosKnownParser
from dbt.node_types import NodeType


class BaseSqlParser(MacrosKnownParser):
Expand Down Expand Up @@ -74,7 +75,21 @@ def parse_sql_node(self, node_dict, tags=None):
node.name)

project = self.all_projects.get(package_name)

parse_ok = True
if node.resource_type == NodeType.Model:
parse_ok = self.check_block_parsing(
node.name, node.original_file_path, node.raw_sql
)

node_parsed = self.parse_node(node, unique_id, project, tags=tags)
if not parse_ok:
# if we had a parse error in parse_node, we would not get here. So
# this means we rejected a good file :(
raise dbt.exceptions.InternalException(
'the block parser rejected a good node: {} was marked invalid '
'but is actually valid!'.format(node.original_file_path)
)
return unique_id, node_parsed

def parse_sql_nodes(self, nodes, tags=None):
Expand Down
25 changes: 18 additions & 7 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
full_path = get_pseudo_test_path(full_name, test_path, 'schema_test')
raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
test_args)

unparsed = UnparsedNode(
name=full_name,
resource_type=NodeType.Test,
Expand All @@ -295,13 +296,23 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
node_path = self.get_path(NodeType.Test, unparsed.package_name,
unparsed.name)

return self.parse_node(unparsed,
node_path,
source_package,
tags=['schema'],
fqn_extra=None,
fqn=fqn_override,
column_name=column_name)
result = self.parse_node(unparsed,
node_path,
source_package,
tags=['schema'],
fqn_extra=None,
fqn=fqn_override,
column_name=column_name)

parse_ok = self.check_block_parsing(full_name, test_path, raw_sql)
if not parse_ok:
# if we had a parse error in parse_node, we would not get here. So
# this means we rejected a good file :(
raise dbt.exceptions.InternalException(
'the block parser rejected a good node: {} was marked invalid '
'but is actually valid!'.format(test_path)
)
return result


class SchemaModelParser(SchemaBaseTestParser):
Expand Down
17 changes: 12 additions & 5 deletions test/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,16 +441,23 @@ def project_config(self):
def profile_config(self):
return {}

def run_dbt(self, args=None, expect_pass=True, strict=True):
def run_dbt(self, args=None, expect_pass=True, strict=True, parser=True):
if args is None:
args = ["run"]

final_args = []

if strict:
args = ["--strict"] + args
args.append('--log-cache-events')
logger.info("Invoking dbt with {}".format(args))
final_args.append('--strict')
if parser:
final_args.append('--test-new-parser')

final_args.extend(args)
final_args.append('--log-cache-events')

logger.info("Invoking dbt with {}".format(final_args))

res, success = dbt.handle_and_check(args)
res, success = dbt.handle_and_check(final_args)
self.assertEqual(
success, expect_pass,
"dbt exit state did not match expected")
Expand Down
Loading

0 comments on commit dd469ad

Please sign in to comment.