Skip to content

Commit

Permalink
Merge pull request #2417 from Raalsky/feature/model-intersection-syntax
Browse files Browse the repository at this point in the history
Model intersection syntax
  • Loading branch information
beckjake authored May 18, 2020
2 parents 770cf71 + 598c06f commit 4e2ec6b
Show file tree
Hide file tree
Showing 5 changed files with 436 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

### Features
- Added a `full_refresh` config item that overrides the behavior of the `--full-refresh` flag ([#1009](https://github.com/fishtown-analytics/dbt/issues/1009), [#2348](https://github.com/fishtown-analytics/dbt/pull/2348))
- Added intersection syntax for model selectors: comma-separated selectors such as `tag:bi,tag:users` match only the nodes selected by every part ([#2167](https://github.com/fishtown-analytics/dbt/issues/2167), [#2417](https://github.com/fishtown-analytics/dbt/pull/2417))

Contributors:
- [@raalsky](https://github.com/Raalsky) ([#2417](https://github.com/fishtown-analytics/dbt/pull/2417))

## dbt 0.17.0 (Release TBD)

Expand Down
18 changes: 15 additions & 3 deletions core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# Wildcard token for selectors (presumably matches any node; the
# integration tests pass a bare `*` as a selector) -- TODO confirm.
SELECTOR_GLOB = '*'
# Selector prefix, e.g. `@users`. Judging by the name it expands a node to
# its children plus those children's ancestors -- confirm against the
# selector implementation.
SELECTOR_CHILDREN_AND_ANCESTORS = '@'
# Presumably separates a selector method from its value, as in `tag:bi`
# -- TODO confirm.
SELECTOR_DELIMITER = ':'
# Separates independent specs packed into one argument string;
# split_specs() splits on it.
SPEC_DELIMITER = ' '
# Separates the blocks of one spec whose node sets are intersected;
# split_intersection_blocks() splits on it.
INTERSECTION_DELIMITER = ','


def _probably_path(value: str):
Expand Down Expand Up @@ -69,6 +71,10 @@ def __init__(self, node_spec: str):
self.selector_type = SELECTOR_FILTERS.FQN


def split_intersection_blocks(spec):
    """Break one raw spec into the blocks that are to be intersected.

    The spec is cut at every ``INTERSECTION_DELIMITER``. A spec that does
    not contain the delimiter comes back as a single-element list holding
    the spec itself.
    """
    blocks = spec.split(INTERSECTION_DELIMITER)
    return blocks


class SELECTOR_FILTERS(str, Enum):
FQN = 'fqn'
TAG = 'tag'
Expand All @@ -90,7 +96,7 @@ def alert_non_existence(raw_spec, nodes):
def split_specs(node_specs: Iterable[str]):
    """Flatten raw selector arguments into a set of individual specs.

    Every entry of ``node_specs`` is split on ``SPEC_DELIMITER``, so a single
    argument may carry several specs. The result is a set: duplicate specs
    collapse into one and the original ordering is not preserved.
    """
    specs: Set[str] = set()
    for spec in node_specs:
        # Split once, on the shared module constant rather than a literal.
        specs.update(spec.split(SPEC_DELIMITER))

    return specs
Expand Down Expand Up @@ -388,6 +394,13 @@ def get_nodes_from_spec(self, graph, spec):

return collected

def get_nodes_from_intersection_spec(self, graph, raw_spec):
    """Return the nodes matched by every block of ``raw_spec``.

    ``raw_spec`` is split with ``split_intersection_blocks``; each block is
    wrapped in a ``SelectionCriteria`` and resolved against ``graph`` via
    ``get_nodes_from_spec``. Only nodes present in all of the resulting sets
    are returned.
    """
    node_sets = []
    for block_spec in split_intersection_blocks(raw_spec):
        criteria = SelectionCriteria(block_spec)
        node_sets.append(self.get_nodes_from_spec(graph, criteria))

    # Unbound set.intersection: the first collected set acts as the
    # receiver and the remaining ones are intersected into the result.
    return set.intersection(*node_sets)

def get_nodes_from_multiple_specs(
self,
graph,
Expand All @@ -400,8 +413,7 @@ def get_nodes_from_multiple_specs(
operator = set.difference_update if exclude else set.update

for raw_spec in split_specs(specs):
spec = SelectionCriteria(raw_spec)
nodes = self.get_nodes_from_spec(graph, spec)
nodes = self.get_nodes_from_intersection_spec(graph, raw_spec)

if check_existence:
alert_non_existence(raw_spec, nodes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test__postgres__childrens_parents(self):
def test__postgres__more_childrens_parents(self):
self.run_sql_file("seed.sql")
results = self.run_dbt(['run', '--models', '@users'])
# base_users, emails, users_rollup, users_rollup_dependency, but not users (ephemeral)
# users, emails_alt, users_rollup, users_rollup_dependency, but not base_users (ephemeral)
self.assertEqual(len(results), 4)

created_models = self.get_models_in_schema()
Expand Down Expand Up @@ -244,3 +244,48 @@ def test__snowflake__skip_intermediate(self):
user_last_end <= dep_first_start,
'dependency started before its transitive parent ({} > {})'.format(user_last_end, dep_first_start)
)

@use_profile('postgres')
def test__postgres__concat(self):
    # Two separate selectors (`@emails_alt` and `users_rollup`) are handed
    # to --models in one invocation.
    self.run_sql_file("seed.sql")
    run_results = self.run_dbt(['run', '--models', '@emails_alt', 'users_rollup'])
    # Expected to build: users, emails_alt, users_rollup
    self.assertEqual(len(run_results), 3)

    schema_models = self.get_models_in_schema()
    for present in ('users_rollup', 'users', 'emails_alt'):
        self.assertIn(present, schema_models)
    for absent in ('subdir', 'nested_users'):
        self.assertNotIn(absent, schema_models)

@use_profile('postgres')
def test__postgres__concat_exclude(self):
    # Same two selectors as the plain concat case, with `emails_alt`
    # removed again through --exclude.
    self.run_sql_file("seed.sql")
    run_results = self.run_dbt(['run', '--models', '@emails_alt', 'users_rollup', '--exclude', 'emails_alt'])
    # Expected to build: users, users_rollup
    self.assertEqual(len(run_results), 2)

    schema_models = self.get_models_in_schema()
    for present in ('users', 'users_rollup'):
        self.assertIn(present, schema_models)
    for absent in ('emails_alt', 'subdir', 'nested_users'):
        self.assertNotIn(absent, schema_models)

@use_profile('postgres')
def test__postgres__concat_exclude_concat(self):
    # Two selectors on the --models side and two on the --exclude side.
    self.run_sql_file("seed.sql")
    dbt_args = [
        'run',
        '--models', '@emails_alt', 'users_rollup',
        '--exclude', 'emails_alt', 'users_rollup',
    ]
    run_results = self.run_dbt(dbt_args)
    # Expected to build: users
    self.assertEqual(len(run_results), 1)

    schema_models = self.get_models_in_schema()

    self.assertIn('users', schema_models)
    for absent in ('emails_alt', 'users_rollup', 'subdir', 'nested_users'):
        self.assertNotIn(absent, schema_models)
223 changes: 223 additions & 0 deletions test/integration/007_graph_selection_tests/test_intersection_syntax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
from test.integration.base import DBTIntegrationTest, use_profile


class TestGraphSelection(DBTIntegrationTest):
    """Integration tests for the comma-delimited intersection selector syntax.

    Each test seeds the database, invokes ``dbt run`` with a particular
    ``--models`` / ``--exclude`` combination and then checks both the number
    of results and which models ended up in the target schema.
    """

    # Every test in this class checks each of these names for presence or
    # absence in the schema after the run.
    _CHECKED_MODELS = (
        'users',
        'users_rollup',
        'emails_alt',
        'subdir',
        'nested_users',
    )

    @property
    def schema(self):
        return "graph_selection_tests_007"

    @property
    def models(self):
        return "models"

    def _run_and_check(self, selection_args, expected_models):
        """Seed, run dbt with ``selection_args`` and verify what was built.

        :param selection_args: arguments appended after ``run`` (the
            ``--models`` / ``--exclude`` flags and their values).
        :param expected_models: names that must be present in the schema
            afterwards; the run must report exactly this many results. Every
            other name in ``_CHECKED_MODELS`` must be absent.
        """
        self.run_sql_file("seed.sql")

        results = self.run_dbt(['run'] + selection_args)
        self.assertEqual(len(results), len(expected_models))

        created_models = self.get_models_in_schema()
        for name in expected_models:
            self.assertIn(name, created_models)
        for name in self._CHECKED_MODELS:
            if name not in expected_models:
                self.assertNotIn(name, created_models)

    @use_profile('postgres')
    def test__postgres__same_model_intersection(self):
        # Intersecting a model with itself selects just that model.
        self._run_and_check(['--models', 'users,users'], ['users'])

    @use_profile('postgres')
    def test__postgres__tags_intersection(self):
        # Only `users` is expected to match both tags.
        self._run_and_check(['--models', 'tag:bi,tag:users'], ['users'])

    @use_profile('postgres')
    def test__postgres__intersection_triple_descending(self):
        # Three-way intersection with the glob listed first.
        self._run_and_check(['--models', '*,tag:bi,tag:users'], ['users'])

    @use_profile('postgres')
    def test__postgres__intersection_triple_ascending(self):
        # Same three blocks as the "descending" case, in reverse order.
        self._run_and_check(['--models', 'tag:users,tag:bi,*'], ['users'])

    @use_profile('postgres')
    def test__postgres__intersection_with_exclusion(self):
        # An intersection on the --models side combined with a plain
        # --exclude selector.
        self._run_and_check(
            ['--models', '+users_rollup_dependency,users+',
             '--exclude', 'users_rollup_dependency'],
            ['users', 'users_rollup'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_exclude_intersection(self):
        # Intersections on both the --models and the --exclude side.
        self._run_and_check(
            ['--models', 'tag:bi,@users',
             '--exclude', 'tag:bi,users_rollup+'],
            ['users'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_exclude_intersection_lack(self):
        # The excluded intersection is expected to remove none of the
        # selected models.
        self._run_and_check(
            ['--models', 'tag:bi,@users',
             '--exclude', '@emails,@emails_alt'],
            ['users', 'users_rollup'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_exclude_triple_intersection(self):
        # A three-block intersection on the --exclude side.
        self._run_and_check(
            ['--models', 'tag:bi,@users',
             '--exclude', '*,tag:bi,users_rollup'],
            ['users'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_concat(self):
        # An intersection spec followed by a second, separate selector.
        self._run_and_check(
            ['--models', 'tag:bi,@users', 'emails_alt'],
            ['users', 'users_rollup', 'emails_alt'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_concat_intersection(self):
        # Two intersection specs passed as separate selectors.
        self._run_and_check(
            ['--models', 'tag:bi,@users', '@emails_alt,emails_alt'],
            ['users', 'users_rollup', 'emails_alt'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_concat_exclude(self):
        # Intersection plus a separate selector, minus a plain exclusion.
        self._run_and_check(
            ['--models', 'tag:bi,@users', 'emails_alt',
             '--exclude', 'users_rollup'],
            ['users', 'emails_alt'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_concat_exclude_concat(self):
        # Two intersection specs selected, two plain selectors excluded.
        self._run_and_check(
            ['--models', 'tag:bi,@users', 'emails_alt,@users',
             '--exclude', 'users_rollup_dependency', 'users_rollup'],
            ['users', 'emails_alt'],
        )

    @use_profile('postgres')
    def test__postgres__intersection_concat_exclude_intersection_concat(self):
        # Two intersection specs selected, two intersection specs excluded.
        self._run_and_check(
            ['--models', 'tag:bi,@users', 'emails_alt,@users',
             '--exclude', '@users,users_rollup_dependency',
             '@users,users_rollup'],
            ['users', 'emails_alt'],
        )
Loading

0 comments on commit 4e2ec6b

Please sign in to comment.