Skip to content

Commit

Permalink
fix: trino metadata performance issues (#3123)
Browse files Browse the repository at this point in the history
* fix: remove costly metadata `aggregation`

* fix: generate `full` intermediate metadata tables

* fix: join metadata on similar `metric` identifier
  • Loading branch information
Jabolol authored Feb 28, 2025
1 parent 16017ac commit f51a5f9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 65 deletions.
16 changes: 1 addition & 15 deletions warehouse/metrics_mesh/models/marts/metrics/metrics_v0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,6 @@ MODEL (
)
);

@DEF(MAX_STAGE_OVERRIDE, 550);

-- TODO(jabolo): Remove Trino session logic once #3117 lands
@IF(
@OR(@gateway = 'trino', @gateway = 'local-trino'),
SET SESSION query_max_stage_count = @MAX_STAGE_OVERRIDE
);

WITH unioned_metric_names AS (
SELECT *
FROM metrics.int_metric_names_from_artifact
Expand Down Expand Up @@ -55,7 +47,7 @@ WITH unioned_metric_names AS (
'TODO' AS definition_ref,
'UNKNOWN' AS aggregation_function
FROM all_timeseries_metric_names t
LEFT JOIN all_metrics_metadata m ON t.metric = m.metric
LEFT JOIN all_metrics_metadata m ON t.metric LIKE '%' || m.metric || '%'
)
SELECT
metric_id::TEXT,
Expand All @@ -68,9 +60,3 @@ SELECT
definition_ref::TEXT,
aggregation_function::TEXT
FROM metrics_v0_no_casting;

-- TODO(jabolo): Remove Trino session logic once #3117 lands
@IF(
@OR(@gateway = 'trino', @gateway = 'local-trino'),
RESET SESSION query_max_stage_count
);
54 changes: 24 additions & 30 deletions warehouse/metrics_tools/factory/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,9 @@ def generate_models(self, calling_file: str):
)(join_all_of_entity_type)

raw_table_metadata = {
key: asdict(value["metadata"])
for key, value in self._rendered_queries.items()
if value["metadata"] is not None
key: asdict(value.metadata)
for key, value in self._raw_options.get("metric_queries").items()
if value.metadata is not None
}

transformed_metadata = defaultdict(list)
Expand All @@ -390,38 +390,32 @@ def generate_models(self, calling_file: str):
metadata_tuple = tuple(metadata.items())
transformed_metadata[metadata_tuple].append(table)

table_meta = [
{"metadata": dict(meta), "tables": tables}
for meta, tables in transformed_metadata.items()
]

metadata_depends_on = functools.reduce(
lambda x, y: x.union({f"metrics.metrics_metadata_{ident}" for ident in y}),
transformed_metadata.values(),
set(),
)

for elem in table_meta:
meta = elem["metadata"]
tables = elem["tables"]

for ident in tables:
MacroOverridingModel(
additional_macros=[],
override_module_path=override_module_path,
override_path=override_path,
locals=dict(
db=self.catalog,
table=ident,
metadata=meta,
),
name=f"metrics.metrics_metadata_{ident}",
is_sql=True,
kind="VIEW",
dialect="clickhouse",
columns=constants.METRIC_METADATA_COLUMNS,
enabled=self._raw_options.get("enabled", True),
)(map_metadata_to_metric)
for metric_key, metric_value in self._raw_options.get("metric_queries").items():
if not metric_value.metadata:
continue

MacroOverridingModel(
additional_macros=[],
depends_on=[],
override_module_path=override_module_path,
override_path=override_path,
locals={
"metric": metric_key,
"metadata": asdict(metric_value.metadata),
},
name=f"metrics.metrics_metadata_{metric_key}",
is_sql=True,
kind="FULL",
dialect="clickhouse",
columns=constants.METRIC_METADATA_COLUMNS,
enabled=self._raw_options.get("enabled", True),
)(map_metadata_to_metric)

MacroOverridingModel(
additional_macros=[],
Expand All @@ -431,7 +425,7 @@ def generate_models(self, calling_file: str):
locals={},
name="metrics.metrics_metadata",
is_sql=True,
kind="VIEW",
kind="FULL",
dialect="clickhouse",
columns=constants.METRIC_METADATA_COLUMNS,
enabled=self._raw_options.get("enabled", True),
Expand Down
25 changes: 5 additions & 20 deletions warehouse/metrics_tools/factory/proxy/proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,31 +114,16 @@ def join_all_of_entity_type(
def map_metadata_to_metric(
evaluator: MacroEvaluator,
):
db = t.cast(str, evaluator.var("db"))
table = t.cast(str, evaluator.var("table"))
metric = t.cast(str, evaluator.var("metric"))
metadata = t.cast(t.Dict[str, t.Any], evaluator.var("metadata"))

description = metadata["description"]
display_name = metadata["display_name"]

metrics_alias = exp.Concat(
expressions=[
exp.to_column("event_source"),
exp.Literal(this="_", is_string=True),
exp.to_column("metric"),
],
safe=False,
coalesce=False,
).as_("metric")

return (
exp.select(
exp.Literal(this=display_name, is_string=True).as_("display_name"),
exp.Literal(this=description, is_string=True).as_("description"),
metrics_alias,
)
.from_(sql.to_table(f"{db}.{table}"))
.distinct()
return exp.select(
exp.Literal(this=display_name, is_string=True).as_("display_name"),
exp.Literal(this=description, is_string=True).as_("description"),
exp.Literal(this=metric, is_string=True).as_("metric"),
)


Expand Down

0 comments on commit f51a5f9

Please sign in to comment.