Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: trino metadata performance issues #3123

Merged
merged 3 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 1 addition & 15 deletions warehouse/metrics_mesh/models/marts/metrics/metrics_v0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,6 @@ MODEL (
)
);

@DEF(MAX_STAGE_OVERRIDE, 550);

-- TODO(jabolo): Remove Trino session logic once #3117 lands
@IF(
@OR(@gateway = 'trino', @gateway = 'local-trino'),
SET SESSION query_max_stage_count = @MAX_STAGE_OVERRIDE
);

WITH unioned_metric_names AS (
SELECT *
FROM metrics.int_metric_names_from_artifact
Expand Down Expand Up @@ -55,7 +47,7 @@ WITH unioned_metric_names AS (
'TODO' AS definition_ref,
'UNKNOWN' AS aggregation_function
FROM all_timeseries_metric_names t
LEFT JOIN all_metrics_metadata m ON t.metric = m.metric
LEFT JOIN all_metrics_metadata m ON t.metric LIKE '%' || m.metric || '%'
)
SELECT
metric_id::TEXT,
Expand All @@ -68,9 +60,3 @@ SELECT
definition_ref::TEXT,
aggregation_function::TEXT
FROM metrics_v0_no_casting;

-- TODO(jabolo): Remove Trino session logic once #3117 lands
@IF(
@OR(@gateway = 'trino', @gateway = 'local-trino'),
RESET SESSION query_max_stage_count
);
54 changes: 24 additions & 30 deletions warehouse/metrics_tools/factory/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,9 @@ def generate_models(self, calling_file: str):
)(join_all_of_entity_type)

raw_table_metadata = {
key: asdict(value["metadata"])
for key, value in self._rendered_queries.items()
if value["metadata"] is not None
key: asdict(value.metadata)
for key, value in self._raw_options.get("metric_queries").items()
if value.metadata is not None
}

transformed_metadata = defaultdict(list)
Expand All @@ -390,38 +390,32 @@ def generate_models(self, calling_file: str):
metadata_tuple = tuple(metadata.items())
transformed_metadata[metadata_tuple].append(table)

table_meta = [
{"metadata": dict(meta), "tables": tables}
for meta, tables in transformed_metadata.items()
]

metadata_depends_on = functools.reduce(
lambda x, y: x.union({f"metrics.metrics_metadata_{ident}" for ident in y}),
transformed_metadata.values(),
set(),
)

for elem in table_meta:
meta = elem["metadata"]
tables = elem["tables"]

for ident in tables:
MacroOverridingModel(
additional_macros=[],
override_module_path=override_module_path,
override_path=override_path,
locals=dict(
db=self.catalog,
table=ident,
metadata=meta,
),
name=f"metrics.metrics_metadata_{ident}",
is_sql=True,
kind="VIEW",
dialect="clickhouse",
columns=constants.METRIC_METADATA_COLUMNS,
enabled=self._raw_options.get("enabled", True),
)(map_metadata_to_metric)
for metric_key, metric_value in self._raw_options.get("metric_queries").items():
if not metric_value.metadata:
continue

MacroOverridingModel(
additional_macros=[],
depends_on=[],
override_module_path=override_module_path,
override_path=override_path,
locals={
"metric": metric_key,
"metadata": asdict(metric_value.metadata),
},
name=f"metrics.metrics_metadata_{metric_key}",
is_sql=True,
kind="FULL",
dialect="clickhouse",
columns=constants.METRIC_METADATA_COLUMNS,
enabled=self._raw_options.get("enabled", True),
)(map_metadata_to_metric)

MacroOverridingModel(
additional_macros=[],
Expand All @@ -431,7 +425,7 @@ def generate_models(self, calling_file: str):
locals={},
name="metrics.metrics_metadata",
is_sql=True,
kind="VIEW",
kind="FULL",
dialect="clickhouse",
columns=constants.METRIC_METADATA_COLUMNS,
enabled=self._raw_options.get("enabled", True),
Expand Down
25 changes: 5 additions & 20 deletions warehouse/metrics_tools/factory/proxy/proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,31 +114,16 @@ def join_all_of_entity_type(
def map_metadata_to_metric(
evaluator: MacroEvaluator,
):
db = t.cast(str, evaluator.var("db"))
table = t.cast(str, evaluator.var("table"))
metric = t.cast(str, evaluator.var("metric"))
metadata = t.cast(t.Dict[str, t.Any], evaluator.var("metadata"))

description = metadata["description"]
display_name = metadata["display_name"]

metrics_alias = exp.Concat(
expressions=[
exp.to_column("event_source"),
exp.Literal(this="_", is_string=True),
exp.to_column("metric"),
],
safe=False,
coalesce=False,
).as_("metric")

return (
exp.select(
exp.Literal(this=display_name, is_string=True).as_("display_name"),
exp.Literal(this=description, is_string=True).as_("description"),
metrics_alias,
)
.from_(sql.to_table(f"{db}.{table}"))
.distinct()
return exp.select(
exp.Literal(this=display_name, is_string=True).as_("display_name"),
exp.Literal(this=description, is_string=True).as_("description"),
exp.Literal(this=metric, is_string=True).as_("metric"),
)


Expand Down
Loading