Skip to content

Commit

Permalink
add: display_name and description to metrics_v0 (#3076)
Browse files Browse the repository at this point in the history
* add: metric `metadata` to metric factories

* add: `display_name` and `description` to `metrics_v0`

* add: metric `metadata` class def

* add: metric `metadata` column def

* add: generate `metadata` table in sqlmesh factory

* add: map `metadata` to metric query builder

* add: dynamically generated `metadata` models

* add: sqlmesh `metadata` proxy functions

* fix: import `functools` inside sqlmesh proxy
  • Loading branch information
Jabolol authored Feb 25, 2025
1 parent 5c42afb commit 085437d
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 8 deletions.
17 changes: 12 additions & 5 deletions warehouse/metrics_mesh/models/marts/metrics/metrics_v0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,25 @@ WITH unioned_metric_names AS (
SELECT DISTINCT
metric
FROM unioned_metric_names
), all_metrics_metadata AS (
SELECT
metric,
display_name,
description
FROM metrics.metrics_metadata
), metrics_v0_no_casting AS (
SELECT
@oso_id('OSO', 'oso', metric) AS metric_id,
@oso_id('OSO', 'oso', t.metric) AS metric_id,
'OSO' AS metric_source,
'oso' AS metric_namespace,
metric AS metric_name,
metric AS display_name,
'TODO' AS description,
t.metric AS metric_name,
COALESCE(m.display_name, t.metric) AS display_name,
COALESCE(m.description, 'TODO') AS description,
NULL AS raw_definition,
'TODO' AS definition_ref,
'UNKNOWN' AS aggregation_function
FROM all_timeseries_metric_names
FROM all_timeseries_metric_names t
LEFT JOIN all_metrics_metadata m ON t.metric = m.metric
)
SELECT
metric_id::TEXT,
Expand Down
97 changes: 97 additions & 0 deletions warehouse/metrics_mesh/models/metrics_factories.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

from dotenv import load_dotenv
from metrics_tools.definition import MetricMetadata
from metrics_tools.factory import MetricQueryDef, RollingConfig, timeseries_metrics

# Annoyingly sqlmesh doesn't load things in an expected order but we want to be
Expand Down Expand Up @@ -36,6 +37,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Stars",
description="Metrics related to GitHub stars",
),
),
"commits": MetricQueryDef(
ref="code/commits.sql",
Expand All @@ -47,41 +52,73 @@
slots=8,
),
over_all_time=True,
metadata=MetricMetadata(
display_name="Commits",
description="Metrics related to GitHub commits",
),
),
"comments": MetricQueryDef(
ref="code/comments.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Comments",
description="Metrics related to GitHub comments",
),
),
"releases": MetricQueryDef(
ref="code/releases.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Releases",
description="Metrics related to GitHub releases",
),
),
"forks": MetricQueryDef(
ref="code/forks.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Forks",
description="Metrics related to GitHub repository forks",
),
),
"repositories": MetricQueryDef(
ref="code/repositories.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Repositories",
description="Metrics related to GitHub repositories",
),
),
"active_contracts": MetricQueryDef(
ref="blockchain/active_contracts.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Active Contracts",
description="Metrics related to active blockchain contracts",
),
),
"contributors": MetricQueryDef(
ref="code/contributors.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Contributors",
description="Metrics related to GitHub contributors",
),
),
"active_developers": MetricQueryDef(
ref="code/active_developers.sql",
time_aggregations=["daily", "weekly", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Active Developers",
description="Metrics related to active GitHub developers",
),
),
# This defines something with a rolling option that allows you to look back
# to some arbitrary window. So you specify the window and specify the unit.
Expand Down Expand Up @@ -134,6 +171,10 @@
cron="@monthly",
slots=32,
),
metadata=MetricMetadata(
display_name="Developer Classifications",
description="Metrics related to developer activity classifications",
),
),
"contributor_classifications": MetricQueryDef(
ref="code/contributor_activity_classification.sql",
Expand All @@ -152,6 +193,10 @@
cron="@monthly",
slots=32,
),
metadata=MetricMetadata(
display_name="Contributor Classifications",
description="Metrics related to contributor activity classifications",
),
),
# Currently this query performs really poorly. We need to do some debugging on it
# "user_retention_classifications": MetricQueryDef(
Expand All @@ -174,6 +219,10 @@
cron="@monthly",
slots=32,
),
metadata=MetricMetadata(
display_name="Change in Developer Activity",
description="Metrics related to change in developer activity",
),
),
"opened_pull_requests": MetricQueryDef(
ref="code/prs_opened.sql",
Expand All @@ -185,6 +234,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Opened Pull Requests",
description="Metrics related to opened GitHub pull requests",
),
),
"merged_pull_requests": MetricQueryDef(
ref="code/prs_merged.sql",
Expand All @@ -196,6 +249,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Merged Pull Requests",
description="Metrics related to merged GitHub pull requests",
),
),
"opened_issues": MetricQueryDef(
ref="code/issues_opened.sql",
Expand All @@ -207,6 +264,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Opened Issues",
description="Metrics related to opened GitHub issues",
),
),
"closed_issues": MetricQueryDef(
ref="code/issues_closed.sql",
Expand All @@ -218,6 +279,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Closed Issues",
description="Metrics related to closed GitHub issues",
),
),
"avg_prs_time_to_merge": MetricQueryDef(
ref="code/prs_time_to_merge.sql",
Expand All @@ -229,6 +294,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Average PR Time to Merge",
description="Metrics related to average GitHub PR time to merge",
),
),
"avg_time_to_first_response": MetricQueryDef(
ref="code/time_to_first_response.sql",
Expand All @@ -240,6 +309,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Average Time to First Response",
description="Metrics related to average time to first response",
),
),
"active_addresses_aggregation": MetricQueryDef(
ref="blockchain/active_addresses.sql",
Expand All @@ -254,6 +327,10 @@
),
time_aggregations=["daily", "monthly"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Active Addresses Aggregation",
description="Metrics related to active blockchain addresses",
),
),
"gas_fees": MetricQueryDef(
ref="blockchain/gas_fees.sql",
Expand All @@ -265,6 +342,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Gas Fees",
description="Metrics related to blockchain gas fees",
),
),
"transactions": MetricQueryDef(
ref="blockchain/transactions.sql",
Expand All @@ -276,6 +357,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Transactions",
description="Metrics related to blockchain transactions",
),
),
"contributors_lifecycle": MetricQueryDef(
ref="code/lifecycle.sql",
Expand All @@ -294,6 +379,10 @@
slots=32,
),
entity_types=["artifact", "project", "collection"],
metadata=MetricMetadata(
display_name="Contributors Lifecycle",
description="Metrics related to contributor lifecycle",
),
),
"funding_received": MetricQueryDef(
ref="funding/funding_received.sql",
Expand All @@ -305,6 +394,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Funding Received",
description="Metrics related to funding received",
),
),
"dependencies": MetricQueryDef(
ref="deps/dependencies.sql",
Expand All @@ -316,6 +409,10 @@
),
entity_types=["artifact", "project", "collection"],
over_all_time=True,
metadata=MetricMetadata(
display_name="Dependencies",
description="Metrics related to dependencies",
),
),
},
default_dialect="clickhouse",
Expand Down
8 changes: 8 additions & 0 deletions warehouse/metrics_tools/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ def assert_allowed_items_in_list[T](to_validate: t.List[T], allowed_items: t.Lis
assert item in allowed_items, "List contains invalid items"


@dataclass(kw_only=True)
class MetricMetadata:
    """Human-readable metadata for a metric definition.

    Attached to a metric query definition through its optional ``metadata``
    field. Both fields are required and, because the dataclass is declared
    with ``kw_only=True``, must be passed by keyword.
    """

    # Free-text explanation of the metric, e.g. "Metrics related to GitHub stars".
    description: str
    # Short label for the metric, e.g. "Stars".
    display_name: str


@dataclass(kw_only=True)
class MetricQueryDef:
# The relative path to the query in `oso_metrics`
Expand All @@ -166,6 +172,8 @@ class MetricQueryDef:

use_python_model: bool = True

metadata: t.Optional[MetricMetadata] = None

def raw_sql(self, queries_dir: str):
return open(os.path.join(queries_dir, self.ref)).read()

Expand Down
6 changes: 6 additions & 0 deletions warehouse/metrics_tools/factory/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@
"amount": exp.DataType.build("DOUBLE", dialect="duckdb"),
},
}

# Column names and types for metric metadata. Every column is a STRING whose
# type expression is built with the duckdb dialect; each key gets its own
# DataType instance, in the order listed below.
METRIC_METADATA_COLUMNS: t.Dict[str, exp.DataType] = {
    column_name: exp.DataType.build("STRING", dialect="duckdb")
    for column_name in ("display_name", "description", "metric")
}
Loading

0 comments on commit 085437d

Please sign in to comment.