feat: add insights metric
Save the number of insights, grouped by type, annotation, automatic_processing, predictor and reserved_barcode.
Runs every night at 1 AM.
raphael0202 committed Dec 1, 2022
1 parent aca7a39 commit 9cc8bb6
Showing 2 changed files with 61 additions and 5 deletions.
59 changes: 55 additions & 4 deletions robotoff/metrics.py
@@ -1,17 +1,19 @@
 import datetime
-from typing import List, Optional
+from typing import Optional
 from urllib.parse import urlparse

 import requests
 from influxdb_client import InfluxDBClient
 from influxdb_client.client.write_api import SYNCHRONOUS
+from peewee import fn

 from robotoff import settings
+from robotoff.models import ProductInsight, with_db
 from robotoff.utils import get_logger, http_session

 logger = get_logger(__name__)

-URL_PATHS: List[str] = [
+URL_PATHS: list[str] = [
     "/ingredients-analysis?json=1",
     "/data-quality?json=1",
     "/ingredients?stats=1&json=1",
@@ -142,8 +144,8 @@ def generate_metrics_from_path(
     target_datetime: datetime.datetime,
     count: Optional[int] = None,
     facet: Optional[str] = None,
-) -> List:
-    inserts: List = []
+) -> list[dict]:
+    inserts: list[dict] = []
     url = settings.BaseURLProvider().country(country_tag + "-en").get() + path

     if facet is None:
@@ -192,3 +194,52 @@ def generate_metrics_from_path(
             }
         )
     return inserts
+
+
+def save_insight_metrics():
+    """Save number of insights, grouped by the following fields:
+    - type
+    - annotation
+    - automatic_processing
+    - predictor
+    - reserved_barcode
+    """
+    target_datetime = datetime.datetime.now()
+
+    client = get_influx_client()
+    if client is not None:
+        write_client = client.write_api(write_options=SYNCHRONOUS)
+
+        for inserts in (generate_insight_metrics(target_datetime),):
+            write_client.write(bucket=settings.INFLUXDB_BUCKET, record=inserts)
+
+
+@with_db
+def generate_insight_metrics(target_datetime: datetime.datetime) -> list[dict]:
+    group_by_fields = [
+        ProductInsight.type,
+        ProductInsight.annotation,
+        ProductInsight.automatic_processing,
+        ProductInsight.predictor,
+        ProductInsight.reserved_barcode,
+    ]
+    inserts = []
+    for item in (
+        ProductInsight.select(
+            *group_by_fields,
+            fn.COUNT(ProductInsight.id).alias("count"),
+        )
+        .group_by(*group_by_fields)
+        .dicts()
+        .iterator()
+    ):
+        count = item.pop("count")
+        inserts.append(
+            {
+                "measurement": "insights",
+                "tags": item,
+                "time": target_datetime.isoformat(),
+                "fields": {"count": count},
+            }
+        )
+    return inserts
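
Note: the peewee query above compiles to a single SELECT ... COUNT(...) ... GROUP BY over the underlying insight table, and each resulting row becomes one InfluxDB point whose tags are the grouped columns and whose only field is the count. Below is a minimal sketch of such a point and of the write call, assuming the dictionary point format accepted by the influxdb-client library; the tag values, connection parameters and bucket name are placeholders, not values taken from this commit.

import datetime

from influxdb_client import InfluxDBClient
from influxdb_client.client.write_api import SYNCHRONOUS

# One point per distinct (type, annotation, automatic_processing, predictor,
# reserved_barcode) combination; the tag values here are hypothetical examples.
example_point = {
    "measurement": "insights",
    "tags": {
        "type": "label",
        "annotation": 1,
        "automatic_processing": False,
        "predictor": "flashtext",
        "reserved_barcode": False,
    },
    "time": datetime.datetime(2022, 12, 1, 1, 0, 0).isoformat(),
    "fields": {"count": 1234},
}

# Same write path as save_insight_metrics(), with placeholder credentials and bucket.
client = InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org")
write_api = client.write_api(write_options=SYNCHRONOUS)
write_api.write(bucket="robotoff-metrics", record=[example_point])

Modelling the grouped columns as tags keeps count as the only field, so dashboards can filter and aggregate by insight type or predictor directly in InfluxDB.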
7 changes: 6 additions & 1 deletion robotoff/scheduler/__init__.py
@@ -18,7 +18,11 @@
     InsightAnnotatorFactory,
 )
 from robotoff.insights.importer import import_insights
-from robotoff.metrics import ensure_influx_database, save_facet_metrics
+from robotoff.metrics import (
+    ensure_influx_database,
+    save_facet_metrics,
+    save_insight_metrics,
+)
 from robotoff.models import ProductInsight, with_db
 from robotoff.prediction.category.matcher import predict_from_dataset
 from robotoff.products import (
@@ -270,6 +274,7 @@ def run():

     # This job exports daily product metrics for monitoring.
     scheduler.add_job(save_facet_metrics, "cron", day="*", hour=1, max_instances=1)
+    scheduler.add_job(save_insight_metrics, "cron", day="*", hour=1, max_instances=1)

     # This job refreshes data needed to generate insights.
     scheduler.add_job(_update_data, "cron", day="*", hour="3", max_instances=1)
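
Note: with the cron trigger used here (APScheduler is assumed, since the scheduler exposes add_job with cron fields and max_instances), day="*", hour=1 leaves the finer fields (minute, second) at their minimum, so the job fires once a day at 01:00, and max_instances=1 prevents a slow export from overlapping the next run. A standalone sketch with a stand-in job body:

from apscheduler.schedulers.blocking import BlockingScheduler


def save_insight_metrics():
    # Stand-in for robotoff.metrics.save_insight_metrics
    print("exporting insight counts to InfluxDB")


scheduler = BlockingScheduler()
# Fires daily at 01:00: unspecified finer-grained fields default to 0.
scheduler.add_job(save_insight_metrics, "cron", day="*", hour=1, max_instances=1)
scheduler.start()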
