Skip to content

Commit

Permalink
fix: give credit to annotator when annotating logos
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Dec 28, 2022
1 parent 4268939 commit 02f7e2e
Show file tree
Hide file tree
Showing 15 changed files with 291 additions and 196 deletions.
127 changes: 58 additions & 69 deletions robotoff/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
get_logo_annotation,
get_predictions,
save_annotation,
update_logo_annotations,
)
from robotoff.app.middleware import DBConnectionMiddleware
from robotoff.elasticsearch import get_es_client
Expand All @@ -45,6 +46,7 @@
LogoEmbedding,
ProductInsight,
batch_insert,
db,
)
from robotoff.off import (
OFFAuthentication,
Expand Down Expand Up @@ -749,6 +751,16 @@ def on_get(self, req: falcon.Request, resp: falcon.Response):
resp.media = {"logos": items, "count": query_count}


def check_logo_annotation(type_: str, value: Optional[str] = None) -> None:
    """Validate a logo annotation ``(type_, value)`` pair.

    Rules enforced:

    * when a value is provided and the type is ``label``, the value must pass
      ``is_prefixed_value`` (i.e. be language-prefixed);
    * when no value is provided, the type must not be one of ``brand``,
      ``category``, ``label`` or ``store``, which all require a value.

    :param type_: the annotation type submitted by the client
    :param value: the annotation value, or None if no value was submitted
    :raises falcon.HTTPBadRequest: if one of the rules above is violated
    """
    if value is not None:
        # Only labels have a format constraint on the value; the type check
        # comes first so other types never reach `is_prefixed_value`.
        if type_ == "label" and not is_prefixed_value(value):
            raise falcon.HTTPBadRequest(
                description=f"language-prefixed value are required for label type (here: {value})"
            )
    elif type_ in ("brand", "category", "label", "store"):
        # The message used to end with a stray ")" after the type name.
        raise falcon.HTTPBadRequest(description=f"value required for type {type_}")


class ImageLogoDetailResource:
def on_get(self, req: falcon.Request, resp: falcon.Response, logo_id: int):
logo = LogoAnnotation.get_or_none(id=logo_id)
Expand All @@ -770,38 +782,25 @@ def on_put(self, req: falcon.Request, resp: falcon.Response, logo_id: int):
description="authentication is required to annotate logos"
)

logo = LogoAnnotation.get_or_none(id=logo_id)

if logo is None:
resp.status = falcon.HTTP_404
return

type_ = req.media["type"]
value = req.media["value"] or None
updated = False

if type_ != logo.annotation_type:
logo.annotation_type = type_
updated = True

if value != logo.annotation_value:
logo.annotation_value = value

if value is not None:
value_tag = get_tag(value)
logo.annotation_value_tag = value_tag
logo.taxonomy_value = match_taxonomized_value(value_tag, type_)
else:
logo.annotation_value_tag = None
logo.taxonomy_value = None
with db.atomic():
logo = LogoAnnotation.get_or_none(id=logo_id)
if logo is None:
resp.status = falcon.HTTP_404
return

updated = True
type_ = req.media["type"]
value = req.media["value"] or None
check_logo_annotation(type_, value)

if updated:
logo.username = auth.get_username()
logo.completed_at = datetime.datetime.utcnow()
logo.save()
generate_insights_from_annotated_logos([logo], settings.OFF_SERVER_DOMAIN)
if type_ != logo.annotation_type or value != logo.annotation_value:
annotated_logos = update_logo_annotations(
[(type_, value, logo)],
username=auth.get_username() or "unknown",
completed_at=datetime.datetime.utcnow(),
)
generate_insights_from_annotated_logos(
annotated_logos, settings.OFF_SERVER_DOMAIN, auth
)

resp.status = falcon.HTTP_204

Expand All @@ -816,47 +815,37 @@ def on_post(self, req: falcon.Request, resp: falcon.Response):
)
server_domain = req.media.get("server_domain", settings.OFF_SERVER_DOMAIN)
annotations = req.media["annotations"]
username = auth.get_username()
completed_at = datetime.datetime.utcnow()
annotated_logos = []

for annotation in annotations:
logo_id = annotation["logo_id"]
type_ = annotation["type"]
value = annotation["value"] or None
try:
logo = LogoAnnotation.get_by_id(logo_id)
except LogoAnnotation.DoesNotExist:
raise falcon.HTTPNotFound(description=f"logo {logo_id} not found")

if logo.annotation_type is not None:
# Logo is already annotated, skip
continue

if value is not None:
if type_ == "label" and not is_prefixed_value(value):
raise falcon.HTTPBadRequest(
description=f"language-prefixed value are required for label type (here: {value})"
)
logo.annotation_value = value
value_tag = get_tag(value)
logo.annotation_value_tag = value_tag
logo.taxonomy_value = match_taxonomized_value(value_tag, type_)
elif type_ in ("brand", "category", "label", "store"):
raise falcon.HTTPBadRequest(
description=f"value required for type {type_} (logo {logo_id})"
annotation_logos = []

with db.atomic():
for annotation in annotations:
logo_id = annotation["logo_id"]
type_ = annotation["type"]
value = annotation["value"] or None
check_logo_annotation(type_, value)

try:
logo = LogoAnnotation.get_by_id(logo_id)
except LogoAnnotation.DoesNotExist:
raise falcon.HTTPNotFound(description=f"logo {logo_id} not found")

if logo.annotation_type is None:
# Don't annotate already annotated logos
annotation_logos.append((type_, value, logo))

if annotation_logos:
annotated_logos = update_logo_annotations(
annotation_logos,
username=auth.get_username() or "unknown",
completed_at=completed_at,
)

logo.annotation_type = type_
logo.username = username
logo.completed_at = completed_at
annotated_logos.append(logo)

for logo in annotated_logos:
logo.save()

created = generate_insights_from_annotated_logos(annotated_logos, server_domain)
resp.media = {"created insights": created}
annotated = generate_insights_from_annotated_logos(
annotated_logos, server_domain, auth
)
else:
annotated = 0
resp.media = {"created insights": annotated}


class ImageLogoUpdateResource:
Expand Down
36 changes: 36 additions & 0 deletions robotoff/app/core.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import functools
from enum import Enum
from typing import Iterable, NamedTuple, Optional, Union
Expand All @@ -24,7 +25,9 @@
db,
)
from robotoff.off import OFFAuthentication
from robotoff.taxonomy import match_taxonomized_value
from robotoff.utils import get_logger
from robotoff.utils.text import get_tag

logger = get_logger(__name__)

Expand Down Expand Up @@ -434,3 +437,36 @@ def get_logo_annotation(
return query.count()
else:
return query.iterator()


def update_logo_annotations(
    annotation_logos: list[tuple[str, Optional[str], LogoAnnotation]],
    username: str,
    completed_at: datetime.datetime,
) -> list[LogoAnnotation]:
    """Apply annotations to logos and persist them with one bulk update.

    For every ``(type_, value, logo)`` item, the logo's annotation type,
    value, value tag and taxonomy value are set, together with the annotator
    username and the completion datetime. All logos are then saved with a
    single ``LogoAnnotation.bulk_update`` call.

    :param annotation_logos: list of ``(type_, value, logo)`` tuples, where
        ``value`` is None when the annotation carries no value
    :param username: name of the annotator to credit on every updated logo
    :param completed_at: datetime stored as the annotation completion time
    :return: the updated logos, in the same order as ``annotation_logos``
        (an empty list if there was nothing to update)
    """
    updated_logos = []
    for type_, value, logo in annotation_logos:
        logo.annotation_type = type_

        if value is not None:
            value_tag = get_tag(value)
            logo.annotation_value = value
            logo.annotation_value_tag = value_tag
            logo.taxonomy_value = match_taxonomized_value(value_tag, type_)
        else:
            # Reset value-related fields so that a value left over from a
            # previous annotation does not survive a value-less annotation.
            logo.annotation_value = None
            logo.annotation_value_tag = None
            logo.taxonomy_value = None

        # Credit the annotator for every annotation, including value-less
        # ones.
        logo.username = username
        logo.completed_at = completed_at
        updated_logos.append(logo)

    if updated_logos:
        # Every field above is assigned on every logo, so a fixed field list
        # is always correct and keeps the generated query deterministic.
        LogoAnnotation.bulk_update(
            updated_logos,
            fields=[
                "annotation_type",
                "annotation_value",
                "annotation_value_tag",
                "taxonomy_value",
                "username",
                "completed_at",
            ],
        )

    return updated_logos
15 changes: 6 additions & 9 deletions robotoff/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ def regenerate_ocr_insights(
)

with db:
imported = importer.import_insights(predictions, settings.OFF_SERVER_DOMAIN)

logger.info("Import finished, %s insights imported", imported)
import_result = importer.import_insights(
predictions, settings.OFF_SERVER_DOMAIN
)
logger.info(import_result)


@app.command()
Expand Down Expand Up @@ -200,20 +201,16 @@ def import_insights(
else:
raise ValueError("--generate-from or --input-path must be provided")

imported = 0
with db.connection_context():
for prediction_batch in tqdm.tqdm(
chunked(predictions, batch_size), desc="prediction batch"
):
# Create a new transaction for every batch
with db.atomic():
batch_imported = importer.import_insights(
import_results = importer.import_insights(
prediction_batch, settings.OFF_SERVER_DOMAIN
)
logger.info(f"{batch_imported} insights imported in batch")
imported += batch_imported

logger.info(f"{imported} insights imported")
logger.info(import_results)


@app.command()
Expand Down
Loading

0 comments on commit 02f7e2e

Please sign in to comment.