
Commit 081f4a9

fix: remove automatic processing disabling
This rule was overkill and prevented many insights from being applied.
raphael0202 committed Dec 26, 2022
1 parent 2031fa4 commit 081f4a9
Showing 4 changed files with 0 additions and 112 deletions.
doc/explanations/predictions.md (0 additions, 7 deletions)
@@ -60,11 +60,4 @@ The importer must have a `generate_candidates` method that returns a list of can

From the list of candidate insights, we update the `product_insight` table by deleting all insights and importing all candidates. The import mechanism is actually a bit smarter: we avoid unnecessary DB insertions/deletions by trying to match each candidate with a reference insight (an insight that is already in the DB).

For each insight, if `automatic_processing=True`, we check that the insight can indeed be applied automatically: we don't want to apply insights based on very old images that are not relevant anymore.

We keep automatic processing on if any of the following conditions are true:

- the image is a selected image, in any language (`front`, `nutrition`, `ingredients`, `packaging`)
- the image was uploaded no more than 120 days (~4 months) before the most recently uploaded image.

Once insights are refreshed/imported, they become available as questions in `/questions/*` API routes. Automatically processable insights are applied by the scheduler, once `current_timestamp >= ${process_after}`.
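
For illustration only (not part of this diff): the scheduler condition above boils down to a filter on insight rows, where an insight is picked up once `automatic_processing` is set and its `process_after` timestamp has passed. A minimal sketch, assuming plain objects carrying those two attributes; the actual robotoff scheduler code may differ:

```python
import datetime


def automatically_processable(insights):
    """Illustrative filter mirroring `current_timestamp >= process_after`
    for insights flagged with automatic_processing=True (sketch only,
    not robotoff's actual scheduler query)."""
    now = datetime.datetime.utcnow()
    return [
        insight
        for insight in insights
        if insight.automatic_processing
        and insight.process_after is not None
        and insight.process_after <= now
    ]
```
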
robotoff/insights/importer.py (0 additions, 35 deletions)
@@ -108,37 +108,6 @@ def is_valid_insight_image(images: dict[str, Any], source_image: Optional[str]):
    return image_id.isdigit() and image_id in images


def is_trustworthy_insight_image(
    images: dict[str, Any],
    source_image: Optional[str],
    max_timedelta: datetime.timedelta = settings.IMAGE_MAX_TIMEDELTA,
):
    """Return True if the source image is trustworthy for insight generation:

    - the image ID is a digit and is referenced in `images`
    - the image is either selected or recent enough

    If `source_image` is None, we always consider the insight as trustworthy.
    Insights considered as trustworthy can have automatic_processing = True.

    :param images: The image dict as stored in MongoDB.
    :param source_image: The insight source image, should be the path of the
        image or None.
    :param max_timedelta: Maximum timedelta between the most recent image and
        the source image, default settings.IMAGE_MAX_TIMEDELTA.
    """
    if source_image is None:
        return True

    image_id = Path(source_image).stem

    if not image_id.isdigit() or image_id not in images:
        return False

    return is_selected_image(images, image_id) or is_recent_image(
        images, image_id, max_timedelta
    )


def get_existing_insight(
    insight_type: InsightType, barcode: str, server_domain: str
) -> list[ProductInsight]:
@@ -361,10 +330,6 @@ def generate_insights(
                )
                candidate.automatic_processing = False

            if not is_trustworthy_insight_image(product.images, candidate.source_image):
                # Don't process automatically if the insight image is not
                # trustworthy (too old and not selected)
                candidate.automatic_processing = False
            if candidate.data.get("is_annotation"):
                username = candidate.data.get("username")
                if username:
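
For context, and again not part of the diff: the removed helper delegated to `is_selected_image` and `is_recent_image`, which remain in `robotoff/insights/importer.py`. Below is a rough sketch of how those two checks behave on the MongoDB `images` dict, inferred from the removed function and the removed tests further down; the real implementations may differ:

```python
import datetime
from typing import Any


def selected_image_sketch(images: dict[str, Any], image_id: str) -> bool:
    # Selected images are stored under keys such as "front_fr" or
    # "ingredients_fr" and reference the raw image through "imgid".
    return any(
        key.split("_")[0] in ("front", "nutrition", "ingredients", "packaging")
        and data.get("imgid") == image_id
        for key, data in images.items()
    )


def recent_image_sketch(
    images: dict[str, Any], image_id: str, max_timedelta: datetime.timedelta
) -> bool:
    # Raw uploads have numeric keys and an "uploaded_t" timestamp; the image
    # counts as "recent enough" if it is no older than max_timedelta compared
    # to the most recently uploaded image of the product.
    uploaded = {
        key: datetime.datetime.fromtimestamp(int(data["uploaded_t"]))
        for key, data in images.items()
        if key.isdigit() and "uploaded_t" in data
    }
    return max(uploaded.values()) - uploaded[image_id] <= max_timedelta
```

Under this reading, `is_trustworthy_insight_image` returned True only for selected or recent-enough images, which is exactly the restriction this commit drops.
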
robotoff/settings.py (0 additions, 10 deletions)
@@ -1,5 +1,4 @@
import copy
import datetime
import logging
import os
from pathlib import Path
@@ -285,15 +284,6 @@ def init_sentry(integrations: Optional[list[Integration]] = None):
TEST_DIR = PROJECT_DIR / "tests"
TEST_DATA_DIR = TEST_DIR / "unit/data"

# Maximum interval between the upload of the insight image and the upload of
# the most recent image of the product to consider the insight image as
# trustworthy. Insights with untrustworthy images cannot be applied
# automatically.
# For example if days=120, it means that an insight based on an image that is
# more than 120 days older than the most recent product image cannot be applied
# automatically.
IMAGE_MAX_TIMEDELTA = datetime.timedelta(days=120)

# Number of minutes to wait before processing an insight automatically
INSIGHT_AUTOMATIC_PROCESSING_WAIT = int(
    os.environ.get("INSIGHT_AUTOMATIC_PROCESSING_WAIT", 10)
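
Also for illustration only: the `INSIGHT_AUTOMATIC_PROCESSING_WAIT` setting that remains gives each insight a grace period before the scheduler may apply it. A sketch of how such a wait could translate into the `process_after` timestamp mentioned in the documentation above, using a hypothetical helper rather than robotoff's actual code:

```python
import datetime
import os

# Same default as in robotoff/settings.py: wait 10 minutes unless overridden.
INSIGHT_AUTOMATIC_PROCESSING_WAIT = int(
    os.environ.get("INSIGHT_AUTOMATIC_PROCESSING_WAIT", 10)
)


def process_after_sketch(imported_at: datetime.datetime) -> datetime.datetime:
    # Hypothetical helper: the insight becomes eligible for automatic
    # processing once this timestamp has passed.
    return imported_at + datetime.timedelta(
        minutes=INSIGHT_AUTOMATIC_PROCESSING_WAIT
    )
```
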
tests/unit/insights/test_importer.py (0 additions, 60 deletions)
@@ -17,7 +17,6 @@
    import_insights_for_products,
    is_recent_image,
    is_selected_image,
    is_trustworthy_insight_image,
    is_valid_insight_image,
    select_deepest_taxonomized_candidates,
    sort_predictions,
@@ -86,65 +85,6 @@ def test_is_selected_image(images, image_id, expected):
    assert is_selected_image(images, image_id) is expected


@pytest.mark.parametrize(
    "images,image_id,max_timedelta,expected",
    [
        (
            {"1": {"uploaded_t": DEFAULT_UPLOADED_T}},
            "1",
            datetime.timedelta(seconds=10),
            True,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
                "2": {"uploaded_t": str(int(DEFAULT_UPLOADED_T) + 9)},
            },
            "1",
            datetime.timedelta(seconds=10),
            True,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
                "2": {"uploaded_t": str(int(DEFAULT_UPLOADED_T) + 11)},
            },
            "1",
            datetime.timedelta(seconds=10),
            False,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
                "2": {"uploaded_t": DEFAULT_UPLOADED_T},
                "ingredients_fr": {"imgid": "1"},
            },
            "1",
            datetime.timedelta(seconds=10),
            True,
        ),
        (
            {
                "2": {"uploaded_t": DEFAULT_UPLOADED_T},
            },
            "1",
            datetime.timedelta(seconds=10),
            False,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
            },
            "front_fr",
            datetime.timedelta(seconds=10),
            False,
        ),
    ],
)
def test_is_trustworthy_insight_image(images, image_id, max_timedelta, expected):
    assert is_trustworthy_insight_image(images, image_id, max_timedelta) is expected


@pytest.mark.parametrize(
"images,image_id,expected",
[
