
Commit 081f4a9

fix: remove automatic processing disabling
This rule was overkill and prevented many insights from being applied.
raphael0202 committed Dec 26, 2022
1 parent 2031fa4 commit 081f4a9
Showing 4 changed files with 0 additions and 112 deletions.
doc/explanations/predictions.md (0 additions, 7 deletions)
@@ -60,11 +60,4 @@ The importer must have a `generate_candidates` method that returns a list of can

From the list of candidate insights, we update the `product_insight` table by deleting all insights and importing all candidates. The import mechanism is actually a bit smarter: we avoid unnecessary DB insertions/deletions by trying to match each candidate with a reference insight (an insight that is already in the DB).

For each insight, if `automatic_processing=True`, we check that the insight can indeed be applied automatically: we don't want to apply insights based on very old images that are not relevant anymore.

We keep automatic processing on if any of the following conditions are true:

- the image is a selected image, in any language (`front`, `nutrition`, `ingredients`, `packaging`)
- the image was uploaded no more than 120 days (~4 months) before the most recently uploaded image.

Once insights are refreshed/imported, they become available as questions in `/questions/*` API routes. Automatically processable insights are applied by the scheduler, once `current_timestamp >= ${process_after}`.
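
For illustration only (not part of this diff): the scheduler condition above boils down to a filter on insight rows, where an insight is picked up once `automatic_processing` is set and its `process_after` timestamp has passed. A minimal sketch, assuming plain objects carrying those two attributes; the actual robotoff scheduler code may differ:

```python
import datetime


def automatically_processable(insights):
    """Illustrative filter mirroring `current_timestamp >= process_after`
    for insights flagged with automatic_processing=True (sketch only,
    not robotoff's actual scheduler query)."""
    now = datetime.datetime.utcnow()
    return [
        insight
        for insight in insights
        if insight.automatic_processing
        and insight.process_after is not None
        and insight.process_after <= now
    ]
```
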
robotoff/insights/importer.py (0 additions, 35 deletions)
@@ -108,37 +108,6 @@ def is_valid_insight_image(images: dict[str, Any], source_image: Optional[str]):
    return image_id.isdigit() and image_id in images


def is_trustworthy_insight_image(
    images: dict[str, Any],
    source_image: Optional[str],
    max_timedelta: datetime.timedelta = settings.IMAGE_MAX_TIMEDELTA,
):
    """Return True if the source image is trustworthy for insight generation:

    - the image ID is a digit and is referenced in `images`
    - the image is either selected or recent enough

    If `source_image` is None, we always consider the insight as trustworthy.
    Insights considered as trustworthy can have automatic_processing = True.

    :param images: The image dict as stored in MongoDB.
    :param source_image: The insight source image, should be the path of the
        image or None.
    :param max_timedelta: Maximum timedelta between the most recent image and
        the source image, default settings.IMAGE_MAX_TIMEDELTA.
    """
    if source_image is None:
        return True

    image_id = Path(source_image).stem

    if not image_id.isdigit() or image_id not in images:
        return False

    return is_selected_image(images, image_id) or is_recent_image(
        images, image_id, max_timedelta
    )


def get_existing_insight(
    insight_type: InsightType, barcode: str, server_domain: str
) -> list[ProductInsight]:
@@ -361,10 +330,6 @@ def generate_insights(
                )
                candidate.automatic_processing = False

            if not is_trustworthy_insight_image(product.images, candidate.source_image):
                # Don't process automatically if the insight image is not
                # trustworthy (too old and not selected)
                candidate.automatic_processing = False
            if candidate.data.get("is_annotation"):
                username = candidate.data.get("username")
                if username:
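
For context, and again not part of the diff: the removed helper delegated to `is_selected_image` and `is_recent_image`, which remain in `robotoff/insights/importer.py`. Below is a rough sketch of how those two checks behave on the MongoDB `images` dict, inferred from the removed function and the removed tests further down; the real implementations may differ:

```python
import datetime
from typing import Any


def selected_image_sketch(images: dict[str, Any], image_id: str) -> bool:
    # Selected images are stored under keys such as "front_fr" or
    # "ingredients_fr" and reference the raw image through "imgid".
    return any(
        key.split("_")[0] in ("front", "nutrition", "ingredients", "packaging")
        and data.get("imgid") == image_id
        for key, data in images.items()
    )


def recent_image_sketch(
    images: dict[str, Any], image_id: str, max_timedelta: datetime.timedelta
) -> bool:
    # Raw uploads have numeric keys and an "uploaded_t" timestamp; the image
    # counts as "recent enough" if it is no older than max_timedelta compared
    # to the most recently uploaded image of the product.
    uploaded = {
        key: datetime.datetime.fromtimestamp(int(data["uploaded_t"]))
        for key, data in images.items()
        if key.isdigit() and "uploaded_t" in data
    }
    return max(uploaded.values()) - uploaded[image_id] <= max_timedelta
```

Under this reading, `is_trustworthy_insight_image` returned True only for selected or recent-enough images, which is exactly the restriction this commit drops.
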
robotoff/settings.py (0 additions, 10 deletions)
@@ -1,5 +1,4 @@
import copy
import datetime
import logging
import os
from pathlib import Path
@@ -285,15 +284,6 @@ def init_sentry(integrations: Optional[list[Integration]] = None):
TEST_DIR = PROJECT_DIR / "tests"
TEST_DATA_DIR = TEST_DIR / "unit/data"

# Maximum interval between the upload of the insight image and the upload of
# the most recent image of the product to consider the insight image as
# trustworthy. Insights with untrustworthy images cannot be applied
# automatically.
# For example if days=120, it means that an insight based on an image that is
# more than 120 days older than the most recent product image cannot be applied
# automatically.
IMAGE_MAX_TIMEDELTA = datetime.timedelta(days=120)

# Number of minutes to wait before processing an insight automatically
INSIGHT_AUTOMATIC_PROCESSING_WAIT = int(
    os.environ.get("INSIGHT_AUTOMATIC_PROCESSING_WAIT", 10)
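
Also for illustration only: the `INSIGHT_AUTOMATIC_PROCESSING_WAIT` setting that remains gives each insight a grace period before the scheduler may apply it. A sketch of how such a wait could translate into the `process_after` timestamp mentioned in the documentation above, using a hypothetical helper rather than robotoff's actual code:

```python
import datetime
import os

# Same default as in robotoff/settings.py: wait 10 minutes unless overridden.
INSIGHT_AUTOMATIC_PROCESSING_WAIT = int(
    os.environ.get("INSIGHT_AUTOMATIC_PROCESSING_WAIT", 10)
)


def process_after_sketch(imported_at: datetime.datetime) -> datetime.datetime:
    # Hypothetical helper: the insight becomes eligible for automatic
    # processing once this timestamp has passed.
    return imported_at + datetime.timedelta(
        minutes=INSIGHT_AUTOMATIC_PROCESSING_WAIT
    )
```
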
tests/unit/insights/test_importer.py (0 additions, 60 deletions)
@@ -17,7 +17,6 @@
    import_insights_for_products,
    is_recent_image,
    is_selected_image,
    is_trustworthy_insight_image,
    is_valid_insight_image,
    select_deepest_taxonomized_candidates,
    sort_predictions,
@@ -86,65 +85,6 @@ def test_is_selected_image(images, image_id, expected):
    assert is_selected_image(images, image_id) is expected


@pytest.mark.parametrize(
    "images,image_id,max_timedelta,expected",
    [
        (
            {"1": {"uploaded_t": DEFAULT_UPLOADED_T}},
            "1",
            datetime.timedelta(seconds=10),
            True,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
                "2": {"uploaded_t": str(int(DEFAULT_UPLOADED_T) + 9)},
            },
            "1",
            datetime.timedelta(seconds=10),
            True,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
                "2": {"uploaded_t": str(int(DEFAULT_UPLOADED_T) + 11)},
            },
            "1",
            datetime.timedelta(seconds=10),
            False,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
                "2": {"uploaded_t": DEFAULT_UPLOADED_T},
                "ingredients_fr": {"imgid": "1"},
            },
            "1",
            datetime.timedelta(seconds=10),
            True,
        ),
        (
            {
                "2": {"uploaded_t": DEFAULT_UPLOADED_T},
            },
            "1",
            datetime.timedelta(seconds=10),
            False,
        ),
        (
            {
                "1": {"uploaded_t": DEFAULT_UPLOADED_T},
            },
            "front_fr",
            datetime.timedelta(seconds=10),
            False,
        ),
    ],
)
def test_is_trustworthy_insight_image(images, image_id, max_timedelta, expected):
    assert is_trustworthy_insight_image(images, image_id, max_timedelta) is expected


@pytest.mark.parametrize(
"images,image_id,expected",
[
