diff --git a/robotoff/prediction/nutrition_extraction.py b/robotoff/prediction/nutrition_extraction.py index 40007d70aa..d42c82c26e 100644 --- a/robotoff/prediction/nutrition_extraction.py +++ b/robotoff/prediction/nutrition_extraction.py @@ -551,26 +551,25 @@ def match_nutrient_value( # `entity_label` is in the form "energy_kcal_100g", so we can # extract the unit from the 2nd part (index 1) of the entity name unit = entity_label.split("_")[1].lower() - if ( - any( - entity_label.startswith(target) - for target in ( - "proteins", - "sugars", - "carbohydrates", - "fat", - "fiber", - "salt", - # we use "_" here as separator as '-' is only used in - # Product Opener, the label names are all separated by '_' - "saturated_fat", - "added_sugars", - "trans_fat", - ) + if any( + entity_label.startswith(target) + for target in ( + "proteins", + "sugars", + "carbohydrates", + "fat", + "fiber", + "salt", + # we use "_" here as separator as '-' is only used in + # Product Opener, the label names are all separated by '_' + "saturated_fat", + "added_sugars", + "trans_fat", ) - and value.endswith("9") - and "." in value - and not value.endswith(".9") + ) and ( + value in ("08", "09") + or (value.endswith("8") and "." in value and not value.endswith(".8")) + or (value.endswith("9") and "." in value and not value.endswith(".9")) ): unit = "g" value = value[:-1] diff --git a/tests/unit/prediction/test_nutrition_extraction.py b/tests/unit/prediction/test_nutrition_extraction.py index effe567f74..98e6e16fed 100644 --- a/tests/unit/prediction/test_nutrition_extraction.py +++ b/tests/unit/prediction/test_nutrition_extraction.py @@ -393,7 +393,13 @@ def test_aggregate_entities_multiple_entities(self): ("25.9", "iron_100g", ("25.9", None, True)), ("O g", "salt_100g", ("0", "g", True)), ("O", "salt_100g", ("0", None, True)), - ("0,19", "saturated_fat_100g", ("0.1", "g", True)), + # Missing unit and value ends with '9' or '8' + ("0.19", "saturated_fat_100g", ("0.1", "g", True)), + ("0,18", "saturated_fat_100g", ("0.1", "g", True)), + ("08", "saturated_fat_100g", ("0", "g", True)), + ("09", "salt_100g", ("0", "g", True)), + # Missing unit but value does not end with '8' or '9' + ("091", "proteins_100g", ("091", None, True)), ], ) def test_match_nutrient_value(words_str: str, entity_label: str, expected_output):