Skip to content

Commit

Permalink
fix: fix get_tag function
Browse files Browse the repository at this point in the history
fix corner cases with '!' or '?' characters
  • Loading branch information
raphael0202 committed Oct 20, 2022
1 parent f2bd704 commit cca5592
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
14 changes: 13 additions & 1 deletion robotoff/utils/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,23 @@ def get_lemmatizing_nlp(lang: str) -> spacy.Language:


def get_tag(text: str) -> str:
"""Return a tag from a text.
In Open Food Facts, tags are obtained from free text by performing the
following:
- lowercasing
- accent removal
- replacement of punctuation by either a hyphen ("-") or nothing, depending
on the punctuation
"""
text = strip_accents_ascii_v2(text)
return (
text = (
text.lower()
.replace(" & ", "-")
.replace(" ", "-")
.replace("'", "-")
.replace(".", "-")
.replace("!", "")
.replace("?", "")
)
return strip_consecutive_spaces(text).strip("-")
1 change: 1 addition & 0 deletions tests/unit/utils/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
("monop'daily", "monop-daily"),
("épi d'or", "epi-d-or"),
("Health Star Rating 0.5", "health-star-rating-0-5"),
("C'est qui le Patron ?!", "c-est-qui-le-patron"),
],
)
def test_get_tag(value: str, output: str):
Expand Down

0 comments on commit cca5592

Please sign in to comment.