Skip to content

Commit

Permalink
fix: cache result word string in Word
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Apr 3, 2023
1 parent b461669 commit 30bc347
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
23 changes: 12 additions & 11 deletions robotoff/prediction/ocr/dataclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def detect_words_orientation(self) -> list[ImageOrientation]:

def get_text(self) -> str:
"""Return the text of the paragraph, by concatenating the words."""
return "".join(w.get_text() for w in self.words)
return "".join(w.text for w in self.words)


class Word:
Expand All @@ -429,20 +429,21 @@ def __init__(self, data: JSONType):
DetectedLanguage(lang) for lang in data["property"]["detectedLanguages"]
]

def get_text(self) -> str:
@property

This comment has been minimized.

Copy link
@alexgarel

alexgarel Apr 5, 2023

Member

From Python 3.8 onward, you can use the `functools.cached_property` decorator instead of caching the value by hand.

This comment has been minimized.

Copy link
@raphael0202

raphael0202 Apr 6, 2023

Author Collaborator

I didn't know about this, thanks for the pointer!

def text(self):
if not self._text:
self._text = self._get_text()

return self._text

def _get_text(self) -> str:
text_list = []
for symbol in self.symbols:
symbol_str = ""

if symbol.symbol_break and symbol.symbol_break.is_prefix:
symbol_str = symbol.symbol_break.get_value()

symbol_str += symbol.text

text_list.append(symbol.symbol_break.get_value())
text_list.append(symbol.text)
if symbol.symbol_break and not symbol.symbol_break.is_prefix:
symbol_str += symbol.symbol_break.get_value()

text_list.append(symbol_str)
text_list.append(symbol.symbol_break.get_value())

return "".join(text_list)

Expand Down
2 changes: 1 addition & 1 deletion scripts/ocr/extract_ocr_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ def get_languages(self):

def get_text(self):
"""Return the text of the paragraph, by concatenating the words."""
return "".join(w.get_text() for w in self.words)
return "".join(w.text for w in self.words)


class Word:
Expand Down

0 comments on commit 30bc347

Please sign in to comment.