Skip to content

Commit

Permalink
fix: add CLI command to pretty print OCR result
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed May 31, 2023
1 parent a61eef4 commit 26f44a4
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 6 deletions.
84 changes: 83 additions & 1 deletion robotoff/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
import pathlib
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -48,6 +49,9 @@ def regenerate_ocr_insights(
server_type: ServerType = typer.Option(
ServerType.off, help="Server type of the product"
),
ocr_prediction_types: Optional[list[PredictionType]] = typer.Option(
None, help="Types of OCR prediction to use"
),
) -> None:
"""Regenerate OCR predictions/insights for a specific product and import
them."""
Expand All @@ -61,6 +65,9 @@ def regenerate_ocr_insights(
from robotoff.products import get_product
from robotoff.utils import get_logger

if ocr_prediction_types is None:
ocr_prediction_types = DEFAULT_OCR_PREDICTION_TYPES

logger = get_logger()

product_id = ProductIdentifier(barcode, server_type)
Expand All @@ -75,7 +82,7 @@ def regenerate_ocr_insights(

ocr_url = generate_json_ocr_url(product_id, image_id)
predictions += extract_ocr_predictions(
product_id, ocr_url, DEFAULT_OCR_PREDICTION_TYPES
product_id, ocr_url, ocr_prediction_types
)

with db:
Expand Down Expand Up @@ -794,5 +801,80 @@ def import_image_webhook(
logger.info("Robotoff response: %s", r.json())


@app.command()
def pprint_ocr_result(
    uri: str = typer.Argument(..., help="URI of the image or OCR"),
) -> None:
    """Pretty print OCR result.

    The URI is either an HTTP(S) URL or a path to a local OCR JSON file. If
    it ends with `.jpg`, the extension is swapped for `.json` before loading.
    """
    import sys

    import orjson

    from robotoff.prediction.ocr.core import get_ocr_result
    from robotoff.prediction.ocr.dataclass import OCRResult
    from robotoff.utils import get_logger, http_session

    logger = get_logger()

    image_suffix = ".jpg"
    if uri.endswith(image_suffix):
        # Only swap the trailing extension: `str.replace` would also rewrite
        # any other ".jpg" occurring earlier in the URI (host name,
        # directory name...).
        uri = uri[: -len(image_suffix)] + ".json"

    logger.info("displaying OCR result %s", uri)

    if uri.startswith("http"):
        ocr_result = get_ocr_result(uri, http_session)
    else:
        with open(uri, "rb") as f:
            data = orjson.loads(f.read())
        ocr_result = OCRResult.from_json(data)

    if ocr_result is None:
        logger.info("error while downloading %s", uri)
        # The OCR result could not be loaded: report the failure to the
        # calling shell with a non-zero exit status.
        sys.exit(1)

    if ocr_result.full_text_annotation is None:
        # Nothing to display, but the OCR result itself was loaded fine.
        logger.info("no full text annotation available")
        sys.exit(0)
    ocr_result.pprint()


@app.command()
def generate_ocr_result(
    image_url: str = typer.Argument(..., help="URL of the image"),
    output_dir: Path = typer.Argument(
        ...,
        file_okay=False,
        dir_okay=True,
        help="Directory where the OCR JSON should be saved",
    ),
) -> None:
    """Download an image, run OCR on it, save the JSON result in the output
    directory and pretty print it.

    The API key is read from the GOOGLE_CLOUD_VISION_API_KEY environment
    variable.
    """
    import sys

    import orjson

    from robotoff.cli.ocr import run_ocr_on_image
    from robotoff.off import get_source_from_url
    from robotoff.utils import get_logger, http_session

    logger = get_logger()
    # Raises KeyError if the environment variable is not set
    api_key = os.environ["GOOGLE_CLOUD_VISION_API_KEY"]

    output_dir.mkdir(parents=True, exist_ok=True)
    source_image_path = Path(get_source_from_url(image_url))
    # Flatten the source path into a single file name: "/" separators become
    # "_" and the first character of the flattened parent path is dropped.
    output_file = output_dir / (
        str(source_image_path.parent).replace("/", "_")[1:]
        + f"_{source_image_path.stem}.json"
    )
    logger.info("Downloading image %s", image_url)
    r = http_session.get(image_url)
    r.raise_for_status()

    logger.info("Generating OCR result")
    response = run_ocr_on_image(r.content, api_key)

    if response is None:
        # run_ocr_on_image returns None when the OCR request was not
        # successful. Do not write a `null` JSON file nor try to display it.
        logger.error("OCR generation failed for %s, no file saved", image_url)
        sys.exit(1)

    output_file.write_bytes(orjson.dumps(response))

    pprint_ocr_result(str(output_file))


def main() -> None:
    """Command-line entry point: invoke the module-level `app` object on
    which the commands of this module are registered."""
    app()
39 changes: 39 additions & 0 deletions robotoff/cli/ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import base64
from typing import List

import orjson
import requests

from robotoff.utils import get_logger, http_session

logger = get_logger(__name__)


def run_ocr_on_image_batch(base64_images: List[str], api_key: str) -> requests.Response:
    """Send a single `images:annotate` request to the Google Cloud Vision
    API, asking for TEXT_DETECTION on every image of the batch.

    :param base64_images: the image contents, one string per image (the
        caller in this module passes base64-encoded image bytes)
    :param api_key: the API key, sent as the `key` query parameter
    :return: the HTTP response, returned as-is (its status is not checked
        here)
    """
    annotate_requests = []
    for encoded_image in base64_images:
        annotate_requests.append(
            {
                "features": [{"type": "TEXT_DETECTION"}],
                "image": {"content": encoded_image},
            }
        )

    endpoint = f"https://vision.googleapis.com/v1/images:annotate?key={api_key}"
    payload = {"requests": annotate_requests}
    return http_session.post(endpoint, json=payload)


def run_ocr_on_image(image_bytes: bytes, api_key: str):
    """Run OCR on a single image.

    The image is base64-encoded and sent as a one-element batch through
    `run_ocr_on_image_batch`.

    :param image_bytes: the raw bytes of the image, must not be empty
    :param api_key: the API key forwarded to `run_ocr_on_image_batch`
    :raises ValueError: if `image_bytes` is empty
    :return: the decoded JSON body of the response, or None if the HTTP
        response was not successful (status and body are logged)
    """
    if not image_bytes:
        raise ValueError("empty image")

    encoded_image = base64.b64encode(image_bytes).decode("utf-8")
    response = run_ocr_on_image_batch([encoded_image], api_key)

    if response.ok:
        return orjson.loads(response.content)

    logger.info("HTTP %s received", response.status_code)
    logger.info("Response: %s", response.text)
    return None
35 changes: 30 additions & 5 deletions robotoff/prediction/ocr/dataclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ def get_match_bounding_box(
annotation is not available
"""
words = self.get_words_from_indices(start_idx, end_idx, raises)
logger.debug("get_match_bounding_box: words: %s", words)

if words is not None:
if words:
Expand Down Expand Up @@ -257,6 +258,13 @@ def get_words_from_indices(
start_idx, end_idx, raises
)

def pprint(self):
    """Pretty print the full text annotation, if it is not null.

    If there is no full text annotation, a placeholder message is printed
    instead.
    """
    if not self.full_text_annotation:
        print("No full text annotation available")
        return

    # Delegate to the annotation's own `pprint`: the pretty-print string is
    # built from the annotation's pages, so the generator has to be looked
    # up on the annotation object and not on `self`.
    self.full_text_annotation.pprint()


def get_text(
content: Union[OCRResult, str], ocr_regex: Optional[OCRRegex] = None
Expand Down Expand Up @@ -396,6 +404,27 @@ def get_words_from_indices(

return selected

def pprint(self):
    """Print the pretty-printed version of the full text annotation to the
    standard output."""
    pretty_string = self._generate_pretty_print_string()
    print(pretty_string)

def _generate_pretty_print_string(self) -> str:
    """Build a printable outline of the full text annotation.

    One header line is emitted per page, block and paragraph (numbered from
    0 within their parent), and each paragraph header is followed by the
    `repr` of the paragraph text.

    :return: the generated string, lines joined with a newline
    """

    def iter_lines():
        # Walk the page > block > paragraph hierarchy depth-first
        for page_idx, page in enumerate(self.pages):
            yield f"> page #{page_idx}"
            for block_idx, block in enumerate(page.blocks):
                yield f">> block #{block_idx}"
                for paragraph_idx, paragraph in enumerate(block.paragraphs):
                    yield f">>> paragraph #{paragraph_idx}"
                    yield f" {repr(paragraph.text)}"

    return "\n".join(iter_lines())


class TextAnnotationPage:
"""Detected page from OCR."""
Expand Down Expand Up @@ -566,11 +595,7 @@ class Paragraph:
"""Structural unit of text representing a number of words in certain
order."""

__slots__ = (
"words",
"text",
"bounding_poly",
)
__slots__ = ("words", "text", "bounding_poly")

def __init__(self, data: JSONType, initial_offset: int = 0):
"""Initialize a Paragraph.
Expand Down

0 comments on commit 26f44a4

Please sign in to comment.