From c3de864e2fb5a9424e5a39db89253e910b38bfd6 Mon Sep 17 00:00:00 2001 From: Eugen Ciur Date: Tue, 9 Jan 2024 08:16:16 +0100 Subject: [PATCH] optimize db conn for pages/id/(jpg|svg) endpoints (#294) --- papermerge/core/db/__init__.py | 6 ++- papermerge/core/db/doc_ver.py | 16 ++++++++ papermerge/core/db/pages.py | 24 +++++++++++- papermerge/core/routers/pages.py | 47 +++++++++++++----------- papermerge/core/routers/thumbnails.py | 53 --------------------------- papermerge/core/utils/image.py | 22 +++++++++++ 6 files changed, 91 insertions(+), 77 deletions(-) diff --git a/papermerge/core/db/__init__.py b/papermerge/core/db/__init__.py index dd55ba4d5..7e4188d70 100644 --- a/papermerge/core/db/__init__.py +++ b/papermerge/core/db/__init__.py @@ -1,10 +1,10 @@ from sqlalchemy import Engine -from .doc_ver import get_last_doc_ver +from .doc_ver import get_doc_ver, get_last_doc_ver from .engine import get_engine from .folders import get_folder from .nodes import get_paginated_nodes -from .pages import get_first_page +from .pages import get_first_page, get_page from .users import get_user __all__ = [ @@ -12,7 +12,9 @@ 'get_user', 'get_folder', 'get_first_page', + 'get_page', 'get_last_doc_ver', + 'get_doc_ver', 'get_paginated_nodes', 'Engine' ] diff --git a/papermerge/core/db/doc_ver.py b/papermerge/core/db/doc_ver.py index ad2df2654..b7ebfb456 100644 --- a/papermerge/core/db/doc_ver.py +++ b/papermerge/core/db/doc_ver.py @@ -27,3 +27,19 @@ def get_last_doc_ver( model_doc_ver = schemas.DocumentVersion.model_validate(db_doc_ver) return model_doc_ver + + +def get_doc_ver( + engine: Engine, + id: UUID # noqa +) -> schemas.DocumentVersion: + """ + Returns the document version + identified by id + """ + with Session(engine) as session: # noqa + stmt = select(DocumentVersion).where(DocumentVersion.id == id) + db_doc_ver = session.scalars(stmt).one() + model_doc_ver = schemas.DocumentVersion.model_validate(db_doc_ver) + + return model_doc_ver diff --git
a/papermerge/core/db/pages.py b/papermerge/core/db/pages.py index aa45d7cf5..66e7a23dc 100644 --- a/papermerge/core/db/pages.py +++ b/papermerge/core/db/pages.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import Session from papermerge.core import schemas -from papermerge.core.db.models import Page +from papermerge.core.db.models import Document, DocumentVersion, Page from .exceptions import PageNotFound @@ -34,3 +34,25 @@ def get_first_page( model = schemas.Page.model_validate(db_page) return model + + +def get_page( + engine: Engine, + id: UUID, + user_id: UUID +) -> schemas.Page: + with Session(engine) as session: # noqa + stmt = select(Page).join(DocumentVersion).join(Document).where( + Page.id == id, + Document.user_id == user_id + ) + try: + db_page = session.scalars(stmt).one() + except exc.NoResultFound: + session.close() + raise PageNotFound( + f"PageID={id} not found" + ) + model = schemas.Page.model_validate(db_page) + + return model diff --git a/papermerge/core/routers/pages.py b/papermerge/core/routers/pages.py index b5d754157..beb4b8897 100644 --- a/papermerge/core/routers/pages.py +++ b/papermerge/core/routers/pages.py @@ -1,23 +1,25 @@ import logging -import os import uuid from typing import List from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.responses import FileResponse +from papermerge.core import db +from papermerge.core import pathlib as core_pathlib from papermerge.core import schemas from papermerge.core.auth import get_current_user from papermerge.core.constants import DEFAULT_THUMBNAIL_SIZE -from papermerge.core.models import BaseTreeNode, Page +from papermerge.core.db import exceptions as db_exc +from papermerge.core.models import BaseTreeNode from papermerge.core.page_ops import apply_pages_op from papermerge.core.page_ops import extract_pages as api_extract_pages from papermerge.core.page_ops import move_pages as api_move_pages -from papermerge.core.pathlib import rel2abs, thumbnail_path from papermerge.core.schemas 
import ExtractPagesOut, MovePagesOut from papermerge.core.schemas.documents import DocumentVersion as PyDocVer from papermerge.core.schemas.pages import (ExtractPagesIn, MovePagesIn, PageAndRotOp) +from papermerge.core.utils import image logger = logging.getLogger(__name__) @@ -38,28 +40,27 @@ class JPEGFileResponse(FileResponse): @router.get("/{page_id}/svg", response_class=SVGFileResponse) def get_page_svg_url( page_id: uuid.UUID, - user: schemas.User = Depends(get_current_user) + user: schemas.User = Depends(get_current_user), + engine: db.Engine = Depends(db.get_engine) ): try: - page = Page.objects.get( - id=page_id, document_version__document__user_id=user.id - ) - except Page.DoesNotExist: + page = db.get_page(engine, id=page_id, user_id=user.id) + except db_exc.PageNotFound: raise HTTPException( status_code=404, detail="Page not found" ) - svg_abs_path = page.svg_path + svg_abs_path = core_pathlib.abs_page_svg_path(str(page.id)) logger.debug(f"page UUID={page_id} svg abs path={svg_abs_path}") - if not page.svg_path.exists(): + if not svg_abs_path.exists(): raise HTTPException( status_code=404, detail="File not found" ) - return SVGFileResponse(page.svg_path) + return SVGFileResponse(svg_abs_path) @router.get("/{page_id}/jpg", response_class=JPEGFileResponse) @@ -69,18 +70,17 @@ def get_page_jpg_url( DEFAULT_THUMBNAIL_SIZE, description="jpg image width in pixels" ), - user: schemas.User = Depends(get_current_user) + user: schemas.User = Depends(get_current_user), + engine: db.Engine = Depends(db.get_engine) ): """Returns jpg preview image of the page. Returned jpg image's width is `size` pixels. 
""" try: - page = Page.objects.get( - id=page_id, - document_version__document__user_id=user.id - ) - except Page.DoesNotExist: + page = db.get_page(engine, id=page_id, user_id=user.id) + doc_ver = db.get_doc_ver(engine, id=page.document_version_id) + except db_exc.PageNotFound: raise HTTPException( status_code=404, detail="Page does not exist" @@ -90,13 +90,18 @@ def get_page_jpg_url( f"Generating page preview for page.number={page.number}" f" page.id={page.id}" ) - jpeg_abs_path = rel2abs( - thumbnail_path(page.id, size=size) + jpeg_abs_path = core_pathlib.rel2abs( + core_pathlib.thumbnail_path(page.id, size=size) ) - if not os.path.exists(jpeg_abs_path): + if not jpeg_abs_path.exists(): # generate preview only for this page - page.generate_thumbnail(size=size) + image.generate_thumbnail( + page_id=page.id, + doc_ver_id=doc_ver.id, + file_name=doc_ver.file_name, + size=size + ) logger.debug(f"jpeg_abs_path={jpeg_abs_path}") diff --git a/papermerge/core/routers/thumbnails.py b/papermerge/core/routers/thumbnails.py index 2b3f957c7..1d0f50d65 100644 --- a/papermerge/core/routers/thumbnails.py +++ b/papermerge/core/routers/thumbnails.py @@ -12,7 +12,6 @@ from papermerge.core.auth import get_current_user from papermerge.core.constants import DEFAULT_THUMBNAIL_SIZE from papermerge.core.db import exceptions as db_exc -from papermerge.core.models import Document from papermerge.core.pathlib import rel2abs, thumbnail_path from papermerge.core.utils import image @@ -34,58 +33,6 @@ class JPEGFileResponse(FileResponse): media_type = 'application/jpeg' -@router.get( - "/old/{document_id}", - response_class=JPEGFileResponse, - responses={ - 309: { - "description": """Preview image cannot be generated at this moment - yet. This may happen for example because the document is currently - still being uploaded. 
A later response may succeed with 200 status - code.""", - "content": OPEN_API_GENERIC_JSON_DETAIL - }, - 404: { - "description": """Document with specified UUID was not found""", - "content": OPEN_API_GENERIC_JSON_DETAIL - } - } -) -def retrieve_document_thumbnail_old( - document_id: uuid.UUID, - size: int = DEFAULT_THUMBNAIL_SIZE, - user: schemas.User = Depends(get_current_user) -): - """Retrieves thumbnail of the document last version's first page""" - try: - doc = Document.objects.get(id=document_id, user_id=user.id) - except Document.DoesNotExist: - raise HTTPException( - status_code=404, - detail="Page does not exist" - ) - - last_version = doc.versions.last() - - first_page = last_version.pages.first() - - if first_page is None: - # may happen e.g. when document is still being uploaded - raise HTTPException( - status_code=309, - detail="Not ready for preview yet" - ) - - jpeg_abs_path = rel2abs( - thumbnail_path(first_page.id, size=size) - ) - - if not os.path.exists(jpeg_abs_path): - first_page.generate_thumbnail(size=size) - - return JPEGFileResponse(jpeg_abs_path) - - @router.get( "/{document_id}", response_class=JPEGFileResponse, diff --git a/papermerge/core/utils/image.py b/papermerge/core/utils/image.py index 2ce2e254e..02fd6c778 100644 --- a/papermerge/core/utils/image.py +++ b/papermerge/core/utils/image.py @@ -1,14 +1,36 @@ from pathlib import Path +from uuid import UUID from pdf2image import convert_from_path from papermerge.core import constants as const +from papermerge.core import pathlib as core_pathlib def file_name_generator(size): yield str(size) +def generate_thumbnail( + page_id: UUID, + doc_ver_id: UUID, + file_name: str, + size: int = const.DEFAULT_THUMBNAIL_SIZE +): + thb_path = core_pathlib.abs_thumbnail_path(str(page_id), size=size) + pdf_path = core_pathlib.abs_docver_path( + str(doc_ver_id), + file_name + ) + + generate_preview( + pdf_path=pdf_path, + output_folder=thb_path.parent, + page_number=1, + size=size + ) + + def 
generate_preview( pdf_path: Path, output_folder: Path,