Skip to content

Commit

Permalink
optimize db conn for pages/id/(jpg|svg) endpoints (#294)
Browse files Browse the repository at this point in the history
  • Loading branch information
ciur authored Jan 9, 2024
1 parent de16a76 commit c3de864
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 77 deletions.
6 changes: 4 additions & 2 deletions papermerge/core/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
from sqlalchemy import Engine

from .doc_ver import get_last_doc_ver
from .doc_ver import get_doc_ver, get_last_doc_ver
from .engine import get_engine
from .folders import get_folder
from .nodes import get_paginated_nodes
from .pages import get_first_page
from .pages import get_first_page, get_page
from .users import get_user

__all__ = [
'get_engine',
'get_user',
'get_folder',
'get_first_page',
'get_page',
'get_last_doc_ver',
'get_doc_ver',
'get_paginated_nodes',
'Engine'
]
16 changes: 16 additions & 0 deletions papermerge/core/db/doc_ver.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,19 @@ def get_last_doc_ver(
model_doc_ver = schemas.DocumentVersion.model_validate(db_doc_ver)

return model_doc_ver


def get_doc_ver(
    engine: Engine,
    id: UUID  # noqa
) -> schemas.DocumentVersion:
    """
    Returns the document version identified by `id`.

    The lookup is done on the document version's own ID (not on the ID
    of the document it belongs to), so exactly one row is expected.

    Raises `sqlalchemy.exc.NoResultFound` if there is no document version
    with the given ID and `sqlalchemy.exc.MultipleResultsFound` if more
    than one row matches; neither is translated here.
    """
    with Session(engine) as session:  # noqa
        stmt = select(DocumentVersion).where(DocumentVersion.id == id)
        # .one() enforces "exactly one row" and raises otherwise
        db_doc_ver = session.scalars(stmt).one()
        # convert to the pydantic schema while the session is still open
        model_doc_ver = schemas.DocumentVersion.model_validate(db_doc_ver)

    return model_doc_ver
24 changes: 23 additions & 1 deletion papermerge/core/db/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sqlalchemy.orm import Session

from papermerge.core import schemas
from papermerge.core.db.models import Page
from papermerge.core.db.models import Document, DocumentVersion, Page

from .exceptions import PageNotFound

Expand Down Expand Up @@ -34,3 +34,25 @@ def get_first_page(
model = schemas.Page.model_validate(db_page)

return model


def get_page(
    engine: Engine,
    id: UUID,
    user_id: UUID
) -> schemas.Page:
    """
    Returns the page identified by `id`, restricted to pages whose
    document belongs to the user identified by `user_id`.

    The ownership check is part of the query itself (Page is joined with
    DocumentVersion and Document), so a page owned by another user is
    reported the same way as a page that does not exist.

    Raises `PageNotFound` if no matching page is found.
    """
    with Session(engine) as session:  # noqa
        stmt = select(Page).join(DocumentVersion).join(Document).where(
            Page.id == id,
            Document.user_id == user_id
        )
        try:
            db_page = session.scalars(stmt).one()
        except exc.NoResultFound:
            # no explicit session.close() here: leaving the `with` block,
            # also via an exception, already closes the session
            raise PageNotFound(
                f"PageID={id} not found"
            )
        model = schemas.Page.model_validate(db_page)

    return model
47 changes: 26 additions & 21 deletions papermerge/core/routers/pages.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
import logging
import os
import uuid
from typing import List

from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse

from papermerge.core import db
from papermerge.core import pathlib as core_pathlib
from papermerge.core import schemas
from papermerge.core.auth import get_current_user
from papermerge.core.constants import DEFAULT_THUMBNAIL_SIZE
from papermerge.core.models import BaseTreeNode, Page
from papermerge.core.db import exceptions as db_exc
from papermerge.core.models import BaseTreeNode
from papermerge.core.page_ops import apply_pages_op
from papermerge.core.page_ops import extract_pages as api_extract_pages
from papermerge.core.page_ops import move_pages as api_move_pages
from papermerge.core.pathlib import rel2abs, thumbnail_path
from papermerge.core.schemas import ExtractPagesOut, MovePagesOut
from papermerge.core.schemas.documents import DocumentVersion as PyDocVer
from papermerge.core.schemas.pages import (ExtractPagesIn, MovePagesIn,
PageAndRotOp)
from papermerge.core.utils import image

logger = logging.getLogger(__name__)

Expand All @@ -38,28 +40,27 @@ class JPEGFileResponse(FileResponse):
@router.get("/{page_id}/svg", response_class=SVGFileResponse)
def get_page_svg_url(
page_id: uuid.UUID,
user: schemas.User = Depends(get_current_user)
user: schemas.User = Depends(get_current_user),
engine: db.Engine = Depends(db.get_engine)
):
try:
page = Page.objects.get(
id=page_id, document_version__document__user_id=user.id
)
except Page.DoesNotExist:
page = db.get_page(engine, id=page_id, user_id=user.id)
except db_exc.PageNotFound:
raise HTTPException(
status_code=404,
detail="Page not found"
)

svg_abs_path = page.svg_path
svg_abs_path = core_pathlib.abs_page_svg_path(str(page.id))
logger.debug(f"page UUID={page_id} svg abs path={svg_abs_path}")

if not page.svg_path.exists():
if not svg_abs_path.exists():
raise HTTPException(
status_code=404,
detail="File not found"
)

return SVGFileResponse(page.svg_path)
return SVGFileResponse(svg_abs_path)


@router.get("/{page_id}/jpg", response_class=JPEGFileResponse)
Expand All @@ -69,18 +70,17 @@ def get_page_jpg_url(
DEFAULT_THUMBNAIL_SIZE,
description="jpg image width in pixels"
),
user: schemas.User = Depends(get_current_user)
user: schemas.User = Depends(get_current_user),
engine: db.Engine = Depends(db.get_engine)
):
"""Returns jpg preview image of the page.
Returned jpg image's width is `size` pixels.
"""
try:
page = Page.objects.get(
id=page_id,
document_version__document__user_id=user.id
)
except Page.DoesNotExist:
page = db.get_page(engine, id=page_id, user_id=user.id)
doc_ver = db.get_doc_ver(engine, id=page.document_version_id)
except db_exc.PageNotFound:
raise HTTPException(
status_code=404,
detail="Page does not exist"
Expand All @@ -90,13 +90,18 @@ def get_page_jpg_url(
f"Generating page preview for page.number={page.number}"
f" page.id={page.id}"
)
jpeg_abs_path = rel2abs(
thumbnail_path(page.id, size=size)
jpeg_abs_path = core_pathlib.rel2abs(
core_pathlib.thumbnail_path(page.id, size=size)
)

if not os.path.exists(jpeg_abs_path):
if not jpeg_abs_path.exists():
# generate preview only for this page
page.generate_thumbnail(size=size)
image.generate_thumbnail(
page_id=page.id,
doc_ver_id=doc_ver.id,
file_name=doc_ver.file_name,
size=size
)

logger.debug(f"jpeg_abs_path={jpeg_abs_path}")

Expand Down
53 changes: 0 additions & 53 deletions papermerge/core/routers/thumbnails.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from papermerge.core.auth import get_current_user
from papermerge.core.constants import DEFAULT_THUMBNAIL_SIZE
from papermerge.core.db import exceptions as db_exc
from papermerge.core.models import Document
from papermerge.core.pathlib import rel2abs, thumbnail_path
from papermerge.core.utils import image

Expand All @@ -34,58 +33,6 @@ class JPEGFileResponse(FileResponse):
media_type = 'application/jpeg'


@router.get(
    "/old/{document_id}",
    response_class=JPEGFileResponse,
    responses={
        309: {
            "description": """Preview image cannot be generated at this moment
yet. This may happen for example because the document is currently
still being uploaded. A later response may succeed with 200 status
code.""",
            "content": OPEN_API_GENERIC_JSON_DETAIL
        },
        404: {
            "description": """Document with specified UUID was not found""",
            "content": OPEN_API_GENERIC_JSON_DETAIL
        }
    }
)
def retrieve_document_thumbnail_old(
    document_id: uuid.UUID,
    size: int = DEFAULT_THUMBNAIL_SIZE,
    user: schemas.User = Depends(get_current_user)
):
    """Retrieves thumbnail of the document last version's first page

    Responds with 404 if the current user has no document with the given
    ID, and with 309 if the document has no version or no page yet.
    The thumbnail file is generated on first request and reused afterwards.
    """
    try:
        doc = Document.objects.get(id=document_id, user_id=user.id)
    except Document.DoesNotExist:
        # the lookup is for a document, so report it as such
        raise HTTPException(
            status_code=404,
            detail="Document does not exist"
        )

    last_version = doc.versions.last()
    # .last() returns None when the document has no versions yet; treat
    # that like a missing first page instead of failing on `None.pages`
    first_page = None
    if last_version is not None:
        first_page = last_version.pages.first()

    if first_page is None:
        # may happen e.g. when document is still being uploaded
        raise HTTPException(
            status_code=309,
            detail="Not ready for preview yet"
        )

    jpeg_abs_path = rel2abs(
        thumbnail_path(first_page.id, size=size)
    )

    if not os.path.exists(jpeg_abs_path):
        # generate the thumbnail lazily, only when it is not on disk yet
        first_page.generate_thumbnail(size=size)

    return JPEGFileResponse(jpeg_abs_path)


@router.get(
"/{document_id}",
response_class=JPEGFileResponse,
Expand Down
22 changes: 22 additions & 0 deletions papermerge/core/utils/image.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,36 @@
from pathlib import Path
from uuid import UUID

from pdf2image import convert_from_path

from papermerge.core import constants as const
from papermerge.core import pathlib as core_pathlib


def file_name_generator(size):
    """Generator producing exactly one item: `size` converted to `str`."""
    name = str(size)
    yield name


def generate_thumbnail(
    page_id: UUID,
    doc_ver_id: UUID,
    file_name: str,
    size: int = const.DEFAULT_THUMBNAIL_SIZE,
    page_number: int = 1
):
    """
    Generates the thumbnail image of one page of a document version.

    The source PDF is located from `doc_ver_id` and `file_name`; the
    image is written into the folder of the thumbnail path derived from
    `page_id` and `size`.

    `page_number` selects which page of the PDF is rendered. It defaults
    to 1, which was previously hard-coded, so existing callers keep their
    behavior. Callers that need the thumbnail of a page other than the
    first one must pass that page's number explicitly.
    """
    thb_path = core_pathlib.abs_thumbnail_path(str(page_id), size=size)
    pdf_path = core_pathlib.abs_docver_path(
        str(doc_ver_id),
        file_name
    )

    generate_preview(
        pdf_path=pdf_path,
        output_folder=thb_path.parent,
        page_number=page_number,
        size=size
    )


def generate_preview(
pdf_path: Path,
output_folder: Path,
Expand Down

0 comments on commit c3de864

Please sign in to comment.