Skip to content

Commit

Permalink
Merge pull request #70 from 0xabu/skimpdf
Browse files Browse the repository at this point in the history
Minor usability improvements with Skim PDF
  • Loading branch information
0xabu authored Mar 30, 2023
2 parents 17bd36c + b32d8a0 commit c829231
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
12 changes: 9 additions & 3 deletions pdfannots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,9 +384,11 @@ def emit_progress(msg: str) -> None:
# Construct Annotation objects, and append them to the page.
for pa in pdftypes.resolve1(pdfpage.annots) if pdfpage.annots else []:
if isinstance(pa, pdftypes.PDFObjRef):
annot = _mkannotation(pdftypes.dict_value(pa), page)
if annot is not None:
page.annots.append(annot)
annot_dict = pdftypes.dict_value(pa)
if annot_dict: # Would be empty if pa is a broken ref
annot = _mkannotation(annot_dict, page)
if annot is not None:
page.annots.append(annot)
else:
logger.warning("Unknown annotation: %s", pa)

Expand All @@ -404,6 +406,10 @@ def emit_progress(msg: str) -> None:
page.annots.sort()
page.outlines.sort()

# Give the annotations a chance to update their internals
for a in page.annots:
a.postprocess()

emit_progress("\n")

device.close()
Expand Down
9 changes: 9 additions & 0 deletions pdfannots/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,15 @@ def get_context(self, remove_hyphens: bool = False) -> typ.Tuple[str, str]:
return (merge_lines(self.pre_context or '', remove_hyphens, strip_space=False),
merge_lines(self.post_context or '', remove_hyphens, strip_space=False))

def postprocess(self) -> None:
    """Discard contents that merely repeat the annotated text.

    Intended to run once all text and context has been captured.
    If the stripped concatenation of ``self.text`` equals the stripped
    ``self.contents``, ``self.contents`` is reset to ``None``; otherwise
    nothing changes.
    """
    # Nothing to compare unless both the contents and the text are present.
    if not self.contents or not self.text:
        return

    # Why: the Skim PDF reader (https://skim-app.sourceforge.io/) pre-fills a new
    # annotation's contents with a copy of the selected text. Unless the user edits
    # each annotation by hand, we would otherwise report the same words twice
    # (e.g., for simple highlights and strikeouts).
    captured = ''.join(self.text)
    if captured.strip() == self.contents.strip():
        self.contents = None


# Type alias: a value of this type is either a plain int or a PDFObjRef.
# The bare string that follows is a documentation-only "attribute docstring";
# Python evaluates and discards it, so it has no runtime effect.
UnresolvedPage = typ.Union[int, PDFObjRef]
"""A reference to a page that is *either* a page number, or a PDF object ID."""
Expand Down

0 comments on commit c829231

Please sign in to comment.