From c0c7f212422f2a20e831411290c7993a5bd62445 Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Thu, 31 Oct 2024 20:46:24 +0300 Subject: [PATCH 1/9] Code refactoring, #293 --- Makefile | 2 +- README.adoc | 10 +++--- pom.xml | 2 +- .../metanorma/fop/annotations/Annotation.java | 36 +++++++++++-------- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index f1bdf76..6909ce1 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ SHELL ?= /bin/bash endif #JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout) -JAR_VERSION := 2.06 +JAR_VERSION := 2.07 JAR_FILE := mn2pdf-$(JAR_VERSION).jar all: target/$(JAR_FILE) diff --git a/README.adoc b/README.adoc index d6348fb..46b87c5 100644 --- a/README.adoc +++ b/README.adoc @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] +java -Xss5m -Xmx2048m -jar target/mn2pdf-2.07.jar --xml-file --xsl-file --pdf-file [--syntax-highlight] ---- e.g. [source,sh] ---- -java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf +java -Xss5m -Xmx2048m -jar target/mn2pdf-2.07.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf ---- === PDF encryption features @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.: ---- org.metanorma.fop mn2pdf -2.06 +2.07 Metanorma XML to PDF converter ---- @@ -111,8 +111,8 @@ Tag the same version in Git: [source,xml] ---- -git tag v2.06 -git push origin v2.06 +git tag v2.07 +git push origin v2.07 ---- Then the corresponding GitHub release will be automatically created at: diff --git a/pom.xml b/pom.xml index e3a981d..f401ef4 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.metanorma.fop mn2pdf - 2.06 + 2.07 Metanorma XML to PDF converter jar https://www.metanorma.org diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index 858e683..f2114a4 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -31,7 +31,19 @@ import javax.xml.xpath.XPathException; import javax.xml.xpath.XPathFactory; +import org.apache.fop.pdf.PDFObject; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; +import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; +import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes; +import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.xml.sax.InputSource; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.fdf.FDFAnnotation; @@ -114,9 +126,9 @@ public void process(File pdf, String xmlReview) throws IOException { if (DEBUG) { - System.out.println("page=" + page); + /*System.out.println("page=" + page); System.out.println("x=" + x); - System.out.println("y=" + y); + System.out.println("y=" + y);*/ } AnnotationArea annotationArea = new AnnotationArea(); @@ -225,7 +237,7 @@ public void process(File pdf, String xmlReview) throws IOException { } if (DEBUG) { - System.out.println("postItPopup position=" + Arrays.toString(annotationArea.getPosition())); + //System.out.println("postItPopup position=" + Arrays.toString(annotationArea.getPosition())); } Node node_popup = ((Element)node_annotation).getElementsByTagName("popup").item(0); @@ -253,16 +265,14 @@ public void process(File pdf, String xmlReview) throws IOException { xmlwriter.write(updatedXMLReview); } } - - + // import XFDF annotation xml FDFDocument fdfDoc = FDFDocument.loadXFDF(new ByteArrayInputStream(updatedXMLReview.getBytes(StandardCharsets.UTF_8))); List fdfAnnots = fdfDoc.getCatalog().getFDF().getAnnotations(); // group annotations relate to one page and add them into page - HashMap> map_pdfannots = new HashMap<>(); - + HashMap> mapPDFannots = new HashMap<>(); for (int i=0; i()); + if (mapPDFannots.get(page) == null) { + mapPDFannots.put(page, new ArrayList()); } - map_pdfannots.get(page).add(pdfannot); + mapPDFannots.get(page).add(pdfannot); } - - for (Map.Entry> set: map_pdfannots.entrySet()) { + + for (Map.Entry> set: mapPDFannots.entrySet()) { PDPage page = document.getPage(set.getKey()); List pageAnotations = page.getAnnotations(); // merge existing annotations (including hyperlinks) and new annotations pageAnotations.addAll(set.getValue()); - //document.getPage(set.getKey()).setAnnotations(set.getValue()); document.getPage(set.getKey()).setAnnotations(pageAnotations); } From b1e32c1e5870b1b792412e1accc7e333d365694d Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Thu, 31 Oct 2024 20:54:08 +0300 Subject: [PATCH 2/9] added hashmap for id and annotations, #293 --- .../metanorma/fop/annotations/Annotation.java | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index f2114a4..c2d05f6 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -295,7 +295,10 @@ public void process(File pdf, String xmlReview) throws IOException { } fdfDoc.close(); - + + HashMap hashMapDocumentAnnotations = new HashMap<>(); + hashMapDocumentAnnotations = getAnnotationIDmap(document); + document.save(pdf); } catch (IOException | NumberFormatException | ParserConfigurationException | DOMException | TransformerException | SAXException | XPathException ex) { @@ -311,5 +314,23 @@ public void process(File pdf, String xmlReview) throws IOException { } } - + + private HashMap getAnnotationIDmap(PDDocument document) throws IOException { + HashMap hashMapDocumentAnnotations = new HashMap<>(); + for(int i = 0; i< document.getNumberOfPages(); i++) { + PDPage page = document.getPage(i); + for (PDAnnotation pdAnnotation: page.getAnnotations()){ + COSDictionary pdAnnotationDict = pdAnnotation.getCOSObject(); + if (pdAnnotationDict != null) { + // subject contains id 'Annot___', see xfdf_simple.xsl, attribute 'subject' + String subj = pdAnnotationDict.getString(COSName.SUBJ); + if (subj != null) { + hashMapDocumentAnnotations.put(subj, pdAnnotation); + } + } + } + } + return hashMapDocumentAnnotations; + } + } From 17de40a8200260defa2f4777a84df5361093f90f Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Thu, 31 Oct 2024 20:57:36 +0300 Subject: [PATCH 3/9] removing temp empty links, #293 --- .../metanorma/fop/annotations/Annotation.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index c2d05f6..afb2d5a 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -299,6 +299,9 @@ public void process(File pdf, String xmlReview) throws IOException { HashMap hashMapDocumentAnnotations = new HashMap<>(); hashMapDocumentAnnotations = getAnnotationIDmap(document); + + clearEmptyAnnotations(document); + document.save(pdf); } catch (IOException | NumberFormatException | ParserConfigurationException | DOMException | TransformerException | SAXException | XPathException ex) { @@ -333,4 +336,18 @@ private HashMap getAnnotationIDmap(PDDocument document) thr return hashMapDocumentAnnotations; } + private void clearEmptyAnnotations(PDDocument document) throws IOException { + for (int i = 0; i < document.getNumberOfPages(); i++) + { + List pageAnnotations = new ArrayList<>(); + PDPage page = document.getPage(i); + for(PDAnnotation pageAnnotation: page.getAnnotations()) { + if(!(pageAnnotation.getContents().startsWith("Annot___"))) { + pageAnnotations.add(pageAnnotation); + } + } + document.getPage(i).setAnnotations(pageAnnotations); + } + } + } From 4e2e2923e1a710de4945f4bbd8fafdff1051e215 Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Thu, 31 Oct 2024 21:30:37 +0300 Subject: [PATCH 4/9] Annot tags added, #293 --- .../metanorma/fop/annotations/Annotation.java | 102 +++++++++++++++--- src/main/resources/xfdf_simple.xsl | 2 + 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index afb2d5a..c1e8a28 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -7,16 +7,12 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; -import java.util.List; -import java.util.ArrayList; +import java.util.*; import java.io.StringReader; import java.io.StringWriter; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -32,14 +28,12 @@ import javax.xml.xpath.XPathFactory; import org.apache.fop.pdf.PDFObject; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSObject; +import org.apache.pdfbox.cos.*; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDStream; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes; @@ -68,7 +62,9 @@ public class Annotation { protected static final Logger logger = Logger.getLogger(LoggerHelper.LOGGER_NAME); private boolean DEBUG = false; - + + private HashMap hashMapDocumentAnnotations = new HashMap<>(); + public void process(File pdf, String xmlReview) throws IOException { PDDocument document = null; @@ -296,20 +292,28 @@ public void process(File pdf, String xmlReview) throws IOException { fdfDoc.close(); - HashMap hashMapDocumentAnnotations = new HashMap<>(); + document.save(pdf); + + } catch (IOException | NumberFormatException | ParserConfigurationException | DOMException | TransformerException | SAXException | XPathException ex) { + logger.severe("Can't read annotation data from xml."); + ex.printStackTrace(); + } + + try { hashMapDocumentAnnotations = getAnnotationIDmap(document); + PDStructureTreeRoot structureTreeRoot = document.getDocumentCatalog().getStructureTreeRoot(); + COSArray aDocument = (COSArray) structureTreeRoot.getK(); + fixAnnotationTags(aDocument, null, 0); clearEmptyAnnotations(document); document.save(pdf); - - } catch (IOException | NumberFormatException | ParserConfigurationException | DOMException | TransformerException | SAXException | XPathException ex) { - logger.severe("Can't read annotation data from xml."); + } catch (IOException ex) { + logger.severe("Can't enclose the annotation into the Annot tag."); ex.printStackTrace(); - } - - + } + } finally { if( document != null ) { document.close(); @@ -336,6 +340,70 @@ private HashMap getAnnotationIDmap(PDDocument document) thr return hashMapDocumentAnnotations; } + private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int level) throws IOException { + + if (oArray != null) { + for(int i = 0; i < oArray.size(); i++) { + COSObject oArrayItem = (COSObject) oArray.get(i); + + COSName cName = (COSName) oArrayItem.getItem(COSName.S); + if (cName != null) { + String tagName = cName.getName(); + + String levelPrefix = Collections.nCopies(level, " ").toString() + .replace("[", "") + .replace("]", "") + .replace(", ", ""); + if (DEBUG) { + System.out.println(levelPrefix + tagName); + } + + if (tagName.equals("Annot")) { + COSBase cbAlt = oArrayItem.getItem(COSName.ALT); + if (cbAlt != null) { + String tagAlt = ((COSString)cbAlt).toString(); + String ANNOT_PREFIX = "COSString{"; + if (tagAlt.startsWith(ANNOT_PREFIX + "Annot___")) { + // here replace exising tag Annot with new tag Annot + + String annotationId = tagAlt.substring(ANNOT_PREFIX.length(), tagAlt.length() - 1); + + if (DEBUG) { + System.out.println(levelPrefix + "id=" + tagAlt); + } + + // add the annotation element + COSDictionary anDict = new COSDictionary(); + // set Tag name (S) + anDict.setItem(COSName.S, COSName.ANNOT); + // set Parent (P) + anDict.setItem(COSName.P, parentObject); //oArrayItem oArray + // set Page (PG) + COSArray oArrayK = (COSArray) oArrayItem.getItem(COSName.K); + anDict.setItem(COSName.PG, ((COSObject)oArrayK.get(0)).getItem(COSName.PG)); + + PDObjectReference objRef = new PDObjectReference(); + anDict.setItem(COSName.K, objRef); + + PDAnnotation foundAnnotation = hashMapDocumentAnnotations.get(annotationId); + objRef.setReferencedObject(foundAnnotation); + + if (DEBUG) { + System.out.println(oArrayItem.getItem(COSName.K)); + } + + oArrayItem.setObject(anDict); + } + } + } + } + + COSArray oA_K = (COSArray) oArrayItem.getItem(COSName.K); + fixAnnotationTags(oA_K, oArrayItem, ++level); + } + } + } + private void clearEmptyAnnotations(PDDocument document) throws IOException { for (int i = 0; i < document.getNumberOfPages(); i++) { diff --git a/src/main/resources/xfdf_simple.xsl b/src/main/resources/xfdf_simple.xsl index 87bc2f1..8d7efd0 100644 --- a/src/main/resources/xfdf_simple.xsl +++ b/src/main/resources/xfdf_simple.xsl @@ -98,6 +98,8 @@ + + Annot___ From fe1ad7cd2561a19019beb538777e797788e19ed2 Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Thu, 31 Oct 2024 21:59:36 +0300 Subject: [PATCH 5/9] skip structures, #293 --- .../java/org/metanorma/fop/annotations/Annotation.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index c1e8a28..7f2e367 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -397,9 +397,12 @@ private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int leve } } } - - COSArray oA_K = (COSArray) oArrayItem.getItem(COSName.K); - fixAnnotationTags(oA_K, oArrayItem, ++level); + try { + COSArray oA_K = (COSArray) oArrayItem.getItem(COSName.K); + fixAnnotationTags(oA_K, oArrayItem, ++level); + } catch (Exception e) { + // + } } } } From 1fab04ddbcae8491fb38e6f613e97fb6c2e8a886 Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Thu, 31 Oct 2024 23:12:19 +0300 Subject: [PATCH 6/9] StructParent id added, #293 --- .../metanorma/fop/annotations/Annotation.java | 61 ++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index 7f2e367..b962ded 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -32,8 +32,11 @@ import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.COSObjectable; +import org.apache.pdfbox.pdmodel.common.PDNumberTreeNode; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference; +import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDParentTreeValue; import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot; import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes; @@ -65,6 +68,8 @@ public class Annotation { private HashMap hashMapDocumentAnnotations = new HashMap<>(); + private PDStructureTreeRoot structureTreeRoot; + public void process(File pdf, String xmlReview) throws IOException { PDDocument document = null; @@ -300,9 +305,10 @@ public void process(File pdf, String xmlReview) throws IOException { } try { + document = PDDocument.load(pdf); // important hashMapDocumentAnnotations = getAnnotationIDmap(document); - PDStructureTreeRoot structureTreeRoot = document.getDocumentCatalog().getStructureTreeRoot(); + structureTreeRoot = document.getDocumentCatalog().getStructureTreeRoot(); COSArray aDocument = (COSArray) structureTreeRoot.getK(); fixAnnotationTags(aDocument, null, 0); @@ -392,7 +398,32 @@ private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int leve System.out.println(oArrayItem.getItem(COSName.K)); } - oArrayItem.setObject(anDict); + try { + oArrayItem.setObject(anDict); + + // from https://stackoverflow.com/questions/79083813/how-to-add-the-annotation-tag-in-tagged-pdf-using-pdfbox + + int parentTreeNextKey = structureTreeRoot.getParentTreeNextKey(); // -1, ignored here + + // assign a number to the annotation and insert the annotation element into the parent tree, and set ParentTreeNextKey + PDNumberTreeNode parentTree = structureTreeRoot.getParentTree(); + Map numberTreeAsMap = getNumberTreeAsMap(parentTree); + Set keySet = numberTreeAsMap.keySet(); + + if (parentTreeNextKey == -1) { + parentTreeNextKey = keySet.stream().reduce(Integer::max).get() + 1; + } + + foundAnnotation.setStructParent(parentTreeNextKey); + structureTreeRoot.setParentTreeNextKey(parentTreeNextKey + 1); + numberTreeAsMap.put(parentTreeNextKey, anDict); + parentTree = new PDNumberTreeNode(PDParentTreeValue.class); + parentTree.setNumbers(numberTreeAsMap); + structureTreeRoot.setParentTree(parentTree); + // END from stackoverflow + } catch (IOException e) { + System.out.println(e.toString()); + } } } } @@ -421,4 +452,30 @@ private void clearEmptyAnnotations(PDDocument document) throws IOException { } } + private Map getNumberTreeAsMap(PDNumberTreeNode tree) throws IOException { + if (tree == null) + { + return new LinkedHashMap<>(); + } + Map numbers = tree.getNumbers(); + if (numbers == null) + { + numbers = new LinkedHashMap<>(); + } + else + { + // must copy because the map is read only + numbers = new LinkedHashMap<>(numbers); + } + List kids = tree.getKids(); + if (kids != null) + { + for (PDNumberTreeNode kid : kids) + { + numbers.putAll(getNumberTreeAsMap(kid)); + } + } + return numbers; + } + } From 08ebc3be99af0d2880446bb5f25ec6a016b3e619 Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Fri, 1 Nov 2024 12:38:05 +0300 Subject: [PATCH 7/9] code refactoring, #293 --- .../metanorma/fop/annotations/Annotation.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index b962ded..7f8dfc6 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -66,6 +66,7 @@ public class Annotation { private boolean DEBUG = false; + private final String ANNOT_PREFIX = "Annot___"; private HashMap hashMapDocumentAnnotations = new HashMap<>(); private PDStructureTreeRoot structureTreeRoot; @@ -304,6 +305,7 @@ public void process(File pdf, String xmlReview) throws IOException { ex.printStackTrace(); } + // add Annot tag for the text annotation try { document = PDDocument.load(pdf); // important hashMapDocumentAnnotations = getAnnotationIDmap(document); @@ -319,6 +321,7 @@ public void process(File pdf, String xmlReview) throws IOException { logger.severe("Can't enclose the annotation into the Annot tag."); ex.printStackTrace(); } + // END Annot tag adding } finally { if( document != null ) { @@ -337,7 +340,7 @@ private HashMap getAnnotationIDmap(PDDocument document) thr if (pdAnnotationDict != null) { // subject contains id 'Annot___', see xfdf_simple.xsl, attribute 'subject' String subj = pdAnnotationDict.getString(COSName.SUBJ); - if (subj != null) { + if (subj != null && subj.startsWith(ANNOT_PREFIX)) { hashMapDocumentAnnotations.put(subj, pdAnnotation); } } @@ -368,11 +371,11 @@ private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int leve COSBase cbAlt = oArrayItem.getItem(COSName.ALT); if (cbAlt != null) { String tagAlt = ((COSString)cbAlt).toString(); - String ANNOT_PREFIX = "COSString{"; - if (tagAlt.startsWith(ANNOT_PREFIX + "Annot___")) { + String COSSTRING_PREFIX = "COSString{"; + if (tagAlt.startsWith(COSSTRING_PREFIX + ANNOT_PREFIX)) { // here replace exising tag Annot with new tag Annot - String annotationId = tagAlt.substring(ANNOT_PREFIX.length(), tagAlt.length() - 1); + String annotationId = tagAlt.substring(COSSTRING_PREFIX.length(), tagAlt.length() - 1); if (DEBUG) { System.out.println(levelPrefix + "id=" + tagAlt); @@ -444,7 +447,11 @@ private void clearEmptyAnnotations(PDDocument document) throws IOException { List pageAnnotations = new ArrayList<>(); PDPage page = document.getPage(i); for(PDAnnotation pageAnnotation: page.getAnnotations()) { - if(!(pageAnnotation.getContents().startsWith("Annot___"))) { + if(!(pageAnnotation.getContents().startsWith(ANNOT_PREFIX))) { + String subj = pageAnnotation.getCOSObject().getString(COSName.SUBJ); + if (subj != null && subj.startsWith(ANNOT_PREFIX)) { + pageAnnotation.getCOSObject().setItem(COSName.SUBJ, null); + } pageAnnotations.add(pageAnnotation); } } From 30cd175859bd02895b853e39a81b8d53a2e22712 Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Fri, 1 Nov 2024 17:12:23 +0300 Subject: [PATCH 8/9] code refactoring, #293 --- src/main/java/org/metanorma/fop/annotations/Annotation.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index 7f8dfc6..931c409 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -425,7 +425,7 @@ private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int leve structureTreeRoot.setParentTree(parentTree); // END from stackoverflow } catch (IOException e) { - System.out.println(e.toString()); + logger.severe("ParentTreeKey update error:" + e.toString()); } } } @@ -447,7 +447,8 @@ private void clearEmptyAnnotations(PDDocument document) throws IOException { List pageAnnotations = new ArrayList<>(); PDPage page = document.getPage(i); for(PDAnnotation pageAnnotation: page.getAnnotations()) { - if(!(pageAnnotation.getContents().startsWith(ANNOT_PREFIX))) { + if(pageAnnotation.getContents() != null && + !(pageAnnotation.getContents().startsWith(ANNOT_PREFIX))) { String subj = pageAnnotation.getCOSObject().getString(COSName.SUBJ); if (subj != null && subj.startsWith(ANNOT_PREFIX)) { pageAnnotation.getCOSObject().setItem(COSName.SUBJ, null); From ee0b902b590e5c24dab86ee2a059acf5ba27e8ab Mon Sep 17 00:00:00 2001 From: Alexander Dyuzhev Date: Fri, 1 Nov 2024 19:34:44 +0300 Subject: [PATCH 9/9] updated for Annot tag, clear helper link objects, #293 --- .../metanorma/fop/annotations/Annotation.java | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/metanorma/fop/annotations/Annotation.java b/src/main/java/org/metanorma/fop/annotations/Annotation.java index 931c409..e9a5b07 100644 --- a/src/main/java/org/metanorma/fop/annotations/Annotation.java +++ b/src/main/java/org/metanorma/fop/annotations/Annotation.java @@ -447,8 +447,25 @@ private void clearEmptyAnnotations(PDDocument document) throws IOException { List pageAnnotations = new ArrayList<>(); PDPage page = document.getPage(i); for(PDAnnotation pageAnnotation: page.getAnnotations()) { - if(pageAnnotation.getContents() != null && - !(pageAnnotation.getContents().startsWith(ANNOT_PREFIX))) { + boolean process = true; + if(pageAnnotation.getContents() != null && pageAnnotation.getContents().startsWith(ANNOT_PREFIX)) { + process = false; + } + // if link with alt-text Annot___ placed near the clause block, then the Contents changed to something like '1 Scope' + // therefore need remove links with small difference between coordinates + if (process) { + COSArray rect = pageAnnotation.getCOSObject().getCOSArray(COSName.RECT); + if (rect != null) { + float x1 = ((COSFloat)rect.get(0)).floatValue(); + float x2 = ((COSFloat)rect.get(2)).floatValue(); + if (x2 - x1 < 0.07f) { + process = false; + } + } + } + + if (process) { + // clear Subject field with 'Annot___', see xfdf_simple.xsl, attribute 'subject' String subj = pageAnnotation.getCOSObject().getString(COSName.SUBJ); if (subj != null && subj.startsWith(ANNOT_PREFIX)) { pageAnnotation.getCOSObject().setItem(COSName.SUBJ, null);