From e7dc29497f7888a08eb4144dd40f0be1c52548f6 Mon Sep 17 00:00:00 2001 From: furfurylic <17105140+furfurylic@users.noreply.github.com> Date: Thu, 28 Jul 2016 22:01:06 +0900 Subject: [PATCH] Add "timid" attribute to Output sink to avoid unnecessary touching --- UsersGuide.asciidoc | 2 + .../chionographis/CachingResolver.java | 7 +-- src/net/furfurylic/chionographis/Output.java | 56 +++++++++++++++++-- src/net/furfurylic/chionographis/Pool.java | 2 + test/crossing/input-timid/input.xml | 1 + test/crossing/input-timid/placeholder1.txt | 1 + test/crossing/input-timid/placeholder2.txt | 1 + test/crossing/input-timid/placeholder3.txt | 1 + test/test.xml | 50 +++++++++++++++++ 9 files changed, 112 insertions(+), 9 deletions(-) create mode 100644 test/crossing/input-timid/input.xml create mode 100644 test/crossing/input-timid/placeholder1.txt create mode 100644 test/crossing/input-timid/placeholder2.txt create mode 100644 test/crossing/input-timid/placeholder3.txt diff --git a/UsersGuide.asciidoc b/UsersGuide.asciidoc index b5359cc..05d610e 100644 --- a/UsersGuide.asciidoc +++ b/UsersGuide.asciidoc @@ -324,6 +324,8 @@ The XPath expression can include names which belong some namespaces only when th |mkdirs|Whether this sink creates parent directories of the destination file if needed.| No; defaults to +yes+ |force|Whether this sink creates output files even if existing files seem new enough.| No; defaults to +no+ + +|timid|Whether this sink avoids overwriting existing files whose contents are already identical to those about to be written.| No; defaults to +no+ |================= ==== Nested elements diff --git a/src/net/furfurylic/chionographis/CachingResolver.java b/src/net/furfurylic/chionographis/CachingResolver.java index 5d3d9e0..09a0a70 100644 --- a/src/net/furfurylic/chionographis/CachingResolver.java +++ b/src/net/furfurylic/chionographis/CachingResolver.java @@ -40,8 +40,6 @@ final class CachingResolver implements EntityResolver, URIResolver { private static final 
NetResourceCache BYTES = new NetResourceCache<>(); private static final NetResourceCache TREES = new NetResourceCache<>(); - private static final Pool BUFFER = new Pool<>(() -> new byte[4096]); - private Consumer listenStored_; private Consumer listenHit_; @@ -76,7 +74,7 @@ public InputSource resolveEntity(String publicId, String systemId) return Files.readAllBytes(Paths.get(u)); } else { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); - byte[] buffer = BUFFER.get(); + byte[] buffer = Pool.BYTES.get(); try { try (InputStream in = u.toURL().openStream()) { int length; @@ -85,8 +83,9 @@ public InputSource resolveEntity(String publicId, String systemId) } } } finally { - BUFFER.release(buffer); + Pool.BYTES.release(buffer); } + // TODO: Can make more efficient (by avoiding copy) return bytes.toByteArray(); } diff --git a/src/net/furfurylic/chionographis/Output.java b/src/net/furfurylic/chionographis/Output.java index 676d8ce..25a23d0 100644 --- a/src/net/furfurylic/chionographis/Output.java +++ b/src/net/furfurylic/chionographis/Output.java @@ -40,7 +40,7 @@ * An Output {@linkplain Sink sink} writes each source document into an filesystem file. */ public final class Output extends Sink { - private static final Pool BUFFER = + private static final Pool BUFFER = new Pool<>(() -> new ExposingByteArrayOutputStream()); private Path destDir_ = null; @@ -48,6 +48,7 @@ public final class Output extends Sink { private boolean mkDirs_ = true; private String referent_ = null; private boolean force_ = false; + private boolean timid_ = false; private FileNameMapper mapper_ = null; private Logger logger_; @@ -137,7 +138,7 @@ public void setRefer(String refer) { * if necessary. Defaulted to "no". * * @param mkDirs - * {@code true} if make parent directories; {@code false} otherwise. + * {@code true} if this sink makes parent directories; {@code false} otherwise. 
*/ public void setMkDirs(boolean mkDirs) { mkDirs_ = mkDirs; @@ -154,6 +155,19 @@ public void setForce(boolean force) { force_ = force; } + /** + * Sets whether this sink should compare existing destination files with the contents + * about to be written and avoid overwriting them if not necessary. + * + * @param timid + * {@code true} if this sink avoids unnecessary overwriting; {@code false} otherwise. + * + * @since 1.1 + */ + public void setTimid(boolean timid) { + timid_ = timid; + } + /** * Installs a file mapper. * The file mapper maps a source file name to an destination file name. @@ -352,7 +366,19 @@ void finishOne(Result result) { Files.createDirectories(parent); } } + Path absolute = mapped.toAbsolutePath(); + + if (timid_) { + File file = absolute.toFile(); + if (file.exists() && (file.length() == out.size()) + && hasIdenticalContent(file, out.buffer())) { + logger_.log(this, "No need to overwrite the output file: " + absolute, + Logger.Level.FINE); + continue; + } + } + logger_.log(this, "Creating " + absolute, Logger.Level.FINE); // We take advantage of FileChannel for its capability to be interrupted try { @@ -360,6 +386,7 @@ void finishOne(Result result) { absolute, StandardOpenOption.WRITE, StandardOpenOption.CREATE)) { channel.write(ByteBuffer.wrap(out.buffer(), 0, out.size())); } + countInBundle_.incrementAndGet(); } catch (IOException e) { logger_.log(this, "Failed to create " + absolute, Logger.Level.WARN); logger_.log(this, e, " Cause: ", Logger.Level.INFO, Logger.Level.VERBOSE); @@ -371,16 +398,35 @@ void finishOne(Result result) { } finally { placeBackBuffer(out); } + } - countInBundle_.incrementAndGet(); + private boolean hasIdenticalContent(File file, byte[] content) throws IOException { + byte[] bytes = Pool.BYTES.get(); + try (FileChannel in = FileChannel.open(file.toPath(), StandardOpenOption.READ)) { + ByteBuffer buffer = ByteBuffer.wrap(bytes); + int length; + int head = 0; + while ((length = in.read(buffer)) > -1) { + buffer.limit(length); + 
buffer.rewind(); + if (!buffer.equals(ByteBuffer.wrap(content, head, length))) { + return false; + } + head += length; + } + return true; + } finally { + Pool.BYTES.release(bytes); + } } @Override void abortOne(Result result) { - placeBackBuffer((ByteArrayOutputStream) ((OutputStreamResult) result).getOutputStream()); + placeBackBuffer( + (ExposingByteArrayOutputStream) ((OutputStreamResult) result).getOutputStream()); } - private void placeBackBuffer(ByteArrayOutputStream buffer) { + private void placeBackBuffer(ExposingByteArrayOutputStream buffer) { buffer.reset(); BUFFER.release(buffer); } diff --git a/src/net/furfurylic/chionographis/Pool.java b/src/net/furfurylic/chionographis/Pool.java index 97347c6..4c38139 100644 --- a/src/net/furfurylic/chionographis/Pool.java +++ b/src/net/furfurylic/chionographis/Pool.java @@ -23,6 +23,8 @@ */ final class Pool { + public static final Pool BYTES = new Pool<>(() -> new byte[4096]); + private final ReentrantLock lock_ = new ReentrantLock(); private Supplier create_; private SoftReference> pool_; diff --git a/test/crossing/input-timid/input.xml b/test/crossing/input-timid/input.xml new file mode 100644 index 0000000..d23c7ee --- /dev/null +++ b/test/crossing/input-timid/input.xml @@ -0,0 +1 @@ + diff --git a/test/crossing/input-timid/placeholder1.txt b/test/crossing/input-timid/placeholder1.txt new file mode 100644 index 0000000..900d878 --- /dev/null +++ b/test/crossing/input-timid/placeholder1.txt @@ -0,0 +1 @@ +[X:[Y:[Z(message=abc):]]] \ No newline at end of file diff --git a/test/crossing/input-timid/placeholder2.txt b/test/crossing/input-timid/placeholder2.txt new file mode 100644 index 0000000..c342ac3 --- /dev/null +++ b/test/crossing/input-timid/placeholder2.txt @@ -0,0 +1 @@ +[X:[Y:[Z(message=abc):]]x \ No newline at end of file diff --git a/test/crossing/input-timid/placeholder3.txt b/test/crossing/input-timid/placeholder3.txt new file mode 100644 index 0000000..421064b --- /dev/null +++ 
b/test/crossing/input-timid/placeholder3.txt @@ -0,0 +1 @@ +[X:[Y:[Z(message=abc):]] \ No newline at end of file diff --git a/test/test.xml b/test/test.xml index 2af602f..f17f9bc 100644 --- a/test/test.xml +++ b/test/test.xml @@ -35,6 +35,7 @@ + @@ -1111,6 +1112,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +