From a5c66ac837e01e447161c3d73fe35eadc26cf304 Mon Sep 17 00:00:00 2001
From: Mathieu Baudier
Date: Sat, 5 Nov 2016 20:10:03 +0000
Subject: [PATCH] Introduce DirH

git-svn-id: https://svn.argeo.org/commons/trunk@9309 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc
---
Review notes (ignored by git am):
- digestRaw: closes the FileChannel, maps the last chunk with the remaining
  length instead of (fileSize - bufferSize), rejects non-positive buffer sizes.
- DirH.digest: files are sorted by name, since a directory stream has no
  specific order; generic type arguments restored.
- Hand-edited patch: the blob ids on the "index" lines are stale.

 .../src/org/argeo/util/DigestUtils.java       |  50 ++++++-
 org.argeo.util/src/org/argeo/util/DirH.java   | 146 ++++++++++++++++++
 2 files changed, 193 insertions(+), 3 deletions(-)
 create mode 100644 org.argeo.util/src/org/argeo/util/DirH.java

diff --git a/org.argeo.util/src/org/argeo/util/DigestUtils.java b/org.argeo.util/src/org/argeo/util/DigestUtils.java
index 52cbc5d21..2e8560f08 100644
--- a/org.argeo.util/src/org/argeo/util/DigestUtils.java
+++ b/org.argeo.util/src/org/argeo/util/DigestUtils.java
@@ -21,12 +21,15 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 
 /** Utilities around cryptographic digests */
 public class DigestUtils {
-	private static Boolean debug = true;
+	private static Boolean debug = false;
 	// TODO: make it writable
 	private final static Integer byteBufferCapacity = 100 * 1024;// 100 KB
 
@@ -105,6 +108,47 @@ public class DigestUtils {
 		}
 	}
 
+	/**
+	 * Computes the raw digest of a file by memory-mapping it in successive
+	 * chunks.
+	 *
+	 * @param algorithm
+	 *            the {@link MessageDigest} algorithm name
+	 * @param file
+	 *            the file to digest
+	 * @param bufferSize
+	 *            the maximal size in bytes of each mapped chunk, must be
+	 *            strictly positive
+	 * @return the raw digest bytes
+	 * @throws UtilsException
+	 *             if the file cannot be read or the algorithm is not available
+	 */
+	public static byte[] digestRaw(String algorithm, Path file, long bufferSize) {
+		if (bufferSize <= 0)
+			throw new UtilsException("Buffer size must be strictly positive: " + bufferSize);
+		// a single mapping cannot be larger than Integer.MAX_VALUE bytes
+		long chunkSize = Math.min(bufferSize, Integer.MAX_VALUE);
+		long begin = System.currentTimeMillis();
+		try (FileChannel fc = FileChannel.open(file)) {
+			MessageDigest md = MessageDigest.getInstance(algorithm);
+			long fileSize = Files.size(file);
+			long position = 0;
+			while (position < fileSize) {
+				// the last chunk is whatever remains, never past the end of file
+				long length = Math.min(chunkSize, fileSize - position);
+				ByteBuffer bb = fc.map(MapMode.READ_ONLY, position, length);
+				md.update(bb);
+				position = position + length;
+			}
+			long end = System.currentTimeMillis();
+			if (debug)
+				System.out.println((end - begin) + " ms / " + ((end - begin) / 1000) + " s");
+			return md.digest();
+		} catch (Exception e) {
+			throw new UtilsException("Cannot digest " + file + " with algorithm " + algorithm, e);
+		}
+	}
+
 	public static void main(String[] args) {
 		File file;
 		if (args.length > 0)
@@ -130,14 +174,14 @@ public class DigestUtils {
 		}
 	}
 
-	final private static char[] hexArray = "0123456789ABCDEF".toCharArray();
+	final private static char[] hexArray = "0123456789abcdef".toCharArray();
 
 	/**
 	 * From
 	 * http://stackoverflow.com/questions/9655181/how-to-convert-a-byte-array-to
 	 * -a-hex-string-in-java
 	 */
-	private static String encodeHexString(byte[] bytes) {
+	public static String encodeHexString(byte[] bytes) {
 		char[] hexChars = new char[bytes.length * 2];
 		for (int j = 0; j < bytes.length; j++) {
 			int v = bytes[j] & 0xFF;
diff --git a/org.argeo.util/src/org/argeo/util/DirH.java b/org.argeo.util/src/org/argeo/util/DirH.java
new file mode 100644
index 000000000..4035b96bc
--- /dev/null
+++ b/org.argeo.util/src/org/argeo/util/DirH.java
@@ -0,0 +1,146 @@
+package org.argeo.util;
+
+import java.io.PrintStream;
+import java.nio.charset.Charset;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Hashes the hashes of the files in a directory. Sub-directories are skipped
+ * and files are processed sorted by name, so that the result does not depend
+ * on the order in which the file system lists them.
+ */
+public class DirH {
+
+	private final static Charset charset = Charset.forName("UTF-16");
+	private final static long bufferSize = 200L * 1024 * 1024;
+	private final static String algorithm = "SHA";
+
+	private final static byte EOL = (byte) '\n';
+	private final static byte SPACE = (byte) ' ';
+
+	private final int hashSize;
+
+	private final byte[][] hashes;
+	private final byte[][] fileNames;
+	private final byte[] digest;
+	private final byte[] dirName;
+
+	/**
+	 * @param hashes
+	 *            the raw digests of the files, all of the same length
+	 * @param fileNames
+	 *            the encoded file names, in the same order as the hashes
+	 * @param dirName
+	 *            can be null or empty
+	 */
+	private DirH(byte[][] hashes, byte[][] fileNames, byte[] dirName) {
+		if (hashes.length != fileNames.length)
+			throw new UtilsException(hashes.length + " hashes and " + fileNames.length + " file names");
+		this.hashes = hashes;
+		this.fileNames = fileNames;
+		this.dirName = dirName == null ? new byte[0] : dirName;
+		if (hashes.length == 0) {// empty dir
+			hashSize = 20;
+			// FIXME what is the digest of an empty dir?
+			digest = new byte[hashSize];
+			Arrays.fill(digest, SPACE);
+			return;
+		}
+		hashSize = hashes[0].length;
+		for (int i = 0; i < hashes.length; i++) {
+			if (hashes[i].length != hashSize)
+				throw new UtilsException(
+						"Hash size for " + new String(fileNames[i], charset) + " is " + hashes[i].length);
+		}
+
+		try {
+			MessageDigest md = MessageDigest.getInstance(algorithm);
+			for (int i = 0; i < hashes.length; i++) {
+				md.update(this.hashes[i]);
+				md.update(SPACE);
+				md.update(this.fileNames[i]);
+				md.update(EOL);
+			}
+			digest = md.digest();
+		} catch (NoSuchAlgorithmException e) {
+			throw new UtilsException("Cannot digest", e);
+		}
+	}
+
+	/**
+	 * Prints the directory digest (and name, if any) on a first line, then one
+	 * line per file with its hex-encoded hash and its name.
+	 */
+	public void print(PrintStream out) {
+		out.print(DigestUtils.encodeHexString(digest));
+		if (dirName.length > 0) {
+			out.print(' ');
+			out.print(new String(dirName, charset));
+		}
+		out.print('\n');
+		for (int i = 0; i < hashes.length; i++) {
+			out.print(DigestUtils.encodeHexString(hashes[i]));
+			out.print(' ');
+			out.print(new String(fileNames[i], charset));
+			out.print('\n');
+		}
+	}
+
+	/**
+	 * Digests the files directly contained in a directory.
+	 *
+	 * @param dir
+	 *            the directory to scan, sub-directories are not descended into
+	 * @return the hashes of the files and the digest computed from them
+	 * @throws UtilsException
+	 *             if the directory or one of its files cannot be read
+	 */
+	public static DirH digest(Path dir) {
+		try (DirectoryStream<Path> files = Files.newDirectoryStream(dir)) {
+			// a directory stream has no specific order: sort by file name so
+			// that the same content always gives the same digest
+			SortedMap<String, byte[]> digests = new TreeMap<>();
+			for (Path file : files) {
+				if (!Files.isDirectory(file)) {
+					byte[] digest = DigestUtils.digestRaw(algorithm, file, bufferSize);
+					digests.put(file.getFileName().toString(), digest);
+				}
+			}
+
+			byte[][] hashes = new byte[digests.size()][];
+			byte[][] fileNames = new byte[digests.size()][];
+			int i = 0;
+			for (Map.Entry<String, byte[]> entry : digests.entrySet()) {
+				fileNames[i] = entry.getKey().getBytes(charset);
+				hashes[i] = entry.getValue();
+				i++;
+			}
+			return new DirH(hashes, fileNames, dir.toString().getBytes(charset));
+		} catch (Exception e) {
+			throw new UtilsException("Cannot digest " + dir, e);
+		}
+	}
+
+	/**
+	 * Prints the digest of the directory given as first argument.
+	 */
+	public static void main(String[] args) {
+		try {
+			// falls back to the historical hard-coded directory
+			String dir = args.length > 0 ? args[0] : "/home/mbaudier/tmp/";
+			DirH dirH = DirH.digest(Paths.get(dir));
+			dirH.print(System.out);
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+}
-- 
2.30.2