Introduce DirH
authorMathieu Baudier <mbaudier@argeo.org>
Sat, 5 Nov 2016 20:10:03 +0000 (20:10 +0000)
committerMathieu Baudier <mbaudier@argeo.org>
Sat, 5 Nov 2016 20:10:03 +0000 (20:10 +0000)
git-svn-id: https://svn.argeo.org/commons/trunk@9309 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc

org.argeo.util/src/org/argeo/util/DigestUtils.java
org.argeo.util/src/org/argeo/util/DirH.java [new file with mode: 0644]

index 52cbc5d21a3b801885a88555c1ad9d88463938b2..2e8560f088ec1dd89c6bedbad7618dc841aaf755 100644 (file)
@@ -21,12 +21,15 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 
 /** Utilities around cryptographic digests */
 public class DigestUtils {
-       private static Boolean debug = true;
+       private static Boolean debug = false;
        // TODO: make it writable
        private final static Integer byteBufferCapacity = 100 * 1024;// 100 KB
 
@@ -105,6 +108,39 @@ public class DigestUtils {
                }
        }
 
+       public static byte[] digestRaw(String algorithm, Path file, long bufferSize) {
+               long begin = System.currentTimeMillis();
+               try {
+                       MessageDigest md = MessageDigest.getInstance(algorithm);
+                       FileChannel fc = FileChannel.open(file);
+                       long fileSize = Files.size(file);
+                       if (fileSize <= bufferSize) {
+                               ByteBuffer bb = fc.map(MapMode.READ_ONLY, 0, fileSize);
+                               md.update(bb);
+                       } else {
+                               long lastCycle = (fileSize / bufferSize) - 1;
+                               long position = 0;
+                               for (int i = 0; i <= lastCycle; i++) {
+                                       ByteBuffer bb;
+                                       if (i != lastCycle) {
+                                               bb = fc.map(MapMode.READ_ONLY, position, bufferSize);
+                                               position = position + bufferSize;
+                                       } else {
+                                               bb = fc.map(MapMode.READ_ONLY, position, fileSize - bufferSize);
+                                               position = fileSize;
+                                       }
+                                       md.update(bb);
+                               }
+                       }
+                       long end = System.currentTimeMillis();
+                       if (debug)
+                               System.out.println((end - begin) + " ms / " + ((end - begin) / 1000) + " s");
+                       return md.digest();
+               } catch (Exception e) {
+                       throw new UtilsException("Cannot digest " + file + "  with algorithm " + algorithm, e);
+               }
+       }
+
        public static void main(String[] args) {
                File file;
                if (args.length > 0)
@@ -130,14 +166,14 @@ public class DigestUtils {
                }
        }
 
-       final private static char[] hexArray = "0123456789ABCDEF".toCharArray();
+       final private static char[] hexArray = "0123456789abcdef".toCharArray();
 
        /**
         * From
         * http://stackoverflow.com/questions/9655181/how-to-convert-a-byte-array-to
         * -a-hex-string-in-java
         */
-       private static String encodeHexString(byte[] bytes) {
+       public static String encodeHexString(byte[] bytes) {
                char[] hexChars = new char[bytes.length * 2];
                for (int j = 0; j < bytes.length; j++) {
                        int v = bytes[j] & 0xFF;
diff --git a/org.argeo.util/src/org/argeo/util/DirH.java b/org.argeo.util/src/org/argeo/util/DirH.java
new file mode 100644 (file)
index 0000000..4035b96
--- /dev/null
@@ -0,0 +1,116 @@
+package org.argeo.util;
+
+import java.io.PrintStream;
+import java.nio.charset.Charset;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/** Hashes the hashes of the files in a directory.*/
+public class DirH {
+
+       private final static Charset charset = Charset.forName("UTF-16");
+       private final static long bufferSize = 200 * 1024 * 1024;
+       private final static String algorithm = "SHA";
+
+       private final static byte EOL = (byte) '\n';
+       private final static byte SPACE = (byte) ' ';
+
+       private final int hashSize;
+
+       private final byte[][] hashes;
+       private final byte[][] fileNames;
+       private final byte[] digest;
+       private final byte[] dirName;
+
+       /**
+        * @param dirName
+        *            can be null or empty
+        */
+       private DirH(byte[][] hashes, byte[][] fileNames, byte[] dirName) {
+               if (hashes.length != fileNames.length)
+                       throw new UtilsException(hashes.length + " hashes and " + fileNames.length + " file names");
+               this.hashes = hashes;
+               this.fileNames = fileNames;
+               this.dirName = dirName == null ? new byte[0] : dirName;
+               if (hashes.length == 0) {// empty dir
+                       hashSize = 20;
+                       // FIXME what is the digest of an empty dir?
+                       digest = new byte[hashSize];
+                       Arrays.fill(digest, SPACE);
+                       return;
+               }
+               hashSize = hashes[0].length;
+               for (int i = 0; i < hashes.length; i++) {
+                       if (hashes[i].length != hashSize)
+                               throw new UtilsException(
+                                               "Hash size for " + new String(fileNames[i], charset) + " is " + hashes[i].length);
+               }
+
+               try {
+                       MessageDigest md = MessageDigest.getInstance(algorithm);
+                       for (int i = 0; i < hashes.length; i++) {
+                               md.update(this.hashes[i]);
+                               md.update(SPACE);
+                               md.update(this.fileNames[i]);
+                               md.update(EOL);
+                       }
+                       digest = md.digest();
+               } catch (NoSuchAlgorithmException e) {
+                       throw new UtilsException("Cannot digest", e);
+               }
+       }
+
+       public void print(PrintStream out) {
+               out.print(DigestUtils.encodeHexString(digest));
+               if (dirName.length > 0) {
+                       out.print(' ');
+                       out.print(new String(dirName, charset));
+               }
+               out.print('\n');
+               for (int i = 0; i < hashes.length; i++) {
+                       out.print(DigestUtils.encodeHexString(hashes[i]));
+                       out.print(' ');
+                       out.print(new String(fileNames[i], charset));
+                       out.print('\n');
+               }
+       }
+
+       public static DirH digest(Path dir) {
+               try (DirectoryStream<Path> files = Files.newDirectoryStream(dir)) {
+                       List<byte[]> hs = new ArrayList<byte[]>();
+                       List<String> fNames = new ArrayList<>();
+                       for (Path file : files) {
+                               if (!Files.isDirectory(file)) {
+                                       byte[] digest = DigestUtils.digestRaw(algorithm, file, bufferSize);
+                                       hs.add(digest);
+                                       fNames.add(file.getFileName().toString());
+                               }
+                       }
+
+                       byte[][] fileNames = new byte[fNames.size()][];
+                       for (int i = 0; i < fNames.size(); i++) {
+                               fileNames[i] = fNames.get(i).getBytes(charset);
+                       }
+                       byte[][] hashes = hs.toArray(new byte[hs.size()][]);
+                       return new DirH(hashes, fileNames, dir.toString().getBytes(charset));
+               } catch (Exception e) {
+                       throw new UtilsException("Cannot digest " + dir, e);
+               }
+       }
+
+       public static void main(String[] args) {
+               try {
+                       DirH dirH = DirH.digest(Paths.get("/home/mbaudier/tmp/"));
+                       dirH.print(System.out);
+               } catch (Exception e) {
+                       e.printStackTrace();
+               }
+       }
+}