1 package org
.argeo
.cms
.file
;
3 import java
.io
.IOException
;
4 import java
.math
.BigInteger
;
5 import java
.nio
.MappedByteBuffer
;
6 import java
.nio
.channels
.FileChannel
;
7 import java
.nio
.file
.FileVisitResult
;
8 import java
.nio
.file
.Files
;
9 import java
.nio
.file
.Path
;
10 import java
.nio
.file
.Paths
;
11 import java
.nio
.file
.SimpleFileVisitor
;
12 import java
.nio
.file
.attribute
.BasicFileAttributes
;
13 import java
.security
.MessageDigest
;
14 import java
.security
.NoSuchAlgorithmException
;
15 import java
.util
.Base64
;
16 import java
.util
.zip
.Checksum
;
/**
 * Computes message digests and checksums of files, reading them through
 * read-only memory-mapped regions of at most {@code regionSize} bytes so that
 * the size of a file does not determine how much of it is mapped at once.
 */
public class ChecksumFactory {
    /** Maximal size, in bytes, of a single memory-mapped region (10 MiB). */
    private int regionSize = 10 * 1024 * 1024;

    /**
     * Computes the digest of a file or of a directory tree and prints a one-line
     * summary (Base64 digest, path and timing) to standard output.
     * <p>
     * For a regular file the digest is computed over its content. For a
     * directory, each visited file is digested on its own with the same
     * algorithm and the resulting digests are fed, in visiting order, into the
     * directory digest; the result therefore depends on the order in which
     * {@link Files#walkFileTree(Path, java.nio.file.FileVisitor)} visits the files.
     *
     * @param path the file or directory to digest
     * @param algo the {@link MessageDigest} algorithm name, e.g. {@code "SHA-256"}
     * @return the raw digest bytes
     * @throws IllegalStateException if the algorithm is not available or the
     *                               path cannot be read
     */
    public byte[] digest(Path path, final String algo) {
        try {
            final MessageDigest md = MessageDigest.getInstance(algo);
            if (Files.isDirectory(path)) {
                return digestDirectory(path, algo, md);
            } else {
                return digestFile(path, md);
            }
        } catch (NoSuchAlgorithmException | IOException e) {
            throw new IllegalStateException("Cannot digest " + path, e);
        }
    }

    /** Digests every file below {@code dir} and combines the per-file digests into {@code md}. */
    private byte[] digestDirectory(Path dir, final String algo, final MessageDigest md) throws IOException {
        long begin = System.currentTimeMillis();
        Files.walkFileTree(dir, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                if (!Files.isDirectory(file)) {
                    byte[] fileDigest = digest(file, algo);
                    md.update(fileDigest);
                }
                return FileVisitResult.CONTINUE;
            }
        });
        byte[] digest = md.digest();
        long duration = System.currentTimeMillis() - begin;
        System.out.println(printBase64Binary(digest) + " " + dir + " (" + duration / 1000 + "s)");
        return digest;
    }

    /** Digests the content of a single file, one mapped region at a time. */
    private byte[] digestFile(Path file, MessageDigest md) throws IOException {
        long begin = System.nanoTime();
        long length;
        // With no explicit option the channel is opened for reading only.
        try (FileChannel channel = FileChannel.open(file)) {
            length = channel.size();
            long cursor = 0;
            while (cursor < length) {
                long effectiveSize = Math.min(regionSize, length - cursor);
                MappedByteBuffer mb = channel.map(FileChannel.MapMode.READ_ONLY, cursor, effectiveSize);
                // Consumes exactly the remaining bytes of the region, including a
                // trailing partial chunk.
                md.update(mb);
                cursor = cursor + effectiveSize;
            }
        }
        byte[] digest = md.digest();
        long duration = System.nanoTime() - begin;
        long millis = duration / 1000000;
        // Small files are digested in less than a millisecond: clamp the divisor
        // so that the throughput computation cannot divide by zero.
        long throughput = (length / Math.max(1, millis)) * 1000 / (1024 * 1024);
        System.out.println(printBase64Binary(digest) + " " + file.getFileName() + " (" + millis + "ms, "
                + (length / 1024) + "kB, " + throughput + " MB/s)");
        return digest;
    }

    /**
     * Whether the file should be mapped.
     *
     * @param fileChannel the channel whose size is inspected
     * @return {@code true} if the file is larger than a tenth of the region size
     * @throws IOException if the size of the channel cannot be read
     */
    protected boolean mapFile(FileChannel fileChannel) throws IOException {
        long size = fileChannel.size();
        return size > (regionSize / 10);
    }

    /**
     * Updates the given checksum with the whole content of a file and prints the
     * elapsed time, in seconds, to standard output (also when reading fails).
     *
     * @param path the file to read
     * @param crc  the checksum to update; it is not reset by this method
     * @return the value of {@code crc} after the file has been processed
     * @throws IllegalStateException if the file cannot be read
     */
    public long checksum(Path path, Checksum crc) {
        final int bufferSize = 2 * 1024 * 1024;
        long begin = System.currentTimeMillis();
        // With no explicit option the channel is opened for reading only.
        try (FileChannel channel = FileChannel.open(path)) {
            byte[] bytes = new byte[bufferSize];
            long length = channel.size();
            long cursor = 0;
            while (cursor < length) {
                long effectiveSize = Math.min(regionSize, length - cursor);
                MappedByteBuffer mb = channel.map(FileChannel.MapMode.READ_ONLY, cursor, effectiveSize);
                while (mb.hasRemaining()) {
                    // Never request more than what is left in the region.
                    int nGet = Math.min(mb.remaining(), bufferSize);
                    mb.get(bytes, 0, nGet);
                    crc.update(bytes, 0, nGet);
                }
                cursor = cursor + effectiveSize;
            }
            return crc.getValue();
        } catch (IOException e) {
            throw new IllegalStateException("Cannot checksum " + path, e);
        } finally {
            long duration = System.currentTimeMillis() - begin;
            System.out.println(duration / 1000 + "s");
        }
    }

    /**
     * Digests the path given as first argument (or a hard-coded default path when
     * there is none) with SHA1 and prints the result in Base64 and hexadecimal.
     *
     * @param args optionally, the path to digest as first element
     */
    public static void main(String... args) {
        ChecksumFactory cf = new ChecksumFactory();
        Path path;
        if (args.length > 0) {
            path = Paths.get(args[0]);
        } else {
            path = Paths.get("/home/mbaudier/Downloads/torrents/CentOS-7-x86_64-DVD-1503-01/"
                    + "CentOS-7-x86_64-DVD-1503-01.iso");
        }
        String algo = "SHA1";
        byte[] digest = cf.digest(path, algo);
        System.out.println(algo + " " + printBase64Binary(digest));
        System.out.println(algo + " " + new BigInteger(1, digest).toString(16));
    }

    /** Encodes the bytes with the basic (RFC 4648) Base64 alphabet. */
    private static String printBase64Binary(byte[] arr) {
        return Base64.getEncoder().encodeToString(arr);
    }
}