diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..b1ec12f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+src/test/resources/hash_lf.txt text eol=lf
+src/test/resources/hash_crlf.txt text eol=crlf
\ No newline at end of file
diff --git a/src/main/java/com/superzanti/serversync/files/FileHash.java b/src/main/java/com/superzanti/serversync/files/FileHash.java
--- a/src/main/java/com/superzanti/serversync/files/FileHash.java
+++ b/src/main/java/com/superzanti/serversync/files/FileHash.java
@@ -3,17 +3,33 @@
 import com.superzanti.serversync.util.Logger;
 
 import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
 import java.math.BigInteger;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.security.DigestInputStream;
 import java.security.MessageDigest;
+import java.util.Arrays;
 
 public class FileHash {
+    /**
+     * Computes the SHA-256 hash of a file as a 64 character lowercase hex string.
+     * <p>
+     * Valid UTF-8 text files are hashed with their line endings normalized, so the
+     * same text stored with LF or CRLF line endings produces the same hash. All
+     * other files are hashed byte for byte.
+     *
+     * @param file the file to hash
+     * @return the hex encoded hash, or an empty string if the file could not be hashed
+     */
     public static String hashFile(Path file) {
         try (
             DigestInputStream in = new DigestInputStream(
-                new BufferedInputStream(Files.newInputStream(file)),
+                new BufferedInputStream(FileHash.openHashableStream(file)),
                 MessageDigest.getInstance("SHA-256")
             )
         ) {
@@ -26,4 +42,62 @@ public static String hashFile(Path file) {
         }
         return "";
     }
+
+    /**
+     * Opens the bytes that hashFile should digest for the given file.
+     * <p>
+     * Binary files are streamed unchanged. Text files are decoded as UTF-8 and their
+     * lines re-joined with a single {@code \n}, so LF, CRLF and CR line endings all
+     * hash the same while the position of each line break still affects the hash.
+     * Text that is not valid UTF-8 is streamed unchanged.
+     *
+     * @param file the file to open
+     * @return a stream the caller must close
+     * @throws IOException if the file cannot be probed or read
+     */
+    private static InputStream openHashableStream(Path file) throws IOException {
+        if (FileHash.isBinaryFile(file)) {
+            return Files.newInputStream(file);
+        }
+
+        try {
+            String normalized = String.join("\n", Files.readAllLines(file, StandardCharsets.UTF_8));
+            return new ByteArrayInputStream(normalized.getBytes(StandardCharsets.UTF_8));
+        } catch (CharacterCodingException e) {
+            // Not decodable as UTF-8, so line endings cannot be normalized safely.
+            return Files.newInputStream(file);
+        }
+    }
+
+    /**
+     * Decides whether a file is hashed byte for byte instead of as text.
+     * <p>
+     * The probed content type is used when one is available; otherwise a NUL byte
+     * within the first 8192 bytes marks the file as binary.
+     *
+     * @param f the file to inspect
+     * @return true if the file should be treated as binary
+     * @throws IOException if the file cannot be probed or read
+     */
+    private static boolean isBinaryFile(Path f) throws IOException {
+        // NOTE(review): vnd.ms-excel presumably covers .csv files on some platforms - confirm.
+        String[] textMime = {"text", "application/xml", "application/json", "application/javascript", "application/vnd.ms-excel"};
+
+        String type = Files.probeContentType(f);
+        if (type != null) {
+            return Arrays.stream(textMime).noneMatch(type::startsWith);
+        }
+
+        // Type couldn't be determined, guess from the leading bytes.
+        try (InputStream stream = new BufferedInputStream(Files.newInputStream(f))) {
+            byte[] buffer = new byte[8192];
+            int read = stream.read(buffer);
+            for (int i = 0; i < read; i++) {
+                if (buffer[i] == 0x00) {
+                    return true;
+                }
+            }
+            return false;
+        }
+    }
 }
diff --git a/src/test/java/com/superzanti/serversync/LineFeedTest.java b/src/test/java/com/superzanti/serversync/LineFeedTest.java
new file mode 100644
--- /dev/null
+++ b/src/test/java/com/superzanti/serversync/LineFeedTest.java
@@ -0,0 +1,35 @@
+package com.superzanti.serversync;
+
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import com.superzanti.serversync.files.FileHash;
+import com.superzanti.serversync.util.Logger;
+
+class LineFeedTest {
+
+    public LineFeedTest() {
+        Logger.instantiate("test");
+    }
+
+    @Test
+    @DisplayName("Should hash text files ignoring line feeds")
+    void textHash() throws URISyntaxException {
+        String hashLF = FileHash.hashFile(resource("/hash_lf.txt"));
+        String hashCRLF = FileHash.hashFile(resource("/hash_crlf.txt"));
+        // hashFile returns "" on failure, which would make the comparison pass vacuously.
+        assertFalse(hashLF.isEmpty());
+        assertEquals(hashLF, hashCRLF);
+    }
+
+    // Resolve through a URI so paths containing spaces or a Windows drive letter survive.
+    private Path resource(String name) throws URISyntaxException {
+        return Paths.get(this.getClass().getResource(name).toURI());
+    }
+}
diff --git a/src/test/resources/hash_crlf.txt b/src/test/resources/hash_crlf.txt
new file mode 100644
index 0000000..37d4522
--- /dev/null
+++ b/src/test/resources/hash_crlf.txt
@@ -0,0 +1 @@
+This is a file that has specific line endings.
diff --git a/src/test/resources/hash_lf.txt b/src/test/resources/hash_lf.txt
new file mode 100644
index 0000000..37d4522
--- /dev/null
+++ b/src/test/resources/hash_lf.txt
@@ -0,0 +1 @@
+This is a file that has specific line endings.