From c8c9e08234955d4857e14fc216f0a6881c14d023 Mon Sep 17 00:00:00 2001 From: James Sweet Date: Tue, 23 Nov 2021 12:14:56 -0500 Subject: [PATCH 1/2] Change hashing to ignore line feed differences between windows and linux --- .gitattributes | 2 ++ .../superzanti/serversync/files/FileHash.java | 32 ++++++++++++++++--- .../superzanti/serversync/LineFeedTest.java | 25 +++++++++++++++ src/test/resources/hash_crlf.txt | 1 + src/test/resources/hash_lf.txt | 1 + 5 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 .gitattributes create mode 100644 src/test/java/com/superzanti/serversync/LineFeedTest.java create mode 100644 src/test/resources/hash_crlf.txt create mode 100644 src/test/resources/hash_lf.txt diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..b1ec12f1 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +src/test/resources/hash_lf.txt text eol=lf +src/test/resources/hash_crlf.txt text eol=crlf \ No newline at end of file diff --git a/src/main/java/com/superzanti/serversync/files/FileHash.java b/src/main/java/com/superzanti/serversync/files/FileHash.java index 82291623..69b72ec3 100644 --- a/src/main/java/com/superzanti/serversync/files/FileHash.java +++ b/src/main/java/com/superzanti/serversync/files/FileHash.java @@ -3,7 +3,11 @@ import com.superzanti.serversync.util.Logger; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.security.DigestInputStream; @@ -11,12 +15,19 @@ public class FileHash { public static String hashFile(Path file) { - try ( + try { + InputStream stream = null; + if( FileHash.isBinaryFile(file) ){ + stream = Files.newInputStream(file); + }else{ + stream = new ByteArrayInputStream(String.join("", Files.readAllLines(file, StandardCharsets.UTF_8)).getBytes()); + } + DigestInputStream in = new DigestInputStream( - new BufferedInputStream(Files.newInputStream(file)), + new BufferedInputStream(stream), MessageDigest.getInstance("SHA-256") - ) - ) { + ); + byte[] buffer = new byte[8192]; while (in.read(buffer) > -1) { } return String.format("%064x", new BigInteger(1, in.getMessageDigest().digest())); @@ -26,4 +37,17 @@ public static String hashFile(Path file) { } return ""; } + + private static boolean isBinaryFile(Path f) throws IOException { + String type = Files.probeContentType(f); + if (type == null) { + //type couldn't be determined, assume binary + return true; + } else if (type.startsWith("text")) { + return false; + } else { + //type isn't text + return true; + } + } } diff --git a/src/test/java/com/superzanti/serversync/LineFeedTest.java b/src/test/java/com/superzanti/serversync/LineFeedTest.java new file mode 100644 index 00000000..9087e529 --- /dev/null +++ b/src/test/java/com/superzanti/serversync/LineFeedTest.java @@ -0,0 +1,25 @@ +package com.superzanti.serversync; + +import java.nio.file.Paths; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.superzanti.serversync.files.FileHash; +import com.superzanti.serversync.util.Logger; + +class LineFeedTest { + + public LineFeedTest() { + Logger.instantiate("test"); + } + + @Test + @DisplayName("Should hash text files ignoring line feeds") + void textHash() { + String hashLF = FileHash.hashFile(Paths.get(this.getClass().getResource("/hash_lf.txt").getPath())); + String hashCRLF = FileHash.hashFile(Paths.get(this.getClass().getResource("/hash_crlf.txt").getPath())); + assertEquals(hashLF, hashCRLF); + } +} \ No newline at end of file diff --git a/src/test/resources/hash_crlf.txt b/src/test/resources/hash_crlf.txt new file mode 100644 index 00000000..37d4522a --- /dev/null +++ b/src/test/resources/hash_crlf.txt @@ -0,0 +1 @@ +This is a file that has specific line endings. diff --git a/src/test/resources/hash_lf.txt b/src/test/resources/hash_lf.txt new file mode 100644 index 00000000..37d4522a --- /dev/null +++ b/src/test/resources/hash_lf.txt @@ -0,0 +1 @@ +This is a file that has specific line endings. From c285152e5dfb75f10c5cbf1b62afe13a9b7415e2 Mon Sep 17 00:00:00 2001 From: James Sweet Date: Tue, 23 Nov 2021 15:00:46 -0500 Subject: [PATCH 2/2] Update binary check to work more accurately cross platform --- .../superzanti/serversync/files/FileHash.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/superzanti/serversync/files/FileHash.java b/src/main/java/com/superzanti/serversync/files/FileHash.java index 69b72ec3..0d2bf2b2 100644 --- a/src/main/java/com/superzanti/serversync/files/FileHash.java +++ b/src/main/java/com/superzanti/serversync/files/FileHash.java @@ -12,6 +12,7 @@ import java.nio.file.Path; import java.security.DigestInputStream; import java.security.MessageDigest; +import java.util.Arrays; public class FileHash { public static String hashFile(Path file) { @@ -20,7 +21,7 @@ public static String hashFile(Path file) { if( FileHash.isBinaryFile(file) ){ stream = Files.newInputStream(file); }else{ - stream = new ByteArrayInputStream(String.join("", Files.readAllLines(file, StandardCharsets.UTF_8)).getBytes()); + stream = new ByteArrayInputStream(String.join("", Files.readAllLines(file, StandardCharsets.UTF_8)).getBytes(StandardCharsets.UTF_8)); } DigestInputStream in = new DigestInputStream( @@ -39,11 +40,20 @@ public static String hashFile(Path file) { } private static boolean isBinaryFile(Path f) throws IOException { + String[] textMine = {"text", "application/xml", "application/json", "application/javascript", "application/vnd.ms-excel"}; + String type = Files.probeContentType(f); if (type == null) { - //type couldn't be determined, assume binary - return true; - } else if (type.startsWith("text")) { + //type couldn't be determined, guess via first 8192 bytes + try (InputStream stream = new BufferedInputStream(Files.newInputStream(f))) { + byte[] buffer = new byte[8192]; + int read = stream.read(buffer); + for( int i = 0; i < read; i++ ){ + if(buffer[i] == 0x00) return true; + } + return false; + } + } else if (Arrays.stream(textMine).anyMatch(type::startsWith)) { return false; } else { //type isn't text