From 3c6ca369205406b979693533923efb13d2f2f17e Mon Sep 17 00:00:00 2001 From: James Mudd Date: Fri, 13 Dec 2024 18:09:42 +0000 Subject: [PATCH 1/3] Add failing test --- .../io/jhdf/writing/StringWritingTest.java | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java b/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java index 6156a6e5..4a297884 100644 --- a/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java +++ b/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java @@ -165,7 +165,7 @@ void writeVarStringAttributes() throws Exception { } } - @Test() + @Test @Order(4) void writeReallyLongStrings() throws Exception { Path tempFile = Files.createTempFile(this.getClass().getSimpleName(), ".hdf5"); @@ -192,4 +192,21 @@ void writeReallyLongStrings() throws Exception { tempFile.toFile().delete(); } } + + @Test + void writingNonAsciiStrings() throws Exception { + Path tempFile = Files.createTempFile(this.getClass().getSimpleName(), ".hdf5"); + WritableHdfFile writableHdfFile = HdfFile.write(tempFile); + + WritiableDataset dataset1 = writableHdfFile.putDataset("dataset1", "你好"); + dataset1.putAttribute("attr", "你好"); + + WritiableDataset dataset2 = writableHdfFile.putDataset("dataset2", new String[] {"你好"}); + dataset2.putAttribute("attr", new String[] {"你好"}); + + WritiableDataset dataset3 = writableHdfFile.putDataset("dataset3", new String[][] {{"你好"}, {"世界"}}); + dataset3.putAttribute("attr", new String[][] {{"你好"}, {"世界"}}); + + writableHdfFile.close(); + } } From 30b964d3d33a0e9bb17b0d8693316f9fb00328a3 Mon Sep 17 00:00:00 2001 From: James Mudd Date: Fri, 13 Dec 2024 18:16:17 +0000 Subject: [PATCH 2/3] Fix bug calculating buffer size with non-ascii chars --- jhdf/src/main/java/io/jhdf/object/datatype/StringData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jhdf/src/main/java/io/jhdf/object/datatype/StringData.java b/jhdf/src/main/java/io/jhdf/object/datatype/StringData.java index fa204a1b..c6094e07 100644 --- a/jhdf/src/main/java/io/jhdf/object/datatype/StringData.java +++ b/jhdf/src/main/java/io/jhdf/object/datatype/StringData.java @@ -167,7 +167,7 @@ public void setBufferLimit(ByteBuffer byteBuffer) { public static StringData create(Object data) { int maxLength = Arrays.stream(Utils.flatten(data)) .map(String.class::cast) - .mapToInt(String::length) + .mapToInt(s -> StandardCharsets.UTF_8.encode(s).limit()) .max().getAsInt(); return new StringData(PaddingType.NULL_TERMINATED, StandardCharsets.UTF_8, maxLength); From c8352b1ed7d91c5d920bec3f73fd88c7bbcac501 Mon Sep 17 00:00:00 2001 From: James Mudd Date: Fri, 13 Dec 2024 18:23:19 +0000 Subject: [PATCH 3/3] Improve tests validating file --- .../io/jhdf/writing/StringWritingTest.java | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java b/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java index 4a297884..812a9bdd 100644 --- a/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java +++ b/jhdf/src/test/java/io/jhdf/writing/StringWritingTest.java @@ -193,9 +193,11 @@ void writeReallyLongStrings() throws Exception { } } + // https://github.com/jamesmudd/jhdf/issues/656 @Test + @Order(5) void writingNonAsciiStrings() throws Exception { - Path tempFile = Files.createTempFile(this.getClass().getSimpleName(), ".hdf5"); + tempFile = Files.createTempFile(this.getClass().getSimpleName(), ".hdf5"); WritableHdfFile writableHdfFile = HdfFile.write(tempFile); WritiableDataset dataset1 = writableHdfFile.putDataset("dataset1", "你好"); @@ -208,5 +210,37 @@ void writingNonAsciiStrings() throws Exception { dataset3.putAttribute("attr", new String[][] {{"你好"}, {"世界"}}); writableHdfFile.close(); + + // Now read it back + try (HdfFile hdfFile = new HdfFile(tempFile)) { + Dataset dataset1Readback = hdfFile.getDatasetByPath("dataset1"); + assertThat(dataset1Readback.getData()).isEqualTo("你好"); + assertThat(dataset1Readback.getAttribute("attr").getData()) + .isEqualTo("你好"); + + Dataset dataset2Readback = hdfFile.getDatasetByPath("dataset2"); + assertThat(dataset2Readback.getData()).isEqualTo(new String[] {"你好"}); + assertThat(dataset2Readback.getAttribute("attr").getData()) + .isEqualTo(new String[] {"你好"}); + + Dataset dataset3Readback = hdfFile.getDatasetByPath("dataset3"); + assertThat(dataset3Readback.getData()).isEqualTo(new String[][] {{"你好"}, {"世界"}}); + assertThat(dataset3Readback.getAttribute("attr").getData()) + .isEqualTo(new String[][] {{"你好"}, {"世界"}}); + } + } + + @Test + @Order(6) + @EnabledIfH5DumpAvailable + void readNonAsciiStringDatasetsWithH5Dump() throws Exception { + // Read with h5dump + HDF5FileXml hdf5FileXml = H5Dump.dumpAndParse(tempFile); + + // Read with jhdf + try (HdfFile hdfFile = new HdfFile(tempFile)) { + // Compare + H5Dump.assetXmlAndHdfFileMatch(hdf5FileXml, hdfFile); + } } }