Skip to content

Commit

Permalink
Update reported datatypes to conform with specs (#268)
Browse files Browse the repository at this point in the history
- [Zarr
V2](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding)
- [Zarr
V3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#core-data-types)

zarr-python had been reading our datatypes without complaining. Testing
by @chris-delg has revealed that TensorStore will not.

## Changes

- Link against nlohmann::json in examples builds (orthogonal to the
issue, but a light lift).
- Remove `sample_type_to_dtype()` from common.{h,cpp}
- Implement version-specific `sample_type_to_dtype()`
- Update tests to check data type is correct.
  • Loading branch information
aliddell committed Jul 9, 2024
1 parent c77a987 commit 463fe6e
Show file tree
Hide file tree
Showing 22 changed files with 115 additions and 30 deletions.
16 changes: 0 additions & 16 deletions src/common.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
#include "common.hh"
#include "zarr.hh"

#include "platform.h"

#include <cmath>
#include <thread>
Expand Down Expand Up @@ -199,19 +196,6 @@ common::bytes_per_chunk(const std::vector<Dimension>& dimensions,
return n_bytes;
}

/// @brief Look up the dtype string for a SampleType.
/// @details The lookup table is indexed directly by the numeric value of
///          @p t, so its entry order has to line up with the SampleType
///          enumerators (declared elsewhere; not visible in this file).
/// @param t An enumerated sample type.
/// @throw std::runtime_error if @p t does not index into the table.
/// @return A pointer to a statically allocated, NUL-terminated dtype string.
const char*
common::sample_type_to_dtype(SampleType t)
{
    static const char* dtype_by_sample_type[] = {
        "u1", "u2", "i1", "i2", "f4", "u2", "u2", "u2",
    };

    // Reject anything that falls outside the table before indexing into it.
    if (!(t < countof(dtype_by_sample_type))) {
        throw std::runtime_error("Invalid sample type.");
    }

    return dtype_by_sample_type[t];
}

const char*
common::sample_type_to_string(SampleType t) noexcept
{
Expand Down
7 changes: 0 additions & 7 deletions src/common.hh
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,6 @@ size_t
bytes_per_chunk(const std::vector<Dimension>& dimensions,
const SampleType& dtype);

/// @brief Get the Zarr dtype for a given SampleType.
/// @param t An enumerated sample type.
/// @throw std::runtime_error if @p t is not a valid SampleType.
/// @return A representation of the SampleType @p t expected by a Zarr reader.
const char*
sample_type_to_dtype(SampleType t);

/// @brief Get a string representation of the SampleType enum.
/// @param t An enumerated sample type.
/// @return A human-readable representation of the SampleType @par t.
Expand Down
31 changes: 30 additions & 1 deletion src/zarr.v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "nlohmann/json.hpp"

#include <bit>

namespace zarr = acquire::sink::zarr;

namespace {
Expand All @@ -22,6 +24,33 @@ compressed_zarr_v2_init()
}
return nullptr;
}

/// @brief Get the Zarr V2 dtype string for a given SampleType.
/// @details The result is a one-character byte-order prefix (">" when the
///          native byte order is big-endian, "<" otherwise) followed by a
///          type code: "u1", "u2", "i1", "i2" or "f4". The 10-, 12- and
///          14-bit unsigned sample types are reported as "u2", the same as
///          SampleType_u16.
/// @note NOTE(review): the Zarr V2 spec also defines a "|" prefix for types
///       where byte order is not relevant; single-byte types here still get
///       the native byte-order prefix. Confirm that target readers accept it.
/// @param t An enumerated sample type.
/// @throw std::runtime_error if @p t is not one of the handled sample types.
/// @return The dtype string to write into the array metadata.
std::string
sample_type_to_dtype(SampleType t)
{
    // Pick the endianness-independent part of the dtype first.
    const char* type_code = nullptr;
    switch (t) {
        case SampleType_u8:
            type_code = "u1";
            break;
        case SampleType_u10:
        case SampleType_u12:
        case SampleType_u14:
        case SampleType_u16:
            type_code = "u2";
            break;
        case SampleType_i8:
            type_code = "i1";
            break;
        case SampleType_i16:
            type_code = "i2";
            break;
        case SampleType_f32:
            type_code = "f4";
            break;
        default:
            throw std::runtime_error("Invalid SampleType: " +
                                     std::to_string(static_cast<int>(t)));
    }

    // Anything that is not big-endian is reported as little-endian.
    std::string dtype(1, std::endian::native == std::endian::big ? '>' : '<');
    dtype += type_code;
    return dtype;
}
} // end ::{anonymous} namespace

/// ZarrV2
Expand Down Expand Up @@ -249,7 +278,7 @@ zarr::ZarrV2::write_array_metadata_(size_t level) const
metadata["zarr_format"] = 2;
metadata["shape"] = array_shape;
metadata["chunks"] = chunk_shape;
metadata["dtype"] = common::sample_type_to_dtype(image_shape.type);
metadata["dtype"] = sample_type_to_dtype(image_shape.type);
metadata["fill_value"] = 0;
metadata["order"] = "C";
metadata["filters"] = nullptr;
Expand Down
26 changes: 25 additions & 1 deletion src/zarr.v3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,30 @@ compressed_zarr_v3_init()
}
return nullptr;
}

/// @brief Get the Zarr V3 data type name for a given SampleType.
/// @details Returns one of "uint8", "uint16", "int8", "int16" or "float32".
///          The 10-, 12- and 14-bit unsigned sample types are reported as
///          "uint16", the same as SampleType_u16.
/// @param t An enumerated sample type.
/// @throw std::runtime_error if @p t is not one of the handled sample types.
/// @return The data type name to write into the array metadata.
std::string
sample_type_to_dtype(SampleType t)
{
    if (t == SampleType_u8) {
        return "uint8";
    }

    // Sub-16-bit unsigned samples share the 16-bit unsigned name.
    if (t == SampleType_u10 || t == SampleType_u12 || t == SampleType_u14 ||
        t == SampleType_u16) {
        return "uint16";
    }

    if (t == SampleType_i8) {
        return "int8";
    }

    if (t == SampleType_i16) {
        return "int16";
    }

    if (t == SampleType_f32) {
        return "float32";
    }

    throw std::runtime_error("Invalid SampleType: " +
                             std::to_string(static_cast<int>(t)));
}
} // end ::{anonymous} namespace

zarr::ZarrV3::ZarrV3(BloscCompressionParams&& compression_params)
Expand Down Expand Up @@ -175,7 +199,7 @@ zarr::ZarrV3::write_array_metadata_(size_t level) const
});

metadata["chunk_memory_layout"] = "C";
metadata["data_type"] = common::sample_type_to_dtype(image_shape.type);
metadata["data_type"] = sample_type_to_dtype(image_shape.type);
metadata["extensions"] = json::array();
metadata["fill_value"] = 0;
metadata["shape"] = array_shape;
Expand Down
4 changes: 4 additions & 0 deletions tests/multiscales-metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ verify_layer(const LayerTestCase& test_case)
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

const auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_layer, shape[0]);
ASSERT_EQ(int, "%d", layer_frame_height, shape[1]);
Expand Down
4 changes: 3 additions & 1 deletion tests/repeat-start.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ validate(AcquireRuntime* runtime)
ASSERT_EQ(int, "%d", 32, chunk_shape[3]);

CHECK("C" == metadata["chunk_memory_layout"]);
CHECK("u1" == metadata["data_type"]);

CHECK("uint8" == metadata["data_type"].get<std::string>());

CHECK(metadata["extensions"].empty());

const auto array_shape = metadata["shape"];
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-compressed-multiscale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,10 @@ verify_layer(const LayerTestCase& test_case)
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

const auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_layer, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-compressed-with-chunking-and-rollover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", chunk_planes + 1, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-compressed-with-chunking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", chunk_planes, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-raw-chunk-size-larger-than-frame-size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-raw-multiscale-with-trivial-tile-size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ verify_layer(const LayerTestCase& test_case)
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

const auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_layer, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-raw-multiscale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ verify_layer(const LayerTestCase& test_case)
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

const auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_layer, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-raw-with-even-chunking-and-rollover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", chunk_planes + 1, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-raw-with-even-chunking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", chunk_planes, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-raw-with-ragged-chunking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", max_frame_count, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
5 changes: 5 additions & 0 deletions tests/write-zarr-v2-raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "platform.h" // clock
#include "logger.h"

#include <bit>
#include <filesystem>
#include <fstream>
#include <stdexcept>
Expand Down Expand Up @@ -233,6 +234,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-with-lz4-compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
4 changes: 4 additions & 0 deletions tests/write-zarr-v2-with-zstd-compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ validate()
std::ifstream f(zarray_path);
json zarray = json::parse(f);

const std::string dtype =
std::endian::native == std::endian::little ? "<u1" : ">u1";
CHECK(dtype == zarray["dtype"].get<std::string>());

auto shape = zarray["shape"];
ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]);
ASSERT_EQ(int, "%d", 1, shape[1]);
Expand Down
2 changes: 1 addition & 1 deletion tests/write-zarr-v3-compressed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ validate()
ASSERT_EQ(int, "%d", chunk_width, chunk_shape[3]);

CHECK("C" == metadata["chunk_memory_layout"]);
CHECK("u1" == metadata["data_type"]);
CHECK("uint8" == metadata["data_type"]);
CHECK(metadata["extensions"].empty());

const auto array_shape = metadata["shape"];
Expand Down
2 changes: 1 addition & 1 deletion tests/write-zarr-v3-raw-chunk-exceeds-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ validate()
ASSERT_EQ(int, "%d", chunk_width, chunk_shape[2]);

CHECK("C" == metadata["chunk_memory_layout"]);
CHECK("u1" == metadata["data_type"]);
CHECK("uint8" == metadata["data_type"]);
CHECK(metadata["extensions"].empty());

const auto array_shape = metadata["shape"];
Expand Down
2 changes: 1 addition & 1 deletion tests/write-zarr-v3-raw-with-ragged-sharding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ validate()
ASSERT_EQ(int, "%d", chunk_width, chunk_shape[2]);

CHECK("C" == metadata["chunk_memory_layout"]);
CHECK("u1" == metadata["data_type"]);
CHECK("uint8" == metadata["data_type"]);
CHECK(metadata["extensions"].empty());

const auto array_shape = metadata["shape"];
Expand Down
2 changes: 1 addition & 1 deletion tests/write-zarr-v3-raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ validate()
ASSERT_EQ(int, "%d", chunk_width, chunk_shape[3]);

CHECK("C" == metadata["chunk_memory_layout"]);
CHECK("u1" == metadata["data_type"]);
CHECK("uint8" == metadata["data_type"]);
CHECK(metadata["extensions"].empty());

const auto array_shape = metadata["shape"];
Expand Down

0 comments on commit 463fe6e

Please sign in to comment.