Skip to content

Commit

Permalink
Merge pull request #1570 from NOAA-GSD/ejh_compact
Browse files Browse the repository at this point in the history
enable compact storage for netcdf-4 vars
  • Loading branch information
WardF authored Dec 19, 2019
2 parents 3acf69c + fd604dd commit 6c75e97
Show file tree
Hide file tree
Showing 8 changed files with 365 additions and 246 deletions.
1 change: 1 addition & 0 deletions include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ typedef struct NC_VAR_INFO
void *fill_value;
size_t *chunksizes;
nc_bool_t contiguous; /**< True if variable is stored contiguously in HDF5 file */
nc_bool_t compact; /**< True if variable is in compact storage in HDF5 file */
int parallel_access; /**< Type of parallel access for I/O on variable (collective or independent) */
nc_bool_t dimscale; /**< True if var is a dimscale */
nc_bool_t *dimscale_attached; /**< Array of flags that are true if dimscale is attached for that dim index */
Expand Down
1 change: 1 addition & 0 deletions include/netcdf.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ NOTE: The NC_MAX_DIMS, NC_MAX_ATTRS, and NC_MAX_VARS limits
/**@{*/
#define NC_CHUNKED 0
#define NC_CONTIGUOUS 1
#define NC_COMPACT 2
/**@}*/

/** In HDF5 files you can set check-summing for each variable.
Expand Down
50 changes: 36 additions & 14 deletions libdispatch/dvar.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,9 +452,25 @@ nc_def_var_fletcher32(int ncid, int varid, int fletcher32)
/**
Define chunking parameters for a variable
The function nc_def_var_chunking sets the chunking parameters for a
variable in a netCDF-4 file. It can set the chunk sizes to get chunked
storage, or it can set the contiguous flag to get contiguous storage.
The function nc_def_var_chunking sets the storage and, optionally,
the chunking parameters for a variable in a netCDF-4 file.
The storage may be set to NC_CONTIGUOUS, NC_COMPACT, or NC_CHUNKED.
Contiguous storage means the variable is stored as one block of
data in the file.
Compact storage means the variable is stored in the header record
of the file. This can have large performance benefits on HPC systems
running many processors. Compact storage is only available for
variables whose data are 64 KB or less. Attempting to turn on
compact storage for a variable that is too large will result in the
::NC_EVARSIZE error.
Chunked storage means the data are stored as chunks of
user-configurable size. Chunked storage is required for variables
with one or more unlimited dimensions, or variables which use
compression.
The total size of a chunk must be less than 4 GiB. That is, the
product of all chunksizes and the size of the data (or the size of
Expand All @@ -467,20 +483,21 @@ nc_def_var_fletcher32(int ncid, int varid, int fletcher32)
Note that this does not work for scalar variables. Only non-scalar
variables can have chunking.
@param[in] ncid NetCDF ID, from a previous call to nc_open or
nc_create.
@param ncid NetCDF ID, from a previous call to nc_open() or
nc_create().
@param[in] varid Variable ID.
@param varid Variable ID.
@param[in] storage If ::NC_CONTIGUOUS, then contiguous storage is used
for this variable. Variables with one or more unlimited dimensions
cannot use contiguous storage. If contiguous storage is turned on, the
chunksizes parameter is ignored. If ::NC_CHUNKED, then chunked storage
is used for this variable. Chunk sizes may be specified with the
chunksizes parameter or default sizes will be used if that parameter
is NULL.
@param storage If ::NC_CONTIGUOUS or ::NC_COMPACT, then contiguous
or compact storage is used for this variable. Variables with one or
more unlimited dimensions cannot use contiguous or compact
storage. If contiguous or compact storage is turned on, the
chunksizes parameter is ignored. If ::NC_CHUNKED, then chunked
storage is used for this variable. Chunk sizes may be specified
with the chunksizes parameter or default sizes will be used if that
parameter is NULL.
@param[in] chunksizesp A pointer to an array list of chunk sizes. The
@param chunksizesp A pointer to an array list of chunk sizes. The
array must have one chunksize for each dimension of the variable. If
::NC_CONTIGUOUS or ::NC_COMPACT storage is set, then the chunksizes
parameter is ignored.
Expand All @@ -500,6 +517,10 @@ nc_def_var_fletcher32(int ncid, int varid, int fletcher32)
@return ::NC_EBADCHUNK Returned if the chunk size specified for a
variable is larger than the length of the dimensions associated with
the variable.
@return ::NC_EVARSIZE Compact storage attempted for variable bigger
than 64 KB.
@return ::NC_EINVAL Attempt to set contiguous or compact storage
for var with one or more unlimited dimensions.
@section nc_def_var_chunking_example Example
Expand Down Expand Up @@ -539,6 +560,7 @@ nc_def_var_fletcher32(int ncid, int varid, int fletcher32)
if (chunksize[d] != chunksize_in[d]) ERR;
if (storage_in != NC_CHUNKED) ERR;
@endcode
@author Ed Hartnett, Dennis Heimbigner
*/
int
nc_def_var_chunking(int ncid, int varid, int storage,
Expand Down
4 changes: 3 additions & 1 deletion libhdf5/hdf5open.c
Original file line number Diff line number Diff line change
Expand Up @@ -1088,8 +1088,10 @@ static int get_chunking_info(hid_t propid, NC_VAR_INFO_T *var)
for (d = 0; d < var->ndims; d++)
var->chunksizes[d] = chunksize[d];
}
else if (layout == H5D_CONTIGUOUS || layout == H5D_COMPACT)
else if (layout == H5D_CONTIGUOUS)
var->contiguous = NC_TRUE;
else if (layout == H5D_COMPACT)
var->compact = NC_TRUE;

return NC_NOERR;
}
Expand Down
79 changes: 53 additions & 26 deletions libhdf5/hdf5var.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
* order. */
#define NC_TEMP_NAME "_netcdf4_temporary_variable_name_for_rename"

/** Number of bytes in 64 KB (64 * 1024). Used as the upper bound on
 * the total data size of a variable that requests compact storage;
 * larger variables are rejected with NC_EVARSIZE. */
#define SIXTY_FOUR_KB (65536)

#ifdef LOGGING
/**
* Report the chunksizes selected for a variable.
Expand Down Expand Up @@ -707,41 +710,65 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate,
var->contiguous = NC_FALSE;
}

/* Does the user want a contiguous dataset? Not so fast! Make sure
* that there are no unlimited dimensions, and no filters in use
* for this data. */
if (contiguous && *contiguous)
/* Handle storage settings. */
if (contiguous)
{
if (var->deflate || var->fletcher32 || var->shuffle)
return NC_EINVAL;

for (d = 0; d < var->ndims; d++)
if (var->dim[d]->unlimited)
/* Does the user want a contiguous or compact dataset? Not so
* fast! Make sure that there are no unlimited dimensions, and
* no filters in use for this data. */
if (*contiguous)
{
if (var->deflate || var->fletcher32 || var->shuffle)
return NC_EINVAL;
var->contiguous = NC_TRUE;
}

/* Chunksizes anyone? */
if (contiguous && *contiguous == NC_CHUNKED)
{
var->contiguous = NC_FALSE;
for (d = 0; d < var->ndims; d++)
if (var->dim[d]->unlimited)
return NC_EINVAL;
}

/* If the user provided chunksizes, check that they are not too
* big, and that their total size of chunk is less than 4 GB. */
if (chunksizes)
/* Handle chunked storage settings. */
if (*contiguous == NC_CHUNKED)
{
var->contiguous = NC_FALSE;

if ((retval = check_chunksizes(grp, var, chunksizes)))
return retval;
/* If the user provided chunksizes, check that they are not too
* big, and that their total size of chunk is less than 4 GB. */
if (chunksizes)
{
/* Check the chunksizes for validity. */
if ((retval = check_chunksizes(grp, var, chunksizes)))
return retval;

/* Ensure chunksize is smaller than dimension size */
for (d = 0; d < var->ndims; d++)
if(!var->dim[d]->unlimited && var->dim[d]->len > 0 && chunksizes[d] > var->dim[d]->len)
return NC_EBADCHUNK;
/* Ensure chunksize is smaller than dimension size */
for (d = 0; d < var->ndims; d++)
if (!var->dim[d]->unlimited && var->dim[d]->len > 0 &&
chunksizes[d] > var->dim[d]->len)
return NC_EBADCHUNK;

/* Set the chunksizes for this variable. */
for (d = 0; d < var->ndims; d++)
var->chunksizes[d] = chunksizes[d];
}
}
else if (*contiguous == NC_CONTIGUOUS)
{
var->contiguous = NC_TRUE;
}
else if (*contiguous == NC_COMPACT)
{
size_t ndata = 1;

/* Set the chunksizes for this variable. */
/* Find the number of elements in the data. */
for (d = 0; d < var->ndims; d++)
var->chunksizes[d] = chunksizes[d];
ndata *= var->dim[d]->len;

/* Ensure var is small enough to fit in compact
* storage. It must be <= 64 KB. */
if (ndata * var->type_info->size > SIXTY_FOUR_KB)
return NC_EVARSIZE;

var->contiguous = NC_FALSE;
var->compact = NC_TRUE;
}
}

Expand Down
12 changes: 10 additions & 2 deletions libhdf5/nc4hdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -994,11 +994,17 @@ var_create_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var, nc_bool_t write_dimid
}
}

/* Set the var storage to contiguous, compact, or chunked. */
if (var->contiguous)
{
if (H5Pset_layout(plistid, H5D_CONTIGUOUS) < 0)
BAIL(NC_EHDFERR);
}
else if (var->compact)
{
if (H5Pset_layout(plistid, H5D_COMPACT) < 0)
BAIL(NC_EHDFERR);
}
else
{
if (H5Pset_chunk(plistid, var->ndims, chunksize) < 0)
Expand Down Expand Up @@ -1106,9 +1112,11 @@ nc4_adjust_var_cache(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var)
int d;
int retval;

/* Nothing to be done. */
if (var->contiguous)
/* Nothing to be done for contiguous or compact data. */
if (var->contiguous || var->compact)
return NC_NOERR;

/* No cache adjusting for parallel builds. */
#ifdef USE_PARALLEL4
return NC_NOERR;
#endif
Expand Down
16 changes: 13 additions & 3 deletions libsrc4/nc4var.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,16 +187,26 @@ NC4_inq_var_all(int ncid, int varid, char *name, nc_type *xtypep,
if (nattsp)
*nattsp = ncindexcount(var->att);

/* Chunking stuff. */
if (!var->contiguous && chunksizesp)
/* Did the user want the chunksizes? */
if (!var->contiguous && !var->compact && chunksizesp)
{
for (d = 0; d < var->ndims; d++)
{
chunksizesp[d] = var->chunksizes[d];
LOG((4, "chunksizesp[%d]=%d", d, chunksizesp[d]));
}
}

/* Did the user inquire about the storage? */
if (contiguousp)
*contiguousp = var->contiguous ? NC_CONTIGUOUS : NC_CHUNKED;
{
if (var->contiguous)
*contiguousp = NC_CONTIGUOUS;
else if (var->compact)
*contiguousp = NC_COMPACT;
else
*contiguousp = NC_CHUNKED;
}

/* Filter stuff. */
if (deflatep)
Expand Down
Loading

0 comments on commit 6c75e97

Please sign in to comment.