Skip to content

Commit

Permalink
Merge pull request #1654 from DennisHeimbigner/utilcompact.dmh
Browse files Browse the repository at this point in the history
Make utilities support NC_COMPACT
  • Loading branch information
WardF authored Mar 2, 2020
2 parents 0014db9 + 7353760 commit 8a799c7
Show file tree
Hide file tree
Showing 21 changed files with 275 additions and 198 deletions.
2 changes: 1 addition & 1 deletion h5_test/tst_h_vars.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ main()
#define SZIP_VAR_NAME "szip_var"
#define SZIP_DIM1_LEN 32
{
int data[DIM1_LEN];
int data[SZIP_DIM1_LEN];
hid_t plistid;
hsize_t chunksize[NDIM1] = {SZIP_DIM1_LEN};
int options_mask = 32, pixels_per_block = 4;
Expand Down
4 changes: 2 additions & 2 deletions libhdf5/hdf5filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ NC4_filter_actions(int ncid, int varid, int op, NC_Filterobject* args)
/* If the HDF5 dataset has already been created, then it is too
* late to set all the extra stuff. */
if (!(h5->flags & NC_INDEF)) return THROW(NC_EINDEFINE);
if (!var->ndims) return NC_NOERR; /* For scalars, ignore */
if (!var->ndims) return NC_EINVAL; /* For scalars, complain */
if (var->created)
return THROW(NC_ELATEDEF);
/* Can't turn on parallel and szip before HDF5 1.10.2. */
Expand Down Expand Up @@ -315,7 +315,7 @@ NC4_filter_actions(int ncid, int varid, int op, NC_Filterobject* args)
#endif /* USE_PARALLEL */
} break;
case NCFILTER_INQ: {
if (!var->ndims) return THROW(NC_ENOFILTER); /* For scalars, fail */
if (!var->ndims) return THROW(NC_EINVAL); /* For scalars, fail */
if(obj->sort != NC_FILTER_SORT_SPEC) return THROW(NC_EFILTER);
idp = &obj->u.spec.filterid;
nparamsp = &obj->u.spec.nparams;
Expand Down
72 changes: 29 additions & 43 deletions libhdf5/hdf5var.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,11 @@ nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var)
total_chunk_size = (double) type_size;
#endif

if(var->chunksizes == NULL) {
if((var->chunksizes = calloc(1,sizeof(size_t)*var->ndims)) == NULL)
return NC_ENOMEM;
}

/* How many values in the variable (or one record, if there are
* unlimited dimensions). */
for (d = 0; d < var->ndims; d++)
Expand Down Expand Up @@ -557,17 +562,16 @@ NC4_def_var(int ncid, const char *name, nc_type xtype, int ndims,
* variables which may be contiguous.) */
LOG((4, "allocating array of %d size_t to hold chunksizes for var %s",
var->ndims, var->hdr.name));
if (var->ndims)
if (var->ndims) {
if (!(var->chunksizes = calloc(var->ndims, sizeof(size_t))))
BAIL(NC_ENOMEM);

if ((retval = nc4_find_default_chunksizes2(grp, var)))
BAIL(retval);

/* Is this a variable with a chunksize greater than the current
* cache size? */
if ((retval = nc4_adjust_var_cache(grp, var)))
BAIL(retval);
if ((retval = nc4_find_default_chunksizes2(grp, var)))
BAIL(retval);
/* Is this a variable with a chunksize greater than the current
* cache size? */
if ((retval = nc4_adjust_var_cache(grp, var)))
BAIL(retval);
}

/* If the user names this variable the same as a dimension, but
* doesn't use that dimension first in its list of dimension ids,
Expand Down Expand Up @@ -672,50 +676,27 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
if (var->created)
return NC_ELATEDEF;

#if 0
/* Check compression options. */
if (deflate && !deflate_level)
return NC_EINVAL;

/* Valid deflate level? */
if (deflate)
{
if (*deflate)
if (*deflate_level < NC_MIN_DEFLATE_LEVEL ||
*deflate_level > NC_MAX_DEFLATE_LEVEL)
return NC_EINVAL;

/* For scalars, just ignore attempt to deflate. */
if (!var->ndims)
return NC_NOERR;

/* If szip is in use, return an error. */
if ((retval = nc_inq_var_szip(ncid, varid, &option_mask, NULL)))
return retval;
if (option_mask)
return NC_EINVAL;

/* Set the deflate settings. */
var->contiguous = NC_FALSE;
var->deflate = *deflate;
if (*deflate)
var->deflate_level = *deflate_level;
LOG((3, "%s: *deflate_level %d", __func__, *deflate_level));
/* Cannot set filters of any sort on scalars */
if(var->ndims == 0) {
if(shuffle && *shuffle)
return NC_EINVAL;
if(fletcher32 && *fletcher32)
return NC_EINVAL;
}
#endif

/* Shuffle filter? */
if (shuffle)
{
var->shuffle = *shuffle;
var->contiguous = NC_FALSE;
}
var->compact = NC_FALSE; }

/* Fletcher32 checksum error protection? */
if (fletcher32)
{
var->fletcher32 = *fletcher32;
var->contiguous = NC_FALSE;
var->compact = NC_FALSE;
}

#ifdef USE_PARALLEL
Expand All @@ -736,7 +717,7 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
/* Does the user want a contiguous or compact dataset? Not so
* fast! Make sure that there are no unlimited dimensions, and
* no filters in use for this data. */
if (*storage)
if (*storage != NC_CHUNKED)
{
if (nclistlength(var->filters) > 0 || var->fletcher32 || var->shuffle)
return NC_EINVAL;
Expand All @@ -747,9 +728,13 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
}

/* Handle chunked storage settings. */
if (*storage == NC_CHUNKED)
if (*storage == NC_CHUNKED && var->ndims == 0) {
var->contiguous = NC_TRUE;
var->compact = NC_FALSE;
} else if (*storage == NC_CHUNKED)
{
var->contiguous = NC_FALSE;
var->compact = NC_FALSE;

/* If the user provided chunksizes, check that they are not too
* big, and that their total size of chunk is less than 4 GB. */
Expand All @@ -773,6 +758,7 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
else if (*storage == NC_CONTIGUOUS)
{
var->contiguous = NC_TRUE;
var->compact = NC_FALSE;
}
else if (*storage == NC_COMPACT)
{
Expand All @@ -798,7 +784,7 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *unused1,
{
/* Determine default chunksizes for this variable (do nothing
* for scalar vars). */
if (var->chunksizes && !var->chunksizes[0])
if (var->chunksizes == NULL || var->chunksizes[0] == 0)
if ((retval = nc4_find_default_chunksizes2(grp, var)))
return retval;

Expand Down
2 changes: 1 addition & 1 deletion libsrc4/nc4var.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ NC4_var_par_access(int ncid, int varid, int par_access)
/* If zlib, shuffle, or fletcher32 filters are in use, then access
* must be collective. Fail an attempt to set such a variable to
* independent access. */
if ((var->deflate || var->shuffle || var->fletcher32) &&
if ((nclistlength(var->filters) > 0 || var->shuffle || var->fletcher32) &&
par_access == NC_INDEPENDENT)
return NC_EINVAL;

Expand Down
4 changes: 2 additions & 2 deletions nc_test4/tst_vars2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1478,8 +1478,8 @@ main(int argc, char **argv)
if (shuffle_in || deflate_in) ERR;
if (nc_inq_var_deflate(ncid, varid, NULL, NULL, NULL)) ERR;

/* Deflate is ignored for scalar. */
if (nc_def_var_deflate(ncid, varid_scalar, 0, 1, 4)) ERR;
/* Deflate fails for scalar. */
if (nc_def_var_deflate(ncid, varid_scalar, 0, 1, 4) != NC_EINVAL) ERR;
if (nc_inq_var_deflate(ncid, varid, &shuffle_in, &deflate_in, &deflate_level_in)) ERR;
if (shuffle_in || deflate_in) ERR;

Expand Down
4 changes: 4 additions & 0 deletions ncdump/cdl/ref_tst_special_atts3.cdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ variables:
var4:_Shuffle = "true" ;
var4:_Endianness = "little" ;
var4:_NoFill = "true" ;
int var5(dim1, dim2, dim3) ;
var5:_Storage = "compact" ;
int var6;
var6:_Storage = "compact" ;
float slp(time, lat, lon) ;
slp:_FillValue = 1.e+15f ;
slp:_DeflateLevel = 1 ;
Expand Down
39 changes: 39 additions & 0 deletions ncdump/chunkspec.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct VarChunkSpec {
size_t rank; /* number of dimensions in chunkspec string */
size_t chunksizes[NC_MAX_VAR_DIMS]; /* corresponding chunk sizes */
bool_t omit; /* true if chunking to be turned off */
int kind;
int igrpid; /* container of the (input) variable */
int ivarid; /* (input) Variable whose chunks are specified */
};
Expand Down Expand Up @@ -208,6 +209,20 @@ dimchunkspec_omit(void) {
}


/* Return 1 if indimid matches one of the dimension ids recorded in
 * dimchunkspecs (i.e. a per-dimension chunk spec is defined for it),
 * 0 if no entry matches. */
bool_t
dimchunkspec_exists(int indimid) {
    int pos = 0;
    /* Linear scan over the recorded dimension ids. */
    while(pos < dimchunkspecs.ndims) {
        if(dimchunkspecs.idimids[pos] == indimid)
            return 1;
        pos++;
    }
    return 0;
}


/*
* Parse per-variable chunkspec string and convert into varchunkspec structure.
* ncid: location ID of open netCDF file or group in an open file
Expand Down Expand Up @@ -260,6 +275,16 @@ varchunkspec_parse(int igrp, const char *spec0)
goto done;
}

/* See if the remainder matches 'compact' or 'contiguous' */
if(strcasecmp(p,"compact")==0) {
chunkspec->kind = NC_COMPACT;
goto notchunked;
} if(strcasecmp(p,"contiguous")==0) {
chunkspec->kind = NC_CONTIGUOUS;
goto notchunked;
} else
chunkspec->kind = NC_CHUNKED;

/* Iterate over dimension sizes */
while(*p) {
unsigned long dimsize;
Expand Down Expand Up @@ -293,6 +318,7 @@ varchunkspec_parse(int igrp, const char *spec0)
if(chunkspec->chunksizes[i] > len) {ret = NC_EBADCHUNK; goto done;}
}

notchunked:
/* add the chunkspec to our list */
listpush(varchunkspecs,chunkspec);
chunkspec = NULL;
Expand All @@ -307,6 +333,19 @@ varchunkspec_parse(int igrp, const char *spec0)

/* Accessors */

/* Look up the storage kind recorded for the variable identified by
 * (grpid, varid) in varchunkspecs.
 * Returns the kind stored in the first matching spec (NC_CHUNKED,
 * NC_CONTIGUOUS or NC_COMPACT); if no spec matches, returns
 * NC_CONTIGUOUS. */
int
varchunkspec_kind(int grpid, int varid)
{
    int idx = 0;
    while(idx < listlength(varchunkspecs)) {
        struct VarChunkSpec* entry = listget(varchunkspecs,idx);
        idx++;
        /* Skip entries that belong to a different group or variable. */
        if(entry->igrpid != grpid || entry->ivarid != varid)
            continue;
        return entry->kind;
    }
    return NC_CONTIGUOUS; /* fallback when the variable has no spec */
}

bool_t
varchunkspec_exists(int igrpid, int ivarid)
{
Expand Down
7 changes: 7 additions & 0 deletions ncdump/chunkspec.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ chunkspec_parse(int ncid, const char *spec);
extern size_t
dimchunkspec_size(int dimid);

/* Return 1 if a dimension spec is defined
* corresponding to dimid, 0 if not found */
extern bool_t
dimchunkspec_exists(int indimid);

/* Return number of dimensions for which chunking was specified in
* chunkspec string on command line, 0 if no chunkspec string was
* specified. */
Expand All @@ -36,6 +41,8 @@ extern size_t varchunkspec_ndims(int grpid, int varid);

extern bool_t varchunkspec_exists(int grpid, int varid);

extern int varchunkspec_kind(int grpid, int varid);

extern void chunkspecinit(void);


Expand Down
8 changes: 7 additions & 1 deletion ncdump/nccopy.1
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ other filters such as checksums. Changing the chunking in a netCDF
file can also greatly speed up access, by choosing chunk shapes that
are appropriate for the most common access patterns.
.IP
The \fIchunkspec\fP argument has two forms. The first form is the
The \fIchunkspec\fP argument has several forms. The first form is the
original, deprecated form and is a string of comma-separated associations,
each specifying a dimension name, a '/' character, and optionally the
corresponding chunk length for that dimension. No blanks should
Expand Down Expand Up @@ -186,6 +186,12 @@ If the same variable is specified
more than once, the second and later specifications are ignored.
Also, this second form, per-variable chunking, takes precedence over any
per-dimension chunking except the bare "/" case.
.IP
The third form of the \fIchunkspec\fP has the
syntax: \fI var:compact\fP or \fI var:contiguous\fP.
This explicitly attempts to set the variable storage type as
compact or contiguous, respectively. These may be overridden
if other flags require the variable to be chunked.
.IP "\fB \-v \fP \fI var1,... \fP"
The output will include data values for the specified variables, in
addition to the declarations of all dimensions, variables, and
Expand Down
Loading

0 comments on commit 8a799c7

Please sign in to comment.