Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suppress filters on variables with non-fixed-size types. #2716

Merged
merged 4 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ extern int NC4_inq_type_fixed_size(int ncid, nc_type xtype, int* isfixedsizep);
/* Manage the fixed/var sized'ness of a type */
extern int NC4_recheck_varsize(NC_TYPE_INFO_T* parenttype, nc_type addedtype);
extern int NC4_set_varsize(NC_TYPE_INFO_T* parenttype);
extern int NC4_var_varsized(NC_VAR_INFO_T* var);

/* Close the file. */
extern int nc4_close_netcdf4_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio);
Expand Down
6 changes: 5 additions & 1 deletion libdispatch/dfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "netcdf_filter.h"
#include "ncdispatch.h"
#include "nc4internal.h"
#include "nclog.h"

#ifdef USE_HDF5
#include "hdf5internal.h"
Expand Down Expand Up @@ -134,7 +135,10 @@ nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const un
if((stat = nc_inq_vartype(ncid,varid,&xtype))) return stat;
/* If the variable's type is not fixed-size, then signal error */
if((stat = NC4_inq_type_fixed_size(ncid, xtype, &fixedsize))) return stat;
if(!fixedsize) return NC_EFILTER;
if(!fixedsize) {
nclog(NCLOGWARN,"Filters cannot be applied to variable length data types.");
return NC_NOERR; /* Deliberately suppress */
}
if((stat = ncp->dispatch->def_var_filter(ncid,varid,id,nparams,params))) goto done;
done:
return stat;
Expand Down
14 changes: 12 additions & 2 deletions libhdf5/hdf5open.c
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,7 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var)
int f;
int stat = NC_NOERR;
NC_HDF5_VAR_INFO_T *hdf5_var;
int varsized = 0;

assert(var);

Expand All @@ -1070,12 +1071,16 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var)
if ((num_filters = H5Pget_nfilters(propid)) < 0)
{stat = NC_EHDFERR; goto done;}

/* If the type of the variable is variable length, and
it has filters defined, suppress the variable. */
varsized = NC4_var_varsized(var);

for (f = 0; f < num_filters; f++)
{
int flags = 0;
htri_t avail = -1;
unsigned flags = 0;
cd_nelems = 0;
if ((filter = H5Pget_filter2(propid, f, NULL, &cd_nelems, NULL, 0, NULL, NULL)) < 0)
if ((filter = H5Pget_filter2(propid, f, &flags, &cd_nelems, NULL, 0, NULL, NULL)) < 0)
{stat = NC_ENOFILTER; goto done;} /* Assume this means an unknown filter */
if((avail = H5Zfilter_avail(filter)) < 0)
{stat = NC_EHDFERR; goto done;} /* Something in HDF5 went wrong */
Expand All @@ -1084,6 +1089,11 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var)
/* mark variable as unreadable */
hdf5_var->flags |= NC_HDF5_VAR_FILTER_MISSING;
}
/* If variable type is varsized and filter is mandatory then this variable is unreadable */
if(varsized && (flags & H5Z_FLAG_MANDATORY) != 0) {
/* mark variable as unreadable */
hdf5_var->flags |= NC_HDF5_VAR_FILTER_MISSING;
}
if((cd_values = calloc(sizeof(unsigned int),cd_nelems))==NULL)
{stat = NC_ENOMEM; goto done;}
if ((filter = H5Pget_filter2(propid, f, NULL, &cd_nelems, cd_values, 0, NULL, NULL)) < 0)
Expand Down
72 changes: 46 additions & 26 deletions libnczarr/zclose.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,49 @@ zclose_gatts(NC_GRP_INFO_T* grp)
return stat;
}

/**
* @internal Close resources for single var
*
* @param var var to reclaim
*
* @return ::NC_NOERR No error.
* @author Dennis Heimbigner
*/
int
NCZ_zclose_var1(NC_VAR_INFO_T* var)
{
int stat = NC_NOERR;
NCZ_VAR_INFO_T* zvar;
NC_ATT_INFO_T* att;
int a;

assert(var && var->format_var_info);
zvar = var->format_var_info;;
for(a = 0; a < ncindexsize(var->att); a++) {
NCZ_ATT_INFO_T* zatt;
att = (NC_ATT_INFO_T*)ncindexith(var->att, a);
assert(att && att->format_att_info);
zatt = att->format_att_info;
nullfree(zatt);
att->format_att_info = NULL; /* avoid memory errors */
}
#ifdef ENABLE_NCZARR_FILTERS
/* Reclaim filters */
if(var->filters != NULL) {
(void)NCZ_filter_freelists(var);
}
var->filters = NULL;
#endif
/* Reclaim the type */
if(var->type_info) (void)zclose_type(var->type_info);
if(zvar->cache) NCZ_free_chunk_cache(zvar->cache);
/* reclaim xarray */
if(zvar->xarray) nclistfreeall(zvar->xarray);
nullfree(zvar);
var->format_var_info = NULL; /* avoid memory errors */
return stat;
}

/**
* @internal Close resources for vars in a group.
*
Expand All @@ -148,37 +191,14 @@ zclose_vars(NC_GRP_INFO_T* grp)
{
int stat = NC_NOERR;
NC_VAR_INFO_T* var;
NCZ_VAR_INFO_T* zvar;
NC_ATT_INFO_T* att;
int a, i;
int i;

for(i = 0; i < ncindexsize(grp->vars); i++) {
var = (NC_VAR_INFO_T*)ncindexith(grp->vars, i);
assert(var && var->format_var_info);
zvar = var->format_var_info;;
for(a = 0; a < ncindexsize(var->att); a++) {
NCZ_ATT_INFO_T* zatt;
att = (NC_ATT_INFO_T*)ncindexith(var->att, a);
assert(att && att->format_att_info);
zatt = att->format_att_info;
nullfree(zatt);
att->format_att_info = NULL; /* avoid memory errors */
}
#ifdef ENABLE_NCZARR_FILTERS
/* Reclaim filters */
if(var->filters != NULL) {
(void)NCZ_filter_freelists(var);
}
var->filters = NULL;
#endif
/* Reclaim the type */
if(var->type_info) (void)zclose_type(var->type_info);
if(zvar->cache) NCZ_free_chunk_cache(zvar->cache);
/* reclaim xarray */
if(zvar->xarray) nclistfreeall(zvar->xarray);
nullfree(zvar);
var->format_var_info = NULL; /* avoid memory errors */
if((stat = NCZ_zclose_var1(var))) goto done;
}
done:
return stat;
}

Expand Down
1 change: 1 addition & 0 deletions libnczarr/zinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ int ncz_closeorabort(NC_FILE_INFO_T*, void* params, int abort);

/* zclose.c */
int ncz_close_ncz_file(NC_FILE_INFO_T* file, int abort);
int NCZ_zclose_var1(NC_VAR_INFO_T* var);

/* zattr.c */
int ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **attlist);
Expand Down
21 changes: 18 additions & 3 deletions libnczarr/zsync.c
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
NCjson* jfilter = NULL;
int chainindex;
#endif
int varsized;
int suppressvar = 0; /* 1 => make this variable invisible */

ZTRACE(3,"file=%s grp=%s |varnames|=%u",file->controller->path,grp->hdr.name,nclistlength(varnames));

Expand Down Expand Up @@ -1533,6 +1535,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}
}

/* See if this variable is variable sized */
varsized = NC4_var_varsized(var);

if(!purezarr) {
/* Extract the _NCZARR_ARRAY values */
/* Do this first so we know about storage esp. scalar */
Expand Down Expand Up @@ -1719,7 +1724,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
/* compressor key */
/* From V2 Spec: A JSON object identifying the primary compression codec and providing
configuration parameters, or ``null`` if no compressor is to be used. */
{
if(!varsized) { /* Only process if variable is fixed-size */
#ifdef ENABLE_NCZARR_FILTERS
if(var->filters == NULL) var->filters = (void*)nclistnew();
if((stat = NCZ_filter_initialize())) goto done;
Expand All @@ -1730,6 +1735,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}
#endif
}
/* Suppress variable if there are filters and var is not fixed-size */
if(varsized && nclistlength((NClist*)var->filters) > 0)
suppressvar = 1;

if((stat = computedimrefs(file, var, purezarr, xarray, rank, dimnames, shapes, var->dim)))
goto done;
Expand All @@ -1741,9 +1749,16 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}

#ifdef ENABLE_NCZARR_FILTERS
/* At this point, we can finalize the filters */
if((stat = NCZ_filter_setup(var))) goto done;
if(!suppressvar) {
/* At this point, we can finalize the filters */
if((stat = NCZ_filter_setup(var))) goto done;
}
#endif

if(suppressvar) {
if((stat = NCZ_zclose_var1(var))) goto done;
}

/* Clean up from last cycle */
nclistfreeall(dimnames); dimnames = nclistnew();
nullfree(varpath); varpath = NULL;
Expand Down
18 changes: 18 additions & 0 deletions libsrc4/nc4type.c
Original file line number Diff line number Diff line change
Expand Up @@ -799,3 +799,21 @@ NC4_set_varsize(NC_TYPE_INFO_T* typ)
done:
return stat;
}

/**
* Test if a variable's type is fixed sized or not.
* @param var - to test
* @return 0 if fixed size, 1 otherwise.
*/
int
NC4_var_varsized(NC_VAR_INFO_T* var)
{
NC_TYPE_INFO_T* vtype = NULL;

/* Check the variable type */
vtype = var->type_info;
if(vtype->hdr.id < NC_STRING) return 0;
if(vtype->hdr.id == NC_STRING) return 1;
return vtype->varsized;
}

9 changes: 7 additions & 2 deletions nc_test4/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,20 @@ IF(USE_HDF5 AND ENABLE_FILTER_TESTING)
build_bin_test(tst_multifilter)
build_bin_test(test_filter_order)
build_bin_test(test_filter_repeat)
build_bin_test(tst_filter_avail)
build_bin_test(test_filter_vlen)
build_bin_test(tst_filter_vlen)
ADD_SH_TEST(nc_test4 tst_filter)
ADD_SH_TEST(nc_test4 tst_specific_filters)
ADD_SH_TEST(nc_test4 tst_bloscfail)
ADD_SH_TEST(nc_test4 tst_filter_vlen)
IF(FALSE)
# This test is too dangerous to run in a parallel make environment.
# It causes race conditions. So suppress and only test by hand.
ADD_SH_TEST(nc_test4 tst_unknown)
# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
#in running it via make check. It is kept here so it can be
# manually invoked if desired
ADD_SH_TEST(nc_test4 tst_filterinstall)
ENDIF(FALSE)
ENDIF(USE_HDF5 AND ENABLE_FILTER_TESTING)

Expand Down
26 changes: 17 additions & 9 deletions nc_test4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,29 @@ if USE_HDF5
if ENABLE_FILTER_TESTING
extradir =
check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat test_filter_vlen
check_PROGRAMS += tst_multifilter tst_filter_avail
check_PROGRAMS += tst_multifilter tst_filter_vlen
TESTS += tst_filter.sh
TESTS += tst_specific_filters.sh
TESTS += tst_bloscfail.sh
TESTS += tst_filter_vlen.sh
if ISMINGW
XFAIL_TESTS += tst_filter.sh
endif # ISMINGW

if AX_MANUAL
# This test is too dangerous to run in a parallel make environment.
# It causes race conditions. So suppress and only test by hand.
#TESTS += tst_unknown.sh
TESTS += tst_unknown.sh
endif

if AX_MANUAL
# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
#in running it via make check. It is kept here so it can be
# manually invoked if desired
TESTS += tst_filterinstall.sh
endif

endif # ENABLE_FILTER_TESTING
endif # USE_HDF5
endif # BUILD_UTILITIES
Expand Down Expand Up @@ -120,13 +133,8 @@ ref_filter_order_create.txt ref_filter_order_read.txt \
ref_any.cdl tst_specific_filters.sh tst_unknown.sh \
tst_virtual_datasets.c noop1.cdl unknown.cdl \
tst_broken_files.c ref_bloscx.cdl tst_bloscfail.sh \
tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl

# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
#in running it via make check. It is kept here so it can be
# manually invoked if desired
EXTRA_DIST += tst_filterinstall.sh
tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl \
tst_filterinstall.sh tst_filter_vlen.sh

CLEANFILES = tst_mpi_parallel.bin cdm_sea_soundings.nc bm_chunking.nc \
tst_floats_1D.cdl floats_1D_3.nc floats_1D.cdl tst_*.nc tmp_*.txt \
Expand Down
Loading