diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 43a9edda09..11f68bc91b 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release ## 4.8.1 - TBD +* [Enhancement] Convert to a new representation of the NCZarr meta-data extensions: version 2. Read-only backward compatibility is provided. See [Github #2032](https://github.com/Unidata/netcdf-c/issues/2032). * [Bug Fix] Fix dimension_separator bug in libnczarr. See [Github #2035](https://github.com/Unidata/netcdf-c/issues/2035). * [Bug Fix] Fix bugs in libdap4. See [Github #2005](https://github.com/Unidata/netcdf-c/issues/2005). * [Bug Fix] Store NCZarr fillvalue as a singleton instead of a 1-element array. See [Github #2017](https://github.com/Unidata/netcdf-c/issues/2017). diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index c9f722a239..a369f83f3c 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -92,6 +92,6 @@ obsolete/fan_utils.html bestpractices.md filters.md indexing.md inmemory.md DAP2.dox attribute_conventions.md FAQ.md file_format_specifications.md known_problems.md COPYRIGHT.dox user_defined_formats.md DAP4.md DAP4.dox -testserver.dox byterange.dox) +testserver.dox byterange.dox filters.md nczarr.md) ADD_EXTRA_DIST("${CUR_EXTRA_DIST}") diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 05063d4142..1d0131d361 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -751,6 +751,8 @@ INPUT = \ @abs_top_srcdir@/docs/byterange.dox \ @abs_top_srcdir@/docs/inmemory.md \ @abs_top_srcdir@/docs/auth.md \ + @abs_top_srcdir@/docs/filters.md \ + @abs_top_srcdir@/docs/nczarr.md \ @abs_top_srcdir@/docs/notes.md \ @abs_top_srcdir@/docs/all-error-codes.md \ @abs_top_srcdir@/docs/building-with-cmake.md \ diff --git a/docs/Makefile.am b/docs/Makefile.am index ad98725188..7e6ef3419e 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -16,7 +16,8 @@ obsolete/fan_utils.html indexing.dox \ inmemory.md 
attribute_conventions.md FAQ.md \ file_format_specifications.md known_problems.md COPYRIGHT.md \ inmeminternal.dox \ -testserver.dox byterange.dox +testserver.dox byterange.dox \ +nczarr.md filters.md # Turn off parallel builds in this directory. .NOTPARALLEL: diff --git a/docs/nczarr.md b/docs/nczarr.md new file mode 100644 index 0000000000..6a5f411ad0 --- /dev/null +++ b/docs/nczarr.md @@ -0,0 +1,631 @@ +The NetCDF NCZarr Implementation +============================ + + +# The NetCDF NCZarr Implementation {#nczarr_head} + +\tableofcontents + +# NCZarr Introduction {#nczarr_introduction} + +Beginning with netCDF version 4.8.0, the Unidata NetCDF group has extended the netcdf-c library to provide access to cloud storage (e.g. Amazon S3 [1] ). +This extension provides a mapping from a subset of the full netCDF Enhanced (aka netCDF-4) data model to a variant of the Zarr [4] data model. +The NetCDF version of this storage format is called NCZarr [4]. + +A note on terminology in this document. + +1. The term "dataset" is used to refer to all of the Zarr objects constituting + the meta-data and data. + +# The NCZarr Data Model {#nczarr_data_model} + +NCZarr uses a data model [4] that, by design, extends the Zarr Version 2 Specification [6] to add support for the NetCDF-4 data model. + +__Note Carefully__: a legal _NCZarr_ dataset is also a legal _Zarr_ dataset under a specific assumption. This assumption is that within Zarr meta-data objects, like __.zarray__, unrecognized dictionary keys are ignored. +If this assumption is true of an implementation, then the _NCZarr_ dataset is a legal _Zarr_ dataset and should be readable by that _Zarr_ implementation. + +There are two other, secondary assumptions: + +1. The actual storage format in which the dataset is stored -- a zip file, for example -- can be read by the _Zarr_ implementation. +2. The filters used by the dataset can be encoded/decoded by the implementation.
+ +Briefly, the data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. +As with netcdf-4, chunking is supported. +Filters and compression are supported, but the companion document on filters +should be consulted for the details. + +Specifically, the model supports the following. +- "Atomic" types: char, byte, ubyte, short, ushort, int, uint, int64, uint64. +- Shared (named) dimensions +- Attributes with specified types -- both global and per-variable +- Chunking +- Fill values +- Groups +- N-Dimensional variables +- Per-variable endianness (big or little) +- Filters (including compression) + +With respect to full netCDF-4, the following concepts are +currently unsupported. +- String type +- User-defined types (enum, opaque, VLEN, and Compound) +- Unlimited dimensions +- Contiguous or compact storage + +Note that contiguous and compact are not actually supported +because they are HDF5 specific. +When specified, they are treated as chunked where the file consists of only one chunk. +This means that testing for contiguous or compact is not possible; the _nc_inq_var_chunking_ function will always return NC_CHUNKED and the chunksizes will be the same as the dimension sizes of the variable's dimensions. + +# Enabling NCZarr Support {#nczarr_enable} + +NCZarr support is enabled by default. +If the _--disable-nczarr_ option is used with './configure', then NCZarr (and Zarr) support is disabled. +If NCZarr support is enabled, then support for datasets stored as files in a directory tree is provided as the only guaranteed mechanism for storing datasets. +However, several additional storage mechanisms are available if additional libraries are installed. + +1. Zip format -- if _libzip_ is installed, then it is possible to directly read and write datasets stored in zip files. +2. If the AWS C++ SDK is installed, and _libcurl_ is installed, then it is possible to directly read and write datasets stored in the Amazon S3 cloud storage.
+ +# Accessing Data Using the NCZarr Protocol {#nczarr_accessing_data} + +In order to access a NCZarr data source through the netCDF API, the file name normally used is replaced with a URL with a specific format. +Note specifically that there is no NC_NCZARR flag for the mode argument of _nc_create_ or _nc_open_. +In this case, it is indicated by the URL path. + +## URL Format +The URL is the usual format. +```` +scheme://host:port/path?query#fragment format +```` +There are some details that are important. +- Scheme: this should be _https_, _s3_, or _file_. + The _s3_ scheme is equivalent + to "https" plus setting "mode=nczarr,s3" (see below). + Specifying "file" is mostly used for testing, but is used to support + directory tree or zipfile format storage. +- Host: Amazon S3 defines two forms: _Virtual_ and _Path_. + + _Virtual_: the host includes the bucket name as in + __bucket.s3.<region>.amazonaws.com__ + + _Path_: the host does not include the bucket name, but + rather the bucket name is the first segment of the path. + For example __s3.<region>.amazonaws.com/bucket__ + + _Other_: It is possible to use other non-Amazon cloud storage, but + that is cloud library dependent. +- Query: currently not used. +- Fragment: the fragment is of the form _key=value&key=value&..._. + Depending on the key, the _value_ part may be left out and some + default value will be used. + +## Client Parameters + +The fragment part of a URL is used to specify information that is interpreted to specify what data format is to be used, as well as additional controls for that data format. +For NCZarr support, the following _key=value_ pairs are allowed. + +- mode=nczarr|zarr|noxarray|file|zip|s3 + +Typically one will specify two mode flags: one to indicate what format +to use and one to specify the way the dataset is to be stored.
+For example, a common one is "mode=zarr,file" + +Using _mode=nczarr_ causes the URL to be interpreted as a +reference to a dataset that is stored in NCZarr format. +The _zarr_ mode tells the library to +use NCZarr, but to restrict its operation to operate on pure +Zarr Version 2 datasets. + +The modes _s3_, _file_, and _zip_ tell the library what storage +driver to use. +* The _s3_ driver is the default and indicates using Amazon S3 or some equivalent. +* The _file_ format stores data in a directory tree. +* The _zip_ format stores data in a local zip file. + +Note that it should be the case that zipping a _file_ +format directory tree will produce a file readable by the +_zip_ storage format, and vice-versa. + +By default, _mode=zarr_ also supports the XArray _\_ARRAY\_DIMENSIONS_ convention. The _noxarray_ mode tells the library to disable the XArray support. + +The netcdf-c library is capable of inferring additional mode flags based on the flags it finds. Currently we have the following inferences. + +- _xarray_ => _zarr_ +- _noxarray_ => _zarr_ +- _zarr_ => _nczarr_ + +So for example: ````...#mode=noxarray,zip```` is equivalent to this. +````...#mode=nczarr,zarr,noxarray,zip +```` + + +# NCZarr Map Implementation {#nczarr_mapimpl} + +Internally, the nczarr implementation has a map abstraction that allows different storage formats to be used. +This is closely patterned on the same approach used in the Python Zarr implementation, which relies on the Python _MutableMap_ [5] class. + +In NCZarr, the corresponding type is called _zmap_. +The __zmap__ API essentially implements a simplified variant +of the Amazon S3 API. + +As with Amazon S3, __keys__ are utf8 strings with a specific structure: +that of a path similar to those of a Unix path with '/' as the +separator for the segments of the path.
+ +As with Unix, all keys have this BNF syntax: +```` +key: '/' | keypath ; +keypath: '/' segment | keypath '/' segment ; +segment: +```` +Obviously, one can infer a tree structure from this key structure. +A containment relationship is defined by key prefixes. +Thus one key is "contained" (possibly transitively) +by another if one key is a prefix (in the string sense) of the other. +So in this sense the key "/x/y/z" is contained by the key "/x/y". + +In this model all keys "exist" but only some keys refer to +objects containing content -- aka _content bearing_. +An important restriction is placed on the structure of the tree, +namely that keys are only defined for content-bearing objects. +Further, all the leaves of the tree are these content-bearing objects. +This means that the key for one content-bearing object should not +be a prefix of any other key. + +There are several other concepts of note. +1. __Dataset__ - a dataset is the complete tree contained by the key defining +the root of the dataset. +Technically, the root of the tree is the key /.zgroup, where .zgroup can be considered the _superblock_ of the dataset. +2. __Object__ - equivalent of the S3 object; Each object has a unique key +and "contains" data in the form of an arbitrary sequence of 8-bit bytes. + +The zmap API defined here isolates the key-value pair mapping +code from the Zarr-based implementation of NetCDF-4. + It wraps an internal C dispatch table manager for implementing an +abstract data structure implementing the zmap key/object model. +Of special note is the "search" function of the API. + +__Search__: The search function has two purposes: +1. Support reading of pure zarr datasets (because they do not explicitly track their contents). +2. Debugging to allow raw examination of the storage. See zdump for example. + +The search function takes a prefix path which has a key syntax (see above).
+The set of legal keys is the set of keys such that the key references a content-bearing object -- e.g. /x/y/.zarray or /.zgroup. +Essentially this is the set of keys pointing to the leaf objects of the tree of keys constituting a dataset. +This set potentially limits the set of keys that need to be examined during search. + +The search function returns a limited set of names, where the set of names are immediate suffixes of a given prefix path. +That is, if _\<prefix\>_ is the prefix path, then search returns all _\<name\>_ such that _\<prefix\>/\<name\>_ is itself a prefix of a "legal" key. +This can be used to implement glob style searches such as "/x/y/*" or "/x/y/**" + +This semantics was chosen because it appears to be the minimum required to implement all other kinds of search using recursion. +It was also chosen to limit the number of names returned from the search. +Specifically +1. Avoid returning keys that are not a prefix of some legal key. +2. Avoid returning all the legal keys in the dataset because that set may be very large; although the implementation may still have to examine all legal keys to get the desired subset. +3. Allow for use of partial read mechanisms such as iterators, if available. +This can support processing a limited set of keys for each iteration. +This is a straightforward tradeoff of space over time. + +As a side note, S3 supports this kind of search using common prefixes with a delimiter of '/', although its use is a bit tricky. +For the file system zmap implementation, the legal search keys can be obtained one level at a time, which directly implements the search semantics. +For the zip file implementation, this semantics is not possible, so the whole +tree must be obtained and searched. + +__Issues:__ + +1. S3 limits key lengths to 1024 bytes. +Some deeply nested netcdf files will almost certainly exceed this limit. +2.
Besides content, S3 objects can have an associated small set +of what may be called tags, which are themselves of the form of +key-value pairs, but where the key and value are always text. +As far as it is possible to determine, Zarr never uses these tags, +so they are not included in the zmap data structure. + +__A Note on Error Codes:__ + +The zmap API returns some distinguished error codes: +1. NC_NOERR if an operation succeeded +2. NC_EEMPTY is returned when accessing a key that has no content. +3. NC_EOBJECT is returned when an object is found which should not exist +4. NC_ENOOBJECT is returned when an object is not found which should exist + +This does not preclude other errors being returned such as NC_EACCESS or NC_EPERM or NC_EINVAL if there are permission errors or illegal function arguments, for example. +It also does not preclude the use of other error codes internal to the zmap implementation. +So zmap_file, for example, uses NC_ENOTFOUND internally because it is possible to detect the existence of directories and files. +But this does not propagate outside the zmap_file implementation. + +## Zmap Implementations + +The primary zmap implementation is _s3_ (i.e. _mode=nczarr,s3_) and indicates that the Amazon S3 cloud storage -- or some related appliance -- is to be used. +Another storage format uses a file system tree of directories and files (_mode=nczarr,file_). +A third storage format uses a zip file (_mode=nczarr,zip_). +The latter two are used mostly for debugging and testing. +However, the _file_ and _zip_ formats are important because they are intended to match corresponding storage formats used by the Python Zarr implementation. +Hence they should serve to provide interoperability between NCZarr and the Python Zarr, although this interoperability has not been tested. + +Examples of the typical URL form for _file_ and _zip_ are as follows.
+```` +file:///xxx/yyy/testdata.file#mode=nczarr,file +file:///xxx/yyy/testdata.zip#mode=nczarr,zip +```` + +Note that the extension (e.g. ".file" in "testdata.file") +is arbitrary, so this would be equally acceptable. +```` +file:///xxx/yyy/testdata.anyext#mode=nczarr,file +```` +As with other URLs (e.g. DAP), these kinds of URLs can be passed as the path argument to, for example, __ncdump__. + +# NCZarr versus Pure Zarr. {#nczarr_purezarr} + +The NCZARR format extends the pure Zarr format by adding extra keys such as _\_NCZARR\_ARRAY_ inside the _.zarray_ object. +It is possible to suppress the use of these extensions so that the netcdf library can read and write a pure zarr formatted file. +This is controlled by using _mode=nczarr,zarr_ combination. +The primary effects of using pure zarr are described in the [Translation Section](@ref nczarr_translation). + +There are some constraints on the reading of Zarr datasets using the NCZarr implementation. + +1. Zarr allows some primitive types not recognized by NCZarr. +Over time, the set of unrecognized types is expected to diminish. +Examples of currently unsupported types are as follows: +* "c" -- complex floating point +* "m" -- timedelta +* "M" -- datetime +2. The Zarr dataset may reference filters and compressors unrecognized by NCZarr. +Again, this list should diminish over time. + +# Notes on Debugging NCZarr Access {#nczarr_debug} + +The NCZarr support has a trace facility. +Enabling this can sometimes give important, but voluminous information. +Tracing can be enabled by setting the environment variable NCTRACING=n, +where _n_ indicates the level of tracing. +A good value of _n_ is 9. + +# Zip File Support {#nczarr_zip} + +In order to use the _zip_ storage format, the libzip [3] library must be installed. +Note that this is different from zlib. + +# Amazon S3 Storage {#nczarr_s3} + +The Amazon AWS S3 storage driver currently uses the Amazon AWS S3 Software Development Kit for C++ (aws-s3-sdk-cpp).
+In order to use it, the client must provide some configuration information. +Specifically, the `~/.aws/config` file should contain something like this. + +``` +[default] +output = json +aws_access_key_id=XXXX... +aws_secret_access_key=YYYY... +``` + +## Addressing Style + +The notion of "addressing style" may need some expansion. +Amazon S3 accepts two forms for specifying the endpoint for accessing the data. + +1. Virtual -- the virtual addressing style places the bucket in the host part of a URL. +For example: +``` +https://<bucketname>.s3.<region>.amazonaws.com/ +``` +2. Path -- the path addressing style places the bucket at the front of the path part of a URL. +For example: +``` +https://s3.<region>.amazonaws.com/<bucketname>/ +``` + +The NCZarr code will accept either form, although internally, it is standardized on path style. +The reason for this is that the bucket name forms the initial segment in the keys. + +# Zarr vs NCZarr {#nczarr_vs_zarr} + +## Data Model + +The NCZarr storage format is almost identical to that of the standard Zarr version 2 format. +The data model differs as follows. + +1. Zarr only supports anonymous dimensions -- NCZarr supports only shared (named) dimensions. +2. Zarr attributes are untyped -- or perhaps more correctly characterized as of type string. + +## Storage Format + +Consider both NCZarr and Zarr, and assume S3 notions of bucket and object. +In both systems, Groups and Variables (Array in Zarr) map to S3 objects. +Containment is modeled using the fact that the dataset's key is a prefix of the variable's key. +So for example, if variable _v1_ is contained in top level group g1 -- _/g1_ -- then the key for _v1_ is _/g1/v1_. +Additional meta-data information is stored in special objects whose names start with ".z". + +In Zarr, the following special objects exist. + +1. Information about a group is kept in a special object named _.zgroup_; +so for example the object _/g1/.zgroup_. +2.
Information about an array is kept as a special object named _.zarray_; +so for example the object _/g1/v1/.zarray_. +3. Group-level attributes and variable-level attributes are stored in a special object named _.zattr_; +so for example the objects _/g1/.zattr_ and _/g1/v1/.zattr_. +4. Chunk data is stored in objects named "\<n1\>.\<n2\>...,\<nm\>" where the ni are non-negative integers representing the chunk index for the ith dimension. + +The first three contain meta-data objects in the form of a string representing a JSON-formatted dictionary. +The NCZarr format uses the same objects as Zarr, but inserts NCZarr +specific key-value pairs in them to hold NCZarr specific information. +The value of each of these keys is a JSON dictionary containing a variety +of NCZarr specific information. + +These keys are as follows: + +_\_NCZARR_SUPERBLOCK\__ -- this is in the top level group -- key _/.zarr_. +It is in effect the "superblock" for the dataset and contains +any netcdf specific dataset level information. +It is also used to verify that a given key is the root of a dataset. +Currently it contains the following key(s): +* "version" -- the NCZarr version defining the format of the dataset. + +_\_NCZARR_GROUP\__ -- this key appears in every _.zgroup_ object. +It contains any netcdf specific group information. +Specifically it contains the following keys: +* "dims" -- the name and size of shared dimensions defined in this group. +* "vars" -- the name of variables defined in this group. +* "groups" -- the name of sub-groups defined in this group. +These lists allow walking the NCZarr dataset without having to use the potentially costly search operation. + +_\_NCZARR_ARRAY\__ -- this key appears in every _.zarray_ object. +It contains netcdf specific array information. +Specifically it contains the following keys: +* dimrefs -- the names of the shared dimensions referenced by the variable. +* storage -- indicates if the variable is chunked vs contiguous in the netcdf sense.
+ +_\_NCZARR_ATTR\__ -- this key appears in every _.zattr_ object. +This means that technically, it is an attribute, but one for which access +is normally suppressed. +Specifically it contains the following keys: +* types -- the types of all of the other attributes in the _.zattr_ object. + +## Translation {#nczarr_translation} + +With some constraints, it is possible for an nczarr library to read Zarr and for a zarr library to read the nczarr format. +The latter case, zarr reading nczarr is possible if the zarr library is willing to ignore keys whose name it does not recognize; specifically anything beginning with _\_NCZARR\__. + +The former case, nczarr reading zarr is also possible if the nczarr can simulate or infer the contents of the missing _\_NCZARR\_XXX_ objects. +As a rule this can be done as follows. +1. _\_NCZARR_GROUP\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. +The search looks for occurrences of _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables). +Constructing the set of "shared dimensions" is carried out +by walking all the variables in the whole dataset and collecting +the set of unique integer shapes for the variables. +For each such dimension length, a top level dimension is created +named ".zdim_\<len\>" where len is the integer length. +2. _\_NCZARR_ARRAY\__ -- The dimrefs are inferred by using the shape +in _.zarray_ and creating references to the simulated shared dimension. +netcdf specific information. +3. _\_NCZARR_ATTR\__ -- The type of each attribute is inferred by trying to parse the first attribute value string. + +# Compatibility {#nczarr_compatibility} + +In order to accommodate existing implementations, certain mode tags are provided to tell the NCZarr code to look for information used by specific implementations.
+ +## XArray + +The Xarray [7] Zarr implementation uses its own mechanism for specifying shared dimensions. +It uses a special attribute named ''_ARRAY_DIMENSIONS''. +The value of this attribute is a list of dimension names (strings). +An example might be ````["time", "lon", "lat"]````. +It is essentially equivalent to the ````_NCZARR_ARRAY "dimrefs" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. + +As of _netcdf-c_ version 4.8.1, the Xarray ''_ARRAY_DIMENSIONS'' attribute is supported. +This attribute will be read/written by default, but can be suppressed if the mode value "noxarray" is specified. +If detected, then these dimension names are used to define shared dimensions. +Note that "noxarray" or "xarray" implies pure zarr format. + +# Examples {#nczarr_examples} + +Here are a couple of examples using the _ncgen_ and _ncdump_ utilities. + +1. Create an nczarr file using a local directory tree as storage. + ``` + ncgen -4 -lb -o "file:///home/user/dataset.file#mode=nczarr,file" dataset.cdl + ``` +2. Display the content of an nczarr file using a zip file as storage. + ``` + ncdump "file:///home/user/dataset.zip#mode=nczarr,zip" + ``` +3. Create an nczarr file using S3 as storage. + ``` + ncgen -4 -lb -o "s3://s3.us-west-1.amazonaws.com/datasetbucket" dataset.cdl + ``` +4. Create an nczarr file using S3 as storage and keeping to the pure zarr format. + ``` + ncgen -4 -lb -o "s3://s3.us-west-1.amazonaws.com/datasetbucket#mode=zarr" dataset.cdl + ``` + +# References {#nczarr_bib} + +[1] [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/index.html)
+[2] [Amazon Simple Storage Service Library](https://github.com/aws/aws-sdk-cpp)
+[3] [The LibZip Library](https://libzip.org/)
+[4] [NetCDF ZARR Data Model Specification](https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf-zarr-data-model-specification)
+[5] [Python Documentation: 8.3. +collections — High-performance dataset datatypes](https://docs.python.org/2/library/collections.html)
+[6] [Zarr Version 2 Specification](https://zarr.readthedocs.io/en/stable/spec/v2.html)
+[7] [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification)
+[8] [Dynamic Filter Loading](https://support.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf)
+[9] [Officially Registered Custom HDF5 Filters](https://portal.hdfgroup.org/display/support/Registered+Filter+Plugins)
+[10] [C-Blosc Compressor Implementation](https://github.com/Blosc/c-blosc) + +# Appendix A. Building NCZarr Support {#nczarr_build} + +Currently the following build cases are known to work. + + +
Operating SystemBuild SystemNCZarrS3 Support +
Linux Automake yes yes +
Linux CMake yes yes +
Cygwin Automake yes no +
OSX Automake unknown unknown +
OSX CMake unknown unknown +
Visual Studio CMake yes tests fail +
+ +Note: S3 support includes both compiling the S3 support code as well as running the S3 tests. + +# Automake + +There are several options relevant to NCZarr support and to Amazon S3 support. +These are as follows. + +1. _--disable-nczarr_ -- disable the NCZarr support. +If disabled, then all of the following options are disabled or irrelevant. +2. _--enable-nczarr-s3_ -- Enable NCZarr S3 support. +3. _--enable-nczarr-s3-tests_ -- the NCZarr S3 tests are currently only usable by Unidata personnel, so they are disabled by default. + +__A note about using S3 with Automake.__ +If S3 support is desired, and using Automake, then LDFLAGS must be properly set, namely to this. +```` +LDFLAGS="$LDFLAGS -L/usr/local/lib -laws-cpp-sdk-s3" +```` +The above assumes that these libraries were installed in '/usr/local/lib', so the above requires modification if they were installed elsewhere. + +Note also that if S3 support is enabled, then you need to have a C++ compiler installed because part of the S3 support code is written in C++. + +# CMake {#nczarr_cmake} + +The necessary CMake flags are as follows (with defaults) + +1. +-DENABLE_NCZARR=off -- equivalent to the Automake _--disable-nczarr_ option. +2. -DENABLE_NCZARR_S3=off -- equivalent to the Automake _--enable-nczarr-s3_ option. +3. -DENABLE_NCZARR_S3_TESTS=off -- equivalent to the Automake _--enable-nczarr-s3-tests_ option. + +Note that unlike Automake, CMake can properly locate C++ libraries, so it should not be necessary to specify _-laws-cpp-sdk-s3_ assuming that the aws s3 libraries are installed in the default location. +For CMake with Visual Studio, the default location is here: + +```` +C:/Program Files (x86)/aws-cpp-sdk-all +```` + +It is possible to install the sdk library in another location. +In this case, one must add the following flag to the cmake command. +```` +cmake ... -DAWSSDK_DIR=\ +```` +where "awssdkdir" is the path to the sdk installation. +For example, this might be as follows. +```` +cmake ... 
-DAWSSDK_DIR="c:\tools\aws-cpp-sdk-all" +```` +This can be useful if blanks in path names cause problems in your build environment. + +## Testing S3 Support {#nczarr_testing_S3_support} + +The relevant tests for S3 support are in the _nczarr_test_ directory. +Currently, by default, testing of S3 with NCZarr is supported only for Unidata members of the NetCDF Development Group. +This is because it uses a specific bucket on a specific internal S3 appliance that is inaccessible to the general user. + +However, an untested mechanism exists by which others may be able to run the S3 specific tests. + If someone else wants to attempt these tests, then they need to define the following environment variables: +* NCZARR_S3_TEST_HOST=\<host\> +* NCZARR_S3_TEST_BUCKET=\<bucket\> + +This assumes a Path Style address (see above) where +* host -- the complete host part of the url +* bucket -- a bucket in which testing can occur without fear of damaging anything. + +_Example:_ + +```` +NCZARR_S3_TEST_HOST=s3.us-west-1.amazonaws.com +NCZARR_S3_TEST_BUCKET=testbucket +```` +If anyone tries to use this mechanism, it would be appreciated +if any difficulties were reported to Unidata as a Github issue. + +# Appendix B. Building aws-sdk-cpp {#nczarr_s3sdk} + +In order to use the S3 storage driver, it is necessary to install the Amazon [aws-sdk-cpp library](https://github.com/aws/aws-sdk-cpp.git). + +As a starting point, here are the CMake options used by Unidata to build that library. +It assumes that it is being executed in a build directory, `build` say, and that `build/../CMakeLists.txt` exists. +``` +cmake -DBUILD_ONLY=s3 +``` +The expected set of installed libraries are as follows: +* aws-cpp-sdk-s3 +* aws-cpp-sdk-core + +This library depends on libcurl, so you may need to install that +before building the sdk library. + +# Appendix C.
Amazon S3 Imposed Limits {#nczarr_s3limits} + +The Amazon S3 cloud storage imposes some significant limits that are inherited by NCZarr (and Zarr also, for that matter). + +Some of the relevant limits are as follows: +1. The maximum object size is 5 Gigabytes with a total for all objects limited to 5 Terabytes. +2. S3 key names can be any UNICODE name with a maximum length of 1024 bytes. +Note that the limit is defined in terms of bytes and not (Unicode) characters. +This affects the depth to which groups can be nested because the key encodes the full path name of a group. + +# Appendix D. Alternative Mechanisms for Accessing Remote Datasets + +The NetCDF-C library contains an alternate mechanism for accessing traditional netcdf-4 files stored in Amazon S3: The byte-range mechanism. +The idea is to treat the remote data as if it was a big file. +This remote "file" can be randomly accessed using the HTTP Byte-Range header. + +In the Amazon S3 context, a copy of a dataset, a netcdf-3 or netcdf-4 file, is uploaded into a single object in some bucket. +Then using the key to this object, it is possible to tell the netcdf-c library to treat the object as a remote file and to use the HTTP Byte-Range protocol to access the contents of the object. +The dataset object is referenced using a URL with the trailing fragment containing the string ````#mode=bytes````. + +An examination of the test program _nc_test/test_byterange.sh_ shows simple examples using the _ncdump_ program. +One such test is specified as follows: +```` +https://s3.us-east-1.amazonaws.com/noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes +```` +Note that for S3 access, it is expected that the URL is in what is called "path" format where the bucket, _noaa-goes16_ in this case, is part of the URL path instead of the host. + +The _#mode=bytes_ mechanism generalizes to work with most servers that support byte-range access.
+ +Specifically, Thredds servers support such access using the HttpServer access method as can be seen from this URL taken from the above test program. +```` +https://thredds-test.unidata.ucar.edu/thredds/fileServer/irma/metar/files/METAR_20170910_0000.nc#bytes +```` + +## Byte-Range Authorization + +If using byte-range access, it may be necessary to tell the netcdf-c +library about the so-called secretid and accessid values. +These are usually stored in the file ````~/.aws/config```` +and/or ````~/.aws/credentials````. +In the latter file, this +might look like this. +```` + [default] + aws_access_key_id=XXXXXXXXXXXXXXXXXXXX + aws_secret_access_key=YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY +```` + +# Appendix E. NCZarr Version 1 Meta-Data Representation + +In NCZarr Version 1, the NCZarr specific metadata was represented using new objects rather than as keys in existing Zarr objects. +Due to conflicts with the Zarr specification, that format is deprecated in favor of the one described above. +However the netcdf-c NCZarr support can still read the version 1 format. + +The version 1 format defines three specific objects: _.nczgroup_, _.nczarray_, and _.nczattr_. +These are stored in parallel with the corresponding Zarr objects. So if there is a key of the form "/x/y/.zarray", then there is also a key "/x/y/.nczarray". +The content of these objects is the same as the contents of the corresponding keys. So the value of the ''_NCZARR_ARRAY'' key is the same as the content of the ''.nczarray'' object. The list of connections is as follows: + +* ''.nczarr'' <=> ''_NCZARR_SUPERBLOCK_'' +* ''.nczgroup'' <=> ''_NCZARR_GROUP_'' +* ''.nczarray'' <=> ''_NCZARR_ARRAY_'' +* ''.nczattr'' <=> ''_NCZARR_ATTR_'' + +# Point of Contact {#nczarr_poc} + +__Author__: Dennis Heimbigner
+__Email__: dmh at ucar dot edu
+__Initial Version__: 4/10/2020
+__Last Revised__: 7/16/2021 diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 2ffcf55629..4e0b137b7b 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -27,6 +27,9 @@ INSTALL(FILES ${netCDF_BINARY_DIR}/include/netcdf_meta.h INSTALL(FILES ${netCDF_SOURCE_DIR}/include/netcdf_filter.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT headers) +INSTALL(FILES ${netCDF_SOURCE_DIR}/include/netcdf_filter_build.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT headers) INSTALL(FILES ${netCDF_BINARY_DIR}/include/netcdf_dispatch.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} diff --git a/include/Makefile.am b/include/Makefile.am index 77b97a0bba..e43748d164 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -7,7 +7,7 @@ # Ed Hartnett, Dennis Heimbigner, Ward Fisher include_HEADERS = netcdf.h netcdf_meta.h netcdf_mem.h netcdf_aux.h \ -netcdf_filter.h netcdf_dispatch.h +netcdf_filter.h netcdf_filter_build.h netcdf_dispatch.h if BUILD_PARALLEL include_HEADERS += netcdf_par.h @@ -19,7 +19,8 @@ ncbytes.h nchashmap.h ceconstraints.h rnd.h nclog.h ncconfigure.h \ nc4internal.h nctime.h nc3internal.h onstack.h ncrc.h ncauth.h \ ncoffsets.h nctestserver.h nc4dispatch.h nc3dispatch.h ncexternl.h \ ncpathmgr.h ncindex.h hdf4dispatch.h hdf5internal.h nc_provenance.h \ -hdf5dispatch.h ncmodel.h isnan.h nccrc.h ncexhash.h ncxcache.h ncfilter.h +hdf5dispatch.h ncmodel.h isnan.h nccrc.h ncexhash.h ncxcache.h \ +ncfilter.h ncjson.h if USE_DAP noinst_HEADERS += ncdap.h diff --git a/include/nc4internal.h b/include/nc4internal.h index 15709e2b5e..49bda44f1c 100644 --- a/include/nc4internal.h +++ b/include/nc4internal.h @@ -461,5 +461,6 @@ extern const NC_reservedatt* NC_findreserved(const char* name); #define NC_ATT_DIMID_NAME "_Netcdf4Dimid" #define NC_ATT_NC3_STRICT_NAME "_nc3_strict" #define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS" +#define NC_NCZARR_ATTR "_NCZARR_ATTR" #endif /* _NC4INTERNAL_ */ diff --git a/include/nchttp.h b/include/nchttp.h index 
f5dd963148..59e541deaf 100644 --- a/include/nchttp.h +++ b/include/nchttp.h @@ -24,7 +24,7 @@ typedef struct NC_HTTP_STATE { } NC_HTTP_STATE; extern int nc_http_open(const char* objecturl, NC_HTTP_STATE** state, long long* lenp); -extern int nc_http_size(NC_HTTP_STATE* state, const char* url, size64_t* sizep); +extern int nc_http_size(NC_HTTP_STATE* state, const char* url, long long* sizep); extern int nc_http_read(NC_HTTP_STATE* state, const char* url, size64_t start, size64_t count, NCbytes* buf); extern int nc_http_close(NC_HTTP_STATE* state); extern int nc_http_headers(NC_HTTP_STATE* state, const NClist** headersp); /* only if headerson */ diff --git a/include/ncjson.h b/include/ncjson.h new file mode 100644 index 0000000000..18b4208633 --- /dev/null +++ b/include/ncjson.h @@ -0,0 +1,126 @@ +/* Copyright 2018, UCAR/Unidata. +Copyright 2018 Unidata + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef NCJSON_H +#define NCJSON_H 1 + +/* Json object sorts (note use of term sort rather than e.g. type or discriminant) */ +#define NCJ_UNDEF 0 +#define NCJ_STRING 1 +#define NCJ_INT 2 +#define NCJ_DOUBLE 3 +#define NCJ_BOOLEAN 4 +#define NCJ_DICT 5 +#define NCJ_ARRAY 6 +#define NCJ_NULL 7 + +#define NCJ_NSORTS 8 + +/* No flags are currently defined, but the argument is a placeholder */ + + +/* Define a struct to store primitive values + as unquoted strings. The sort will + provide more info. + Do not bother with a union since + the amount of saved space is minimal. 
+*/ + +typedef struct NCjson { + int sort; /* of this object */ + char* string; /* sort != DICT|ARRAY */ + struct NCjlist { + int len; + struct NCjson** contents; + } list; /* sort == DICT|ARRAY */ +} NCjson; + +/* Support Windows declspec */ +#ifndef EXTERNL +# ifdef _WIN32 +# ifdef NCJSON_INTERNAL /* define when compiling code */ +# define EXTERNL __declspec(dllexport) extern +# else +# define EXTERNL __declspec(dllimport) extern +# endif +# else /* !_WIN32 */ +# define EXTERNL extern +# endif +#endif /* !defined EXTERNL */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* int return value is either 0 (ok) or 1 (failure) */ + +/* Parse */ +EXTERNL int NCJparse(const char* text, unsigned flags, NCjson** jsonp); + +/* Build */ +EXTERNL int NCJnew(int sort, NCjson** object); + +/* Recursively free NCjson instance */ +EXTERNL void NCJreclaim(NCjson*); + +/* Assign a nul terminated string value to an NCjson object as its contents */ +EXTERNL int NCJnewstring(int sort, const char* value, NCjson** jsonp); + +/* Assign a counted string value to an NCjson object as its contents */ +EXTERNL int NCJnewstringn(int sort, size_t len, const char* value, NCjson** jsonp); + +/* Append value to an array or dict object. */ +EXTERNL int NCJappend(NCjson* object, NCjson* value); + +/* Insert key-value pair into a dict object. 
key will be copied */ +EXTERNL int NCJinsert(NCjson* object, char* key, NCjson* value); + +/* Unparser to convert NCjson object to text in buffer */ +EXTERNL int NCJunparse(const NCjson* json, unsigned flags, char** textp); + +/* Utilities */ +EXTERNL int NCJaddstring(NCjson*, int sort, const char* s); +EXTERNL int NCJdictget(const NCjson* dict, const char* key, NCjson** valuep); + +/* dump NCjson* object to output file */ +EXTERNL void NCJdump(const NCjson* json, unsigned flags, FILE*); + +/* Convert one json sort to value of another type; don't use union so we can know when to reclaim sval */ +struct NCJconst {int bval; long long ival; double dval; char* sval;}; +EXTERNL int NCJcvt(const NCjson* value, int outsort, struct NCJconst* output); + +/* Deep clone a json object */ +EXTERNL int NCJclone(const NCjson* json, NCjson** clonep); + +/* Getters */ +#define NCJsort(x) ((x)->sort) +#define NCJstring(x) ((x)->string) +#define NCJlength(x) ((x)==NULL ? 0 : (x)->list.len) +#define NCJcontents(x) ((x)->list.contents) +#define NCJith(x,i) ((x)->list.contents[i]) + +/* Setters */ +#define NCJsetsort(x,s) (x)->sort=(s) +#define NCJsetstring(x,y) (x)->string=(y) +#define NCJsetcontents(x,c) (x)->list.contents=(c) +#define NCJsetlength(x,l) (x)->list.len=(l) + +/* Misc */ +#define NCJisatomic(j) ((j)->sort != NCJ_ARRAY && (j)->sort != NCJ_DICT && (j)->sort != NCJ_NULL && (j)->sort != NCJ_UNDEF) + +#if defined(__cplusplus) +} +#endif + +#endif /*NCJSON_H*/ diff --git a/include/ncpathmgr.h b/include/ncpathmgr.h index 6fd3bf915a..c51c9bd20d 100644 --- a/include/ncpathmgr.h +++ b/include/ncpathmgr.h @@ -97,6 +97,22 @@ This means it is ok to call it repeatedly with no harm. */ EXTERNL char* NCpathcvt(const char* path); +/** +It is often convenient to convert a path to some canonical format +that has some desirable properties: +1. All backslashes have been converted to forward slash +2. It can be suffixed or prefixed by simple concatenation + with a '/' separator. 
The exception being if the base part + may be absolute, in which case, suffixing only is allowed; + the user is responsible for getting this right. +To this end we choose the linux/cygwin format as our standard canonical form. +If the path has a windows drive letter, then it is represented +in the cygwin "/cygdrive/" form. If it is on *nix* platform, +then this sequence will never appear and the canonical path will look +like a standard *nix* path. +*/ +EXTERNL int NCpathcanonical(const char* srcpath, char** canonp); + EXTERNL int NChasdriveletter(const char* path); /* Canonicalize and make absolute by prefixing the current working directory */ diff --git a/include/netcdf.h b/include/netcdf.h index b6d434a780..380135e336 100644 --- a/include/netcdf.h +++ b/include/netcdf.h @@ -480,9 +480,11 @@ by the desired type. */ #define NC_ENCZARR (-137) /**< Error at NCZarr layer. */ #define NC_ES3 (-138) /**< Generic S3 error */ #define NC_EEMPTY (-139) /**< Attempt to read empty NCZarr map key */ -#define NC_EFOUND (-140) /**< Some object exists when it should not */ +#define NC_EOBJECT (-140) /**< Some object exists when it should not */ +#define NC_ENOOBJECT (-141) /**< Some object not found */ +#define NC_EPLUGIN (-142) /**< Unclassified failure in accessing a dynamically loaded plugin> */ -#define NC4_LAST_ERROR (-140) /**< @internal All netCDF errors > this. */ +#define NC4_LAST_ERROR (-142) /**< @internal All netCDF errors > this. */ /* Errors for all remote access methods(e.g. DAP and CDMREMOTE)*/ #define NC_EURL (NC_EDAPURL) /**< Malformed URL */ diff --git a/include/netcdf_filter_build.h b/include/netcdf_filter_build.h new file mode 100644 index 0000000000..0ed08bcaa0 --- /dev/null +++ b/include/netcdf_filter_build.h @@ -0,0 +1,230 @@ +/* Copyright 2018, UCAR/Unidata and OPeNDAP, Inc. + See the COPYRIGHT file for more information. */ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. 
* + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://support.hdfgroup.org/ftp/hdf5/releases. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* This include file is used if one wished to build a filter plugin + independent of HDF5. See examples in the plugins directory +*/ + +#ifndef NETCDF_FILTER_BUILD_H +#define NETCDF_FILTER_BUILD_H 1 + +/**************************************************/ +/* Build To the HDF5 C-API for Filters */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Support headers */ +#include +#include + +#ifdef USE_HDF5 +#include +/* Older versions of the hdf library may define H5PL_type_t here */ +#include + +#else /*!USE_HDF5*/ /* Provide replacement definitions */ + +/* WARNING: In order make NCZARR independent of HDF5, + while still using HDF5-style filters, some HDF5 + declarations need to be duplicated here with + different names. Watch out for changes in + the underlying HDF5 declarations. + + See the file H5Zpublic.h for more detailed descriptions. + + Note that these declarations are always enabled because + HDF5-style filters may have been created with these definitions + but for use by HDF5. + + Note also that certain filters in the plugins directory will not build if HDF5 is not installed: + notably blosc. 
+*/ + +/* H5Z_FILTER_RESERVED => H5Z_FILTER_RESERVED */ +#define H5Z_FILTER_RESERVED 256 /*filter ids below this value are reserved for library use */ + +/* H5Z_FILTER_MAX => H5Z_FILTER_MAX */ +#define H5Z_FILTER_MAX 65535 /*maximum filter id */ + +/* Only a limited set of definition and invocation flags are allowed */ +#define H5Z_FLAG_MANDATORY 0x0000 /*filter is mandatory */ +#define H5Z_FLAG_OPTIONAL 0x0001 /*filter is optional */ +#define H5Z_FLAG_REVERSE 0x0100 /*reverse direction; read */ + +typedef int htri_t; +typedef int herr_t; +typedef size_t hsize_t; +typedef long long hid_t; + +#define H5allocate_memory(size,n) malloc(size) +#define H5free_memory(buf) free(buf) + +/* htri_t (*H5Z_can_apply_func_t)(hid_t dcpl_id, hid_t type_id, hid_t space_id) => currently not supported; must be NULL. */ +typedef htri_t (*H5Z_can_apply_func_t)(long long, long long, long long); + +/* herr_t (*H5Z_set_local_func_t)(hid_t dcpl_id, hid_t type_id, hid_t space_id); => currently not supported; must be NULL. */ +typedef herr_t (*H5Z_set_local_func_t)(long long, long long, long long); + +/* H5Z_funct_t => H5Z_filter_func_t */ +typedef size_t (*H5Z_func_t)(unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[], size_t nbytes, + size_t *buf_size, void **buf); + +typedef int H5Z_filter_t; + +#define H5Z_CLASS_T_VERS 1 + +/* + * The filter table maps filter identification numbers to structs that + * contain a pointers to the filter function and timing statistics. + */ +typedef struct H5Z_class2_t { + int version; /* Version number of the struct; should be H5Z_FILTER_CLASS_VER */ + H5Z_filter_t id; /* Filter ID number */ + unsigned encoder_present; /* Does this filter have an encoder? */ + unsigned decoder_present; /* Does this filter have a decoder? 
*/ + const char *name; /* Comment for debugging */ + H5Z_can_apply_func_t can_apply; /* The "can apply" callback for a filter */ + H5Z_set_local_func_t set_local; /* The "set local" callback for a filter */ + H5Z_func_t filter; /* The actual filter function */ +} H5Z_class2_t; + +/* The HDF5/H5Zarr dynamic loader looks for the following:*/ + +/* Plugin type used by the plugin library */ +typedef enum H5PL_type_t { + H5PL_TYPE_ERROR = -1, /* Error */ + H5PL_TYPE_FILTER = 0, /* Filter */ + H5PL_TYPE_NONE = 1 /* This must be last! */ +} H5PL_type_t; + +#endif /*HAVE_HDF5_H*/ + +/* Following External Discovery Functions should be present for the dynamic loading of filters */ + +/* returns specific constant H5ZP_TYPE_FILTER */ +typedef H5PL_type_t (*H5PL_get_plugin_type_proto)(void); + +/* return */ +typedef const void* (*H5PL_get_plugin_info_proto)(void); + +/**************************************************/ +/* Build To a NumCodecs-style C-API for Filters */ + +/* Version of the NCZ_codec_t structure */ +#define NCZ_CODEC_CLASS_VER 1 + +/* List of the kinds of NCZ_codec_t formats */ +#define NCZ_CODEC_HDF5 1 /* HDF5 <-> Codec converter */ + +/* Defined flags for filter invocation (not stored); powers of two */ +#define NCZ_FILTER_DECODE 0x00000001 + +/* External Discovery Function */ + +/* +Obtain a pointer to an instance of NCZ_codec_class_t. + +NCZ_get_plugin_info(void) -- returns pointer to instance of NCZ_codec_class_t. + Instance an be recast based on version+sort to the plugin type specific info. +So the void* return value is typically actually of type NCZ_codec_class_t*. +*/ +typedef const void* (*NCZ_get_plugin_info_proto)(void); + +/* The current object returned by NCZ_get_plugin_info is a + pointer to an instance of NCZ_codec_t. + +The key to this struct are the four function pointers that do setup/reset/finalize +and conversion between codec JSON and HDF5 parameters. 
+ +Setup context state for the codec converter +int (*NCZ_codec_setup)(int ncid, int varid, void** contextp); + +@param ncid -- (in) ncid of the variable's group +@param varid -- (in) varid of the variable +@params contextp -- (out) context for this (var,codec) combination. +@return -- a netcdf-c error code. + +Reclaim any codec resources from setup. Not same as finalize. +int (*NCZ_codec_reset)(void* context); + +@param context -- (in) context state + +Finalize use of the plugin. Since HDF5 does not provide this functionality, +the codec may need to do it. See H5Zblosc.c for an example. +void (*NCZ_codec_finalize)(void); + +@param context -- (in) context state + +Convert a JSON representation to an HDF5 representation: +int (*NCZ_codec_to_hdf5)(void* context, const char* codec, int* nparamsp, unsigned** paramsp); + +@param context -- (in) context state from setup. +@param codec -- (in) ptr to JSON string representing the codec. +@param nparamsp -- (out) store the length of the converted HDF5 unsigned vector +@param paramsp -- (out) store a pointer to the converted HDF5 unsigned vector; + caller frees. Note the double indirection. +@return -- a netcdf-c error code. + +Convert an HDF5 representation to a JSON representation +int (*NCZ_hdf5_to_codec)(void* context, int nparams, const unsigned* params, char** codecp); + +@param context -- (in) context state from setup. +@param nparams -- (in) the length of the HDF5 unsigned vector +@param params -- (in) pointer to the HDF5 unsigned vector. +@param codecp -- (out) store the string representation of the codec; caller must free. +@return -- a netcdf-c error code. +*/ + +/* QUESTION? do we want to provide a netcdf-specific + alternative to H5Z_set_local since NCZarr may not have HDF5 access? + HDF5: herr_t set_local(hid_t dcpl, hid_t type, hid_t space); + Proposed netcdf equivalent: int NCZ_set_local(int ncid, int varid, int* nparamsp, unsigned** paramsp); + where ncid+varid is equivalent to the space. 
+*/ + +/* +The struct that provides the necessary filter info. +The combination of version + sort uniquely determines +the format of the remainder of the struct +*/ +typedef struct NCZ_codec_t { + int version; /* Version number of the struct */ + int sort; /* Format of remainder of the struct; + Currently always NCZ_CODEC_HDF5 */ + const char* codecid; /* The name/id of the codec */ + unsigned int hdf5id; /* corresponding hdf5 id */ + int (*NCZ_codec_to_hdf5)(void* context, const char* codec, int* nparamsp, unsigned** paramsp); + int (*NCZ_hdf5_to_codec)(void* context, int nparams, const unsigned* params, char** codecp); + int (*NCZ_codec_setup)(int ncid, int varid, void** contextp); + int (*NCZ_codec_reset)(void* context); + void (*NCZ_codec_finalize)(void); +} NCZ_codec_t; + +#ifndef NC_UNUSED +#define NC_UNUSED(var) (void)var +#endif + +#ifndef DLLEXPORT +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif +#endif + +#endif /*NETCDF_FILTER_BUILD_H*/ diff --git a/libdispatch/CMakeLists.txt b/libdispatch/CMakeLists.txt index f2838ab2d6..c74660d897 100644 --- a/libdispatch/CMakeLists.txt +++ b/libdispatch/CMakeLists.txt @@ -5,7 +5,7 @@ # See netcdf-c/COPYRIGHT file for more info. SET(libdispatch_SOURCES dparallel.c dcopy.c dfile.c ddim.c datt.c dattinq.c dattput.c dattget.c derror.c dvar.c dvarget.c dvarput.c dvarinq.c ddispatch.c nclog.c dstring.c dutf8.c dinternal.c doffsets.c ncuri.c nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c utf8proc.h utf8proc.c dpathmgr.c dutil.c drc.c dauth.c dreadonly.c dnotnc4.c dnotnc3.c daux.c dinfermodel.c -dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c) +dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c) # Netcdf-4 only functions. 
Must be defined even if not used SET(libdispatch_SOURCES ${libdispatch_SOURCES} dgroup.c dvlen.c dcompound.c dtype.c denum.c dopaque.c dfilter.c) diff --git a/libdispatch/Makefile.am b/libdispatch/Makefile.am index 797f66960a..ea01937190 100644 --- a/libdispatch/Makefile.am +++ b/libdispatch/Makefile.am @@ -21,7 +21,7 @@ dvarinq.c dinternal.c ddispatch.c dutf8.c nclog.c dstring.c ncuri.c \ nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c dauth.c \ doffsets.c dpathmgr.c dutil.c dreadonly.c dnotnc4.c dnotnc3.c \ daux.c dinfermodel.c \ -dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c +dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c # Add the utf8 codebase libdispatch_la_SOURCES += utf8proc.c utf8proc.h diff --git a/libdispatch/derror.c b/libdispatch/derror.c index dda0e063a5..921d15a573 100644 --- a/libdispatch/derror.c +++ b/libdispatch/derror.c @@ -259,7 +259,7 @@ const char *nc_strerror(int ncerr1) case NC_EDISKLESS: return "NetCDF: Error in using diskless access"; case NC_EFILTER: - return "NetCDF: Filter error: bad id or parameters"; + return "NetCDF: Filter error: bad id or parameters or duplicate filter"; case NC_ENOFILTER: return "NetCDF: Filter error: unimplemented filter encountered"; case NC_ECANTEXTEND: @@ -277,8 +277,12 @@ const char *nc_strerror(int ncerr1) return "NetCDF: AWS S3 error"; case NC_EEMPTY: return "NetCDF: Attempt to read empty NCZarr map key"; - case NC_EFOUND: + case NC_EOBJECT: return "NetCDF: Some object exists when it should not"; + case NC_ENOOBJECT: + return "NetCDF: Some object not found"; + case NC_EPLUGIN: + return "NetCDF: Unclassified failure in accessing a dynamically loaded plugin"; default: #ifdef USE_PNETCDF /* The behavior of ncmpi_strerror here is to return diff --git a/libdispatch/dhttp.c b/libdispatch/dhttp.c index e21cadaa6a..236c12fba5 100644 --- a/libdispatch/dhttp.c +++ b/libdispatch/dhttp.c @@ -189,7 +189,7 @@ Assume URL etc has already been set. 
*/ int -nc_http_size(NC_HTTP_STATE* state, const char* objecturl, size64_t* sizep) +nc_http_size(NC_HTTP_STATE* state, const char* objecturl, long long* sizep) { int i,stat = NC_NOERR; diff --git a/libdispatch/dinfermodel.c b/libdispatch/dinfermodel.c index 5abd11b1e2..ba7db07e9b 100644 --- a/libdispatch/dinfermodel.c +++ b/libdispatch/dinfermodel.c @@ -44,7 +44,7 @@ struct MagicFile { struct NCURI* uri; int omode; NCmodel* model; - size64_t filelen; + long long filelen; int use_parallel; void* parameters; /* !NULL if inmemory && !diskless */ FILE* fp; diff --git a/libdispatch/dpathmgr.c b/libdispatch/dpathmgr.c index faebde6a70..15c61536c4 100644 --- a/libdispatch/dpathmgr.c +++ b/libdispatch/dpathmgr.c @@ -147,6 +147,49 @@ NCpathcvt(const char* inpath) return tmp1; } +EXTERNL +int +NCpathcanonical(const char* srcpath, char** canonp) +{ + int stat = NC_NOERR; + char* canon = NULL; + size_t len; + struct Path path = empty; + + if(srcpath == NULL) goto done; + + if(!pathinitialized) pathinit(); + + /* parse the src path */ + if((stat = parsepath(srcpath,&path))) {goto done;} + switch (path.kind) { + case NCPD_NIX: + case NCPD_CYGWIN: + case NCPD_REL: + /* use as is */ + canon = path.path; path.path = NULL; + break; + case NCPD_MSYS: + case NCPD_WIN: /* convert to cywin form */ + len = strlen(path.path) + strlen("/cygdrive/X") + 1; + canon = (char*)malloc(len); + if(canon != NULL) { + canon[0] = '\0'; + strlcat(canon,"/cygdrive/X",len); + canon[10] = path.drive; + strlcat(canon,path.path,len); + } + break; + default: goto done; /* return NULL */ + } + if(canonp) {*canonp = canon; canon = NULL;} + +done: + nullfree(canon); + clearPath(&path); + return stat; +} + EXTERNL char* /* caller frees */ NCpathabsolute(const char* relpath) @@ -161,7 +204,7 @@ NCpathabsolute(const char* relpath) if(!pathinitialized) pathinit(); - /* Canonicalize relpath */ + /* Decompose path */ if((stat = parsepath(relpath,&canon))) {goto done;} /* See if relative */ @@ -472,7 +515,7 @@ 
NCmkstemp(char* base) cvtpath = NCpathcvt(base); len = strlen(cvtpath); xp = cvtpath+(len-6); - assert(memcmp(xp,"XXXXXX")==0); + assert(memcmp(xp,"XXXXXX",6)==0); for(attempts=10;attempts>0;attempts--) { /* The Windows version of mkstemp does not work right; it only allows for 26 possible XXXXXX values */ diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c new file mode 100644 index 0000000000..1cca2132ef --- /dev/null +++ b/libdispatch/ncjson.c @@ -0,0 +1,1000 @@ +/* Copyright 2018, UCAR/Unidata. + See the COPYRIGHT file for more information. +*/ + +/* +TODO: make utf8 safe +*/ + +#define NCJSON_INTERNAL + +#include +#include +#include +#include +#include "ncjson.h" + +#ifdef _WIN32 +#define strcasecmp _stricmp +#else +#include +#endif + +#undef DEBUG + +#define NCJ_OK 0 +#define NCJ_ERR 1 + +#define NCJ_EOF -1 + +#define NCJ_LBRACKET '[' +#define NCJ_RBRACKET ']' +#define NCJ_LBRACE '{' +#define NCJ_RBRACE '}' +#define NCJ_COLON ':' +#define NCJ_COMMA ',' +#define NCJ_QUOTE '"' +#define NCJ_ESCAPE '\\' +#define NCJ_TAG_TRUE "true" +#define NCJ_TAG_FALSE "false" +#define NCJ_TAG_NULL "null" + +/* WORD Subsumes Number also */ +#define WORD "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$+-." 
+ +/*//////////////////////////////////////////////////*/ + +typedef struct NCJparser { + char* text; + char* pos; + size_t yylen; /* |yytext| */ + char* yytext; /* string or word */ + long long num; + int tf; + int err; +} NCJparser; + +typedef struct NCJbuf { + int len; /* |text|; does not include nul terminator */ + char* text; /* NULL || nul terminated */ +} NCJbuf; + +/**************************************************/ +/* Forward */ +static int NCJparseR(NCJparser* parser, NCjson**); +static int NCJparseArray(NCJparser* parser, struct NCjlist* array); +static int NCJparseDict(NCJparser* parser, struct NCjlist* dict); +static int testbool(const char* word); +static int testint(const char* word); +static int testdouble(const char* word); +static int testnull(const char* word); +static int NCJlex(NCJparser* parser); +static int NCJyytext(NCJparser*, char* start, size_t pdlen); +static void NCJreclaimArray(struct NCjlist*); +static void NCJreclaimDict(struct NCjlist*); +static int NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags); +static int NCJunescape(NCJparser* parser); +static int listappend(struct NCjlist* list, NCjson* element); +static int bytesappendquoted(NCJbuf* buf, const char*); +static int bytesappend(NCJbuf* buf, const char* s); +static int bytesappendc(NCJbuf* buf, const char c); +static int escape(const char* text, NCJbuf* buf); +static int NCJcloneArray(const NCjson* array, NCjson** clonep); +static int NCJcloneDict(const NCjson* dict, NCjson** clonep); + +#ifndef nullfree +#define nullfree(x) {if(x)free(x);} +#endif +#ifndef nulldup +#define nulldup(x) ((x)?strdup(x):(x)) +#endif + +#ifdef DEBUG +static char* tokenname(int token); +#endif +/**************************************************/ +int +NCJparse(const char* text, unsigned flags, NCjson** jsonp) +{ + int stat = NCJ_OK; + size_t len; + NCJparser* parser = NULL; + NCjson* json = NULL; + + /* Need at least 1 character of input */ + if(text == NULL || text[0] == '\0') + {stat 
= NCJ_ERR; goto done;} + if(jsonp == NULL) goto done; + parser = calloc(1,sizeof(NCJparser)); + if(parser == NULL) + {stat = NCJ_ERR; goto done;} + len = strlen(text); + parser->text = (char*)malloc(len+1+1); + if(parser->text == NULL) + {stat = NCJ_ERR; goto done;} + strcpy(parser->text,text); + parser->text[len] = '\0'; + parser->text[len+1] = '\0'; + parser->pos = &parser->text[0]; +#ifdef DEBUG +fprintf(stderr,"json: |%s|\n",parser->text); +#endif + if((stat=NCJparseR(parser,&json))) goto done; + *jsonp = json; + json = NULL; + +done: + if(parser != NULL) { + nullfree(parser->text); + nullfree(parser->yytext); + free(parser); + } + (void)NCJreclaim(json); + return (stat); +} + +/* +Simple recursive descent +intertwined with dict and list parsers. + +Invariants: +1. The json argument is provided by caller and filled in by NCJparseR. +2. Each call pushed back last unconsumed token +*/ + +static int +NCJparseR(NCJparser* parser, NCjson** jsonp) +{ + int stat = NCJ_OK; + int token = NCJ_UNDEF; + NCjson* json = NULL; + + if(jsonp == NULL) + {stat = NCJ_ERR; goto done;} + if((token = NCJlex(parser)) == NCJ_UNDEF) + {stat = NCJ_ERR; goto done;} + switch (token) { + case NCJ_EOF: + break; + case NCJ_NULL: + if((stat = NCJnew(NCJ_NULL,&json))) goto done; + break; + case NCJ_BOOLEAN: + if((stat = NCJnew(NCJ_BOOLEAN,&json))) goto done; + json->string = strdup(parser->yytext); + break; + case NCJ_INT: + if((stat = NCJnew(NCJ_INT,&json))) goto done; + json->string = strdup(parser->yytext); + break; + case NCJ_DOUBLE: + if((stat = NCJnew(NCJ_DOUBLE,&json))) goto done; + json->string = strdup(parser->yytext); + break; + case NCJ_STRING: + if((stat = NCJnew(NCJ_STRING,&json))) goto done; + json->string = strdup(parser->yytext); + break; + case NCJ_LBRACE: + if((stat = NCJnew(NCJ_DICT,&json))) goto done; + if((stat = NCJparseDict(parser, &json->list))) goto done; + break; + case NCJ_LBRACKET: + if((stat = NCJnew(NCJ_ARRAY,&json))) goto done; + if((stat = NCJparseArray(parser, 
&json->list))) goto done; + break; + case NCJ_RBRACE: /* We hit end of the dict we are parsing */ + parser->pos--; /* pushback so NCJparseArray will catch */ + json = NULL; + break; + case NCJ_RBRACKET: + parser->pos--; /* pushback so NCJparseDict will catch */ + json = NULL; + break; + default: + stat = NCJ_ERR; + break; + } + if(jsonp && json) {*jsonp = json; json = NULL;} + +done: + NCJreclaim(json); + return (stat); +} + +static int +NCJparseArray(NCJparser* parser, struct NCjlist* arrayp) +{ + int stat = NCJ_OK; + int token = NCJ_UNDEF; + NCjson* element = NULL; + int stop = 0; + + /* [ ^e1,e2, ...en] */ + + while(!stop) { + /* Recurse to get the value ei (might be null) */ + if((stat = NCJparseR(parser,&element))) goto done; + token = NCJlex(parser); /* Get next token */ + /* Next token should be comma or rbracket */ + switch(token) { + case NCJ_RBRACKET: + if(element != NULL) listappend(arrayp,element); + element = NULL; + stop = 1; + break; + case NCJ_COMMA: + /* Append the ei to the list */ + if(element == NULL) {stat = NCJ_ERR; goto done;} /* error */ + listappend(arrayp,element); + element = NULL; + break; + case NCJ_EOF: + case NCJ_UNDEF: + default: + stat = NCJ_ERR; + goto done; + } + } + +done: + if(element != NULL) + NCJreclaim(element); + return (stat); +} + +static int +NCJparseDict(NCJparser* parser, struct NCjlist* dictp) +{ + int stat = NCJ_OK; + int token = NCJ_UNDEF; + NCjson* value = NULL; + NCjson* key = NULL; + int stop = 0; + + /* { ^k1:v1,k2:v2, ...kn:vn] */ + + while(!stop) { + /* Get the key, which must be a word of some sort */ + token = NCJlex(parser); + switch(token) { + case NCJ_STRING: + case NCJ_BOOLEAN: + case NCJ_INT: case NCJ_DOUBLE: { + if((stat=NCJnewstring(token,parser->yytext,&key))) goto done; + } break; + case NCJ_RBRACE: /* End of containing Dict */ + stop = 1; + continue; /* leave loop */ + case NCJ_EOF: case NCJ_UNDEF: + default: + stat = NCJ_ERR; + goto done; + } + /* Next token must be colon*/ + switch((token = 
NCJlex(parser))) { + case NCJ_COLON: break; + case NCJ_UNDEF: case NCJ_EOF: + default: stat = NCJ_ERR; goto done; + } + /* Get the value */ + if((stat = NCJparseR(parser,&value))) goto done; + /* Next token must be comma or RBRACE */ + switch((token = NCJlex(parser))) { + case NCJ_RBRACE: + stop = 1; + /* fall thru */ + case NCJ_COMMA: + /* Insert key value into dict: key first, then value */ + listappend(dictp,key); + key = NULL; + listappend(dictp,value); + value = NULL; + break; + case NCJ_EOF: + case NCJ_UNDEF: + default: + stat = NCJ_ERR; + goto done; + } + } + +done: + if(key != NULL) + NCJreclaim(key); + if(value != NULL) + NCJreclaim(value); + return (stat); +} + +static int +NCJlex(NCJparser* parser) +{ + int c; + int token = 0; + char* start; + size_t count; + + while(token == 0) { /* avoid need to goto when retrying */ + c = *parser->pos; + if(c == '\0') { + token = NCJ_EOF; + } else if(c <= ' ' || c == '\177') { + parser->pos++; + continue; /* ignore whitespace */ + } else if(strchr(WORD, c) != NULL) { + start = parser->pos; + for(;;) { + c = *parser->pos++; + if(c == '\0' || strchr(WORD,c) == NULL) break; /* end of word */ + } + /* Pushback c if not whitespace */ + parser->pos--; + count = ((parser->pos) - start); + if(NCJyytext(parser,start,count)) goto done; + /* Discriminate the word string to get the proper sort */ + if(testbool(parser->yytext) == NCJ_OK) + token = NCJ_BOOLEAN; + /* do int test first since double subsumes int */ + else if(testint(parser->yytext) == NCJ_OK) + token = NCJ_INT; + else if(testdouble(parser->yytext) == NCJ_OK) + token = NCJ_DOUBLE; + else if(testnull(parser->yytext) == NCJ_OK) + token = NCJ_NULL; + else + token = NCJ_STRING; + } else if(c == NCJ_QUOTE) { + parser->pos++; + start = parser->pos; + for(;;) { + c = *parser->pos++; + if(c == NCJ_ESCAPE) parser->pos++; + else if(c == NCJ_QUOTE || c == '\0') break; + } + if(c == '\0') { + parser->err = NCJ_ERR; + token = NCJ_UNDEF; + goto done; + } + count = ((parser->pos) - 
start) - 1; /* -1 for trailing quote */ + if(NCJyytext(parser,start,count)) goto done; + if(NCJunescape(parser)) goto done; + token = NCJ_STRING; + } else { /* single char token */ + if(NCJyytext(parser,parser->pos,1)) goto done; + token = *parser->pos++; + } +#ifdef DEBUG +fprintf(stderr,"%s(%d): |%s|\n",tokenname(token),token,parser->yytext); +#endif + } /*for(;;)*/ +done: + if(parser->err) token = NCJ_UNDEF; + return token; +} + +static int +testnull(const char* word) +{ + if(strcasecmp(word,NCJ_TAG_NULL)==0) + return NCJ_OK; + return NCJ_ERR; +} + +static int +testbool(const char* word) +{ + if(strcasecmp(word,NCJ_TAG_TRUE)==0 + || strcasecmp(word,NCJ_TAG_FALSE)==0) + return NCJ_OK; + return NCJ_ERR; +} + +static int +testint(const char* word) +{ + int ncvt; + long long i; + int count = 0; + /* Try to convert to number */ + ncvt = sscanf(word,"%lld%n",&i,&count); + return (ncvt == 1 && strlen(word)==count ? NCJ_OK : NCJ_ERR); +} + +static int +testdouble(const char* word) +{ + int ncvt; + double d; + int count = 0; + /* Check for Nan and Infinity */ + if(strcasecmp("nan",word)==0) return NCJ_OK; + if(strcasecmp("infinity",word)==0) return NCJ_OK; + if(strcasecmp("-infinity",word)==0) return NCJ_OK; + /* Allow the XXXf versions as well */ + if(strcasecmp("nanf",word)==0) return NCJ_OK; + if(strcasecmp("infinityf",word)==0) return NCJ_OK; + if(strcasecmp("-infinityf",word)==0) return NCJ_OK; + /* Try to convert to number */ + ncvt = sscanf(word,"%lg%n",&d,&count); + return (ncvt == 1 && strlen(word)==count ? 
NCJ_OK : NCJ_ERR); +} + +static int +NCJyytext(NCJparser* parser, char* start, size_t pdlen) +{ + size_t len = (size_t)pdlen; + if(parser->yytext == NULL) { + parser->yytext = (char*)malloc(len+1); + parser->yylen = len; + } else if(parser->yylen <= len) { + parser->yytext = (char*) realloc(parser->yytext,len+1); + parser->yylen = len; + } + if(parser->yytext == NULL) return NCJ_ERR; + memcpy(parser->yytext,start,len); + parser->yytext[len] = '\0'; + return NCJ_OK; +} + +/**************************************************/ + +void +NCJreclaim(NCjson* json) +{ + if(json == NULL) return; + switch(json->sort) { + case NCJ_INT: + case NCJ_DOUBLE: + case NCJ_BOOLEAN: + case NCJ_STRING: + nullfree(json->string); + break; + case NCJ_DICT: + NCJreclaimDict(&json->list); + break; + case NCJ_ARRAY: + NCJreclaimArray(&json->list); + break; + default: break; /* nothing to reclaim */ + } + free(json); +} + +static void +NCJreclaimArray(struct NCjlist* array) +{ + int i; + for(i=0;ilen;i++) { + NCJreclaim(array->contents[i]); + } + nullfree(array->contents); + array->contents = NULL; +} + +static void +NCJreclaimDict(struct NCjlist* dict) +{ + return NCJreclaimArray(dict); +} + +int +NCJclone(const NCjson* json, NCjson** clonep) +{ + int stat = NCJ_OK; + NCjson* clone = NULL; + if(json == NULL) goto done; + switch(NCJsort(json)) { + case NCJ_INT: + case NCJ_DOUBLE: + case NCJ_BOOLEAN: + case NCJ_STRING: + if((stat=NCJnew(NCJsort(json),&clone))) goto done; + if((NCJstring(clone) = strdup(NCJstring(json))) == NULL) + {stat = NCJ_ERR; goto done;} + break; + case NCJ_NULL: + if((stat=NCJnew(NCJsort(json),&clone))) goto done; + break; + case NCJ_DICT: + if((stat=NCJcloneDict(json,&clone))) goto done; + break; + case NCJ_ARRAY: + if((stat=NCJcloneArray(json,&clone))) goto done; + break; + default: break; /* nothing to clone */ + } +done: + if(stat == NCJ_OK && clonep) {*clonep = clone; clone = NULL;} + NCJreclaim(clone); + return stat; +} + +static int +NCJcloneArray(const NCjson* 
array, NCjson** clonep) +{ + int i, stat=NCJ_OK; + NCjson* clone = NULL; + if((stat=NCJnew(NCJ_ARRAY,&clone))) goto done; + for(i=0;istring = (char*)malloc(len+1))==NULL) + {stat = NCJ_ERR; goto done;} + memcpy(json->string,value,len); + json->string[len] = '\0'; + if(jsonp) *jsonp = json; + json = NULL; /* avoid memory errors */ +done: + NCJreclaim(json); + return (stat); +} + +int +NCJaddstring(NCjson* json, int sort, const char* s) +{ + int stat = NCJ_OK; + NCjson* jtmp = NULL; + + if(NCJsort(json) != NCJ_DICT && NCJsort(json) != NCJ_ARRAY) + {stat = NCJ_ERR; goto done;} + if((stat = NCJnewstring(sort, s, &jtmp))) goto done; + if((stat = NCJappend(json,jtmp))) goto done; + jtmp = NULL; + +done: + NCJreclaim(jtmp); + return stat; +} + +int +NCJdictget(const NCjson* dict, const char* key, NCjson** valuep) +{ + int i,stat = NCJ_OK; + + if(dict == NULL || dict->sort != NCJ_DICT) + {stat = NCJ_ERR; goto done;} + if(valuep) {*valuep = NULL;} + for(i=0;istring != NULL && strcmp(jkey->string,key)==0) { + if(valuep) {*valuep = NCJith(dict,i+1); break;} + } + } + +done: + return stat; +} + +/* Insert key-value pair into a dict object. key will be strdup'd */ +int +NCJinsert(NCjson* object, char* key, NCjson* jvalue) +{ + int stat = NCJ_OK; + NCjson* jkey = NULL; + if(object == NULL || object->sort != NCJ_DICT || key == NULL || jvalue == NULL) + {stat = NCJ_ERR; goto done;} + if((stat = NCJnewstring(NCJ_STRING,key,&jkey))) goto done; + if((stat = NCJappend(object,jkey))) goto done; + if((stat = NCJappend(object,jvalue))) goto done; +done: + return stat; +} + +/* Append value to an array or dict object. 
*/ +int +NCJappend(NCjson* object, NCjson* value) +{ + if(object == NULL || value == NULL) + return NCJ_ERR; + switch (object->sort) { + case NCJ_ARRAY: + case NCJ_DICT: + listappend(&object->list,value); + break; + default: + return NCJ_ERR; + } + return NCJ_OK; +} + +/* Unescape the text in parser->yytext; can + do in place because unescaped string will + always be shorter */ +static int +NCJunescape(NCJparser* parser) +{ + char* p = parser->yytext; + char* q = p; + int c; + for(;(c=*p++);) { + if(c == NCJ_ESCAPE) { + c = *p++; + switch (c) { + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case NCJ_QUOTE: c = c; break; + case NCJ_ESCAPE: c = c; break; + default: c = c; break;/* technically not Json conformant */ + } + } + *q++ = c; + } + *q = '\0'; + return NCJ_OK; +} + +/**************************************************/ +/* Unparser to convert NCjson object to text in buffer */ + +int +NCJunparse(const NCjson* json, unsigned flags, char** textp) +{ + int stat = NCJ_OK; + NCJbuf buf = {0,NULL}; + if((stat = NCJunparseR(json,&buf,flags))) + goto done; + if(textp) {*textp = buf.text; buf.text = NULL; buf.len = 0;} +done: + nullfree(buf.text); + return (stat); +} + +static int +NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags) +{ + int stat = NCJ_OK; + int i; + + switch (NCJsort(json)) { + case NCJ_STRING: + bytesappendquoted(buf,json->string); + break; + case NCJ_INT: + case NCJ_DOUBLE: + case NCJ_BOOLEAN: + bytesappend(buf,json->string); + break; + case NCJ_DICT: + bytesappendc(buf,NCJ_LBRACE); + if(json->list.len > 0 && json->list.contents != NULL) { + int shortlist = 0; + for(i=0;!shortlist && i < json->list.len;i+=2) { + if(i > 0) bytesappendc(buf,NCJ_COMMA); + NCJunparseR(json->list.contents[i],buf,flags); /* key */ + bytesappendc(buf,NCJ_COLON); + bytesappendc(buf,' '); + /* Allow for the possibility of a short dict entry */ + 
if(json->list.contents[i+1] == NULL) { /* short */ + bytesappendc(buf,'?'); + shortlist = 1; + } else { + NCJunparseR(json->list.contents[i+1],buf,flags); + } + } + } + bytesappendc(buf,NCJ_RBRACE); + break; + case NCJ_ARRAY: + bytesappendc(buf,NCJ_LBRACKET); + if(json->list.len > 0 && json->list.contents != NULL) { + for(i=0;i < json->list.len;i++) { + if(i > 0) bytesappendc(buf,NCJ_COMMA); + NCJunparseR(json->list.contents[i],buf,flags); + } + } + bytesappendc(buf,NCJ_RBRACKET); + break; + case NCJ_NULL: + bytesappend(buf,"null"); + break; + default: + stat = NCJ_ERR; goto done; + } +done: + return (stat); +} + +/* Escape a string and append to buf */ +static int +escape(const char* text, NCJbuf* buf) +{ + const char* p = text; + int c; + for(;(c=*p++);) { + char replace = 0; + switch (c) { + case '\b': replace = 'b'; break; + case '\f': replace = 'f'; break; + case '\n': replace = 'n'; break; + case '\r': replace = 'r'; break; + case '\t': replace = 't'; break; + case NCJ_QUOTE: replace = '\''; break; + case NCJ_ESCAPE: replace = '\\'; break; + default: break; + } + if(replace) { + bytesappendc(buf,NCJ_ESCAPE); + bytesappendc(buf,replace); + } else + bytesappendc(buf,c); + } + return NCJ_OK; +} + +static int +bytesappendquoted(NCJbuf* buf, const char* s) +{ + bytesappend(buf,"\""); + escape(s,buf); + bytesappend(buf,"\""); + return NCJ_OK; +} + +void +NCJdump(const NCjson* json, unsigned flags, FILE* out) +{ + char* text = NULL; + (void)NCJunparse(json,0,&text); + if(out == NULL) out = stderr; + fprintf(out,"%s\n",text); + fflush(out); + nullfree(text); +} + +#ifdef DEBUG +static char* +tokenname(int token) +{ + switch (token) { + case NCJ_STRING: return "NCJ_STRING"; + case NCJ_INT: return "NCJ_INT"; + case NCJ_DOUBLE: return "NCJ_DOUBLE"; + case NCJ_BOOLEAN: return "NCJ_BOOLEAN"; + case NCJ_DICT: return "NCJ_DICT"; + case NCJ_ARRAY: return "NCJ_ARRAY"; + case NCJ_NULL: return "NCJ_NULL"; + default: + if(token > ' ' && token <= 127) { + static char s[4]; + s[0] 
= '\''; + s[1] = (char)token; + s[2] = '\''; + s[3] = '\0'; + return s; + } else + break; + } + return "NCJ_UNDEF"; +} +#endif + + +/* Convert a JSON value to an equivalent value of a specified sort */ +int +NCJcvt(const NCjson* jvalue, int outsort, struct NCJconst* output) +{ + int stat = NCJ_OK; + + if(output == NULL) goto done; + +#undef CASE +#define CASE(t1,t2) ((t1)<<4 | (t2)) /* the shift constant must be larger than log2(NCJ_NSORTS) */ + switch (CASE(jvalue->sort,outsort)) { + + case CASE(NCJ_BOOLEAN,NCJ_BOOLEAN): + if(strcasecmp(jvalue->string,NCJ_TAG_FALSE)==0) output->bval = 0; else output->bval = 1; + break; + case CASE(NCJ_BOOLEAN,NCJ_INT): + if(strcasecmp(jvalue->string,NCJ_TAG_FALSE)==0) output->ival = 0; else output->ival = 1; + break; + case CASE(NCJ_BOOLEAN,NCJ_DOUBLE): + if(strcasecmp(jvalue->string,NCJ_TAG_FALSE)==0) output->dval = 0.0; else output->dval = 1.0; + break; + case CASE(NCJ_BOOLEAN,NCJ_STRING): + output->sval = nulldup(jvalue->string); + break; + + case CASE(NCJ_INT,NCJ_BOOLEAN): + sscanf(jvalue->string,"%lldd",&output->ival); + output->bval = (output->ival?1:0); + break; + case CASE(NCJ_INT,NCJ_INT): + sscanf(jvalue->string,"%lld",&output->ival); + break; + case CASE(NCJ_INT,NCJ_DOUBLE): + sscanf(jvalue->string,"%lld",&output->ival); + output->dval = (double)output->ival; + break; + case CASE(NCJ_INT,NCJ_STRING): + output->sval = nulldup(jvalue->string); + break; + + case CASE(NCJ_DOUBLE,NCJ_BOOLEAN): + sscanf(jvalue->string,"%lf",&output->dval); + output->bval = (output->dval == 0?0:1); + break; + case CASE(NCJ_DOUBLE,NCJ_INT): + sscanf(jvalue->string,"%lf",&output->dval); + output->ival = (long long)output->dval; + break; + case CASE(NCJ_DOUBLE,NCJ_DOUBLE): + sscanf(jvalue->string,"%lf",&output->dval); + break; + case CASE(NCJ_DOUBLE,NCJ_STRING): + output->sval = nulldup(jvalue->string); + break; + + case CASE(NCJ_STRING,NCJ_BOOLEAN): + if(strcasecmp(jvalue->string,NCJ_TAG_FALSE)==0) output->bval = 0; else output->bval = 1; + 
break; + case CASE(NCJ_STRING,NCJ_INT): + sscanf(jvalue->string,"%lld",&output->ival); + break; + case CASE(NCJ_STRING,NCJ_DOUBLE): + sscanf(jvalue->string,"%lf",&output->dval); + break; + case CASE(NCJ_STRING,NCJ_STRING): + output->sval = nulldup(jvalue->string); + break; + + default: + stat = NCJ_ERR; + break; + } + +done: + return stat; +} + +static int +listappend(struct NCjlist* list, NCjson* json) +{ + int stat = NCJ_OK; + NCjson** newcontents = NULL; + + assert(list->len == 0 || list->contents != NULL); + if(json == NULL) + {stat = NCJ_ERR; goto done;} + if(list->len == 0) { + nullfree(list->contents); + list->contents = (NCjson**)calloc(2,sizeof(NCjson*)); + if(list->contents == NULL) + {stat = NCJ_ERR; goto done;} + list->contents[0] = json; + list->len++; + } else { + if((newcontents = (NCjson**)calloc((2*list->len)+1,sizeof(NCjson*)))==NULL) + {stat = NCJ_ERR; goto done;} + memcpy(newcontents,list->contents,list->len*sizeof(NCjson*)); + newcontents[list->len] = json; + list->len++; + free(list->contents); + list->contents = newcontents; newcontents = NULL; + } + +done: + nullfree(newcontents); + return stat; +} + +static int +bytesappend(NCJbuf* buf, const char* s) +{ + int stat = NCJ_OK; + char* newtext = NULL; + if(buf == NULL) + {stat = NCJ_ERR; goto done;} + if(s == NULL) s = ""; + if(buf->len == 0) { + assert(buf->text == NULL); + buf->text = strdup(s); + if(buf->text == NULL) + {stat = NCJ_ERR; goto done;} + buf->len = strlen(s); + } else { + size_t slen = strlen(s); + size_t newlen = buf->len + slen + 1; + if((newtext = (char*)malloc(newlen))==NULL) + {stat = NCJ_ERR; goto done;} + strcpy(newtext,buf->text); + strcat(newtext,s); + free(buf->text); buf->text = NULL; + buf->text = newtext; newtext = NULL; + buf->len = newlen; + } + +done: + nullfree(newtext); + return stat; +} + +static int +bytesappendc(NCJbuf* bufp, const char c) +{ + char s[2]; + s[0] = c; + s[1] = '\0'; + return bytesappend(bufp,s); +} diff --git a/libdispatch/ncxcache.c 
b/libdispatch/ncxcache.c index c6d7a0cd90..ee1602d6f6 100644 --- a/libdispatch/ncxcache.c +++ b/libdispatch/ncxcache.c @@ -94,7 +94,7 @@ ncxcachelookup(NCxcache* NCxcache, ncexhashkey_t hkey, void** op) if(NCxcache == NULL) return THROW(NC_EINVAL); assert(NCxcache->map != NULL); if((stat=ncexhashget(NCxcache->map,hkey,&inode))) - {stat = THROW(NC_ENOTFOUND); goto done;} /* not present */ + {stat = THROW(NC_ENOOBJECT); goto done;} /* not present */ node = (void*)inode; if(op) *op = node->content; @@ -112,7 +112,7 @@ ncxcachetouch(NCxcache* cache, ncexhashkey_t hkey) if(cache == NULL) return THROW(NC_EINVAL); if((stat=ncexhashget(cache->map,hkey,&inode))) - {stat = THROW(NC_ENOTFOUND); goto done;} /* not present */ + {stat = THROW(NC_ENOOBJECT); goto done;} /* not present */ node = (void*)inode; /* unlink */ unlinknode(node); @@ -171,7 +171,7 @@ ncxcacheremove(NCxcache* cache, ncexhashkey_t hkey, void** op) /* Remove from the hash map */ if((stat=ncexhashremove(cache->map,hkey,&inode))) - {stat = NC_ENOTFOUND; goto done;} /* not present */ + {stat = NC_ENOOBJECT; goto done;} /* not present */ node = (NCxnode*)inode; /* unlink */ unlinknode(node); diff --git a/libnczarr/CMakeLists.txt b/libnczarr/CMakeLists.txt index 215ab955f9..86e093b20d 100644 --- a/libnczarr/CMakeLists.txt +++ b/libnczarr/CMakeLists.txt @@ -22,7 +22,6 @@ zfile.c zfilter.c zgrp.c zinternal.c -zjson.c zmap.c zmap_file.c zodom.c @@ -40,7 +39,6 @@ zchunking.h zdispatch.h zincludes.h zinternal.h -zjson.h zmap.h zodom.h zprovenance.h diff --git a/libnczarr/Makefile.am b/libnczarr/Makefile.am index ae4c4321f5..22a0631ce5 100644 --- a/libnczarr/Makefile.am +++ b/libnczarr/Makefile.am @@ -41,7 +41,6 @@ zfile.c \ zfilter.c \ zgrp.c \ zinternal.c \ -zjson.c \ zmap.c \ zmap_file.c \ zodom.c \ @@ -60,7 +59,6 @@ zdispatch.h \ ztracedispatch.h \ zincludes.h \ zinternal.h \ -zjson.h \ zmap.h \ zodom.h \ zprovenance.h \ diff --git a/libnczarr/zjson.c b/libnczarr/obsolete/zjson.c similarity index 96% rename from 
libnczarr/zjson.c rename to libnczarr/obsolete/zjson.c index 2434bb038d..a55fb81f4d 100644 --- a/libnczarr/zjson.c +++ b/libnczarr/obsolete/zjson.c @@ -588,6 +588,27 @@ NCJinsert(NCjson* object, char* key, NCjson* value) return NC_NOERR; } +/* Remove a key-value pair from a dict object. +*/ +int +NCJremove(NCjson* dict, char* key, NCjson** keyp, NCjson** valuep) +{ + int i; + if(dict == NULL || dict->sort != NCJ_DICT) + return NC_ENCZARR; + for(i=0;icontents);i+=2) { + NCjson* tmp = (NCjson*)nclistget(dict->contents,i); + if(strcmp(tmp->value,key)==0) { + if(keyp) *keyp = tmp; + if(valuep) *valuep = (NCjson*)nclistget(dict->contents,i+1); + nclistremove(dict->contents,i+1); + nclistremove(dict->contents,i); + break; + } + } + return NC_NOERR; +} + int NCJaddstring(NCjson* dictarray, int sort, const char* value) { @@ -610,7 +631,7 @@ NCJaddstring(NCjson* dictarray, int sort, const char* value) } int -NCJdictith(NCjson* object, size_t i, const char** keyp, NCjson** valuep) +NCJdictith(NCjson* object, size_t i, NCjson** keyp, NCjson** valuep) { if(object == NULL || object->sort != NCJ_DICT) return NC_EINTERNAL; diff --git a/libnczarr/zjson.h b/libnczarr/obsolete/zjson.h similarity index 92% rename from libnczarr/zjson.h rename to libnczarr/obsolete/zjson.h index 81c092cb66..93d8439479 100644 --- a/libnczarr/zjson.h +++ b/libnczarr/obsolete/zjson.h @@ -55,11 +55,15 @@ EXTERNL int NCJnewstringn(int sort, size_t len, const char* value, NCjson** json */ EXTERNL int NCJinsert(NCjson* object, char* key, NCjson* value); +/* Remove a key-value pair from a dict object. 
+*/ +EXTERNL int NCJremove(NCjson* object, char* key, NCjson** keyp, NCjson** valuep); + /* Insert a string value into a json Dict|Array */ EXTERNL int NCJaddstring(NCjson* dictarray, int sort, const char* value); /* Get ith pair from dict */ -EXTERNL int NCJdictith(NCjson* object, size_t i, const char** keyp, NCjson** valuep); +EXTERNL int NCJdictith(NCjson* object, size_t i, NCjson** keyp, NCjson** valuep); /* Get value for key from dict */ EXTERNL int NCJdictget(NCjson* object, const char* key, NCjson** valuep); diff --git a/libnczarr/zarr.c b/libnczarr/zarr.c index b1ed766016..c874eb709a 100644 --- a/libnczarr/zarr.c +++ b/libnczarr/zarr.c @@ -56,7 +56,7 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr {stat = NC_ENOMEM; goto done;} /* fill in some of the zinfo and zroot fields */ - zinfo->zarr.zarr_version = ZARRVERSION; + zinfo->zarr.zarr_version = atoi(ZARRVERSION); sscanf(NCZARRVERSION,"%lu.%lu.%lu", &zinfo->zarr.nczarr_version.major, &zinfo->zarr.nczarr_version.minor, @@ -94,7 +94,7 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr int ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) { - int i,stat = NC_NOERR; + int stat = NC_NOERR; NC* nc = NULL; NC_GRP_INFO_T* root = NULL; NCURI* uri = NULL; @@ -103,6 +103,8 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) NCZ_FILE_INFO_T* zinfo = NULL; int mode; NClist* modeargs = NULL; + char* nczarr_version = NULL; + char* zarr_format = NULL; ZTRACE(3,"file=%s controls=%s",file->hdr.name,(controls?nczprint_envv(controls):"null")); @@ -137,29 +139,20 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map))) goto done; - if(!(zinfo->controls.flags & FLAG_PUREZARR) - && (stat = NCZ_downloadjson(zinfo->map, NCZMETAROOT, &json)) == NC_NOERR) { - /* Extract the information from it */ - for(i=0;icontents);i+=2) { - const 
NCjson* key = nclistget(json->contents,i); - const NCjson* value = nclistget(json->contents,i+1); - if(strcmp(key->value,"zarr_format")==0) { - if(sscanf(value->value,"%d",&zinfo->zarr.zarr_version)!=1) - {stat = NC_ENOTNC; goto done;} - } else if(strcmp(key->value,"nczarr_version")==0) { - sscanf(value->value,"%lu.%lu.%lu", - &zinfo->zarr.nczarr_version.major, - &zinfo->zarr.nczarr_version.minor, - &zinfo->zarr.nczarr_version.release); - } - } - } else { /* zinfo->controls.purezarr || no object */ - zinfo->zarr.zarr_version = ZARRVERSION; - sscanf(NCZARRVERSION,"%lu.%lu.%lu", + if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done; + + if(nczarr_version == NULL) /* default */ + nczarr_version = strdup(NCZARRVERSION); + if(zarr_format == NULL) /* default */ + zarr_format = strdup(ZARRVERSION); + /* Extract the information from it */ + if(sscanf(zarr_format,"%d",&zinfo->zarr.zarr_version)!=1) + {stat = NC_ENCZARR; goto done;} + if(sscanf(nczarr_version,"%lu.%lu.%lu", &zinfo->zarr.nczarr_version.major, &zinfo->zarr.nczarr_version.minor, - &zinfo->zarr.nczarr_version.release); - } + &zinfo->zarr.nczarr_version.release) == 0) + {stat = NC_ENCZARR; goto done;} /* Load auth info from rc file */ if((stat = ncuriparse(nc->path,&uri))) goto done; @@ -169,6 +162,8 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) } done: + nullfree(zarr_format); + nullfree(nczarr_version); ncurifree(uri); nclistfreeall(modeargs); if(json) NCJreclaim(json); @@ -176,7 +171,6 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) return ZUNTRACE(stat); } - /** * @internal Determine whether file is netCDF-4. 
* @@ -276,9 +270,9 @@ ncz_open_rootgroup(NC_FILE_INFO_T* dataset) for(i=0;icontents);i+=2) { const NCjson* key = nclistget(json->contents,i); const NCjson* value = nclistget(json->contents,i+1); - if(strcmp(key->value,"zarr_format")==0) { + if(strcmp(NCJstring(key),"zarr_format")==0) { int zversion; - if(sscanf(value->value,"%d",&zversion)!=1) + if(sscanf(NCJstring(value),"%d",&zversion)!=1) {stat = NC_ENOTNC; goto done;} /* Verify against the dataset */ if(zversion != zfile->zarr.zarr_version) @@ -294,6 +288,7 @@ ncz_open_rootgroup(NC_FILE_INFO_T* dataset) } #endif +#if 0 /** @internal Rewrite attributes into a group or var @param map - [in] the map object for storage @@ -331,19 +326,26 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs if((stat = nczm_concat(fullpath,ZATTRS,&akey))) goto done; + /* Always write as V2 */ + + { + NCjson* k = NULL; + NCjson* v = NULL; + /* remove any previous version */ + if(NCJremove(jattrs,NCZ_V2_ATTRS,&k,&v) == NC_NOERR) { + NCJreclaim(k); NCJreclaim(v); + } + } + + if(!(zinfo->controls.flags & FLAG_PUREZARR)) { + /* Insert the jtypes into the set of attributes */ + if((stat = NCJinsert(jattrs,NCZ_V2_ATTRS,jtypes))) goto done; + } + /* Upload the .zattrs object */ if((stat=NCZ_uploadjson(map,tkey,jattrs))) goto done; - if(!(zinfo->controls.flags & FLAG_PUREZARR)) { - /* Construct the path to the .nczattr object */ - if((stat = nczm_concat(fullpath,NCZATTRS,&tkey))) - goto done; - /* Upload the .nczattr object */ - if((stat=NCZ_uploadjson(map,tkey,jtypes))) - goto done; - } - done: if(stat) { NCJreclaim(jattrs); @@ -354,6 +356,7 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs nullfree(tkey); return stat; } +#endif static const char* controllookup(const char** envv_controls, const char* key) diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 5e5ac5e72a..0922e3c012 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -19,6 +19,7 @@ extern int 
ncz_create_dataset(NC_FILE_INFO_T*, NC_GRP_INFO_T*, const char** cont extern int ncz_open_dataset(NC_FILE_INFO_T*, const char** controls); extern int ncz_del_attr(NC_FILE_INFO_T* file, NC_OBJ* container, const char* name); +/* HDF5 Mimics */ extern int NCZ_isnetcdf4(struct NC_FILE_INFO*); extern int NCZ_get_libversion(unsigned long* majorp, unsigned long* minorp,unsigned long* releasep); extern int NCZ_get_superblock(NC_FILE_INFO_T* file, int* superblockp); @@ -42,7 +43,7 @@ extern int ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container); extern int ncz_read_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp); extern int ncz_read_file(NC_FILE_INFO_T* file); extern int ncz_write_var(NC_VAR_INFO_T* var); -extern int ncz_create_superblock(NCZ_FILE_INFO_T* zinfo); +extern int ncz_read_superblock(NC_FILE_INFO_T* zinfo, char** nczarrvp, char** zarrfp); /* zutil.c */ extern int NCZ_grpkey(const NC_GRP_INFO_T* grp, char** pathp); @@ -52,6 +53,7 @@ extern int ncz_splitkey(const char* path, NClist* segments); extern int NCZ_readdict(NCZMAP* zmap, const char* key, NCjson** jsonp); extern int NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp); extern int ncz_zarr_type_name(nc_type nctype, int little, const char** znamep); +extern int ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep); extern int ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianness); extern int ncz_fill_value_sort(nc_type nctype, int*); extern int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size); diff --git a/libnczarr/zcvt.c b/libnczarr/zcvt.c index cb0d388cb9..374eda5b00 100644 --- a/libnczarr/zcvt.c +++ b/libnczarr/zcvt.c @@ -33,35 +33,36 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) /* Convert the incoming jsrc string to a restricted set of values */ switch (jsrc->sort) { case NCJ_INT: /* convert to (u)int64 */ - if(jsrc->value[0] == '-') { - if(sscanf(jsrc->value,"%lld",&zcvt.int64v) != 1) + if(NCJstring(jsrc)[0] == '-') { + 
if(sscanf(NCJstring(jsrc),"%lld",&zcvt.int64v) != 1) {stat = NC_EINVAL; goto done;} srctype = NC_INT64; } else { - if(sscanf(jsrc->value,"%llu",&zcvt.uint64v) != 1) + if(sscanf(NCJstring(jsrc),"%llu",&zcvt.uint64v) != 1) {stat = NC_EINVAL; goto done;} srctype = NC_UINT64; } break; + case NCJ_STRING: case NCJ_DOUBLE: /* Capture nan and infinity values */ - if(strcasecmp(jsrc->value,"nan")==0) + if(strcasecmp(NCJstring(jsrc),"nan")==0) zcvt.float64v = NAN; - else if(strcasecmp(jsrc->value,"-nan")==0) + else if(strcasecmp(NCJstring(jsrc),"-nan")==0) zcvt.float64v = - NAN; - else if(strcasecmp(jsrc->value,"infinity")==0) + else if(strcasecmp(NCJstring(jsrc),"infinity")==0) zcvt.float64v = INFINITY; - else if(strcasecmp(jsrc->value,"-infinity")==0) + else if(strcasecmp(NCJstring(jsrc),"-infinity")==0) zcvt.float64v = (- INFINITY); else { - if(sscanf(jsrc->value,"%lg",&zcvt.float64v) != 1) + if(sscanf(NCJstring(jsrc),"%lg",&zcvt.float64v) != 1) {stat = NC_EINVAL; goto done;} } srctype = NC_DOUBLE; break; case NCJ_BOOLEAN: srctype = NC_UINT64; - if(strcasecmp(jsrc->value,"false")==0) + if(strcasecmp(NCJstring(jsrc),"false")==0) zcvt.uint64v = 0; else zcvt.uint64v = 1; @@ -373,12 +374,12 @@ NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) default: stat = NC_EINTERNAL; goto done; } if(special) {nullfree(str); str = strdup(special);} - jvalue->value = str; + NCJstring(jvalue) = str; str = NULL; if(len == 1) jdata = jvalue; else - nclistpush(jdata->contents,jvalue); + NCJappend(jdata,jvalue); jvalue = NULL; src += typelen; } diff --git a/libnczarr/zdebug.h b/libnczarr/zdebug.h index a7cbd5be2d..301c55f2c8 100644 --- a/libnczarr/zdebug.h +++ b/libnczarr/zdebug.h @@ -28,8 +28,8 @@ EXTERNL int zthrow(int err, const char* fname, const char* fcn, int line); #ifdef ZTRACING #define ZTRACE(level,fmt,...) nctrace((level),__func__,fmt,##__VA_ARGS__) #define ZTRACEMORE(level,fmt,...) 
nctracemore((level),fmt,##__VA_ARGS__) -#define ZUNTRACE(e) ncuntrace(__func__,(e),NULL) -#define ZUNTRACEX(e,fmt,...) ncuntrace(__func__,(e),fmt,##__VA_ARGS__) +#define ZUNTRACE(e) ncuntrace(__func__,THROW(e),NULL) +#define ZUNTRACEX(e,fmt,...) ncuntrace(__func__,THROW(e),fmt,##__VA_ARGS__) #else #define ZTRACE(level,fmt,...) #define ZTRACEMORE(level,fmt,...) diff --git a/libnczarr/zincludes.h b/libnczarr/zincludes.h index 893c2b04c0..c6df092d7e 100644 --- a/libnczarr/zincludes.h +++ b/libnczarr/zincludes.h @@ -45,6 +45,7 @@ extern "C" { #include "nclog.h" #include "ncrc.h" #include "ncindex.h" +#include "ncjson.h" #include "zmap.h" #include "zinternal.h" @@ -53,7 +54,6 @@ extern "C" { #include "zodom.h" #include "zchunking.h" #include "zcache.h" -#include "zjson.h" #include "zarr.h" #include "zdebug.h" diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index ee34cd198d..a666181874 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -12,13 +12,13 @@ #ifndef ZINTERNAL_H #define ZINTERNAL_H -#define ZARRVERSION 2 +#define ZARRVERSION "2" -/* NCZARRVERSION is ndependent of Zarr version, +/* NCZARRVERSION is independent of Zarr version, but NCZARRVERSION => ZARRVERSION */ -#define NCZARRVERSION "1.0.0" +#define NCZARRVERSION "2.0.0" -/* These have to do with creating chuncked datasets in ZARR. */ +/* These have to do with creating chunked datasets in ZARR. 
*/ #define NCZ_CHUNKSIZE_FACTOR (10) #define NCZ_MIN_CHUNK_SIZE (2) @@ -42,19 +42,46 @@ # endif #endif +/* V1 reserved objects */ #define NCZMETAROOT "/.nczarr" #define NCZGROUP ".nczgroup" #define NCZARRAY ".nczarray" #define NCZATTRS ".nczattrs" - /* Deprecated */ #define NCZVARDEP ".nczvar" #define NCZATTRDEP ".nczattr" +#define ZMETAROOT "/.zgroup" #define ZGROUP ".zgroup" #define ZATTRS ".zattrs" #define ZARRAY ".zarray" +/* V2 Reserved Attributes */ +/* +Inserted into /.zgroup +_NCZARR_SUPERBLOCK: {"version": "2.0.0"} +Inserted into any .zgroup +"_NCZARR_GROUP": "{ +\"dimensions\": {\"d1\": \"1\", \"d2\": \"1\",...} +\"variables\": [\"v1\", \"v2\", ...] +\"groups\": [\"g1\", \"g2\", ...] +}" +Inserted into any .zarray +"_NCZARR_ARRAY": "{ +\"dimensions\": [\"/g1/g2/d1\", \"/d2\",...] +\"storage\": \"scalar\"|\"contiguous\"|\"compact\"|\"chunked\" +}" +Inserted into any .zattrs ? or should it go into the container? +"_NCZARR_ATTRS": "{ +\"types\": {\"attr1\": \"path,&truepath))) goto done; - -#ifdef CHECKNESTEDDATASETS - if(isnesteddataset(truepath)) - {stat = NC_ENCZARR; goto done;} -#endif + if((stat = NCpathcanonical(url->path,&canonpath))) goto done; - /* Build the z4 state */ + /* Build the zmap state */ if((zfmap = calloc(1,sizeof(ZFMAP))) == NULL) {stat = NC_ENOMEM; goto done;} @@ -217,8 +213,8 @@ zfilecreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP /* create => NC_WRITE */ zfmap->map.mode = mode; zfmap->map.api = &zapi; - zfmap->root = truepath; - truepath = NULL; + zfmap->root = canonpath; + canonpath = NULL; /* If NC_CLOBBER, then delete below file tree */ if(!fIsSet(mode,NC_NOCLOBBER)) @@ -234,7 +230,7 @@ zfilecreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP done: ncurifree(url); - nullfree(truepath); + nullfree(canonpath); if(stat) zfileclose((NCZMAP*)zfmap,1); return ZUNTRACE(stat); @@ -253,7 +249,7 @@ static int zfileopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** 
mapp) { int stat = NC_NOERR; - char* truepath = NULL; + char* canonpath = NULL; ZFMAP* zfmap = NULL; NCURI*url = NULL; @@ -272,9 +268,9 @@ zfileopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** {stat = NC_EURL; goto done;} /* Canonicalize the root path */ - if((stat = nczm_canonicalpath(url->path,&truepath))) goto done; + if((stat = NCpathcanonical(url->path,&canonpath))) goto done; - /* Build the z4 state */ + /* Build the zmap state */ if((zfmap = calloc(1,sizeof(ZFMAP))) == NULL) {stat = NC_ENOMEM; goto done;} @@ -283,8 +279,8 @@ zfileopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** zfmap->map.flags = flags; zfmap->map.mode = mode; zfmap->map.api = (NCZMAP_API*)&zapi; - zfmap->root = truepath; - truepath = NULL; + zfmap->root = canonpath; + canonpath = NULL; /* Verify root dir exists */ if((stat = platformopendir(zfmap,zfmap->root))) @@ -296,7 +292,7 @@ zfileopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** done: ncurifree(url); - nullfree(truepath); + nullfree(canonpath); if(stat) zfileclose((NCZMAP*)zfmap,0); return ZUNTRACE(stat); } @@ -314,7 +310,7 @@ zfileexists(NCZMAP* map, const char* key) ZTRACE(5,"map=%s key=%s",zfmap->map.url,key); switch(stat=zflookupobj(zfmap,key,&fd)) { case NC_NOERR: break; - case NC_ENOTFOUND: stat = NC_EEMPTY; + case NC_ENOOBJECT: stat = NC_EEMPTY; case NC_EEMPTY: break; default: break; } @@ -337,7 +333,7 @@ zfilelen(NCZMAP* map, const char* key, size64_t* lenp) /* Get file size */ if((stat=platformseek(zfmap, &fd, SEEK_END, &len))) goto done; break; - case NC_ENOTFOUND: stat = NC_EEMPTY; + case NC_ENOOBJECT: stat = NC_EEMPTY; case NC_EEMPTY: break; default: break; } @@ -367,7 +363,7 @@ zfileread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* co if((stat = platformseek(zfmap, &fd, SEEK_SET, &start))) goto done; if((stat = platformread(zfmap, &fd, count, content))) goto done; break; - case NC_ENOTFOUND: stat = NC_EEMPTY; + case 
NC_ENOOBJECT: stat = NC_EEMPTY; case NC_EEMPTY: break; default: break; } @@ -393,8 +389,9 @@ zfilewrite(NCZMAP* map, const char* key, size64_t start, size64_t count, const v #endif switch (stat = zflookupobj(zfmap,key,&fd)) { - case NC_ENOTFOUND: + case NC_ENOOBJECT: case NC_EEMPTY: + stat = NC_NOERR; /* Create the directories leading to this */ if((stat = zfcreategroup(zfmap,key,SKIPLAST))) goto done; /* Create truepath */ @@ -427,7 +424,7 @@ zfileclose(NCZMAP* map, int delete) /* Delete the subtree below the root and the root */ if(delete) { stat = platformdelete(zfmap,zfmap->root,1); - unlink(zfmap->root); + zfunlink(zfmap->root); } nczm_clear(map); nullfree(zfmap->root); @@ -447,7 +444,7 @@ zfilesearch(NCZMAP* map, const char* prefixkey, NClist* matches) { int stat = NC_NOERR; ZFMAP* zfmap = (ZFMAP*)map; - char* truepath = NULL; + char* fullpath = NULL; NClist* nextlevel = nclistnew(); NCbytes* buf = ncbytesnew(); @@ -455,17 +452,17 @@ zfilesearch(NCZMAP* map, const char* prefixkey, NClist* matches) /* Make the root path be true */ if(prefixkey == NULL || strlen(prefixkey)==0 || strcmp(prefixkey,"/")==0) - truepath = strdup(zfmap->root); - else if((stat = nczm_concat(zfmap->root,prefixkey,&truepath))) goto done; + fullpath = strdup(zfmap->root); + else if((stat = nczm_concat(zfmap->root,prefixkey,&fullpath))) goto done; /* get names of the next level path entries */ - switch (stat = platformdircontent(zfmap, truepath, nextlevel)) { + switch (stat = platformdircontent(zfmap, fullpath, nextlevel)) { case NC_NOERR: /* ok */ break; case NC_EEMPTY: /* not a dir */ stat = NC_NOERR; goto done; - case NC_ENOTFOUND: /* does not exist */ + case NC_ENOOBJECT: default: goto done; } @@ -477,13 +474,23 @@ zfilesearch(NCZMAP* map, const char* prefixkey, NClist* matches) done: nclistfreeall(nextlevel); ncbytesfree(buf); - nullfree(truepath); + nullfree(fullpath); return ZUNTRACEX(stat,"|matches|=%d",(int)nclistlength(matches)); } 
/**************************************************/ /* Utilities */ +static void +zfunlink(const char* canonpath) +{ + char* local = NULL; + if((local = NCpathcvt(canonpath))==NULL) goto done; + unlink(local); +done: + nullfree(local); +} + /* Lookup a group by parsed path (segments)*/ /* Return NC_EEMPTY if not found, NC_EINVAL if not a directory; create if create flag is set */ static int @@ -519,7 +526,7 @@ zfcreategroup(ZFMAP* zfmap, const char* key, int nskip) /* Lookup an object @return NC_NOERR if found and is a content-bearing object @return NC_EEMPTY if exists but is not-content-bearing -@return NC_ENOTFOUND if not found +@return NC_ENOOBJECT if not found */ static int zflookupobj(ZFMAP* zfmap, const char* key, FD* fd) @@ -582,6 +589,8 @@ zffullpath(ZFMAP* zfmap, const char* key, char** pathp) size_t klen, pxlen, flen; char* path = NULL; + ZTRACE(6,"map=%s key=%s",zfmap->map.url,key); + klen = nulllen(key); pxlen = strlen(zfmap->root); flen = klen+pxlen+1+1; @@ -597,7 +606,7 @@ zffullpath(ZFMAP* zfmap, const char* key, char** pathp) if(pathp) {*pathp = path; path = NULL;} done: nullfree(path) - return stat; + return ZUNTRACEX(stat,"path=%s",(pathp?*pathp:"null")); } static int @@ -621,51 +630,46 @@ zfparseurl(const char* path0, NCURI** urip) static int platformerr(int err) { - switch (err) { - case ENOENT: err = NC_ENOTFOUND; break; /* File does not exist */ + ZTRACE(6,"err=%d",err); + switch (err) { + case ENOENT: err = NC_ENOOBJECT; break; /* File does not exist */ case ENOTDIR: err = NC_EEMPTY; break; /* no content */ case EACCES: err = NC_EAUTH; break; /* file permissions */ case EPERM: err = NC_EAUTH; break; /* ditto */ default: break; } - return err; + return ZUNTRACE(err); } /* Test type of the specified file. 
@return NC_NOERR if found and is a content-bearing object (file) @return NC_EEMPTY if exists but is not-content-bearing (a directory) -@return NC_ENOTFOUND if not found +@return NC_ENOOBJECT if not found */ static int -platformtestcontentbearing(ZFMAP* zfmap, const char* truepath) +platformtestcontentbearing(ZFMAP* zfmap, const char* canonpath) { int ret = 0; struct stat buf; - char* local = NULL; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); - /* Localize */ - if((ret = nczm_localize(truepath,&local,LOCALIZE))) goto done; - errno = 0; - ret = NCstat(local, &buf); - ZTRACEMORE(6,"stat: local=%s ret=%d, errno=%d st_mode=%d",local,ret,errno,buf.st_mode); + ret = NCstat(canonpath, &buf); + ZTRACEMORE(6,"\tstat: ret=%d, errno=%d st_mode=%d",ret,errno,buf.st_mode); if(ret < 0) { ret = platformerr(errno); } else if(S_ISDIR(buf.st_mode)) { ret = NC_EEMPTY; } else ret = NC_NOERR; -done: - nullfree(local); errno = 0; return ZUNTRACE(ret); } /* Create a file */ static int -platformcreatefile(ZFMAP* zfmap, const char* truepath, FD* fd) +platformcreatefile(ZFMAP* zfmap, const char* canonpath, FD* fd) { int stat = NC_NOERR; int ioflags = 0; @@ -673,7 +677,7 @@ platformcreatefile(ZFMAP* zfmap, const char* truepath, FD* fd) int mode = zfmap->map.mode; int permissions = NC_DEFAULT_ROPEN_PERMS; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); errno = 0; if(!fIsSet(mode, NC_WRITE)) @@ -694,10 +698,10 @@ platformcreatefile(ZFMAP* zfmap, const char* truepath, FD* fd) if(fIsSet(mode,NC_WRITE)) createflags = (ioflags|O_CREAT); - /* Try to create file (will also localize) */ - fd->fd = NCopen3(truepath, createflags, permissions); + /* Try to create file (will also NCpathcvt) */ + fd->fd = NCopen3(canonpath, createflags, permissions); if(fd->fd < 0) { /* could not create */ - stat = platformerr(errno); + stat = platformerr(errno); goto done; /* could 
not open */ } done: @@ -707,14 +711,14 @@ platformcreatefile(ZFMAP* zfmap, const char* truepath, FD* fd) /* Open a file; fail if it does not exist */ static int -platformopenfile(ZFMAP* zfmap, const char* truepath, FD* fd) +platformopenfile(ZFMAP* zfmap, const char* canonpath, FD* fd) { int stat = NC_NOERR; int ioflags = 0; int mode = zfmap->map.mode; int permissions = 0; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); errno = 0; if(!fIsSet(mode, NC_WRITE)) { @@ -729,14 +733,16 @@ platformopenfile(ZFMAP* zfmap, const char* truepath, FD* fd) #endif #ifdef VERIFY - if(!verify(truepath,!FLAG_ISDIR)) + if(!verify(canonpath,!FLAG_ISDIR)) assert(!"expected file, have dir"); #endif /* Try to open file (will localize) */ - fd->fd = NCopen3(truepath, ioflags, permissions); + fd->fd = NCopen3(canonpath, ioflags, permissions); if(fd->fd < 0) - {stat = platformerr(errno); goto done;} /* could not open */ + { +fprintf(stderr,"xxx: canonpath=%s\n",canonpath); +stat = platformerr(errno); goto done;} /* could not open */ done: errno = 0; return ZUNTRACEX(stat,"fd=%d",(fd?fd->fd:-1)); @@ -744,24 +750,24 @@ platformopenfile(ZFMAP* zfmap, const char* truepath, FD* fd) /* Create a dir */ static int -platformcreatedir(ZFMAP* zfmap, const char* truepath) +platformcreatedir(ZFMAP* zfmap, const char* canonpath) { int ret = NC_NOERR; int mode = zfmap->map.mode; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); errno = 0; /* Try to access file as if it exists */ - ret = NCaccess(truepath,ACCESS_MODE_EXISTS); + ret = NCaccess(canonpath,ACCESS_MODE_EXISTS); if(ret < 0) { /* it does not exist, then it can be anything */ if(fIsSet(mode,NC_WRITE)) { /* Try to create it */ /* Create the directory using mkdir */ - if(NCmkdir(truepath,NC_DEFAULT_DIR_PERMS) < 0) + if(NCmkdir(canonpath,NC_DEFAULT_DIR_PERMS) < 0) {ret = platformerr(errno); goto done;} /* try to 
access again */ - ret = NCaccess(truepath,ACCESS_MODE_EXISTS); + ret = NCaccess(canonpath,ACCESS_MODE_EXISTS); if(ret < 0) {ret = platformerr(errno); goto done;} } else @@ -775,15 +781,15 @@ platformcreatedir(ZFMAP* zfmap, const char* truepath) /* Open a dir; fail if it does not exist */ static int -platformopendir(ZFMAP* zfmap, const char* truepath) +platformopendir(ZFMAP* zfmap, const char* canonpath) { int ret = NC_NOERR; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); errno = 0; /* Try to access file as if it exists */ - ret = NCaccess(truepath,ACCESS_MODE_EXISTS); + ret = NCaccess(canonpath,ACCESS_MODE_EXISTS); if(ret < 0) {ret = platformerr(errno); goto done;} done: @@ -806,7 +812,7 @@ There are several possibilities: #ifdef _WIN32 static int -platformdircontent(ZFMAP* zfmap, const char* truepath, NClist* contents) +platformdircontent(ZFMAP* zfmap, const char* canonpath, NClist* contents) { int ret = NC_NOERR; errno = 0; @@ -817,20 +823,20 @@ platformdircontent(ZFMAP* zfmap, const char* truepath, NClist* contents) size_t len; char* d = NULL; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); - switch (ret = platformtestcontentbearing(zfmap, truepath)) { + switch (ret = platformtestcontentbearing(zfmap, canonpath)) { case NC_EEMPTY: ret = NC_NOERR; break; /* directory */ case NC_NOERR: ret = NC_EEMPTY; goto done; default: goto done; } /* We need to process the path to make it work with FindFirstFile */ - len = strlen(truepath); + len = strlen(canonpath); /* Need to terminate path with '/''*' */ ffpath = (char*)malloc(len+2+1); - memcpy(ffpath,truepath,len); - if(truepath[len-1] != '/') { + memcpy(ffpath,canonpath,len); + if(canonpath[len-1] != '/') { ffpath[len] = '/'; len++; } @@ -838,7 +844,8 @@ platformdircontent(ZFMAP* zfmap, const char* truepath, NClist* contents) ffpath[len] = '\0'; /* localize it */ - if((ret = 
nczm_localize(ffpath,&lpath,LOCALIZE))) goto done; + if((lpath = NCpathcvt(ffpath))==NULL) + {ret = NC_ENOMEM; goto done;} dir = FindFirstFile(lpath, &FindFileData); if(dir == INVALID_HANDLE_VALUE) { /* Distinquish not-a-directory from no-matching-file */ @@ -873,21 +880,21 @@ platformdircontent(ZFMAP* zfmap, const char* truepath, NClist* contents) #else /*!_WIN32*/ static int -platformdircontent(ZFMAP* zfmap, const char* truepath, NClist* contents) +platformdircontent(ZFMAP* zfmap, const char* canonpath, NClist* contents) { int ret = NC_NOERR; errno = 0; DIR* dir = NULL; - ZTRACE(6,"map=%s truepath=%s",zfmap->map.url,truepath); + ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); - switch (ret = platformtestcontentbearing(zfmap, truepath)) { + switch (ret = platformtestcontentbearing(zfmap, canonpath)) { case NC_EEMPTY: ret = NC_NOERR; break; /* directory */ case NC_NOERR: ret = NC_EEMPTY; goto done; default: goto done; } - dir = NCopendir(truepath); + dir = NCopendir(canonpath); if(dir == NULL) {ret = platformerr(errno); goto done;} for(;;) { @@ -972,17 +979,18 @@ platformdeleter(ZFMAP* zfmap, NClist* segments, int depth) #endif /*0*/ static int -platformdeleter(ZFMAP* zfmap, NCbytes* truepath, int delroot, int depth) +platformdeleter(ZFMAP* zfmap, NCbytes* canonpath, int delroot, int depth) { int ret = NC_NOERR; int i; - NClist* contents = nclistnew(); - size_t tpathlen = ncbyteslength(truepath); + NClist* subfiles = nclistnew(); + size_t tpathlen = ncbyteslength(canonpath); char* local = NULL; - ZTRACE(6,"map=%s truepath=%s delroot=%d depth=%d",zfmap->map.url,truepath,delroot,depth); + local = ncbytescontents(canonpath); + ZTRACE(6,"map=%s canonpath=%s delroot=%d depth=%d",zfmap->map.url,local,delroot,depth); - ret = platformdircontent(zfmap, ncbytescontents(truepath), contents); + ret = platformdircontent(zfmap, local, subfiles); #ifdef DEBUG {int i; fprintf(stderr,"xxx: contents:\n"); @@ -993,21 +1001,20 @@ platformdeleter(ZFMAP* zfmap, NCbytes* 
truepath, int delroot, int depth) #endif switch (ret) { case NC_NOERR: /* recurse to remove levels below */ - for(i=0;i 0 || delroot) { - /* localize and delete */ - if((ret = nczm_localize(ncbytescontents(truepath),&local,LOCALIZE))) goto done; #ifdef DEBUG -fprintf(stderr,"xxx: remove: %s\n",local); +fprintf(stderr,"xxx: remove: %s\n",canonpath); #endif if(NCrmdir(local) < 0) { /* kill this dir */ #ifdef DEBUG @@ -1020,11 +1027,8 @@ fprintf(stderr,"xxx: remove: errno=%d|%s\n",errno,nc_strerror(errno)); break; case NC_EEMPTY: /* Not a directory */ ret = NC_NOERR; - /* localize and delete */ - if(local) {nullfree(local); local = NULL;} - if((ret = nczm_localize(ncbytescontents(truepath),&local,LOCALIZE))) goto done; #ifdef DEBUG -fprintf(stderr,"xxx: remove: %s\n",local); +fprintf(stderr,"xxx: remove: %s\n",canonpath); #endif if(NCremove(local) < 0) {/* kill this file */ #ifdef DEBUG @@ -1040,10 +1044,10 @@ fprintf(stderr,"xxx: remove: errno=%d|%s\n",errno,nc_strerror(errno)); } done: - nclistfreeall(contents); - nullfree(local); - ncbytessetlength(truepath,tpathlen); - ncbytesnull(truepath); + errno = 0; + nclistfreeall(subfiles); + ncbytessetlength(canonpath,tpathlen); + ncbytesnull(canonpath); return ZUNTRACE(ret); } @@ -1052,17 +1056,17 @@ static int platformdelete(ZFMAP* zfmap, const char* rootpath, int delroot) { int stat = NC_NOERR; - NCbytes* truepath = ncbytesnew(); + NCbytes* canonpath = ncbytesnew(); ZTRACE(6,"map=%s rootpath=%s delroot=%d",zfmap->map.url,rootpath,delroot); if(rootpath == NULL || strlen(rootpath) == 0) goto done; - ncbytescat(truepath,rootpath); + ncbytescat(canonpath,rootpath); if(rootpath[strlen(rootpath)-1] == '/') /* elide trailing '/' */ - ncbytessetlength(truepath,ncbyteslength(truepath)-1); - if((stat = platformdeleter(zfmap,truepath,delroot,0))) goto done; + ncbytessetlength(canonpath,ncbyteslength(canonpath)-1); + if((stat = platformdeleter(zfmap,canonpath,delroot,0))) goto done; done: - ncbytesfree(truepath); + 
ncbytesfree(canonpath); errno = 0; return ZUNTRACE(stat); } @@ -1079,7 +1083,7 @@ platformseek(ZFMAP* zfmap, FD* fd, int pos, size64_t* sizep) ZTRACE(6,"map=%s fd=%d pos=%d",zfmap->map.url,(fd?fd->fd:-1),pos); errno = 0; - ret = fstat(fd->fd, &statbuf); + ret = NCfstat(fd->fd, &statbuf); if(ret < 0) {ret = platformerr(errno); goto done;} if(sizep) size = *sizep; else size = 0; @@ -1109,6 +1113,7 @@ platformread(ZFMAP* zfmap, FD* fd, size64_t count, void* content) readpoint += red; } done: + errno = 0; return ZUNTRACE(stat); } @@ -1195,21 +1200,21 @@ verify(const char* path, int isdir) #endif #if 0 -/* Return NC_EINVAL if path does not exist; els 1/0 in isdirp and local path in truepathp */ +/* Return NC_EINVAL if path does not exist; els 1/0 in isdirp and local path in canonpathp */ static int -testifdir(const char* path, int* isdirp, char** truepathp) +testifdir(const char* path, int* isdirp, char** canonpathp) { int ret = NC_NOERR; char* tmp = NULL; - char* truepath = NULL; + char* canonpath = NULL; struct stat statbuf; /* Make path be windows compatible */ if((ret = nczm_fixpath(path,&tmp))) goto done; - if((truepath = NCpathcvt(tmp))==NULL) {ret = NC_ENOMEM; goto done;} + if((canonpath = NCpathcvt(tmp))==NULL) {ret = NC_ENOMEM; goto done;} errno = 0; - ret = NCstat(truepath, &statbuf); + ret = NCstat(canonpath, &statbuf); if(ret < 0) { if(errno == ENOENT) ret = NC_ENOTFOUND; /* path does not exist */ @@ -1221,11 +1226,11 @@ testifdir(const char* path, int* isdirp, char** truepathp) if(isdirp) { if(S_ISDIR(statbuf.st_mode)) {*isdirp = 1;} else {*isdirp = 0;} } - if(truepathp) {*truepathp = truepath; truepath = NULL;} + if(canonpathp) {*canonpathp = canonpath; canonpath = NULL;} done: errno = 0; nullfree(tmp); - nullfree(truepath); + nullfree(canonpath); return ZUNTRACE(ret); } #endif /* 0 */ diff --git a/libnczarr/zmap_s3sdk.c b/libnczarr/zmap_s3sdk.c index ffed086af5..389134e441 100644 --- a/libnczarr/zmap_s3sdk.c +++ b/libnczarr/zmap_s3sdk.c @@ -165,7 +165,7 
@@ zs3create(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** stat = NC_NOERR; /* which is what we want */ errclear(z3map); break; - case NC_NOERR: stat = NC_EFOUND; goto done; /* already exists */ + case NC_NOERR: stat = NC_EOBJECT; goto done; /* already exists */ default: reporterr(z3map); goto done; } if(!stat) { @@ -234,8 +234,8 @@ zs3open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m if((stat = NCZ_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) goto done; if(nkeys == 0) { - /* dataset does not actually exist; we choose to return ENOTFOUND instead of EEMPTY */ - stat = NC_ENOTFOUND; + /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ + stat = NC_ENOOBJECT; goto done; } if(mapp) *mapp = (NCZMAP*)z3map; diff --git a/libnczarr/zmap_zip.c b/libnczarr/zmap_zip.c index 289b1e44f0..4cb375be3a 100755 --- a/libnczarr/zmap_zip.c +++ b/libnczarr/zmap_zip.c @@ -125,7 +125,7 @@ zipcreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** {stat = NC_EURL; goto done;} /* Canonicalize the root path */ - if((stat = nczm_canonicalpath(url->path,&truepath))) goto done; + if((stat = NCpathcanonical(url->path,&truepath))) goto done; /* Extract the dataset name */ if((stat = nczm_basename(truepath,&dataset))) goto done; @@ -210,7 +210,7 @@ zipopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m {stat = NC_EURL; goto done;} /* Canonicalize the root path */ - if((stat = nczm_canonicalpath(url->path,&truepath))) goto done; + if((stat = NCpathcanonical(url->path,&truepath))) goto done; /* Build the zz state */ if((zzmap = calloc(1,sizeof(ZZMAP))) == NULL) @@ -278,7 +278,7 @@ zipexists(NCZMAP* map, const char* key) ZTRACE(6,"map=%s key=%s",map->url,key); switch(stat=zzlookupobj(zzmap,key,&zindex)) { case NC_NOERR: break; - case NC_ENOTFOUND: stat = NC_EEMPTY; break; + case NC_ENOOBJECT: stat = NC_EEMPTY; break; 
case NC_EEMPTY: break; default: break; } @@ -299,7 +299,7 @@ ziplen(NCZMAP* map, const char* key, size64_t* lenp) case NC_NOERR: if((stat = zzlen(zzmap,zindex,&len))) goto done; break; - case NC_ENOTFOUND: stat = NC_EEMPTY; len = 0; break; + case NC_ENOOBJECT: stat = NC_EEMPTY; len = 0; break; case NC_EEMPTY: len = 0; break; /* |dir|==0 */ default: goto done; } @@ -328,7 +328,7 @@ zipread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* cont switch(stat = zzlookupobj(zzmap,key,&zindex)) { case NC_NOERR: break; - case NC_ENOTFOUND: stat = NC_EEMPTY; /* fall thru */ + case NC_ENOOBJECT: stat = NC_EEMPTY; /* fall thru */ case NC_EEMPTY: /* its a dir; fall thru*/ default: goto done; } @@ -395,10 +395,10 @@ zipwrite(NCZMAP* map, const char* key, size64_t start, size64_t count, const voi switch(stat = zzlookupobj(zzmap,key,&zindex)) { case NC_NOERR: - stat = NC_EFOUND; //goto done; /* Zip files are write once */ + stat = NC_EOBJECT; //goto done; /* Zip files are write once */ zflags |= ZIP_FL_OVERWRITE; break; - case NC_ENOTFOUND: stat = NC_NOERR; break; + case NC_ENOOBJECT: stat = NC_NOERR; break; case NC_EEMPTY: /* its a dir; fall thru */ default: goto done; } @@ -621,7 +621,7 @@ zzcreategroup(ZZMAP* zzmap, const char* key, int nskip) /* open and/or create the directory */ if((zindex = zip_dir_add(zzmap->archive, ncbytescontents(path), zipflags))<0) { switch(stat = zipmaperr(zzmap)) { - case NC_EFOUND: stat = NC_NOERR; break; /* ok */ + case NC_EOBJECT: stat = NC_NOERR; break; /* ok */ default: goto done; } @@ -636,7 +636,7 @@ zzcreategroup(ZZMAP* zzmap, const char* key, int nskip) /* Lookup a key @return NC_NOERR if found and is a content-bearing object -@return NC_ENOTFOUND if not found +@return NC_ENOOBJECT if not found @return NC_EEMPTY if a dir */ static int @@ -747,7 +747,7 @@ ziperrno(int zerror) int stat = NC_NOERR; switch (zerror) { case ZIP_ER_OK: stat = NC_NOERR; break; - case ZIP_ER_EXISTS: stat = NC_EFOUND; break; + case ZIP_ER_EXISTS: 
stat = NC_EOBJECT; break; case ZIP_ER_MEMORY: stat = NC_ENOMEM; break; case ZIP_ER_SEEK: case ZIP_ER_READ: @@ -755,14 +755,14 @@ ziperrno(int zerror) case ZIP_ER_TMPOPEN: case ZIP_ER_CRC: stat = NC_EIO; break; case ZIP_ER_ZIPCLOSED: stat = NC_EBADID; break; - case ZIP_ER_NOENT: stat = NC_ENOTFOUND; break; + case ZIP_ER_NOENT: stat = NC_ENOOBJECT; break; case ZIP_ER_OPEN: stat = NC_EACCESS; break; case ZIP_ER_INVAL: stat = NC_EINVAL; break; case ZIP_ER_INTERNAL: stat = NC_EINTERNAL; break; case ZIP_ER_REMOVE: stat = NC_ECANTREMOVE; break; - case ZIP_ER_DELETED: stat = NC_ENOTFOUND; break; + case ZIP_ER_DELETED: stat = NC_ENOOBJECT; break; case ZIP_ER_RDONLY: stat = NC_EPERM; break; - case ZIP_ER_CHANGED: stat = NC_EFOUND; break; + case ZIP_ER_CHANGED: stat = NC_EOBJECT; break; default: stat = NC_ENCZARR; break; } return stat; diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index 83913214c6..95f82679a0 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -12,7 +12,7 @@ static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** j static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var); static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp); -static int load_jatts(NCZMAP* map, NC_OBJ* container, NCjson** jattrsp, NClist** atypes); +static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes); static int zconvert(nc_type typeid, size_t typelen, void* dst, NCjson* src); static int computeattrinfo(const char* name, NClist* atypes, NCjson* values, nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap); @@ -56,23 +56,15 @@ ncz_sync_file(NC_FILE_INFO_T* file, int isclose) { int stat = NC_NOERR; NCjson* json = NULL; - NCZ_FILE_INFO_T* zinfo = NULL; NC_UNUSED(isclose); LOG((3, "%s: file: %s", __func__, file->controller->path)); ZTRACE(3,"file=%s isclose=%d",file->controller->path,isclose); - zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; - - /* Create super block (NCZMETAROOT) 
*/ - { - if((stat = ncz_create_superblock(zinfo))) goto done; - - /* Write out root group recursively */ - if((stat = ncz_sync_grp(file, file->root_grp))) - goto done; - } + /* Write out root group recursively */ + if((stat = ncz_sync_grp(file, file->root_grp))) + goto done; done: NCJreclaim(json); @@ -132,6 +124,8 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) NCjson* jdims = NULL; NCjson* jvars = NULL; NCjson* jsubgrps = NULL; + NCjson* jsuper = NULL; + NCjson* jtmp = NULL; LOG((3, "%s: dims: %s", __func__, key)); @@ -161,39 +155,49 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; } - /* build ZGROUP path */ - if((stat = nczm_concat(fullpath,ZGROUP,&key))) - goto done; + /* Create the "_NCZARR_GROUP" dict */ if((stat = NCJnew(NCJ_DICT,&json))) goto done; + /* Insert the various dicts and arrays */ + if((stat = NCJinsert(json,"dims",jdims))) goto done; + jdims = NULL; /* avoid memory problems */ + if((stat = NCJinsert(json,"vars",jvars))) goto done; + jvars = NULL; /* avoid memory problems */ + if((stat = NCJinsert(json,"groups",jsubgrps))) goto done; + jsubgrps = NULL; /* avoid memory problems */ + + /* build ZGROUP contents */ + if((stat = NCJnew(NCJ_DICT,&jgroup))) + goto done; snprintf(version,sizeof(version),"%d",zinfo->zarr.zarr_version); - if((stat = NCJaddstring(json,NCJ_STRING,"zarr_format"))) goto done; - if((stat = NCJaddstring(json,NCJ_INT,version))) goto done; + if((stat = NCJaddstring(jgroup,NCJ_STRING,"zarr_format"))) goto done; + if((stat = NCJaddstring(jgroup,NCJ_INT,version))) goto done; + if(grp->parent == NULL) { /* Root group */ + snprintf(version,sizeof(version),"%lu.%lu.%lu", + zinfo->zarr.nczarr_version.major, + zinfo->zarr.nczarr_version.minor, + zinfo->zarr.nczarr_version.release); + if((stat = NCJnew(NCJ_DICT,&jsuper))) goto done; + if((stat-NCJnewstring(NCJ_STRING,version,&jtmp))) goto done; + if((stat = NCJinsert(jsuper,"version",jtmp))) goto 
done; + jtmp = NULL; + if((stat = NCJinsert(jgroup,NCZ_V2_SUPERBLOCK,jsuper))) goto done; + jsuper = NULL; + } + + /* Insert the "_NCZARR_GROUP" dict */ + if((stat = NCJinsert(jgroup,NCZ_V2_GROUP,json))) goto done; + json = NULL; + + /* build ZGROUP path */ + if((stat = nczm_concat(fullpath,ZGROUP,&key))) + goto done; /* Write to map */ - if((stat=NCZ_uploadjson(map,key,json))) + if((stat=NCZ_uploadjson(map,key,jgroup))) goto done; nullfree(key); key = NULL; - if(!(zinfo->controls.flags & FLAG_PUREZARR)) { - /* Create the NCZGROUP json object */ - if((stat = NCJnew(NCJ_DICT,&jgroup))) - goto done; - /* Insert the various dicts and arrays */ - if((stat = NCJinsert(jgroup,"dims",jdims))) goto done; - jdims = NULL; /* avoid memory problems */ - if((stat = NCJinsert(jgroup,"vars",jvars))) goto done; - jvars = NULL; /* avoid memory problems */ - if((stat = NCJinsert(jgroup,"groups",jsubgrps))) goto done; - jsubgrps = NULL; /* avoid memory problems */ - /* build NCZGROUP path */ - if((stat = nczm_concat(fullpath,NCZGROUP,&key))) - goto done; - /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jgroup))) - goto done; - nullfree(key); key = NULL; - } - /* Build NCZATTRS object and the .zattrs object */ + /* Build the .zattrs object */ assert(grp->att); if((stat = ncz_sync_atts(file,(NC_OBJ*)grp, grp->att))) goto done; @@ -211,6 +215,8 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) } done: + NCJreclaim(jtmp); + NCJreclaim(jsuper); NCJreclaim(json); NCJreclaim(jgroup); NCJreclaim(jdims); @@ -340,9 +346,9 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) } /* Convert var->fill_value to a string */ if((stat = NCZ_stringconvert(atomictype,1,var->fill_value,&jfill))) goto done; - if(jfill->sort == NCJ_ARRAY) { /* stringconvert should prevent this from happening */ + if(NCJsort(jfill) == NCJ_ARRAY) { /* stringconvert should prevent this from happening */ assert(NCJlength(jfill) > 0); - if((stat = NCJarrayith(jfill,0,&jtmp))) goto done; /* use the 0th element 
*/ + jtmp = NCJith(jfill,0); if((stat = NCJclone(jtmp,&jtmp))) goto done; /* clone so we can free it later */ NCJreclaim(jfill); jfill = jtmp; @@ -386,16 +392,6 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) if((stat = NCJinsert(jvar,"dimension_separator",jtmp))) goto done; jtmp = NULL; } - - /* build .zarray path */ - if((stat = nczm_concat(fullpath,ZARRAY,&key))) - goto done; - - /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jvar))) - goto done; - - nullfree(key); key = NULL; /* Capture dimref names as FQNs */ if(var->ndims > 0) { @@ -408,7 +404,7 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) } } - /* Build the NCZARRAY object */ + /* Build the NCZ_V2_ARRAY object */ { /* Create the dimrefs json object */ if((stat = NCJnew(NCJ_ARRAY,&jdimrefs))) @@ -417,8 +413,6 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) const char* dim = nclistget(dimrefs,i); NCJaddstring(jdimrefs,NCJ_STRING,dim); } - - /* Create the NCZARRAY json object */ if((stat = NCJnew(NCJ_DICT,&jncvar))) goto done; @@ -442,17 +436,21 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) jtmp = NULL; if(!(zinfo->controls.flags & FLAG_PUREZARR)) { - /* Write out NCZARRAY */ - if((stat = nczm_concat(fullpath,NCZARRAY,&key))) - goto done; - /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jncvar))) - goto done; - nullfree(key); key = NULL; + if((stat = NCJinsert(jvar,NCZ_V2_ARRAY,jncvar))) goto done; + jncvar = NULL; } } - /* Build .zattrs object including .nczattrs object */ + /* build .zarray path */ + if((stat = nczm_concat(fullpath,ZARRAY,&key))) + goto done; + + /* Write to map */ + if((stat=NCZ_uploadjson(map,key,jvar))) + goto done; + nullfree(key); key = NULL; + + /* Build .zattrs object */ assert(var->att); if((stat = ncz_sync_atts(file,(NC_OBJ*)var, var->att))) goto done; @@ -571,15 +569,13 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) NCjson* jatts = NULL; NCjson* jtypes = NULL; NCjson* jtype = NULL; - NCjson* 
jnczarr = NULL; - NCjson* jnczattr = NULL; NCjson* jdimrefs = NULL; + NCjson* jdict = NULL; NCZMAP* map = NULL; char* fullpath = NULL; char* key = NULL; char* content = NULL; char* dimpath = NULL; - NCjson* jname = NULL; int isxarray = 0; LOG((3, "%s", __func__)); @@ -592,10 +588,6 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) if(!isxarray && ncindexsize(attlist) == 0) goto done; /* do nothing */ - /* Create the jncattr object */ - if((stat = NCJnew(NCJ_DICT,&jnczattr))) - goto done; - if(ncindexsize(attlist) > 0) { /* Create the jncattr.types object */ if((stat = NCJnew(NCJ_DICT,&jtypes))) @@ -603,47 +595,34 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) /* Walk all the attributes and collect the types by attribute name */ for(i=0;ihdr.name); /* If reserved and hidden, then ignore */ if(ra && (ra->flags & HIDDENATTRFLAG)) continue; if(a->nc_typeid >= NC_STRING) {stat = THROW(NC_ENCZARR); goto done;} - snprintf(tname,sizeof(tname),"%u",a->nc_typeid); + if((stat = ncz_zarr_type_name(a->nc_typeid,1,&tname))) goto done; if((stat = NCJnewstring(NCJ_STRING,tname,&jtype))) goto done; if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) /* add {name: type} */ goto done; jtype = NULL; } - /* Insert into the NCZATTRS json */ - if((stat = NCJinsert(jnczattr,"types",jtypes))) - goto done; - jtypes = NULL; } /* Construct container path */ - if(container->sort == NCGRP) + if(NCJsort(container) == NCGRP) stat = NCZ_grpkey((NC_GRP_INFO_T*)container,&fullpath); else stat = NCZ_varkey((NC_VAR_INFO_T*)container,&fullpath); if(stat) goto done; - /* Upload the NCZATTRS object */ - if((stat = nczm_concat(fullpath,NCZATTRS,&key))) - goto done; - if(!(zinfo->controls.flags & FLAG_PUREZARR)) { - /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jnczattr))) - goto done; - } - nullfree(key); key = NULL; - /* Jsonize the attribute list */ if((stat = ncz_jsonize_atts(attlist,&jatts))) goto done; - if(container->sort == NCVAR) { + + 
if(NCJsort(container) == NCVAR) { if(isxarray) { NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container; /* Insert the XARRAY _ARRAY_ATTRIBUTE attribute */ @@ -657,11 +636,20 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) NCJaddstring(jdimrefs,NCJ_STRING,dimname); nullfree(dimname); dimname = NULL; } - /* Add the _ARRAY_ATTRIBUTE */ + /* Add the _ARRAY_DIMENSIONS attribute */ if((stat = NCJinsert(jatts,NC_XARRAY_DIMS,jdimrefs))) goto done; jdimrefs = NULL; } } + if(!(zinfo->controls.flags & FLAG_PUREZARR)) { + /* Insert the _NCZARR_ATTR attribute */ + if((stat = NCJnew(NCJ_DICT,&jdict))) + goto done; + if((stat = NCJinsert(jdict,"types",jtypes))) goto done; + jtypes = NULL; + if((stat = NCJinsert(jatts,NCZ_V2_ATTR,jdict))) goto done; + jdict = NULL; + } /* write .zattrs path */ if((stat = nczm_concat(fullpath,ZATTRS,&key))) @@ -679,10 +667,8 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) NCJreclaim(jatts); NCJreclaim(jtypes); NCJreclaim(jtype); - NCJreclaim(jnczarr); NCJreclaim(jdimrefs); - NCJreclaim(jnczattr); - NCJreclaim(jname); + NCJreclaim(jdict); return THROW(stat); } @@ -741,7 +727,7 @@ the corresponding NCjson dict. 
@author Dennis Heimbigner */ static int -load_jatts(NCZMAP* map, NC_OBJ* container, NCjson** jattrsp, NClist** atypesp) +load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypesp) { int i,stat = NC_NOERR; char* fullpath = NULL; @@ -754,7 +740,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, NCjson** jattrsp, NClist** atypesp) /* alway return (possibly empty) list of types */ atypes = nclistnew(); - if(container->sort == NCGRP) { + if(NCJsort(container) == NCGRP) { NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)container; /* Get grp's fullpath name */ if((stat = NCZ_grpkey(grp,&fullpath))) @@ -779,15 +765,19 @@ load_jatts(NCZMAP* map, NC_OBJ* container, NCjson** jattrsp, NClist** atypesp) nullfree(key); key = NULL; if(jattrs != NULL) { - /* Construct the path to the NCZATTRS object */ - if((stat = nczm_concat(fullpath,NCZATTRS,&key))) goto done; - /* Download the NCZATTRS object: may not exist if pure zarr or using deprecated name */ - stat=NCZ_downloadjson(map,key,&jncattr); - if(stat == NC_EEMPTY) { - /* try deprecated name */ - nullfree(key); key = NULL; - if((stat = nczm_concat(fullpath,NCZATTRDEP,&key))) goto done; + if(nczarrv1) { + /* Construct the path to the NCZATTRS object */ + if((stat = nczm_concat(fullpath,NCZATTRS,&key))) goto done; + /* Download the NCZATTRS object: may not exist if pure zarr or using deprecated name */ stat=NCZ_downloadjson(map,key,&jncattr); + if(stat == NC_EEMPTY) { + /* try deprecated name */ + nullfree(key); key = NULL; + if((stat = nczm_concat(fullpath,NCZATTRDEP,&key))) goto done; + stat=NCZ_downloadjson(map,key,&jncattr); + } + } else {/* Get _NCZARR_ATTRS from .zattrs */ + stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); } nullfree(key); key = NULL; switch (stat) { @@ -798,19 +788,19 @@ load_jatts(NCZMAP* map, NC_OBJ* container, NCjson** jattrsp, NClist** atypesp) if(jncattr != NULL) { NCjson* jtypes = NULL; /* jncattr attribute should be a dict */ - if(jncattr->sort != NCJ_DICT) {stat = 
THROW(NC_ENCZARR); goto done;} + if(NCJsort(jncattr) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} /* Extract "types; may not exist if only hidden attributes are defined */ if((stat = NCJdictget(jncattr,"types",&jtypes))) goto done; if(jtypes != NULL) { - if(jtypes->sort != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jtypes) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} /* Convert to an envv list */ - for(i=0;icontents);i+=2) { - const NCjson* key = nclistget(jtypes->contents,i); - const NCjson* value = nclistget(jtypes->contents,i+1); - if(key->sort != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;} - if(value->sort != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;} - nclistpush(atypes,strdup(key->value)); - nclistpush(atypes,strdup(value->value)); + for(i=0;isort) { + switch (NCJsort(src)) { case NCJ_ARRAY: - for(i=0;icontents);i++) { - NCjson* value = nclistget(src->contents,i); - assert(value->sort != NCJ_STRING); + for(i=0;ivalue); - memcpy(dst,src->value,len); - dst[len] = '\0'; /* nul terminate */ - break; + if(typeid == NC_CHAR) { + len = strlen(NCJstring(src)); + memcpy(dst,NCJstring(src),len); + dst[len] = '\0'; /* nul terminate */ + break; + } + /* Fall thru */ case NCJ_INT: case NCJ_DOUBLE: case NCJ_BOOLEAN: if((stat = NCZ_convert1(src, typeid, dst))) goto done; @@ -883,9 +877,9 @@ computeattrinfo(const char* name, NClist* atypes, NCjson* values, const char* aname = nclistget(atypes,i); if(strcmp(aname,name)==0) { const char* atype = nclistget(atypes,i+1); - unsigned long tid; - if(sscanf(atype,"%lu",&tid) != 1) {stat = THROW(NC_ENCZARR); goto done;} - typeid = (nc_type)tid; + if((stat = ncz_dtype2typeinfo(atype,&typeid,NULL))) { + if((stat = ncz_nctype2typeinfo(atype,&typeid))) goto done; + } break; } } @@ -910,7 +904,7 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap) { int stat = NC_NOERR; - size_t datalen; + size_t count; void* data = NULL; size_t 
typelen; nc_type typeid = NC_NAT; @@ -922,34 +916,37 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;} /* Collect the length of the attribute; might be a singleton */ - switch (values->sort) { + switch (NCJsort(values)) { case NCJ_DICT: stat = NC_ENCZARR; goto done; case NCJ_ARRAY: - datalen = nclistlength(values->contents); + count = NCJlength(values); break; case NCJ_STRING: /* requires special handling as an array of characters */ - datalen = strlen(values->value); + if(typeid == NC_CHAR) + count = strlen(NCJstring(values)); + else + count = 1; break; default: - datalen = 1; + count = 1; /* singleton */ break; } - if(datalen > 0) { + if(count > 0) { /* Allocate data space */ if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) goto done; if(typeid == NC_CHAR) - data = malloc(typelen*(datalen+1)); + data = malloc(typelen*(count+1)); else - data = malloc(typelen*datalen); + data = malloc(typelen*count); if(data == NULL) {stat = NC_ENOMEM; goto done;} /* convert to target type */ if((stat = zconvert(typeid, typelen, data, values))) goto done; } - if(lenp) *lenp = datalen; + if(lenp) *lenp = count; if(typelenp) *typelenp = typelen; if(datap) {*datap = data; data = NULL;} if(typeidp) *typeidp = typeid; /* return possibly inferred type */ @@ -961,7 +958,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp } static int -inferattrtype(NCjson* value, nc_type* typeidp) +inferattrtype(NCjson* values, nc_type* typeidp) { nc_type typeid; NCjson* j = NULL; @@ -969,20 +966,30 @@ inferattrtype(NCjson* value, nc_type* typeidp) long long i64; int negative = 0; - if(NCJlength(value) == 0) return NC_EINVAL; - if(value->sort == NCJ_ARRAY) { - if(NCJarrayith(value,0,&j)) return NC_EINVAL; + + switch (NCJsort(values)) { + case NCJ_ARRAY: + if(NCJlength(values) == 0) return NC_EINVAL; + j = NCJith(values,0); return inferattrtype(j,typeidp); + case NCJ_NULL: + 
typeid = NC_CHAR; + return NC_NOERR; + case NCJ_DICT: /* fall thru */ + case NCJ_UNDEF: + return NC_EINVAL; + default: /* atomic */ + break; } - if(value->value) - negative = (value->value[0] == '-'); - switch (value->sort) { + if(NCJstring(values)) + negative = (NCJstring(values)[0] == '-'); + switch (NCJsort(values)) { case NCJ_INT: if(negative) { - sscanf(value->value,"%lld",&i64); + sscanf(NCJstring(values),"%lld",&i64); u64 = (unsigned long long)i64; } else - sscanf(value->value,"%llu",&u64); + sscanf(NCJstring(values),"%llu",&u64); typeid = mininttype(u64,negative); break; case NCJ_DOUBLE: @@ -1067,11 +1074,13 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) char* fullpath = NULL; char* key = NULL; NCjson* json = NULL; - NCjson* jncgroup = NULL; + NCjson* jgroup = NULL; + NCjson* jdict = NULL; NClist* dimdefs = nclistnew(); NClist* varnames = nclistnew(); NClist* subgrps = nclistnew(); int purezarr = 0; + int v1 = 0; LOG((3, "%s: dims: %s", __func__, key)); @@ -1087,24 +1096,38 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) goto done; purezarr = 1; } else { /*!purezarr*/ - /* build NCZGROUP path */ - if((stat = nczm_concat(fullpath,NCZGROUP,&key))) - goto done; - /* Read */ - switch (stat=NCZ_downloadjson(map,key,&jncgroup)) { - case NC_NOERR: /* we read it */ - /* Pull out lists about group content */ - if((stat = parse_group_content(jncgroup,dimdefs,varnames,subgrps))) - goto done; - break; - case NC_EEMPTY: /* probably pure zarr, so does not exist, use search */ - if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) - goto done; - purezarr = 1; - break; - default: goto done; + if(zinfo->controls.flags & FLAG_NCZARR_V1) { + /* build NCZGROUP path */ + if((stat = nczm_concat(fullpath,NCZGROUP,&key))) + goto done; + /* Read */ + jdict = NULL; + stat=NCZ_downloadjson(map,key,&jdict); + v1 = 1; + } else { + /* build ZGROUP path */ + if((stat = nczm_concat(fullpath,ZGROUP,&key))) + goto done; + /* Read */ + switch 
(stat=NCZ_downloadjson(map,key,&jgroup)) { + case NC_NOERR: /* we read it */ + /* Extract the NCZ_V2_GROUP dict */ + if((stat = NCJdictget(jgroup,NCZ_V2_GROUP,&jdict))) goto done; + break; + case NC_EEMPTY: /* does not exist, use search */ + if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) + goto done; + purezarr = 1; + break; + default: goto done; + } } nullfree(key); key = NULL; + if(jdict) { + /* Pull out lists about group content */ + if((stat = parse_group_content(jdict,dimdefs,varnames,subgrps))) + goto done; + } } if(!purezarr) { @@ -1119,8 +1142,9 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) if((stat = define_subgrps(file,grp,subgrps))) goto done; done: + if(v1) NCJreclaim(jdict); NCJreclaim(json); - NCJreclaim(jncgroup); + NCJreclaim(jgroup); nclistfreeall(dimdefs); nclistfreeall(varnames); nclistfreeall(subgrps); @@ -1160,12 +1184,12 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) zinfo = file->format_file_info; map = zinfo->map; - if(container->sort == NCGRP) + if(NCJsort(container) == NCGRP) attlist = ((NC_GRP_INFO_T*)container)->att; else attlist = ((NC_VAR_INFO_T*)container)->att; - switch ((stat = load_jatts(map, container, &jattrs, &atypes))) { + switch ((stat = load_jatts(map, container, (zinfo->controls.flags & FLAG_NCZARR_V1), &jattrs, &atypes))) { case NC_NOERR: break; case NC_EEMPTY: /* container has no attributes */ stat = NC_NOERR; @@ -1176,38 +1200,38 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) if(jattrs != NULL) { /* Iterate over the attributes to create the in-memory attributes */ /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray) */ - for(i=0;icontents);i+=2) { - NCjson* key = nclistget(jattrs->contents,i); - NCjson* value = nclistget(jattrs->contents,i+1); + for(i=0;ivalue); + ra = NC_findreserved(NCJstring(key)); if(ra != NULL) { /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */ - if(strcmp(key->value,NCPROPS)==0 - && 
container->sort == NCGRP + if(strcmp(NCJstring(key),NCPROPS)==0 + && NCJsort(container) == NCGRP && file->root_grp == (NC_GRP_INFO_T*)container) { /* Setup provenance */ - if(value->sort != NCJ_STRING) + if(NCJsort(value) != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;} /*malformed*/ - if((stat = NCZ_read_provenance(file,key->value,value->value))) + if((stat = NCZ_read_provenance(file,NCJstring(key),NCJstring(value)))) goto done; } /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ - if(strcmp(key->value,NC_XARRAY_DIMS)==0 - && container->sort == NCVAR + if(strcmp(NCJstring(key),NC_XARRAY_DIMS)==0 + && NCJsort(container) == NCVAR && (ra->flags & HIDDENATTRFLAG)) { /* store for later */ NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)((NC_VAR_INFO_T*)container)->format_var_info; int i; - assert(value->sort == NCJ_ARRAY); + assert(NCJsort(value) == NCJ_ARRAY); if((zvar->xarray = nclistnew())==NULL) {stat = NC_ENOMEM; goto done;} - for(i=0;icontents);i++) { - const NCjson* k = nclistget(value->contents,i); - assert(k != NULL && k->sort == NCJ_STRING); - nclistpush(zvar->xarray,strdup(k->value)); + for(i=0;ixarray,strdup(NCJstring(k))); } } /* else ignore */ @@ -1215,23 +1239,23 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) } /* Create the attribute */ /* Collect the attribute's type and value */ - if((stat = computeattrinfo(key->value,atypes,value, + if((stat = computeattrinfo(NCJstring(key),atypes,value, &typeid,NULL,&len,&data))) goto done; - if((stat = ncz_makeattr(container,attlist,key->value,typeid,len,data,&att))) + if((stat = ncz_makeattr(container,attlist,NCJstring(key),typeid,len,data,&att))) goto done; /* Is this _FillValue ? 
*/ if(strcmp(att->hdr.name,_FillValue)==0) fillvalueatt = att; } } /* If we have not read a _FillValue, then go ahead and create it */ - if(fillvalueatt == NULL && container->sort == NCVAR) { + if(fillvalueatt == NULL && NCJsort(container) == NCVAR) { if((stat = ncz_create_fillvalue((NC_VAR_INFO_T*)container))) goto done; } /* Remember that we have read the atts for this var or group. */ - if(container->sort == NCVAR) + if(NCJsort(container) == NCVAR) ((NC_VAR_INFO_T*)container)->atts_read = 1; else ((NC_GRP_INFO_T*)container)->atts_read = 1; @@ -1308,6 +1332,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) NCjson* jvalue = NULL; int purezarr = 0; int xarray = 0; + int formatv1 = 0; nc_type typeid; size64_t* shapes = NULL; int rank = 0; @@ -1317,6 +1342,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) map = zinfo->map; if(zinfo->controls.flags & FLAG_PUREZARR) purezarr = 1; + if(zinfo->controls.flags & FLAG_NCZARR_V1) formatv1 = 1; if(zinfo->controls.flags & FLAG_XARRAYDIMS) {purezarr = 1; xarray = 1;} /* Load each var in turn */ @@ -1348,7 +1374,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) if((stat=NCZ_readdict(map,key,&jvar))) goto done; nullfree(key); key = NULL; - assert((jvar->sort == NCJ_DICT)); + assert((NCJsort(jvar) == NCJ_DICT)); /* Extract the .zarray info from jvar */ @@ -1356,7 +1382,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) { int version; if((stat = NCJdictget(jvar,"zarr_format",&jvalue))) goto done; - sscanf(jvalue->value,"%d",&version); + sscanf(NCJstring(jvalue),"%d",&version); if(version != zinfo->zarr.zarr_version) {stat = THROW(NC_ENCZARR); goto done;} } @@ -1366,7 +1392,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) int endianness; if((stat = NCJdictget(jvar,"dtype",&jvalue))) goto done; /* Convert dtype to nc_type + endianness */ - if((stat = 
ncz_dtype2typeinfo(jvalue->value,&vtype,&endianness))) + if((stat = ncz_dtype2typeinfo(NCJstring(jvalue),&vtype,&endianness))) goto done; if(vtype > NC_NAT && vtype < NC_STRING) { /* Locate the NC_TYPE_INFO_T object */ @@ -1381,9 +1407,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* shape */ { if((stat = NCJdictget(jvar,"shape",&jvalue))) goto done; - if(jvalue->sort != NCJ_ARRAY) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jvalue) != NCJ_ARRAY) {stat = THROW(NC_ENCZARR); goto done;} /* Verify the rank */ - if(zvar->scalar) rank = 0; else rank = nclistlength(jvalue->contents); + if(zvar->scalar) rank = 0; else rank = NCJlength(jvalue); /* Set the rank of the variable */ if((stat = nc4_var_set_ndims(var, rank))) goto done; /* extract the shapes */ @@ -1399,8 +1425,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) if((stat = NCJdictget(jvar,"dimension_separator",&jvalue))) goto done; if(jvalue != NULL) { /* Verify its value */ - if(jvalue->sort == NCJ_STRING && jvalue->value != NULL && strlen(jvalue->value) == 1) - zvar->dimension_separator = jvalue->value[0]; + if(NCJsort(jvalue) == NCJ_STRING && NCJstring(jvalue) != NULL && strlen(NCJstring(jvalue)) == 1) + zvar->dimension_separator = NCJstring(jvalue)[0]; } /* If value is invalid, then use global default */ if(!islegaldimsep(zvar->dimension_separator)) @@ -1412,10 +1438,10 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) int rank; size64_t chunks[NC_MAX_VAR_DIMS]; if((stat = NCJdictget(jvar,"chunks",&jvalue))) goto done; - if(jvalue != NULL && jvalue->sort != NCJ_ARRAY) + if(jvalue != NULL && NCJsort(jvalue) != NCJ_ARRAY) {stat = THROW(NC_ENCZARR); goto done;} /* Verify the rank */ - rank = nclistlength(jvalue->contents); + rank = NCJlength(jvalue); if(rank > 0) { var->storage = NC_CHUNKED; if(var->ndims+zvar->scalar != rank) @@ -1457,7 +1483,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* 
Capture row vs column major; currently, column major not used*/ { if((stat = NCJdictget(jvar,"order",&jvalue))) goto done; - if(strcmp(jvalue->value,"C")==1) + if(strcmp(NCJstring(jvalue),"C")==1) ((NCZ_VAR_INFO_T*)var->format_var_info)->order = 1; else ((NCZ_VAR_INFO_T*)var->format_var_info)->order = 0; } @@ -1473,27 +1499,28 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) } if(!purezarr) { - /* Download the NCZARRAY object */ - if((stat = nczm_concat(varpath,NCZARRAY,&key))) - goto done; - if((stat=NCZ_readdict(map,key,&jncvar))) { - nullfree(key); key = NULL; - if((stat = nczm_concat(varpath,NCZVARDEP,&key))) /* try deprecated name */ + if(formatv1) { + /* Construct the path to the zarray object */ + if((stat = nczm_concat(varpath,NCZARRAY,&key))) goto done; - if((stat=NCZ_readdict(map,key,&jncvar))) + /* Download the nczarray object */ + if((stat=NCZ_readdict(map,key,&jncvar))) goto done; + nullfree(key); key = NULL; + } else {/* format v2 */ + /* Extract the NCZ_V2_ARRAY dict */ + if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; } - nullfree(key); key = NULL; - assert((jncvar->sort == NCJ_DICT)); + assert((NCJsort(jncvar) == NCJ_DICT)); /* Extract storage flag */ if((stat = NCJdictget(jncvar,"storage",&jvalue))) goto done; if(jvalue != NULL) { - if(strcmp(jvalue->value,"chunked") == 0) { + if(strcmp(NCJstring(jvalue),"chunked") == 0) { var->storage = NC_CHUNKED; - } else if(strcmp(jvalue->value,"compact") == 0) { + } else if(strcmp(NCJstring(jvalue),"compact") == 0) { var->storage = NC_COMPACT; - } else if(strcmp(jvalue->value,"scalar") == 0) { + } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { var->storage = NC_CONTIGUOUS; zvar->scalar = 1; } else { /*storage = NC_CONTIGUOUS;*/ @@ -1503,12 +1530,12 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* Extract dimnames list */ switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { case NC_NOERR: /* Extract the dimref names */ - 
assert((jdimrefs->sort == NCJ_ARRAY)); - assert(nclistlength(jdimrefs->contents) == rank); + assert((NCJsort(jdimrefs) == NCJ_ARRAY)); + assert(NCJlength(jdimrefs) == rank); for(j=0;jcontents,j); - assert(dimpath->sort == NCJ_STRING); - nclistpush(dimnames,strdup(dimpath->value)); + const NCjson* dimpath = NCJith(jdimrefs,j); + assert(NCJsort(dimpath) == NCJ_STRING); + nclistpush(dimnames,strdup(NCJstring(dimpath))); } jdimrefs = NULL; /* avoid double free */ break; @@ -1532,7 +1559,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) nullfree(varpath); varpath = NULL; nullfree(shapes); shapes = NULL; NCJreclaim(jvar); jvar = NULL; - NCJreclaim(jncvar); jncvar = NULL; } done: @@ -1541,7 +1567,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) nullfree(key); nclistfreeall(dimnames); NCJreclaim(jvar); - NCJreclaim(jncvar); + if(formatv1) NCJreclaim(jncvar); return THROW(stat); } @@ -1586,6 +1612,73 @@ define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subgrpnames) return THROW(stat); } +int +ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) +{ + int stat = NC_NOERR; + NCjson* jnczgroup = NULL; + NCjson* jzgroup = NULL; + NCjson* jsuper = NULL; + NCjson* jtmp = NULL; + char* nczarr_version = NULL; + char* zarr_format = NULL; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + + /* See if the V1 META-Root is being used */ + switch(stat = NCZ_downloadjson(zinfo->map, NCZMETAROOT, &jnczgroup)) { + case NC_EEMPTY: /* not there */ + stat = NC_NOERR; + break; + case NC_NOERR: + if((stat = NCJdictget(jnczgroup,"nczarr_version",&jtmp))) goto done; + nczarr_version = strdup(NCJstring(jtmp)); + break; + default: goto done; + } + /* Also gett Zarr Root Group */ + switch(stat = NCZ_downloadjson(zinfo->map, ZMETAROOT, &jzgroup)) { + case NC_NOERR: + break; + case NC_EEMPTY: /* not there */ + stat = NC_NOERR; + assert(jzgroup == NULL); + break; + default: goto done; + } + 
if(jzgroup != NULL) { + /* See if this NCZarr V2 */ + if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done; + if(jsuper != NULL) { + /* Extract the equivalent attribute */ + if(jsuper->sort != NCJ_DICT) + {stat = NC_ENCZARR; goto done;} + if((stat = NCJdictget(jsuper,"version",&jtmp))) goto done; + nczarr_version = nulldup(NCJstring(jtmp)); + } + /* In any case, extract the zarr format */ + if((stat = NCJdictget(jzgroup,"zarr_format",&jtmp))) goto done; + zarr_format = nulldup(NCJstring(jtmp)); + } + /* Set the controls */ + if(jnczgroup == NULL && jsuper == NULL) { + zinfo->controls.flags |= FLAG_PUREZARR; + } else if(jnczgroup != NULL) { + zinfo->controls.flags |= FLAG_NCZARR_V1; + /* Also means file is read only */ + file->no_write = 1; + } else if(jsuper != NULL) { + /* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */ + } + if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;} + if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;} +done: + nullfree(zarr_format); + nullfree(nczarr_version); + NCJreclaim(jzgroup); + NCJreclaim(jnczgroup); + return THROW(stat); +} + /**************************************************/ /* Utilities */ @@ -1597,33 +1690,33 @@ parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* if((stat=NCJdictget(jcontent,"dims",&jvalue))) goto done; if(jvalue != NULL) { - if(jvalue->sort != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jvalue) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} /* Extract the dimensions defined in this group */ - for(i=0;icontents);i+=2) { - NCjson* jname = nclistget(jvalue->contents,i); - NCjson* jlen = nclistget(jvalue->contents,i+1); + for(i=0;ivalue, norm_name))) + if((stat = nc4_check_name(NCJstring(jname), norm_name))) {stat = NC_EBADNAME; goto done;} /* check the length */ - sscanf(jlen->value,"%lld",&len); + sscanf(NCJstring(jlen),"%lld",&len); if(len < 0) {stat = NC_EDIMSIZE; goto done;} nclistpush(dimdefs,strdup(norm_name)); - 
nclistpush(dimdefs,strdup(jlen->value)); + nclistpush(dimdefs,strdup(NCJstring(jlen))); } } if((stat=NCJdictget(jcontent,"vars",&jvalue))) goto done; if(jvalue != NULL) { /* Extract the variable names in this group */ - for(i=0;icontents);i++) { - NCjson* jname = nclistget(jvalue->contents,i); + for(i=0;ivalue, norm_name))) + if((stat = nc4_check_name(NCJstring(jname), norm_name))) {stat = NC_EBADNAME; goto done;} nclistpush(varnames,strdup(norm_name)); } @@ -1632,11 +1725,11 @@ parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* if((stat=NCJdictget(jcontent,"groups",&jvalue))) goto done; if(jvalue != NULL) { /* Extract the subgroup names in this group */ - for(i=0;icontents);i++) { - NCjson* jname = nclistget(jvalue->contents,i); + for(i=0;ivalue, norm_name))) + if((stat = nc4_check_name(NCJstring(jname), norm_name))) {stat = NC_EBADNAME; goto done;} nclistpush(subgrps,strdup(norm_name)); } @@ -1678,7 +1771,7 @@ parse_var_dims_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NC_VAR_INFO_T* /* Download the zarray object */ if((stat=NCZ_readdict(zinfo->map,zakey,&jvar))) goto done; - assert((jvar->sort == NCJ_DICT)); + assert((NCJsort(jvar) == NCJ_DICT)); nullfree(varkey); varkey = NULL; nullfree(zakey); zakey = NULL; /* Extract the shape */ @@ -1768,9 +1861,9 @@ decodeints(NCjson* jshape, size64_t* shapes) { int i, stat = NC_NOERR; - for(i=0;icontents);i++) { + for(i=0;icontents,i); + NCjson* jv = NCJith(jshape,i); if((stat = NCZ_convert1(jv,NC_INT64,(char*)&v))) goto done; if(v < 0) {stat = THROW(NC_ENCZARR); goto done;} shapes[i] = (size64_t)v; @@ -1943,6 +2036,7 @@ ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) return retval; } +#if 0 int ncz_create_superblock(NCZ_FILE_INFO_T* zinfo) { @@ -1953,6 +2047,9 @@ ncz_create_superblock(NCZ_FILE_INFO_T* zinfo) ZTRACE(4,"zinfo=%s",zinfo->common.file->controller->path); + /* If V2, then do not create a superblock per-se */ + if(!(zinfo->controls.flags & FLAG_NCZARR_V1)) goto done; 
+ map = zinfo->map; /* create superblock json */ @@ -1963,7 +2060,7 @@ ncz_create_superblock(NCZ_FILE_INFO_T* zinfo) snprintf(version,sizeof(version),"%d",zinfo->zarr.zarr_version); if((stat = NCJaddstring(json,NCJ_STRING,"zarr_format"))) goto done; if((stat = NCJaddstring(json,NCJ_INT,version))) goto done; - if((stat = NCJaddstring(json,NCJ_STRING,"nczarr_version"))) goto done; + if((stat = NCJaddstring(json,NCJ_STRING,NCZ_V2_VERSION))) goto done; { char ver[1024]; snprintf(ver,sizeof(ver),"%lu.%lu.%lu", @@ -1972,15 +2069,14 @@ ncz_create_superblock(NCZ_FILE_INFO_T* zinfo) zinfo->zarr.nczarr_version.release); if((stat = NCJaddstring(json,NCJ_STRING,ver))) goto done; } - if(!(zinfo->controls.flags & FLAG_PUREZARR)) { - /* Write back to map */ - if((stat=NCZ_uploadjson(map,NCZMETAROOT,json))) - goto done; - } + /* Write back to map */ + if((stat=NCZ_uploadjson(map,NCZMETAROOT,json))) + goto done; done: NCJreclaim(json); return ZUNTRACE(stat); } +#endif /* Compute the set of dim refs for this variable, taking purezarr and xarray into account */ static int @@ -2049,11 +2145,11 @@ static int iscomplexjson(NCjson* j) { int i; - switch(j->sort) { + switch(NCJsort(j)) { case NCJ_ARRAY: /* verify that the elements of the array are not complex */ - for(i=0;icontents);i++) { - switch (((NCjson*)nclistget(j->contents,i))->sort) { + for(i=0;ixcache,hkey); break; - case NC_ENOTFOUND: + case NC_ENOOBJECT: entry = NULL; /* not found; */ break; default: goto done; diff --git a/libsrc/httpio.c b/libsrc/httpio.c index 3f560d567a..8760b42bab 100644 --- a/libsrc/httpio.c +++ b/libsrc/httpio.c @@ -44,7 +44,7 @@ typedef struct NCHTTP { NC_HTTP_STATE* state; - size64_t size; /* of the S3 object */ + long long size; /* of the S3 object */ NCbytes* region; } NCHTTP; diff --git a/libsrc4/nc4internal.c b/libsrc4/nc4internal.c index 2f8c58f978..1c75015821 100644 --- a/libsrc4/nc4internal.c +++ b/libsrc4/nc4internal.c @@ -43,6 +43,7 @@ static const NC_reservedatt NC_reserved[] = { 
{NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/ {ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG}, /*_IsNetcdf4*/ {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ + {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR*/ {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Coordinates*/ {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Dimid*/ {SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG}, /*_SuperblockVersion*/ diff --git a/ncdump/nccopy.c b/ncdump/nccopy.c index 1b9e1b50e6..12f7592347 100644 --- a/ncdump/nccopy.c +++ b/ncdump/nccopy.c @@ -2200,7 +2200,7 @@ usage(void) [-5] CDF5 output (same as -k 'cdf5)\n\ [-d n] set output deflation compression level, default same as input (0=none 9=max)\n\ [-s] add shuffle option to deflation compression\n\ - [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\ + [-c chunkspec] specify chunking for variable and dimensions, e.g. \"var:N1,N2,...\" or \"dim1/N1,dim2/N2,...\"\n\ [-u] convert unlimited dimensions to fixed-size dimensions in output copy\n\ [-w] write whole output file from diskless netCDF on close\n\ [-v var1,...] 
include data for only listed variables, but definitions for all variables\n\ diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index 9431be90a6..bd8a68781c 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -95,6 +95,10 @@ IF(ENABLE_TESTS) add_sh_test(nczarr_test run_s3_cleanup) ENDIF() + if(ENABLE_NCZARR_ZIP) + add_sh_test(nczarr_test run_newformat) + endif() + ENDIF(BUILD_UTILITIES) ENDIF(ENABLE_TESTS) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index 8ad227c1a1..3825de3fae 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -61,6 +61,10 @@ TESTS += run_misc.sh endif +if ENABLE_NCZARR_ZIP +TESTS += run_newformat.sh +endif + if BUILD_BENCHMARKS if BUILD_UTILITIES @@ -101,7 +105,9 @@ ncdumpchunks_SOURCES = ncdumpchunks.c EXTRA_DIST = CMakeLists.txt \ run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ut_chunk.sh run_ncgen4.sh \ run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh run_s3_cleanup.sh \ -run_purezarr.sh run_interop.sh run_misc.sh \ +run_purezarr.sh run_interop.sh run_misc.sh run_newformat.sh + +EXTRA_DIST += \ ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \ ref_ut_map_readmeta.txt ref_ut_map_readmeta2.txt ref_ut_map_search.txt \ ref_ut_mapapi_create.cdl ref_ut_mapapi_data.cdl ref_ut_mapapi_meta.cdl ref_ut_mapapi_search.txt \ @@ -115,7 +121,8 @@ ref_rem.cdl ref_rem.dmp ref_ndims.cdl ref_ndims.dmp \ ref_misc1.cdl ref_misc1.dmp ref_misc2.cdl \ ref_avail1.cdl ref_avail1.dmp ref_avail1.txt \ ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl ref_nczarr2zarr.cdl \ -ref_power_901_constants.zip ref_power_901_constants.cdl ref_quotes.zip ref_quotes.cdl +ref_power_901_constants.zip ref_power_901_constants.cdl ref_quotes.zip ref_quotes.cdl \ +ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip tmp*.nc diff --git 
a/nczarr_test/bm_utils.c b/nczarr_test/bm_utils.c index 36c016690a..0395f5149b 100644 --- a/nczarr_test/bm_utils.c +++ b/nczarr_test/bm_utils.c @@ -188,7 +188,7 @@ fprintf(stderr,"arg=%s value=%s\n",argv[optind-1],optarg); } } - if(opt->filename == NULL) return NC_ENOTFOUND; + if(opt->filename == NULL) return NC_ENOOBJECT; switch(opt->format) { case NC_FORMATX_NCZARR: diff --git a/nczarr_test/ref_newformatpure.cdl b/nczarr_test/ref_newformatpure.cdl new file mode 100644 index 0000000000..1224f4091f --- /dev/null +++ b/nczarr_test/ref_newformatpure.cdl @@ -0,0 +1,33 @@ +netcdf ref_oldformat { +dimensions: + .zdim_8 = 8 ; + .zdim_10 = 10 ; +variables: + int lat(.zdim_8) ; + lat:_FillValue = -1 ; + lat:lat_attr = "latitude" ; +data: + + lat = 1, 2, 3, 4, 5, 6, 7, 8 ; + +group: g1 { + variables: + int pos(.zdim_8, .zdim_10) ; + pos:_FillValue = -1 ; + pos:pos_attr = "latXlon" ; + + // group attributes: + :g1_attr = 17 ; + data: + + pos = + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _ ; + } // group g1 +} diff --git a/nczarr_test/ref_oldformat.cdl b/nczarr_test/ref_oldformat.cdl new file mode 100644 index 0000000000..d931a37c59 --- /dev/null +++ b/nczarr_test/ref_oldformat.cdl @@ -0,0 +1,34 @@ +netcdf ref_oldformat { +dimensions: + lat = 8 ; +variables: + int lat(lat) ; + lat:_FillValue = -1 ; + lat:lat_attr = "latitude" ; +data: + + lat = 1, 2, 3, 4, 5, 6, 7, 8 ; + +group: g1 { + dimensions: + lon = 10 ; + variables: + int pos(lat, lon) ; + pos:_FillValue = -1 ; + pos:pos_attr = "latXlon" ; + + // group attributes: + :g1_attr = 17 ; + data: + + pos = + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, 
_, _, + _, _, _, _, _, _, _, _, _, _ ; + } // group g1 +} diff --git a/nczarr_test/ref_oldformat.zip b/nczarr_test/ref_oldformat.zip new file mode 100644 index 0000000000..ef3455eb9f Binary files /dev/null and b/nczarr_test/ref_oldformat.zip differ diff --git a/nczarr_test/run_newformat.sh b/nczarr_test/run_newformat.sh new file mode 100755 index 0000000000..d5bc2ce76a --- /dev/null +++ b/nczarr_test/run_newformat.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +. "$srcdir/test_nczarr.sh" + +set -e + +echo "" +echo "*** Testing backward compatibilty between nczarr meta data format V1 vs V2" + +testcaseold() { +zext=$1 +fileargs ${srcdir}/ref_oldformat +${NCDUMP} -n ref_oldformat "$fileurl" > ./tmp_oldformat.cdl +diff -w ${srcdir}/ref_oldformat.cdl ./tmp_oldformat.cdl +} + +testcasecvt() { +zext=$1 +fileargs ${srcdir}/ref_oldformat +${NCCOPY} "$fileurl" "file://tmp_newformat.file#mode=nczarr,file" +${NCDUMP} -n ref_oldformat "file://tmp_newformat.file#mode=nczarr,file" > ./tmp_newformat.cdl +diff -w ${srcdir}/ref_oldformat.cdl ./tmp_newformat.cdl +} + +testcasepure() { +zext=$1 +fileargs ${srcdir}/ref_oldformat +${NCCOPY} "$fileurl" "file://tmp_newformat.file#mode=nczarr,file" +${NCDUMP} -n ref_oldformat "file://tmp_newformat.file#mode=zarr,file" > ./tmp_newpure.cdl +diff -w ${srcdir}/ref_newformatpure.cdl ./tmp_newpure.cdl +} + +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then + testcaseold zip + testcasecvt zip + testcasepure zip +fi + +exit 0 diff --git a/nczarr_test/run_ut_mapapi.sh b/nczarr_test/run_ut_mapapi.sh index 8761c44b0c..2948f1ce65 100755 --- a/nczarr_test/run_ut_mapapi.sh +++ b/nczarr_test/run_ut_mapapi.sh @@ -6,7 +6,7 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . 
"$srcdir/test_nczarr.sh" -TR="-T10" +#TR="-T10" set -e diff --git a/nczarr_test/ut_includes.h b/nczarr_test/ut_includes.h index 07797c5983..dccf5b863b 100644 --- a/nczarr_test/ut_includes.h +++ b/nczarr_test/ut_includes.h @@ -18,6 +18,7 @@ #include "nclist.h" #include "ncbytes.h" #include "ncuri.h" +#include "ncpathmgr.h" #include "zincludes.h" #include "ut_test.h" diff --git a/nczarr_test/ut_json.c b/nczarr_test/ut_json.c index 53aca4f547..98dd1a292b 100644 --- a/nczarr_test/ut_json.c +++ b/nczarr_test/ut_json.c @@ -63,13 +63,13 @@ build(NCJ* ncj) /* Build instances of primitives */ if((stat = NCJnew(NCJ_STRING,&ncj->ncj_string))) goto done; - ncj->ncj_string->value = strdup("string"); + ncj->ncj_string->string = strdup("string"); if((stat = NCJnew(NCJ_INT,&ncj->ncj_int))) goto done; - ncj->ncj_int->value = strdup("117"); + ncj->ncj_int->string = strdup("117"); if((stat = NCJnew(NCJ_DOUBLE,&ncj->ncj_double))) goto done; - ncj->ncj_double->value = strdup("3.1415926"); + ncj->ncj_double->string = strdup("3.1415926"); if((stat = NCJnew(NCJ_BOOLEAN,&ncj->ncj_boolean))) goto done; - ncj->ncj_boolean->value = strdup("true"); + ncj->ncj_boolean->string = strdup("true"); if((stat = NCJnew(NCJ_NULL,&ncj->ncj_null))) goto done; /* Create an empty array */ @@ -210,51 +210,51 @@ dumpR(NCjson* json, int depth) printf("/%s/ ",sortname(json->sort)); switch(json->sort) { - case NCJ_STRING: printf("\"%s\"",json->value); break; + case NCJ_STRING: printf("\"%s\"",json->string); break; case NCJ_INT: - ok = sscanf(json->value,"%lld%n",&int64v,&count); - if(ok != 1 || count != strlen(json->value)) goto fail; + ok = sscanf(json->string,"%lld%n",&int64v,&count); + if(ok != 1 || count != strlen(json->string)) goto fail; printf("%lld",int64v); break; case NCJ_DOUBLE: - ok = sscanf(json->value,"%lg%n",&float64v,&count); - if(ok != 1 || count != strlen(json->value)) goto fail; + ok = sscanf(json->string,"%lg%n",&float64v,&count); + if(ok != 1 || count != strlen(json->string)) goto fail; 
printf("%lg",float64v); break; case NCJ_BOOLEAN: - if(strcasecmp(json->value,"true") != 0 - && strcasecmp(json->value,"false") != 0) goto fail; - printf("%s",json->value); + if(strcasecmp(json->string,"true") != 0 + && strcasecmp(json->string,"false") != 0) goto fail; + printf("%s",json->string); break; case NCJ_NULL: printf("null"); break; case NCJ_DICT: - if(nclistlength(json->contents) == 0) { + if(NCJlength(json) == 0) { printf("{}"); } else { printf("\n"); - for(i=0;icontents);i+=2) { + for(i=0;icontents,i); + j = (NCjson*)NCJith(json,i); assert(j->sort == NCJ_STRING); printf("{%d} ",depth+1); - printf("\"%s\" => ",j->value); - if(i+1 >= nclistlength(json->contents)) {/* malformed */ + printf("\"%s\" => ",j->string); + if(i+1 >= NCJlength(json)) {/* malformed */ printf(""); } else - dumpR((NCjson*)nclistget(json->contents,i+1),depth+1); + dumpR((NCjson*)NCJith(json,i+1),depth+1); } } break; case NCJ_ARRAY: - if(nclistlength(json->contents) == 0) { + if(NCJlength(json) == 0) { printf("[]"); } else { printf("\n"); - for(i=0;icontents);i++) { + for(i=0;icontents,i),depth+1); + dumpR((NCjson*)NCJith(json,i),depth+1); } } break; diff --git a/nczarr_test/ut_map.c b/nczarr_test/ut_map.c index 4be2a0f527..f59434fd6d 100644 --- a/nczarr_test/ut_map.c +++ b/nczarr_test/ut_map.c @@ -52,16 +52,28 @@ int main(int argc, char** argv) { int stat = NC_NOERR; + char* tmp = NULL; if((stat = ut_init(argc, argv, &utoptions))) goto done; + if(utoptions.file == NULL && utoptions.output == NULL) { stat = NC_EINVAL; goto done; } if(utoptions.file == NULL && utoptions.output != NULL) utoptions.file = strdup(utoptions.output); if(utoptions.output == NULL && utoptions.file != NULL)utoptions.output = strdup(utoptions.file); + + /* Canonicalize */ + if((stat = NCpathcanonical(utoptions.file,&tmp))) goto done; + free(utoptions.file); + utoptions.file = tmp; + if((stat = NCpathcanonical(utoptions.output,&tmp))) goto done; + free(utoptions.output); + utoptions.output = tmp; + impl = 
kind2impl(utoptions.kind); url = makeurl(utoptions.file,impl); if((stat = runtests((const char**)utoptions.cmds,tests))) goto done; done: + nullfree(tmp); if(stat) usage(stat); return 0; } @@ -339,7 +351,7 @@ searchR(NCZMAP* map, int depth, const char* prefix0, NClist* objects) /* get next level object keys **below** the prefix: should have form: */ switch (stat = nczmap_search(map, prefix, matches)) { case NC_NOERR: break; - case NC_ENOTFOUND: stat = NC_NOERR; break;/* prefix is not a dir */ + case NC_ENOOBJECT: stat = NC_NOERR; break;/* prefix is not an object */ default: goto done; } /* recurse */ diff --git a/nczarr_test/ut_mapapi.c b/nczarr_test/ut_mapapi.c index ea8ecee2e8..b0bc47d8ef 100644 --- a/nczarr_test/ut_mapapi.c +++ b/nczarr_test/ut_mapapi.c @@ -49,10 +49,20 @@ int main(int argc, char** argv) { int stat = NC_NOERR; + char* tmp = NULL; if((stat = ut_init(argc, argv, &utoptions))) goto done; if(utoptions.file == NULL && utoptions.output != NULL) utoptions.file = strdup(utoptions.output); if(utoptions.output == NULL && utoptions.file != NULL)utoptions.output = strdup(utoptions.file); + + /* Canonicalize */ + if((stat = NCpathcanonical(utoptions.file,&tmp))) goto done; + free(utoptions.file); + utoptions.file = tmp; + if((stat = NCpathcanonical(utoptions.output,&tmp))) goto done; + free(utoptions.output); + utoptions.output = tmp; + impl = kind2impl(utoptions.kind); // if(impl == NCZM_S3) setkeyprefix(utoptions.file); url = makeurl(utoptions.file,impl); @@ -60,6 +70,7 @@ main(int argc, char** argv) if((stat = runtests((const char**)utoptions.cmds,tests))) goto done; done: + nullfree(tmp); nullfree(url); url = NULL; nullfree(keyprefix); if(stat) usage(THROW(stat)); @@ -140,7 +151,7 @@ simpledelete(void) case NC_NOERR: report(FAIL,"open",map); break; - case NC_ENOTFOUND: + case NC_ENOOBJECT: report(XFAIL,"open",map); stat = NC_NOERR; break; diff --git a/nczarr_test/zisjson.c b/nczarr_test/zisjson.c index e2f567b42a..186ae45f6c 100644 --- 
a/nczarr_test/zisjson.c +++ b/nczarr_test/zisjson.c @@ -18,7 +18,7 @@ #include "netcdf.h" #include "nclist.h" -#include "zjson.h" +#include "ncjson.h" #define MAXREAD 8192 diff --git a/nczarr_test/zmapio.c b/nczarr_test/zmapio.c index 5a56e28544..05fc76afc3 100644 --- a/nczarr_test/zmapio.c +++ b/nczarr_test/zmapio.c @@ -72,6 +72,7 @@ static struct Type { /* Command line options */ struct Dumpptions { int debug; + int meta_only; Mapop mop; char infile[4096]; NCZM_IMPL impl; @@ -137,11 +138,14 @@ main(int argc, char** argv) memset((void*)&dumpoptions,0,sizeof(dumpoptions)); - while ((c = getopt(argc, argv, "dvx:t:T:X:")) != EOF) { + while ((c = getopt(argc, argv, "dhvx:t:T:X:")) != EOF) { switch(c) { case 'd': dumpoptions.debug = 1; break; + case 'h': + dumpoptions.meta_only = 1; + break; case 'v': zmapusage(); goto done; @@ -323,7 +327,8 @@ objdump(void) } if(!hascontent) goto next; /* ignore it */ if(len > 0) { - content = malloc(len+1); + size_t padlen = (len+dumpoptions.nctype->typesize); + content = calloc(1,padlen+1); if((stat=nczmap_read(map,obj,0,len,content))) goto done; content[len] = '\0'; } else { @@ -331,13 +336,26 @@ objdump(void) } if(hascontent) { if(len > 0) { - assert(content != NULL); - if(kind == OK_CHUNK) len /= dumpoptions.nctype->typesize; + assert(content != NULL); + if(kind == OK_CHUNK) { + len = ceildiv(len,dumpoptions.nctype->typesize); + } printf("[%d] %s : (%llu)",depth,obj,len); - if(kind == OK_CHUNK) printf(" (%s)",dumpoptions.nctype->typename); + if(kind == OK_CHUNK) + printf(" (%s)",dumpoptions.nctype->typename); printf(" |"); - if(kind != OK_IGNORE) { + switch(kind) { + case OK_GROUP: + case OK_META: printcontent(len,content,kind); + break; + case OK_CHUNK: + if(dumpoptions.meta_only) + printf("..."); + else + printcontent(len,content,kind); + break; + default: break; } printf("|\n"); } else {