diff --git a/autotest/gdrivers/data/zarr/generate_test_files.py b/autotest/gdrivers/data/zarr/generate_test_files.py index 73d41ce15ce0..978efb80c070 100644 --- a/autotest/gdrivers/data/zarr/generate_test_files.py +++ b/autotest/gdrivers/data/zarr/generate_test_files.py @@ -16,7 +16,7 @@ import numpy as np import zarr -from numcodecs import LZ4, LZMA, Blosc, GZip, Zlib, Zstd +from numcodecs import LZ4, LZMA, Blosc, GZip, Shuffle, Zlib, Zstd os.chdir(os.path.dirname(__file__)) @@ -69,6 +69,17 @@ ) z[:] = [1, 2] +z = zarr.open( + "shuffle.zarr", + mode="w", + dtype="u2", + shape=(2,), + chunks=(2,), + compressor=None, + filters=[Shuffle(elementsize=2)], +) +z[:] = [1, 2] + z = zarr.open( "order_f_u1.zarr", diff --git a/autotest/gdrivers/data/zarr/shuffle.zarr/.zarray b/autotest/gdrivers/data/zarr/shuffle.zarr/.zarray new file mode 100644 index 000000000000..39be4ee62653 --- /dev/null +++ b/autotest/gdrivers/data/zarr/shuffle.zarr/.zarray @@ -0,0 +1,19 @@ +{ + "chunks": [ + 2 + ], + "compressor": null, + "dtype": "(*output_data))[j * nElts + i] = + (static_cast(input_data))[i * eltSize + j]; + } + } + + *output_size = input_size; + return true; + } + + if (output_data == nullptr && output_size != nullptr) + { + *output_size = input_size; + return true; + } + + if (output_data != nullptr && *output_data == nullptr && + output_size != nullptr) + { + *output_data = VSI_MALLOC_VERBOSE(input_size); + *output_size = input_size; + if (*output_data == nullptr) + return false; + bool ret = ZarrShuffleCompressor(input_data, input_size, output_data, + output_size, options, nullptr); + if (!ret) + { + VSIFree(*output_data); + *output_data = nullptr; + } + return ret; + } + + CPLError(CE_Failure, CPLE_AppDefined, "Invalid use of API"); + return false; +} + +/************************************************************************/ +/* ZarrShuffleDecompressor() */ +/************************************************************************/ + +static bool ZarrShuffleDecompressor(const void *input_data, size_t input_size, + void **output_data, size_t *output_size, + CSLConstList options, + void * /* compressor_user_data */) +{ + // 4 is the default of the shuffle numcodecs: + // https://numcodecs.readthedocs.io/en/v0.10.0/shuffle.html + const int eltSize = atoi(CSLFetchNameValueDef(options, "ELEMENTSIZE", "4")); + if (eltSize != 2 && eltSize != 4 && eltSize != 8) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Only ELEMENTSIZE=2,4,8 is supported"); + if (output_size) + *output_size = 0; + return false; + } + if ((input_size % eltSize) != 0) + { + CPLError(CE_Failure, CPLE_AppDefined, + "input_size should be a multiple of ELEMENTSIZE"); + if (output_size) + *output_size = 0; + return false; + } + if (output_data != nullptr && *output_data != nullptr && + output_size != nullptr && *output_size != 0) + { + if (*output_size < input_size) + { + CPLError(CE_Failure, CPLE_AppDefined, "Too small output size"); + *output_size = input_size; + return false; + } + + // Reverse of what is done in the compressor function. + const size_t nElts = input_size / eltSize; + for (size_t i = 0; i < nElts; ++i) + { + for (int j = 0; j < eltSize; j++) + { + (static_cast(*output_data))[i * eltSize + j] = + (static_cast(input_data))[j * nElts + i]; + } + } + + *output_size = input_size; + return true; + } + + if (output_data == nullptr && output_size != nullptr) + { + *output_size = input_size; + return true; + } + + if (output_data != nullptr && *output_data == nullptr && + output_size != nullptr) + { + *output_data = VSI_MALLOC_VERBOSE(input_size); + *output_size = input_size; + if (*output_data == nullptr) + return false; + bool ret = ZarrShuffleDecompressor(input_data, input_size, output_data, + output_size, options, nullptr); + if (!ret) + { + VSIFree(*output_data); + *output_data = nullptr; + } + return ret; + } + + CPLError(CE_Failure, CPLE_AppDefined, "Invalid use of API"); + return false; +} + +static const CPLCompressor gShuffleCompressor = { + /* nStructVersion = */ 1, + /* pszId = */ "shuffle", CCT_FILTER, + /* papszMetadata = */ nullptr, ZarrShuffleCompressor, + /* user_data = */ nullptr}; + +static const CPLCompressor gShuffleDecompressor = { + /* nStructVersion = */ 1, + /* pszId = */ "shuffle", + CCT_FILTER, + /* papszMetadata = */ nullptr, + ZarrShuffleDecompressor, + /* user_data = */ nullptr}; + /************************************************************************/ /* ZarrV2Array::LoadTileData() */ /************************************************************************/ @@ -563,7 +738,9 @@ bool ZarrV2Array::LoadTileData(const uint64_t *tileIndices, bool bUseMutex, const auto &oFilter = m_oFiltersArray[i]; const auto osFilterId = oFilter["id"].ToString(); const auto psFilterDecompressor = - CPLGetDecompressor(osFilterId.c_str()); + EQUAL(osFilterId.c_str(), "shuffle") + ? &gShuffleDecompressor + : CPLGetDecompressor(osFilterId.c_str()); CPLAssert(psFilterDecompressor); CPLStringList aosOptions; @@ -846,7 +1023,10 @@ bool ZarrV2Array::FlushDirtyTile() const for (const auto &oFilter : m_oFiltersArray) { const auto osFilterId = oFilter["id"].ToString(); - const auto psFilterCompressor = CPLGetCompressor(osFilterId.c_str()); + const auto psFilterCompressor = + EQUAL(osFilterId.c_str(), "shuffle") + ? &gShuffleCompressor + : CPLGetCompressor(osFilterId.c_str()); CPLAssert(psFilterCompressor); CPLStringList aosOptions; @@ -1865,16 +2045,19 @@ ZarrV2Group::LoadArray(const std::string &osArrayName, CPLError(CE_Failure, CPLE_AppDefined, "Missing filter id"); return nullptr; } - const auto psFilterCompressor = - CPLGetCompressor(osFilterId.c_str()); - const auto psFilterDecompressor = - CPLGetDecompressor(osFilterId.c_str()); - if (psFilterCompressor == nullptr || - psFilterDecompressor == nullptr) + if (!EQUAL(osFilterId.c_str(), "shuffle")) { - CPLError(CE_Failure, CPLE_AppDefined, "Filter %s not handled", - osFilterId.c_str()); - return nullptr; + const auto psFilterCompressor = + CPLGetCompressor(osFilterId.c_str()); + const auto psFilterDecompressor = + CPLGetDecompressor(osFilterId.c_str()); + if (psFilterCompressor == nullptr || + psFilterDecompressor == nullptr) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Filter %s not handled", osFilterId.c_str()); + return nullptr; + } } } }