Skip to content

Commit

Permalink
Minor BJData fixes (#3637)
Browse files Browse the repository at this point in the history
* Replace vector/map LUTs in binary_reader with arrays

* Replace string_t::npos in binary_reader
  • Loading branch information
falbrechtskirchinger authored Aug 3, 2022
1 parent ac9e668 commit 3224202
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 46 deletions.
73 changes: 55 additions & 18 deletions include/nlohmann/detail/input/binary_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <string> // char_traits, string
#include <utility> // make_pair, move
#include <vector> // vector
#include <map> // map

#include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/input/input_adapters.hpp>
Expand Down Expand Up @@ -1953,7 +1952,7 @@ class binary_reader
return false;
}

if (size_and_type.first != string_t::npos)
if (size_and_type.first != npos)
{
if (size_and_type.second != 0)
{
Expand Down Expand Up @@ -2186,7 +2185,7 @@ class binary_reader
for (auto i : dim)
{
result *= i;
if (result == 0 || result == string_t::npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type()
if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type()
{
return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr));
}
Expand Down Expand Up @@ -2232,18 +2231,17 @@ class binary_reader
*/
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
{
result.first = string_t::npos; // size
result.first = npos; // size
result.second = 0; // type
bool is_ndarray = false;

get_ignore_noop();

if (current == '$')
{
std::vector<char_int_type> bjdx = {'[', '{', 'S', 'H', 'T', 'F', 'N', 'Z'}; // excluded markers in bjdata optimized type

result.second = get(); // must not ignore 'N', because 'N' maybe the type
if (JSON_HEDLEY_UNLIKELY( input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() ))
if (input_format == input_format_t::bjdata
&& JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second)))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
Expand Down Expand Up @@ -2492,23 +2490,23 @@ class binary_reader
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}

if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
};

size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker

auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t)
{
return p.first < t;
});
string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0))
if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}

if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
string_t type = it->second; // sax->string() takes a reference
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
{
return false;
}
Expand All @@ -2535,7 +2533,7 @@ class binary_reader
return (sax->end_array() && sax->end_object());
}

if (size_and_type.first != string_t::npos)
if (size_and_type.first != npos)
{
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
{
Expand Down Expand Up @@ -2598,15 +2596,15 @@ class binary_reader
}

// do not accept ND-array size in objects in BJData
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
}

string_t key;
if (size_and_type.first != string_t::npos)
if (size_and_type.first != npos)
{
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
{
Expand Down Expand Up @@ -2950,6 +2948,8 @@ class binary_reader
}

private:
static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast<std::size_t>(-1);

/// input adapter
InputAdapterType ia;

Expand All @@ -2967,7 +2967,44 @@ class binary_reader

/// the SAX parser
json_sax_t* sax = nullptr;

// excluded markers in bjdata optimized type
#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \
make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{')

#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \
make_array<bjd_type>( \
bjd_type{'C', "char"}, \
bjd_type{'D', "double"}, \
bjd_type{'I', "int16"}, \
bjd_type{'L', "int64"}, \
bjd_type{'M', "uint64"}, \
bjd_type{'U', "uint8"}, \
bjd_type{'d', "single"}, \
bjd_type{'i', "int8"}, \
bjd_type{'l', "int32"}, \
bjd_type{'m', "uint32"}, \
bjd_type{'u', "uint16"})

JSON_PRIVATE_UNLESS_TESTED:
// lookup tables
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers =
JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_;

using bjd_type = std::pair<char_int_type, string_t>;
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map =
JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_;

#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_
#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_
};

#ifndef JSON_HAS_CPP_17
template<typename BasicJsonType, typename InputAdapterType, typename SAX>
constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos;
#endif

} // namespace detail
NLOHMANN_JSON_NAMESPACE_END
13 changes: 9 additions & 4 deletions include/nlohmann/detail/meta/cpp_future.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#pragma once

#include <array> // array
#include <cstddef> // size_t
#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type
#include <utility> // index_sequence, make_index_sequence, index_sequence_for
Expand Down Expand Up @@ -152,15 +153,19 @@ template<> struct priority_tag<0> {};
template<typename T>
struct static_const
{
static constexpr T value{};
static JSON_INLINE_VARIABLE constexpr T value{};
};

#ifndef JSON_HAS_CPP_17

template<typename T>
constexpr T static_const<T>::value; // NOLINT(readability-redundant-declaration)

constexpr T static_const<T>::value;
#endif

template<typename T, typename... Args>
inline constexpr std::array<T, sizeof...(Args)> make_array(Args&& ... args)
{
return std::array<T, sizeof...(Args)> {{static_cast<T>(std::forward<Args>(args))...}};
}

} // namespace detail
NLOHMANN_JSON_NAMESPACE_END
86 changes: 64 additions & 22 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3028,6 +3028,7 @@ NLOHMANN_JSON_NAMESPACE_END



#include <array> // array
#include <cstddef> // size_t
#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type
#include <utility> // index_sequence, make_index_sequence, index_sequence_for
Expand Down Expand Up @@ -3172,16 +3173,20 @@ template<> struct priority_tag<0> {};
template<typename T>
struct static_const
{
static constexpr T value{};
static JSON_INLINE_VARIABLE constexpr T value{};
};

#ifndef JSON_HAS_CPP_17

template<typename T>
constexpr T static_const<T>::value; // NOLINT(readability-redundant-declaration)

constexpr T static_const<T>::value;
#endif

template<typename T, typename... Args>
inline constexpr std::array<T, sizeof...(Args)> make_array(Args&& ... args)
{
return std::array<T, sizeof...(Args)> {{static_cast<T>(std::forward<Args>(args))...}};
}

} // namespace detail
NLOHMANN_JSON_NAMESPACE_END

Expand Down Expand Up @@ -5977,7 +5982,6 @@ NLOHMANN_JSON_NAMESPACE_END
#include <string> // char_traits, string
#include <utility> // make_pair, move
#include <vector> // vector
#include <map> // map

// #include <nlohmann/detail/exceptions.hpp>

Expand Down Expand Up @@ -10943,7 +10947,7 @@ class binary_reader
return false;
}

if (size_and_type.first != string_t::npos)
if (size_and_type.first != npos)
{
if (size_and_type.second != 0)
{
Expand Down Expand Up @@ -11176,7 +11180,7 @@ class binary_reader
for (auto i : dim)
{
result *= i;
if (result == 0 || result == string_t::npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type()
if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type()
{
return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr));
}
Expand Down Expand Up @@ -11222,18 +11226,17 @@ class binary_reader
*/
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
{
result.first = string_t::npos; // size
result.first = npos; // size
result.second = 0; // type
bool is_ndarray = false;

get_ignore_noop();

if (current == '$')
{
std::vector<char_int_type> bjdx = {'[', '{', 'S', 'H', 'T', 'F', 'N', 'Z'}; // excluded markers in bjdata optimized type

result.second = get(); // must not ignore 'N', because 'N' maybe the type
if (JSON_HEDLEY_UNLIKELY( input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() ))
if (input_format == input_format_t::bjdata
&& JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second)))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
Expand Down Expand Up @@ -11482,23 +11485,23 @@ class binary_reader
// if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata):
// {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]}

if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{
std::map<char_int_type, string_t> bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"},
{'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"}
};

size_and_type.second &= ~(static_cast<char_int_type>(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker

auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t)
{
return p.first < t;
});
string_t key = "_ArrayType_";
if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0))
if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second))
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr));
}

if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) ))
string_t type = it->second; // sax->string() takes a reference
if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type)))
{
return false;
}
Expand All @@ -11525,7 +11528,7 @@ class binary_reader
return (sax->end_array() && sax->end_object());
}

if (size_and_type.first != string_t::npos)
if (size_and_type.first != npos)
{
if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first)))
{
Expand Down Expand Up @@ -11588,15 +11591,15 @@ class binary_reader
}

// do not accept ND-array size in objects in BJData
if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0)
if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0)
{
auto last_token = get_token_string();
return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read,
exception_message(input_format, "BJData object does not support ND-array size in optimized format", "object"), nullptr));
}

string_t key;
if (size_and_type.first != string_t::npos)
if (size_and_type.first != npos)
{
if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first)))
{
Expand Down Expand Up @@ -11940,6 +11943,8 @@ class binary_reader
}

private:
static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast<std::size_t>(-1);

/// input adapter
InputAdapterType ia;

Expand All @@ -11957,8 +11962,45 @@ class binary_reader

/// the SAX parser
json_sax_t* sax = nullptr;

// excluded markers in bjdata optimized type
#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \
make_array<char_int_type>('F', 'H', 'N', 'S', 'T', 'Z', '[', '{')

#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \
make_array<bjd_type>( \
bjd_type{'C', "char"}, \
bjd_type{'D', "double"}, \
bjd_type{'I', "int16"}, \
bjd_type{'L', "int64"}, \
bjd_type{'M', "uint64"}, \
bjd_type{'U', "uint8"}, \
bjd_type{'d', "single"}, \
bjd_type{'i', "int8"}, \
bjd_type{'l', "int32"}, \
bjd_type{'m', "uint32"}, \
bjd_type{'u', "uint16"})

JSON_PRIVATE_UNLESS_TESTED:
// lookup tables
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers =
JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_;

using bjd_type = std::pair<char_int_type, string_t>;
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map =
JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_;

#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_
#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_
};

#ifndef JSON_HAS_CPP_17
template<typename BasicJsonType, typename InputAdapterType, typename SAX>
constexpr std::size_t binary_reader<BasicJsonType, InputAdapterType, SAX>::npos;
#endif

} // namespace detail
NLOHMANN_JSON_NAMESPACE_END

Expand Down
Loading

0 comments on commit 3224202

Please sign in to comment.