From 5ec866a66aacbd70f92a1e41368c6ca9fbdc92c4 Mon Sep 17 00:00:00 2001 From: Larry Gritz Date: Wed, 10 Nov 2021 13:45:03 -0800 Subject: [PATCH] Strutil: add new utilities for parsing values out of strings (#3173) * `parse_values()` and `scan_values()` both can parse a series of values in a string, with optional prefix, suffix, and separator. The scan versions just figure out the values, the parse versions optionally can update a string_view& like the other parsing functions in Strutil. * `scan_datetime` parses the int components of a string that looks like "YYYY:MM:DD hh:mm:ss" (and a few minor variants). * Fix stoui -- it was previously casting stoi results, which is not correct. * Add `from_string<>` specialties for `double`, `int64_t`, `uint64_t`. --- src/include/OpenImageIO/strutil.h | 85 +++++++++++++++++++++++++++++-- src/libutil/strutil.cpp | 75 +++++++++++++++++++++++++++ src/libutil/strutil_test.cpp | 65 +++++++++++++++++++++++ 3 files changed, 222 insertions(+), 3 deletions(-) diff --git a/src/include/OpenImageIO/strutil.h b/src/include/OpenImageIO/strutil.h index d317e61328..54c22dd676 100644 --- a/src/include/OpenImageIO/strutil.h +++ b/src/include/OpenImageIO/strutil.h @@ -547,9 +547,7 @@ OIIO_UTIL_API int stoi (string_view s, size_t* pos=0, int base=10); // stoui() returns the unsigned int conversion of text from a string. // No exceptions or errors -- parsing errors just return 0. Negative // values are cast, overflow is clamped. No locale considerations. -inline unsigned int stoui (string_view s, size_t* pos=0, int base=10) { - return static_cast(stoi (s, pos, base)); -} +OIIO_UTIL_API unsigned int stoui (string_view s, size_t* pos=0, int base=10); /// stof() returns the float conversion of text from several string types. /// No exceptions or errors -- parsing errors just return 0.0. 
These always @@ -588,6 +586,8 @@ template inline T from_string (string_view s) { return T(s); // Generic: assume there is an explicit converter } + +#ifndef OIIO_DOXYGEN // Special case for int template<> inline int from_string (string_view s) { return Strutil::stoi(s); @@ -601,6 +601,24 @@ template<> inline unsigned int from_string (string_view s) { template<> inline float from_string (string_view s) { return Strutil::stof(s); } +// Special case for double -- note that by using Strutil::stod, this +// always treats '.' as the decimal mark. +template<> inline double from_string (string_view s) { + return Strutil::stod(s); +} + +template<> inline int64_t from_string(string_view s) { + // For conversion of string_view to int64_t, fall back on strtoll. + auto r = strtoll(std::string(s).c_str(), nullptr, 10); + return static_cast(r); +} + +template<> inline uint64_t from_string(string_view s) { + // For conversion of string_view to uint64_t, fall back on strtoull. + auto r = strtoull(std::string(s).c_str(), nullptr, 10); + return static_cast(r); +} +#endif @@ -712,6 +730,20 @@ extract_from_list_string (string_view list, size_t nvals=0, T val=T(), +/// Scan a string for date and time information. Return true upon success, +/// false if the string did not appear to contain a valid date/time or if the +/// month, day, hour, minute, or second is out of range. If `str` contains +/// more characters after a valid date/time, it is not considered a failure. +/// +/// Valid date/time formats include: +/// * YYYY-MM-DD HH:MM:SS +/// * YYYY:MM:DD HH:MM:SS +/// * YYYY/MM/DD HH:MM:SS +OIIO_UTIL_API bool +scan_datetime(string_view str, int& year, int& month, int& day, + int& hour, int& min, int& sec); + + /// C++ functor wrapper class for using strhash for unordered_map or /// unordered_set. The way this is used, in conjunction with @@ -858,6 +890,53 @@ bool OIIO_UTIL_API parse_int (string_view &str, int &val, bool eat=true) noexcep /// str. 
bool OIIO_UTIL_API parse_float (string_view &str, float &val, bool eat=true) noexcept; +/// Synonym for parse_float +inline bool parse_value(string_view &str, float &val, bool eat=true) noexcept +{ + return parse_float(str, val, eat); +} + +/// Synonym for parse_int +inline bool parse_value(string_view &str, int &val, bool eat=true) noexcept +{ + return parse_int(str, val, eat); +} + +/// Parse from `str`: a `prefix`, a series of int values separated by the +/// `sep` string, and a `postfix`, placing the values in the elements of +/// mutable span `values`, where the span length indicates the number of +/// values to read. Any of the prefix, separator, or postfix may be empty +/// strings. If `eat` is true and the parse was successful, `str` will be +/// updated in place to trim everything that was parsed, but if any part of +/// the parse failed, `str` will not be altered from its original state. +bool OIIO_UTIL_API +parse_values(string_view& str, string_view prefix, span values, + string_view sep = "", string_view postfix = "", + bool eat = true) noexcept; +/// parse_values for int. +bool OIIO_UTIL_API +parse_values(string_view& str, string_view prefix, span values, + string_view sep = "", string_view postfix = "", + bool eat = true) noexcept; + +/// Similar to parse_values, but with no option to "eat" from +/// or modify the source string. 
+inline bool +scan_values(string_view str, string_view prefix, span values, + string_view sep = "", string_view postfix = "") noexcept +{ + string_view sv(str); + return parse_values(sv, prefix, values, sep, postfix); +} + +inline bool +scan_values(string_view str, string_view prefix, span values, + string_view sep = "", string_view postfix = "") noexcept +{ + string_view sv(str); + return parse_values(sv, prefix, values, sep, postfix); +} + enum QuoteBehavior { DeleteQuotes, KeepQuotes }; /// If str's first non-whitespace characters form a valid string (either a /// single word separated by whitespace or anything inside a double-quoted diff --git a/src/libutil/strutil.cpp b/src/libutil/strutil.cpp index 8eb1b149e7..c93d54c1e8 100644 --- a/src/libutil/strutil.cpp +++ b/src/libutil/strutil.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -934,6 +935,48 @@ Strutil::parse_string(string_view& str, string_view& val, bool eat, +bool +Strutil::parse_values(string_view& str, string_view prefix, span values, + string_view sep, string_view postfix, bool eat) noexcept +{ + string_view p = str; + bool ok = true; + if (prefix.size()) + ok &= Strutil::parse_prefix(p, prefix); + for (size_t i = 0, sz = values.size(); i < sz && ok; ++i) { + ok &= Strutil::parse_value(p, values[i]); + if (ok && sep.size() && i < sz - 1) + ok &= Strutil::parse_prefix(p, sep); + } + if (ok && postfix.size()) + ok &= Strutil::parse_prefix(p, postfix); + if (ok && eat) + str = p; + return ok; +} + +bool +Strutil::parse_values(string_view& str, string_view prefix, span values, + string_view sep, string_view postfix, bool eat) noexcept +{ + string_view p = str; + bool ok = true; + if (prefix.size()) + ok &= Strutil::parse_prefix(p, prefix); + for (size_t i = 0, sz = values.size(); i < sz && ok; ++i) { + ok &= Strutil::parse_value(p, values[i]); + if (ok && sep.size() && i < sz - 1) + ok &= Strutil::parse_prefix(p, sep); + } + if (ok && postfix.size()) + ok &= 
Strutil::parse_prefix(p, postfix); + if (ok && eat) + str = p; + return ok; +} + + + string_view Strutil::parse_word(string_view& str, bool eat) noexcept { @@ -1504,6 +1547,20 @@ Strutil::stod(string_view s, size_t* pos) +unsigned int +Strutil::stoui(string_view s, size_t* pos, int base) +{ + // For conversion of string_view to unsigned int, fall back on strtoul. + char* endptr = nullptr; + std::string ss(s); + auto r = strtoul(ss.c_str(), &endptr, base); + if (pos) + *pos = size_t(endptr - ss.c_str()); + return static_cast(r); +} + + + bool Strutil::string_is_int(string_view s) { @@ -1530,4 +1587,22 @@ Strutil::string_is_float(string_view s) } + +bool +Strutil::scan_datetime(string_view str, int& year, int& month, int& day, + int& hour, int& min, int& sec) +{ + bool ok = parse_int(str, year) + && (parse_char(str, ':', false) || parse_char(str, '-', false) + || parse_char(str, '/', false)) + && parse_int(str, month) + && (parse_char(str, ':', false) || parse_char(str, '-', false) + || parse_char(str, '/', false)) + && parse_int(str, day) && parse_int(str, hour) + && parse_char(str, ':', false) && parse_int(str, min) + && parse_char(str, ':', false) && parse_int(str, sec); + return ok && month >= 1 && month <= 12 && day >= 1 && day <= 31 && hour >= 0 + && hour <= 23 && min >= 0 && min <= 59 && sec >= 0 && sec <= 59; +} + OIIO_NAMESPACE_END diff --git a/src/libutil/strutil_test.cpp b/src/libutil/strutil_test.cpp index b19f744e45..5df58dfb25 100644 --- a/src/libutil/strutil_test.cpp +++ b/src/libutil/strutil_test.cpp @@ -736,6 +736,15 @@ test_numeric_conversion() OIIO_CHECK_EQUAL(Strutil::stoi("-12345678901234567890"), std::numeric_limits::min()); + OIIO_CHECK_EQUAL(Strutil::stoui("hi"), 0); + OIIO_CHECK_EQUAL(Strutil::stoui(" "), 0); + OIIO_CHECK_EQUAL(Strutil::stoui("123"), 123); + OIIO_CHECK_EQUAL(Strutil::stoui("+123"), 123); + OIIO_CHECK_EQUAL(Strutil::stoui(" 123 "), 123); + OIIO_CHECK_EQUAL(Strutil::stoui("123.45"), 123); + // bigger than fits in an int, to be 
sure we're really using uint: + OIIO_CHECK_EQUAL(Strutil::stoui("3221225472"), 3221225472UL); + OIIO_CHECK_EQUAL(Strutil::stoi("hi", &pos), 0); OIIO_CHECK_EQUAL(pos, 0); OIIO_CHECK_EQUAL(Strutil::stoi(" ", &pos), 0); @@ -808,6 +817,7 @@ test_numeric_conversion() bench ("std atoi", [&](){ DoNotOptimize(atoi(numcstr));}); // NOLINT(cert-err34-c) bench ("Strutil::stoi(string) ", [&](){ return DoNotOptimize(Strutil::stoi(numstring)); }); bench ("Strutil::stoi(char*) ", [&](){ return DoNotOptimize(Strutil::stoi(numcstr)); }); + bench ("Strutil::stoui(char*) ", [&](){ return DoNotOptimize(Strutil::stoui(numcstr)); }); bench ("std atof", [&](){ DoNotOptimize(atof(numcstr));}); // NOLINT(cert-err34-c) bench ("std strtod", [&](){ DoNotOptimize(::strtod(numcstr, nullptr));}); bench ("Strutil::from_string", [&](){ DoNotOptimize(Strutil::from_string(numstring));}); @@ -1058,6 +1068,23 @@ void test_parse () s = " 42.1 abc"; OIIO_CHECK_ASSERT (parse_float (s, f) && f == 42.1f && s == " abc"); s = " 42.1 abc"; OIIO_CHECK_ASSERT (parse_float (s, f, false) && f == 42.1f && s == " 42.1 abc"); + { + string_view sv; + float xyz[3] = { 0, 0, 0 }; + sv = "xxx 1 2 3 4 5 6"; + OIIO_CHECK_ASSERT(parse_values(sv, "xxx", xyz, "", "4") + && xyz[0] == 1 && xyz[1] == 2 && xyz[2] == 3 + && sv == " 5 6"); + sv = "xxx 1 2 3 4 5 6"; + OIIO_CHECK_ASSERT(!parse_values(sv, "", xyz)); + sv = "xxx 1 2 3 4 5 6"; + OIIO_CHECK_ASSERT(!parse_values(sv, "xxx", xyz, ",")); + sv = "xxx 1, 2.5,3, 4, 5,6"; + OIIO_CHECK_ASSERT(parse_values(sv, "xxx", xyz, ",") + && xyz[0] == 1 && xyz[1] == 2.5 && xyz[2] == 3 + && sv == ", 4, 5,6"); + } + string_view ss; s = "foo bar"; OIIO_CHECK_ASSERT (parse_string (s, ss) && ss == "foo" && s == " bar"); @@ -1254,6 +1281,43 @@ test_string_compare_function() +void +test_datetime() +{ + using namespace Strutil; + int y = -1, m = -1, d = -1, h = -1, min = -1, s = -1; + + y = -1, m = -1, d = -1, h = -1, min = -1, s = -1; + OIIO_CHECK_ASSERT(scan_datetime("2020-05-01 12:34:21", y, m, 
d, h, min, s)); + OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21); + + y = -1, m = -1, d = -1, h = -1, min = -1, s = -1; + OIIO_CHECK_ASSERT(scan_datetime("2020/05/01 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21); + + y = -1, m = -1, d = -1, h = -1, min = -1, s = -1; + OIIO_CHECK_ASSERT(scan_datetime("2020:05:01 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21); + + y = -1, m = -1, d = -1, h = -1, min = -1, s = -1; + OIIO_CHECK_ASSERT(scan_datetime("2020:05:01 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21); + + // No time + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01", y, m, d, h, min, s)); + // Out of range values + OIIO_CHECK_ASSERT(!scan_datetime("2020:00:01 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:13:01 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:00 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:32 12:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 24:34:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 24:60:21", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 12:34:60", y, m, d, h, min, s)); + OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 12:34:-1", y, m, d, h, min, s)); +} + + + int main(int /*argc*/, char* /*argv*/[]) { @@ -1283,6 +1347,7 @@ main(int /*argc*/, char* /*argv*/[]) test_locale(); // test_float_formatting (); test_string_compare_function(); + test_datetime(); return unit_test_failures; }