Skip to content

Commit

Permalink
Strutil: add new utilities for parsing values out of strings (#3173)
Browse files Browse the repository at this point in the history
* `parse_values()` and `scan_values()` both can parse a series of
  values in a string, with optional prefix, suffix, and separator. The
  scan versions just figure out the values, the parse versions
  optionally can update a string_view& like the other parsing
  functions in Strutil.

* `scan_datetime` parses the int components of a string that looks like
  "YYYY:MM:DD hh:mm:ss" (and a few minor variants).

* Fix stoui -- it was previously casting stoi results, which is not
  correct.

* Add `from_string<>` specializations for `double`, `int64_t`, `uint64_t`.
  • Loading branch information
lgritz committed Nov 27, 2021
1 parent f5bc3fc commit 5ec866a
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 3 deletions.
85 changes: 82 additions & 3 deletions src/include/OpenImageIO/strutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -547,9 +547,7 @@ OIIO_UTIL_API int stoi (string_view s, size_t* pos=0, int base=10);
// stoui() returns the unsigned int conversion of text from a string.
// No exceptions or errors -- parsing errors just return 0. Negative
// values are cast, overflow is clamped. No locale considerations.
inline unsigned int stoui (string_view s, size_t* pos=0, int base=10) {
return static_cast<unsigned int>(stoi (s, pos, base));
}
OIIO_UTIL_API unsigned int stoui (string_view s, size_t* pos=0, int base=10);

/// stof() returns the float conversion of text from several string types.
/// No exceptions or errors -- parsing errors just return 0.0. These always
Expand Down Expand Up @@ -588,6 +586,8 @@ template<typename T>
inline T from_string (string_view s) {
return T(s); // Generic: assume there is an explicit converter
}

#ifndef OIIO_DOXYGEN
// Special case for int
template<> inline int from_string<int> (string_view s) {
return Strutil::stoi(s);
Expand All @@ -601,6 +601,24 @@ template<> inline unsigned int from_string<unsigned int> (string_view s) {
// Special case for float: forwards to Strutil::stof, which reports parse
// errors by returning 0.0 rather than throwing.
template<> inline float from_string<float> (string_view s) {
return Strutil::stof(s);
}
// Special case for double -- note that by using Strutil::stod, this
// always treats '.' as the decimal mark.
template<> inline double from_string<double> (string_view s) {
return Strutil::stod(s);
}

// Special case for int64_t: fall back on strtoll (base 10). The characters
// are first copied into a std::string because strtoll needs a
// NUL-terminated buffer.
template<> inline int64_t from_string<int64_t>(string_view s) {
    const std::string text(s);
    return static_cast<int64_t>(strtoll(text.c_str(), nullptr, 10));
}

// Special case for uint64_t: fall back on strtoull (base 10). The characters
// are first copied into a std::string because strtoull needs a
// NUL-terminated buffer.
template<> inline uint64_t from_string<uint64_t>(string_view s) {
    const std::string text(s);
    return static_cast<uint64_t>(strtoull(text.c_str(), nullptr, 10));
}
#endif



Expand Down Expand Up @@ -712,6 +730,20 @@ extract_from_list_string (string_view list, size_t nvals=0, T val=T(),



/// Scan a string for date and time information. Return true upon success,
/// false if the string did not appear to contain a valid date/time or if
/// the month, day, hour, minute, or second is out of range. If `str`
/// contains more characters after a valid date/time, it is not considered
/// a failure.
///
/// Valid date/time formats include:
/// * YYYY-MM-DD HH:MM:SS
/// * YYYY:MM:DD HH:MM:SS
/// * YYYY/MM/DD HH:MM:SS
OIIO_UTIL_API bool
scan_datetime(string_view str, int& year, int& month, int& day,
int& hour, int& min, int& sec);



/// C++ functor wrapper class for using strhash for unordered_map or
/// unordered_set. The way this is used, in conjunction with
Expand Down Expand Up @@ -858,6 +890,53 @@ bool OIIO_UTIL_API parse_int (string_view &str, int &val, bool eat=true) noexcep
/// str.
bool OIIO_UTIL_API parse_float (string_view &str, float &val, bool eat=true) noexcept;

/// Synonym for parse_float: the `float` overload of parse_value, so callers
/// can pick the parser by the type of `val`.
inline bool parse_value(string_view &str, float &val, bool eat=true) noexcept
{
return parse_float(str, val, eat);
}

/// Synonym for parse_int: the `int` overload of parse_value, so callers
/// can pick the parser by the type of `val`.
inline bool parse_value(string_view &str, int &val, bool eat=true) noexcept
{
return parse_int(str, val, eat);
}

/// Parse from `str`: a `prefix`, a series of int values separated by the
/// `sep` string, and a `postfix`, placing the values in the elements of
/// mutable span `values`, where the span length indicates the number of
/// values to read. Any of the prefix, separator, or postfix may be empty
/// strings. If `eat` is true and the parse was successful, `str` will be
/// updated in place to trim everything that was parsed, but if any part of
/// the parse failed, `str` will not be altered from its original state.
bool OIIO_UTIL_API
parse_values(string_view& str, string_view prefix, span<int> values,
string_view sep = "", string_view postfix = "",
bool eat = true) noexcept;
/// parse_values for float.
bool OIIO_UTIL_API
parse_values(string_view& str, string_view prefix, span<float> values,
string_view sep = "", string_view postfix = "",
bool eat = true) noexcept;

/// Similar to parse_values for int, but with no option to "eat" from or
/// modify the source string: `str` is taken by value, so the caller's view
/// is never changed. Returns the result of parse_values.
inline bool
scan_values(string_view str, string_view prefix, span<int> values,
            string_view sep = "", string_view postfix = "") noexcept
{
    // `str` is already a private copy, so it can serve directly as the
    // scratch view that parse_values is allowed to advance.
    return parse_values(str, prefix, values, sep, postfix);
}

/// Similar to parse_values for float, but with no option to "eat" from or
/// modify the source string: `str` is taken by value, so the caller's view
/// is never changed. Returns the result of parse_values.
inline bool
scan_values(string_view str, string_view prefix, span<float> values,
            string_view sep = "", string_view postfix = "") noexcept
{
    // `str` is already a private copy, so it can serve directly as the
    // scratch view that parse_values is allowed to advance.
    return parse_values(str, prefix, values, sep, postfix);
}

enum QuoteBehavior { DeleteQuotes, KeepQuotes };
/// If str's first non-whitespace characters form a valid string (either a
/// single word separated by whitespace or anything inside a double-quoted
Expand Down
75 changes: 75 additions & 0 deletions src/libutil/strutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdarg>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <limits>
#include <locale.h>
Expand Down Expand Up @@ -934,6 +935,48 @@ Strutil::parse_string(string_view& str, string_view& val, bool eat,



bool
Strutil::parse_values(string_view& str, string_view prefix, span<int> values,
string_view sep, string_view postfix, bool eat) noexcept
{
string_view p = str;
bool ok = true;
if (prefix.size())
ok &= Strutil::parse_prefix(p, prefix);
for (size_t i = 0, sz = values.size(); i < sz && ok; ++i) {
ok &= Strutil::parse_value(p, values[i]);
if (ok && sep.size() && i < sz - 1)
ok &= Strutil::parse_prefix(p, sep);
}
if (ok && postfix.size())
ok &= Strutil::parse_prefix(p, postfix);
if (ok && eat)
str = p;
return ok;
}

bool
Strutil::parse_values(string_view& str, string_view prefix, span<float> values,
string_view sep, string_view postfix, bool eat) noexcept
{
string_view p = str;
bool ok = true;
if (prefix.size())
ok &= Strutil::parse_prefix(p, prefix);
for (size_t i = 0, sz = values.size(); i < sz && ok; ++i) {
ok &= Strutil::parse_value(p, values[i]);
if (ok && sep.size() && i < sz - 1)
ok &= Strutil::parse_prefix(p, sep);
}
if (ok && postfix.size())
ok &= Strutil::parse_prefix(p, postfix);
if (ok && eat)
str = p;
return ok;
}



string_view
Strutil::parse_word(string_view& str, bool eat) noexcept
{
Expand Down Expand Up @@ -1504,6 +1547,20 @@ Strutil::stod(string_view s, size_t* pos)



// Convert the text in `s` to an unsigned int using the given `base`.
// If `pos` is non-null, `*pos` receives the offset of the first character
// that was not consumed by the conversion (0 if nothing could be parsed).
// No exceptions or errors -- parsing errors just return 0. Negative values
// are cast, overflow is clamped to the largest unsigned int.
unsigned int
Strutil::stoui(string_view s, size_t* pos, int base)
{
    // For conversion of string_view to unsigned int, fall back on strtoul.
    // It needs a NUL-terminated buffer, so copy the characters first.
    const std::string ss(s);
    char* endptr          = nullptr;
    const unsigned long r = strtoul(ss.c_str(), &endptr, base);
    if (pos)
        *pos = size_t(endptr - ss.c_str());

    // strtoul saturates at ULONG_MAX, which is wider than unsigned int on
    // LP64 platforms; a plain cast there would wrap large inputs (e.g.
    // "4294967296" -> 0) instead of clamping. Negative input is excluded:
    // strtoul negates it in unsigned arithmetic and the documented behavior
    // for negatives is a cast.
    const size_t first  = ss.find_first_not_of(" \t\n\v\f\r");
    const bool negative = (first != std::string::npos && ss[first] == '-');
    const unsigned long uint_max = std::numeric_limits<unsigned int>::max();
    if (!negative && r > uint_max)
        return std::numeric_limits<unsigned int>::max();
    return static_cast<unsigned int>(r);
}



bool
Strutil::string_is_int(string_view s)
{
Expand All @@ -1530,4 +1587,22 @@ Strutil::string_is_float(string_view s)
}



// Scan `str` for "YYYY<d>MM<d>DD hh:mm:ss", where each date delimiter <d>
// may be ':', '-', or '/'. The parsed components are stored in the output
// references as they are read. Returns true only if all six fields were
// parsed and month, day, hour, minute, and second are within their usual
// ranges (the year is not range-checked). Trailing characters are ignored.
bool
Strutil::scan_datetime(string_view str, int& year, int& month, int& day,
                       int& hour, int& min, int& sec)
{
    // Consume one date delimiter; any of the three accepted characters.
    auto date_sep = [&str]() {
        return parse_char(str, ':', false) || parse_char(str, '-', false)
               || parse_char(str, '/', false);
    };
    // Consume the ':' between time fields.
    auto time_sep = [&str]() { return parse_char(str, ':', false); };

    // Date portion.
    if (!parse_int(str, year) || !date_sep() || !parse_int(str, month)
        || !date_sep() || !parse_int(str, day))
        return false;

    // Time portion.
    if (!parse_int(str, hour) || !time_sep() || !parse_int(str, min)
        || !time_sep() || !parse_int(str, sec))
        return false;

    auto within = [](int v, int lo, int hi) { return lo <= v && v <= hi; };
    return within(month, 1, 12) && within(day, 1, 31) && within(hour, 0, 23)
           && within(min, 0, 59) && within(sec, 0, 59);
}

OIIO_NAMESPACE_END
65 changes: 65 additions & 0 deletions src/libutil/strutil_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,15 @@ test_numeric_conversion()
OIIO_CHECK_EQUAL(Strutil::stoi("-12345678901234567890"),
std::numeric_limits<int>::min());

OIIO_CHECK_EQUAL(Strutil::stoui("hi"), 0);
OIIO_CHECK_EQUAL(Strutil::stoui(" "), 0);
OIIO_CHECK_EQUAL(Strutil::stoui("123"), 123);
OIIO_CHECK_EQUAL(Strutil::stoui("+123"), 123);
OIIO_CHECK_EQUAL(Strutil::stoui(" 123 "), 123);
OIIO_CHECK_EQUAL(Strutil::stoui("123.45"), 123);
// bigger than fits in an int, to be sure we're really using uint:
OIIO_CHECK_EQUAL(Strutil::stoui("3221225472"), 3221225472UL);

OIIO_CHECK_EQUAL(Strutil::stoi("hi", &pos), 0);
OIIO_CHECK_EQUAL(pos, 0);
OIIO_CHECK_EQUAL(Strutil::stoi(" ", &pos), 0);
Expand Down Expand Up @@ -808,6 +817,7 @@ test_numeric_conversion()
bench ("std atoi", [&](){ DoNotOptimize(atoi(numcstr));}); // NOLINT(cert-err34-c)
bench ("Strutil::stoi(string) ", [&](){ return DoNotOptimize(Strutil::stoi(numstring)); });
bench ("Strutil::stoi(char*) ", [&](){ return DoNotOptimize(Strutil::stoi(numcstr)); });
bench ("Strutil::stoui(char*) ", [&](){ return DoNotOptimize(Strutil::stoui(numcstr)); });
bench ("std atof", [&](){ DoNotOptimize(atof(numcstr));}); // NOLINT(cert-err34-c)
bench ("std strtod", [&](){ DoNotOptimize(::strtod(numcstr, nullptr));});
bench ("Strutil::from_string<float>", [&](){ DoNotOptimize(Strutil::from_string<float>(numstring));});
Expand Down Expand Up @@ -1058,6 +1068,23 @@ void test_parse ()
s = " 42.1 abc"; OIIO_CHECK_ASSERT (parse_float (s, f) && f == 42.1f && s == " abc");
s = " 42.1 abc"; OIIO_CHECK_ASSERT (parse_float (s, f, false) && f == 42.1f && s == " 42.1 abc");

{
string_view sv;
float xyz[3] = { 0, 0, 0 };
sv = "xxx 1 2 3 4 5 6";
OIIO_CHECK_ASSERT(parse_values(sv, "xxx", xyz, "", "4")
&& xyz[0] == 1 && xyz[1] == 2 && xyz[2] == 3
&& sv == " 5 6");
sv = "xxx 1 2 3 4 5 6";
OIIO_CHECK_ASSERT(!parse_values(sv, "", xyz));
sv = "xxx 1 2 3 4 5 6";
OIIO_CHECK_ASSERT(!parse_values(sv, "xxx", xyz, ","));
sv = "xxx 1, 2.5,3, 4, 5,6";
OIIO_CHECK_ASSERT(parse_values(sv, "xxx", xyz, ",")
&& xyz[0] == 1 && xyz[1] == 2.5 && xyz[2] == 3
&& sv == ", 4, 5,6");
}

string_view ss;
s = "foo bar";
OIIO_CHECK_ASSERT (parse_string (s, ss) && ss == "foo" && s == " bar");
Expand Down Expand Up @@ -1254,6 +1281,43 @@ test_string_compare_function()



// Unit test for Strutil::scan_datetime: every accepted date separator must
// parse to the same components, and a missing time or any out-of-range
// field must be rejected.
void
test_datetime()
{
    using namespace Strutil;
    int y = -1, m = -1, d = -1, h = -1, min = -1, s = -1;

    // '-' as the date separator
    y = -1, m = -1, d = -1, h = -1, min = -1, s = -1;
    OIIO_CHECK_ASSERT(scan_datetime("2020-05-01 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21);

    // '/' as the date separator
    y = -1, m = -1, d = -1, h = -1, min = -1, s = -1;
    OIIO_CHECK_ASSERT(scan_datetime("2020/05/01 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21);

    // ':' as the date separator
    y = -1, m = -1, d = -1, h = -1, min = -1, s = -1;
    OIIO_CHECK_ASSERT(scan_datetime("2020:05:01 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(y == 2020 && m == 5 && d == 1 && h == 12 && min == 34 && s == 21);

    // No time
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01", y, m, d, h, min, s));
    // Out of range values
    OIIO_CHECK_ASSERT(!scan_datetime("2020:00:01 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(!scan_datetime("2020:13:01 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:00 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:32 12:34:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 24:34:21", y, m, d, h, min, s));
    // Keep the hour valid here so that only the minute is out of range;
    // otherwise the hour check would mask a broken minute check.
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 12:60:21", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 12:34:60", y, m, d, h, min, s));
    OIIO_CHECK_ASSERT(!scan_datetime("2020:05:01 12:34:-1", y, m, d, h, min, s));
}



int
main(int /*argc*/, char* /*argv*/[])
{
Expand Down Expand Up @@ -1283,6 +1347,7 @@ main(int /*argc*/, char* /*argv*/[])
test_locale();
// test_float_formatting ();
test_string_compare_function();
test_datetime();

return unit_test_failures;
}

0 comments on commit 5ec866a

Please sign in to comment.