Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

protoc: fix consistency with parsing very large decimal numbers #10555

11 changes: 8 additions & 3 deletions src/google/protobuf/compiler/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,14 @@ bool Parser::ConsumeNumber(double* output, const char* error) {
std::numeric_limits<uint64_t>::max(),
&value)) {
*output = value;
} else {
// out of int range, treat as double literal
*output = io::Tokenizer::ParseFloat(input_->current().text);
} else if (input_->current().text[0] == '0') {
// octal or hexadecimal; don't bother parsing as float
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
Comment on lines +313 to +316
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put this here, instead of just relying on the branch below, because I think an octal literal would likely be successfully parsed by ParseFloat, but incorrectly interpreted as decimal.

} else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
// out of int range, and not valid float? 🤷
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
}
input_->Next();
return true;
Expand Down
16 changes: 16 additions & 0 deletions src/google/protobuf/compiler/parser_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1941,6 +1941,22 @@ TEST_F(ParserValidationErrorTest, FieldDefaultValueError) {
"2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n");
}

TEST_F(ParserValidationErrorTest, FieldDefaultIntegerOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [default = 0x10000000000000000];\n"
"}\n",
"1:37: Integer out of range.\n");
}

TEST_F(ParserValidationErrorTest, FieldOptionOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [foo = 0x10000000000000000];\n"
"}\n",
"1:33: Integer out of range.\n");
}

TEST_F(ParserValidationErrorTest, FileOptionNameError) {
ExpectHasValidationErrors(
"option foo = 5;",
Expand Down
19 changes: 12 additions & 7 deletions src/google/protobuf/io/tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1002,9 +1002,19 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value,
}

double Tokenizer::ParseFloat(const std::string& text) {
double result;
GOOGLE_LOG_IF(DFATAL,
!TryParseFloat(text, &result))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hate side effected in LOG statements, do you mind switching this to the more straight forward:

if (!TryParseFloat(...)) {
  LOG(DFATAL) << ...
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
return result;
}

bool Tokenizer::TryParseFloat(const std::string& text, double* result) {
const char* start = text.c_str();
char* end;
double result = NoLocaleStrtod(start, &end);
*result = NoLocaleStrtod(start, &end);

// "1e" is not a valid float, but if the tokenizer reads it, it will
// report an error but still return it as a valid token. We need to
Expand All @@ -1020,12 +1030,7 @@ double Tokenizer::ParseFloat(const std::string& text) {
++end;
}

GOOGLE_LOG_IF(DFATAL,
static_cast<size_t>(end - start) != text.size() || *start == '-')
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
return result;
return static_cast<size_t>(end - start) == text.size() && *start != '-';
}

// Helper to append a Unicode code point to a string as UTF8, without bringing
Expand Down
4 changes: 4 additions & 0 deletions src/google/protobuf/io/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ class PROTOBUF_EXPORT Tokenizer {
// result is undefined (possibly an assert failure).
static double ParseFloat(const std::string& text);

// Parses given text as if it were a TYPE_FLOAT token. Returns false if the
// given text is not actually a valid float literal.
static bool TryParseFloat(const std::string& text, double* result);

// Parses a TYPE_STRING token. This never fails, so long as the text actually
// comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
// result is undefined (possibly an assert failure).
Expand Down