Skip to content

Commit 092e447

Browse files
Make TDP accept and discard garbage non-continuation bits on the 10th byte of a varint.
This is the behavior of the codegen parser and the reflection parser. PiperOrigin-RevId: 504022814
1 parent 46656ed commit 092e447

5 files changed

+413
-178
lines changed

src/google/protobuf/generated_message_tctable_impl.h

+12-10
Original file line numberDiff line numberDiff line change
@@ -805,22 +805,24 @@ Parse64FallbackPair(const char* p, int64_t res1) {
805805
// correctly, so all we have to do is check that the expected case is true.
806806
if (PROTOBUF_PREDICT_TRUE(ptr[9] == 1)) goto done10;
807807

808-
// A value of 0, however, represents an over-serialized varint. This case
809-
// should not happen, but if does (say, due to a nonconforming serializer),
810-
// deassert the continuation bit that came from ptr[8].
811-
if (ptr[9] == 0) {
808+
if (PROTOBUF_PREDICT_FALSE(ptr[9] & 0x80)) {
809+
// If the continue bit is set, it is an unterminated varint.
810+
return {nullptr, 0};
811+
}
812+
813+
// A zero value of the first bit of the 10th byte represents an
814+
// over-serialized varint. This case should not happen, but if does (say, due
815+
// to a nonconforming serializer), deassert the continuation bit that came
816+
// from ptr[8].
817+
if ((ptr[9] & 1) == 0) {
812818
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
813819
// Use a small instruction since this is an uncommon code path.
814820
asm("btcq $63,%0" : "+r"(res3));
815821
#else
816822
res3 ^= static_cast<uint64_t>(1) << 63;
817823
#endif
818-
goto done10;
819824
}
820-
821-
// If the 10th byte/ptr[9] itself has any other value, then it is too big to
822-
// fit in 64 bits. If the continue bit is set, it is an unterminated varint.
823-
return {nullptr, 0};
825+
goto done10;
824826

825827
done2:
826828
return {p + 2, res1 & res2};
@@ -963,7 +965,7 @@ PROTOBUF_NOINLINE const char* TcParser::FastTV32S1(PROTOBUF_TC_PARAM_DECL) {
963965
if (PROTOBUF_PREDICT_FALSE(ptr[6] & 0x80)) {
964966
if (PROTOBUF_PREDICT_FALSE(ptr[7] & 0x80)) {
965967
if (PROTOBUF_PREDICT_FALSE(ptr[8] & 0x80)) {
966-
if (ptr[9] & 0xFE) return Error(PROTOBUF_TC_PARAM_PASS);
968+
if (ptr[9] & 0x80) return Error(PROTOBUF_TC_PARAM_PASS);
967969
*out = RotateLeft(res, 28);
968970
ptr += 10;
969971
PROTOBUF_MUSTTAIL return ToTagDispatch(

src/google/protobuf/generated_message_tctable_lite.cc

+3-1
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,9 @@ inline PROTOBUF_ALWAYS_INLINE const char* ParseVarint(const char* p,
751751
if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
752752
byte = (byte - 0x80) | *p++;
753753
if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
754-
byte = (byte - 0x80) | *p++;
754+
// We only care about the continuation bit and the first bit
755+
// of the 10th byte.
756+
byte = (byte - 0x80) | (*p++ & 0x81);
755757
if (PROTOBUF_PREDICT_FALSE(byte & 0x80)) {
756758
return nullptr;
757759
}

src/google/protobuf/generated_message_tctable_lite_test.cc

+125-100
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ TEST(FastVarints, NameHere) {
126126
uint8_t serialize_buffer[64];
127127

128128
for (int size : {8, 32, 64, -8, -32, -64}) {
129+
SCOPED_TRACE(size);
129130
auto next_i = [](uint64_t i) {
130131
// if i + 1 is a power of two, return that.
131132
// (This will also match when i == -1, but for this loop we know that will
@@ -136,28 +137,48 @@ TEST(FastVarints, NameHere) {
136137
return i + (i - 1);
137138
};
138139
for (uint64_t i = 0; i + 1 != 0; i = next_i(i)) {
139-
char fake_msg[64] = {
140-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
141-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
142-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
143-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
144-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
145-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
146-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
147-
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
148-
};
149-
memset(&fake_msg[kHasBitsOffset], 0, sizeof(uint32_t));
150-
151-
auto serialize_ptr = WireFormatLite::WriteUInt64ToArray(
152-
/* field_number= */ 1, i, serialize_buffer);
153-
absl::string_view serialized{
154-
reinterpret_cast<char*>(&serialize_buffer[0]),
155-
static_cast<size_t>(serialize_ptr - serialize_buffer)};
156-
157-
const char* ptr = nullptr;
158-
const char* end_ptr = nullptr;
159-
ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
160-
/* aliasing= */ false, &ptr, serialized);
140+
SCOPED_TRACE(i);
141+
enum OverlongKind { kNotOverlong, kOverlong, kOverlongWithDroppedBits };
142+
for (OverlongKind overlong :
143+
{kNotOverlong, kOverlong, kOverlongWithDroppedBits}) {
144+
SCOPED_TRACE(overlong);
145+
alignas(16) char fake_msg[64] = {
146+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
147+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
148+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
149+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
150+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
151+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
152+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
153+
kDND, kDND, kDND, kDND, kDND, kDND, kDND, kDND, //
154+
};
155+
memset(&fake_msg[kHasBitsOffset], 0, sizeof(uint32_t));
156+
157+
auto serialize_ptr = WireFormatLite::WriteUInt64ToArray(
158+
/* field_number= */ 1, i, serialize_buffer);
159+
160+
if (overlong == kOverlong || overlong == kOverlongWithDroppedBits) {
161+
// 1 for the tag plus 10 for the value
162+
while (serialize_ptr - serialize_buffer < 11) {
163+
serialize_ptr[-1] |= 0x80;
164+
*serialize_ptr++ = 0;
165+
}
166+
if (overlong == kOverlongWithDroppedBits) {
167+
// For this one we add some unused bits to the last byte.
168+
// They should be dropped. Bits 1-6 are dropped. Bit 0 is used and
169+
// bit 7 is checked for continuation.
170+
serialize_ptr[-1] |= 0b0111'1110;
171+
}
172+
}
173+
174+
absl::string_view serialized{
175+
reinterpret_cast<char*>(&serialize_buffer[0]),
176+
static_cast<size_t>(serialize_ptr - serialize_buffer)};
177+
178+
const char* ptr = nullptr;
179+
const char* end_ptr = nullptr;
180+
ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(),
181+
/* aliasing= */ false, &ptr, serialized);
161182
#if 0 // FOR_DEBUGGING
162183
GOOGLE_ABSL_LOG(ERROR) << "size=" << size << " i=" << i << " ptr points to " //
163184
<< +ptr[0] << "," << +ptr[1] << "," //
@@ -166,84 +187,88 @@ TEST(FastVarints, NameHere) {
166187
<< +ptr[6] << "," << +ptr[7] << "," //
167188
<< +ptr[8] << "," << +ptr[9] << "," << +ptr[10] << "\n";
168189
#endif
169-
TailCallParseFunc fn = nullptr;
170-
switch (size) {
171-
case 8:
172-
fn = &TcParser::FastV8S1;
173-
break;
174-
case -8:
175-
fn = &TcParser::FastTV8S1<kFieldOffset, kHasBitIndex>;
176-
break;
177-
case 32:
178-
fn = &TcParser::FastV32S1;
179-
break;
180-
case -32:
181-
fn = &TcParser::FastTV32S1<uint32_t, kFieldOffset, kHasBitIndex>;
182-
break;
183-
case 64:
184-
fn = &TcParser::FastV64S1;
185-
break;
186-
case -64:
187-
fn = &TcParser::FastTV64S1<uint64_t, kFieldOffset, kHasBitIndex>;
188-
break;
189-
}
190-
fallback_ptr_received = absl::nullopt;
191-
fallback_hasbits_received = absl::nullopt;
192-
fallback_tag_received = absl::nullopt;
193-
end_ptr = fn(reinterpret_cast<MessageLite*>(fake_msg), ptr, &ctx,
194-
Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr),
195-
&parse_table.header, /*hasbits=*/0);
196-
switch (size) {
197-
case -8:
198-
case 8: {
199-
if (end_ptr == nullptr) {
200-
// If end_ptr is nullptr, that means the FastParser gave up and
201-
// tried to pass control to MiniParse.... which is expected anytime
202-
// we encounter something other than 0 or 1 encodings. (Since
203-
// FastV8S1 is only used for `bool` fields.)
204-
EXPECT_NE(i, true);
205-
EXPECT_NE(i, false);
206-
EXPECT_THAT(fallback_hasbits_received, Optional(0));
207-
// Like the mini-parser functions, and unlike the fast-parser
208-
// functions, the fallback receives a ptr already incremented past
209-
// the tag, and receives the actual tag in the `data` parameter.
210-
EXPECT_THAT(fallback_ptr_received, Optional(ptr + 1));
211-
EXPECT_THAT(fallback_tag_received, Optional(0x7F & *ptr));
212-
continue;
213-
}
214-
ASSERT_EQ(end_ptr - ptr, serialized.size());
215-
216-
auto actual_field = ReadAndReset<uint8_t>(&fake_msg[kFieldOffset]);
217-
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
218-
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
219-
}; break;
220-
case -32:
221-
case 32: {
222-
ASSERT_EQ(end_ptr - ptr, serialized.size());
223-
224-
auto actual_field = ReadAndReset<uint32_t>(&fake_msg[kFieldOffset]);
225-
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
226-
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
227-
}; break;
228-
case -64:
229-
case 64: {
230-
ASSERT_EQ(end_ptr - ptr, serialized.size());
231-
232-
auto actual_field = ReadAndReset<uint64_t>(&fake_msg[kFieldOffset]);
233-
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
234-
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
235-
}; break;
236-
}
237-
EXPECT_TRUE(!fallback_ptr_received);
238-
EXPECT_TRUE(!fallback_hasbits_received);
239-
EXPECT_TRUE(!fallback_tag_received);
240-
auto hasbits = ReadAndReset<uint32_t>(&fake_msg[kHasBitsOffset]);
241-
EXPECT_EQ(hasbits, 1 << kHasBitIndex);
242-
243-
int offset = 0;
244-
for (char ch : fake_msg) {
245-
EXPECT_EQ(ch, kDND) << " corruption of message at offset " << offset;
246-
++offset;
190+
TailCallParseFunc fn = nullptr;
191+
switch (size) {
192+
case 8:
193+
fn = &TcParser::FastV8S1;
194+
break;
195+
case -8:
196+
fn = &TcParser::FastTV8S1<kFieldOffset, kHasBitIndex>;
197+
break;
198+
case 32:
199+
fn = &TcParser::FastV32S1;
200+
break;
201+
case -32:
202+
fn = &TcParser::FastTV32S1<uint32_t, kFieldOffset, kHasBitIndex>;
203+
break;
204+
case 64:
205+
fn = &TcParser::FastV64S1;
206+
break;
207+
case -64:
208+
fn = &TcParser::FastTV64S1<uint64_t, kFieldOffset, kHasBitIndex>;
209+
break;
210+
}
211+
fallback_ptr_received = absl::nullopt;
212+
fallback_hasbits_received = absl::nullopt;
213+
fallback_tag_received = absl::nullopt;
214+
end_ptr = fn(reinterpret_cast<MessageLite*>(fake_msg), ptr, &ctx,
215+
Xor2SerializedBytes(parse_table.fast_entries[0].bits, ptr),
216+
&parse_table.header, /*hasbits=*/0);
217+
switch (size) {
218+
case -8:
219+
case 8: {
220+
if (end_ptr == nullptr) {
221+
// If end_ptr is nullptr, that means the FastParser gave up and
222+
// tried to pass control to MiniParse.... which is expected
223+
// anytime we encounter something other than 0 or 1 encodings.
224+
// (Since FastV8S1 is only used for `bool` fields.)
225+
if (overlong == kNotOverlong) {
226+
EXPECT_NE(i, true);
227+
EXPECT_NE(i, false);
228+
}
229+
EXPECT_THAT(fallback_hasbits_received, Optional(0));
230+
// Like the mini-parser functions, and unlike the fast-parser
231+
// functions, the fallback receives a ptr already incremented past
232+
// the tag, and receives the actual tag in the `data` parameter.
233+
EXPECT_THAT(fallback_ptr_received, Optional(ptr + 1));
234+
EXPECT_THAT(fallback_tag_received, Optional(0x7F & *ptr));
235+
continue;
236+
}
237+
ASSERT_EQ(end_ptr - ptr, serialized.size());
238+
239+
auto actual_field = ReadAndReset<uint8_t>(&fake_msg[kFieldOffset]);
240+
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
241+
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
242+
}; break;
243+
case -32:
244+
case 32: {
245+
ASSERT_TRUE(end_ptr);
246+
ASSERT_EQ(end_ptr - ptr, serialized.size());
247+
248+
auto actual_field = ReadAndReset<uint32_t>(&fake_msg[kFieldOffset]);
249+
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
250+
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
251+
}; break;
252+
case -64:
253+
case 64: {
254+
ASSERT_EQ(end_ptr - ptr, serialized.size());
255+
256+
auto actual_field = ReadAndReset<uint64_t>(&fake_msg[kFieldOffset]);
257+
EXPECT_EQ(actual_field, static_cast<decltype(actual_field)>(i)) //
258+
<< " hex: " << absl::StrCat(absl::Hex(actual_field));
259+
}; break;
260+
}
261+
EXPECT_TRUE(!fallback_ptr_received);
262+
EXPECT_TRUE(!fallback_hasbits_received);
263+
EXPECT_TRUE(!fallback_tag_received);
264+
auto hasbits = ReadAndReset<uint32_t>(&fake_msg[kHasBitsOffset]);
265+
EXPECT_EQ(hasbits, 1 << kHasBitIndex);
266+
267+
int offset = 0;
268+
for (char ch : fake_msg) {
269+
EXPECT_EQ(ch, kDND) << " corruption of message at offset " << offset;
270+
++offset;
271+
}
247272
}
248273
}
249274
}

0 commit comments

Comments
 (0)