Skip to content

Commit

Permalink
Allow full range for var-int encoded integers
Browse files Browse the repository at this point in the history
  • Loading branch information
yokofly committed Jan 8, 2024
1 parent ea30ae1 commit e4625c9
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 146 deletions.
16 changes: 16 additions & 0 deletions src/IO/VarInt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include <IO/VarInt.h>
#include <Common/Exception.h>

namespace DB
{
namespace ErrorCodes
{
extern const int ATTEMPT_TO_READ_AFTER_EOF;
}

/// Raises the error used by the var-int readers when input runs out in the middle of a value.
/// Always throws Exception with code ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF and never returns
/// (the declaration in IO/VarInt.h is marked [[noreturn]]).
/// NOTE(review): presumably defined out of line so that VarInt.h does not have to include
/// Common/Exception.h - confirm.
void throwReadAfterEOF()
{
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
}

}
241 changes: 98 additions & 143 deletions src/IO/VarInt.h
Original file line number Diff line number Diff line change
@@ -1,138 +1,97 @@
#pragma once

#include <iostream>
#include <base/types.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>


namespace DB
{
namespace ErrorCodes
{
extern const int ATTEMPT_TO_READ_AFTER_EOF;
}


/** Write UInt64 in variable length format (base128) NOTE Only up to 2^63 - 1 are supported. */
void writeVarUInt(UInt64 x, std::ostream & ostr);
void writeVarUInt(UInt64 x, WriteBuffer & ostr);
char * writeVarUInt(UInt64 x, char * ostr);
/// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding.

[[noreturn]] void throwReadAfterEOF();

/** Read UInt64, written in variable length format (base128) */
void readVarUInt(UInt64 & x, std::istream & istr);
void readVarUInt(UInt64 & x, ReadBuffer & istr);
const char * readVarUInt(UInt64 & x, const char * istr, size_t size);

inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
{
while (x > 0x7F)
{
uint8_t byte = 0x80 | (x & 0x7F);

/** Get the length of UInt64 in VarUInt format */
size_t getLengthOfVarUInt(UInt64 x);
ostr.nextIfAtEnd();
*ostr.position() = byte;
++ostr.position();

/** Get the Int64 length in VarInt format */
size_t getLengthOfVarInt(Int64 x);
x >>= 7;
}

uint8_t final_byte = static_cast<uint8_t>(x);

/** Write Int64 in variable length format (base128) */
template <typename OUT>
inline void writeVarInt(Int64 x, OUT & ostr)
{
writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
ostr.nextIfAtEnd();
*ostr.position() = final_byte;
++ostr.position();
}

inline char * writeVarInt(Int64 x, char * ostr)
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
{
return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
}
while (x > 0x7F)
{
uint8_t byte = 0x80 | (x & 0x7F);
ostr.put(byte);

x >>= 7;
}

/** Read Int64, written in variable length format (base128) */
template <typename IN>
inline void readVarInt(Int64 & x, IN & istr)
{
readVarUInt(*reinterpret_cast<UInt64*>(&x), istr);
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
uint8_t final_byte = static_cast<uint8_t>(x);
ostr.put(final_byte);
}

inline const char * readVarInt(Int64 & x, const char * istr, size_t size)
inline char * writeVarUInt(UInt64 x, char * ostr)
{
const char * res = readVarUInt(*reinterpret_cast<UInt64*>(&x), istr, size);
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
return res;
}


/// Overload set that selects the encoder from the type of the value:
/// a UInt64 is forwarded to writeVarUInt, an Int64 to writeVarInt.
/// The `char * &` overloads return the pointer produced by the callee;
/// the reference argument itself is not reassigned by these wrappers.
inline void writeVarT(UInt64 x, std::ostream & ostr) { writeVarUInt(x, ostr); }
inline void writeVarT(Int64 x, std::ostream & ostr) { writeVarInt(x, ostr); }
inline void writeVarT(UInt64 x, WriteBuffer & ostr) { writeVarUInt(x, ostr); }
inline void writeVarT(Int64 x, WriteBuffer & ostr) { writeVarInt(x, ostr); }
inline char * writeVarT(UInt64 x, char * & ostr) { return writeVarUInt(x, ostr); }
inline char * writeVarT(Int64 x, char * & ostr) { return writeVarInt(x, ostr); }
while (x > 0x7F)
{
uint8_t byte = 0x80 | (x & 0x7F);

/// Overload set that selects the decoder from the type of the destination:
/// a UInt64 destination is forwarded to readVarUInt, an Int64 destination to readVarInt.
/// The `const char *` overloads take the number of available bytes in `size`
/// and return the pointer produced by the callee.
inline void readVarT(UInt64 & x, std::istream & istr) { readVarUInt(x, istr); }
inline void readVarT(Int64 & x, std::istream & istr) { readVarInt(x, istr); }
inline void readVarT(UInt64 & x, ReadBuffer & istr) { readVarUInt(x, istr); }
inline void readVarT(Int64 & x, ReadBuffer & istr) { readVarInt(x, istr); }
inline const char * readVarT(UInt64 & x, const char * istr, size_t size) { return readVarUInt(x, istr, size); }
inline const char * readVarT(Int64 & x, const char * istr, size_t size) { return readVarInt(x, istr, size); }
*ostr = byte;
++ostr;

x >>= 7;
}

/// For [U]Int32, [U]Int16, size_t.
uint8_t final_byte = static_cast<uint8_t>(x);

/// Reads a var-int encoded unsigned value into a 32-bit destination.
/// The value is decoded at full 64-bit width by the UInt64 overload,
/// then explicitly narrowed, so only the low 32 bits are kept.
inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
{
    UInt64 decoded = 0;
    readVarUInt(decoded, istr);
    x = static_cast<UInt32>(decoded);
}
*ostr = final_byte;
++ostr;

inline void readVarInt(Int32 & x, ReadBuffer & istr)
{
Int64 tmp;
readVarInt(tmp, istr);
x = static_cast<Int32>(tmp);
return ostr;
}

inline void readVarUInt(UInt16 & x, ReadBuffer & istr)
template <typename Out>
inline void writeVarInt(Int64 x, Out & ostr)
{
UInt64 tmp;
readVarUInt(tmp, istr);
x = tmp;
writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
}

inline void readVarInt(Int16 & x, ReadBuffer & istr)
inline char * writeVarInt(Int64 x, char * ostr)
{
Int64 tmp;
readVarInt(tmp, istr);
x = tmp;
return writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
}

template <typename T>
requires (!std::is_same_v<T, UInt64>)
inline void readVarUInt(T & x, ReadBuffer & istr)
namespace impl
{
UInt64 tmp;
readVarUInt(tmp, istr);
x = tmp;
}


/// Throws Exception with code ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF; never returns normally.
/// Called by the var-int readers when the input ends before a value has been fully read.
[[noreturn]] inline void throwReadAfterEOF()
{
throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
}

template <bool fast>
inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
template <bool check_eof>
inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
{
x = 0;
for (size_t i = 0; i < 9; ++i)
for (size_t i = 0; i < 10; ++i)
{
if constexpr (!fast)
if (istr.eof())
if constexpr (check_eof)
if (istr.eof()) [[unlikely]]
throwReadAfterEOF();

UInt64 byte = *istr.position(); /// NOLINT
UInt64 byte = *istr.position();
++istr.position();
x |= (byte & 0x7F) << (7 * i);

Expand All @@ -141,18 +100,19 @@ inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
}
}

}

inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
{
if (istr.buffer().end() - istr.position() >= 9)
return readVarUIntImpl<true>(x, istr);
return readVarUIntImpl<false>(x, istr);
if (istr.buffer().end() - istr.position() >= 10)
return impl::readVarUInt<false>(x, istr);
return impl::readVarUInt<true>(x, istr);
}


inline void readVarUInt(UInt64 & x, std::istream & istr)
{
x = 0;
for (size_t i = 0; i < 9; ++i)
for (size_t i = 0; i < 10; ++i)
{
UInt64 byte = istr.get();
x |= (byte & 0x7F) << (7 * i);
Expand All @@ -167,12 +127,12 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
const char * end = istr + size;

x = 0;
for (size_t i = 0; i < 9; ++i)
for (size_t i = 0; i < 10; ++i)
{
if (istr == end)
if (istr == end) [[unlikely]]
throwReadAfterEOF();

UInt64 byte = *istr; /// NOLINT
UInt64 byte = *istr;
++istr;
x |= (byte & 0x7F) << (7 * i);

Expand All @@ -183,62 +143,56 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
return istr;
}


inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
template <typename In>
inline void readVarInt(Int64 & x, In & istr)
{
for (size_t i = 0; i < 9; ++i)
{
uint8_t byte = x & 0x7F;
if (x > 0x7F)
byte |= 0x80;

ostr.nextIfAtEnd();
*ostr.position() = byte;
++ostr.position();

x >>= 7;
if (!x)
return;
}
readVarUInt(*reinterpret_cast<UInt64*>(&x), istr);
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
}


inline void writeVarUInt(UInt64 x, std::ostream & ostr)
inline const char * readVarInt(Int64 & x, const char * istr, size_t size)
{
for (size_t i = 0; i < 9; ++i)
{
uint8_t byte = x & 0x7F;
if (x > 0x7F)
byte |= 0x80;

ostr.put(byte);

x >>= 7;
if (!x)
return;
}
const char * res = readVarUInt(*reinterpret_cast<UInt64*>(&x), istr, size);
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
return res;
}


inline char * writeVarUInt(UInt64 x, char * ostr)
inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
{
for (size_t i = 0; i < 9; ++i)
{
uint8_t byte = x & 0x7F;
if (x > 0x7F)
byte |= 0x80;
UInt64 tmp;
readVarUInt(tmp, istr);
x = static_cast<UInt32>(tmp);
}

*ostr = byte;
++ostr;
/// Reads a var-int encoded signed value into a 32-bit destination.
/// Decoding is delegated to the Int64 reader; the result is then explicitly narrowed to Int32.
inline void readVarInt(Int32 & x, ReadBuffer & istr)
{
    Int64 decoded = 0;
    readVarInt(decoded, istr);
    x = static_cast<Int32>(decoded);
}

x >>= 7;
if (!x)
return ostr;
}
/// Reads a var-int encoded unsigned value into a 16-bit destination.
/// The value is decoded at full 64-bit width by the UInt64 overload and then narrowed.
inline void readVarUInt(UInt16 & x, ReadBuffer & istr)
{
    UInt64 tmp;
    readVarUInt(tmp, istr);
    /// Explicit cast: the truncation to the low 16 bits is intentional, and spelling it out
    /// matches the UInt32 overload and avoids an implicit narrowing conversion.
    x = static_cast<UInt16>(tmp);
}

return ostr;
/// Reads a var-int encoded signed value into a 16-bit destination.
/// Decoding is delegated to the Int64 reader and the result is then narrowed.
inline void readVarInt(Int16 & x, ReadBuffer & istr)
{
    Int64 tmp;
    readVarInt(tmp, istr);
    /// Explicit cast: the narrowing to Int16 is intentional, and spelling it out
    /// matches the Int32 overload and avoids an implicit narrowing conversion.
    x = static_cast<Int16>(tmp);
}

/// Generic reader for any destination type other than UInt64.
/// Decodes a full UInt64 with the dedicated overload and assigns it to `x`
/// through the destination type's ordinary (implicit) conversion from UInt64.
/// The constraint excludes UInt64 itself, so that case is handled only by the dedicated overload.
template <typename T>
requires (!std::is_same_v<T, UInt64>)
inline void readVarUInt(T & x, ReadBuffer & istr)
{
    UInt64 decoded = 0;
    readVarUInt(decoded, istr);
    x = decoded;
}

inline size_t getLengthOfVarUInt(UInt64 x)
{
Expand All @@ -250,7 +204,8 @@ inline size_t getLengthOfVarUInt(UInt64 x)
: (x < (1ULL << 42) ? 6
: (x < (1ULL << 49) ? 7
: (x < (1ULL << 56) ? 8
: 9)))))));
: (x < (1ULL << 63) ? 9
: 10))))))));
}


Expand Down
Loading

0 comments on commit e4625c9

Please sign in to comment.