Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support display of timezone for datetime and datetime64 #838

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/DataTypes/Serializations/SerializationDateTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ void SerializationDateTime::serializeText(const IColumn & column, size_t row_num
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, ostr, time_zone);
if (has_explicit_time_zone)
writeDateTimeTextWithTimeZone(value, ostr, time_zone);
else
writeDateTimeText(value, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeIntText(value, ostr);
Expand Down
5 changes: 4 additions & 1 deletion src/DataTypes/Serializations/SerializationDateTime64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ void SerializationDateTime64::serializeText(const IColumn & column, size_t row_n
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, scale, ostr, time_zone);
if (has_explicit_time_zone)
writeDateTimeTextWithTimeZone(value, scale, ostr, time_zone);
else
writeDateTimeText(value, scale, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeDateTimeUnixTimestamp(value, scale, ostr);
Expand Down
112 changes: 98 additions & 14 deletions src/IO/ReadHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ template void readDateTextFallback<void>(LocalDate &, ReadBuffer &);
template bool readDateTextFallback<bool>(LocalDate &, ReadBuffer &);


template <typename ReturnType>
template <typename ReturnType, bool dt64_mode>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
Expand All @@ -998,31 +998,61 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
static constexpr auto date_broken_down_length = 10;
/// hh:mm:ss
static constexpr auto time_broken_down_length = 8;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = date_broken_down_length + 1 + time_broken_down_length;

char s[date_time_broken_down_length];
/// proton: starts
/// +zz:zz
static constexpr auto zone_broken_down_length = 6;
/// YYYY-MM-DD hh:mm:ss+zz:zz
static constexpr auto date_time_with_zone_broken_down_length = date_broken_down_length + 1 + time_broken_down_length + zone_broken_down_length;

char s[date_time_with_zone_broken_down_length];
/// proton: ends
char * s_pos = s;

/** Read characters, that could represent unix timestamp.
* Only unix timestamp of at least 5 characters is supported.
* Only unix timestamp of at least 5 characters is supported by default, exception is thrown for a shorter one
* (unless parsing a string like '1.23' or '-12': there is no ambiguity, it is a DT64 timestamp).
* Then look at 5th character. If it is a number - treat whole as unix timestamp.
* If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format.
*/

int negative_multiplier = 1;

if (!buf.eof() && *buf.position() == '-')
{
if constexpr (dt64_mode)
{
negative_multiplier = -1;
++buf.position();
}
else
{
if constexpr (throw_exception)
throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime");
else
return false;
}
}

/// A piece similar to unix timestamp, maybe scaled to subsecond precision.
while (s_pos < s + date_time_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
while (s_pos < s + date_time_with_zone_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
{
*s_pos = *buf.position();
++s_pos;
++buf.position();
}

/// 2015-01-01 01:02:03 or 2015-01-01
if (s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position()))
/// if negative, it is a timestamp with no ambiguity
if (negative_multiplier == 1 && s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position()))
{
const auto already_read_length = s_pos - s;
const size_t remaining_date_size = date_broken_down_length - already_read_length;
/// proton: starts
/// Set when the input carries a time zone designator: a +hh:mm / -hh:mm offset or 'Z'.
bool has_time_zone_offset = false;
Int8 time_zone_offset_hour = 0;
Int8 time_zone_offset_minute = 0;
/// proton: ends

size_t size = buf.read(s_pos, remaining_date_size);
if (size != remaining_date_size)
Expand Down Expand Up @@ -1062,35 +1092,89 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
minute = (s[3] - '0') * 10 + (s[4] - '0');
second = (s[6] - '0') * 10 + (s[7] - '0');
}
/// proton: starts
if (!buf.eof() && (*buf.position() == '+' || *buf.position() == '-'))
{

has_time_zone_offset = true;
char timezone_sign = *buf.position();
++buf.position();

char tz[zone_broken_down_length];
size = buf.read(tz, zone_broken_down_length - 1);
tz[size] = 0;

if (size != zone_broken_down_length - 1 || tz[2] != ':')
{
throw ParsingException(std::string("Invalid timezone format ") + tz, ErrorCodes::CANNOT_PARSE_DATETIME);
}

time_zone_offset_hour = (tz[0] - '0') * 10 + (tz[1] - '0');
time_zone_offset_minute = (tz[3] - '0') * 10 + (tz[4] - '0');

if (timezone_sign == '-')
{
time_zone_offset_hour = -time_zone_offset_hour;
time_zone_offset_minute = -time_zone_offset_minute;
}

}
else if (!buf.eof() && *buf.position() == 'Z')
{
has_time_zone_offset = true;
++buf.position();
}

if (unlikely(year == 0))
{
datetime = 0;
}
else if (has_time_zone_offset)
{
const DateLUTImpl * utc_time_zone = &DateLUT::instance("UTC");
datetime = utc_time_zone->makeDateTime(year, month, day, hour, minute, second);
if (time_zone_offset_hour)
datetime -= time_zone_offset_hour * 3600;

if (time_zone_offset_minute)
datetime -= time_zone_offset_minute * 60;
}
else
{
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
}
/// proton: ends
}
else
{
if (s_pos - s >= 5)
datetime = 0;
bool too_short = s_pos - s <= 4;

if (!too_short || dt64_mode)
{
/// Not very efficient.
datetime = 0;
for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos)
datetime = datetime * 10 + *digit_pos - '0';
}
else
datetime *= negative_multiplier;

if (too_short && negative_multiplier != -1)
{
if constexpr (throw_exception)
throw ParsingException("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME);
throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime");
else
return false;
}

}

return ReturnType(true);
}

template void readDateTimeTextFallback<void>(time_t &, ReadBuffer &, const DateLUTImpl &);
template bool readDateTimeTextFallback<bool>(time_t &, ReadBuffer &, const DateLUTImpl &);
template void readDateTimeTextFallback<void, false>(time_t &, ReadBuffer &, const DateLUTImpl &);
template void readDateTimeTextFallback<void, true>(time_t &, ReadBuffer &, const DateLUTImpl &);
template bool readDateTimeTextFallback<bool, false>(time_t &, ReadBuffer &, const DateLUTImpl &);
template bool readDateTimeTextFallback<bool, true>(time_t &, ReadBuffer &, const DateLUTImpl &);


void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
Expand Down
130 changes: 106 additions & 24 deletions src/IO/ReadHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -907,25 +907,44 @@ inline T parseFromString(std::string_view str)
}


template <typename ReturnType = void>
template <typename ReturnType = void, bool dt64_mode = false>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);

/** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone.
* As an exception, also supported parsing of unix timestamp in form of decimal number.
*/
template <typename ReturnType = void>
template <typename ReturnType = void, bool dt64_mode = false>
inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

if constexpr (!dt64_mode)
{
if (!buf.eof() && !isNumericASCII(*buf.position()))
{
if constexpr (throw_exception)
throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime");
else
return false;
}
}

/// Optimistic path, when whole value is in buffer.
const char * s = buf.position();

/// YYYY-MM-DD hh:mm:ss+zz:zz
static constexpr auto date_time_with_time_zone_broken_down_length = 25;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = 19;

/// proton: starts
/// YYYY-MM-DD
static constexpr auto date_broken_down_length = 10;
bool optimistic_path_for_date_time_input = s + date_time_broken_down_length <= buf.buffer().end();

if (optimistic_path_for_date_time_input)
bool optimistic_path_for_date_time_with_zone_input = s + date_time_with_time_zone_broken_down_length <= buf.buffer().end();
/// proton: ends

if (optimistic_path_for_date_time_with_zone_input)
{
if (s[4] < '0' || s[4] > '9')
{
Expand All @@ -946,36 +965,96 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
second = (s[17] - '0') * 10 + (s[18] - '0');
}

if (unlikely(year == 0))
datetime = 0;
else
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);

if (dt_long)
buf.position() += date_time_broken_down_length;
else
buf.position() += date_broken_down_length;

/// proton: starts
/// Parse an optional trailing time zone designator: a +hh:mm / -hh:mm offset or 'Z'.
bool has_time_zone_offset = false;
Int8 time_zone_offset_hour = 0;
Int8 time_zone_offset_minute = 0;
UInt8 timezone_length = 6;

if (*buf.position() == '+' || *buf.position() == '-')
{
has_time_zone_offset = true;
char timezone_sign = *buf.position();
++buf.position();

char tz[timezone_length];
auto size = buf.read(tz, timezone_length - 1);
tz[size] = 0;

if (size != timezone_length - 1 || tz[2] != ':')
throw ParsingException(std::string("Cannot parse Timezone ") + tz, ErrorCodes::CANNOT_PARSE_DATETIME);

time_zone_offset_hour = (tz[0] - '0') * 10 + (tz[1] - '0');
time_zone_offset_minute = (tz[3] - '0') * 10 + (tz[4] - '0');

if (timezone_sign == '-')
{
time_zone_offset_hour = -time_zone_offset_hour;
time_zone_offset_minute = -time_zone_offset_minute;
}
}
else if (*buf.position() == 'Z')
{
has_time_zone_offset = true;
++buf.position();
}

if (unlikely(year == 0))
{
datetime = 0;
}
else if (has_time_zone_offset)
{
datetime = DateLUT::instance("UTC").makeDateTime(year, month, day, hour, minute, second);
if (time_zone_offset_hour)
datetime -= time_zone_offset_hour * 3600;

if (time_zone_offset_minute)
datetime -= time_zone_offset_minute * 60;
}
else
{
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
}
/// proton: ends

return ReturnType(true);
}
else
/// Why not readIntTextUnsafe? Because for needs of AdFox, parsing of unix timestamp with leading zeros is supported: 000...NNNN.
return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf);
}
else
return readDateTimeTextFallback<ReturnType>(datetime, buf, date_lut);
return readDateTimeTextFallback<ReturnType, dt64_mode>(datetime, buf, date_lut);
}

template <typename ReturnType>
inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut)
{
time_t whole;
if (!readDateTimeTextImpl<bool>(whole, buf, date_lut))
time_t whole = 0;
bool is_negative_timestamp = (!buf.eof() && *buf.position() == '-');
bool is_empty = buf.eof();

if (!is_empty)
{
return ReturnType(false);
try
{
readDateTimeTextImpl<ReturnType, true>(whole, buf, date_lut);
}
catch (const DB::ParsingException & exception)
{
if (buf.eof() || *buf.position() != '.')
throw exception;
}
}

int negative_multiplier = 1;
int negative_fraction_multiplier = 1;

DB::DecimalUtils::DecimalComponents<DateTime64> components{static_cast<DateTime64::NativeType>(whole), 0};

Expand Down Expand Up @@ -1003,18 +1082,18 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
while (!buf.eof() && isNumericASCII(*buf.position()))
++buf.position();

/// Fractional part (subseconds) is treated as positive by users
/// (as DateTime64 itself is a positive, although underlying decimal is negative)
/// setting fractional part to be negative when whole is 0 results in wrong value,
/// so we multiply result by -1.
if (components.whole < 0 && components.fractional != 0)
/// Fractional part (subseconds) is treated as positive by users, but represented as a negative number.
/// E.g. `1925-12-12 13:14:15.123` is represented internally as timestamp `-1390214744.877`.
/// Thus we need to convert <negative_timestamp>.<fractional> to <negative_timestamp+1>.<1-0.<fractional>>
/// Also, setting fractional part to be negative when whole is 0 results in wrong value, in this case multiply result by -1.
if (!is_negative_timestamp && components.whole < 0 && components.fractional != 0)
{
const auto scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale);
++components.whole;
components.fractional = scale_multiplier - components.fractional;
if (!components.whole)
{
negative_multiplier = -1;
negative_fraction_multiplier = -1;
}
}
}
Expand All @@ -1029,12 +1108,15 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re

bool is_ok = true;
if constexpr (std::is_same_v<ReturnType, void>)
datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(components, scale);
{
datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(components, scale) * negative_fraction_multiplier;
}
else
{
is_ok = DecimalUtils::tryGetDecimalFromComponents<DateTime64>(components, scale, datetime64);

datetime64 *= negative_multiplier;

if (is_ok)
datetime64 *= negative_fraction_multiplier;
}

return ReturnType(is_ok);
}
Expand Down
Loading
Loading