Skip to content

Commit

Permalink
fix code structure/tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Jasmine-ge committed Oct 15, 2024
1 parent e10f0b4 commit d906402
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/Core/Settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::BestEffort, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
\
M(Bool, input_format_ipv4_default_on_conversion_error, false, "Deserialization of IPv4 will use default values instead of throwing exception on conversion error.", 0) \
Expand Down
61 changes: 56 additions & 5 deletions src/IO/ReadHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -998,10 +998,12 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
static constexpr auto date_broken_down_length = 10;
/// hh:mm:ss
static constexpr auto time_broken_down_length = 8;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = date_broken_down_length + 1 + time_broken_down_length;
/// +zz:zz
static constexpr auto zone_broken_down_length = 6;
/// YYYY-MM-DD hh:mm:ss+zz:zz
static constexpr auto date_time_with_zone_broken_down_length = date_broken_down_length + 1 + time_broken_down_length + zone_broken_down_length;

char s[date_time_broken_down_length];
char s[date_time_with_zone_broken_down_length];
char * s_pos = s;

/** Read characters, that could represent unix timestamp.
Expand All @@ -1011,18 +1013,22 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
*/

/// A piece similar to unix timestamp, maybe scaled to subsecond precision.
while (s_pos < s + date_time_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
while (s_pos < s + date_time_with_zone_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
{
*s_pos = *buf.position();
++s_pos;
++buf.position();
}

/// 2015-01-01 01:02:03 or 2015-01-01
/// 2015-01-01 01:02:03+08:00 or 2015-01-01 01:02:03 or 2015-01-01
if (s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position()))
{
const auto already_read_length = s_pos - s;
const size_t remaining_date_size = date_broken_down_length - already_read_length;
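/// Whether the value carries a time zone designator ('+hh:mm', '-hh:mm' or 'Z'), and its parsed offset.
/// NOTE(review): the offsets are declared UInt8 but are negated for a '-' sign further down; if UInt8 is unsigned the negation wraps instead of yielding a negative offset — confirm.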
bool has_time_zone_offset = false;
UInt8 time_zone_offset_hour = 0;
UInt8 time_zone_offset_minute = 0;

size_t size = buf.read(s_pos, remaining_date_size);
if (size != remaining_date_size)
Expand Down Expand Up @@ -1062,11 +1068,56 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
minute = (s[3] - '0') * 10 + (s[4] - '0');
second = (s[6] - '0') * 10 + (s[7] - '0');
}
if (!buf.eof() && (*buf.position() == '+' || *buf.position() == '-'))
{

has_time_zone_offset = true;
char timezone_sign = *buf.position();
++buf.position();

char tz[zone_broken_down_length];
size = buf.read(tz, zone_broken_down_length - 1);
tz[size] = 0;

if (size != zone_broken_down_length - 1 || tz[2] != ':')
{
throw ParsingException(std::string("Invalid timezone format ") + tz, ErrorCodes::CANNOT_PARSE_DATETIME);
}

time_zone_offset_hour = (tz[0] - '0') * 10 + (tz[1] - '0');
time_zone_offset_minute = (tz[3] - '0') * 10 + (tz[4] - '0');

if (timezone_sign == '-')
{
time_zone_offset_hour = -time_zone_offset_hour;
time_zone_offset_minute = -time_zone_offset_minute;
}

}
else if (!buf.eof() && *buf.position() == 'Z')
{
has_time_zone_offset = true;
++buf.position();
}

if (unlikely(year == 0))
{
datetime = 0;
}
else if (has_time_zone_offset)
{
const DateLUTImpl * utc_time_zone = &DateLUT::instance("UTC");
datetime = utc_time_zone->makeDateTime(year, month, day, hour, minute, second);
if (time_zone_offset_hour)
datetime -= time_zone_offset_hour * 3600;

if (time_zone_offset_minute)
datetime -= time_zone_offset_minute * 60;
}
else
{
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
}
}
else
{
Expand Down
64 changes: 57 additions & 7 deletions src/IO/ReadHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -919,13 +919,16 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
/// Optimistic path, when whole value is in buffer.
const char * s = buf.position();

/// YYYY-MM-DD hh:mm:ss+zz:zz
static constexpr auto date_time_with_time_zone_broken_down_length = 25;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = 19;
/// YYYY-MM-DD
static constexpr auto date_broken_down_length = 10;
bool optimistic_path_for_date_time_input = s + date_time_broken_down_length <= buf.buffer().end();

if (optimistic_path_for_date_time_input)
bool optimistic_path_for_date_time_with_zone_input = s + date_time_with_time_zone_broken_down_length <= buf.buffer().end();

if (optimistic_path_for_date_time_with_zone_input)
{
if (s[4] < '0' || s[4] > '9')
{
Expand All @@ -946,16 +949,63 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
second = (s[17] - '0') * 10 + (s[18] - '0');
}

if (unlikely(year == 0))
datetime = 0;
else
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);

if (dt_long)
buf.position() += date_time_broken_down_length;
else
buf.position() += date_broken_down_length;

/// Parse an optional time zone designator ('+hh:mm', '-hh:mm' or 'Z') that follows the date/time.
bool has_time_zone_offset = false;
UInt8 time_zone_offset_hour = 0;
UInt8 time_zone_offset_minute = 0;
UInt8 timezone_length = 6;

if (*buf.position() == '+' || *buf.position() == '-')
{
has_time_zone_offset = true;
char timezone_sign = *buf.position();
++buf.position();

char tz[timezone_length];
auto size = buf.read(tz, timezone_length - 1);
tz[size] = 0;

if (size != timezone_length - 1 || tz[2] != ':')
throw ParsingException(std::string("Cannot parse Timezone ") + tz, ErrorCodes::CANNOT_PARSE_DATETIME);

time_zone_offset_hour = (tz[0] - '0') * 10 + (tz[1] - '0');
time_zone_offset_minute = (tz[3] - '0') * 10 + (tz[4] - '0');

if (timezone_sign == '-')
{
time_zone_offset_hour = -time_zone_offset_hour;
time_zone_offset_minute = -time_zone_offset_minute;
}
}
else if (*buf.position() == 'Z')
{
has_time_zone_offset = true;
++buf.position();
}

if (unlikely(year == 0))
{
datetime = 0;
}
else if (has_time_zone_offset)
{
datetime = DateLUT::instance("UTC").makeDateTime(year, month, day, hour, minute, second);
if (time_zone_offset_hour)
datetime -= time_zone_offset_hour * 3600;

if (time_zone_offset_minute)
datetime -= time_zone_offset_minute * 60;
}
else
{
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
}

return ReturnType(true);
}
else
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
2024-06-09 13:28:00+01:00 2024-06-09 20:28:00 2024-06-09 22:28:00+10:00
2024-06-09 13:28:00.000+01:00 2024-06-09 05:28:00.000-07:00 2024-06-09 20:28:00.000
2024-06-09 13:28:00+01:00 2024-06-09 12:28:00 2024-06-09 22:28:00+10:00
2024-06-09 13:28:00.000+01:00 2024-06-09 05:28:00.000-07:00 2024-06-09 12:28:00.000
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2022-12-30 18:44:17 2022-12-30 16:44:17+03:00 2022-12-30 21:44:17Z 2022-12-30 13:44:17.000Z
2022-12-30 10:44:17 2022-12-30 16:44:17+03:00 2022-12-30 21:44:17Z 2022-12-30 13:44:17.000Z

0 comments on commit d906402

Please sign in to comment.