Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix date format identifies '\n' as invalid separator #4046

Merged
merged 9 commits into from
Feb 16, 2022
63 changes: 32 additions & 31 deletions dbms/src/Common/MyTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ bool isValidSeperator(char c, int previous_parts)
if (isPunctuation(c))
return true;

return previous_parts == 2 && (c == ' ' || c == 'T');
// for https://github.com/pingcap/tics/issues/4036
return previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c));
}

std::vector<String> parseDateFormat(String format)
Expand Down Expand Up @@ -533,8 +534,8 @@ std::pair<Field, bool> parseMyDateTimeAndJudgeIsDate(const String & str, int8_t

bool truncated_or_incorrect = false;

// noAbsorb tests if can absorb FSP or TZ
auto noAbsorb = [](const std::vector<String> & seps) {
// no_absorb tests if can absorb FSP or TZ
auto no_absorb = [](const std::vector<String> & seps) {
// if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed
// or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can
// not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because
Expand All @@ -544,7 +545,7 @@ std::pair<Field, bool> parseMyDateTimeAndJudgeIsDate(const String & str, int8_t

if (!frac_str.empty())
{
if (!noAbsorb(seps))
if (!no_absorb(seps))
{
seps.push_back(frac_str);
frac_str = "";
Expand All @@ -555,7 +556,7 @@ std::pair<Field, bool> parseMyDateTimeAndJudgeIsDate(const String & str, int8_t
{
// if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z),
// therefore we could safely skip this branch.
if (!noAbsorb(seps) && !(!tz_minute.empty() && tz_sep.empty()))
if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty()))
{
// we can't absorb timezone if there is no separate between tz_hour and tz_minute
if (!tz_hour.empty())
Expand All @@ -580,51 +581,51 @@ std::pair<Field, bool> parseMyDateTimeAndJudgeIsDate(const String & str, int8_t
{
case 14: // YYYYMMDDHHMMSS
{
std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT
hhmmss = true;
break;
}
case 12: // YYMMDDHHMMSS
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT
year = adjustYear(year);
hhmmss = true;
break;
}
case 11: // YYMMDDHHMMS
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); //NOLINT
year = adjustYear(year);
hhmmss = true;
break;
}
case 10: // YYMMDDHHMM
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); //NOLINT
year = adjustYear(year);
break;
}
case 9: // YYMMDDHHM
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); //NOLINT
year = adjustYear(year);
break;
}
case 8: // YYYYMMDD
{
std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day);
std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); //NOLINT
break;
}
case 7: // YYMMDDH
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); //NOLINT
year = adjustYear(year);
break;
}
case 6: // YYMMDD
case 5: // YYMMD
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day);
std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); //NOLINT
year = adjustYear(year);
break;
}
Expand All @@ -649,18 +650,18 @@ std::pair<Field, bool> parseMyDateTimeAndJudgeIsDate(const String & str, int8_t
case 1:
case 2:
{
ret = std::sscanf(frac_str.c_str(), "%2d ", &hour);
ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT
break;
}
case 3:
case 4:
{
ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute);
ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT
break;
}
default:
{
ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second);
ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT
break;
}
}
Expand All @@ -674,7 +675,7 @@ std::pair<Field, bool> parseMyDateTimeAndJudgeIsDate(const String & str, int8_t
}
else
{
truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0);
truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); //NOLINT
}
}
if (truncated_or_incorrect)
Expand Down Expand Up @@ -1048,7 +1049,7 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const
static auto is_leap_year = [](UInt16 _year) {
return ((_year % 4 == 0) && (_year % 100 != 0)) || (_year % 400 == 0);
};
max_day = max_days_in_month[month - 1];
max_day = max_days_in_month[month - 1]; // NOLINT
if (month == 2 && is_leap_year(year))
{
max_day = 29;
Expand Down Expand Up @@ -1379,13 +1380,13 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return check_if_end();
};
auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
Expand All @@ -1402,7 +1403,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 12 || hour == 0)
Expand All @@ -1418,7 +1419,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t minute = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
Expand All @@ -1430,7 +1431,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t second = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand All @@ -1439,7 +1440,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += step; // move forward

int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
// "AM"/"PM" must be parsed as a single element
// "11:13:56a" is an invalid input for "%r".
Expand Down Expand Up @@ -1483,13 +1484,13 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return check_if_end();
};
auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
Expand All @@ -1506,7 +1507,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 23)
Expand All @@ -1518,7 +1519,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t minute = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
Expand All @@ -1530,7 +1531,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t second = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand Down
31 changes: 31 additions & 0 deletions dbms/src/Functions/tests/gtest_tidb_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1426,5 +1426,36 @@ try
}
CATCH

// for https://github.com/pingcap/tics/issues/4036
TEST_F(TestTidbConversion, castStringAsDateTime)
try
{
auto input = std::vector<String>{"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"};
auto to_column = createConstColumn<String>(1, "MyDateTime(6)");

// vector
auto from_column = createColumn<String>(input);
UInt64 except_packed = MyDateTime(2012, 12, 12, 12, 12, 12, 0).toPackedUInt();
auto vector_result = executeFunction("tidb_cast", {from_column, to_column});
for (size_t i = 0; i < input.size(); i++)
{
ASSERT_EQ(except_packed, vector_result.column.get()->get64(i));
}

// const
auto const_from_column = createConstColumn<String>(1, "2012-12-12\n12:12:12");
auto const_result = executeFunction("tidb_cast", {from_column, to_column});
ASSERT_EQ(except_packed, const_result.column.get()->get64(0));

// nullable
auto nullable_from_column = createColumn<Nullable<String>>({"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"});
auto nullable_result = executeFunction("tidb_cast", {from_column, to_column});
for (size_t i = 0; i < input.size(); i++)
{
ASSERT_EQ(except_packed, nullable_result.column.get()->get64(i));
}
}
CATCH

} // namespace
} // namespace DB::tests