Skip to content

Commit

Permalink
Support user-defined and incomplete date formats (#273)
Browse files Browse the repository at this point in the history
* Check custom formats for characters

Signed-off-by: Guian Gumpac <guian.gumpac@improving.com>

* Removed duplicated code

Signed-off-by: Guian Gumpac <guian.gumpac@improving.com>

* Reworked checking for exprcoretype

Signed-off-by: Guian Gumpac <guian.gumpac@improving.com>

* Changed check for time

Signed-off-by: Guian Gumpac <guian.gumpac@improving.com>

* Rework processing custom and incomplete formats and add tests.

Signed-off-by: Yury-Fridlyand <yury.fridlyand@improving.com>

* Values of incomplete and incorrect formats to be returned as `TIMESTAMP` instead of `STRING`.

Signed-off-by: Yury-Fridlyand <yury.fridlyand@improving.com>

* Complete fix and update tests.

Signed-off-by: Yury-Fridlyand <yury.fridlyand@improving.com>

* More fixes for god of fixes.

Signed-off-by: Yury-Fridlyand <yury.fridlyand@improving.com>

---------

Signed-off-by: Guian Gumpac <guian.gumpac@improving.com>
Signed-off-by: Yury-Fridlyand <yury.fridlyand@improving.com>
Co-authored-by: Yury-Fridlyand <yury.fridlyand@improving.com>
  • Loading branch information
GumpacG and Yury-Fridlyand authored Jul 8, 2023
1 parent a816a58 commit 56e5621
Show file tree
Hide file tree
Showing 7 changed files with 704 additions and 364 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import java.io.IOException;
import java.util.Locale;
import lombok.SneakyThrows;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.client.Request;
Expand Down Expand Up @@ -56,6 +57,69 @@ public void testDateFormatsWithOr() throws IOException {
rows("1984-04-12 09:07:42.000123456"));
}

@Test
@SneakyThrows
public void testCustomFormats() {
String query = String.format("SELECT custom_time, custom_timestamp, custom_date_or_date,"
+ "custom_date_or_custom_time, custom_time_parser_check FROM %s", TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(result,
schema("custom_time", null, "time"),
schema("custom_timestamp", null, "timestamp"),
schema("custom_date_or_date", null, "date"),
schema("custom_date_or_custom_time", null, "timestamp"),
schema("custom_time_parser_check", null, "time"));
verifyDataRows(result,
rows("09:07:42", "1984-04-12 09:07:42", "1984-04-12", "1961-04-12 00:00:00", "23:44:36.321"),
rows("21:07:42", "1984-04-12 22:07:42", "1984-04-12", "1970-01-01 09:07:00", "09:01:16.542"));
}

@Test
@SneakyThrows
public void testCustomFormats2() {
String query = String.format("SELECT custom_no_delimiter_date, custom_no_delimiter_time,"
+ "custom_no_delimiter_ts FROM %s", TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(result,
schema("custom_no_delimiter_date", null, "date"),
schema("custom_no_delimiter_time", null, "time"),
schema("custom_no_delimiter_ts", null, "timestamp"));
verifyDataRows(result,
rows("1984-10-20", "10:20:30", "1984-10-20 15:35:48"),
rows("1961-04-12", "09:07:00", "1961-04-12 09:07:00"));
}

@Test
@SneakyThrows
public void testIncompleteFormats() {
String query = String.format("SELECT incomplete_1, incomplete_2, incorrect,"
+ "incomplete_custom_time, incomplete_custom_date FROM %s", TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(result,
schema("incomplete_1", null, "timestamp"),
schema("incomplete_2", null, "date"),
schema("incorrect", null, "timestamp"),
schema("incomplete_custom_time", null, "time"),
schema("incomplete_custom_date", null, "date"));
verifyDataRows(result,
rows("1984-01-01 00:00:00", null, null, "10:00:00", "1999-01-01"),
rows("2012-01-01 00:00:00", null, null, "20:00:00", "3021-01-01"));
}

@Test
@SneakyThrows
public void testNumericFormats() {
String query = String.format("SELECT epoch_sec, epoch_milli"
+ " FROM %s", TEST_INDEX_DATE_FORMATS);
JSONObject result = executeQuery(query);
verifySchema(result,
schema("epoch_sec", null, "timestamp"),
schema("epoch_milli", null, "timestamp"));
verifyDataRows(result,
rows("1970-01-01 00:00:42", "1970-01-01 00:00:00.042"),
rows("1970-01-02 03:55:00", "1970-01-01 00:01:40.5"));
}

protected JSONObject executeQuery(String query) throws IOException {
Request request = new Request("POST", QUERY_API_ENDPOINT);
request.setJsonEntity(String.format(Locale.ROOT, "{\n" + " \"query\": \"%s\"\n" + "}", query));
Expand Down
4 changes: 2 additions & 2 deletions integ-test/src/test/resources/date_formats.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"index": {}}
{"epoch_millis": "450608862000.123456", "epoch_second": "450608862.000123456", "date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time_nanos": "1984-04-12T09:07:42.000123456Z", "basic_date": "19840412", "basic_date_time": "19840412T090742.000Z", "basic_date_time_no_millis": "19840412T090742Z", "basic_ordinal_date": "1984103", "basic_ordinal_date_time": "1984103T090742.000Z", "basic_ordinal_date_time_no_millis": "1984103T090742Z", "basic_time": "090742.000Z", "basic_time_no_millis": "090742Z", "basic_t_time": "T090742.000Z", "basic_t_time_no_millis": "T090742Z", "basic_week_date": "1984W154", "strict_basic_week_date": "1984W154", "basic_week_date_time": "1984W154T090742.000Z", "strict_basic_week_date_time": "1984W154T090742.000Z", "basic_week_date_time_no_millis": "1984W154T090742Z", "strict_basic_week_date_time_no_millis": "1984W154T090742Z", "date": "1984-04-12", "strict_date": "1984-04-12", "date_hour": "1984-04-12T09", "strict_date_hour": "1984-04-12T09", "date_hour_minute": "1984-04-12T09:07", "strict_date_hour_minute": "1984-04-12T09:07", "date_hour_minute_second": "1984-04-12T09:07:42", "strict_date_hour_minute_second": "1984-04-12T09:07:42", "date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "date_time": "1984-04-12T09:07:42.000Z", "strict_date_time": "1984-04-12T09:07:42.000123456Z", "date_time_no_millis": "1984-04-12T09:07:42Z", "strict_date_time_no_millis": "1984-04-12T09:07:42Z", "hour": "09", "strict_hour": "09", "hour_minute": "09:07", "strict_hour_minute": "09:07", "hour_minute_second": "09:07:42", "strict_hour_minute_second": "09:07:42", "hour_minute_second_fraction": "09:07:42.000", "strict_hour_minute_second_fraction": "09:07:42.000", "hour_minute_second_millis": "09:07:42.000", "strict_hour_minute_second_millis": "09:07:42.000", "ordinal_date": "1984-103", "strict_ordinal_date": "1984-103", "ordinal_date_time": "1984-103T09:07:42.000123456Z", "strict_ordinal_date_time": "1984-103T09:07:42.000123456Z", "ordinal_date_time_no_millis": "1984-103T09:07:42Z", "strict_ordinal_date_time_no_millis": "1984-103T09:07:42Z", "time": "09:07:42.000Z", "strict_time": "09:07:42.000Z", "time_no_millis": "09:07:42Z", "strict_time_no_millis": "09:07:42Z", "t_time": "T09:07:42.000Z", "strict_t_time": "T09:07:42.000Z", "t_time_no_millis": "T09:07:42Z", "strict_t_time_no_millis": "T09:07:42Z", "week_date": "1984-W15-4", "strict_week_date": "1984-W15-4", "week_date_time": "1984-W15-4T09:07:42.000Z", "strict_week_date_time": "1984-W15-4T09:07:42.000Z", "week_date_time_no_millis": "1984-W15-4T09:07:42Z", "strict_week_date_time_no_millis": "1984-W15-4T09:07:42Z", "weekyear_week_day": "1984-W15-4", "strict_weekyear_week_day": "1984-W15-4", "year_month_day": "1984-04-12", "strict_year_month_day": "1984-04-12", "yyyy-MM-dd": "1984-04-12", "HH:mm:ss": "09:07:42", "yyyy-MM-dd_OR_epoch_millis": "1984-04-12", "hour_minute_second_OR_t_time": "09:07:42"}
{"epoch_millis": "450608862000.123456", "epoch_second": "450608862.000123456", "date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time_nanos": "1984-04-12T09:07:42.000123456Z", "basic_date": "19840412", "basic_date_time": "19840412T090742.000Z", "basic_date_time_no_millis": "19840412T090742Z", "basic_ordinal_date": "1984103", "basic_ordinal_date_time": "1984103T090742.000Z", "basic_ordinal_date_time_no_millis": "1984103T090742Z", "basic_time": "090742.000Z", "basic_time_no_millis": "090742Z", "basic_t_time": "T090742.000Z", "basic_t_time_no_millis": "T090742Z", "basic_week_date": "1984W154", "strict_basic_week_date": "1984W154", "basic_week_date_time": "1984W154T090742.000Z", "strict_basic_week_date_time": "1984W154T090742.000Z", "basic_week_date_time_no_millis": "1984W154T090742Z", "strict_basic_week_date_time_no_millis": "1984W154T090742Z", "date": "1984-04-12", "strict_date": "1984-04-12", "date_hour": "1984-04-12T09", "strict_date_hour": "1984-04-12T09", "date_hour_minute": "1984-04-12T09:07", "strict_date_hour_minute": "1984-04-12T09:07", "date_hour_minute_second": "1984-04-12T09:07:42", "strict_date_hour_minute_second": "1984-04-12T09:07:42", "date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "date_time": "1984-04-12T09:07:42.000Z", "strict_date_time": "1984-04-12T09:07:42.000123456Z", "date_time_no_millis": "1984-04-12T09:07:42Z", "strict_date_time_no_millis": "1984-04-12T09:07:42Z", "hour": "09", "strict_hour": "09", "hour_minute": "09:07", "strict_hour_minute": "09:07", "hour_minute_second": "09:07:42", "strict_hour_minute_second": "09:07:42", "hour_minute_second_fraction": "09:07:42.000", "strict_hour_minute_second_fraction": "09:07:42.000", "hour_minute_second_millis": "09:07:42.000", "strict_hour_minute_second_millis": "09:07:42.000", "ordinal_date": "1984-103", "strict_ordinal_date": "1984-103", "ordinal_date_time": "1984-103T09:07:42.000123456Z", "strict_ordinal_date_time": "1984-103T09:07:42.000123456Z", "ordinal_date_time_no_millis": "1984-103T09:07:42Z", "strict_ordinal_date_time_no_millis": "1984-103T09:07:42Z", "time": "09:07:42.000Z", "strict_time": "09:07:42.000Z", "time_no_millis": "09:07:42Z", "strict_time_no_millis": "09:07:42Z", "t_time": "T09:07:42.000Z", "strict_t_time": "T09:07:42.000Z", "t_time_no_millis": "T09:07:42Z", "strict_t_time_no_millis": "T09:07:42Z", "week_date": "1984-W15-4", "strict_week_date": "1984-W15-4", "week_date_time": "1984-W15-4T09:07:42.000Z", "strict_week_date_time": "1984-W15-4T09:07:42.000Z", "week_date_time_no_millis": "1984-W15-4T09:07:42Z", "strict_week_date_time_no_millis": "1984-W15-4T09:07:42Z", "weekyear_week_day": "1984-W15-4", "strict_weekyear_week_day": "1984-W15-4", "year_month_day": "1984-04-12", "strict_year_month_day": "1984-04-12", "yyyy-MM-dd": "1984-04-12", "custom_time": "09:07:42 AM", "yyyy-MM-dd_OR_epoch_millis": "1984-04-12", "hour_minute_second_OR_t_time": "09:07:42", "custom_timestamp": "1984-04-12 09:07:42 ---- AM", "custom_date_or_date": "1984-04-12", "custom_date_or_custom_time": "1961-04-12", "custom_time_parser_check": "85476321", "incomplete_1" : 1984, "incomplete_2": null, "incomplete_custom_date": 1999, "incomplete_custom_time" : 10, "incorrect" : null, "epoch_sec" : 42, "epoch_milli" : 42, "custom_no_delimiter_date" : "19841020", "custom_no_delimiter_time" : "102030", "custom_no_delimiter_ts" : "19841020153548"}
{"index": {}}
{"epoch_millis": "450608862000.123456", "epoch_second": "450608862.000123456", "date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time_nanos": "1984-04-12T09:07:42.000123456Z", "basic_date": "19840412", "basic_date_time": "19840412T090742.000Z", "basic_date_time_no_millis": "19840412T090742Z", "basic_ordinal_date": "1984103", "basic_ordinal_date_time": "1984103T090742.000Z", "basic_ordinal_date_time_no_millis": "1984103T090742Z", "basic_time": "090742.000Z", "basic_time_no_millis": "090742Z", "basic_t_time": "T090742.000Z", "basic_t_time_no_millis": "T090742Z", "basic_week_date": "1984W154", "strict_basic_week_date": "1984W154", "basic_week_date_time": "1984W154T090742.000Z", "strict_basic_week_date_time": "1984W154T090742.000Z", "basic_week_date_time_no_millis": "1984W154T090742Z", "strict_basic_week_date_time_no_millis": "1984W154T090742Z", "date": "1984-04-12", "strict_date": "1984-04-12", "date_hour": "1984-04-12T09", "strict_date_hour": "1984-04-12T09", "date_hour_minute": "1984-04-12T09:07", "strict_date_hour_minute": "1984-04-12T09:07", "date_hour_minute_second": "1984-04-12T09:07:42", "strict_date_hour_minute_second": "1984-04-12T09:07:42", "date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "date_time": "1984-04-12T09:07:42.000Z", "strict_date_time": "1984-04-12T09:07:42.000123456Z", "date_time_no_millis": "1984-04-12T09:07:42Z", "strict_date_time_no_millis": "1984-04-12T09:07:42Z", "hour": "09", "strict_hour": "09", "hour_minute": "09:07", "strict_hour_minute": "09:07", "hour_minute_second": "09:07:42", "strict_hour_minute_second": "09:07:42", "hour_minute_second_fraction": "09:07:42.000", "strict_hour_minute_second_fraction": "09:07:42.000", "hour_minute_second_millis": "09:07:42.000", "strict_hour_minute_second_millis": "09:07:42.000", "ordinal_date": "1984-103", "strict_ordinal_date": "1984-103", "ordinal_date_time": "1984-103T09:07:42.000123456Z", "strict_ordinal_date_time": "1984-103T09:07:42.000123456Z", "ordinal_date_time_no_millis": "1984-103T09:07:42Z", "strict_ordinal_date_time_no_millis": "1984-103T09:07:42Z", "time": "09:07:42.000Z", "strict_time": "09:07:42.000Z", "time_no_millis": "09:07:42Z", "strict_time_no_millis": "09:07:42Z", "t_time": "T09:07:42.000Z", "strict_t_time": "T09:07:42.000Z", "t_time_no_millis": "T09:07:42Z", "strict_t_time_no_millis": "T09:07:42Z", "week_date": "1984-W15-4", "strict_week_date": "1984-W15-4", "week_date_time": "1984-W15-4T09:07:42.000Z", "strict_week_date_time": "1984-W15-4T09:07:42.000Z", "week_date_time_no_millis": "1984-W15-4T09:07:42Z", "strict_week_date_time_no_millis": "1984-W15-4T09:07:42Z", "weekyear_week_day": "1984-W15-4", "strict_weekyear_week_day": "1984-W15-4", "year_month_day": "1984-04-12", "strict_year_month_day": "1984-04-12", "yyyy-MM-dd": "1984-04-12", "HH:mm:ss": "09:07:42", "yyyy-MM-dd_OR_epoch_millis": "450608862000.123456", "hour_minute_second_OR_t_time": "T09:07:42.000Z"}
{"epoch_millis": "450608862000.123456", "epoch_second": "450608862.000123456", "date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time": "1984-04-12T09:07:42.000Z", "strict_date_optional_time_nanos": "1984-04-12T09:07:42.000123456Z", "basic_date": "19840412", "basic_date_time": "19840412T090742.000Z", "basic_date_time_no_millis": "19840412T090742Z", "basic_ordinal_date": "1984103", "basic_ordinal_date_time": "1984103T090742.000Z", "basic_ordinal_date_time_no_millis": "1984103T090742Z", "basic_time": "090742.000Z", "basic_time_no_millis": "090742Z", "basic_t_time": "T090742.000Z", "basic_t_time_no_millis": "T090742Z", "basic_week_date": "1984W154", "strict_basic_week_date": "1984W154", "basic_week_date_time": "1984W154T090742.000Z", "strict_basic_week_date_time": "1984W154T090742.000Z", "basic_week_date_time_no_millis": "1984W154T090742Z", "strict_basic_week_date_time_no_millis": "1984W154T090742Z", "date": "1984-04-12", "strict_date": "1984-04-12", "date_hour": "1984-04-12T09", "strict_date_hour": "1984-04-12T09", "date_hour_minute": "1984-04-12T09:07", "strict_date_hour_minute": "1984-04-12T09:07", "date_hour_minute_second": "1984-04-12T09:07:42", "strict_date_hour_minute_second": "1984-04-12T09:07:42", "date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_fraction": "1984-04-12T09:07:42.000", "date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "strict_date_hour_minute_second_millis": "1984-04-12T09:07:42.000", "date_time": "1984-04-12T09:07:42.000Z", "strict_date_time": "1984-04-12T09:07:42.000123456Z", "date_time_no_millis": "1984-04-12T09:07:42Z", "strict_date_time_no_millis": "1984-04-12T09:07:42Z", "hour": "09", "strict_hour": "09", "hour_minute": "09:07", "strict_hour_minute": "09:07", "hour_minute_second": "09:07:42", "strict_hour_minute_second": "09:07:42", "hour_minute_second_fraction": "09:07:42.000", "strict_hour_minute_second_fraction": "09:07:42.000", "hour_minute_second_millis": "09:07:42.000", "strict_hour_minute_second_millis": "09:07:42.000", "ordinal_date": "1984-103", "strict_ordinal_date": "1984-103", "ordinal_date_time": "1984-103T09:07:42.000123456Z", "strict_ordinal_date_time": "1984-103T09:07:42.000123456Z", "ordinal_date_time_no_millis": "1984-103T09:07:42Z", "strict_ordinal_date_time_no_millis": "1984-103T09:07:42Z", "time": "09:07:42.000Z", "strict_time": "09:07:42.000Z", "time_no_millis": "09:07:42Z", "strict_time_no_millis": "09:07:42Z", "t_time": "T09:07:42.000Z", "strict_t_time": "T09:07:42.000Z", "t_time_no_millis": "T09:07:42Z", "strict_t_time_no_millis": "T09:07:42Z", "week_date": "1984-W15-4", "strict_week_date": "1984-W15-4", "week_date_time": "1984-W15-4T09:07:42.000Z", "strict_week_date_time": "1984-W15-4T09:07:42.000Z", "week_date_time_no_millis": "1984-W15-4T09:07:42Z", "strict_week_date_time_no_millis": "1984-W15-4T09:07:42Z", "weekyear_week_day": "1984-W15-4", "strict_weekyear_week_day": "1984-W15-4", "year_month_day": "1984-04-12", "strict_year_month_day": "1984-04-12", "yyyy-MM-dd": "1984-04-12", "custom_time": "09:07:42 PM", "yyyy-MM-dd_OR_epoch_millis": "450608862000.123456", "hour_minute_second_OR_t_time": "T09:07:42.000Z", "custom_timestamp": "1984-04-12 10:07:42 ---- PM", "custom_date_or_date": "1984-04-12", "custom_date_or_custom_time": "09:07:00", "custom_time_parser_check": "::: 9-32476542", "incomplete_1" : 2012, "incomplete_2": null, "incomplete_custom_date": 3021, "incomplete_custom_time" : 20, "incorrect" : null, "epoch_sec" : 100500, "epoch_milli" : 100500, "custom_no_delimiter_date" : "19610412", "custom_no_delimiter_time" : "090700", "custom_no_delimiter_ts" : "19610412090700"}
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,9 @@
"type" : "date",
"format": "yyyy-MM-dd"
},
"HH:mm:ss" : {
"custom_time" : {
"type" : "date",
"format": "HH:mm:ss"
"format": "hh:mm:ss a"
},
"yyyy-MM-dd_OR_epoch_millis" : {
"type" : "date",
Expand All @@ -300,7 +300,63 @@
"hour_minute_second_OR_t_time" : {
"type" : "date",
"format": "hour_minute_second||t_time"
},
"custom_timestamp" : {
"type" : "date",
"format": "yyyy-MM-dd hh:mm:ss ---- a"
},
"custom_date_or_date" : {
"type" : "date",
"format": "yyyy-MM-dd||date"
},
"custom_date_or_custom_time" : {
"type" : "date",
"format" : "yyyy-MM-dd || HH:mm:ss"
},
"custom_time_parser_check" : {
"type" : "date",
"format" : "::: k-A || A "
},
"incomplete_1" : {
"type" : "date",
"format" : "year"
},
"incomplete_2" : {
"type" : "date",
"format" : "E-w"
},
"incomplete_custom_date" : {
"type" : "date",
"format" : "uuuu"
},
"incomplete_custom_time" : {
"type" : "date",
"format" : "HH"
},
"incorrect" : {
"type" : "date",
"format" : "'___'"
},
"epoch_sec" : {
"type" : "date",
"format" : "epoch_second"
},
"epoch_milli" : {
"type" : "date",
"format" : "epoch_millis"
},
"custom_no_delimiter_date" : {
"type" : "date",
"format" : "uuuuMMdd"
},
"custom_no_delimiter_time" : {
"type" : "date",
"format" : "HHmmss"
},
"custom_no_delimiter_ts" : {
"type" : "date",
"format" : "uuuuMMddHHmmss"
}
}
}
}
}
Loading

0 comments on commit 56e5621

Please sign in to comment.