From 78d35cdaa55353a3d0d170e8ac1acf85a616bec0 Mon Sep 17 00:00:00 2001 From: panguixin Date: Sat, 7 Sep 2024 00:57:02 +0800 Subject: [PATCH] enhance OS value parsing Signed-off-by: panguixin --- .../sql/ppl/DateTimeComparisonIT.java | 2 +- .../opensearch/sql/ppl/SystemFunctionIT.java | 16 ++--- .../org/opensearch/sql/sql/DataTypeIT.java | 60 +++++++++++++++++++ .../opensearch/sql/sql/DateTimeFormatsIT.java | 3 +- .../opensearch/sql/sql/SystemFunctionIT.java | 4 +- integ-test/src/test/resources/datatypes.json | 6 +- .../src/test/resources/datatypes_numeric.json | 4 ++ .../data/utils/OpenSearchJsonContent.java | 51 +++++++++++++--- .../value/OpenSearchExprValueFactory.java | 15 ++--- .../value/OpenSearchExprValueFactoryTest.java | 34 +++++++++-- 10 files changed, 161 insertions(+), 34 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/DataTypeIT.java diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java index 7cc083cbb6..f07a27c190 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java @@ -401,7 +401,7 @@ public void testCompare() throws IOException { var result = executeQuery( String.format( - "source=%s | eval `%s` = %s | fields `%s`", + "source=%s | head 1 | eval `%s` = %s | fields `%s`", TEST_INDEX_DATATYPE_NONNUMERIC, name, functionCall, name)); verifySchema(result, schema(name, null, "boolean")); verifyDataRows(result, rows(expectedResult)); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SystemFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SystemFunctionIT.java index c1356ce838..bb4e1dd3d1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SystemFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SystemFunctionIT.java @@ -29,7 +29,7 @@ public void typeof_sql_types() throws IOException { JSONObject response = executeQuery( String.format( - "source=%s | eval `str` = typeof('pewpew')," + "source=%s | head 1 | eval `str` = typeof('pewpew')," + " `double` = typeof(1.0)," + "`int` = typeof(12345)," + " `long` = typeof(1234567891011)," @@ -42,7 +42,7 @@ public void typeof_sql_types() throws IOException { response = executeQuery( String.format( - "source=%s | eval " + "source=%s | head 1 | eval " + "`timestamp` = typeof(CAST('1961-04-12 09:07:00' AS TIMESTAMP))," + "`time` = typeof(CAST('09:07:00' AS TIME))," + "`date` = typeof(CAST('1961-04-12' AS DATE))" @@ -56,7 +56,7 @@ public void typeof_opensearch_types() throws IOException { JSONObject response = executeQuery( String.format( - "source=%s | eval `double` = typeof(double_number), `long` =" + "source=%s | head 1 | eval `double` = typeof(double_number), `long` =" + " typeof(long_number),`integer` = typeof(integer_number), `byte` =" + " typeof(byte_number),`short` = typeof(short_number), `float` =" + " typeof(float_number),`half_float` = typeof(half_float_number)," @@ -69,11 +69,11 @@ public void typeof_opensearch_types() throws IOException { response = executeQuery( String.format( - "source=%s | eval `text` = typeof(text_value), `date` = typeof(date_value)," - + " `date_nanos` = typeof(date_nanos_value),`boolean` = typeof(boolean_value)," - + " `object` = typeof(object_value),`keyword` = typeof(keyword_value), `ip` =" - + " typeof(ip_value),`binary` = typeof(binary_value), `geo_point` =" - + " typeof(geo_point_value)" + "source=%s | head 1 | eval `text` = typeof(text_value), `date` =" + + " typeof(date_value), `date_nanos` = typeof(date_nanos_value),`boolean` =" + + " typeof(boolean_value), `object` = typeof(object_value),`keyword` =" + + " typeof(keyword_value), `ip` = typeof(ip_value),`binary` =" + + " typeof(binary_value), `geo_point` = typeof(geo_point_value)" // TODO activate this test once `ARRAY` type supported, see // ExpressionAnalyzer::isTypeNotSupported // + ", `nested` = typeof(nested_value)" diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/DataTypeIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/DataTypeIT.java new file mode 100644 index 0000000000..6c7c9ab237 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/DataTypeIT.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.SQLIntegTestCase.Index.DATA_TYPE_NONNUMERIC; +import static org.opensearch.sql.legacy.SQLIntegTestCase.Index.DATA_TYPE_NUMERIC; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class DataTypeIT extends SQLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(DATA_TYPE_NUMERIC); + loadIndex(DATA_TYPE_NONNUMERIC); + } + + @Test + public void testReadingData() throws IOException { + String query = + String.format( + "SELECT" + + " long_number,integer_number,short_number,byte_number,double_number,float_number,half_float_number,scaled_float_number" + + " FROM %s LIMIT 5", + Index.DATA_TYPE_NUMERIC.getName()); + JSONObject result = executeJdbcRequest(query); + verifySchema( + result, + schema("long_number", "long"), + schema("integer_number", "integer"), + schema("short_number", "short"), + schema("byte_number", "byte"), + schema("float_number", "float"), + schema("double_number", "double"), + schema("half_float_number", "float"), + schema("scaled_float_number", "double")); + verifyDataRows( + result, + rows(1, 2, 3, 4, 5.1, 6.2, 7.3, 8.4), + rows(1, 2, 3, 4, 5.1, 6.2, 7.3, 8.4), + rows(1, 2, 3, 4, 5.1, 6.2, 7.3, 8.4)); + + query = + String.format( + "SELECT boolean_value,keyword_value FROM %s LIMIT 5", + Index.DATA_TYPE_NONNUMERIC.getName()); + result = executeJdbcRequest(query); + verifySchema(result, schema("boolean_value", "boolean"), schema("keyword_value", "keyword")); + verifyDataRows(result, rows(true, "123"), rows(true, "123"), rows(true, "123")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java index 13c2eecd56..2298b9d584 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeFormatsIT.java @@ -223,7 +223,8 @@ public void testDateNanosGroupBy() { @SneakyThrows public void testDateNanosWithNanos() { String query = - String.format("SELECT date_nanos_value" + " FROM %s", TEST_INDEX_DATATYPE_NONNUMERIC); + String.format( + "SELECT date_nanos_value" + " FROM %s limit 1", TEST_INDEX_DATATYPE_NONNUMERIC); JSONObject result = executeQuery(query); verifySchema(result, schema("date_nanos_value", null, "timestamp")); verifyDataRows(result, rows("2019-03-24 01:34:46.123456789")); diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/SystemFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/SystemFunctionIT.java index 7129d058c0..e634cd347d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/SystemFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/SystemFunctionIT.java @@ -46,7 +46,7 @@ public void typeof_opensearch_types() { String.format( "SELECT typeof(double_number),typeof(long_number), typeof(integer_number)," + " typeof(byte_number), typeof(short_number),typeof(float_number)," - + " typeof(half_float_number), typeof(scaled_float_number) from %s;", + + " typeof(half_float_number), typeof(scaled_float_number) from %s limit 1;", TEST_INDEX_DATATYPE_NUMERIC)); verifyDataRows( response, rows("DOUBLE", "LONG", "INTEGER", "BYTE", "SHORT", "FLOAT", "FLOAT", "DOUBLE")); @@ -61,7 +61,7 @@ public void typeof_opensearch_types() { // TODO activate this test once `ARRAY` type supported, see // ExpressionAnalyzer::isTypeNotSupported // + ", typeof(nested_value)" - + " from %s;", + + " from %s limit 1;", TEST_INDEX_DATATYPE_NONNUMERIC)); verifyDataRows( response, diff --git a/integ-test/src/test/resources/datatypes.json b/integ-test/src/test/resources/datatypes.json index 70ddd28763..c052190c03 100644 --- a/integ-test/src/test/resources/datatypes.json +++ b/integ-test/src/test/resources/datatypes.json @@ -1,2 +1,6 @@ {"index":{"_id":"1"}} -{"boolean_value": true, "keyword_value": "keyword", "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }} +{"boolean_value": true, "keyword_value": "123", "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }} +{"index":{"_id":"2"}} +{"boolean_value": "true", "keyword_value": 123, "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }} +{"index":{"_id":"3"}} +{"boolean_value": ["true"], "keyword_value": [123], "text_value": "text", "binary_value": "U29tZSBiaW5hcnkgYmxvYg==", "date_value": "2020-10-13 13:00:00", "date_nanos_value": "2019-03-23T21:34:46.123456789-04:00", "ip_value": "127.0.0.1", "object_value": {"first": "Dale", "last": "Dale"}, "nested_value": [{"first" : "John", "last" : "Smith"}, {"first" : "Alice", "last" : "White"}], "geo_point_value": { "lat": 40.71, "lon": 74.00 }} diff --git a/integ-test/src/test/resources/datatypes_numeric.json b/integ-test/src/test/resources/datatypes_numeric.json index 5aa3b00511..a07874584d 100644 --- a/integ-test/src/test/resources/datatypes_numeric.json +++ b/integ-test/src/test/resources/datatypes_numeric.json @@ -1,2 +1,6 @@ {"index":{"_id":"1"}} {"long_number": 1, "integer_number": 2, "short_number": 3, "byte_number": 4, "double_number": 5.1, "float_number": 6.2, "half_float_number": 7.3, "scaled_float_number": 8.4} +{"index":{"_id":"2"}} +{"long_number": "1", "integer_number": "2", "short_number": "3", "byte_number": "4", "double_number": "5.1", "float_number": "6.2", "half_float_number": "7.3", "scaled_float_number": "8.4"} +{"index":{"_id":"3"}} +{"long_number": ["1"], "integer_number": ["2"], "short_number": ["3"], "byte_number": ["4"], "double_number": ["5.1"], "float_number": ["6.2"], "half_float_number": ["7.3"], "scaled_float_number": ["8.4"]} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index 4446c1f979..80430827fe 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -14,6 +14,7 @@ import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.OpenSearchParseException; +import org.opensearch.common.Numbers; import org.opensearch.common.geo.GeoPoint; import org.opensearch.common.geo.GeoUtils; import org.opensearch.common.xcontent.json.JsonXContentParser; @@ -29,32 +30,32 @@ public class OpenSearchJsonContent implements Content { @Override public Integer intValue() { - return value().intValue(); + return (int) extractDoubleValue(value()); } @Override public Long longValue() { - return value().longValue(); + return extractLongValue(value()); } @Override public Short shortValue() { - return value().shortValue(); + return (short) extractDoubleValue(value()); } @Override public Byte byteValue() { - return (byte) value().shortValue(); + return (byte) extractDoubleValue(value()); } @Override public Float floatValue() { - return value().floatValue(); + return (float) extractDoubleValue(value()); } @Override public Double doubleValue() { - return value().doubleValue(); + return extractDoubleValue(value()); } @Override @@ -64,7 +65,7 @@ public String stringValue() { @Override public Boolean booleanValue() { - return value().booleanValue(); + return extractBooleanValue(value()); } @Override @@ -148,4 +149,40 @@ public Pair geoValue() { private JsonNode value() { return value; } + + /** Get double value from JsonNode if possible. */ + private double extractDoubleValue(JsonNode node) { + if (node.isTextual()) { + return Double.parseDouble(node.textValue()); + } + if (node.isNumber()) { + return node.doubleValue(); + } else { + throw new OpenSearchParseException("node must be a number"); + } + } + + /** Get long value from JsonNode if possible. */ + private long extractLongValue(JsonNode node) { + if (node.isTextual()) { + return Numbers.toLong(node.textValue(), true); + } + if (node.isNumber()) { + return node.longValue(); + } else { + throw new OpenSearchParseException("node must be a number"); + } + } + + /** Get boolean value from JsonNode if possible. */ + private boolean extractBooleanValue(JsonNode node) { + if (node.isTextual()) { + return Boolean.parseBoolean(node.textValue()); + } + if (node.isBoolean()) { + return node.booleanValue(); + } else { + throw new OpenSearchParseException("node must be a boolean"); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 417aaddaee..c00ff3d781 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -60,10 +60,8 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.opensearch.data.type.OpenSearchBinaryType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.data.type.OpenSearchIpType; import org.opensearch.sql.opensearch.data.utils.Content; import org.opensearch.sql.opensearch.data.utils.ObjectContent; import org.opensearch.sql.opensearch.data.utils.OpenSearchJsonContent; @@ -346,9 +344,8 @@ private ExprValue parseArray( if (content.objectValue() instanceof ObjectNode) { result.add(parseStruct(content, prefix, supportArrays)); // non-object type arrays are only supported when parsing inner_hits of OS response. - } else if (!(type instanceof OpenSearchDataType - && ((OpenSearchDataType) type).getExprType().equals(ARRAY)) - && !supportArrays) { + } else if (((OpenSearchDataType) type).getExprType().equals(ARRAY) == false + && supportArrays == false) { return parseInnerArrayValue(content.array().next(), prefix, type, supportArrays); } else { content @@ -415,10 +412,10 @@ private ExprValue parseGeoPoint(Content content, boolean supportArrays) { */ private ExprValue parseInnerArrayValue( Content content, String prefix, ExprType type, boolean supportArrays) { - if (type instanceof OpenSearchIpType - || type instanceof OpenSearchBinaryType - || type instanceof OpenSearchDateType) { - return parse(content, prefix, Optional.of(type), supportArrays); + if (content.isArray()) { + return parseArray(content, prefix, type, supportArrays); + } else if (typeActionMap.containsKey(type)) { + return typeActionMap.get(type).apply(content, type); } else if (content.isString()) { return parse(content, prefix, Optional.of(OpenSearchDataType.of(STRING)), supportArrays); } else if (content.isLong()) { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 6b4d825ab1..22ef531a9c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -156,6 +156,7 @@ public void constructNullArrayValue() { public void constructByte() { assertAll( () -> assertEquals(byteValue((byte) 1), tupleValue("{\"byteV\":1}").get("byteV")), + () -> assertEquals(byteValue((byte) 1), tupleValue("{\"byteV\":\"1\"}").get("byteV")), () -> assertEquals(byteValue((byte) 1), constructFromObject("byteV", 1)), () -> assertEquals(byteValue((byte) 1), constructFromObject("byteV", "1.0"))); } @@ -164,6 +165,7 @@ public void constructByte() { public void constructShort() { assertAll( () -> assertEquals(shortValue((short) 1), tupleValue("{\"shortV\":1}").get("shortV")), + () -> assertEquals(shortValue((short) 1), tupleValue("{\"shortV\":\"1\"}").get("shortV")), () -> assertEquals(shortValue((short) 1), constructFromObject("shortV", 1)), () -> assertEquals(shortValue((short) 1), constructFromObject("shortV", "1.0"))); } @@ -172,19 +174,16 @@ public void constructShort() { public void constructInteger() { assertAll( () -> assertEquals(integerValue(1), tupleValue("{\"intV\":1}").get("intV")), + () -> assertEquals(integerValue(1), tupleValue("{\"intV\":\"1\"}").get("intV")), () -> assertEquals(integerValue(1), constructFromObject("intV", 1)), () -> assertEquals(integerValue(1), constructFromObject("intV", "1.0"))); } - @Test - public void constructIntegerValueInStringValue() { - assertEquals(integerValue(1), constructFromObject("intV", "1")); - } - @Test public void constructLong() { assertAll( () -> assertEquals(longValue(1L), tupleValue("{\"longV\":1}").get("longV")), + () -> assertEquals(longValue(1L), tupleValue("{\"longV\":\"1\"}").get("longV")), () -> assertEquals(longValue(1L), constructFromObject("longV", 1L)), () -> assertEquals(longValue(1L), constructFromObject("longV", "1.0"))); } @@ -193,6 +192,7 @@ public void constructLong() { public void constructFloat() { assertAll( () -> assertEquals(floatValue(1f), tupleValue("{\"floatV\":1.0}").get("floatV")), + () -> assertEquals(floatValue(1f), tupleValue("{\"floatV\":\"1.0\"}").get("floatV")), () -> assertEquals(floatValue(1f), constructFromObject("floatV", 1f))); } @@ -200,6 +200,7 @@ public void constructFloat() { public void constructDouble() { assertAll( () -> assertEquals(doubleValue(1d), tupleValue("{\"doubleV\":1.0}").get("doubleV")), + () -> assertEquals(doubleValue(1d), tupleValue("{\"doubleV\":\"1.0\"}").get("doubleV")), () -> assertEquals(doubleValue(1d), constructFromObject("doubleV", 1d))); } @@ -215,6 +216,7 @@ public void constructString() { public void constructBoolean() { assertAll( () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":true}").get("boolV")), + () -> assertEquals(booleanValue(true), tupleValue("{\"boolV\":\"true\"}").get("boolV")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", true)), () -> assertEquals(booleanValue(true), constructFromObject("boolV", "true")), () -> assertEquals(booleanValue(true), constructFromObject("boolV", 1)), @@ -755,6 +757,27 @@ public void constructGeoPointFromUnsupportedFormatShouldThrowException() { assertEquals("lat must be a number", exception.getMessage()); } + @Test + public void constructNumberFromUnsupportedFormatShouldThrowException() { + OpenSearchParseException exception = + assertThrows( + OpenSearchParseException.class, () -> tupleValue("{\"intV\": false}").get("intV")); + assertEquals("node must be a number", exception.getMessage()); + + exception = + assertThrows( + OpenSearchParseException.class, () -> tupleValue("{\"longV\": false}").get("intV")); + assertEquals("node must be a number", exception.getMessage()); + } + + @Test + public void constructBooleanFromUnsupportedFormatShouldThrowException() { + OpenSearchParseException exception = + assertThrows( + OpenSearchParseException.class, () -> tupleValue("{\"boolV\": 1}").get("boolV")); + assertEquals("node must be a boolean", exception.getMessage()); + } + @Test public void constructBinary() { assertEquals( @@ -769,6 +792,7 @@ public void constructBinary() { @Test public void constructFromOpenSearchArrayReturnFirstElement() { assertEquals(integerValue(1), tupleValue("{\"intV\":[1, 2, 3]}").get("intV")); + assertEquals(integerValue(1), tupleValue("{\"intV\":[\"1\", 2, 3]}").get("intV")); assertEquals( new ExprTupleValue( new LinkedHashMap() {