Skip to content

Commit

Permalink
[followup] Refactor JSON function and add TO_JSON_STRING, ARRAY_LENGH…
Browse files Browse the repository at this point in the history
…T functions (opensearch-project#870)

Signed-off-by: Lantao Jin <ltjin@amazon.com>
  • Loading branch information
LantaoJin authored and 14yapkc1 committed Dec 11, 2024
1 parent cc0f41d commit ead0d1b
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 59 deletions.
73 changes: 59 additions & 14 deletions docs/ppl-lang/functions/ppl-json.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

**Description**

`json(value)` Evaluates whether a value can be parsed as JSON. Returns the json string if valid, null otherwise.
`json(value)` Evaluates whether a string can be parsed as JSON format. Returns the string value if valid, null otherwise.

**Argument type:** STRING/JSON_ARRAY/JSON_OBJECT
**Argument type:** STRING

**Return type:** STRING
**Return type:** STRING/NULL

A STRING expression of a valid JSON object format.

Expand Down Expand Up @@ -47,15 +47,15 @@ A StructType expression of a valid JSON object.

Example:

os> source=people | eval result = json(json_object('key', 123.45)) | fields result
os> source=people | eval result = json_object('key', 123.45) | fields result
fetched rows / total rows = 1/1
+------------------+
| result |
+------------------+
| {"key":123.45} |
+------------------+

os> source=people | eval result = json(json_object('outer', json_object('inner', 123.45))) | fields result
os> source=people | eval result = json_object('outer', json_object('inner', 123.45)) | fields result
fetched rows / total rows = 1/1
+------------------------------+
| result |
Expand All @@ -81,29 +81,58 @@ Example:

os> source=people | eval `json_array` = json_array(1, 2, 0, -1, 1.1, -0.11)
fetched rows / total rows = 1/1
+----------------------------+
| json_array |
+----------------------------+
| 1.0,2.0,0.0,-1.0,1.1,-0.11 |
+----------------------------+
+------------------------------+
| json_array |
+------------------------------+
| [1.0,2.0,0.0,-1.0,1.1,-0.11] |
+------------------------------+

os> source=people | eval `json_array_object` = json(json_object("array", json_array(1, 2, 0, -1, 1.1, -0.11)))
os> source=people | eval `json_array_object` = json_object("array", json_array(1, 2, 0, -1, 1.1, -0.11))
fetched rows / total rows = 1/1
+----------------------------------------+
| json_array_object |
+----------------------------------------+
| {"array":[1.0,2.0,0.0,-1.0,1.1,-0.11]} |
+----------------------------------------+

### `TO_JSON_STRING`

**Description**

`to_json_string(jsonObject)` Returns a JSON string with a given json object value.

**Argument type:** JSON_OBJECT (Spark StructType/ArrayType)

**Return type:** STRING

Example:

os> source=people | eval `json_string` = to_json_string(json_array(1, 2, 0, -1, 1.1, -0.11)) | fields json_string
fetched rows / total rows = 1/1
+--------------------------------+
| json_string |
+--------------------------------+
| [1.0,2.0,0.0,-1.0,1.1,-0.11] |
+--------------------------------+

os> source=people | eval `json_string` = to_json_string(json_object('key', 123.45)) | fields json_string
fetched rows / total rows = 1/1
+-----------------+
| json_string |
+-----------------+
| {'key', 123.45} |
+-----------------+


### `JSON_ARRAY_LENGTH`

**Description**

`json_array_length(jsonArray)` Returns the number of elements in the outermost JSON array.
`json_array_length(jsonArrayString)` Returns the number of elements in the outermost JSON array string.

**Argument type:** STRING/JSON_ARRAY
**Argument type:** STRING

A STRING expression of a valid JSON array format, or JSON_ARRAY object.
A STRING expression of a valid JSON array format.

**Return type:** INTEGER

Expand All @@ -119,6 +148,21 @@ Example:
| 4 | 5 | null |
+-----------+-----------+-------------+


### `ARRAY_LENGTH`

**Description**

`array_length(jsonArray)` Returns the number of elements in the outermost array.

**Argument type:** ARRAY

ARRAY or JSON_ARRAY object.

**Return type:** INTEGER

Example:

os> source=people | eval `json_array` = json_array_length(json_array(1,2,3,4)), `empty_array` = json_array_length(json_array())
fetched rows / total rows = 1/1
+--------------+---------------+
Expand All @@ -127,6 +171,7 @@ Example:
| 4 | 0 |
+--------------+---------------+


### `JSON_EXTRACT`

**Description**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,30 +163,32 @@ class FlintSparkPPLJsonFunctionITSuite
assert(ex.getMessage().contains("should all be the same type"))
}

test("test json_array() with json()") {
test("test json_array() with to_json_tring()") {
val frame = sql(s"""
| source = $testTable | eval result = json(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields result
| source = $testTable | eval result = to_json_string(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""[1.0,2.0,0.0,-1.0,1.1,-0.11]""")), frame)
}

test("test json_array_length()") {
test("test array_length()") {
var frame = sql(s"""
| source = $testTable | eval result = json_array_length(json_array('this', 'is', 'a', 'string', 'array')) | head 1 | fields result
| """.stripMargin)
| source = $testTable| eval result = array_length(json_array('this', 'is', 'a', 'string', 'array')) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row(5)), frame)

frame = sql(s"""
| source = $testTable | eval result = json_array_length(json_array(1, 2, 0, -1, 1.1, -0.11)) | head 1 | fields result
| """.stripMargin)
| source = $testTable| eval result = array_length(json_array(1, 2, 0, -1, 1.1, -0.11)) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row(6)), frame)

frame = sql(s"""
| source = $testTable | eval result = json_array_length(json_array()) | head 1 | fields result
| """.stripMargin)
| source = $testTable| eval result = array_length(json_array()) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row(0)), frame)
}

frame = sql(s"""
test("test json_array_length()") {
var frame = sql(s"""
| source = $testTable | eval result = json_array_length('[]') | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row(0)), frame)
Expand All @@ -211,38 +213,38 @@ class FlintSparkPPLJsonFunctionITSuite
test("test json_object()") {
// test value is a string
var frame = sql(s"""
| source = $testTable| eval result = json(json_object('key', 'string_value')) | head 1 | fields result
| source = $testTable| eval result = to_json_string(json_object('key', 'string_value')) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"key":"string_value"}""")), frame)

// test value is a number
frame = sql(s"""
| source = $testTable| eval result = json(json_object('key', 123.45)) | head 1 | fields result
| source = $testTable| eval result = to_json_string(json_object('key', 123.45)) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"key":123.45}""")), frame)

// test value is a boolean
frame = sql(s"""
| source = $testTable| eval result = json(json_object('key', true)) | head 1 | fields result
| source = $testTable| eval result = to_json_string(json_object('key', true)) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"key":true}""")), frame)

frame = sql(s"""
| source = $testTable| eval result = json(json_object("a", 1, "b", 2, "c", 3)) | head 1 | fields result
| source = $testTable| eval result = to_json_string(json_object("a", 1, "b", 2, "c", 3)) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"a":1,"b":2,"c":3}""")), frame)
}

test("test json_object() and json_array()") {
// test value is an empty array
var frame = sql(s"""
| source = $testTable| eval result = json(json_object('key', array())) | head 1 | fields result
| source = $testTable| eval result = to_json_string(json_object('key', array())) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"key":[]}""")), frame)

// test value is an array
frame = sql(s"""
| source = $testTable| eval result = json(json_object('key', array(1, 2, 3))) | head 1 | fields result
| source = $testTable| eval result = to_json_string(json_object('key', array(1, 2, 3))) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"key":[1,2,3]}""")), frame)

Expand Down Expand Up @@ -272,14 +274,14 @@ class FlintSparkPPLJsonFunctionITSuite

test("test json_object() nested") {
val frame = sql(s"""
| source = $testTable | eval result = json(json_object('outer', json_object('inner', 123.45))) | head 1 | fields result
| source = $testTable | eval result = to_json_string(json_object('outer', json_object('inner', 123.45))) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"outer":{"inner":123.45}}""")), frame)
}

test("test json_object(), json_array() and json()") {
val frame = sql(s"""
| source = $testTable | eval result = json(json_object("array", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields result
| source = $testTable | eval result = to_json_string(json_object("array", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields result
| """.stripMargin)
assertSameRows(Seq(Row("""{"array":[1.0,2.0,0.0,-1.0,1.1,-0.11]}""")), frame)
}
Expand Down
2 changes: 2 additions & 0 deletions ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ JSON: 'JSON';
JSON_OBJECT: 'JSON_OBJECT';
JSON_ARRAY: 'JSON_ARRAY';
JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH';
TO_JSON_STRING: 'TO_JSON_STRING';
JSON_EXTRACT: 'JSON_EXTRACT';
JSON_KEYS: 'JSON_KEYS';
JSON_VALID: 'JSON_VALID';
Expand All @@ -393,6 +394,7 @@ JSON_VALID: 'JSON_VALID';

// COLLECTION FUNCTIONS
ARRAY: 'ARRAY';
ARRAY_LENGTH: 'ARRAY_LENGTH';

// LAMBDA FUNCTIONS
//EXISTS: 'EXISTS';
Expand Down
2 changes: 2 additions & 0 deletions ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,7 @@ jsonFunctionName
| JSON_OBJECT
| JSON_ARRAY
| JSON_ARRAY_LENGTH
| TO_JSON_STRING
| JSON_EXTRACT
| JSON_KEYS
| JSON_VALID
Expand All @@ -858,6 +859,7 @@ jsonFunctionName

collectionFunctionName
: ARRAY
| ARRAY_LENGTH
;

lambdaFunctionName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ public enum BuiltinFunctionName {
JSON_OBJECT(FunctionName.of("json_object")),
JSON_ARRAY(FunctionName.of("json_array")),
JSON_ARRAY_LENGTH(FunctionName.of("json_array_length")),
TO_JSON_STRING(FunctionName.of("to_json_string")),
JSON_EXTRACT(FunctionName.of("json_extract")),
JSON_KEYS(FunctionName.of("json_keys")),
JSON_VALID(FunctionName.of("json_valid")),
Expand All @@ -228,6 +229,7 @@ public enum BuiltinFunctionName {

/** COLLECTION Functions **/
ARRAY(FunctionName.of("array")),
ARRAY_LENGTH(FunctionName.of("array_length")),

/** LAMBDA Functions **/
ARRAY_FORALL(FunctionName.of("forall")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADD;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDDATE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATEDIFF;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE_ADD;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE_SUB;
Expand Down Expand Up @@ -58,6 +59,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SYSDATE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPADD;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPDIFF;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_JSON_STRING;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRIM;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.UTC_TIMESTAMP;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK;
Expand Down Expand Up @@ -102,7 +104,9 @@ public interface BuiltinFunctionTransformer {
.put(COALESCE, "coalesce")
.put(LENGTH, "length")
.put(TRIM, "trim")
.put(ARRAY_LENGTH, "array_size")
// json functions
.put(TO_JSON_STRING, "to_json")
.put(JSON_KEYS, "json_object_keys")
.put(JSON_EXTRACT, "get_json_object")
.build();
Expand All @@ -126,26 +130,12 @@ public interface BuiltinFunctionTransformer {
.put(
JSON_ARRAY_LENGTH,
args -> {
// Check if the input is an array (from json_array()) or a JSON string
if (args.get(0) instanceof UnresolvedFunction) {
// Input is a JSON array
return UnresolvedFunction$.MODULE$.apply("json_array_length",
seq(UnresolvedFunction$.MODULE$.apply("to_json", seq(args), false)), false);
} else {
// Input is a JSON string
return UnresolvedFunction$.MODULE$.apply("json_array_length", seq(args.get(0)), false);
}
return UnresolvedFunction$.MODULE$.apply("json_array_length", seq(args.get(0)), false);
})
.put(
JSON,
args -> {
// Check if the input is a named_struct (from json_object()) or a JSON string
if (args.get(0) instanceof UnresolvedFunction) {
return UnresolvedFunction$.MODULE$.apply("to_json", seq(args.get(0)), false);
} else {
return UnresolvedFunction$.MODULE$.apply("get_json_object",
seq(args.get(0), Literal$.MODULE$.apply("$")), false);
}
return UnresolvedFunction$.MODULE$.apply("get_json_object", seq(args.get(0), Literal$.MODULE$.apply("$")), false);
})
.put(
JSON_VALID,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class PPLLogicalPlanJsonFunctionsTranslatorTestSuite
val context = new CatalystPlanContext
val logPlan =
planTransformer.visit(
plan(pplParser, """source=t a = json(json_object('key', array(1, 2, 3)))"""),
plan(pplParser, """source=t a = to_json_string(json_object('key', array(1, 2, 3)))"""),
context)

val table = UnresolvedRelation(Seq("t"))
Expand Down Expand Up @@ -97,7 +97,9 @@ class PPLLogicalPlanJsonFunctionsTranslatorTestSuite
val context = new CatalystPlanContext
val logPlan =
planTransformer.visit(
plan(pplParser, """source=t a = json(json_object('key', json_array(1, 2, 3)))"""),
plan(
pplParser,
"""source=t a = to_json_string(json_object('key', json_array(1, 2, 3)))"""),
context)

val table = UnresolvedRelation(Seq("t"))
Expand Down Expand Up @@ -139,25 +141,21 @@ class PPLLogicalPlanJsonFunctionsTranslatorTestSuite
comparePlans(expectedPlan, logPlan, false)
}

test("test json_array_length(json_array())") {
test("test array_length(json_array())") {
val context = new CatalystPlanContext
val logPlan =
planTransformer.visit(
plan(pplParser, """source=t a = json_array_length(json_array(1,2,3))"""),
plan(pplParser, """source=t a = array_length(json_array(1,2,3))"""),
context)

val table = UnresolvedRelation(Seq("t"))
val jsonFunc =
UnresolvedFunction(
"json_array_length",
"array_size",
Seq(
UnresolvedFunction(
"to_json",
Seq(
UnresolvedFunction(
"array",
Seq(Literal(1), Literal(2), Literal(3)),
isDistinct = false)),
"array",
Seq(Literal(1), Literal(2), Literal(3)),
isDistinct = false)),
isDistinct = false)
val filterExpr = EqualTo(UnresolvedAttribute("a"), jsonFunc)
Expand Down

0 comments on commit ead0d1b

Please sign in to comment.