From a716989ecfa4b42fd64f1d57143b430d71b6a436 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Tue, 24 Sep 2024 13:58:34 +0200
Subject: [PATCH] fix: Fix SO in json inference (#18887)

---
 crates/polars-json/src/json/infer_schema.rs | 10 +++++++---
 py-polars/tests/unit/io/test_json.py        |  8 ++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/crates/polars-json/src/json/infer_schema.rs b/crates/polars-json/src/json/infer_schema.rs
index 4d0eb4d47309..f00ed6e26eb8 100644
--- a/crates/polars-json/src/json/infer_schema.rs
+++ b/crates/polars-json/src/json/infer_schema.rs
@@ -134,11 +134,15 @@ pub(crate) fn coerce_dtype<A: Borrow<ArrowDataType>>(datatypes: &[A]) -> ArrowDa
             true,
         )));
     } else if datatypes.len() > 2 {
-        return coerce_dtype(datatypes);
+        return datatypes
+            .iter()
+            .map(|t| t.borrow().clone())
+            .reduce(|a, b| coerce_dtype(&[a, b]))
+            .expect("not empty");
     }
     let (lhs, rhs) = (datatypes[0].borrow(), datatypes[1].borrow());
 
-    return match (lhs, rhs) {
+    match (lhs, rhs) {
         (lhs, rhs) if lhs == rhs => lhs.clone(),
         (LargeList(lhs), LargeList(rhs)) => {
             let inner = coerce_dtype(&[lhs.dtype(), rhs.dtype()]);
@@ -171,5 +175,5 @@ pub(crate) fn coerce_dtype<A: Borrow<ArrowDataType>>(datatypes: &[A]) -> ArrowDa
         (Null, rhs) => rhs.clone(),
         (lhs, Null) => lhs.clone(),
         (_, _) => LargeUtf8,
-    };
+    }
 }
diff --git a/py-polars/tests/unit/io/test_json.py b/py-polars/tests/unit/io/test_json.py
index 30fed75f93dd..93780e79293d 100644
--- a/py-polars/tests/unit/io/test_json.py
+++ b/py-polars/tests/unit/io/test_json.py
@@ -430,3 +430,11 @@ def test_empty_list_json() -> None:
     df = pl.read_json(b"[]")
     assert df.shape == (0, 0)
     assert isinstance(df, pl.DataFrame)
+
+
+def test_json_infer_3_dtypes() -> None:
+    # would SO before
+    df = pl.DataFrame({"a": ["{}", "1", "[1, 2]"]})
+    out = df.select(pl.col("a").str.json_decode())
+    assert out["a"].to_list() == [None, ["1"], ["1", "2"]]
+    assert out.dtypes[0] == pl.List(pl.String)