Skip to content

Commit

Permalink
fix: Fix stack overflow (SO) in JSON schema inference (#18887)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Sep 24, 2024
1 parent f1133a4 commit a716989
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
10 changes: 7 additions & 3 deletions crates/polars-json/src/json/infer_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,15 @@ pub(crate) fn coerce_dtype<A: Borrow<ArrowDataType>>(datatypes: &[A]) -> ArrowDa
true,
)));
} else if datatypes.len() > 2 {
return coerce_dtype(datatypes);
return datatypes
.iter()
.map(|t| t.borrow().clone())
.reduce(|a, b| coerce_dtype(&[a, b]))
.expect("not empty");
}
let (lhs, rhs) = (datatypes[0].borrow(), datatypes[1].borrow());

return match (lhs, rhs) {
match (lhs, rhs) {
(lhs, rhs) if lhs == rhs => lhs.clone(),
(LargeList(lhs), LargeList(rhs)) => {
let inner = coerce_dtype(&[lhs.dtype(), rhs.dtype()]);
Expand Down Expand Up @@ -171,5 +175,5 @@ pub(crate) fn coerce_dtype<A: Borrow<ArrowDataType>>(datatypes: &[A]) -> ArrowDa
(Null, rhs) => rhs.clone(),
(lhs, Null) => lhs.clone(),
(_, _) => LargeUtf8,
};
}
}
8 changes: 8 additions & 0 deletions py-polars/tests/unit/io/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,3 +430,11 @@ def test_empty_list_json() -> None:
df = pl.read_json(b"[]")
assert df.shape == (0, 0)
assert isinstance(df, pl.DataFrame)


def test_json_infer_3_dtypes() -> None:
    """Decode a string column whose rows hold three different JSON shapes.

    Regression test: before the fix, inferring a common dtype for this input
    ended in a stack overflow instead of returning a result.
    """
    frame = pl.DataFrame({"a": ["{}", "1", "[1, 2]"]})
    decoded = frame.select(pl.col("a").str.json_decode())

    # The object row becomes null, while the scalar and the list are both
    # represented as lists of strings.
    decoded_values = decoded["a"].to_list()
    assert decoded_values == [None, ["1"], ["1", "2"]]

    first_dtype = decoded.dtypes[0]
    assert first_dtype == pl.List(pl.String)

0 comments on commit a716989

Please sign in to comment.