Skip to content

Commit

Permalink
Infer UInt64 and Decimal128 schema from csv field
Browse files (browse the repository at this point in the history)
  • Loading branch information
bezbac committed Aug 21, 2022
1 parent 34216d5 commit 4b3104e
Showing 1 changed file with 24 additions and 1 deletion.
25 changes: 24 additions & 1 deletion arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,18 @@ fn infer_field_schema(string: &str, datetime_re: Option<Regex>) -> DataType {
} else if DECIMAL_RE.is_match(string) {
DataType::Float64
} else if INTEGER_RE.is_match(string) {
DataType::Int64
match string.parse::<i64>() {
Ok(_) => return DataType::Int64,
_ => {}
}

match string.parse::<u64>() {
Ok(_) => return DataType::UInt64,
Err(e) => match e.kind() {
std::num::IntErrorKind::PosOverflow => DataType::Decimal128(u8::MAX, 0),
_ => DataType::Utf8,
},
}
} else if datetime_re.is_match(string) {
DataType::Date64
} else if DATE_RE.is_match(string) {
Expand Down Expand Up @@ -1555,6 +1566,18 @@ mod tests {
assert_eq!(infer_field_schema("A", None), DataType::Utf8);
assert_eq!(infer_field_schema("\"123\"", None), DataType::Utf8);
assert_eq!(infer_field_schema("10", None), DataType::Int64);
assert_eq!(
infer_field_schema("9223372036854775807", None),
DataType::Int64
);
assert_eq!(
infer_field_schema("18446744073709551615", None),
DataType::UInt64
);
assert_eq!(
infer_field_schema("18446744073709551616", None),
DataType::Decimal128(u8::MAX, 0)
);
assert_eq!(infer_field_schema("10.2", None), DataType::Float64);
assert_eq!(infer_field_schema(".2", None), DataType::Float64);
assert_eq!(infer_field_schema("2.", None), DataType::Float64);
Expand Down

0 comments on commit 4b3104e

Please sign in to comment.