Skip to content

Commit

Permalink
Add quote and escape attributes to create csv external table (#8351)
Browse files Browse the repository at this point in the history
* Minor: Improve the document format of JoinHashMap

* sql csv_with_quote_escape

* fix
  • Loading branch information
Asura7969 authored Nov 29, 2023
1 parent 4c914ea commit aeb012e
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 4 deletions.
6 changes: 6 additions & 0 deletions datafusion/common/src/file_options/csv_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ impl TryFrom<(&ConfigOptions, &StatementOptions)> for CsvWriterOptions {
)
})?)
},
"quote" | "escape" => {
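// `quote` and `escape` are only available when reading CSV files, not when
// writing them (see https://github.com/apache/arrow-rs/issues/5146).
// Accept and ignore them here so they do not fall through to the
// "unsupported option" error below.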
builder
},
_ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for CSV format!")))
}
}
Expand Down
16 changes: 12 additions & 4 deletions datafusion/core/src/datasource/listing_table_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,20 @@ impl TableProviderFactory for ListingTableFactory {
let file_extension = get_extension(cmd.location.as_str());

let file_format: Arc<dyn FileFormat> = match file_type {
FileType::CSV => Arc::new(
CsvFormat::default()
FileType::CSV => {
let mut statement_options = StatementOptions::from(&cmd.options);
let mut csv_format = CsvFormat::default()
.with_has_header(cmd.has_header)
.with_delimiter(cmd.delimiter as u8)
.with_file_compression_type(file_compression_type),
),
.with_file_compression_type(file_compression_type);
if let Some(quote) = statement_options.take_str_option("quote") {
csv_format = csv_format.with_quote(quote.as_bytes()[0])
}
if let Some(escape) = statement_options.take_str_option("escape") {
csv_format = csv_format.with_escape(Some(escape.as_bytes()[0]))
}
Arc::new(csv_format)
}
#[cfg(feature = "parquet")]
FileType::PARQUET => Arc::new(ParquetFormat::default()),
FileType::AVRO => Arc::new(AvroFormat),
Expand Down
11 changes: 11 additions & 0 deletions datafusion/core/tests/data/escape.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
c1,c2
"id0","value\"0"
"id1","value\"1"
"id2","value\"2"
"id3","value\"3"
"id4","value\"4"
"id5","value\"5"
"id6","value\"6"
"id7","value\"7"
"id8","value\"8"
"id9","value\"9"
11 changes: 11 additions & 0 deletions datafusion/core/tests/data/quote.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
c1,c2
~id0~,~value0~
~id1~,~value1~
~id2~,~value2~
~id3~,~value3~
~id4~,~value4~
~id5~,~value5~
~id6~,~value6~
~id7~,~value7~
~id8~,~value8~
~id9~,~value9~
65 changes: 65 additions & 0 deletions datafusion/sqllogictest/test_files/csv_files.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

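# Create external CSV tables that set the `quote` and `escape` options,
# then verify that the quoted/escaped values are read back correctly.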
statement ok
CREATE EXTERNAL TABLE csv_with_quote (
c1 VARCHAR,
c2 VARCHAR
) STORED AS CSV
WITH HEADER ROW
DELIMITER ','
OPTIONS ('quote' '~')
LOCATION '../core/tests/data/quote.csv';

statement ok
CREATE EXTERNAL TABLE csv_with_escape (
c1 VARCHAR,
c2 VARCHAR
) STORED AS CSV
WITH HEADER ROW
DELIMITER ','
OPTIONS ('escape' '\"')
LOCATION '../core/tests/data/escape.csv';

query TT
select * from csv_with_quote;
----
id0 value0
id1 value1
id2 value2
id3 value3
id4 value4
id5 value5
id6 value6
id7 value7
id8 value8
id9 value9

query TT
select * from csv_with_escape;
----
id0 value"0
id1 value"1
id2 value"2
id3 value"3
id4 value"4
id5 value"5
id6 value"6
id7 value"7
id8 value"8
id9 value"9

0 comments on commit aeb012e

Please sign in to comment.