diff --git a/CHANGELOG.md b/CHANGELOG.md index 785b79d..6f8f2f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # NEXT-RELEASE +- NEXT-37536 - Added the profile mapping option `column_type` with which the column type can be specified if the inferred one leads to conversion issues. Valid options are `string`, `number` and `boolean`. - NEXT-37310 - Added single row import strategy when encountering an error that cannot be handled automatically during a chunk import. # v0.8.0 diff --git a/README.md b/README.md index 3e77f50..5e792a8 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,8 @@ mappings: entity_path: "name" - file_column: "product number" entity_path: "productNumber" + # column type defines the data type for the internal processing of the column data + column_type: "string" - file_column: "stock" entity_path: "stock" - file_column: "tax id" diff --git a/profiles/default_customer.yaml b/profiles/default_customer.yaml index 183b9c2..8fd2483 100644 --- a/profiles/default_customer.yaml +++ b/profiles/default_customer.yaml @@ -16,7 +16,8 @@ mappings: - file_column: "account type" entity_path: "accountType" - file_column: "customer number" - key: "customer_number" + entity_path: "customerNumber" + column_type: "string" - file_column: "first name" entity_path: "firstName" - file_column: "last name" @@ -52,7 +53,8 @@ mappings: - file_column: "default billing address street" entity_path: "defaultBillingAddress?.street" - file_column: "default billing address zip code" - key: "default_billing_address_zip_code" + entity_path: "defaultBillingAddress?.zipcode" + column_type: "string" - file_column: "default billing address city" entity_path: "defaultBillingAddress?.city" - file_column: "default billing address country id" @@ -74,7 +76,8 @@ mappings: - file_column: "default shipping address street" entity_path: "defaultShippingAddress?.street" - file_column: "default shipping address zip code" - key: "default_shipping_address_zip_code" + entity_path: 
"defaultShippingAddress?.zipcode" + column_type: "string" - file_column: "default shipping address city" entity_path: "defaultShippingAddress?.city" - file_column: "default shipping address country id" @@ -90,9 +93,6 @@ mappings: serialize_script: | row = #{ - customer_number: entity.customerNumber, - default_billing_address_zip_code: entity.defaultBillingAddress?.zipCode, - default_shipping_address_zip_code: entity.defaultShippingAddress?.zipCode, default_group_name: entity.group.translated?.name, default_sales_channel_name: entity.salesChannel.translated?.name, default_payment_method_name: entity.defaultPaymentMethod.translated?.name @@ -102,13 +102,6 @@ deserialize_script: | let default_language = get_default("LANGUAGE_SYSTEM"); entity = #{ - customerNumber: `${row.customer_number}`, // TODO: fix conversion (NEXT-37536) - defaultBillingAddress: #{ - zipCode: `${row.default_billing_address_zip_code}`, // TODO: fix conversion (NEXT-37536) - }, - defaultShippingAddress: #{ - zipCode: `${row.default_shipping_address_zip_code}`, // TODO: fix conversion (NEXT-37536) - }, group: #{ translations: [#{ languageId: default_language, diff --git a/profiles/default_newsletter_recipient.yaml b/profiles/default_newsletter_recipient.yaml index 1d20a1b..1cb1475 100644 --- a/profiles/default_newsletter_recipient.yaml +++ b/profiles/default_newsletter_recipient.yaml @@ -26,10 +26,5 @@ mappings: - file_column: "hash" entity_path: "hash" - file_column: "zipCode" - key: "zip_code" - -serialize_script: | - row.zip_code = entity.zipCode; - -deserialize_script: | - entity.zipCode = `${row.zip_code}`; // TODO: fix conversion (NEXT-37536) + entity_path: "zipCode" + column_type: "string" \ No newline at end of file diff --git a/profiles/default_order.yaml b/profiles/default_order.yaml index f905dfa..aa2c1e3 100644 --- a/profiles/default_order.yaml +++ b/profiles/default_order.yaml @@ -11,10 +11,9 @@ mappings: entity_path: "languageId" - file_column: "sales channel id" entity_path: 
"salesChannelId" - - file_column: "state id" - entity_path: "stateId" - file_column: "order number" - key: "order_number" + entity_path: "orderNumber" + column_type: "string" - file_column: "currency factor" entity_path: "currencyFactor" - file_column: "order date time" @@ -38,7 +37,8 @@ mappings: - file_column: "billing address street" entity_path: "billingAddress?.street" - file_column: "billing address zip code" - key: "billing_address_zipcode" + entity_path: "billingAddress?.zipcode" + column_type: "string" - file_column: "billing address company" entity_path: "billingAddress?.company" - file_column: "billing address city" @@ -73,14 +73,19 @@ mappings: key: "price_position_price" - file_column: "price tax rates" key: "price_tax_rates" + column_type: "string" - file_column: "price tax percentages" key: "price_tax_percentages" + column_type: "string" - file_column: "price calculated taxes" key: "price_calculated_taxes" + column_type: "string" - file_column: "price calculated tax prices" key: "price_calculated_tax_prices" + column_type: "string" - file_column: "price calculated tax rates" key: "price_calculated_tax_rates" + column_type: "string" - file_column: "shipping cost quantity" key: "shipping_cost_quantity" - file_column: "shipping cost unit price" @@ -89,14 +94,19 @@ mappings: key: "shipping_cost_total_price" - file_column: "shipping cost tax rates" key: "shipping_cost_tax_rates" + column_type: "string" - file_column: "shipping cost tax percentages" key: "shipping_cost_tax_percentages" + column_type: "string" - file_column: "shipping cost calculated taxes" key: "shipping_cost_calculated_taxes" + column_type: "string" - file_column: "shipping cost calculated tax prices" key: "shipping_cost_calculated_tax_prices" + column_type: "string" - file_column: "shipping cost calculated tax rates" key: "shipping_cost_calculated_tax_rates" + column_type: "string" serialize_script: | fn encode_values(arr, value_name) { @@ -114,8 +124,6 @@ serialize_script: | } row = #{ 
- order_number: entity.orderNumber, - billing_address_zipcode: entity.billingAddress?.zipcode, item_rounding_decimals: entity.itemRounding?.decimals, item_rounding_interval: entity.itemRounding?.interval, item_rounding_round_for_net: entity.itemRounding?.roundForNet, @@ -214,10 +222,6 @@ deserialize_script: | } entity = #{ - orderNumber: `${row.order_number}`, // TODO: fix conversion (NEXT-37536) - billingAddress: #{ - zipcode: `${row.billing_address_zipcode}`, // TODO: fix conversion (NEXT-37536) - }, itemRounding: #{ decimals: row.item_rounding_decimals, interval: row.item_rounding_interval, diff --git a/profiles/default_product.yaml b/profiles/default_product.yaml index d489199..dfb27cc 100644 --- a/profiles/default_product.yaml +++ b/profiles/default_product.yaml @@ -23,6 +23,7 @@ mappings: entity_path: "cover" - file_column: "product number" entity_path: "productNumber" + column_type: "string" - file_column: "active" entity_path: "active" - file_column: "stock" diff --git a/profiles/default_product_variants.yaml b/profiles/default_product_variants.yaml index 17fcd1c..b16c930 100644 --- a/profiles/default_product_variants.yaml +++ b/profiles/default_product_variants.yaml @@ -26,6 +26,7 @@ mappings: entity_path: "cover" - file_column: "product number" entity_path: "productNumber" + column_type: "string" - file_column: "active" entity_path: "active" - file_column: "stock" diff --git a/profiles/default_promotion_discount.yaml b/profiles/default_promotion_discount.yaml index 74f081a..6c6ce12 100644 --- a/profiles/default_promotion_discount.yaml +++ b/profiles/default_promotion_discount.yaml @@ -18,14 +18,9 @@ mappings: - file_column: "sorter key" entity_path: "sorterKey" - file_column: "applier key" - key: "applier_key" + entity_path: "applierKey" + column_type: "string" - file_column: "usage key" entity_path: "usageKey" - file_column: "picker key" entity_path: "pickerKey" - -serialize_script: | - row.applier_key = entity.applierKey; - -deserialize_script: | - 
entity.applierKey = `${row.applier_key}`; // TODO: fix conversion (NEXT-37536) diff --git a/src/config_file.rs b/src/config_file.rs index 9281e89..3fa458c 100644 --- a/src/config_file.rs +++ b/src/config_file.rs @@ -132,6 +132,7 @@ impl Mapping { pub struct EntityPathMapping { pub file_column: String, pub entity_path: String, + pub column_type: Option<ColumnType>, } #[derive(Debug, Clone, Default, Eq, PartialEq, Deserialize)] @@ -139,6 +140,15 @@ pub struct EntityScriptMapping { pub file_column: String, /// used as an identifier inside the script pub key: String, + pub column_type: Option<ColumnType>, } + +#[derive(Debug, Clone, Eq, PartialEq, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ColumnType { + String, + Number, + Boolean, } #[cfg(test)] diff --git a/src/data/import.rs b/src/data/import.rs index 3f58da0..e0c712f 100644 --- a/src/data/import.rs +++ b/src/data/import.rs @@ -4,7 +4,7 @@ use crate::api::filter::Criteria; use crate::api::{Entity, SwApiError, SwError, SwErrorBody, SyncAction}; use crate::data::transform::deserialize_row; use crate::SyncContext; -use anyhow::anyhow; +use anyhow::{anyhow, Context}; use csv::StringRecord; use itertools::Itertools; use std::sync::Arc; @@ -55,10 +55,10 @@ fn process_file_chunk( let headers = &headers; s.spawn_fifo(move |_| { println!("sync chunk {first_index}..={last_index} (size={chunk_length}) is now being deserialized"); - let entity_chunk = match deserialize_chunk(headers, records_chunk, &context_clone) { + let entity_chunk = match deserialize_chunk(headers, first_index, records_chunk, &context_clone) { Ok(chunk) => chunk, Err(e) => { - println!("sync chunk {first_index}..={last_index} (size={chunk_length}) failed to deserialize:\n{e}"); + println!("sync chunk {first_index}..={last_index} (size={chunk_length}) failed to deserialize:\n{e:#}"); return; } }; @@ -76,24 +76,21 @@ fn process_file_chunk( fn deserialize_chunk( headers: &StringRecord, + first_index: usize, records_chunk: Vec<Result<StringRecord, csv::Error>>, context: &Arc<SyncContext>, ) -> anyhow::Result<Vec<Entity>> {
let mut entities: Vec<Entity> = Vec::with_capacity(Criteria::MAX_LIMIT); - for record in records_chunk { + for (record_counter, record) in records_chunk.into_iter().enumerate() { let record = record?; // fail on first CSV read failure - let entity = match deserialize_row( + let entity = deserialize_row( headers, &record, &context.profile, &context.scripting_environment, - ) { - Ok(e) => e, - Err(e) => { - return Err(e); - } - }; + ) + .with_context(|| format!("error in row {}", record_counter + first_index))?; entities.push(entity); } diff --git a/src/data/transform/mod.rs b/src/data/transform/mod.rs index e7c0685..af2e102 100644 --- a/src/data/transform/mod.rs +++ b/src/data/transform/mod.rs @@ -3,7 +3,7 @@ pub mod script; use crate::api::Entity; -use crate::config_file::{Mapping, Profile}; +use crate::config_file::{ColumnType, Mapping, Profile}; use crate::data::ScriptingEnvironment; use anyhow::Context; use csv::StringRecord; @@ -35,7 +35,11 @@ pub fn deserialize_row( let raw_value = row .get(column_index) .context("failed to get column of row")?; - let json_value = get_json_value_from_string(raw_value); + + let json_value = get_json_value_from_string(raw_value, &path_mapping.column_type) + .with_context(|| { + format!("error in column \"{}\"", &headers[column_index]) + })?; entity.insert_by_path(&path_mapping.entity_path, json_value); } @@ -99,18 +103,33 @@ pub fn serialize_entity( Ok(row) } -fn get_json_value_from_string(raw_input: &str) -> serde_json::Value { +fn get_json_value_from_string( + raw_input: &str, + column_type: &Option<ColumnType>, +) -> anyhow::Result<serde_json::Value> { let raw_input_lowercase = raw_input.to_lowercase(); - if raw_input_lowercase == "null" || raw_input.trim().is_empty() { - serde_json::Value::Null - } else if raw_input_lowercase == "true" { - serde_json::Value::Bool(true) - } else if raw_input_lowercase == "false" { - serde_json::Value::Bool(false) - } else if let Ok(number) = serde_json::Number::from_str(raw_input) { - serde_json::Value::Number(number) - } else { - 
serde_json::Value::String(raw_input.to_owned()) + + match (raw_input_lowercase.as_str(), column_type) { + (_, Some(ColumnType::String)) => Ok(serde_json::Value::String(raw_input.to_owned())), + (_, Some(ColumnType::Number)) => + serde_json::Number::from_str(raw_input) + .map(serde_json::Value::Number) + .map_err(|_| anyhow::anyhow!("failed to convert {raw_input} into a number; make sure that you use the column types correctly")), + (_, Some(ColumnType::Boolean)) => + raw_input.parse::<bool>() + .map(serde_json::Value::Bool) + .map_err(|_| anyhow::anyhow!("failed to convert {raw_input} into a boolean; make sure that you use the column types correctly")), + ("null", _) => Ok(serde_json::Value::Null), + ("true", _) => Ok(serde_json::Value::Bool(true)), + ("false", _) => Ok(serde_json::Value::Bool(false)), + (input, _) if input.trim().is_empty() => Ok(serde_json::Value::Null), + _ => { + if let Ok(number) = serde_json::Number::from_str(raw_input) { + Ok(serde_json::Value::Number(number)) + } else { + Ok(serde_json::Value::String(raw_input.to_owned())) + } + }, } } @@ -220,6 +239,7 @@ impl EntityPath for Entity { #[cfg(test)] mod tests { + use crate::config_file::ColumnType; use crate::data::transform::{get_json_value_from_string, EntityPath}; use serde_json::{json, Number, Value}; @@ -411,21 +431,107 @@ mod tests { #[test] fn test_get_json_value_from_string() { - let value = get_json_value_from_string("null"); - assert_eq!(value, json!(null)); - let value = get_json_value_from_string(""); - assert_eq!(value, json!(null)); - - let value = get_json_value_from_string("true"); - assert_eq!(value, json!(true)); - - let value = get_json_value_from_string("false"); - assert_eq!(value, json!(false)); + #[derive(Debug)] + struct TestCase { + name: &'static str, + raw_input: String, + expect: ExpectResult, + column_type: Option<ColumnType>, + } - let value = get_json_value_from_string("42.42"); - assert_eq!(value, json!(42.42)); + #[derive(Debug)] + enum ExpectResult { + Failure, + Value(Value),
+ } - let value = get_json_value_from_string("my string"); - assert_eq!(value, json!("my string")); + let test_cases = [ + TestCase { + name: "converting: 'null', expect: null", + raw_input: String::from("null"), + expect: ExpectResult::Value(json!(null)), + column_type: None, + }, + TestCase { + name: "converting: 'null', type: string, expect: 'null'", + raw_input: String::from("null"), + expect: ExpectResult::Value(json!("null")), + column_type: Some(ColumnType::String), + }, + TestCase { + name: "converting: '', expect: null", + raw_input: String::from(""), + expect: ExpectResult::Value(json!(null)), + column_type: None, + }, + TestCase { + name: "converting: 'true', expect: true", + raw_input: String::from("true"), + expect: ExpectResult::Value(json!(true)), + column_type: None, + }, + TestCase { + name: "converting: 'true', type: Boolean, expect: true", + raw_input: String::from("true"), + expect: ExpectResult::Value(json!(true)), + column_type: Some(ColumnType::Boolean), + }, + TestCase { + name: "converting: 'false', expect: false", + raw_input: String::from("false"), + expect: ExpectResult::Value(json!(false)), + column_type: None, + }, + TestCase { + name: "converting: 'false', type: Boolean, expect: false", + raw_input: String::from("false"), + expect: ExpectResult::Value(json!(false)), + column_type: Some(ColumnType::Boolean), + }, + TestCase { + name: "converting: '42.42', expect: 42.42", + raw_input: String::from("42.42"), + expect: ExpectResult::Value(json!(42.42)), + column_type: None, + }, + TestCase { + name: "converting: 'my string', expect: 'my string'", + raw_input: String::from("my string"), + expect: ExpectResult::Value(json!("my string")), + column_type: None, + }, + TestCase { + name: "converting: 'my string', type: String, expect: 'my string'", + raw_input: String::from("my string"), + expect: ExpectResult::Value(json!("my string")), + column_type: Some(ColumnType::String), + }, + TestCase { + name: "converting: 'my string', type: Number, 
expect: Failure", + raw_input: String::from("my string"), + expect: ExpectResult::Failure, + column_type: Some(ColumnType::Number), + }, + TestCase { + name: "converting: 'my string', type: Boolean, expect: Failure", + raw_input: String::from("my string"), + expect: ExpectResult::Failure, + column_type: Some(ColumnType::Boolean), + }, + ]; + + for test_case in test_cases { + let value = get_json_value_from_string(&test_case.raw_input, &test_case.column_type); + + match test_case.expect { + ExpectResult::Failure => { + assert!(value.is_err(), "{}", test_case.name); + } + ExpectResult::Value(expected) => { + assert!(value.is_ok(), "{}", test_case.name); + assert_eq!(value.unwrap(), expected, "{}", test_case.name); + } + } + } } } diff --git a/src/data/transform/script.rs b/src/data/transform/script.rs index cf1311b..366f13b 100644 --- a/src/data/transform/script.rs +++ b/src/data/transform/script.rs @@ -45,7 +45,8 @@ impl ScriptingEnvironment { .get(column_index) .context("failed to get column of row")?; - let json_value = get_json_value_from_string(raw_value); + let json_value = get_json_value_from_string(raw_value, &mapping.column_type)?; + let script_value = rhai::serde::to_dynamic(json_value) .context("failed to convert CSV value into script value")?; @@ -326,10 +327,12 @@ mod tests { Mapping::ByScript(EntityScriptMapping { file_column: "bar".to_string(), key: "bar_key".to_string(), + column_type: None, }), Mapping::ByScript(EntityScriptMapping { file_column: "number + 1".to_string(), key: "number_plus_one".to_string(), + column_type: None, }), ], ..Default::default() diff --git a/src/data/validate.rs b/src/data/validate.rs index 3f94417..e03f99b 100644 --- a/src/data/validate.rs +++ b/src/data/validate.rs @@ -50,6 +50,7 @@ pub fn validate_paths_for_entity( let mapping = Mapping::ByPath(EntityPathMapping { file_column: path_mapping.file_column.clone(), entity_path: path, + column_type: path_mapping.column_type.clone(), }); // validate the new mapping @@ -70,6 
+71,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer id".to_string(), entity_path: "manufacturerId".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -93,6 +95,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer id".to_string(), entity_path: "manufacturerId".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -116,6 +119,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer id".to_string(), entity_path: "manufacturerId".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -143,6 +147,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer name".to_string(), entity_path: "manufacturer.name".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -172,6 +177,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer name".to_string(), entity_path: "manufacturer.name".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -208,6 +214,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer name".to_string(), entity_path: "manufacturer?.name".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -244,6 +251,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "manufacturer name".to_string(), entity_path: "manufacturer?.name".to_string(), + column_type: None, })]; let api_schema = json!({ "product": { @@ -282,6 +290,7 @@ mod tests { let mapping = vec![Mapping::ByPath(EntityPathMapping { file_column: "tax country".to_string(), entity_path: "tax.country.name".to_string(), + column_type: None, })]; let api_schema = json!({ "product": {