From 53dcfe7f2d2330163a1bfce169cda38869c93bb5 Mon Sep 17 00:00:00 2001 From: Jacob Mastel Date: Wed, 14 Jul 2021 19:01:54 -0700 Subject: [PATCH 1/5] Calculate the batch size with respect to SQL Server's 2100 parameter limit. --- .../macros/materializations/seed/seed.sql | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/dbt/include/sqlserver/macros/materializations/seed/seed.sql b/dbt/include/sqlserver/macros/materializations/seed/seed.sql index 64e6e68a..06126143 100644 --- a/dbt/include/sqlserver/macros/materializations/seed/seed.sql +++ b/dbt/include/sqlserver/macros/materializations/seed/seed.sql @@ -1,7 +1,23 @@ -{% macro sqlserver__basic_load_csv_rows(model, batch_size, agate_table) %} +{% macro calc_batch_size(num_columns,max_batch_size) %} + {# + SQL Server allows for a max of 2100 parameters in a single statement. + Check if the max_batch_size fits with the number of columns, otherwise + reduce the batch size so it fits. + #} + {% if num_columns * max_batch_size < 2100 %} + {% set batch_size = max_batch_size %} + {% else %} + {% set batch_size = (2100 / num_columns)|int %} + {% endif %} + + {{ return(batch_size) }} +{% endmacro %} + +{% macro sqlserver__basic_load_csv_rows(model, max_batch_size, agate_table) %} {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} - {% set bindings = [] %} + {% set batch_size = calc_batch_size(cols_sql|length, max_batch_size) %} + {% set bindings = [] %} {% set statements = [] %} {% for chunk in agate_table.rows | batch(batch_size) %} From ffe2494ec84ae79e98a8dc758d77fa8a48dbdffc Mon Sep 17 00:00:00 2001 From: Jacob Mastel Date: Mon, 19 Jul 2021 11:34:16 -0700 Subject: [PATCH 2/5] Set max_batch_size via a defaulting variable. 
--- dbt/include/sqlserver/macros/materializations/seed/seed.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt/include/sqlserver/macros/materializations/seed/seed.sql b/dbt/include/sqlserver/macros/materializations/seed/seed.sql index 06126143..743005a1 100644 --- a/dbt/include/sqlserver/macros/materializations/seed/seed.sql +++ b/dbt/include/sqlserver/macros/materializations/seed/seed.sql @@ -50,5 +50,6 @@ {% endmacro %} {% macro sqlserver__load_csv_rows(model, agate_table) %} - {{ return(sqlserver__basic_load_csv_rows(model, 200, agate_table) )}} + {% set max_batch_size = var("max_batch_size", 400) %} + {{ return(sqlserver__basic_load_csv_rows(model, max_batch_size, agate_table) )}} {% endmacro %} \ No newline at end of file From 4267359dc2969a7c5a3153a8dac608458632591e Mon Sep 17 00:00:00 2001 From: Jacob Mastel Date: Mon, 19 Jul 2021 11:47:04 -0700 Subject: [PATCH 3/5] Added documentation about the max_batch_size variable. --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 734a5282..320a2db5 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,15 @@ client_secret: clientsecret ### Seeds +By default, dbt-sqlserver will attempt to insert seed files in batches of 400 rows. If this exceeds SQL Server's 2100 parameter limit, the adapter will automatically limit the batch size to the highest safe value possible. + +To set a different default batch size, you can set the variable `max_batch_size` in your project configuration. + +```yaml +vars: + max_batch_size: 200 # Any integer will do. +``` + ### Hooks ### Custom schemas From 90816193642bc41cb10dddd2e31dade4a6761a6a Mon Sep 17 00:00:00 2001 From: Jacob Mastel Date: Mon, 19 Jul 2021 12:45:24 -0700 Subject: [PATCH 4/5] Added section for v0.20.0 and listed changes. 
--- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e944d9ce..ea545e95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,15 @@ # Changelog + +### v0.20.0 + +#### features + +- users can now declare a custom `max_batch_size` in the project configuration to set the batch size used by the seed file loader. [#127](https://github.com/dbt-msft/dbt-sqlserver/issues/127) and [#151](https://github.com/dbt-msft/dbt-sqlserver/pull/151) thanks [@jacobm001](https://github.com/jacobm001) + +#### under the hood + +- `sqlserver__load_csv_rows` now has a safety check provided by `calc_batch_size()` to ensure the insert statements won't exceed SQL Server's 2100 parameter limit. [#127](https://github.com/dbt-msft/dbt-sqlserver/issues/127) and [#151](https://github.com/dbt-msft/dbt-sqlserver/pull/151) thanks [@jacobm001](https://github.com/jacobm001) + ### v0.19.1 #### features: From 3fc968e488ce0ecd35ba0a4428dd2a8a6612829c Mon Sep 17 00:00:00 2001 From: Anders Date: Mon, 19 Jul 2021 13:46:57 -0700 Subject: [PATCH 5/5] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 320a2db5..43eb65ef 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ To set a different default seed value, you can set the variable `max_batch_size` ```yaml vars: - max_batch_size: 200 # Any integer will do. + max_batch_size: 200 # Any integer less than or equal to 2100 will do. ``` ### Hooks