Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support quoting columns in inferred schemas #337

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,28 @@ sources:
- last_name
- email

# CSV-backed external table whose column names contain a dot (user.id, ...).
# The anchors declared here (&csv-users, &cols-of-the-users,
# &equal-to-the-users) are aliased by the other users_* entries in this file.
- name: users_csv_unpartitioned
external: &csv-users
location: '@{{ target.schema }}.dbt_external_tables_testing/csv'
file_format: '( type = csv skip_header = 1 )'
columns: &cols-of-the-users
- name: user.id
data_type: int
- name: user.first_name
data_type: varchar(64)
- name: user.last_name
data_type: varchar(64)
- name: user.email
data_type: varchar(64)
# Equality test against ref('users'), restricted to the four dotted columns.
tests: &equal-to-the-users
- dbt_utils.equality:
compare_model: ref('users')
compare_columns:
- user.id
- user.first_name
- user.last_name
- user.email

- name: people_csv_partitioned
external:
<<: *csv-people
Expand All @@ -40,13 +62,31 @@ sources:
columns: *cols-of-the-people
tests: *equal-to-the-people

# Partitioned variant of the CSV users table: merges the *csv-users settings,
# disables auto_refresh and adds a `section` partition column.
- name: users_csv_partitioned
external:
<<: *csv-users
auto_refresh: false # make sure this templates right
partitions: &parts-of-the-users
- name: section
data_type: varchar
# Takes the first character that follows 'section=' in the staged file path.
expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
columns: *cols-of-the-users
tests: *equal-to-the-users

# JSON-backed external table for the people fixture. The columns and tests
# aliases point at anchors that are defined outside the lines visible here.
- name: people_json_unpartitioned
external: &json-people
location: '@{{ target.schema }}.dbt_external_tables_testing/json'
file_format: '( type = json )'
columns: *cols-of-the-people
tests: *equal-to-the-people

# JSON-backed external table using the dotted users columns and equality test.
# NOTE(review): &json-users is not aliased in the visible part of this file;
# confirm a partitioned users JSON entry exists or is intentionally omitted.
- name: users_json_unpartitioned
external: &json-users
location: '@{{ target.schema }}.dbt_external_tables_testing/json'
file_format: '( type = json )'
columns: *cols-of-the-users
tests: *equal-to-the-users

- name: people_json_partitioned
external:
<<: *json-people
Expand Down Expand Up @@ -123,6 +163,13 @@ sources:
columns: *cols-of-the-people
tests: *equal-to-the-people

# Parquet-backed external table with an explicit column list (no schema
# inference). file_format references a format by name inside the target schema
# rather than an inline definition. &parquet-users is aliased by the
# users_parquet_infer_schema_* entries.
- name: users_parquet_column_list_unpartitioned
external: &parquet-users
location: '@{{ target.schema }}.dbt_external_tables_testing/parquet/'
file_format: '{{ target.schema }}.dbt_external_tables_testing_parquet'
columns: *cols-of-the-users
tests: *equal-to-the-users

- name: people_parquet_column_list_partitioned
external:
<<: *parquet-people
Expand All @@ -136,13 +183,26 @@ sources:
infer_schema: true
tests: *equal-to-the-people

# Parquet table with no column list: the schema is inferred, and the 'quote'
# value requests that the inferred column names be quoted.
- name: users_parquet_infer_schema_unpartitioned_quoted
external:
<<: *parquet-users
infer_schema: 'quote'
tests: *equal-to-the-users

# Parquet table for the people fixture with an inferred schema (unquoted
# column names) plus partitions. The aliased anchors are defined outside the
# lines visible here.
- name: people_parquet_infer_schema_partitioned
external:
<<: *parquet-people
partitions: *parts-of-the-people
infer_schema: true
tests: *equal-to-the-people

# Partitioned Parquet table with an inferred schema and quoted column names.
# NOTE(review): this entry sets infer_schema: 'quote' but, unlike
# users_parquet_infer_schema_unpartitioned_quoted, its name has no `_quoted`
# suffix; consider aligning the two names.
- name: users_parquet_infer_schema_partitioned
external:
<<: *parquet-users
partitions: *parts-of-the-users
infer_schema: 'quote'
tests: *equal-to-the-users

- name: people_parquet_infer_schema_partitioned_and_column_desc
external:
<<: *parquet-people
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
201 changes: 201 additions & 0 deletions integration_tests/seeds/users.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
user.id,user.first_name,user.last_name,user.email
1,Jack,Hunter,jhunter0@pbs.org
2,Kathryn,Walker,kwalker1@ezinearticles.com
3,Gerald,Ryan,gryan2@com.com
4,Bonnie,Spencer,bspencer3@ameblo.jp
5,Harold,Taylor,htaylor4@people.com.cn
6,Jacqueline,Griffin,jgriffin5@t.co
7,Wanda,Arnold,warnold6@google.nl
8,Craig,Ortiz,cortiz7@sciencedaily.com
9,Gary,Day,gday8@nih.gov
10,Rose,Wright,rwright9@yahoo.co.jp
11,Raymond,Kelley,rkelleya@fc2.com
12,Gerald,Robinson,grobinsonb@disqus.com
13,Mildred,Martinez,mmartinezc@samsung.com
14,Dennis,Arnold,darnoldd@google.com
15,Judy,Gray,jgraye@opensource.org
16,Theresa,Garza,tgarzaf@epa.gov
17,Gerald,Robertson,grobertsong@csmonitor.com
18,Philip,Hernandez,phernandezh@adobe.com
19,Julia,Gonzalez,jgonzalezi@cam.ac.uk
20,Andrew,Davis,adavisj@patch.com
21,Kimberly,Harper,kharperk@foxnews.com
22,Mark,Martin,mmartinl@marketwatch.com
23,Cynthia,Ruiz,cruizm@google.fr
24,Samuel,Carroll,scarrolln@youtu.be
25,Jennifer,Larson,jlarsono@vinaora.com
26,Ashley,Perry,aperryp@rakuten.co.jp
27,Howard,Rodriguez,hrodriguezq@shutterfly.com
28,Amy,Brooks,abrooksr@theatlantic.com
29,Louise,Warren,lwarrens@adobe.com
30,Tina,Watson,twatsont@myspace.com
31,Janice,Kelley,jkelleyu@creativecommons.org
32,Terry,Mccoy,tmccoyv@bravesites.com
33,Jeffrey,Morgan,jmorganw@surveymonkey.com
34,Louis,Harvey,lharveyx@sina.com.cn
35,Philip,Miller,pmillery@samsung.com
36,Willie,Marshall,wmarshallz@ow.ly
37,Patrick,Lopez,plopez10@redcross.org
38,Adam,Jenkins,ajenkins11@harvard.edu
39,Benjamin,Cruz,bcruz12@linkedin.com
40,Ruby,Hawkins,rhawkins13@gmpg.org
41,Carlos,Barnes,cbarnes14@a8.net
42,Ruby,Griffin,rgriffin15@bravesites.com
43,Sean,Mason,smason16@icq.com
44,Anthony,Payne,apayne17@utexas.edu
45,Steve,Cruz,scruz18@pcworld.com
46,Anthony,Garcia,agarcia19@flavors.me
47,Doris,Lopez,dlopez1a@sphinn.com
48,Susan,Nichols,snichols1b@freewebs.com
49,Wanda,Ferguson,wferguson1c@yahoo.co.jp
50,Andrea,Pierce,apierce1d@google.co.uk
51,Lawrence,Phillips,lphillips1e@jugem.jp
52,Judy,Gilbert,jgilbert1f@multiply.com
53,Eric,Williams,ewilliams1g@joomla.org
54,Ralph,Romero,rromero1h@sogou.com
55,Jean,Wilson,jwilson1i@ocn.ne.jp
56,Lori,Reynolds,lreynolds1j@illinois.edu
57,Donald,Moreno,dmoreno1k@bbc.co.uk
58,Steven,Berry,sberry1l@eepurl.com
59,Theresa,Shaw,tshaw1m@people.com.cn
60,John,Stephens,jstephens1n@nationalgeographic.com
61,Richard,Jacobs,rjacobs1o@state.tx.us
62,Andrew,Lawson,alawson1p@over-blog.com
63,Peter,Morgan,pmorgan1q@rambler.ru
64,Nicole,Garrett,ngarrett1r@zimbio.com
65,Joshua,Kim,jkim1s@edublogs.org
66,Ralph,Roberts,rroberts1t@people.com.cn
67,George,Montgomery,gmontgomery1u@smugmug.com
68,Gerald,Alvarez,galvarez1v@flavors.me
69,Donald,Olson,dolson1w@whitehouse.gov
70,Carlos,Morgan,cmorgan1x@pbs.org
71,Aaron,Stanley,astanley1y@webnode.com
72,Virginia,Long,vlong1z@spiegel.de
73,Robert,Berry,rberry20@tripadvisor.com
74,Antonio,Brooks,abrooks21@unesco.org
75,Ruby,Garcia,rgarcia22@ovh.net
76,Jack,Hanson,jhanson23@blogtalkradio.com
77,Kathryn,Nelson,knelson24@walmart.com
78,Jason,Reed,jreed25@printfriendly.com
79,George,Coleman,gcoleman26@people.com.cn
80,Rose,King,rking27@ucoz.com
81,Johnny,Holmes,jholmes28@boston.com
82,Katherine,Gilbert,kgilbert29@altervista.org
83,Joshua,Thomas,jthomas2a@ustream.tv
84,Julie,Perry,jperry2b@opensource.org
85,Richard,Perry,rperry2c@oracle.com
86,Kenneth,Ruiz,kruiz2d@wikimedia.org
87,Jose,Morgan,jmorgan2e@webnode.com
88,Donald,Campbell,dcampbell2f@goo.ne.jp
89,Debra,Collins,dcollins2g@uol.com.br
90,Jesse,Johnson,jjohnson2h@stumbleupon.com
91,Elizabeth,Stone,estone2i@histats.com
92,Angela,Rogers,arogers2j@goodreads.com
93,Emily,Dixon,edixon2k@mlb.com
94,Albert,Scott,ascott2l@tinypic.com
95,Barbara,Peterson,bpeterson2m@ow.ly
96,Adam,Greene,agreene2n@fastcompany.com
97,Earl,Sanders,esanders2o@hc360.com
98,Angela,Brooks,abrooks2p@mtv.com
99,Harold,Foster,hfoster2q@privacy.gov.au
100,Carl,Meyer,cmeyer2r@disqus.com
101,Michael,Perez,mperez0@chronoengine.com
102,Shawn,Mccoy,smccoy1@reddit.com
103,Kathleen,Payne,kpayne2@cargocollective.com
104,Jimmy,Cooper,jcooper3@cargocollective.com
105,Katherine,Rice,krice4@typepad.com
106,Sarah,Ryan,sryan5@gnu.org
107,Martin,Mcdonald,mmcdonald6@opera.com
108,Frank,Robinson,frobinson7@wunderground.com
109,Jennifer,Franklin,jfranklin8@mail.ru
110,Henry,Welch,hwelch9@list-manage.com
111,Fred,Snyder,fsnydera@reddit.com
112,Amy,Dunn,adunnb@nba.com
113,Kathleen,Meyer,kmeyerc@cdc.gov
114,Steve,Ferguson,sfergusond@reverbnation.com
115,Teresa,Hill,thille@dion.ne.jp
116,Amanda,Harper,aharperf@mail.ru
117,Kimberly,Ray,krayg@xing.com
118,Johnny,Knight,jknighth@jalbum.net
119,Virginia,Freeman,vfreemani@tiny.cc
120,Anna,Austin,aaustinj@diigo.com
121,Willie,Hill,whillk@mail.ru
122,Sean,Harris,sharrisl@zdnet.com
123,Mildred,Adams,madamsm@usatoday.com
124,David,Graham,dgrahamn@zimbio.com
125,Victor,Hunter,vhuntero@ehow.com
126,Aaron,Ruiz,aruizp@weebly.com
127,Benjamin,Brooks,bbrooksq@jalbum.net
128,Lisa,Wilson,lwilsonr@japanpost.jp
129,Benjamin,King,bkings@comsenz.com
130,Christina,Williamson,cwilliamsont@boston.com
131,Jane,Gonzalez,jgonzalezu@networksolutions.com
132,Thomas,Owens,towensv@psu.edu
133,Katherine,Moore,kmoorew@naver.com
134,Jennifer,Stewart,jstewartx@yahoo.com
135,Sara,Tucker,stuckery@topsy.com
136,Harold,Ortiz,hortizz@vkontakte.ru
137,Shirley,James,sjames10@yelp.com
138,Dennis,Johnson,djohnson11@slate.com
139,Louise,Weaver,lweaver12@china.com.cn
140,Maria,Armstrong,marmstrong13@prweb.com
141,Gloria,Cruz,gcruz14@odnoklassniki.ru
142,Diana,Spencer,dspencer15@ifeng.com
143,Kelly,Nguyen,knguyen16@altervista.org
144,Jane,Rodriguez,jrodriguez17@biblegateway.com
145,Scott,Brown,sbrown18@geocities.jp
146,Norma,Cruz,ncruz19@si.edu
147,Marie,Peters,mpeters1a@mlb.com
148,Lillian,Carr,lcarr1b@typepad.com
149,Judy,Nichols,jnichols1c@t-online.de
150,Billy,Long,blong1d@yahoo.com
151,Howard,Reid,hreid1e@exblog.jp
152,Laura,Ferguson,lferguson1f@tuttocitta.it
153,Anne,Bailey,abailey1g@geocities.com
154,Rose,Morgan,rmorgan1h@ehow.com
155,Nicholas,Reyes,nreyes1i@google.ru
156,Joshua,Kennedy,jkennedy1j@house.gov
157,Paul,Watkins,pwatkins1k@upenn.edu
158,Kathryn,Kelly,kkelly1l@businessweek.com
159,Adam,Armstrong,aarmstrong1m@techcrunch.com
160,Norma,Wallace,nwallace1n@phoca.cz
161,Timothy,Reyes,treyes1o@google.cn
162,Elizabeth,Patterson,epatterson1p@sun.com
163,Edward,Gomez,egomez1q@google.fr
164,David,Cox,dcox1r@friendfeed.com
165,Brenda,Wood,bwood1s@over-blog.com
166,Adam,Walker,awalker1t@blogs.com
167,Michael,Hart,mhart1u@wix.com
168,Jesse,Ellis,jellis1v@google.co.uk
169,Janet,Powell,jpowell1w@un.org
170,Helen,Ford,hford1x@creativecommons.org
171,Gerald,Carpenter,gcarpenter1y@about.me
172,Kathryn,Oliver,koliver1z@army.mil
173,Alan,Berry,aberry20@gov.uk
174,Harry,Andrews,handrews21@ameblo.jp
175,Andrea,Hall,ahall22@hp.com
176,Barbara,Wells,bwells23@behance.net
177,Anne,Wells,awells24@apache.org
178,Harry,Harper,hharper25@rediff.com
179,Jack,Ray,jray26@wufoo.com
180,Phillip,Hamilton,phamilton27@joomla.org
181,Shirley,Hunter,shunter28@newsvine.com
182,Arthur,Daniels,adaniels29@reuters.com
183,Virginia,Rodriguez,vrodriguez2a@walmart.com
184,Christina,Ryan,cryan2b@hibu.com
185,Theresa,Mendoza,tmendoza2c@vinaora.com
186,Jason,Cole,jcole2d@ycombinator.com
187,Phillip,Bryant,pbryant2e@rediff.com
188,Adam,Torres,atorres2f@sun.com
189,Margaret,Johnston,mjohnston2g@ucsd.edu
190,Paul,Payne,ppayne2h@hhs.gov
191,Todd,Willis,twillis2i@businessweek.com
192,Willie,Oliver,woliver2j@noaa.gov
193,Frances,Robertson,frobertson2k@go.com
194,Gregory,Hawkins,ghawkins2l@joomla.org
195,Lisa,Perkins,lperkins2m@si.edu
196,Jacqueline,Anderson,janderson2n@cargocollective.com
197,Shirley,Diaz,sdiaz2o@ucla.edu
198,Nicole,Meyer,nmeyer2p@flickr.com
199,Mary,Gray,mgray2q@constantcontact.com
200,Jean,Mcdonald,jmcdonald2r@baidu.com
7 changes: 4 additions & 3 deletions macros/plugins/snowflake/create_external_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,16 @@
{% endfor %}
{% else %}
{%- for column in columns_infer %}
{#- column[0] is the inferred column name and column[1] its data type. The name is passed through adapter.quote only when infer_schema is 'quote'. -#}
{%- set column_quoted = adapter.quote(column[0]) if infer_schema == 'quote' else column[0] %}
{%- set col_expression -%}
{%- if ignore_case -%}
{#- GET_IGNORE_CASE takes the field name as a string literal, so the raw name is used here: a quoted identifier inside the literal would make the quote characters part of the key being looked up. -#}
{%- set col_id = 'GET_IGNORE_CASE($1, ' ~ "'"~ column[0] ~"'"~ ')' -%}
{%- else -%}
{#- Path notation: the (optionally quoted) name follows "value:" directly. -#}
{%- set col_id = 'value:' ~ column_quoted -%}
{%- endif -%}
(case when is_null_value({{col_id}}) or lower({{col_id}}) = 'null' then null else {{col_id}} end)
{%- endset %}
{{column_quoted}} {{column[1]}} as ({{col_expression}}::{{column[1]}})
{{- ',' if not loop.last -}}
{% endfor %}
{%- endif -%}
Expand Down
18 changes: 18 additions & 0 deletions sample_sources/snowflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ sources:
- name: name
description: and this is a name

# Sample: Parquet external table whose schema is inferred and whose column
# names contain a dot, so both the inferred schema (infer_schema: "quote") and
# the documented columns (quote: true) are marked as quoted.
- name: parquet_with_inferred_schema_and_quoted_columns
description: "External table using Parquet, inferring the schema, quoting the columns"
external:
location: "@stage" # reference an existing external stage
file_format: "my_file_format" # we need a named file format for infer to work
infer_schema: "quote" # infer the table schema and quote the columns
partitions:
- name: section # we can define partitions on top of the schema columns
data_type: varchar(64)
expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
columns: # columns can still be listed for documentation/testing purposes
- name: user.id
description: this is a user id
quote: true
- name: user.name
description: and this is a user name
quote: true

- name: aws_sns_refresh_tbl
description: "External table using AWS SNS for auto-refresh"
external:
Expand Down
Loading